<a href="https://colab.research.google.com/github/DoItSon/Dacon/blob/main/%EA%B1%B4%EC%84%A4%EA%B8%B0%EA%B3%84%20%EC%98%A4%EC%9D%BC%20%EC%83%81%ED%83%9C%20%EB%B6%84%EB%A5%98%20AI%20%EA%B2%BD%EC%A7%84%EB%8C%80%ED%9A%8C/%EC%A7%80%EC%8B%9D_%EC%A6%9D%EB%A5%98_%EB%94%A5%EB%9F%AC%EB%8B%9D.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset

from sklearn.metrics import f1_score
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.impute import KNNImputer

import os
import pandas as pd
import numpy as np
from tqdm.auto import tqdm
import random

import warnings
warnings.filterwarnings(action='ignore') 

# Prefer the GPU when one is visible to PyTorch; otherwise run on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# Mount Google Drive at /content/drive (only works inside Google Colab).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Hyper-parameters shared by the teacher and student training runs.
CFG = dict(
    EPOCHS=30,
    LEARNING_RATE=2e-2,
    BATCH_SIZE=256,
    SEED=42,
)

In [None]:
def seed_everything(seed):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random`` module, the ``PYTHONHASHSEED`` environment
    variable, NumPy, and PyTorch (CPU and every CUDA device), and configures
    cuDNN for deterministic execution.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed all visible GPUs, not only the current one (no-op without CUDA).
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # benchmark=True lets cuDNN auto-tune and pick possibly different
    # algorithms from run to run, which defeats the determinism requested
    # above, so it has to be disabled.
    torch.backends.cudnn.benchmark = False

seed_everything(CFG['SEED'])

In [None]:
DATA_PATH = "/content/drive/MyDrive/"

In [None]:
# Load the raw training and test tables from DATA_PATH.
train = pd.read_csv(f'{DATA_PATH}train.csv')
test = pd.read_csv(f'{DATA_PATH}test.csv')

In [None]:
# One-hot indicator columns: four component columns plus one per year 2007-2022.
categorical_features = (
    [f'COMPONENT_ARBITRARY_COMPONENT{idx}' for idx in range(1, 5)]
    + [f'YEAR_{year}' for year in range(2007, 2023)]
)
# Columns used at inference time (the real diagnostic environment).
test_stage_features = [
    'ANONYMOUS_1', 'ANONYMOUS_2', 'AG', 'CO', 'CR', 'CU', 'FE', 'H2O',
    'MN', 'MO', 'NI', 'PQINDEX', 'TI', 'V', 'V40', 'ZN',
] + categorical_features

In [None]:
test.columns

Index(['ID', 'COMPONENT_ARBITRARY', 'ANONYMOUS_1', 'YEAR', 'ANONYMOUS_2', 'AG',
       'CO', 'CR', 'CU', 'FE', 'H2O', 'MN', 'MO', 'NI', 'PQINDEX', 'TI', 'V',
       'V40', 'ZN'],
      dtype='object')

In [None]:
train

Unnamed: 0,ID,COMPONENT_ARBITRARY,ANONYMOUS_1,YEAR,SAMPLE_TRANSFER_DAY,ANONYMOUS_2,AG,AL,B,BA,...,U25,U20,U14,U6,U4,V,V100,V40,ZN,Y_LABEL
0,TRAIN_00000,COMPONENT3,1486,2011,7,200,0,3,93,0,...,,,,,,0,,154.0,75,0
1,TRAIN_00001,COMPONENT2,1350,2021,51,375,0,2,19,0,...,2.0,4.0,6.0,216.0,1454.0,0,,44.0,652,0
2,TRAIN_00002,COMPONENT2,2415,2015,2,200,0,110,1,1,...,0.0,3.0,39.0,11261.0,41081.0,0,,72.6,412,1
3,TRAIN_00003,COMPONENT3,7389,2010,2,200,0,8,3,0,...,,,,,,0,,133.3,7,0
4,TRAIN_00004,COMPONENT3,3954,2015,4,200,0,1,157,0,...,,,,,,0,,133.1,128,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14090,TRAIN_14090,COMPONENT3,1616,2014,8,200,0,2,201,1,...,,,,,,0,,135.4,16,0
14091,TRAIN_14091,COMPONENT1,2784,2013,2,200,0,3,85,0,...,,,,,,0,14.5,117.5,1408,0
14092,TRAIN_14092,COMPONENT3,1788,2008,9,550,0,6,0,1,...,,,,,,0,,54.0,1301,0
14093,TRAIN_14093,COMPONENT2,2498,2009,19,550,0,2,4,0,...,7.0,8.0,100.0,1625.0,18890.0,0,,44.3,652,0


In [None]:
# Learn the COMPONENT_ARBITRARY / YEAR categories from the training frame.
enc = OneHotEncoder().fit(train[["COMPONENT_ARBITRARY",'YEAR']])
tmp = pd.DataFrame(
    data=enc.transform(train[["COMPONENT_ARBITRARY",'YEAR']]).toarray(),
    columns=enc.get_feature_names_out(),
)
# Append the indicator columns and drop the two raw categorical columns.
train = pd.concat([train, tmp], axis=1).drop(columns=["COMPONENT_ARBITRARY",'YEAR'])
train

Unnamed: 0,ID,ANONYMOUS_1,SAMPLE_TRANSFER_DAY,ANONYMOUS_2,AG,AL,B,BA,BE,CA,...,YEAR_2013,YEAR_2014,YEAR_2015,YEAR_2016,YEAR_2017,YEAR_2018,YEAR_2019,YEAR_2020,YEAR_2021,YEAR_2022
0,TRAIN_00000,1486,7,200,0,3,93,0,0,3059,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,TRAIN_00001,1350,51,375,0,2,19,0,0,2978,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,TRAIN_00002,2415,2,200,0,110,1,1,0,17,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,TRAIN_00003,7389,2,200,0,8,3,0,0,1960,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,TRAIN_00004,3954,4,200,0,1,157,0,0,71,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14090,TRAIN_14090,1616,8,200,0,2,201,1,0,6,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
14091,TRAIN_14091,2784,2,200,0,3,85,0,0,2945,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
14092,TRAIN_14092,1788,9,550,0,6,0,1,0,13,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
14093,TRAIN_14093,2498,19,550,0,2,4,0,0,2244,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# Reuse the encoder already fitted on the training frame. A freshly
# constructed OneHotEncoder has learned no categories, so calling transform()
# on it raises NotFittedError on a clean run; reusing `enc` also guarantees
# that test gets exactly the same indicator columns as train.
tmp = pd.DataFrame(
    enc.transform(test[["COMPONENT_ARBITRARY",'YEAR']]).toarray(),
    columns = enc.get_feature_names_out()
)
# Append the indicator columns and drop the two raw categorical columns.
test = pd.concat([test,tmp],axis = 1).drop(columns=["COMPONENT_ARBITRARY",'YEAR'])
test

Unnamed: 0,ID,ANONYMOUS_1,ANONYMOUS_2,AG,CO,CR,CU,FE,H2O,MN,...,YEAR_2013,YEAR_2014,YEAR_2015,YEAR_2016,YEAR_2017,YEAR_2018,YEAR_2019,YEAR_2020,YEAR_2021,YEAR_2022
0,TEST_0000,2192,200,0,0,0,1,12,0.0,0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
1,TEST_0001,2794,200,0,0,2,1,278,0.0,3,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,TEST_0002,1982,200,0,0,0,16,5,0.0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,TEST_0003,1404,200,0,0,3,4,163,0.0,4,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,TEST_0004,8225,200,0,0,0,6,13,0.0,0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6036,TEST_6036,1714,200,0,0,3,130,1047,0.0,65,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6037,TEST_6037,4131,200,0,0,5,2,736,0.0,5,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
6038,TEST_6038,4325,200,0,0,0,0,53,0.0,0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6039,TEST_6039,1364,200,0,0,0,62,2,0.0,0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


- SimpleImputer 결측치 채우기


In [None]:
# from sklearn.impute import SimpleImputer

# median_cols = ['CD','FOPTIMETHGLY',"FUEL",'K','U100','U75','U50']
# for i in median_cols:
#     imputer = SimpleImputer(strategy="median")
#     train[i] = imputer.fit_transform(train[[i]])

# mean_cols = ["FH2O","FNOX","FOXID","FSO4","FTBN","SOOTPERCENTAGE","U25","U20","U14","U6","U4","V100"]
# for i in mean_cols:
#     imputer = SimpleImputer(strategy="mean")
#     train[i] = imputer.fit_transform(train[[i]])

- enable_iterative_imputer 결측치 채우기

In [None]:
# from sklearn.experimental import enable_iterative_imputer
# from sklearn.impute import IterativeImputer
# SEED = 42

In [None]:
# for i in cols:
#     imputer = IterativeImputer(random_state=SEED) # 수치형
#     train[i] = imputer.fit_transform(train[[i]])

- Linear모델을 통한 결측치 처리

In [None]:
# from sklearn.linear_model import LinearRegression
# for i in cols:
#     imputer = IterativeImputer(estimator =  LinearRegression(),random_state=SEED) # IterativeImputer에서 다양한 옵션이 있다. (estimator는 머신러닝 옵션!)
#     train[i] = imputer.fit_transform(train[[i]])

- KNNImputer

In [None]:
# for i in cols:
#     imputer = KNNImputer(n_neighbors=10,weights="distance")
#     train[i] = imputer.fit_transform(train[[i]])

In [None]:
# mean = train[["FNOX", "FOXID","FSO4" ]].mean()
# mean

- 전처리

In [None]:
# Replace every missing value with 0 in both frames.
train = train.fillna(0)
test = test.fillna(0)

In [None]:
train.isnull().sum().sum()

0

In [None]:
# Separate the predictors from the Y_LABEL target, dropping the ID column.
all_X = train.drop(columns=['ID', 'Y_LABEL'])
all_y = train['Y_LABEL']

test = test.drop(columns=['ID'])

# Stratified 80/20 hold-out split, seeded from CFG.
train_X, val_X, train_y, val_y = train_test_split(
    all_X,
    all_y,
    test_size=0.2,
    random_state=CFG['SEED'],
    stratify=all_y,
)
train_X.shape, val_X.shape, train_y.shape, val_y.shape

((11276, 70), (2819, 70), (11276,), (2819,))

In [None]:
def get_values(value):
    """Return ``value.values`` reshaped into a single-column 2-D array.

    Args:
        value: Object exposing ``.values`` (e.g. a pandas Series).

    Returns:
        Array of shape ``(n, 1)``, the layout scikit-learn scalers expect.
    """
    column = value.values
    return column.reshape((-1, 1))

for col in train_X.columns:
    # The one-hot indicator columns are left unscaled.
    if col in categorical_features:
        continue
    # Fit on the training split only, then apply the same statistics to the
    # validation split and (where the column exists) to the test frame.
    scaler = StandardScaler().fit(get_values(train_X[col]))
    train_X[col] = scaler.transform(get_values(train_X[col]))
    val_X[col] = scaler.transform(get_values(val_X[col]))
    if col in test.columns:
        test[col] = scaler.transform(get_values(test[col]))

In [None]:
train_X

Unnamed: 0,ANONYMOUS_1,SAMPLE_TRANSFER_DAY,ANONYMOUS_2,AG,AL,B,BA,BE,CA,CD,...,YEAR_2013,YEAR_2014,YEAR_2015,YEAR_2016,YEAR_2017,YEAR_2018,YEAR_2019,YEAR_2020,YEAR_2021,YEAR_2022
6216,0.066945,-0.219402,-0.340807,-0.149910,-0.109649,0.429454,-0.308180,-0.040725,1.184617,-0.062577,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
13544,0.572677,-0.219402,-0.340807,-0.149910,-0.164876,-0.427729,-0.308180,-0.040725,-0.797281,-0.062577,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
12499,0.535364,-0.557701,1.090950,-0.149910,-0.164876,0.410192,-0.308180,-0.040725,1.172504,-0.062577,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
10328,-0.096080,0.541770,0.296744,-0.149910,-0.151069,-0.524042,-0.308180,-0.040725,-0.867943,-0.062577,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
4675,-0.330622,-0.473126,0.296744,-0.149910,-0.123456,0.949543,0.154541,-0.040725,1.096458,-0.062577,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2688,0.225972,-0.388552,-0.340807,5.699183,-0.109649,-0.610723,0.154541,-0.040725,-0.906976,-0.062577,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7511,0.802334,0.118897,-0.340807,-0.149910,-0.068229,0.371667,-0.308180,-0.040725,0.854188,-0.062577,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9907,0.422312,0.288046,-0.340807,-0.149910,-0.123456,-0.485517,-0.308180,-0.040725,1.240474,-0.062577,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7970,0.212424,-0.050253,-0.340807,-0.149910,1.740450,0.670236,0.617263,-0.040725,-0.668071,-0.062577,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
class CustomDataset(Dataset):
    """Tabular dataset serving teacher inputs, student inputs and labels.

    The item layout depends on the constructor arguments:

    * ``distillation=True``: ``(teacher_X, student_X, y)``, where
      ``student_X`` holds only the ``test_stage_features`` columns.
    * ``distillation=False`` with labels: ``(X, y)``.
    * ``distillation=False`` with ``data_y=None``: ``X`` only.

    The feature frame is converted to float32 arrays once at construction
    time, so per-item access is a cheap array lookup instead of a DataFrame
    slice. As a consequence, changes made to ``data_X`` after the dataset is
    created are not reflected in the items it returns.

    Args:
        data_X: pandas DataFrame of numeric features, one row per sample.
        data_y: pandas Series of labels aligned with ``data_X``, or ``None``.
        distillation: Whether to also return the student feature subset.

    Raises:
        ValueError: If ``distillation`` is true but ``data_y`` is ``None``.
    """

    def __init__(self, data_X, data_y, distillation=False):
        super(CustomDataset, self).__init__()
        if distillation and data_y is None:
            raise ValueError("distillation mode requires labels (data_y is None)")

        self.data_X = data_X
        self.data_y = data_y
        self.distillation = distillation

        # Materialise the arrays once; the original per-item
        # `data_X[test_stage_features].iloc[index]` copied the whole frame
        # for every single sample.
        self._teacher_X = data_X.to_numpy(dtype=np.float32)
        self._student_X = (
            data_X[test_stage_features].to_numpy(dtype=np.float32)
            if distillation else None
        )
        self._y = None if data_y is None else data_y.values

    def __len__(self):
        return len(self.data_X)

    def __getitem__(self, index):
        # torch.tensor copies, so returned tensors never alias the cache.
        if self.distillation:
            # Knowledge-distillation training: full features for the teacher,
            # inference-time subset for the student.
            teacher_X = torch.tensor(self._teacher_X[index])
            student_X = torch.tensor(self._student_X[index])
            y = self._y[index]
            return teacher_X, student_X, y

        if self._y is None:
            # Unlabelled (test) mode.
            return torch.tensor(self._teacher_X[index])

        teacher_X = torch.tensor(self._teacher_X[index])
        y = self._y[index]
        return teacher_X, y

In [None]:
# Plain (non-distillation) datasets for training the teacher.
train_dataset = CustomDataset(data_X=train_X, data_y=train_y, distillation=False)
val_dataset = CustomDataset(data_X=val_X, data_y=val_y, distillation=False)

In [None]:
# Shuffle only the training batches; validation order stays fixed.
train_loader = DataLoader(dataset=train_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False)

In [None]:
class Teacher(nn.Module):
    """Teacher MLP: a binary classifier over the full training feature set.

    Three hidden blocks (Linear -> BatchNorm1d -> LeakyReLU) followed by a
    single-unit Linear layer and a Sigmoid, so the output is a probability.

    Args:
        in_features: Number of input features. Defaults to 70, the width of
            the training frame used in this notebook.
    """

    def __init__(self, in_features=70):
        super(Teacher, self).__init__()
        self.classifier = nn.Sequential(
            nn.Linear(in_features=in_features, out_features=256),
            nn.BatchNorm1d(256),
            nn.LeakyReLU(negative_slope=0.2),
            nn.Linear(in_features=256, out_features=1024),
            nn.BatchNorm1d(1024),
            nn.LeakyReLU(negative_slope=0.2),
            nn.Linear(in_features=1024, out_features=256),  # TODO: try adding a layer here
            nn.BatchNorm1d(256),
            nn.LeakyReLU(negative_slope=0.2),
            nn.Linear(in_features=256, out_features=1),
            nn.Sigmoid()
        )

    def forward(self, x):
        """Return a ``(batch, 1)`` tensor of probabilities for input ``x``."""
        output = self.classifier(x)
        return output

In [None]:
def train(model, optimizer, train_loader, val_loader, scheduler, device):
    """Train ``model`` with BCE loss and return it with its best weights.

    Runs ``CFG["EPOCHS"]`` epochs. After each epoch the model is evaluated
    with ``validation_teacher``; whenever the validation F1 improves, the
    weights are snapshotted. At the end the best snapshot is loaded back
    into ``model``.

    NOTE(review): this function's name rebinds the notebook-level ``train``
    DataFrame loaded from train.csv, so cells that use that frame cannot be
    re-run after this cell without reloading the data.

    Args:
        model: Module whose output is a ``(batch, 1)`` probability tensor.
        optimizer: Optimizer over ``model``'s parameters.
        train_loader: Iterable yielding ``(X, y)`` training batches.
        val_loader: Iterable yielding ``(X, y)`` validation batches.
        scheduler: Scheduler stepped with the validation F1, or ``None``.
        device: Device on which to train.

    Returns:
        ``model`` holding the weights of the best-scoring epoch, or ``None``
        if no epoch achieved a validation F1 above 0.
    """
    model.to(device)

    best_score = 0
    best_state = None
    criterion = nn.BCELoss().to(device)

    for epoch in range(CFG["EPOCHS"]):
        train_loss = []

        model.train()
        for X, y in tqdm(train_loader):
            X = X.float().to(device)
            y = y.float().to(device)

            optimizer.zero_grad()

            y_pred = model(X)

            # Labels arrive as (batch,); BCELoss needs them shaped like y_pred.
            loss = criterion(y_pred, y.reshape(-1, 1))
            loss.backward()

            optimizer.step()

            train_loss.append(loss.item())

        val_loss, val_score = validation_teacher(model, val_loader, criterion, device)

        print(f'Epoch [{epoch}], Train Loss : [{np.mean(train_loss) :.5f}] Val Loss : [{np.mean(val_loss) :.5f}] Val F1 Score : [{val_score:.5f}]')

        if scheduler is not None:
            scheduler.step(val_score)

        if best_score < val_score:
            # Copy the tensors: keeping only a reference to `model` would
            # silently follow every later epoch, so the "best" model would
            # always be the last one.
            best_state = {
                key: value.detach().clone()
                for key, value in model.state_dict().items()
            }
            best_score = val_score

    if best_state is None:
        return None

    model.load_state_dict(best_state)
    return model

In [None]:
def competition_metric(true, pred):
    """Return the macro-averaged F1 score of ``pred`` against ``true``."""
    macro_f1 = f1_score(y_true=true, y_pred=pred, average="macro")
    return macro_f1

def validation_teacher(model, val_loader, criterion, device, threshold=0.35):
    """Evaluate ``model`` on ``val_loader``.

    Args:
        model: Module whose output is a ``(batch, 1)`` probability tensor.
        val_loader: Iterable yielding ``(X, y)`` batches.
        criterion: Loss callable taking ``(prediction, target)``.
        device: Device on which to run the evaluation.
        threshold: Probability above which a sample is labelled 1.
            Defaults to 0.35.

    Returns:
        Tuple ``(val_loss, val_f1)``: the list of per-batch loss values and
        the macro F1 score of the thresholded predictions.
    """
    model.eval()

    val_loss = []
    pred_probs = []
    true_labels = []

    with torch.no_grad():
        for X, y in tqdm(val_loader):
            X = X.float().to(device)
            y = y.float().to(device)

            model_pred = model(X)

            loss = criterion(model_pred, y.reshape(-1, 1))
            val_loss.append(loss.item())

            # Collect plain Python floats so the results can be thresholded
            # with NumPy after the loop.
            pred_probs += model_pred.squeeze(1).to('cpu').tolist()
            true_labels += y.tolist()

    pred_labels = np.where(np.array(pred_probs) > threshold, 1, 0)
    val_f1 = competition_metric(true_labels, pred_labels)
    return val_loss, val_f1

In [None]:
# Build the teacher together with its optimizer and LR scheduler.
model = Teacher()
model.eval()
optimizer = optim.AdamW(params=model.parameters(), lr=CFG['LEARNING_RATE'])
# Halve the learning rate when the validation F1 (mode='max') stops improving.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='max',
    factor=0.5,
    patience=1,
    threshold_mode='abs',
    min_lr=1e-8,
    verbose=True,
)

teacher_model = train(model, optimizer, train_loader, val_loader, scheduler, device)

  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [0], Train Loss : [0.25928] Val Loss : [0.23231] Val F1 Score : [0.73775]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [1], Train Loss : [0.20482] Val Loss : [0.19121] Val F1 Score : [0.78581]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [2], Train Loss : [0.19328] Val Loss : [0.17858] Val F1 Score : [0.78762]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [3], Train Loss : [0.18670] Val Loss : [0.17644] Val F1 Score : [0.79563]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [4], Train Loss : [0.19662] Val Loss : [0.15641] Val F1 Score : [0.81755]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [5], Train Loss : [0.18624] Val Loss : [0.16424] Val F1 Score : [0.80999]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [6], Train Loss : [0.18466] Val Loss : [0.16053] Val F1 Score : [0.81884]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [7], Train Loss : [0.18065] Val Loss : [0.16356] Val F1 Score : [0.80532]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [8], Train Loss : [0.16483] Val Loss : [0.15618] Val F1 Score : [0.82531]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [9], Train Loss : [0.16628] Val Loss : [0.15869] Val F1 Score : [0.81842]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [10], Train Loss : [0.18421] Val Loss : [0.18285] Val F1 Score : [0.80363]
Epoch 00011: reducing learning rate of group 0 to 1.5000e-02.


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [11], Train Loss : [0.16942] Val Loss : [0.15029] Val F1 Score : [0.81740]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [12], Train Loss : [0.15566] Val Loss : [0.17570] Val F1 Score : [0.79714]
Epoch 00013: reducing learning rate of group 0 to 7.5000e-03.


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [13], Train Loss : [0.14486] Val Loss : [0.15354] Val F1 Score : [0.82297]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [14], Train Loss : [0.14193] Val Loss : [0.14999] Val F1 Score : [0.81595]
Epoch 00015: reducing learning rate of group 0 to 3.7500e-03.


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [15], Train Loss : [0.13722] Val Loss : [0.14977] Val F1 Score : [0.82309]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [16], Train Loss : [0.13984] Val Loss : [0.14871] Val F1 Score : [0.82795]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [17], Train Loss : [0.14923] Val Loss : [0.15034] Val F1 Score : [0.82452]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [18], Train Loss : [0.13906] Val Loss : [0.14840] Val F1 Score : [0.82763]
Epoch 00019: reducing learning rate of group 0 to 1.8750e-03.


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [19], Train Loss : [0.13363] Val Loss : [0.14696] Val F1 Score : [0.82240]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [20], Train Loss : [0.13311] Val Loss : [0.15083] Val F1 Score : [0.82352]
Epoch 00021: reducing learning rate of group 0 to 9.3750e-04.


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [21], Train Loss : [0.13399] Val Loss : [0.14635] Val F1 Score : [0.82456]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [22], Train Loss : [0.13560] Val Loss : [0.14875] Val F1 Score : [0.82718]
Epoch 00023: reducing learning rate of group 0 to 4.6875e-04.


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [23], Train Loss : [0.13861] Val Loss : [0.15407] Val F1 Score : [0.82332]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [24], Train Loss : [0.13241] Val Loss : [0.14920] Val F1 Score : [0.82568]
Epoch 00025: reducing learning rate of group 0 to 2.3437e-04.


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [25], Train Loss : [0.13209] Val Loss : [0.14612] Val F1 Score : [0.82826]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [26], Train Loss : [0.12893] Val Loss : [0.14718] Val F1 Score : [0.82718]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [27], Train Loss : [0.12898] Val Loss : [0.14731] Val F1 Score : [0.82703]
Epoch 00028: reducing learning rate of group 0 to 1.1719e-04.


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [28], Train Loss : [0.12903] Val Loss : [0.14773] Val F1 Score : [0.82673]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [29], Train Loss : [0.13357] Val Loss : [0.14708] Val F1 Score : [0.83090]


In [None]:
class Student(nn.Module):
    """Student MLP: a binary classifier over the inference-time features.

    Three hidden blocks (Linear -> BatchNorm1d -> LeakyReLU) followed by a
    single-unit Linear layer and a Sigmoid, so the output is a probability.

    Args:
        in_features: Number of input features. Defaults to 36, the number
            of inference-time columns used in this notebook.
    """

    def __init__(self, in_features=36):
        super(Student, self).__init__()
        self.classifier = nn.Sequential(
            nn.Linear(in_features=in_features, out_features=128),
            nn.BatchNorm1d(128),
            nn.LeakyReLU(negative_slope=0.2),
            nn.Linear(in_features=128, out_features=512),
            nn.BatchNorm1d(512),
            nn.LeakyReLU(negative_slope=0.2),
            nn.Linear(in_features=512, out_features=128),
            nn.BatchNorm1d(128),
            nn.LeakyReLU(negative_slope=0.2),
            nn.Linear(in_features=128, out_features=1),
            nn.Sigmoid()
        )

    def forward(self, x):
        """Return a ``(batch, 1)`` tensor of probabilities for input ``x``."""
        output = self.classifier(x)
        return output

In [None]:
def distillation(student_logits, labels, teacher_logits, alpha):
    """Blend the hard-label loss with the teacher-matching loss.

    Both terms use binary cross-entropy, so despite the parameter names the
    student and teacher inputs must already be probabilities in ``[0, 1]``.

    Args:
        student_logits: Student outputs, shape ``(batch, 1)``.
        labels: Ground-truth labels; reshaped to ``(batch, 1)``.
        teacher_logits: Teacher outputs, same shape as ``student_logits``.
        alpha: Weight of the hard-label term; ``1 - alpha`` weights the
            teacher term.

    Returns:
        Scalar loss tensor.
    """
    bce = nn.BCELoss()
    hard_targets = labels.reshape(-1, 1)
    soft_term = bce(student_logits, teacher_logits)
    hard_term = bce(student_logits, hard_targets)
    return alpha * hard_term + (1-alpha) * soft_term

In [None]:
def distill_loss(output, target, teacher_output, loss_fn=distillation, opt=None, alpha=0.1):
    """Compute the distillation loss for one batch and optionally step.

    Args:
        output: Student predictions for the batch.
        target: Ground-truth labels for the batch.
        teacher_output: Teacher predictions for the batch.
        loss_fn: Callable ``(output, target, teacher_output, alpha=...)``
            returning a scalar loss tensor.
        opt: Optimizer to step with this loss, or ``None`` to only evaluate.
            The default is ``None``: the previous default captured whichever
            global ``optimizer`` existed when this cell ran (the teacher's),
            so omitting ``opt`` would have stepped the wrong model.
        alpha: Weight of the hard-label term passed to ``loss_fn``.

    Returns:
        The loss as a Python float.
    """
    loss_b = loss_fn(output, target, teacher_output, alpha=alpha)

    if opt is not None:
        opt.zero_grad()
        loss_b.backward()
        opt.step()

    return loss_b.item()

In [None]:
def student_train(s_model, t_model, optimizer, train_loader, val_loader, scheduler, device):
    """Train the student by distillation and return it with its best weights.

    Runs ``CFG["EPOCHS"]`` epochs. For every batch the frozen teacher scores
    the full feature vector, the student scores its feature subset, and
    ``distill_loss`` performs the optimisation step. Whenever the validation
    F1 improves, the student weights are snapshotted; at the end the best
    snapshot is loaded back into ``s_model``.

    Args:
        s_model: Student module to train.
        t_model: Trained teacher module (kept in eval mode, never updated).
        optimizer: Optimizer over ``s_model``'s parameters.
        train_loader: Iterable yielding ``(X_teacher, X_student, y)`` batches.
        val_loader: Iterable yielding ``(X_teacher, X_student, y)`` batches.
        scheduler: Scheduler stepped with the validation F1, or ``None``.
        device: Device on which to train.

    Returns:
        ``s_model`` holding the weights of the best-scoring epoch, or
        ``None`` if no epoch achieved a validation F1 above 0.
    """
    s_model.to(device)
    t_model.to(device)

    best_score = 0
    best_state = None

    for epoch in range(CFG["EPOCHS"]):
        train_loss = []
        s_model.train()
        t_model.eval()

        for X_t, X_s, y in tqdm(train_loader):
            X_t = X_t.float().to(device)
            X_s = X_s.float().to(device)
            y = y.float().to(device)

            optimizer.zero_grad()

            output = s_model(X_s)
            # The teacher only provides targets; no gradients are needed.
            with torch.no_grad():
                teacher_output = t_model(X_t)

            # distill_loss runs backward() and optimizer.step() itself.
            loss_b = distill_loss(output, y, teacher_output, loss_fn=distillation, opt=optimizer)

            train_loss.append(loss_b)

        val_loss, val_score = validation_student(s_model, t_model, val_loader, distill_loss, device)
        print(f'Epoch [{epoch}], Train Loss : [{np.mean(train_loss) :.5f}] Val Loss : [{np.mean(val_loss) :.5f}] Val F1 Score : [{val_score:.5f}]')

        if scheduler is not None:
            scheduler.step(val_score)

        if best_score < val_score:
            # Copy the tensors: keeping only a reference to `s_model` would
            # silently follow every later epoch, so the "best" model would
            # always be the last one.
            best_state = {
                key: value.detach().clone()
                for key, value in s_model.state_dict().items()
            }
            best_score = val_score

    if best_state is None:
        return None

    s_model.load_state_dict(best_state)
    return s_model

In [None]:
def validation_student(s_model, t_model, val_loader, criterion, device, threshold=0.35):
    """Evaluate the student on ``val_loader`` using the distillation loss.

    Args:
        s_model: Student module being evaluated.
        t_model: Teacher module providing the soft targets.
        val_loader: Iterable yielding ``(X_teacher, X_student, y)`` batches.
        criterion: Unused; kept so existing callers keep working. The loss
            is always computed with ``distill_loss``.
        device: Device on which to run the evaluation.
        threshold: Probability above which a sample is labelled 1.
            Defaults to 0.35.

    Returns:
        Tuple ``(val_loss, val_f1)``: the list of per-batch loss values and
        the macro F1 score of the thresholded student predictions.
    """
    s_model.eval()
    t_model.eval()

    val_loss = []
    pred_probs = []
    true_labels = []

    with torch.no_grad():
        for X_t, X_s, y in tqdm(val_loader):
            X_t = X_t.float().to(device)
            X_s = X_s.float().to(device)
            y = y.float().to(device)

            model_pred = s_model(X_s)
            teacher_output = t_model(X_t)

            # opt=None: evaluate the loss only, never step during validation.
            loss_b = distill_loss(model_pred, y, teacher_output, loss_fn=distillation, opt=None)
            val_loss.append(loss_b)

            pred_probs += model_pred.squeeze(1).to('cpu').tolist()
            true_labels += y.tolist()

    pred_labels = np.where(np.array(pred_probs) > threshold, 1, 0)
    val_f1 = competition_metric(true_labels, pred_labels)
    return val_loss, val_f1

In [None]:
# Rebuild the datasets in distillation mode so each item carries both the
# teacher inputs and the student inputs.
train_dataset = CustomDataset(data_X=train_X, data_y=train_y, distillation=True)
val_dataset = CustomDataset(data_X=val_X, data_y=val_y, distillation=True)

train_loader = DataLoader(dataset=train_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False)

In [None]:
# Build the student together with its own optimizer and LR scheduler.
student_model = Student()
student_model.eval()
optimizer = optim.AdamW(params=student_model.parameters(), lr=CFG['LEARNING_RATE'])
# Halve the learning rate when the validation F1 (mode='max') stops improving.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='max',
    factor=0.5,
    patience=1,
    threshold_mode='abs',
    min_lr=1e-8,
    verbose=True,
)

best_student_model = student_train(student_model, teacher_model, optimizer, train_loader, val_loader, scheduler, device)

  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [0], Train Loss : [0.32058] Val Loss : [0.26925] Val F1 Score : [0.47748]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [1], Train Loss : [0.27650] Val Loss : [0.27383] Val F1 Score : [0.47738]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [2], Train Loss : [0.27251] Val Loss : [0.27017] Val F1 Score : [0.47767]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [3], Train Loss : [0.26989] Val Loss : [0.26718] Val F1 Score : [0.48530]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [4], Train Loss : [0.27549] Val Loss : [0.26863] Val F1 Score : [0.50170]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [5], Train Loss : [0.27780] Val Loss : [0.26620] Val F1 Score : [0.48864]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [6], Train Loss : [0.27454] Val Loss : [0.26398] Val F1 Score : [0.47767]
Epoch 00007: reducing learning rate of group 0 to 1.5000e-02.


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [7], Train Loss : [0.26884] Val Loss : [0.26476] Val F1 Score : [0.47767]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [8], Train Loss : [0.26308] Val Loss : [0.26456] Val F1 Score : [0.48994]
Epoch 00009: reducing learning rate of group 0 to 7.5000e-03.


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [9], Train Loss : [0.27195] Val Loss : [0.26306] Val F1 Score : [0.50134]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [10], Train Loss : [0.26373] Val Loss : [0.26260] Val F1 Score : [0.49403]
Epoch 00011: reducing learning rate of group 0 to 3.7500e-03.


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [11], Train Loss : [0.26004] Val Loss : [0.26197] Val F1 Score : [0.49791]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [12], Train Loss : [0.26176] Val Loss : [0.26160] Val F1 Score : [0.49738]
Epoch 00013: reducing learning rate of group 0 to 1.8750e-03.


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [13], Train Loss : [0.25713] Val Loss : [0.26191] Val F1 Score : [0.50115]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [14], Train Loss : [0.26034] Val Loss : [0.26197] Val F1 Score : [0.50134]
Epoch 00015: reducing learning rate of group 0 to 9.3750e-04.


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [15], Train Loss : [0.26285] Val Loss : [0.26203] Val F1 Score : [0.50744]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [16], Train Loss : [0.25871] Val Loss : [0.26168] Val F1 Score : [0.50096]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [17], Train Loss : [0.26312] Val Loss : [0.26151] Val F1 Score : [0.50077]
Epoch 00018: reducing learning rate of group 0 to 4.6875e-04.


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [18], Train Loss : [0.25654] Val Loss : [0.26149] Val F1 Score : [0.50466]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [19], Train Loss : [0.25488] Val Loss : [0.26193] Val F1 Score : [0.50765]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [20], Train Loss : [0.25473] Val Loss : [0.26209] Val F1 Score : [0.50425]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [21], Train Loss : [0.25490] Val Loss : [0.26207] Val F1 Score : [0.50466]
Epoch 00022: reducing learning rate of group 0 to 2.3437e-04.


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [22], Train Loss : [0.26536] Val Loss : [0.26162] Val F1 Score : [0.49720]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [23], Train Loss : [0.25535] Val Loss : [0.26225] Val F1 Score : [0.50466]
Epoch 00024: reducing learning rate of group 0 to 1.1719e-04.


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [24], Train Loss : [0.25677] Val Loss : [0.26141] Val F1 Score : [0.50808]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [25], Train Loss : [0.25934] Val Loss : [0.26154] Val F1 Score : [0.50486]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [26], Train Loss : [0.25677] Val Loss : [0.26175] Val F1 Score : [0.50808]
Epoch 00027: reducing learning rate of group 0 to 5.8594e-05.


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [27], Train Loss : [0.26371] Val Loss : [0.26142] Val F1 Score : [0.50058]


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [28], Train Loss : [0.26157] Val Loss : [0.26228] Val F1 Score : [0.50466]
Epoch 00029: reducing learning rate of group 0 to 2.9297e-05.


  0%|          | 0/45 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Epoch [29], Train Loss : [0.25580] Val Loss : [0.26218] Val F1 Score : [0.50058]


In [None]:
def choose_threshold(model, val_loader, device):
    """Pick the decision threshold that maximises macro F1 on validation.

    Args:
        model: Student module; it is fed the second element of each batch.
        val_loader: Iterable yielding ``(X_teacher, X_student, y)`` batches.
        device: Device on which to run the model.

    Returns:
        Tuple ``(best_thr, best_score)``. ``best_thr`` is ``None`` and
        ``best_score`` is 0 if no candidate scores above 0; ties keep the
        earliest candidate.
    """
    model.to(device)
    model.eval()

    candidates = [0.1, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5]
    probabilities, targets = [], []

    # Gather the student's probabilities for the whole validation set once.
    with torch.no_grad():
        for _, student_inputs, batch_y in tqdm(iter(val_loader)):
            student_inputs = student_inputs.float().to(device)
            batch_y = batch_y.float().to(device)

            batch_probs = model(student_inputs).squeeze(1).to('cpu')
            probabilities.extend(batch_probs.tolist())
            targets.extend(batch_y.tolist())

    # Score every candidate against the same set of probabilities.
    best_thr, best_score = None, 0
    for candidate in candidates:
        hard_labels = np.where(np.array(probabilities) > candidate, 1, 0)
        candidate_score = competition_metric(targets, hard_labels)
        if candidate_score > best_score:
            best_score, best_thr = candidate_score, candidate
    return best_thr, best_score

In [None]:
# Choose the decision threshold for the student on the validation loader and
# report the macro F1 it achieves there.
best_threshold, best_score = choose_threshold(best_student_model, val_loader, device)
print(f'Best Threshold : [{best_threshold}], Score : [{best_score:.5f}]')
# 0.58403
# [0.57187]
# mean[0.57812]
# 원핫인코딩 = [0.54376]
# lr를 2로 변경 = [0.55327]
# 위에 + standard스케일링 = [0.57166]
# 위에 AdamW = [0.57277] *************
# 에폭 50 = [0.56790]
# line9 + elu = [0.56184]
# line9 + median(결측치) = [0.57021] 
# mode = [0.57251]
# mean과 mode =  [0.55979]
# mode + 0 = [0.56870]
# 기존(high) + SimpleImputer = [0.57027] V (median = 0.56837)
# 기존(high) + IterativeImputer = [0.56683]
# 기존(high) + Linear = [0.56683]
# 기존(high) + KNNImputer = [0.56921] (다운)
# 기존(high) + LeakyReLU =  [0.57476] VV 내일 제출해보자!
# 기존(high) + LeakyReLU + mean + median = [0.56723]
# 위 0+mean =[0.57057]
# 에폭 32 조정해보기 + batchsize512 = [0.56892]
# 최고점 + mean + median = 0.57157
# 최고점 + mean + 0 = [0.57940]
# 위에 + 전치리 시도(mean) = [0.57114]
# ㅣr = [0.56898], batch128 lr 2e-3 = [0.56916]
# 3e-2 전처리 맨위= [0.57567] 확정
# 전처리 3개 짬뽕 + 2e-2 = [0.56938]
# 전처리 3개 짬뽕 + 3e-2 = [0.55876]
# 전처리 2개 = [0.57811]
# KNN = [0.56746]
# 3개 mean + 전처리 2개 = [0.56780]
# 전처리 2개 + SimpleImputer(mean) = [0.56647]
# 0으로 채움 = [0.57805]

  0%|          | 0/12 [00:00<?, ?it/s]

Best Threshold : [0.2], Score : [0.55186]


In [None]:
# Unlabelled dataset over the test frame; order is kept for the submission.
test_datasets = CustomDataset(data_X=test, data_y=None, distillation=False)
test_loaders = DataLoader(dataset=test_datasets, batch_size=CFG['BATCH_SIZE'], shuffle=False)

In [None]:
def inference(model, test_loader, threshold, device):
    """Predict binary labels for every sample served by ``test_loader``.

    Args:
        model: Module whose output is a ``(batch, 1)`` probability tensor.
        test_loader: Iterable yielding feature batches (no labels).
        threshold: Probability above which a sample is labelled 1.
        device: Device on which to run the model.

    Returns:
        1-D NumPy integer array of 0/1 predictions, in loader order.
    """
    model.to(device)
    model.eval()

    test_predict = []
    with torch.no_grad():
        for x in tqdm(test_loader):
            x = x.float().to(device)
            model_pred = model(x)

            model_pred = model_pred.squeeze(1).to('cpu')
            # tolist() stores plain floats; extending the list with the
            # tensor itself would store one 0-d tensor per sample and make
            # the NumPy conversion below needlessly slow.
            test_predict += model_pred.tolist()

    test_predict = np.where(np.array(test_predict) > threshold, 1, 0)
    print('Done.')
    return test_predict

In [None]:
# Label the test set with the student, using the threshold chosen on validation.
preds = inference(best_student_model, test_loaders, best_threshold, device)

  0%|          | 0/24 [00:00<?, ?it/s]

Done.


In [None]:
# Fill the Y_LABEL column of the sample submission with the predictions.
submit = pd.read_csv('/content/drive/MyDrive/sample_submission.csv')
submit['Y_LABEL'] = preds
submit.head()

Unnamed: 0,ID,Y_LABEL
0,TEST_0000,0
1,TEST_0001,0
2,TEST_0002,0
3,TEST_0003,0
4,TEST_0004,0


In [None]:
# Write the submission to Drive without the DataFrame index column.
submit.to_csv('/content/drive/MyDrive/submit.csv', index=False)