In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset

from sklearn.metrics import f1_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split

import os
import pandas as pd
import numpy as np
from tqdm.auto import tqdm
import random

import warnings
warnings.filterwarnings(action='ignore')

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [2]:
os.chdir("/mnt/d/데이터분석/Dacon/Classification_of_oil_condition")
project_path = os.getcwd()
project_path

'/mnt/d/데이터분석/Dacon/Classification_of_oil_condition'

## Hyperparameter Setting

In [3]:
CFG = {
    'EPOCHS':30,
    'LEARNING_RATE':1e-3,
    'BATCH_SIZE':128,
    'SEED':40
}

## Fixed RandomSeed

In [4]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(CFG['SEED'])

## Data Load

In [35]:
train = pd.read_csv('dataset/train.csv')
test = pd.read_csv('dataset/test.csv')

In [36]:
train.head()

Unnamed: 0,ID,COMPONENT_ARBITRARY,ANONYMOUS_1,YEAR,SAMPLE_TRANSFER_DAY,ANONYMOUS_2,AG,AL,B,BA,...,U25,U20,U14,U6,U4,V,V100,V40,ZN,Y_LABEL
0,TRAIN_00000,COMPONENT3,1486,2011,7,200,0,3,93,0,...,,,,,,0,,154.0,75,0
1,TRAIN_00001,COMPONENT2,1350,2021,51,375,0,2,19,0,...,2.0,4.0,6.0,216.0,1454.0,0,,44.0,652,0
2,TRAIN_00002,COMPONENT2,2415,2015,2,200,0,110,1,1,...,0.0,3.0,39.0,11261.0,41081.0,0,,72.6,412,1
3,TRAIN_00003,COMPONENT3,7389,2010,2,200,0,8,3,0,...,,,,,,0,,133.3,7,0
4,TRAIN_00004,COMPONENT3,3954,2015,4,200,0,1,157,0,...,,,,,,0,,133.1,128,0


## Data Preprocessing

In [37]:
categorical_features = ["COMPONENT_ARBITRARY", "YEAR"]
#inference(실제 진단 환경)에 사용하는 컬럼
test_stage_features = ['COMPONENT_ARBITRARY', 'ANONYMOUS_1',  'YEAR' , 'ANONYMOUS_2', 'AG', 'CO', 'CR', 'CU', 'FE', 'H2O', 'MN', 'MO', 'NI', 'PQINDEX', 'TI', 'V', 'V40', 'ZN']
# test_stage_features = ['ANONYMOUS_1', 'ANONYMOUS_2', 'AG', 'CO', 'CR', 'CU', 'FE', 'H2O', 'MN', 'MO', 'NI', 'PQINDEX', 'TI', 'V', 'V40', 'ZN']

In [38]:
train = train.fillna(0)
test = test.fillna(0)

In [39]:
all_X = train.drop(['ID', 'Y_LABEL'], axis=1)
# all_X = train.drop(categorical_features, axis=1)
all_y = train['Y_LABEL']

test = test.drop('ID', axis=1)

train_X, val_X, train_y, val_y = train_test_split(all_X, all_y, test_size=0.2, random_state=CFG['SEED'], stratify=all_y)

In [40]:
def get_values(value):
    return value.values.reshape(-1,1)

for col in train_X.columns:
    if col not in categorical_features:
        scaler = StandardScaler()
        train_X[col] = scaler.fit_transform(get_values(train_X[col]))
        val_X[col] = scaler.transform(get_values(val_X[col]))
        if col in test.columns:
            test[col] = scaler.transform(get_values(test[col]))

le = LabelEncoder()
for col in categorical_features:
    train_X[col] = le.fit_transform(train_X[col])
    val_X[col] = le.transform(val_X[col])
    if col in test.columns:
        test[col] = le.transform(test[col])

## CustomDataset

In [41]:
class CustomDataset(Dataset):
    def __init__(self, data_X, data_y, distillation=False):
        super(CustomDataset, self).__init__()
        self.data_X = data_X
        self.data_y = data_y
        self.distillation = distillation

    def __len__(self):
        return len(self.data_X)

    def __getitem__(self, index):
        if self.distillation:
            # 지식 증류 학습 시
            teacher_X = torch.Tensor(self.data_X.iloc[index])
            student_X = torch.Tensor(self.data_X[test_stage_features].iloc[index])
            y = self.data_y.values[index]
            return teacher_X, student_X, y
        else:
            if self.data_y is None:
                test_X = torch.Tensor(self.data_X.iloc[index])
                return test_X
            else:
                teacher_X = torch.Tensor(self.data_X.iloc[index])
                y = self.data_y.values[index]
                return teacher_X, y

In [42]:
train_dataset = CustomDataset(train_X, train_y, False)
val_dataset = CustomDataset(val_X, val_y, False)

In [43]:
train_loader = DataLoader(train_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=True)

## Define Teacher Model

In [44]:
# class Teacher(nn.Module):
#     def __init__(self):
#         super(Teacher, self).__init__()
#         self.classifier = nn.Sequential(
#             nn.Linear(in_features=52, out_features=256),
#             nn.BatchNorm1d(256),
#             nn.LeakyReLU(),
#             nn.Linear(in_features=256, out_features=1024),
#             nn.BatchNorm1d(1024),
#             nn.LeakyReLU(),
#             nn.Linear(in_features=1024, out_features=256),
#             nn.BatchNorm1d(256),
#             nn.LeakyReLU(),
#             nn.Linear(in_features=256, out_features=1),
#             nn.Sigmoid()
#         )

class Teacher(nn.Module):
    def __init__(self):
        super(Teacher, self).__init__()
        self.classifier = nn.Sequential(
            nn.Linear(in_features=52, out_features=256),
            nn.BatchNorm1d(256),
            nn.LeakyReLU(),
            nn.Linear(in_features=256, out_features=512),
            nn.BatchNorm1d(512),
            nn.LeakyReLU(),
            nn.Linear(in_features=512, out_features=1024),
            nn.BatchNorm1d(1024),
            nn.LeakyReLU(),
            nn.Linear(in_features=1024, out_features=512),
            nn.BatchNorm1d(512),
            nn.LeakyReLU(),
            nn.Linear(in_features=512, out_features=256),
            nn.BatchNorm1d(256),
            nn.LeakyReLU(),
            nn.Linear(in_features=256, out_features=1),
            nn.Sigmoid()
        )

    def forward(self,x):
        output = self.classifier(x)
        return output

## Teacher Train / Validation

In [45]:
def competition_metric(true, pred):
    return f1_score(true, pred, average="macro")

def validation_teacher(model, val_loader, criterion, device):
    model.eval()

    val_loss = []
    pred_labels = []
    true_labels = []
    threshold = 0.35

    with torch.no_grad():
        for X, y in tqdm(val_loader):
            X = X.float().to(device)
            y = y.float().to(device)

            model_pred = model(X.to(device))

            loss = criterion(model_pred, y.reshape(-1, 1))
            val_loss.append(loss.item())

            model_pred = model_pred.squeeze(1).to('cpu')
            pred_labels += model_pred.tolist()
            true_labels += y.tolist()

        pred_labels = np.where(np.array(pred_labels) > threshold, 1, 0)
        val_f1 = competition_metric(true_labels, pred_labels)
    return val_loss, val_f1

In [46]:
def train(model, optimizer, train_loader, val_loader, scheduler, device):
    model.to(device)

    best_score = 0
    best_model = None
    criterion = nn.BCELoss().to(device)

    for epoch in range(CFG["EPOCHS"]):
        train_loss = []

        model.train()
        for X, y in tqdm(train_loader):
            X = X.float().to(device)
            y = y.float().to(device)

            optimizer.zero_grad()

            y_pred = model(X)

            loss = criterion(y_pred, y.reshape(-1, 1))
            loss.backward()

            optimizer.step()

            train_loss.append(loss.item())

        val_loss, val_score = validation_teacher(model, val_loader, criterion, device)
        print(f'Epoch [{epoch}], Train Loss : [{np.mean(train_loss) :.5f}] Val Loss : [{np.mean(val_loss) :.5f}] Val F1 Score : [{val_score:.5f}]')

        if scheduler is not None:
            scheduler.step(val_score)

        if best_score < val_score:
            best_model = model
            best_score = val_score

    return best_model

## Run (teacher Model)

In [47]:
model = Teacher()
model.eval()
optimizer = torch.optim.Adam(model.parameters(), lr=CFG['LEARNING_RATE'])
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=1, threshold_mode='abs',min_lr=1e-8, verbose=True)

teacher_model = train(model, optimizer, train_loader, val_loader, scheduler, device)

  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [0], Train Loss : [0.24825] Val Loss : [0.24611] Val F1 Score : [0.73867]


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [1], Train Loss : [0.18557] Val Loss : [0.18744] Val F1 Score : [0.77031]


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [2], Train Loss : [0.17719] Val Loss : [0.18222] Val F1 Score : [0.76361]


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [3], Train Loss : [0.17080] Val Loss : [0.18654] Val F1 Score : [0.78127]


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [4], Train Loss : [0.16642] Val Loss : [0.18279] Val F1 Score : [0.76751]


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [5], Train Loss : [0.15949] Val Loss : [0.17777] Val F1 Score : [0.79549]


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [6], Train Loss : [0.15283] Val Loss : [0.19108] Val F1 Score : [0.77813]


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [7], Train Loss : [0.15394] Val Loss : [0.18069] Val F1 Score : [0.79174]
Epoch 00008: reducing learning rate of group 0 to 5.0000e-04.


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [8], Train Loss : [0.14169] Val Loss : [0.17555] Val F1 Score : [0.80274]


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [9], Train Loss : [0.13152] Val Loss : [0.18484] Val F1 Score : [0.78509]


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [10], Train Loss : [0.12852] Val Loss : [0.17173] Val F1 Score : [0.79698]
Epoch 00011: reducing learning rate of group 0 to 2.5000e-04.


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [11], Train Loss : [0.11878] Val Loss : [0.18206] Val F1 Score : [0.78111]


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [12], Train Loss : [0.11625] Val Loss : [0.18312] Val F1 Score : [0.79915]
Epoch 00013: reducing learning rate of group 0 to 1.2500e-04.


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [13], Train Loss : [0.10981] Val Loss : [0.18413] Val F1 Score : [0.79587]


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [14], Train Loss : [0.10782] Val Loss : [0.18857] Val F1 Score : [0.78877]
Epoch 00015: reducing learning rate of group 0 to 6.2500e-05.


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [15], Train Loss : [0.10045] Val Loss : [0.19216] Val F1 Score : [0.77566]


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [16], Train Loss : [0.09943] Val Loss : [0.22510] Val F1 Score : [0.77469]
Epoch 00017: reducing learning rate of group 0 to 3.1250e-05.


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [17], Train Loss : [0.09568] Val Loss : [0.21805] Val F1 Score : [0.73453]


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [18], Train Loss : [0.09561] Val Loss : [0.22546] Val F1 Score : [0.79520]
Epoch 00019: reducing learning rate of group 0 to 1.5625e-05.


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [19], Train Loss : [0.09441] Val Loss : [0.21064] Val F1 Score : [0.74496]


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [20], Train Loss : [0.09641] Val Loss : [0.21786] Val F1 Score : [0.77442]
Epoch 00021: reducing learning rate of group 0 to 7.8125e-06.


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [21], Train Loss : [0.09361] Val Loss : [0.19100] Val F1 Score : [0.79242]


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [22], Train Loss : [0.09149] Val Loss : [0.19722] Val F1 Score : [0.76789]
Epoch 00023: reducing learning rate of group 0 to 3.9063e-06.


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [23], Train Loss : [0.09349] Val Loss : [0.19404] Val F1 Score : [0.78795]


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [24], Train Loss : [0.09209] Val Loss : [0.19468] Val F1 Score : [0.78159]
Epoch 00025: reducing learning rate of group 0 to 1.9531e-06.


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [25], Train Loss : [0.09286] Val Loss : [0.20136] Val F1 Score : [0.78034]


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [26], Train Loss : [0.09248] Val Loss : [0.19001] Val F1 Score : [0.80108]
Epoch 00027: reducing learning rate of group 0 to 9.7656e-07.


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [27], Train Loss : [0.09192] Val Loss : [0.19458] Val F1 Score : [0.78982]


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [28], Train Loss : [0.09349] Val Loss : [0.20400] Val F1 Score : [0.76501]
Epoch 00029: reducing learning rate of group 0 to 4.8828e-07.


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [29], Train Loss : [0.09389] Val Loss : [0.19543] Val F1 Score : [0.78800]


## Define Student Model

In [48]:
# class Student(nn.Module):
#     def __init__(self):
#         super(Student, self).__init__()
#         self.classifier = nn.Sequential(
#             nn.Linear(in_features=18, out_features=128),
#             nn.BatchNorm1d(128),
#             nn.LeakyReLU(),
#             nn.Linear(in_features=128, out_features=512),
#             nn.BatchNorm1d(512),
#             nn.LeakyReLU(),
#             nn.Linear(in_features=512, out_features=128),
#             nn.BatchNorm1d(128),
#             nn.LeakyReLU(),
#             nn.Linear(in_features=128, out_features=1),
#             nn.Sigmoid()
#         )
#
#     def forward(self, x):
#         output = self.classifier(x)
#         return output

class Student(nn.Module):
    def __init__(self):
        super(Student, self).__init__()
        self.classifier = nn.Sequential(
            nn.Linear(in_features=18, out_features=128),
            nn.BatchNorm1d(128),
            nn.LeakyReLU(),
            nn.Linear(in_features=128, out_features=512),
            nn.BatchNorm1d(512),
            nn.LeakyReLU(),
            nn.Linear(in_features=512, out_features=128),
            nn.BatchNorm1d(128),
            nn.LeakyReLU(),
            nn.Linear(in_features=128, out_features=1),
            nn.Sigmoid()
        )


    def forward(self,x):
        output = self.classifier(x)
        return output

In [49]:
def distillation(student_logits, labels, teacher_logits, alpha):
    distillation_loss = nn.BCELoss()(student_logits, teacher_logits)
    student_loss = nn.BCELoss()(student_logits, labels.reshape(-1, 1))
    return alpha * student_loss + (1-alpha) * distillation_loss

In [50]:
def distill_loss(output, target, teacher_output, loss_fn=distillation, opt=optimizer):
    loss_b = loss_fn(output, target, teacher_output, alpha=0.1)

    if opt is not None:
        opt.zero_grad()
        loss_b.backward()
        opt.step()

    return loss_b.item()

In [51]:
def validation_student(s_model, t_model, val_loader, criterion, device):
    s_model.eval()
    t_model.eval()

    val_loss = []
    pred_labels = []
    true_labels = []
    threshold = 0.35

    with torch.no_grad():
        for X_t, X_s, y in tqdm(val_loader):
            X_t = X_t.float().to(device)
            X_s = X_s.float().to(device)
            y = y.float().to(device)

            model_pred = s_model(X_s)
            teacher_output = t_model(X_t)

            loss_b = distill_loss(model_pred, y, teacher_output, loss_fn=distillation, opt=None)
            val_loss.append(loss_b)

            model_pred = model_pred.squeeze(1).to('cpu')
            pred_labels += model_pred.tolist()
            true_labels += y.tolist()

        pred_labels = np.where(np.array(pred_labels) > threshold, 1, 0)
        val_f1 = competition_metric(true_labels, pred_labels)
    return val_loss, val_f1

def student_train(s_model, t_model, optimizer, train_loader, val_loader, scheduler, device):
    s_model.to(device)
    t_model.to(device)

    best_score = 0
    best_model = None

    for epoch in range(CFG["EPOCHS"]):
        train_loss = []
        s_model.train()
        t_model.eval()

        for X_t, X_s, y in tqdm(train_loader):
            X_t = X_t.float().to(device)
            X_s = X_s.float().to(device)
            y = y.float().to(device)

            optimizer.zero_grad()

            output = s_model(X_s)
            with torch.no_grad():
                teacher_output = t_model(X_t)

            loss_b = distill_loss(output, y, teacher_output, loss_fn=distillation, opt=optimizer)

            train_loss.append(loss_b)

        val_loss, val_score = validation_student(s_model, t_model, val_loader, distill_loss, device)
        print(f'Epoch [{epoch}], Train Loss : [{np.mean(train_loss) :.5f}] Val Loss : [{np.mean(val_loss) :.5f}] Val F1 Score : [{val_score:.5f}]')

        if scheduler is not None:
            scheduler.step(val_score)

        if best_score < val_score:
            best_model = s_model
            best_score = val_score

    return best_model

In [52]:
train_dataset = CustomDataset(train_X, train_y, True)
val_dataset = CustomDataset(val_X, val_y, True)

train_loader = DataLoader(train_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=True)
val_loader = DataLoader(val_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=False)

In [53]:
student_model = Student()
student_model.eval()
optimizer = torch.optim.Adam(student_model.parameters(), lr=CFG['LEARNING_RATE'])
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=1, threshold_mode='abs',min_lr=1e-8, verbose=True)

best_student_model = student_train(student_model, teacher_model, optimizer, train_loader, val_loader, scheduler, device)

  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [0], Train Loss : [0.34682] Val Loss : [0.32669] Val F1 Score : [0.49873]


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [1], Train Loss : [0.30465] Val Loss : [0.32738] Val F1 Score : [0.50275]


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [2], Train Loss : [0.30297] Val Loss : [0.32556] Val F1 Score : [0.49403]


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [3], Train Loss : [0.30235] Val Loss : [0.32178] Val F1 Score : [0.50589]


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [4], Train Loss : [0.29895] Val Loss : [0.32290] Val F1 Score : [0.50597]


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [5], Train Loss : [0.29628] Val Loss : [0.31992] Val F1 Score : [0.50702]


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [6], Train Loss : [0.30158] Val Loss : [0.31976] Val F1 Score : [0.51764]


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [7], Train Loss : [0.29601] Val Loss : [0.31752] Val F1 Score : [0.51593]


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [8], Train Loss : [0.29294] Val Loss : [0.32245] Val F1 Score : [0.52405]


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [9], Train Loss : [0.29259] Val Loss : [0.31827] Val F1 Score : [0.51399]


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [10], Train Loss : [0.29134] Val Loss : [0.31977] Val F1 Score : [0.52326]
Epoch 00011: reducing learning rate of group 0 to 5.0000e-04.


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [11], Train Loss : [0.28662] Val Loss : [0.32380] Val F1 Score : [0.53350]


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [12], Train Loss : [0.28568] Val Loss : [0.32075] Val F1 Score : [0.53089]


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [13], Train Loss : [0.28814] Val Loss : [0.32367] Val F1 Score : [0.53350]
Epoch 00014: reducing learning rate of group 0 to 2.5000e-04.


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [14], Train Loss : [0.28643] Val Loss : [0.32277] Val F1 Score : [0.52352]


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [15], Train Loss : [0.28257] Val Loss : [0.32162] Val F1 Score : [0.52659]
Epoch 00016: reducing learning rate of group 0 to 1.2500e-04.


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [16], Train Loss : [0.28243] Val Loss : [0.31943] Val F1 Score : [0.54259]


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [17], Train Loss : [0.27894] Val Loss : [0.31925] Val F1 Score : [0.52605]


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [18], Train Loss : [0.28161] Val Loss : [0.31934] Val F1 Score : [0.54165]
Epoch 00019: reducing learning rate of group 0 to 6.2500e-05.


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [19], Train Loss : [0.27727] Val Loss : [0.31987] Val F1 Score : [0.53409]


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [20], Train Loss : [0.27706] Val Loss : [0.31882] Val F1 Score : [0.54010]
Epoch 00021: reducing learning rate of group 0 to 3.1250e-05.


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [21], Train Loss : [0.27801] Val Loss : [0.32028] Val F1 Score : [0.52906]


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [22], Train Loss : [0.27852] Val Loss : [0.31858] Val F1 Score : [0.53118]
Epoch 00023: reducing learning rate of group 0 to 1.5625e-05.


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [23], Train Loss : [0.27887] Val Loss : [0.32102] Val F1 Score : [0.52659]


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [24], Train Loss : [0.27659] Val Loss : [0.32158] Val F1 Score : [0.52906]
Epoch 00025: reducing learning rate of group 0 to 7.8125e-06.


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [25], Train Loss : [0.27735] Val Loss : [0.32232] Val F1 Score : [0.53488]


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [26], Train Loss : [0.27707] Val Loss : [0.31681] Val F1 Score : [0.52823]
Epoch 00027: reducing learning rate of group 0 to 3.9063e-06.


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [27], Train Loss : [0.27822] Val Loss : [0.32344] Val F1 Score : [0.52686]


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [28], Train Loss : [0.27563] Val Loss : [0.32341] Val F1 Score : [0.52632]
Epoch 00029: reducing learning rate of group 0 to 1.9531e-06.


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

Epoch [29], Train Loss : [0.27831] Val Loss : [0.31850] Val F1 Score : [0.53706]


## Choose Inference Threshold

In [54]:
def choose_threshold(model, val_loader, device):
    model.to(device)
    model.eval()

    thresholds = [0.1, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5]
    pred_labels = []
    true_labels = []

    best_score = 0
    best_thr = None
    with torch.no_grad():
        for _, x_s, y in tqdm(iter(val_loader)):
            x_s = x_s.float().to(device)
            y = y.float().to(device)

            model_pred = model(x_s)

            model_pred = model_pred.squeeze(1).to('cpu')
            pred_labels += model_pred.tolist()
            true_labels += y.tolist()

        for threshold in thresholds:
            pred_labels_thr = np.where(np.array(pred_labels) > threshold, 1, 0)
            score_thr = competition_metric(true_labels, pred_labels_thr)
            if best_score < score_thr:
                best_score = score_thr
                best_thr = threshold
    return best_thr, best_score

In [55]:
best_threshold, best_score = choose_threshold(best_student_model, val_loader, device)
print(f'Best Threshold : [{best_threshold}], Score : [{best_score:.5f}]')

  0%|          | 0/23 [00:00<?, ?it/s]

Best Threshold : [0.25], Score : [0.57426]


In [56]:
test_datasets = CustomDataset(test, None, False)
test_loaders = DataLoader(test_datasets, batch_size = CFG['BATCH_SIZE'], shuffle=False)

In [57]:
def inference(model, test_loader, threshold, device):
    model.to(device)
    model.eval()

    test_predict = []
    with torch.no_grad():
        for x in tqdm(test_loader):
            x = x.float().to(device)
            model_pred = model(x)

            model_pred = model_pred.squeeze(1).to('cpu')
            test_predict += model_pred

    test_predict = np.where(np.array(test_predict) > threshold, 1, 0)
    print('Done.')
    return test_predict

In [58]:
preds = inference(best_student_model, test_loaders, best_threshold, device)

  0%|          | 0/48 [00:00<?, ?it/s]

Done.


In [59]:
submit = pd.read_csv('dataset/sample_submission.csv')
submit['Y_LABEL'] = preds
submit.head()

Unnamed: 0,ID,Y_LABEL
0,TEST_0000,0
1,TEST_0001,0
2,TEST_0002,0
3,TEST_0003,0
4,TEST_0004,0


In [60]:
submit.to_csv('dataset/submit.csv', index=False)