In [1]:
#.venv\Scripts\activate
#!pip install pandas
#!pip install scikit-learn
#!pip install wandb
#!pip install torch
#!pip install numpy==1.24.4
#!pip install matplotlib

## 항공사 고객 만족도 예측 경진대회 베이스라인
이 대회는 성별, 이동 거리, 이륙 딜레이 시간 등으로 부터 고객 만족 여부를 예측하는 대회입니다.

언뜻보면 간단해 보이지만 처음 데이터 분석을 접하시는 분들에게는 어디부터 어떻게 시작해야 하는지 막막하실 겁니다.

## 데이터 불러오기
먼저 분석하려는 데이터를 작업장으로 가져오는 작업이 필요합니다.

이를 위해서 파이썬 라이브러리 중 하나인 Pandas를 이용합니다.

pandas 라이브러리는 엑셀과 같은 행과 열로 이루어진 테이블(table) 형식의 데이터를 편하게 다루도록 해주는 라이브러리입니다.

데이터 분석에는 반드시 사용된다고 해도 과언이 아닌 라이브러리이니, 시간이 날때마다 공부해둡시다.

In [2]:
import pandas as pd

train = pd.read_csv(r"train_final.csv")

print(f'train set은 {train.shape[1]} 개의 feature를 가진 {train.shape[0]} 개의 데이터 샘플로 이루어져 있습니다.')

train.head()

train set은 24 개의 feature를 가진 2999 개의 데이터 샘플로 이루어져 있습니다.


Unnamed: 0,id,Gender,Customer Type,Age,Type of Travel,Class,Flight Distance,Seat comfort,Departure/Arrival time convenient,Food and drink,...,Ease of Online booking,On-board service,Leg room service,Baggage handling,Checkin service,Cleanliness,Online boarding,Departure Delay in Minutes,Arrival Delay in Minutes,target
0,1,Male,Loyal Customer,72,Business travel,Business,1784,1,1,4,...,2,2,3,2,5,2,0,0,0,1
1,2,Male,Loyal Customer,35,Business travel,Business,2228,2,5,5,...,2,2,2,2,4,2,0,0,8,0
2,3,Male,disloyal Customer,37,Business travel,Eco,1970,4,3,4,...,1,3,2,5,1,4,0,1,2,1
3,4,Female,Loyal Customer,20,Personal Travel,Eco,2249,5,5,1,...,1,4,1,4,1,5,0,0,0,1
4,5,Male,disloyal Customer,38,Business travel,Eco,1995,2,2,2,...,5,2,5,3,1,2,0,110,102,0


데이터를 자세히 살펴보는 것은 EDA 글을 통해 알아보는 것으로 하겠습니다.

여기에서는 분석 방법에 초점을 맞추겠습니다.

## 결측치 확인

결측치(NA: Not Available)란 값이 누락된 데이터를 말합니다.

보다 정확한 분석을 하기 위해서는 데이터의 결측치를 확인하고 적절히 처리해주어야 합니다.

이번 데이터에 결측치가 있나 확인해볼까요?

In [3]:
def check_missing_col(dataframe):
    missing_col = []
    for col in dataframe.columns:
        missing_values = sum(dataframe[col].isna())
        is_missing = True if missing_values >= 1 else False
        if is_missing:
            print(f'결측치가 있는 컬럼은: {col} 입니다')
            print(f'해당 컬럼에 총 {missing_values} 개의 결측치가 존재합니다.')
            missing_col.append([col, dataframe[col].dtype])
    if missing_col == []:
        print('결측치가 존재하지 않습니다')
    return missing_col

missing_col = check_missing_col(train)

결측치가 존재하지 않습니다


이번 데이터에는 결측치가 존재하지 않네요.

만약 결측치가 존재한다면 결측치 삭제, 평균 보간법, 최빈값 보간법 등 다양한 방식으로 처리할 수 있습니다

## 데이터 전처리 

### 라벨 인코딩

데이터를 모델에 투입하기 전 몇 가지 전처리 과정이 필요합니다. 

우선은 카테고리 형식의 features 들을 바꿔주어야 합니다. 

본 데이터에서는 'Gender', 'Customer Type','Type of Travel', 'Class' 의 카테고리 형식의 feature 들이 있습니다. 

이러한 데이터의 라벨을 숫자로 바꾸어주는 전처리를 라벨 인코딩 (label encoding) 이라고 합니다.

본격적으로 데이터 전처리를 하기 전에 id 열을 제거해주고, data 와 target 을 나누어줍니다.

In [4]:
train_x = train.drop(["id","target"],axis=1) 
train_y = train.target 

#라벨인코딩을 하기 위함 dictionary map 생성 함수
def make_label_map(dataframe):
    label_maps = {}
    for col in dataframe.columns:
        if dataframe[col].dtype=='object':
            label_map = {'unknown':0}
            for i, key in enumerate(dataframe[col].unique()): #enumerate는 인덱스와 함께 고유값 반환, enumerate(['a', 'b', 'c'])는 (0, 'a'), (1, 'b'), (2, 'c')와 같은 튜플을 반환
                label_map[key] = i+1  
            label_maps[col] = label_map
    return label_maps

# 각 범주형 변수에 인코딩 값을 부여하는 함수
def label_encoder(dataframe, label_map):
    for col in dataframe.columns:
        if dataframe[col].dtype=='object':
            dataframe[col] = dataframe[col].map(label_map[col])
            dataframe[col] = dataframe[col].fillna(label_map[col]['unknown']) 
    return dataframe

# train 데이터 라벨 인코딩
label_map = make_label_map(train_x) # train 사용해 label map 생성
train_x = label_encoder(train_x, label_map) # train 라벨 인코딩

train_x.head()

Unnamed: 0,Gender,Customer Type,Age,Type of Travel,Class,Flight Distance,Seat comfort,Departure/Arrival time convenient,Food and drink,Gate location,...,Online support,Ease of Online booking,On-board service,Leg room service,Baggage handling,Checkin service,Cleanliness,Online boarding,Departure Delay in Minutes,Arrival Delay in Minutes
0,1,1,72,1,1,1784,1,1,4,1,...,5,2,2,3,2,5,2,0,0,0
1,1,1,35,1,1,2228,2,5,5,5,...,2,2,2,2,2,4,2,0,0,8
2,1,2,37,1,2,1970,4,3,4,1,...,1,1,3,2,5,1,4,0,1,2
3,2,1,20,2,2,2249,5,5,1,3,...,1,1,4,1,4,1,5,0,0,0
4,1,2,38,1,2,1995,2,2,2,2,...,5,5,2,5,3,1,2,0,110,102


라벨 인코딩 후 원래 문자열 형식의 데이터가 숫자로 표현되어 있음을 볼 수 있습니다.

## 정규화 

수치형 데이터들을 정규화 시켜줍니다.

머신러닝 과정에서 모델은 데이터의 특성(feature)들을 추출해 학습을 진행합니다.

하지만 학습을 하는 과정에서 데이터의 값이 너무 크거나, 분산이 너무 크면 학습 과정에 악영향을 끼칠 수 있습니다.

따라서 정규화를 통해 데이터 값의 크기를 줄이고 분산을 줄여 모델이 데이터를 이상하게 해석하는 것을 방지합니다.

이번 베이스라인에서는 min-max 정규화를 이용해 봅시다.

min-max 정규화는 수치형 데이터의 값을 0~1 사이의 값으로 변환해줍니다.

min-max 정규화의 수식은 아래와 같습니다.

X' = (X - MIN) / (MAX-MIN)

In [5]:
from sklearn.preprocessing import MinMaxScaler

num_features = ['Age','Flight Distance','Departure Delay in Minutes','Arrival Delay in Minutes']

scaler = MinMaxScaler()
train_x[num_features] = scaler.fit_transform(train_x[num_features]) 
train_x.head()

Unnamed: 0,Gender,Customer Type,Age,Type of Travel,Class,Flight Distance,Seat comfort,Departure/Arrival time convenient,Food and drink,Gate location,...,Online support,Ease of Online booking,On-board service,Leg room service,Baggage handling,Checkin service,Cleanliness,Online boarding,Departure Delay in Minutes,Arrival Delay in Minutes
0,1,1,0.890411,1,1,0.264656,1,1,4,1,...,5,2,2,3,2,5,2,0,0.0,0.0
1,1,1,0.383562,1,1,0.332619,2,5,5,5,...,2,2,2,2,2,4,2,0,0.0,0.017021
2,1,2,0.410959,1,2,0.293127,4,3,4,1,...,1,1,3,2,5,1,4,0,0.002299,0.004255
3,2,1,0.178082,2,2,0.335833,5,5,1,3,...,1,1,4,1,4,1,5,0,0.0,0.0
4,1,2,0.424658,1,2,0.296954,2,2,2,2,...,5,5,2,5,3,1,2,0,0.252874,0.217021


정규화 후 수치형 데이터들의 값이 0과 1 사이의 값으로 변환 되었음을 볼 수 있습니다.

## 모델 학습

모델을 설계했으니 이제 모델을 학습하면 됩니다.

객체에 Logistic Regression 클래스를 할당 후, 클래스 내 fit 메소드를 이용해 학습을 진행하면 됩니다.

In [6]:
import wandb
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import accuracy_score, roc_curve, auc
import matplotlib.pyplot as plt

# GPU 사용이 가능한지 확인
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

sweep_config = {
    'method': 'grid',  # 또는 'random' 혹은 'bayes'로 변경 가능
    'metric': {
        'name': 'train_loss',  # 주로 모니터링할 메트릭
        'goal': 'minimize'  # 'maximize'로 변경 가능
    },
    'parameters': {
        'epochs': {
            'values': [200]  # 에포크 수를 탐색할 값들로 정의
        },
        'learning_rate': {
            'values': [0.5, 0.1, 0.05]  # 학습률을 탐색할 값들로 정의
        },
        'weight_decay': {
            'values': [0.0005, 0.0001]  # weight_decay 값을 탐색할 값들로 정의
        },
        'architecture': {
            'values': ['LogisticRegression', 'RandomForest']  # 모델 아키텍처를 탐색할 값들로 정의
        },
        'seed': {
            'value': 42  # 고정된 시드 값 (변경하지 않음)
        }
    }
}

In [7]:
# torch 기반 Logistic Regression 모델 정의
class LogisticRegressionModel(nn.Module):
    def __init__(self, input_dim):
        super(LogisticRegressionModel, self).__init__()
        self.linear = nn.Linear(input_dim, 1)
    
    def forward(self, x):
        return self.linear(x)

class RandomForestModel(nn.Module):
    def __init__(self, input_dim, num_trees=10, max_depth=5):
        super(RandomForestModel, self).__init__()
        self.num_trees = num_trees
        self.trees = nn.ModuleList([self._build_tree(input_dim, max_depth) for _ in range(num_trees)])
    
    def _build_tree(self, input_dim, max_depth):
        # 간단한 트리 구조를 생성 (이 예제에서는 트리를 Linear 레이어로 대체)
        layers = []
        for _ in range(max_depth):
            layers.append(nn.Linear(input_dim, input_dim))
            layers.append(nn.ReLU())
        layers.append(nn.Linear(input_dim, 1))  # 최종 레이어는 1개의 출력 노드를 가짐
        return nn.Sequential(*layers)
    
    def forward(self, x):
        # 모든 트리의 출력을 평균화
        tree_outputs = [tree(x) for tree in self.trees]
        avg_output = torch.mean(torch.stack(tree_outputs), dim=0)
        return avg_output

class XGBoostLikeModel(nn.Module):
    def __init__(self, input_dim, num_trees=10, max_depth=5, learning_rate=0.1):
        super(XGBoostLikeModel, self).__init__()
        self.num_trees = num_trees
        self.learning_rate = learning_rate
        self.trees = nn.ModuleList([self._build_tree(input_dim, max_depth) for _ in range(num_trees)])
    
    def _build_tree(self, input_dim, max_depth):
        # 간단한 트리 구조를 생성 (이 예제에서는 트리를 Linear 레이어로 대체)
        layers = []
        for _ in range(max_depth):
            layers.append(nn.Linear(input_dim, input_dim))
            layers.append(nn.ReLU())
        layers.append(nn.Linear(input_dim, 1))  # 최종 레이어는 1개의 출력 노드를 가짐
        return nn.Sequential(*layers)
    
    def forward(self, x):
        # 초기 예측값 (예: 0 또는 평균값)
        output = torch.zeros_like(x[:, :1])  # 초기값을 0으로 설정 (mean value를 사용할 수도 있음)
        
        # 각 트리의 예측을 적용하여 점진적으로 결과를 갱신
        for tree in self.trees:
            residual = x - output  # 남은 오류 (잔차)
            tree_output = tree(residual)  # 트리의 예측
            output = output + self.learning_rate * tree_output  # 트리의 예측을 학습률과 곱하여 누적
        
        return output

In [8]:
# 전역 변수로 설정
best_overall_accuracy = 0.0
best_overall_hyperparameters = None

# Sweep 실행 후 모든 실험이 완료된 후, 최고 성능 모델과 하이퍼파라미터 저장
def save_best_hyperparameters(best_overall_hyperparameters):
    with open("best_hyperparameters.txt", "w") as f:
        for key, value in best_overall_hyperparameters.items():
            f.write(f"{key}: {value}\n")
    print(f"Best hyperparameters saved to best_hyperparameters.txt with accuracy: {best_overall_accuracy:.4f}")

In [9]:
# 범주형 변수 인코딩
train_x_encoded = pd.get_dummies(train_x)

In [10]:
# 학습 함수
def train():
    global best_overall_accuracy, best_overall_hyperparameters  # 전역 변수를 참조하도록 설정

    # WandB 초기화
    with wandb.init() as run:
        config = run.config
        X_train, X_val, y_train, y_val = train_test_split(train_x_encoded, train_y, test_size=0.2, random_state=config.seed)

        X_train = torch.tensor(X_train.values, dtype=torch.float32)
        y_train = torch.tensor(y_train.values, dtype=torch.float32)
        X_val = torch.tensor(X_val.values, dtype=torch.float32)
        y_val = torch.tensor(y_val.values, dtype=torch.float32)

        # DataLoader 설정
        #train_dataset = TensorDataset(X_train, y_train)
        #train_loader = DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True)
        
        # 모델 초기화
        input_dim = X_train.shape[1]
        if config.architecture == 'LogisticRegression':
            model = LogisticRegressionModel(input_dim)
        elif config.architecture == 'RandomForest':
            model = RandomForestModel(input_dim)
        elif config.architecture == 'XGBoostLike':
            model = XGBoostLikeModel(input_dim)

        # 손실함수 및 옵티마이저 정의
        criterion = nn.BCEWithLogitsLoss()
        optimizer = optim.Adam(model.parameters(), lr=config.learning_rate, weight_decay=config.weight_decay)

        best_val_accuracy = 0.0  # 최고 검증 정확도 초기화
        best_epoch = 0  # 최고 성능을 기록한 에포크 초기화

        # 학습 과정
        for epoch in range(config.epochs):
            model.train()
            optimizer.zero_grad()
            outputs = model(X_train)
            loss = criterion(outputs.squeeze(), y_train)
            loss.backward()
            optimizer.step()

            # 검증 과정
            model.eval()
            with torch.no_grad():
                val_outputs = model(X_val)
                val_outputs = torch.sigmoid(val_outputs)
                val_predictions = (val_outputs.squeeze() > 0.5).float()
                val_accuracy = accuracy_score(y_val.numpy(), val_predictions.numpy())

                # 최고 검증 정확도 업데이트
                if val_accuracy > best_val_accuracy:
                    best_val_accuracy = val_accuracy
                    best_epoch = epoch + 1

                # ROC Curve 계산
                fpr, tpr, _ = roc_curve(y_val.numpy(), val_outputs.numpy())
                roc_auc = auc(fpr, tpr)

                # ROC Curve 플롯 생성
                plt.figure()
                plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc:.2f})')
                plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
                plt.xlim([0.0, 1.0])
                plt.ylim([0.0, 1.05])
                plt.xlabel('False Positive Rate')
                plt.ylabel('True Positive Rate')
                plt.title('Receiver Operating Characteristic')
                plt.legend(loc="lower right")

                # ROC Curve를 WandB에 이미지로 로깅
                wandb.log({"train_loss": loss.item(), "val_accuracy": val_accuracy, "roc_auc": roc_auc, 
                           "roc_curve": wandb.Image(plt), "best_val_accuracy": best_val_accuracy, 
                           "best_epoch": best_epoch}, step=epoch)
                plt.close()

            print(f"Epoch {epoch+1}/{config.epochs}, Loss: {loss.item():.4f}, Validation Accuracy: {val_accuracy:.4f}, AUC: {roc_auc:.4f}")
        
        # 최고 성능의 하이퍼파라미터를 추적하여 저장
        if best_val_accuracy > best_overall_accuracy:
            best_overall_accuracy = best_val_accuracy
            best_overall_hyperparameters = config
            torch.save(model.state_dict(), "best_overall_model.pth")  # 모델 가중치 저장

        

        # 최종적으로 최고 성능을 기록한 에포크 시각화
        wandb.log({"Final Best Val Accuracy": best_val_accuracy, "Final Best Epoch": best_epoch})

sweep_id = wandb.sweep(sweep_config)
wandb.agent(sweep_id, function=train)

# 모든 실험이 끝난 후 최종 하이퍼파라미터 저장
save_best_hyperparameters(best_overall_hyperparameters)

# 학습 실행
#train(model, X_train, y_train, criterion, optimizer, wandb.config)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Create sweep with ID: xrxgo72j
Sweep URL: https://wandb.ai/zamtol/uncategorized/sweeps/xrxgo72j


[34m[1mwandb[0m: Agent Starting Run: ajhrjy65 with config:
[34m[1mwandb[0m: 	architecture: LogisticRegression
[34m[1mwandb[0m: 	epochs: 200
[34m[1mwandb[0m: 	learning_rate: 0.5
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	weight_decay: 0.0005
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mkkang15634[0m ([33mzamtol[0m). Use [1m`wandb login --relogin`[0m to force relogin


Epoch 1/200, Loss: 1.4158, Validation Accuracy: 0.5300, AUC: 0.5236
Epoch 2/200, Loss: 9.9642, Validation Accuracy: 0.5300, AUC: 0.5181
Epoch 3/200, Loss: 10.2530, Validation Accuracy: 0.5300, AUC: 0.7878
Epoch 4/200, Loss: 6.6677, Validation Accuracy: 0.5417, AUC: 0.8567
Epoch 5/200, Loss: 1.1462, Validation Accuracy: 0.4700, AUC: 0.4303
Epoch 6/200, Loss: 7.2999, Validation Accuracy: 0.4700, AUC: 0.4145
Epoch 7/200, Loss: 9.6468, Validation Accuracy: 0.4700, AUC: 0.4911
Epoch 8/200, Loss: 7.5726, Validation Accuracy: 0.4733, AUC: 0.7031
Epoch 9/200, Loss: 2.5389, Validation Accuracy: 0.5450, AUC: 0.8776
Epoch 10/200, Loss: 2.1191, Validation Accuracy: 0.5300, AUC: 0.8798
Epoch 11/200, Loss: 4.8489, Validation Accuracy: 0.5300, AUC: 0.8441
Epoch 12/200, Loss: 5.8863, Validation Accuracy: 0.5300, AUC: 0.8623
Epoch 13/200, Loss: 5.4835, Validation Accuracy: 0.5367, AUC: 0.8828
Epoch 14/200, Loss: 3.8681, Validation Accuracy: 0.5900, AUC: 0.8690
Epoch 15/200, Loss: 1.4223, Validation Acc

0,1
Final Best Epoch,▁
Final Best Val Accuracy,▁
best_epoch,▁▁▂▃▃▄▄▅▅▅▇▇████████████████████████████
best_val_accuracy,▁▁▁▄▆▇▇▇████████████████████████████████
roc_auc,▂▁▇▆█▇██████████████████████████████████
train_loss,█▆▅▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▂▁▂▂▃▄▆▆████████████████████████████████

0,1
Final Best Epoch,59.0
Final Best Val Accuracy,0.83
best_epoch,59.0
best_val_accuracy,0.83
roc_auc,0.90898
train_loss,0.35407
val_accuracy,0.82167


[34m[1mwandb[0m: Agent Starting Run: l5b85luo with config:
[34m[1mwandb[0m: 	architecture: LogisticRegression
[34m[1mwandb[0m: 	epochs: 200
[34m[1mwandb[0m: 	learning_rate: 0.5
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	weight_decay: 0.0001
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Epoch 1/200, Loss: 0.7146, Validation Accuracy: 0.5300, AUC: 0.5126
Epoch 2/200, Loss: 10.7189, Validation Accuracy: 0.5300, AUC: 0.7788
Epoch 3/200, Loss: 6.3561, Validation Accuracy: 0.4950, AUC: 0.6993
Epoch 4/200, Loss: 0.9437, Validation Accuracy: 0.4700, AUC: 0.5905
Epoch 5/200, Loss: 2.7257, Validation Accuracy: 0.6083, AUC: 0.8587
Epoch 6/200, Loss: 0.6548, Validation Accuracy: 0.5417, AUC: 0.8749
Epoch 7/200, Loss: 1.3972, Validation Accuracy: 0.6783, AUC: 0.8665
Epoch 8/200, Loss: 0.5685, Validation Accuracy: 0.5167, AUC: 0.8082
Epoch 9/200, Loss: 1.3046, Validation Accuracy: 0.6933, AUC: 0.8452
Epoch 10/200, Loss: 0.6540, Validation Accuracy: 0.6200, AUC: 0.8818
Epoch 11/200, Loss: 0.8142, Validation Accuracy: 0.5967, AUC: 0.8859
Epoch 12/200, Loss: 1.0487, Validation Accuracy: 0.7517, AUC: 0.8800
Epoch 13/200, Loss: 0.5121, Validation Accuracy: 0.6850, AUC: 0.8593
Epoch 14/200, Loss: 0.7592, Validation Accuracy: 0.6833, AUC: 0.8629
Epoch 15/200, Loss: 0.7782, Validation Acc

0,1
Final Best Epoch,▁
Final Best Val Accuracy,▁
best_epoch,▁▁▂▂▂▃▃▄▄▄▄▄▄▄▄▇▇▇██████████████████████
best_val_accuracy,▁▃▆▇████████████████████████████████████
roc_auc,▁▆▆▇▇▇▇▇▇█▇▇████████████████████████████
train_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▆▅▆▇██████████████████████████████████

0,1
Final Best Epoch,92.0
Final Best Val Accuracy,0.83167
best_epoch,92.0
best_val_accuracy,0.83167
roc_auc,0.90947
train_loss,0.35263
val_accuracy,0.82833


[34m[1mwandb[0m: Agent Starting Run: 54z1grm3 with config:
[34m[1mwandb[0m: 	architecture: LogisticRegression
[34m[1mwandb[0m: 	epochs: 200
[34m[1mwandb[0m: 	learning_rate: 0.1
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	weight_decay: 0.0005
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Epoch 1/200, Loss: 1.2394, Validation Accuracy: 0.5300, AUC: 0.6190
Epoch 2/200, Loss: 1.6156, Validation Accuracy: 0.5300, AUC: 0.6527
Epoch 3/200, Loss: 1.5992, Validation Accuracy: 0.5300, AUC: 0.5916
Epoch 4/200, Loss: 0.9444, Validation Accuracy: 0.4683, AUC: 0.4007
Epoch 5/200, Loss: 0.8436, Validation Accuracy: 0.4683, AUC: 0.3682
Epoch 6/200, Loss: 1.2089, Validation Accuracy: 0.4733, AUC: 0.4617
Epoch 7/200, Loss: 0.9596, Validation Accuracy: 0.6000, AUC: 0.6517
Epoch 8/200, Loss: 0.6460, Validation Accuracy: 0.5300, AUC: 0.7621
Epoch 9/200, Loss: 0.7852, Validation Accuracy: 0.5283, AUC: 0.7954
Epoch 10/200, Loss: 0.9161, Validation Accuracy: 0.5317, AUC: 0.8073
Epoch 11/200, Loss: 0.8261, Validation Accuracy: 0.5833, AUC: 0.8025
Epoch 12/200, Loss: 0.6293, Validation Accuracy: 0.7017, AUC: 0.7758
Epoch 13/200, Loss: 0.5854, Validation Accuracy: 0.5783, AUC: 0.7479
Epoch 14/200, Loss: 0.7122, Validation Accuracy: 0.5817, AUC: 0.7620
Epoch 15/200, Loss: 0.7115, Validation Accu

0,1
Final Best Epoch,▁
Final Best Val Accuracy,▁
best_epoch,▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅██████
best_val_accuracy,▁▁▅▅▇▇▇▇████████████████████████████████
roc_auc,▄▁▆▇▇███████████████████████████████████
train_loss,█▆▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▂▁▅▆▆▆▇▇████████████████████████████████

0,1
Final Best Epoch,176.0
Final Best Val Accuracy,0.83167
best_epoch,176.0
best_val_accuracy,0.83167
roc_auc,0.90769
train_loss,0.35733
val_accuracy,0.83


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 5j624rvj with config:
[34m[1mwandb[0m: 	architecture: LogisticRegression
[34m[1mwandb[0m: 	epochs: 200
[34m[1mwandb[0m: 	learning_rate: 0.1
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	weight_decay: 0.0001
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Epoch 1/200, Loss: 0.9654, Validation Accuracy: 0.5300, AUC: 0.7334
Epoch 2/200, Loss: 1.7463, Validation Accuracy: 0.5300, AUC: 0.7508
Epoch 3/200, Loss: 1.4990, Validation Accuracy: 0.5317, AUC: 0.7081
Epoch 4/200, Loss: 0.7490, Validation Accuracy: 0.4683, AUC: 0.5014
Epoch 5/200, Loss: 1.0202, Validation Accuracy: 0.4700, AUC: 0.5005
Epoch 6/200, Loss: 1.2184, Validation Accuracy: 0.5050, AUC: 0.6442
Epoch 7/200, Loss: 0.7958, Validation Accuracy: 0.5783, AUC: 0.7941
Epoch 8/200, Loss: 0.6148, Validation Accuracy: 0.5300, AUC: 0.8351
Epoch 9/200, Loss: 0.8427, Validation Accuracy: 0.5300, AUC: 0.8487
Epoch 10/200, Loss: 0.9170, Validation Accuracy: 0.5367, AUC: 0.8545
Epoch 11/200, Loss: 0.7624, Validation Accuracy: 0.6317, AUC: 0.8472
Epoch 12/200, Loss: 0.5612, Validation Accuracy: 0.7050, AUC: 0.8175
Epoch 13/200, Loss: 0.5873, Validation Accuracy: 0.5750, AUC: 0.7970
Epoch 14/200, Loss: 0.7249, Validation Accuracy: 0.6050, AUC: 0.8129
Epoch 15/200, Loss: 0.6800, Validation Accu

0,1
Final Best Epoch,▁
Final Best Val Accuracy,▁
best_epoch,▁▁▂▂▂▃▃▃▃▄▄▄▄▄▄▆▆▆▇▇▇███████████████████
best_val_accuracy,▁▁▅▆▇▇▇█████████████████████████████████
roc_auc,▄▁▆▇▇█▇█████████████████████████████████
train_loss,█▅▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▂▁▅▆▆▇▇▇████████████████████████████████

0,1
Final Best Epoch,103.0
Final Best Val Accuracy,0.83167
best_epoch,103.0
best_val_accuracy,0.83167
roc_auc,0.90729
train_loss,0.3577
val_accuracy,0.82833


[34m[1mwandb[0m: Agent Starting Run: qmjqnp7g with config:
[34m[1mwandb[0m: 	architecture: LogisticRegression
[34m[1mwandb[0m: 	epochs: 200
[34m[1mwandb[0m: 	learning_rate: 0.05
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	weight_decay: 0.0005
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Epoch 1/200, Loss: 0.7610, Validation Accuracy: 0.5300, AUC: 0.6715
Epoch 2/200, Loss: 1.0901, Validation Accuracy: 0.5600, AUC: 0.6386
Epoch 3/200, Loss: 0.7480, Validation Accuracy: 0.4850, AUC: 0.5513
Epoch 4/200, Loss: 0.7936, Validation Accuracy: 0.4700, AUC: 0.5621
Epoch 5/200, Loss: 0.8481, Validation Accuracy: 0.5467, AUC: 0.6465
Epoch 6/200, Loss: 0.6939, Validation Accuracy: 0.6450, AUC: 0.7404
Epoch 7/200, Loss: 0.6331, Validation Accuracy: 0.5700, AUC: 0.7936
Epoch 8/200, Loss: 0.6905, Validation Accuracy: 0.5700, AUC: 0.8189
Epoch 9/200, Loss: 0.6978, Validation Accuracy: 0.6083, AUC: 0.8292
Epoch 10/200, Loss: 0.6303, Validation Accuracy: 0.7350, AUC: 0.8272
Epoch 11/200, Loss: 0.5671, Validation Accuracy: 0.7500, AUC: 0.8188
Epoch 12/200, Loss: 0.5751, Validation Accuracy: 0.6633, AUC: 0.8167
Epoch 13/200, Loss: 0.6043, Validation Accuracy: 0.7100, AUC: 0.8285
Epoch 14/200, Loss: 0.5817, Validation Accuracy: 0.7767, AUC: 0.8461
Epoch 15/200, Loss: 0.5338, Validation Accu

0,1
Final Best Epoch,▁
Final Best Val Accuracy,▁
best_epoch,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅█████
best_val_accuracy,▁▃▆▇▇▇▇▇████████████████████████████████
roc_auc,▁▄▆▇▇▇▇▇████████████████████████████████
train_loss,█▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▃▄▅▇▆▇▇████████████████████████████████

0,1
Final Best Epoch,179.0
Final Best Val Accuracy,0.83
best_epoch,179.0
best_val_accuracy,0.83
roc_auc,0.90671
train_loss,0.35967
val_accuracy,0.82833


[34m[1mwandb[0m: Agent Starting Run: v3i1vm83 with config:
[34m[1mwandb[0m: 	architecture: LogisticRegression
[34m[1mwandb[0m: 	epochs: 200
[34m[1mwandb[0m: 	learning_rate: 0.05
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	weight_decay: 0.0001
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Epoch 1/200, Loss: 0.7308, Validation Accuracy: 0.4817, AUC: 0.5413
Epoch 2/200, Loss: 0.7366, Validation Accuracy: 0.5350, AUC: 0.7097
Epoch 3/200, Loss: 0.7028, Validation Accuracy: 0.5350, AUC: 0.7550
Epoch 4/200, Loss: 0.6810, Validation Accuracy: 0.6517, AUC: 0.7607
Epoch 5/200, Loss: 0.5871, Validation Accuracy: 0.6433, AUC: 0.7592
Epoch 6/200, Loss: 0.6214, Validation Accuracy: 0.6800, AUC: 0.7930
Epoch 7/200, Loss: 0.5998, Validation Accuracy: 0.7267, AUC: 0.8337
Epoch 8/200, Loss: 0.5408, Validation Accuracy: 0.6483, AUC: 0.8555
Epoch 9/200, Loss: 0.5469, Validation Accuracy: 0.6200, AUC: 0.8656
Epoch 10/200, Loss: 0.5533, Validation Accuracy: 0.6783, AUC: 0.8678
Epoch 11/200, Loss: 0.5203, Validation Accuracy: 0.7683, AUC: 0.8647
Epoch 12/200, Loss: 0.4954, Validation Accuracy: 0.7833, AUC: 0.8611
Epoch 13/200, Loss: 0.5041, Validation Accuracy: 0.7850, AUC: 0.8623
Epoch 14/200, Loss: 0.5023, Validation Accuracy: 0.8000, AUC: 0.8682
Epoch 15/200, Loss: 0.4786, Validation Accu

0,1
Final Best Epoch,▁
Final Best Val Accuracy,▁
best_epoch,▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▄▄▄▄▄▄▄▄██████████████
best_val_accuracy,▁▄▇▇▇▇██████████████████████████████████
roc_auc,▁▄▆▇▇▇▇█████████████████████████████████
train_loss,█▆▄▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▄▇▆▇▇▇█████████████████████████████████

0,1
Final Best Epoch,140.0
Final Best Val Accuracy,0.83333
best_epoch,140.0
best_val_accuracy,0.83333
roc_auc,0.90798
train_loss,0.35614
val_accuracy,0.83


[34m[1mwandb[0m: Agent Starting Run: hqckfq9x with config:
[34m[1mwandb[0m: 	architecture: RandomForest
[34m[1mwandb[0m: 	epochs: 200
[34m[1mwandb[0m: 	learning_rate: 0.5
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	weight_decay: 0.0005
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Epoch 1/200, Loss: 0.6911, Validation Accuracy: 0.5300, AUC: 0.5000
Epoch 2/200, Loss: 133.3985, Validation Accuracy: 0.4700, AUC: 0.5000
Epoch 3/200, Loss: 1223.1190, Validation Accuracy: 0.5300, AUC: 0.5340
Epoch 4/200, Loss: 10.2221, Validation Accuracy: 0.4700, AUC: 0.5000
Epoch 5/200, Loss: 118.5226, Validation Accuracy: 0.5300, AUC: 0.5000
Epoch 6/200, Loss: 143.4308, Validation Accuracy: 0.4700, AUC: 0.5000
Epoch 7/200, Loss: 141.8136, Validation Accuracy: 0.4700, AUC: 0.4982
Epoch 8/200, Loss: 96.3729, Validation Accuracy: 0.5300, AUC: 0.5000
Epoch 9/200, Loss: 97.4814, Validation Accuracy: 0.5300, AUC: 0.5000
Epoch 10/200, Loss: 247.2188, Validation Accuracy: 0.4700, AUC: 0.4664
Epoch 11/200, Loss: 81.4257, Validation Accuracy: 0.5300, AUC: 0.5000
Epoch 12/200, Loss: 159.9387, Validation Accuracy: 0.4450, AUC: 0.4061
Epoch 13/200, Loss: 9.0090, Validation Accuracy: 0.5300, AUC: 0.7348
Epoch 14/200, Loss: 6.3267, Validation Accuracy: 0.7317, AUC: 0.7745
Epoch 15/200, Loss: 0.62

0,1
Final Best Epoch,▁
Final Best Val Accuracy,▁
best_epoch,▁▁▁█████████████████████████████████████
best_val_accuracy,▁▁▁█████████████████████████████████████
roc_auc,▄▄▃▄▄▄▄▄▄▄▃█▇▄▅▄▄▇█▄▄▄▄▄▄▄▄▄▄▄▁▄▄██▄▄▄▄▄
train_loss,▃▃▃▁▁▁█▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▄▄▃█▄███▄█▁▄███▄█▄█▄█▄████▄██▄▄██▄██████

0,1
Final Best Epoch,14.0
Final Best Val Accuracy,0.73167
best_epoch,14.0
best_val_accuracy,0.73167
roc_auc,0.5
train_loss,0.69084
val_accuracy,0.53


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 8ev1n6le with config:
[34m[1mwandb[0m: 	architecture: RandomForest
[34m[1mwandb[0m: 	epochs: 200
[34m[1mwandb[0m: 	learning_rate: 0.5
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	weight_decay: 0.0001
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Epoch 1/200, Loss: 0.6931, Validation Accuracy: 0.5300, AUC: 0.5000
Epoch 2/200, Loss: 1527.8683, Validation Accuracy: 0.4700, AUC: 0.5000
Epoch 3/200, Loss: 656.3652, Validation Accuracy: 0.5300, AUC: 0.5000
Epoch 4/200, Loss: 71.7698, Validation Accuracy: 0.4700, AUC: 0.5000
Epoch 5/200, Loss: 457.7717, Validation Accuracy: 0.5300, AUC: 0.5000
Epoch 6/200, Loss: 2041.1146, Validation Accuracy: 0.4700, AUC: 0.5000
Epoch 7/200, Loss: 172.3205, Validation Accuracy: 0.5300, AUC: 0.5000
Epoch 8/200, Loss: 627.5569, Validation Accuracy: 0.5300, AUC: 0.5000
Epoch 9/200, Loss: 43.8465, Validation Accuracy: 0.4700, AUC: 0.5000
Epoch 10/200, Loss: 868.0389, Validation Accuracy: 0.5300, AUC: 0.5213
Epoch 11/200, Loss: 11.0836, Validation Accuracy: 0.4700, AUC: 0.2574
Epoch 12/200, Loss: 37.6819, Validation Accuracy: 0.4700, AUC: 0.2842
Epoch 13/200, Loss: 2.7729, Validation Accuracy: 0.5300, AUC: 0.5000
Epoch 14/200, Loss: 19.0696, Validation Accuracy: 0.4700, AUC: 0.2600
Epoch 15/200, Loss: 34

0,1
Final Best Epoch,▁
Final Best Val Accuracy,▁
best_epoch,▁▁▁▁▁▁██████████████████████████████████
best_val_accuracy,▁▁▁▁▁▁██████████████████████████████████
roc_auc,▆▆▁▆▃▆█▆▆▆▆▆▆▆▆▆▆▆▆▆▄▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆
train_loss,▆█▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▄▁▄█▁▄▁▄▄▁▁▁▁▄▁▄▁▁▁▄▄▁▄▁▄▁▄▁▄▄▄▄▄▄▄▄▄

0,1
Final Best Epoch,32.0
Final Best Val Accuracy,0.59167
best_epoch,32.0
best_val_accuracy,0.59167
roc_auc,0.5
train_loss,0.6878
val_accuracy,0.53


[34m[1mwandb[0m: Agent Starting Run: 1x1y0cho with config:
[34m[1mwandb[0m: 	architecture: RandomForest
[34m[1mwandb[0m: 	epochs: 200
[34m[1mwandb[0m: 	learning_rate: 0.1
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	weight_decay: 0.0005
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Epoch 1/200, Loss: 0.6911, Validation Accuracy: 0.5300, AUC: 0.8144
Epoch 2/200, Loss: 0.7033, Validation Accuracy: 0.5300, AUC: 0.1993
Epoch 3/200, Loss: 0.6896, Validation Accuracy: 0.5300, AUC: 0.8431
Epoch 4/200, Loss: 0.6884, Validation Accuracy: 0.5300, AUC: 0.8492
Epoch 5/200, Loss: 0.6784, Validation Accuracy: 0.5300, AUC: 0.8550
Epoch 6/200, Loss: 0.8685, Validation Accuracy: 0.7133, AUC: 0.8446
Epoch 7/200, Loss: 0.6844, Validation Accuracy: 0.4700, AUC: 0.8204
Epoch 8/200, Loss: 0.6990, Validation Accuracy: 0.4700, AUC: 0.7925
Epoch 9/200, Loss: 0.7018, Validation Accuracy: 0.4700, AUC: 0.7544
Epoch 10/200, Loss: 0.6984, Validation Accuracy: 0.5567, AUC: 0.7509
Epoch 11/200, Loss: 0.6900, Validation Accuracy: 0.5300, AUC: 0.7637
Epoch 12/200, Loss: 0.6792, Validation Accuracy: 0.5300, AUC: 0.7899
Epoch 13/200, Loss: 0.6800, Validation Accuracy: 0.5300, AUC: 0.8156
Epoch 14/200, Loss: 0.6729, Validation Accuracy: 0.5300, AUC: 0.8388
Epoch 15/200, Loss: 0.6900, Validation Accu

0,1
Final Best Epoch,▁
Final Best Val Accuracy,▁
best_epoch,▁▁▁▁▂▂▂▃▃▄▄▄▅▅▅▅▆▆▇▇▇███████████████████
best_val_accuracy,▁▅▅▅▆▆▆▆▇▇▇▇▇▇▇▇████████████████████████
roc_auc,▁▇▆▇▇▇▇▇▇▇██████████████████████████████
train_loss,▆█▆▆▆▅▅▄▃▃▂▂▂▂▂▂▂▄▃▂▁▁▁▁▁▁▁▂▂▂▁▁▁▁▁▁▁▁▁▁
val_accuracy,▂▅▂▁▃▂▅▆▆▆▇▇▇▇▇▇█▆█████████▇████████████

0,1
Final Best Epoch,115.0
Final Best Val Accuracy,0.88833
best_epoch,115.0
best_val_accuracy,0.88833
roc_auc,0.95294
train_loss,0.23181
val_accuracy,0.87833


[34m[1mwandb[0m: Agent Starting Run: o2n7az0g with config:
[34m[1mwandb[0m: 	architecture: RandomForest
[34m[1mwandb[0m: 	epochs: 200
[34m[1mwandb[0m: 	learning_rate: 0.1
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	weight_decay: 0.0001
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Epoch 1/200, Loss: 0.6963, Validation Accuracy: 0.5300, AUC: 0.8031
Epoch 2/200, Loss: 1.0246, Validation Accuracy: 0.5300, AUC: 0.1858
Epoch 3/200, Loss: 0.6919, Validation Accuracy: 0.5300, AUC: 0.7824
Epoch 4/200, Loss: 0.6821, Validation Accuracy: 0.5300, AUC: 0.7854
Epoch 5/200, Loss: 0.8398, Validation Accuracy: 0.4700, AUC: 0.1615
Epoch 6/200, Loss: 0.7032, Validation Accuracy: 0.4700, AUC: 0.8151
Epoch 7/200, Loss: 0.7052, Validation Accuracy: 0.4700, AUC: 0.8360
Epoch 8/200, Loss: 0.7066, Validation Accuracy: 0.4700, AUC: 0.8334
Epoch 9/200, Loss: 0.7000, Validation Accuracy: 0.5950, AUC: 0.8190
Epoch 10/200, Loss: 0.6884, Validation Accuracy: 0.5300, AUC: 0.8102
Epoch 11/200, Loss: 0.6767, Validation Accuracy: 0.5300, AUC: 0.8373
Epoch 12/200, Loss: 0.6905, Validation Accuracy: 0.5300, AUC: 0.8780
Epoch 13/200, Loss: 0.6682, Validation Accuracy: 0.5300, AUC: 0.8658
Epoch 14/200, Loss: 0.6551, Validation Accuracy: 0.5567, AUC: 0.8834
Epoch 15/200, Loss: 0.6279, Validation Accu

0,1
Final Best Epoch,▁
Final Best Val Accuracy,▁
best_epoch,▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▄▄▄▄▄▆▆▆▆▆▇▇██████████
best_val_accuracy,▁▁▂▃▄▄▄▄▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇███████████
roc_auc,▁▇▇▄▆▇▇▇▇▇▇▇▇███████████████████████████
train_loss,█▅▅▄▅▅▅▄▅▄▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▂▁▁▁▁▂▁▁▁▁▁▁▁
val_accuracy,▂▁▂▂▁▂▂▃▄▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇██▇▇██████████▇█

0,1
Final Best Epoch,161.0
Final Best Val Accuracy,0.89833
best_epoch,161.0
best_val_accuracy,0.89833
roc_auc,0.95022
train_loss,0.2445
val_accuracy,0.86667


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: th375swl with config:
[34m[1mwandb[0m: 	architecture: RandomForest
[34m[1mwandb[0m: 	epochs: 200
[34m[1mwandb[0m: 	learning_rate: 0.05
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	weight_decay: 0.0005
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Epoch 1/200, Loss: 0.6940, Validation Accuracy: 0.5300, AUC: 0.8057
Epoch 2/200, Loss: 0.6826, Validation Accuracy: 0.5300, AUC: 0.8470
Epoch 3/200, Loss: 0.6906, Validation Accuracy: 0.5300, AUC: 0.8804
Epoch 4/200, Loss: 0.6840, Validation Accuracy: 0.5300, AUC: 0.8862
Epoch 5/200, Loss: 0.6886, Validation Accuracy: 0.5300, AUC: 0.8817
Epoch 6/200, Loss: 0.6851, Validation Accuracy: 0.5300, AUC: 0.8696
Epoch 7/200, Loss: 0.6755, Validation Accuracy: 0.5300, AUC: 0.8570
Epoch 8/200, Loss: 0.6731, Validation Accuracy: 0.5300, AUC: 0.8763
Epoch 9/200, Loss: 0.6601, Validation Accuracy: 0.5300, AUC: 0.8800
Epoch 10/200, Loss: 0.6440, Validation Accuracy: 0.5550, AUC: 0.8784
Epoch 11/200, Loss: 0.6129, Validation Accuracy: 0.7483, AUC: 0.8589
Epoch 12/200, Loss: 0.5777, Validation Accuracy: 0.6250, AUC: 0.8732
Epoch 13/200, Loss: 0.6379, Validation Accuracy: 0.7033, AUC: 0.7708
Epoch 14/200, Loss: 0.6161, Validation Accuracy: 0.5750, AUC: 0.7034
Epoch 15/200, Loss: 0.6736, Validation Accu

0,1
Final Best Epoch,▁
Final Best Val Accuracy,▁
best_epoch,▁▁▂▂▂▂▃▃▃▃▄▄▄▅▅▅▆▆▆▆▆▆▆▆▆███████████████
best_val_accuracy,▁▁▅▆▆▆▇▇▇▇▇▇▇███████████████████████████
roc_auc,▁▂▃▂▃▄▄▄▅▅▆▆▇▇▇▇▇▇▇▇▇▇██████████████████
train_loss,██▆▇▅▄▃▃▃▃▃▂▂▅▄▂▂▂▇▃▂▂▂▂▂▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▃▆▆▆▇▇▇▇▇▇▇█▇████▇▇█▇█████████████████

0,1
Final Best Epoch,125.0
Final Best Val Accuracy,0.89333
best_epoch,125.0
best_val_accuracy,0.89333
roc_auc,0.95143
train_loss,0.22298
val_accuracy,0.88167


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: l5cg3nic with config:
[34m[1mwandb[0m: 	architecture: RandomForest
[34m[1mwandb[0m: 	epochs: 200
[34m[1mwandb[0m: 	learning_rate: 0.05
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	weight_decay: 0.0001
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Epoch 1/200, Loss: 0.6930, Validation Accuracy: 0.5300, AUC: 0.7841
Epoch 2/200, Loss: 0.6848, Validation Accuracy: 0.5300, AUC: 0.8778
Epoch 3/200, Loss: 0.6843, Validation Accuracy: 0.5300, AUC: 0.8644
Epoch 4/200, Loss: 0.6692, Validation Accuracy: 0.5300, AUC: 0.8684
Epoch 5/200, Loss: 0.6912, Validation Accuracy: 0.5783, AUC: 0.8723
Epoch 6/200, Loss: 0.6710, Validation Accuracy: 0.5017, AUC: 0.8672
Epoch 7/200, Loss: 0.6891, Validation Accuracy: 0.4700, AUC: 0.8631
Epoch 8/200, Loss: 0.6908, Validation Accuracy: 0.5650, AUC: 0.8727
Epoch 9/200, Loss: 0.6861, Validation Accuracy: 0.6633, AUC: 0.8792
Epoch 10/200, Loss: 0.6771, Validation Accuracy: 0.5300, AUC: 0.8806
Epoch 11/200, Loss: 0.6625, Validation Accuracy: 0.5300, AUC: 0.8786
Epoch 12/200, Loss: 0.6516, Validation Accuracy: 0.5300, AUC: 0.8799
Epoch 13/200, Loss: 0.6634, Validation Accuracy: 0.5300, AUC: 0.8826
Epoch 14/200, Loss: 0.6304, Validation Accuracy: 0.6133, AUC: 0.8764
Epoch 15/200, Loss: 0.6235, Validation Accu

0,1
Final Best Epoch,▁
Final Best Val Accuracy,▁
best_epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▅▅▅▅▅▇▇▇▇▇▇▇▇██
best_val_accuracy,▁▂▃▅▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇█████████████████████
roc_auc,▂▂▂▂▁▃▄▄▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇█▇▇▇▇▇▇██████████
train_loss,███▇▅▅▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▁▅▆▇▇▇▇▇▇▇▇▇▇▇▇████▇██▆█▇▇▇███████████

0,1
Final Best Epoch,189.0
Final Best Val Accuracy,0.92
best_epoch,189.0
best_val_accuracy,0.92
roc_auc,0.96783
train_loss,0.1693
val_accuracy,0.91


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Sweep Agent: Exiting.


Best hyperparameters saved to best_hyperparameters.txt with accuracy: 0.9200


## 추론

모델을 학습했으니 test 데이터를 예측하는 일만 남았습니다! 

test 데이터에도 train 데이터에 했던 전처리를 진행한 후, 추론을 진행하면 됩니다.(data leakage 에 주의하도록 합니다!)

In [11]:
test = pd.read_csv('test_final.csv')
test = test.drop(["id"],axis=1) 

test = label_encoder(test, label_map) #test data 라벨 인코딩

test[num_features] = scaler.transform(test[num_features]) #test 데이터 정규화

# 범주형 변수 인코딩
test_x_encoded = pd.get_dummies(test)

LogisticRegression.predict() 는 Sigmoid Function 의 출력 값을 출력합니다.

따라서 예측 값들을 threshold 0.5 기준으로 0 또는 1 의 값으로 변환해주어야 합니다. 

In [12]:
#pred = lr.predict(test)
#pred_bin = lr.to_bin(pred)

In [13]:
# 저장된 모델 가중치를 불러와서 예측하는 함수
def load_and_predict(model, input_data):
    # 입력 데이터의 차원을 사용하여 모델 초기화
    input_dim = input_data.shape[1]
    # 모델 선택
    if model == 'LogisticRegression':
        model = LogisticRegressionModel(input_dim)
    elif model == 'RandomForest':
        model = RandomForestModel(input_dim)
    elif model == 'XGBoostLike':
        model = XGBoostLikeModel(input_dim)
    else:
        raise ValueError(f"Unknown architecture: {model}")

    # 모델 가중치 로드
    model.load_state_dict(torch.load("best_overall_model.pth"))
    model.eval()  # 예측 모드로 전환

    # 입력 데이터를 텐서로 변환
    input_tensor = torch.tensor(input_data, dtype=torch.float32)

    # 예측 수행
    with torch.no_grad():
        outputs = model(input_tensor)
        outputs = torch.sigmoid(outputs)  # 이진 분류일 경우 시그모이드 함수 적용
        predictions = (outputs.squeeze() > 0.5).float()  # 0.5 기준으로 클래스 예측

    return predictions.numpy()

In [14]:
model = 'RandomForest'

sample_submission = pd.read_csv('sample_submission_final.csv')
pred_bin = load_and_predict(model, test_x_encoded.values)
sample_submission.target = pred_bin
sample_submission.to_csv("submission.csv",index=False)

  model.load_state_dict(torch.load("best_overall_model.pth"))
