In [59]:
#.venv\Scripts\activate
#!pip install pandas
#!pip install scikit-learn
#!pip install wandb
#!pip install torch
#!pip install numpy==1.24.4
#!pip install matplotlib

## 항공사 고객 만족도 예측 경진대회 베이스라인
이 대회는 성별, 이동 거리, 이륙 딜레이 시간 등으로 부터 고객 만족 여부를 예측하는 대회입니다.

언뜻보면 간단해 보이지만 처음 데이터 분석을 접하시는 분들에게는 어디부터 어떻게 시작해야 하는지 막막하실 겁니다.

## 데이터 불러오기
먼저 분석하려는 데이터를 작업장으로 가져오는 작업이 필요합니다.

이를 위해서 파이썬 라이브러리 중 하나인 Pandas를 이용합니다.

pandas 라이브러리는 엑셀과 같은 행과 열로 이루어진 테이블(table) 형식의 데이터를 편하게 다루도록 해주는 라이브러리입니다.

데이터 분석에는 반드시 사용된다고 해도 과언이 아닌 라이브러리이니, 시간이 날때마다 공부해둡시다.

In [60]:
import pandas as pd

train = pd.read_csv(r"train_final.csv")

print(f'train set은 {train.shape[1]} 개의 feature를 가진 {train.shape[0]} 개의 데이터 샘플로 이루어져 있습니다.')

train.head()

train set은 24 개의 feature를 가진 2999 개의 데이터 샘플로 이루어져 있습니다.


Unnamed: 0,id,Gender,Customer Type,Age,Type of Travel,Class,Flight Distance,Seat comfort,Departure/Arrival time convenient,Food and drink,...,Ease of Online booking,On-board service,Leg room service,Baggage handling,Checkin service,Cleanliness,Online boarding,Departure Delay in Minutes,Arrival Delay in Minutes,target
0,1,Male,Loyal Customer,72,Business travel,Business,1784,1,1,4,...,2,2,3,2,5,2,0,0,0,1
1,2,Male,Loyal Customer,35,Business travel,Business,2228,2,5,5,...,2,2,2,2,4,2,0,0,8,0
2,3,Male,disloyal Customer,37,Business travel,Eco,1970,4,3,4,...,1,3,2,5,1,4,0,1,2,1
3,4,Female,Loyal Customer,20,Personal Travel,Eco,2249,5,5,1,...,1,4,1,4,1,5,0,0,0,1
4,5,Male,disloyal Customer,38,Business travel,Eco,1995,2,2,2,...,5,2,5,3,1,2,0,110,102,0


데이터를 자세히 살펴보는 것은 EDA 글을 통해 알아보는 것으로 하겠습니다.

여기에서는 분석 방법에 초점을 맞추겠습니다.

## 결측치 확인

결측치(NA: Not Available)란 값이 누락된 데이터를 말합니다.

보다 정확한 분석을 하기 위해서는 데이터의 결측치를 확인하고 적절히 처리해주어야 합니다.

이번 데이터에 결측치가 있나 확인해볼까요?

In [61]:
def check_missing_col(dataframe):
    missing_col = []
    for col in dataframe.columns:
        missing_values = sum(dataframe[col].isna())
        is_missing = True if missing_values >= 1 else False
        if is_missing:
            print(f'결측치가 있는 컬럼은: {col} 입니다')
            print(f'해당 컬럼에 총 {missing_values} 개의 결측치가 존재합니다.')
            missing_col.append([col, dataframe[col].dtype])
    if missing_col == []:
        print('결측치가 존재하지 않습니다')
    return missing_col

missing_col = check_missing_col(train)

결측치가 존재하지 않습니다


이번 데이터에는 결측치가 존재하지 않네요.

만약 결측치가 존재한다면 결측치 삭제, 평균 보간법, 최빈값 보간법 등 다양한 방식으로 처리할 수 있습니다

## 데이터 전처리 

### 라벨 인코딩

데이터를 모델에 투입하기 전 몇 가지 전처리 과정이 필요합니다. 

우선은 카테고리 형식의 features 들을 바꿔주어야 합니다. 

본 데이터에서는 'Gender', 'Customer Type','Type of Travel', 'Class' 의 카테고리 형식의 feature 들이 있습니다. 

이러한 데이터의 라벨을 숫자로 바꾸어주는 전처리를 라벨 인코딩 (label encoding) 이라고 합니다.

본격적으로 데이터 전처리를 하기 전에 id 열을 제거해주고, data 와 target 을 나누어줍니다.

In [62]:
train_x = train.drop(["id","target"],axis=1) 
train_y = train.target 

#라벨인코딩을 하기 위함 dictionary map 생성 함수
def make_label_map(dataframe):
    label_maps = {}
    for col in dataframe.columns:
        if dataframe[col].dtype=='object':
            label_map = {'unknown':0}
            for i, key in enumerate(dataframe[col].unique()): #enumerate는 인덱스와 함께 고유값 반환, enumerate(['a', 'b', 'c'])는 (0, 'a'), (1, 'b'), (2, 'c')와 같은 튜플을 반환
                label_map[key] = i+1  
            label_maps[col] = label_map
    return label_maps

# 각 범주형 변수에 인코딩 값을 부여하는 함수
def label_encoder(dataframe, label_map):
    for col in dataframe.columns:
        if dataframe[col].dtype=='object':
            dataframe[col] = dataframe[col].map(label_map[col])
            dataframe[col] = dataframe[col].fillna(label_map[col]['unknown']) 
    return dataframe

# train 데이터 라벨 인코딩
label_map = make_label_map(train_x) # train 사용해 label map 생성
train_x = label_encoder(train_x, label_map) # train 라벨 인코딩

train_x.head()

Unnamed: 0,Gender,Customer Type,Age,Type of Travel,Class,Flight Distance,Seat comfort,Departure/Arrival time convenient,Food and drink,Gate location,...,Online support,Ease of Online booking,On-board service,Leg room service,Baggage handling,Checkin service,Cleanliness,Online boarding,Departure Delay in Minutes,Arrival Delay in Minutes
0,1,1,72,1,1,1784,1,1,4,1,...,5,2,2,3,2,5,2,0,0,0
1,1,1,35,1,1,2228,2,5,5,5,...,2,2,2,2,2,4,2,0,0,8
2,1,2,37,1,2,1970,4,3,4,1,...,1,1,3,2,5,1,4,0,1,2
3,2,1,20,2,2,2249,5,5,1,3,...,1,1,4,1,4,1,5,0,0,0
4,1,2,38,1,2,1995,2,2,2,2,...,5,5,2,5,3,1,2,0,110,102


라벨 인코딩 후 원래 문자열 형식의 데이터가 숫자로 표현되어 있음을 볼 수 있습니다.

## 정규화 

수치형 데이터들을 정규화 시켜줍니다.

머신러닝 과정에서 모델은 데이터의 특성(feature)들을 추출해 학습을 진행합니다.

하지만 학습을 하는 과정에서 데이터의 값이 너무 크거나, 분산이 너무 크면 학습 과정에 악영향을 끼칠 수 있습니다.

따라서 정규화를 통해 데이터 값의 크기를 줄이고 분산을 줄여 모델이 데이터를 이상하게 해석하는 것을 방지합니다.

이번 베이스라인에서는 min-max 정규화를 이용해 봅시다.

min-max 정규화는 수치형 데이터의 값을 0~1 사이의 값으로 변환해줍니다.

min-max 정규화의 수식은 아래와 같습니다.

X' = (X - MIN) / (MAX-MIN)

In [63]:
from sklearn.preprocessing import MinMaxScaler

num_features = ['Age','Flight Distance','Departure Delay in Minutes','Arrival Delay in Minutes']

scaler = MinMaxScaler()
train_x[num_features] = scaler.fit_transform(train_x[num_features]) 
train_x.head()

Unnamed: 0,Gender,Customer Type,Age,Type of Travel,Class,Flight Distance,Seat comfort,Departure/Arrival time convenient,Food and drink,Gate location,...,Online support,Ease of Online booking,On-board service,Leg room service,Baggage handling,Checkin service,Cleanliness,Online boarding,Departure Delay in Minutes,Arrival Delay in Minutes
0,1,1,0.890411,1,1,0.264656,1,1,4,1,...,5,2,2,3,2,5,2,0,0.0,0.0
1,1,1,0.383562,1,1,0.332619,2,5,5,5,...,2,2,2,2,2,4,2,0,0.0,0.017021
2,1,2,0.410959,1,2,0.293127,4,3,4,1,...,1,1,3,2,5,1,4,0,0.002299,0.004255
3,2,1,0.178082,2,2,0.335833,5,5,1,3,...,1,1,4,1,4,1,5,0,0.0,0.0
4,1,2,0.424658,1,2,0.296954,2,2,2,2,...,5,5,2,5,3,1,2,0,0.252874,0.217021


정규화 후 수치형 데이터들의 값이 0과 1 사이의 값으로 변환 되었음을 볼 수 있습니다.

## 모델 학습

모델을 설계했으니 이제 모델을 학습하면 됩니다.

객체에 Logistic Regression 클래스를 할당 후, 클래스 내 fit 메소드를 이용해 학습을 진행하면 됩니다.

In [64]:
import wandb
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import accuracy_score, roc_curve, auc
import matplotlib.pyplot as plt


sweep_config = {
    'method': 'grid',  # 또는 'random' 혹은 'bayes'로 변경 가능
    'metric': {
        'name': 'train_loss',  # 주로 모니터링할 메트릭
        'goal': 'minimize'  # 'maximize'로 변경 가능
    },
    'parameters': {
        'epochs': {
            'values': [200]  # 에포크 수를 탐색할 값들로 정의
        },
        'learning_rate': {
            'values': [0.5, 0.1, 0.05]  # 학습률을 탐색할 값들로 정의
        },
        'weight_decay': {
            'values': [0.0005, 0.0001]  # weight_decay 값을 탐색할 값들로 정의
        },
        'architecture': {
            'values': ['LogisticRegression', 'RandomForest']  # 모델 아키텍처를 탐색할 값들로 정의
        },
        'seed': {
            'value': 42  # 고정된 시드 값 (변경하지 않음)
        }
    }
}

In [65]:
# torch 기반 Logistic Regression 모델 정의
class LogisticRegressionModel(nn.Module):
    def __init__(self, input_dim):
        super(LogisticRegressionModel, self).__init__()
        self.linear = nn.Linear(input_dim, 1)
    
    def forward(self, x):
        return self.linear(x)

class RandomForestModel(nn.Module):
    def __init__(self, input_dim, num_trees=10, max_depth=5):
        super(RandomForestModel, self).__init__()
        self.num_trees = num_trees
        self.trees = nn.ModuleList([self._build_tree(input_dim, max_depth) for _ in range(num_trees)])
    
    def _build_tree(self, input_dim, max_depth):
        # 간단한 트리 구조를 생성 (이 예제에서는 트리를 Linear 레이어로 대체)
        layers = []
        for _ in range(max_depth):
            layers.append(nn.Linear(input_dim, input_dim))
            layers.append(nn.ReLU())
        layers.append(nn.Linear(input_dim, 1))  # 최종 레이어는 1개의 출력 노드를 가짐
        return nn.Sequential(*layers)
    
    def forward(self, x):
        # 모든 트리의 출력을 평균화
        tree_outputs = [tree(x) for tree in self.trees]
        avg_output = torch.mean(torch.stack(tree_outputs), dim=0)
        return avg_output

class XGBoostLikeModel(nn.Module):
    def __init__(self, input_dim, num_trees=10, max_depth=5, learning_rate=0.1):
        super(XGBoostLikeModel, self).__init__()
        self.num_trees = num_trees
        self.learning_rate = learning_rate
        self.trees = nn.ModuleList([self._build_tree(input_dim, max_depth) for _ in range(num_trees)])
    
    def _build_tree(self, input_dim, max_depth):
        # 간단한 트리 구조를 생성 (이 예제에서는 트리를 Linear 레이어로 대체)
        layers = []
        for _ in range(max_depth):
            layers.append(nn.Linear(input_dim, input_dim))
            layers.append(nn.ReLU())
        layers.append(nn.Linear(input_dim, 1))  # 최종 레이어는 1개의 출력 노드를 가짐
        return nn.Sequential(*layers)
    
    def forward(self, x):
        # 초기 예측값 (예: 0 또는 평균값)
        output = torch.zeros_like(x[:, :1])  # 초기값을 0으로 설정 (mean value를 사용할 수도 있음)
        
        # 각 트리의 예측을 적용하여 점진적으로 결과를 갱신
        for tree in self.trees:
            residual = x - output  # 남은 오류 (잔차)
            tree_output = tree(residual)  # 트리의 예측
            output = output + self.learning_rate * tree_output  # 트리의 예측을 학습률과 곱하여 누적
        
        return output

In [66]:
# 전역 변수로 설정
best_overall_accuracy = 0.0
best_overall_hyperparameters = None

# Sweep 실행 후 모든 실험이 완료된 후, 최고 성능 모델과 하이퍼파라미터 저장
def save_best_hyperparameters(best_overall_hyperparameters):
    with open("best_hyperparameters.txt", "w") as f:
        for key, value in best_overall_hyperparameters.items():
            f.write(f"{key}: {value}\n")
    print(f"Best hyperparameters saved to best_hyperparameters.txt with accuracy: {best_overall_accuracy:.4f}")

In [67]:
# 범주형 변수 인코딩
train_x_encoded = pd.get_dummies(train_x)

In [68]:
# 학습 함수
def train():
    global best_overall_accuracy, best_overall_hyperparameters  # 전역 변수를 참조하도록 설정

    # WandB 초기화
    with wandb.init() as run:
        config = run.config
        X_train, X_val, y_train, y_val = train_test_split(train_x_encoded, train_y, test_size=0.2, random_state=config.seed)

        X_train = torch.tensor(X_train.values, dtype=torch.float32)
        y_train = torch.tensor(y_train.values, dtype=torch.float32)
        X_val = torch.tensor(X_val.values, dtype=torch.float32)
        y_val = torch.tensor(y_val.values, dtype=torch.float32)

        # DataLoader 설정
        #train_dataset = TensorDataset(X_train, y_train)
        #train_loader = DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True)
        
        # 모델 초기화
        input_dim = X_train.shape[1]
        if config.architecture == 'LogisticRegression':
            model = LogisticRegressionModel(input_dim)
        elif config.architecture == 'RandomForest':
            model = RandomForestModel(input_dim)
        elif config.architecture == 'XGBoostLike':
            model = XGBoostLikeModel(input_dim)

        # 손실함수 및 옵티마이저 정의
        criterion = nn.BCEWithLogitsLoss()
        optimizer = optim.Adam(model.parameters(), lr=config.learning_rate, weight_decay=config.weight_decay)

        best_val_accuracy = 0.0  # 최고 검증 정확도 초기화
        best_epoch = 0  # 최고 성능을 기록한 에포크 초기화

        # 학습 과정
        for epoch in range(config.epochs):
            model.train()
            optimizer.zero_grad()
            outputs = model(X_train)
            loss = criterion(outputs.squeeze(), y_train)
            loss.backward()
            optimizer.step()

            # 검증 과정
            model.eval()
            with torch.no_grad():
                val_outputs = model(X_val)
                val_outputs = torch.sigmoid(val_outputs)
                val_predictions = (val_outputs.squeeze() > 0.5).float()
                val_accuracy = accuracy_score(y_val.numpy(), val_predictions.numpy())

                # 최고 검증 정확도 업데이트
                if val_accuracy > best_val_accuracy:
                    best_val_accuracy = val_accuracy
                    best_epoch = epoch + 1

                # ROC Curve 계산
                fpr, tpr, _ = roc_curve(y_val.numpy(), val_outputs.numpy())
                roc_auc = auc(fpr, tpr)

                # ROC Curve 플롯 생성
                plt.figure()
                plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc:.2f})')
                plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
                plt.xlim([0.0, 1.0])
                plt.ylim([0.0, 1.05])
                plt.xlabel('False Positive Rate')
                plt.ylabel('True Positive Rate')
                plt.title('Receiver Operating Characteristic')
                plt.legend(loc="lower right")

                # ROC Curve를 WandB에 이미지로 로깅
                wandb.log({"train_loss": loss.item(), "val_accuracy": val_accuracy, "roc_auc": roc_auc, 
                           "roc_curve": wandb.Image(plt), "best_val_accuracy": best_val_accuracy, 
                           "best_epoch": best_epoch}, step=epoch)
                plt.close()

            print(f"Epoch {epoch+1}/{config.epochs}, Loss: {loss.item():.4f}, Validation Accuracy: {val_accuracy:.4f}, AUC: {roc_auc:.4f}")
        
        # 최고 성능의 하이퍼파라미터를 추적하여 저장
        if best_val_accuracy > best_overall_accuracy:
            best_overall_accuracy = best_val_accuracy
            best_overall_hyperparameters = config
            torch.save(model.state_dict(), "best_overall_model.pth")  # 모델 가중치 저장

        

        # 최종적으로 최고 성능을 기록한 에포크 시각화
        wandb.log({"Final Best Val Accuracy": best_val_accuracy, "Final Best Epoch": best_epoch})

sweep_id = wandb.sweep(sweep_config)
wandb.agent(sweep_id, function=train)

# 모든 실험이 끝난 후 최종 하이퍼파라미터 저장
save_best_hyperparameters(best_overall_hyperparameters)

# 학습 실행
#train(model, X_train, y_train, criterion, optimizer, wandb.config)

Create sweep with ID: y1r7j6mo
Sweep URL: https://wandb.ai/zamtol/uncategorized/sweeps/y1r7j6mo


[34m[1mwandb[0m: Agent Starting Run: sp4ymd5p with config:
[34m[1mwandb[0m: 	architecture: LogisticRegression
[34m[1mwandb[0m: 	epochs: 200
[34m[1mwandb[0m: 	learning_rate: 0.5
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	weight_decay: 0.0005
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Epoch 1/200, Loss: 1.0661, Validation Accuracy: 0.4700, AUC: 0.2324
Epoch 2/200, Loss: 14.5998, Validation Accuracy: 0.4700, AUC: 0.2629
Epoch 3/200, Loss: 9.0065, Validation Accuracy: 0.5650, AUC: 0.7511
Epoch 4/200, Loss: 0.7216, Validation Accuracy: 0.5300, AUC: 0.8179
Epoch 5/200, Loss: 4.4274, Validation Accuracy: 0.5300, AUC: 0.8297
Epoch 6/200, Loss: 5.0685, Validation Accuracy: 0.5300, AUC: 0.8512
Epoch 7/200, Loss: 3.4402, Validation Accuracy: 0.6667, AUC: 0.8365
Epoch 8/200, Loss: 0.5862, Validation Accuracy: 0.4700, AUC: 0.6365
Epoch 9/200, Loss: 3.7831, Validation Accuracy: 0.4700, AUC: 0.6431
Epoch 10/200, Loss: 4.2704, Validation Accuracy: 0.5100, AUC: 0.7760
Epoch 11/200, Loss: 1.5869, Validation Accuracy: 0.5800, AUC: 0.8693
Epoch 12/200, Loss: 1.2850, Validation Accuracy: 0.5383, AUC: 0.8778
Epoch 13/200, Loss: 2.8752, Validation Accuracy: 0.5383, AUC: 0.8793
Epoch 14/200, Loss: 3.0665, Validation Accuracy: 0.5700, AUC: 0.8782
Epoch 15/200, Loss: 2.0214, Validation Acc

0,1
Final Best Epoch,▁
Final Best Val Accuracy,▁
best_epoch,▁▁▂▃▃▃▄▅▅▅▅▅████████████████████████████
best_val_accuracy,▁▃▅▆▇▇██████████████████████████████████
roc_auc,▁▇█▇████████████████████████████████████
train_loss,█▃▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▂▂▄▄▆██▇▇██████████████████████████████

0,1
Final Best Epoch,64.0
Final Best Val Accuracy,0.83333
best_epoch,64.0
best_val_accuracy,0.83333
roc_auc,0.90916
train_loss,0.35379
val_accuracy,0.82167


[34m[1mwandb[0m: Agent Starting Run: oc768fgt with config:
[34m[1mwandb[0m: 	architecture: LogisticRegression
[34m[1mwandb[0m: 	epochs: 200
[34m[1mwandb[0m: 	learning_rate: 0.5
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	weight_decay: 0.0001
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Epoch 1/200, Loss: 1.0070, Validation Accuracy: 0.4700, AUC: 0.2358
Epoch 2/200, Loss: 14.7533, Validation Accuracy: 0.4700, AUC: 0.2672
Epoch 3/200, Loss: 8.8950, Validation Accuracy: 0.5417, AUC: 0.8178
Epoch 4/200, Loss: 0.8471, Validation Accuracy: 0.5300, AUC: 0.8373
Epoch 5/200, Loss: 4.1789, Validation Accuracy: 0.5300, AUC: 0.8489
Epoch 6/200, Loss: 4.4916, Validation Accuracy: 0.5300, AUC: 0.8738
Epoch 7/200, Loss: 2.6289, Validation Accuracy: 0.6700, AUC: 0.7977
Epoch 8/200, Loss: 0.6691, Validation Accuracy: 0.4700, AUC: 0.6937
Epoch 9/200, Loss: 2.6680, Validation Accuracy: 0.5233, AUC: 0.7774
Epoch 10/200, Loss: 1.3749, Validation Accuracy: 0.6117, AUC: 0.8723
Epoch 11/200, Loss: 0.9310, Validation Accuracy: 0.5500, AUC: 0.8840
Epoch 12/200, Loss: 1.8984, Validation Accuracy: 0.5733, AUC: 0.8835
Epoch 13/200, Loss: 1.6011, Validation Accuracy: 0.7233, AUC: 0.8678
Epoch 14/200, Loss: 0.5978, Validation Accuracy: 0.6317, AUC: 0.8279
Epoch 15/200, Loss: 1.0387, Validation Acc

0,1
Final Best Epoch,▁
Final Best Val Accuracy,▁
best_epoch,▁▁▂▃▄▄▅▅▅▅▇█████████████████████████████
best_val_accuracy,▁▂▅▇▇▇██████████████████████████████████
roc_auc,▁██▇████████████████████████████████████
train_loss,█▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▂▃▇▅▆▇▇████████████████████████████████

0,1
Final Best Epoch,54.0
Final Best Val Accuracy,0.83333
best_epoch,54.0
best_val_accuracy,0.83333
roc_auc,0.90924
train_loss,0.35307
val_accuracy,0.825


[34m[1mwandb[0m: Agent Starting Run: 9xinokq1 with config:
[34m[1mwandb[0m: 	architecture: LogisticRegression
[34m[1mwandb[0m: 	epochs: 200
[34m[1mwandb[0m: 	learning_rate: 0.1
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	weight_decay: 0.0005
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Epoch 1/200, Loss: 1.4359, Validation Accuracy: 0.4700, AUC: 0.5510
Epoch 2/200, Loss: 1.1385, Validation Accuracy: 0.4683, AUC: 0.6816
Epoch 3/200, Loss: 0.8347, Validation Accuracy: 0.5567, AUC: 0.7968
Epoch 4/200, Loss: 0.6644, Validation Accuracy: 0.5317, AUC: 0.8179
Epoch 5/200, Loss: 0.8728, Validation Accuracy: 0.5400, AUC: 0.8314
Epoch 6/200, Loss: 0.7597, Validation Accuracy: 0.6883, AUC: 0.8360
Epoch 7/200, Loss: 0.5623, Validation Accuracy: 0.5983, AUC: 0.8174
Epoch 8/200, Loss: 0.6537, Validation Accuracy: 0.5300, AUC: 0.8151
Epoch 9/200, Loss: 0.7109, Validation Accuracy: 0.7450, AUC: 0.8404
Epoch 10/200, Loss: 0.5668, Validation Accuracy: 0.7067, AUC: 0.8591
Epoch 11/200, Loss: 0.5270, Validation Accuracy: 0.6150, AUC: 0.8666
Epoch 12/200, Loss: 0.6012, Validation Accuracy: 0.6150, AUC: 0.8690
Epoch 13/200, Loss: 0.6032, Validation Accuracy: 0.6900, AUC: 0.8675
Epoch 14/200, Loss: 0.5255, Validation Accuracy: 0.7800, AUC: 0.8617
Epoch 15/200, Loss: 0.4878, Validation Accu

0,1
Final Best Epoch,▁
Final Best Val Accuracy,▁
best_epoch,▁▁▁▂▂▂▃▃▃▃▃▃▃▃▃▃▃▅▅▆▆▆▆▆▆▆██████████████
best_val_accuracy,▁▅▆▇▇▇██████████████████████████████████
roc_auc,▁▆▇▆▇▇▇█████████████████████████████████
train_loss,█▅▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▅▄▆▇▇█▇████████████████████████████████

0,1
Final Best Epoch,143.0
Final Best Val Accuracy,0.83333
best_epoch,143.0
best_val_accuracy,0.83333
roc_auc,0.90762
train_loss,0.35685
val_accuracy,0.82833


[34m[1mwandb[0m: Agent Starting Run: 9jrpvgfw with config:
[34m[1mwandb[0m: 	architecture: LogisticRegression
[34m[1mwandb[0m: 	epochs: 200
[34m[1mwandb[0m: 	learning_rate: 0.1
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	weight_decay: 0.0001
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Epoch 1/200, Loss: 0.7510, Validation Accuracy: 0.4700, AUC: 0.2218
Epoch 2/200, Loss: 2.7166, Validation Accuracy: 0.4650, AUC: 0.3352
Epoch 3/200, Loss: 1.0296, Validation Accuracy: 0.5300, AUC: 0.6506
Epoch 4/200, Loss: 1.1662, Validation Accuracy: 0.5300, AUC: 0.7084
Epoch 5/200, Loss: 1.7164, Validation Accuracy: 0.5300, AUC: 0.7241
Epoch 6/200, Loss: 1.6459, Validation Accuracy: 0.5300, AUC: 0.7213
Epoch 7/200, Loss: 1.1744, Validation Accuracy: 0.5617, AUC: 0.6785
Epoch 8/200, Loss: 0.6682, Validation Accuracy: 0.4817, AUC: 0.5615
Epoch 9/200, Loss: 0.8689, Validation Accuracy: 0.4700, AUC: 0.5282
Epoch 10/200, Loss: 1.2043, Validation Accuracy: 0.4750, AUC: 0.5847
Epoch 11/200, Loss: 1.0487, Validation Accuracy: 0.5917, AUC: 0.6913
Epoch 12/200, Loss: 0.6681, Validation Accuracy: 0.6200, AUC: 0.7736
Epoch 13/200, Loss: 0.6040, Validation Accuracy: 0.5533, AUC: 0.8057
Epoch 14/200, Loss: 0.7976, Validation Accuracy: 0.5450, AUC: 0.8187
Epoch 15/200, Loss: 0.8941, Validation Accu

0,1
Final Best Epoch,▁
Final Best Val Accuracy,▁
best_epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▇▇▇▇▇██
best_val_accuracy,▁▂▄▄▆▆▇▇████████████████████████████████
roc_auc,▁▆▆▇▇▇██████████████████████████████████
train_loss,█▅▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▂▄▄▆▆▇▇████████████████████████████████

0,1
Final Best Epoch,193.0
Final Best Val Accuracy,0.82833
best_epoch,193.0
best_val_accuracy,0.82833
roc_auc,0.90711
train_loss,0.35861
val_accuracy,0.82667


[34m[1mwandb[0m: Agent Starting Run: jk0wgmtm with config:
[34m[1mwandb[0m: 	architecture: LogisticRegression
[34m[1mwandb[0m: 	epochs: 200
[34m[1mwandb[0m: 	learning_rate: 0.05
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	weight_decay: 0.0005
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Epoch 1/200, Loss: 0.7517, Validation Accuracy: 0.5400, AUC: 0.6889
Epoch 2/200, Loss: 0.7622, Validation Accuracy: 0.5250, AUC: 0.5773
Epoch 3/200, Loss: 0.7714, Validation Accuracy: 0.6017, AUC: 0.6433
Epoch 4/200, Loss: 0.6850, Validation Accuracy: 0.5883, AUC: 0.7333
Epoch 5/200, Loss: 0.6527, Validation Accuracy: 0.5683, AUC: 0.7753
Epoch 6/200, Loss: 0.6813, Validation Accuracy: 0.5983, AUC: 0.7803
Epoch 7/200, Loss: 0.6301, Validation Accuracy: 0.6833, AUC: 0.7691
Epoch 8/200, Loss: 0.5957, Validation Accuracy: 0.6950, AUC: 0.7644
Epoch 9/200, Loss: 0.6147, Validation Accuracy: 0.7150, AUC: 0.7864
Epoch 10/200, Loss: 0.5948, Validation Accuracy: 0.7100, AUC: 0.8191
Epoch 11/200, Loss: 0.5573, Validation Accuracy: 0.6683, AUC: 0.8441
Epoch 12/200, Loss: 0.5561, Validation Accuracy: 0.6583, AUC: 0.8567
Epoch 13/200, Loss: 0.5587, Validation Accuracy: 0.6917, AUC: 0.8604
Epoch 14/200, Loss: 0.5366, Validation Accuracy: 0.7417, AUC: 0.8587
Epoch 15/200, Loss: 0.5156, Validation Accu

0,1
Final Best Epoch,▁
Final Best Val Accuracy,▁
best_epoch,▁▁▁▂▂▂▂▃▃▃▃▃▃▃▃▃▃▃▃▆▆▆▆▆▆███████████████
best_val_accuracy,▁▂▅▇▇███████████████████████████████████
roc_auc,▁▅▇▇████████████████████████████████████
train_loss,█▇▄▄▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▃▄▇▇▇██████████████████████████████████

0,1
Final Best Epoch,137.0
Final Best Val Accuracy,0.83167
best_epoch,137.0
best_val_accuracy,0.83167
roc_auc,0.90786
train_loss,0.35663
val_accuracy,0.83167


[34m[1mwandb[0m: Agent Starting Run: gcr25oe1 with config:
[34m[1mwandb[0m: 	architecture: LogisticRegression
[34m[1mwandb[0m: 	epochs: 200
[34m[1mwandb[0m: 	learning_rate: 0.05
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	weight_decay: 0.0001
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Epoch 1/200, Loss: 1.0073, Validation Accuracy: 0.5933, AUC: 0.7411
Epoch 2/200, Loss: 0.7367, Validation Accuracy: 0.5467, AUC: 0.7749
Epoch 3/200, Loss: 0.9060, Validation Accuracy: 0.5650, AUC: 0.7764
Epoch 4/200, Loss: 0.7874, Validation Accuracy: 0.6617, AUC: 0.7544
Epoch 5/200, Loss: 0.6328, Validation Accuracy: 0.6183, AUC: 0.7182
Epoch 6/200, Loss: 0.6643, Validation Accuracy: 0.5283, AUC: 0.7070
Epoch 7/200, Loss: 0.7429, Validation Accuracy: 0.5867, AUC: 0.7337
Epoch 8/200, Loss: 0.6894, Validation Accuracy: 0.7000, AUC: 0.7765
Epoch 9/200, Loss: 0.5983, Validation Accuracy: 0.6833, AUC: 0.8113
Epoch 10/200, Loss: 0.5876, Validation Accuracy: 0.6250, AUC: 0.8309
Epoch 11/200, Loss: 0.6278, Validation Accuracy: 0.6183, AUC: 0.8403
Epoch 12/200, Loss: 0.6375, Validation Accuracy: 0.6550, AUC: 0.8426
Epoch 13/200, Loss: 0.5993, Validation Accuracy: 0.7117, AUC: 0.8386
Epoch 14/200, Loss: 0.5522, Validation Accuracy: 0.7433, AUC: 0.8312
Epoch 15/200, Loss: 0.5418, Validation Accu

0,1
Final Best Epoch,▁
Final Best Val Accuracy,▁
best_epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▆▆▆▆▆▆▆▆▆▆▆▆▆█
best_val_accuracy,▁▃▄▅▆▆▇▇▇███████████████████████████████
roc_auc,▃▁▆▅▇▇▇▇▇▇██████████████████████████████
train_loss,█▇▆▅▄▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▄▆▆▇▇▇▇█▇█████████████████████████████

0,1
Final Best Epoch,195.0
Final Best Val Accuracy,0.83167
best_epoch,195.0
best_val_accuracy,0.83167
roc_auc,0.90633
train_loss,0.36035
val_accuracy,0.83167


[34m[1mwandb[0m: Agent Starting Run: hq5nfdv6 with config:
[34m[1mwandb[0m: 	architecture: RandomForest
[34m[1mwandb[0m: 	epochs: 200
[34m[1mwandb[0m: 	learning_rate: 0.5
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	weight_decay: 0.0005
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Epoch 1/200, Loss: 0.6970, Validation Accuracy: 0.5300, AUC: 0.5000
Epoch 2/200, Loss: 280.3904, Validation Accuracy: 0.4700, AUC: 0.5000
Epoch 3/200, Loss: 3595.7673, Validation Accuracy: 0.5300, AUC: 0.5000
Epoch 4/200, Loss: 305.1496, Validation Accuracy: 0.5333, AUC: 0.6237
Epoch 5/200, Loss: 4.3300, Validation Accuracy: 0.4700, AUC: 0.5000
Epoch 6/200, Loss: 127.8271, Validation Accuracy: 0.5300, AUC: 0.5000
Epoch 7/200, Loss: 2581.8813, Validation Accuracy: 0.5300, AUC: 0.5000
Epoch 8/200, Loss: 152.9128, Validation Accuracy: 0.5700, AUC: 0.6734
Epoch 9/200, Loss: 0.8207, Validation Accuracy: 0.4700, AUC: 0.5063
Epoch 10/200, Loss: 2.1454, Validation Accuracy: 0.4700, AUC: 0.2055
Epoch 11/200, Loss: 1.7950, Validation Accuracy: 0.4700, AUC: 0.2023
Epoch 12/200, Loss: 1.1006, Validation Accuracy: 0.5300, AUC: 0.4201
Epoch 13/200, Loss: 1.7134, Validation Accuracy: 0.5300, AUC: 0.7224
Epoch 14/200, Loss: 0.9195, Validation Accuracy: 0.2817, AUC: 0.2150
Epoch 15/200, Loss: 1.1188, V

0,1
Final Best Epoch,▁
Final Best Val Accuracy,▁
best_epoch,▁▄██████████████████████████████████████
best_val_accuracy,▁▂██████████████████████████████████████
roc_auc,▅▅▄▅▅▂▄▁▅▅▂▅▅▄▅█▁▅▅▅▅▅▅▅▁▅▁▆▅▅▅▅▅▅▅▅▅▅▅▅
train_loss,▆▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁█▁▁▇▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▅██▅█▅█▅█▅▅█▅███▅██▅▅▅▅█▁▅▇█▅████▅██████

0,1
Final Best Epoch,8.0
Final Best Val Accuracy,0.57
best_epoch,8.0
best_val_accuracy,0.57
roc_auc,0.5
train_loss,0.68793
val_accuracy,0.53


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: dyc7jryh with config:
[34m[1mwandb[0m: 	architecture: RandomForest
[34m[1mwandb[0m: 	epochs: 200
[34m[1mwandb[0m: 	learning_rate: 0.5
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	weight_decay: 0.0001
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Epoch 1/200, Loss: 0.6901, Validation Accuracy: 0.5300, AUC: 0.5000
Epoch 2/200, Loss: 1679.8802, Validation Accuracy: 0.4700, AUC: 0.5000
Epoch 3/200, Loss: 8374.9297, Validation Accuracy: 0.5300, AUC: 0.5000
Epoch 4/200, Loss: 43.3045, Validation Accuracy: 0.4700, AUC: 0.5000
Epoch 5/200, Loss: 1722.6357, Validation Accuracy: 0.5300, AUC: 0.5000
Epoch 6/200, Loss: 1183.8209, Validation Accuracy: 0.5300, AUC: 0.5000
Epoch 7/200, Loss: 31.5482, Validation Accuracy: 0.4700, AUC: 0.5000
Epoch 8/200, Loss: 680.1323, Validation Accuracy: 0.4700, AUC: 0.3222
Epoch 9/200, Loss: 40.0696, Validation Accuracy: 0.5300, AUC: 0.5000
Epoch 10/200, Loss: 291.9463, Validation Accuracy: 0.4700, AUC: 0.4646
Epoch 11/200, Loss: 78.1983, Validation Accuracy: 0.5300, AUC: 0.5000
Epoch 12/200, Loss: 31.9389, Validation Accuracy: 0.4700, AUC: 0.6777
Epoch 13/200, Loss: 57.6220, Validation Accuracy: 0.5300, AUC: 0.5000
Epoch 14/200, Loss: 10.6498, Validation Accuracy: 0.5300, AUC: 0.5000
Epoch 15/200, Loss: 

0,1
Final Best Epoch,▁
Final Best Val Accuracy,▁
best_epoch,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
best_val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
roc_auc,▅▅▇▁▅▅▄▅▅▅▅▄▅▅▄▄▅▅▅█▄▅▅▅▄▄▄▄▅▄▅▅▅▄▅▄▅▅▅▅
train_loss,█▆▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁█▁██▁▁▁▁██▁█▁████▁▁█▁█▁█▁███████████▁██

0,1
Final Best Epoch,1.0
Final Best Val Accuracy,0.53
best_epoch,1.0
best_val_accuracy,0.53
roc_auc,0.50157
train_loss,0.69337
val_accuracy,0.53


[34m[1mwandb[0m: Agent Starting Run: t5vlfldq with config:
[34m[1mwandb[0m: 	architecture: RandomForest
[34m[1mwandb[0m: 	epochs: 200
[34m[1mwandb[0m: 	learning_rate: 0.1
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	weight_decay: 0.0005
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Epoch 1/200, Loss: 0.6926, Validation Accuracy: 0.5300, AUC: 0.8069
Epoch 2/200, Loss: 0.8484, Validation Accuracy: 0.5300, AUC: 0.1955
Epoch 3/200, Loss: 0.6909, Validation Accuracy: 0.5300, AUC: 0.8050
Epoch 4/200, Loss: 0.6921, Validation Accuracy: 0.5300, AUC: 0.7982
Epoch 5/200, Loss: 0.6841, Validation Accuracy: 0.5300, AUC: 0.8101
Epoch 6/200, Loss: 0.7640, Validation Accuracy: 0.5300, AUC: 0.8454
Epoch 7/200, Loss: 0.6891, Validation Accuracy: 0.4700, AUC: 0.3391
Epoch 8/200, Loss: 0.6973, Validation Accuracy: 0.4700, AUC: 0.8193
Epoch 9/200, Loss: 0.6996, Validation Accuracy: 0.4700, AUC: 0.7953
Epoch 10/200, Loss: 0.6987, Validation Accuracy: 0.4700, AUC: 0.7814
Epoch 11/200, Loss: 0.6933, Validation Accuracy: 0.5300, AUC: 0.7877
Epoch 12/200, Loss: 0.6756, Validation Accuracy: 0.5300, AUC: 0.8169
Epoch 13/200, Loss: 0.9922, Validation Accuracy: 0.5300, AUC: 0.8317
Epoch 14/200, Loss: 0.6687, Validation Accuracy: 0.4700, AUC: 0.8511
Epoch 15/200, Loss: 0.6968, Validation Accu

0,1
Final Best Epoch,▁
Final Best Val Accuracy,▁
best_epoch,▁▁▁▁▁▁▁▁▁▁▁▁▁▅▅▆▆▇▇▇▇███████████████████
best_val_accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂███████████████████████
roc_auc,▁█▇▇█▇▇███▁▇███▇██▇▇██▇██▇█▄█▇▆▆▆▇▇▆▇▇▇▇
train_loss,▆▅▄▄▄▄▄▄▃▄▄▄▄▄▄▄▄▁▄▄▃▇▄▄▄▄▃▄▄▄▄▄▃█▄▄▄▃▄▄
val_accuracy,▇▇▇▁▁▇▇▁▁▁▁▇▇▇▇▁▇▅▇▇▇▁▇▇▇▇▇▁▇▇▇▇█▁▇▇▇▇▇▇

0,1
Final Best Epoch,106.0
Final Best Val Accuracy,0.78
best_epoch,106.0
best_val_accuracy,0.78
roc_auc,0.78365
train_loss,0.68702
val_accuracy,0.53


[34m[1mwandb[0m: Agent Starting Run: pmzynv4t with config:
[34m[1mwandb[0m: 	architecture: RandomForest
[34m[1mwandb[0m: 	epochs: 200
[34m[1mwandb[0m: 	learning_rate: 0.1
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	weight_decay: 0.0001
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Epoch 1/200, Loss: 0.6887, Validation Accuracy: 0.5300, AUC: 0.8075
Epoch 2/200, Loss: 1.3565, Validation Accuracy: 0.4700, AUC: 0.2130
Epoch 3/200, Loss: 1.0441, Validation Accuracy: 0.5300, AUC: 0.3380
Epoch 4/200, Loss: 0.6890, Validation Accuracy: 0.5300, AUC: 0.7729
Epoch 5/200, Loss: 0.6985, Validation Accuracy: 0.5300, AUC: 0.2808
Epoch 6/200, Loss: 0.6885, Validation Accuracy: 0.5300, AUC: 0.7971
Epoch 7/200, Loss: 0.6863, Validation Accuracy: 0.5300, AUC: 0.7958
Epoch 8/200, Loss: 0.6778, Validation Accuracy: 0.5300, AUC: 0.8258
Epoch 9/200, Loss: 0.6743, Validation Accuracy: 0.4700, AUC: 0.2305
Epoch 10/200, Loss: 0.7249, Validation Accuracy: 0.5300, AUC: 0.8813
Epoch 11/200, Loss: 0.6625, Validation Accuracy: 0.5300, AUC: 0.8712
Epoch 12/200, Loss: 0.6853, Validation Accuracy: 0.5333, AUC: 0.8807
Epoch 13/200, Loss: 0.6548, Validation Accuracy: 0.7000, AUC: 0.8728
Epoch 14/200, Loss: 0.6458, Validation Accuracy: 0.5533, AUC: 0.8753
Epoch 15/200, Loss: 0.6214, Validation Accu

0,1
Final Best Epoch,▁
Final Best Val Accuracy,▁
best_epoch,▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▅▅▅▆▆▆▆▆▆▆▆▆▆▆▆▆███
best_val_accuracy,▁▁▁▅▆▆▇▇████████████████████████████████
roc_auc,▁▇█▇▇███████████████████████████████████
train_loss,█▄▄▃▅▃▂▂▂▂▂▁▁▂▂▁▂▁▁▁▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▂▂▅▂▄▇▇▆█████▆▇███▆▇███████████████████

0,1
Final Best Epoch,187.0
Final Best Val Accuracy,0.86
best_epoch,187.0
best_val_accuracy,0.86
roc_auc,0.9249
train_loss,0.29191
val_accuracy,0.855


[34m[1mwandb[0m: Agent Starting Run: 4snffz3r with config:
[34m[1mwandb[0m: 	architecture: RandomForest
[34m[1mwandb[0m: 	epochs: 200
[34m[1mwandb[0m: 	learning_rate: 0.05
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	weight_decay: 0.0005
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Epoch 1/200, Loss: 0.6914, Validation Accuracy: 0.5300, AUC: 0.8264
Epoch 2/200, Loss: 0.6852, Validation Accuracy: 0.5300, AUC: 0.8684
Epoch 3/200, Loss: 0.6866, Validation Accuracy: 0.5300, AUC: 0.8719
Epoch 4/200, Loss: 0.6849, Validation Accuracy: 0.5300, AUC: 0.8684
Epoch 5/200, Loss: 0.6738, Validation Accuracy: 0.5300, AUC: 0.8662
Epoch 6/200, Loss: 0.6878, Validation Accuracy: 0.5300, AUC: 0.8799
Epoch 7/200, Loss: 0.6662, Validation Accuracy: 0.7267, AUC: 0.8811
Epoch 8/200, Loss: 0.6782, Validation Accuracy: 0.7533, AUC: 0.8700
Epoch 9/200, Loss: 0.6718, Validation Accuracy: 0.5567, AUC: 0.8722
Epoch 10/200, Loss: 0.6424, Validation Accuracy: 0.5300, AUC: 0.8771
Epoch 11/200, Loss: 0.6609, Validation Accuracy: 0.5733, AUC: 0.8668
Epoch 12/200, Loss: 0.6083, Validation Accuracy: 0.7600, AUC: 0.8548
Epoch 13/200, Loss: 0.6055, Validation Accuracy: 0.6667, AUC: 0.8636
Epoch 14/200, Loss: 0.5508, Validation Accuracy: 0.6167, AUC: 0.8708
Epoch 15/200, Loss: 0.6109, Validation Accu

0,1
Final Best Epoch,▁
Final Best Val Accuracy,▁
best_epoch,▁▁▂▂▂▃▃▃▄▄▄▅▅▆▆▆▆▇▇▇████████████████████
best_val_accuracy,▁▁▅▅▆▆▇▇▇▇▇▇▇███████████████████████████
roc_auc,▃▄▃▁▄▄▅▅▅▆▆▇▇▇▇████████▆▇▇▇▇▇███████████
train_loss,▆▆▅▄▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁█▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁
val_accuracy,▁▁▅▄▆▆▇▇▇▇▇▇▇▇██▇█▇▇██▆▆▇▇██████████▇███

0,1
Final Best Epoch,102.0
Final Best Val Accuracy,0.90667
best_epoch,102.0
best_val_accuracy,0.90667
roc_auc,0.95887
train_loss,0.22536
val_accuracy,0.89167


[34m[1mwandb[0m: Agent Starting Run: lpou64d0 with config:
[34m[1mwandb[0m: 	architecture: RandomForest
[34m[1mwandb[0m: 	epochs: 200
[34m[1mwandb[0m: 	learning_rate: 0.05
[34m[1mwandb[0m: 	seed: 42
[34m[1mwandb[0m: 	weight_decay: 0.0001
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Epoch 1/200, Loss: 0.6993, Validation Accuracy: 0.5300, AUC: 0.7864
Epoch 2/200, Loss: 0.6824, Validation Accuracy: 0.4900, AUC: 0.4059
Epoch 3/200, Loss: 0.6955, Validation Accuracy: 0.5300, AUC: 0.8500
Epoch 4/200, Loss: 0.6716, Validation Accuracy: 0.5300, AUC: 0.8673
Epoch 5/200, Loss: 0.6605, Validation Accuracy: 0.6467, AUC: 0.8441
Epoch 6/200, Loss: 0.6581, Validation Accuracy: 0.5383, AUC: 0.8681
Epoch 7/200, Loss: 0.6498, Validation Accuracy: 0.6917, AUC: 0.8541
Epoch 8/200, Loss: 0.6233, Validation Accuracy: 0.7517, AUC: 0.8390
Epoch 9/200, Loss: 0.6207, Validation Accuracy: 0.7017, AUC: 0.8588
Epoch 10/200, Loss: 0.5513, Validation Accuracy: 0.7033, AUC: 0.8619
Epoch 11/200, Loss: 0.5384, Validation Accuracy: 0.5900, AUC: 0.7977
Epoch 12/200, Loss: 0.6917, Validation Accuracy: 0.7700, AUC: 0.8576
Epoch 13/200, Loss: 0.4718, Validation Accuracy: 0.6283, AUC: 0.8727
Epoch 14/200, Loss: 0.6953, Validation Accuracy: 0.7800, AUC: 0.8644
Epoch 15/200, Loss: 0.4668, Validation Accu

0,1
Final Best Epoch,▁
Final Best Val Accuracy,▁
best_epoch,▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▄▅▅▅▅▅▆▆▆▆▆▆▆▆▆▆██████
best_val_accuracy,▁▃▅▆▆▆▇▇▇▇▇▇████████████████████████████
roc_auc,▁▇▇▇▇▇▇▇▇███████████████████████████████
train_loss,███▇▅▄▄▃▃▄▃▂▂▅▅▃▃▂▂▂▂▂▂▂▂▁▁▂▂▂▂▁▁▁▁▁▁▁▁▁
val_accuracy,▁▂▆▄▆▇▇▇▆▇▇▇▇▇▇▇▇██████████████████████▇

0,1
Final Best Epoch,182.0
Final Best Val Accuracy,0.915
best_epoch,182.0
best_val_accuracy,0.915
roc_auc,0.96186
train_loss,0.17294
val_accuracy,0.87667


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Sweep Agent: Exiting.


Best hyperparameters saved to best_hyperparameters.txt with accuracy: 0.9150


## 추론

모델을 학습했으니 test 데이터를 예측하는 일만 남았습니다! 

test 데이터에도 train 데이터에 했던 전처리를 진행한 후, 추론을 진행하면 됩니다.(data leakage 에 주의하도록 합니다!)

In [74]:
test = pd.read_csv('test_final.csv')
test = test.drop(["id"],axis=1) 

test = label_encoder(test, label_map) #test data 라벨 인코딩

test[num_features] = scaler.transform(test[num_features]) #test 데이터 정규화

# 범주형 변수 인코딩
test_x_encoded = pd.get_dummies(test)

LogisticRegression.predict() 는 Sigmoid Function 의 출력 값을 출력합니다.

따라서 예측 값들을 threshold 0.5 기준으로 0 또는 1 의 값으로 변환해주어야 합니다. 

In [75]:
#pred = lr.predict(test)
#pred_bin = lr.to_bin(pred)

In [78]:
# 저장된 모델 가중치를 불러와서 예측하는 함수
def load_and_predict(model, input_data):
    # 입력 데이터의 차원을 사용하여 모델 초기화
    input_dim = input_data.shape[1]
    # 모델 선택
    if model == 'LogisticRegression':
        model = LogisticRegressionModel(input_dim)
    elif model == 'RandomForest':
        model = RandomForestModel(input_dim)
    elif model == 'XGBoostLike':
        model = XGBoostLikeModel(input_dim)
    else:
        raise ValueError(f"Unknown architecture: {model}")

    # 모델 가중치 로드
    model.load_state_dict(torch.load("best_overall_model.pth"))
    model.eval()  # 예측 모드로 전환

    # 입력 데이터를 텐서로 변환
    input_tensor = torch.tensor(input_data, dtype=torch.float32)

    # 예측 수행
    with torch.no_grad():
        outputs = model(input_tensor)
        outputs = torch.sigmoid(outputs)  # 이진 분류일 경우 시그모이드 함수 적용
        predictions = (outputs.squeeze() > 0.5).float()  # 0.5 기준으로 클래스 예측

    return predictions.numpy()

In [80]:
model = 'RandomForest'

sample_submission = pd.read_csv('sample_submission_final.csv')
pred_bin = load_and_predict(model, test_x_encoded.values)
sample_submission.target = pred_bin
sample_submission.to_csv("submission.csv",index=False)

  model.load_state_dict(torch.load("best_overall_model.pth"))
