9. Deep Neural Network (DNN)
    1. Начните с простой MLP из двух слоев и функции активации посредине
    2. Попробуйте добавить больше слоев
    3. Добавьте Batchnorm
    4. Попробуйте добавить Dropout с разными значениями
    5. Попробуйте разные размеры линейных слоев и разные функции активации
    6. Попробуйте разные оптимизаторы 
    7. Попробуйте использовать scheduler (например, косинусный)
    8. Попробуйте разные параметры для обучения (LR, batchsize, n_epochs, loss_fn)
    9. (Задание со звездочкой) попробуйте добавить Embedding слой для категориальных фичей

In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import accuracy_score
import config

# Seed PyTorch's global RNG with the project-wide seed from config.
torch.manual_seed(config.CONFIG['seed'])
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [2]:
# Load the training table; its 'fold' column is used below to split train/validation.
df = pd.read_csv(config.CONFIG['paths']['train_with_folds'])
TARGET_COL = config.CONFIG['validation']['target_column']
N_SPLITS = config.CONFIG['validation']['n_splits']

# Use every numeric column except the target and the fold id as a model input.
# NOTE(review): this also keeps identifier-like columns (the printed list below
# includes 'PassengerId') and nothing here scales the inputs - confirm both are intended.
feature_cols = [c for c in df.columns if c not in [TARGET_COL, 'fold'] and pd.api.types.is_numeric_dtype(df[c])]
n_features = len(feature_cols)

# Shortcut to the deep-learning section of the config.
cfg_dl = config.CONFIG['models']['dl']
print('Фич', n_features, feature_cols)

Фич 8 ['PassengerId', 'Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']


In [3]:
class TitanicDataset(Dataset):
    """In-memory dataset pairing a float32 feature tensor with int64 labels."""

    def __init__(self, X, y):
        # Convert once up front so per-item access is plain tensor indexing.
        features = torch.tensor(X, dtype=torch.float32)
        labels = torch.tensor(y, dtype=torch.long)
        self.X, self.y = features, labels

    def __len__(self):
        """Return the number of samples, taken from the label tensor."""
        return len(self.y)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at position ``idx``."""
        sample = (self.X[idx], self.y[idx])
        return sample


def get_fold_data(df, fold, feature_cols, target_col):
    """Split ``df`` into train/validation NumPy arrays for one CV fold.

    Rows whose ``'fold'`` value equals ``fold`` form the validation part;
    all remaining rows form the training part.

    Returns:
        tuple: ``(X_train, y_train, X_val, y_val)``.
    """
    is_val = df['fold'] == fold
    train_part = df.loc[~is_val]
    val_part = df.loc[is_val]
    return (
        train_part[feature_cols].values,
        train_part[target_col].values,
        val_part[feature_cols].values,
        val_part[target_col].values,
    )

In [4]:
def train_epoch(model, loader, criterion, optimizer, device):
    """Run one optimisation pass over ``loader``, updating ``model`` in place.

    Each batch is moved to ``device``, the loss ``criterion(model(X), y)`` is
    back-propagated and ``optimizer`` takes one step. Nothing is returned.
    """
    model.train()  # switch the model to training mode
    for features, labels in loader:
        features, labels = features.to(device), labels.to(device)
        optimizer.zero_grad()
        batch_loss = criterion(model(features), labels)
        batch_loss.backward()
        optimizer.step()


def evaluate(model, loader, device):
    """Return the accuracy of ``model``'s argmax predictions over ``loader``.

    The model is put into eval mode and gradients are disabled. Inputs are
    moved to ``device``; predictions are brought back to the CPU so they can
    be compared with the (CPU) targets via ``accuracy_score``.
    """
    model.eval()
    batch_preds, batch_targets = [], []
    with torch.no_grad():
        for features, labels in loader:
            logits = model(features.to(device))
            predicted = logits.argmax(1)
            batch_preds.append(predicted.cpu().numpy())
            batch_targets.append(labels.numpy())
    y_true = np.concatenate(batch_targets)
    y_pred = np.concatenate(batch_preds)
    return accuracy_score(y_true, y_pred)

## 1. Простой MLP: два слоя и функция активации посередине

In [5]:
class MLP2Layers(nn.Module):
    """Baseline perceptron: ``n_features -> 64 -> 2`` with one ReLU in between."""

    def __init__(self, n_features):
        super().__init__()
        hidden_dim, n_classes = 64, 2
        self.fc1 = nn.Linear(n_features, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, n_classes)
        self.act = nn.ReLU()

    def forward(self, x):
        """Map an input whose last dimension is ``n_features`` to 2 class logits."""
        hidden = self.act(self.fc1(x))
        return self.fc2(hidden)

In [6]:
def run_cv_dl(model_class, model_kwargs, df, lr=None, batch_size=None, n_epochs=None, verbose=True,
              optimizer_name=None, scheduler_name=None, loss_fn=None):
    """Cross-validate a freshly built model on the pre-assigned folds of ``df``.

    For every fold index in ``range(N_SPLITS)`` a new ``model_class(**model_kwargs)``
    is trained on the remaining folds for ``n_epochs`` epochs and scored once,
    after the last epoch, on the held-out fold.

    Args:
        model_class: Callable returning an ``nn.Module`` that outputs class logits.
        model_kwargs: Keyword arguments forwarded to ``model_class``.
        df: DataFrame with the feature columns, the target column and ``'fold'``.
        lr: Learning rate; ``None`` uses ``cfg['lr']``.
        batch_size: Batch size for both loaders; ``None`` uses ``cfg['batch_size']``.
        n_epochs: Number of training epochs; ``None`` uses ``cfg['n_epochs']``.
        verbose: Print the accuracy of each fold. The mean/std summary line is
            printed regardless of this flag.
        optimizer_name: ``'sgd'`` (momentum 0.9), ``'adamw'``, or any other value
            for Adam. ``None`` reads ``cfg['optimizer']`` (default ``'adam'``).
        scheduler_name: ``'cosine'`` enables ``CosineAnnealingLR`` stepped once
            per epoch; any other value disables scheduling. ``None`` reads
            ``cfg['scheduler']``.
        loss_fn: Loss called as ``loss_fn(logits, targets)``; ``None`` uses
            ``nn.CrossEntropyLoss()``.

    Returns:
        list: Validation accuracy per fold, in fold order.
    """
    cfg = config.CONFIG['models']['dl']
    lr = cfg['lr'] if lr is None else lr
    batch_size = cfg['batch_size'] if batch_size is None else batch_size
    n_epochs = cfg['n_epochs'] if n_epochs is None else n_epochs
    if optimizer_name is None:
        optimizer_name = cfg.get('optimizer', 'adam')
    if scheduler_name is None:
        scheduler_name = cfg.get('scheduler')
    use_scheduler = scheduler_name == 'cosine'
    criterion = nn.CrossEntropyLoss() if loss_fn is None else loss_fn

    scores = []
    for fold in range(N_SPLITS):
        X_tr, y_tr, X_val, y_val = get_fold_data(df, fold, feature_cols, TARGET_COL)
        train_ds = TitanicDataset(X_tr, y_tr)
        val_ds = TitanicDataset(X_val, y_val)
        train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(val_ds, batch_size=batch_size)

        # A new model per fold so no weights leak between folds.
        model = model_class(**model_kwargs).to(device)
        if optimizer_name == 'sgd':
            optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
        elif optimizer_name == 'adamw':
            optimizer = optim.AdamW(model.parameters(), lr=lr)
        else:
            # Unrecognised names keep the historical fallback to Adam.
            optimizer = optim.Adam(model.parameters(), lr=lr)

        scheduler = None
        if use_scheduler:
            scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=n_epochs)

        for _ in range(n_epochs):
            train_epoch(model, train_loader, criterion, optimizer, device)
            if scheduler is not None:
                scheduler.step()

        acc = evaluate(model, val_loader, device)
        scores.append(acc)
        if verbose:
            print('Fold', fold, ':', round(acc, 4))
    print('Среднее accuracy:', round(np.mean(scores), 4), '+-', round(np.std(scores), 4))
    return scores

In [7]:
# Baseline run: cross-validate the two-layer MLP; lr, batch size and epochs are left to run_cv_dl's config defaults.
scores_mlp2 = run_cv_dl(MLP2Layers, {'n_features': n_features}, df)

Fold 0 : 0.7542
Fold 1 : 0.7303
Fold 2 : 0.7472
Fold 3 : 0.7584
Fold 4 : 0.7135
Среднее accuracy: 0.7407 +- 0.0167


## 2. MLP с большим числом слоёв

In [8]:
class MLPDeep(nn.Module):
    """Deeper perceptron: ``n_features -> 64 -> 32 -> 16 -> 2``, ReLU after each hidden layer."""

    def __init__(self, n_features):
        super().__init__()
        width1, width2, width3, n_classes = 64, 32, 16, 2
        self.fc1 = nn.Linear(n_features, width1)
        self.fc2 = nn.Linear(width1, width2)
        self.fc3 = nn.Linear(width2, width3)
        self.fc_out = nn.Linear(width3, n_classes)
        self.act = nn.ReLU()

    def forward(self, x):
        """Apply the three hidden layers with ReLU, then the output layer (raw logits)."""
        for hidden_layer in (self.fc1, self.fc2, self.fc3):
            x = self.act(hidden_layer(x))
        return self.fc_out(x)

# Cross-validate the deeper MLP with the same default training settings as the baseline.
scores_deep = run_cv_dl(MLPDeep, {'n_features': n_features}, df)

Fold 0 : 0.7207
Fold 1 : 0.7416
Fold 2 : 0.7303
Fold 3 : 0.7584
Fold 4 : 0.7191
Среднее accuracy: 0.734 +- 0.0146


## 3. Добавление BatchNorm

In [9]:
class MLPWithBN(nn.Module):
    """Perceptron ``n_features -> 64 -> 32 -> 2`` with BatchNorm before each ReLU."""

    def __init__(self, n_features):
        super().__init__()
        width1, width2, n_classes = 64, 32, 2
        self.fc1 = nn.Linear(n_features, width1)
        self.bn1 = nn.BatchNorm1d(width1)
        self.fc2 = nn.Linear(width1, width2)
        self.bn2 = nn.BatchNorm1d(width2)
        self.fc_out = nn.Linear(width2, n_classes)
        self.act = nn.ReLU()

    def forward(self, x):
        """Apply two Linear -> BatchNorm -> ReLU stages, then the output layer (raw logits)."""
        hidden = self.act(self.bn1(self.fc1(x)))
        hidden = self.act(self.bn2(self.fc2(hidden)))
        return self.fc_out(hidden)


# Cross-validate the BatchNorm variant with the default training settings.
scores_bn = run_cv_dl(MLPWithBN, {'n_features': n_features}, df)

Fold 0 : 0.8436
Fold 1 : 0.7921
Fold 2 : 0.7978
Fold 3 : 0.7809
Fold 4 : 0.809
Среднее accuracy: 0.8047 +- 0.0215


## 4. Dropout с разными значениями

In [10]:
class MLPWithDropout(nn.Module):
    """Perceptron ``n_features -> 64 -> 32 -> 2`` with BatchNorm, ReLU and Dropout per hidden stage.

    Args:
        n_features: Size of the input feature vector.
        dropout: Drop probability shared by both dropout applications.
    """

    def __init__(self, n_features, dropout=0.2):
        super().__init__()
        width1, width2, n_classes = 64, 32, 2
        self.fc1 = nn.Linear(n_features, width1)
        self.bn1 = nn.BatchNorm1d(width1)
        self.fc2 = nn.Linear(width1, width2)
        self.bn2 = nn.BatchNorm1d(width2)
        self.fc_out = nn.Linear(width2, n_classes)
        self.act = nn.ReLU()
        self.drop = nn.Dropout(dropout)

    def forward(self, x):
        """Apply two Linear -> BatchNorm -> ReLU -> Dropout stages, then the output layer."""
        for linear, norm in ((self.fc1, self.bn1), (self.fc2, self.bn2)):
            x = self.drop(self.act(norm(linear(x))))
        return self.fc_out(x)


# Same architecture at two dropout rates: the configured one (cfg_dl['dropout']; presumably 0.2
# given the variable name - confirm in config) and a heavier 0.5.
scores_drop02 = run_cv_dl(MLPWithDropout, {'n_features': n_features, 'dropout': cfg_dl['dropout']}, df)
scores_drop05 = run_cv_dl(MLPWithDropout, {'n_features': n_features, 'dropout': 0.5}, df)

Fold 0 : 0.8268
Fold 1 : 0.7978
Fold 2 : 0.7865
Fold 3 : 0.7865
Fold 4 : 0.8202
Среднее accuracy: 0.8036 +- 0.0169
Fold 0 : 0.7709
Fold 1 : 0.809
Fold 2 : 0.7697
Fold 3 : 0.7921
Fold 4 : 0.7697
Среднее accuracy: 0.7823 +- 0.0159


## 5. Разные размеры слоёв и функции активации

In [11]:
class MLPBig(nn.Module):
    """Two-hidden-layer perceptron with configurable widths and activation.

    Each hidden stage is Linear -> BatchNorm -> activation -> Dropout, followed
    by a 2-logit output layer. The defaults reproduce the original fixed
    ``n_features -> 128 -> 64 -> 2`` ReLU network, so existing callers are
    unaffected.

    Args:
        n_features: Size of the input feature vector.
        dropout: Drop probability shared by both dropout applications.
        hidden_sizes: Pair ``(first, second)`` of hidden-layer widths.
        activation: Zero-argument callable returning the activation module
            (e.g. ``nn.ReLU``, ``nn.GELU``, ``nn.Tanh``). A factory rather than
            an instance, so every model gets its own activation module.

    Raises:
        ValueError: If ``hidden_sizes`` does not contain exactly two widths.
    """

    def __init__(self, n_features, dropout=0.2, hidden_sizes=(128, 64), activation=nn.ReLU):
        super().__init__()
        if len(hidden_sizes) != 2:
            raise ValueError(
                f"hidden_sizes must contain exactly two widths, got {hidden_sizes!r}"
            )
        first_width, second_width = hidden_sizes
        self.fc1 = nn.Linear(n_features, first_width)
        self.bn1 = nn.BatchNorm1d(first_width)
        self.fc2 = nn.Linear(first_width, second_width)
        self.bn2 = nn.BatchNorm1d(second_width)
        self.fc_out = nn.Linear(second_width, 2)
        self.act = activation()
        self.drop = nn.Dropout(dropout)

    def forward(self, x):
        """Return raw class logits for ``x`` (last dimension ``n_features``)."""
        x = self.fc1(x)
        x = self.bn1(x)
        x = self.act(x)
        x = self.drop(x)
        x = self.fc2(x)
        x = self.bn2(x)
        x = self.act(x)
        x = self.drop(x)
        x = self.fc_out(x)
        return x


# Cross-validate the wider network with the configured dropout rate.
scores_big = run_cv_dl(MLPBig, {'n_features': n_features, 'dropout': cfg_dl['dropout']}, df)

Fold 0 : 0.8156
Fold 1 : 0.7921
Fold 2 : 0.7921
Fold 3 : 0.7809
Fold 4 : 0.8146
Среднее accuracy: 0.7991 +- 0.0137


## 6. Разные оптимизаторы

In [12]:
def run_cv_dl_optimizer(optim_name, df):
    """Cross-validate ``MLPWithDropout`` with a chosen optimizer.

    Args:
        optim_name: ``'adam'``, ``'sgd'`` or ``'adamw'`` (case-insensitive).
            SGD uses momentum 0.9 and a fixed learning rate of 0.01; the other
            two use ``cfg['lr']``.
        df: DataFrame with the feature columns, the target column and ``'fold'``.

    Returns:
        list: Validation accuracy per fold, in fold order.

    Raises:
        ValueError: If ``optim_name`` is not a supported optimizer. Previously
            any unknown name silently trained AdamW and was reported under the
            misspelled label.
    """
    optimizer_builders = {
        'adam': lambda params, lr: optim.Adam(params, lr=lr),
        'sgd': lambda params, lr: optim.SGD(params, lr=lr, momentum=0.9),
        'adamw': lambda params, lr: optim.AdamW(params, lr=lr),
    }
    key = str(optim_name).lower()
    if key not in optimizer_builders:
        # Fail before any training instead of mislabelling an AdamW run.
        raise ValueError(
            f"Unknown optimizer {optim_name!r}; expected one of {sorted(optimizer_builders)}"
        )

    cfg = config.CONFIG['models']['dl']
    lr = 0.01 if key == 'sgd' else cfg['lr']
    scores = []
    for fold in range(N_SPLITS):
        X_tr, y_tr, X_val, y_val = get_fold_data(df, fold, feature_cols, TARGET_COL)
        train_loader = DataLoader(TitanicDataset(X_tr, y_tr), batch_size=cfg['batch_size'], shuffle=True)
        val_loader = DataLoader(TitanicDataset(X_val, y_val), batch_size=cfg['batch_size'])
        # A new model per fold so no weights leak between folds.
        model = MLPWithDropout(n_features=n_features, dropout=cfg['dropout']).to(device)
        criterion = nn.CrossEntropyLoss()
        opt = optimizer_builders[key](model.parameters(), lr)

        for _ in range(cfg['n_epochs']):
            train_epoch(model, train_loader, criterion, opt, device)

        acc = evaluate(model, val_loader, device)
        scores.append(acc)
        print('Fold', fold, ':', round(acc, 4))
    print(optim_name, 'среднее:', round(np.mean(scores), 4))
    return scores

# Train the same MLPWithDropout model with each of the three supported optimizers.
scores_adam = run_cv_dl_optimizer('adam', df)
scores_sgd = run_cv_dl_optimizer('sgd', df)
scores_adamw = run_cv_dl_optimizer('adamw', df)

Fold 0 : 0.7989
Fold 1 : 0.7697
Fold 2 : 0.7753
Fold 3 : 0.7528
Fold 4 : 0.5899
adam среднее: 0.7373
Fold 0 : 0.6145
Fold 1 : 0.618
Fold 2 : 0.618
Fold 3 : 0.618
Fold 4 : 0.6124
sgd среднее: 0.6162
Fold 0 : 0.7933
Fold 1 : 0.7697
Fold 2 : 0.7809
Fold 3 : 0.8034
Fold 4 : 0.7921
adamw среднее: 0.7879


## 7. Scheduler (косинусный)

In [13]:
def run_cv_dl_scheduler(df):
    """Cross-validate ``MLPWithDropout`` trained with Adam and cosine LR annealing.

    All hyperparameters (lr, batch size, epochs, dropout) come from
    ``config.CONFIG['models']['dl']``; the scheduler is stepped once per epoch
    with ``T_max`` equal to the number of epochs.

    Returns:
        list: Validation accuracy per fold, in fold order.
    """
    dl_cfg = config.CONFIG['models']['dl']
    fold_scores = []
    for fold in range(N_SPLITS):
        X_tr, y_tr, X_val, y_val = get_fold_data(df, fold, feature_cols, TARGET_COL)
        train_loader = DataLoader(
            TitanicDataset(X_tr, y_tr),
            batch_size=dl_cfg['batch_size'],
            shuffle=True,
        )
        val_loader = DataLoader(
            TitanicDataset(X_val, y_val),
            batch_size=dl_cfg['batch_size'],
        )

        # Fresh model, optimizer and schedule for every fold.
        net = MLPWithDropout(n_features=n_features, dropout=dl_cfg['dropout']).to(device)
        loss_module = nn.CrossEntropyLoss()
        adam = optim.Adam(net.parameters(), lr=dl_cfg['lr'])
        cosine = optim.lr_scheduler.CosineAnnealingLR(adam, T_max=dl_cfg['n_epochs'])

        for _ in range(dl_cfg['n_epochs']):
            train_epoch(net, train_loader, loss_module, adam, device)
            cosine.step()

        fold_acc = evaluate(net, val_loader, device)
        fold_scores.append(fold_acc)
        print('Fold', fold, ':', round(fold_acc, 4))
    print('Cosine среднее:', round(np.mean(fold_scores), 4))
    return fold_scores

# Adam with a cosine-annealed learning rate on the MLPWithDropout model.
scores_cosine = run_cv_dl_scheduler(df)

Fold 0 : 0.8212
Fold 1 : 0.7921
Fold 2 : 0.7921
Fold 3 : 0.7978
Fold 4 : 0.7921
Cosine среднее: 0.7991


## 8. Разные параметры обучения (LR, batch_size, n_epochs, loss_fn)

In [14]:
# Vary one training hyperparameter at a time (others stay at config defaults):
# a smaller learning rate, batch size 64, and 100 epochs.
# NOTE: the loss function mentioned in the section title is not varied here.
scores_lr_small = run_cv_dl(MLPWithDropout, {'n_features': n_features, 'dropout': cfg_dl['dropout']}, df, lr=1e-4)
scores_bs64 = run_cv_dl(MLPWithDropout, {'n_features': n_features, 'dropout': cfg_dl['dropout']}, df, batch_size=64)
scores_epochs100 = run_cv_dl(MLPWithDropout, {'n_features': n_features, 'dropout': cfg_dl['dropout']}, df, n_epochs=100)

Fold 0 : 0.6145
Fold 1 : 0.7022
Fold 2 : 0.6011
Fold 3 : 0.6404
Fold 4 : 0.6292
Среднее accuracy: 0.6375 +- 0.035
Fold 0 : 0.838
Fold 1 : 0.809
Fold 2 : 0.7809
Fold 3 : 0.7865
Fold 4 : 0.8258
Среднее accuracy: 0.808 +- 0.022
Fold 0 : 0.7877
Fold 1 : 0.7865
Fold 2 : 0.7753
Fold 3 : 0.8034
Fold 4 : 0.8315
Среднее accuracy: 0.7969 +- 0.0195


In [15]:
# Per-fold accuracies of the architecture and scheduler experiments, keyed by display name.
# NOTE: the optimizer runs (scores_adam / scores_sgd / scores_adamw) and the
# LR / batch-size / epoch runs are not part of this table.
experiment_scores = {
    'MLP2': scores_mlp2,
    'MLPDeep': scores_deep,
    'MLP+BN': scores_bn,
    'Dropout0.2': scores_drop02,
    'Dropout0.5': scores_drop05,
    'Big': scores_big,
    'Cosine': scores_cosine,
}
dl_results = pd.DataFrame({
    'model': list(experiment_scores),
    'mean_accuracy': [np.mean(fold_scores) for fold_scores in experiment_scores.values()],
    'std_accuracy': [np.std(fold_scores) for fold_scores in experiment_scores.values()],
})
# Best model first.
dl_results = dl_results.sort_values('mean_accuracy', ascending=False)
print(dl_results)


# Save next to the other checkpoints unless config names an explicit results path.
paths_cfg = config.CONFIG['paths']
path_dl = paths_cfg.get('dl_results', paths_cfg['checkpoint_dir'] / 'dl_results.csv')
dl_results[['model', 'mean_accuracy', 'std_accuracy']].to_csv(path_dl, index=False)
print('Сохранено:', path_dl)

        model  mean_accuracy  std_accuracy
2      MLP+BN       0.804670      0.021458
3  Dropout0.2       0.803565      0.016930
5         Big       0.799084      0.013729
6      Cosine       0.799077      0.011288
4  Dropout0.5       0.782280      0.015857
0        MLP2       0.740726      0.016652
1     MLPDeep       0.734022      0.014609
Сохранено: C:\newTry2\classicMLpractice\ProjectKaggle\checkpoints\dl_results.csv
