In [112]:
import torch
import os
import pandas as pd
import torch.nn as nn
from torch.utils.data import random_split, DataLoader, TensorDataset
import torch.nn.functional as F
import numpy as np
import random
import torch.optim as optim
from torch.optim import Adam
from sklearn.preprocessing import LabelEncoder, StandardScaler, LabelBinarizer,OneHotEncoder
from sklearn.model_selection import StratifiedKFold
from torch.utils.data import DataLoader, Dataset
from sklearn.metrics import f1_score

import warnings
warnings.filterwarnings('ignore')

## Data pre (for Loan Dataset)

In [4]:
def featureEnginering(df, train= False):
  df = df.replace({"10+ years": 10, "9 years": 9,"8 years": 8,"7 years": 7,
                                  "6 years": 6,"5 years": 5,"4 years": 4,"3 years": 3,
                                  "2 years": 2,"1 year": 1, "< 1 year": 0})

  df = df.drop(['verification_status', 'pymnt_plan', 'addr_state',
           'initial_list_status', 'hardship_flag', 'disbursement_method', 'debt_settlement_flag',
           # drop columns not issued to borrowers
           'funded_amnt_inv', 'total_pymnt_inv', 'out_prncp_inv', 'funded_amnt_inv', 'funded_amnt'], axis=1)

  # label_encoding
  if train == True:
    columnsTolabel = ['grade','application_type','term','loan_status']
  else:
    columnsTolabel = ['application_type' , 'term','loan_status']

  for column in columnsTolabel:
    label_encoder = LabelEncoder()
    df[column] = label_encoder.fit_transform(df[column])

  # onehot encoding
  encoder = OneHotEncoder(sparse=False)
  columnsToOnehot = ['purpose','home_ownership']
  for column in columnsToOnehot:
    encoded = encoder.fit_transform(df[[column]])
    encoded = pd.DataFrame(encoded, columns=encoder.get_feature_names_out())
    df = pd.concat([df, encoded], axis=1)
    df.drop(column, axis=1, inplace=True)

  # Date-Time Features
  df["credit_history_length"] = (df['issue_d_year'] - df['earliest_cr_line_year']) * 12 + (df['issue_d_month'] - df['earliest_cr_line_month'])
  df["time_since_last_payment"] = -(df['issue_d_year'] - df['last_pymnt_d_year']) * 12 + (df['issue_d_month'] - df['last_pymnt_d_month'])
  df["time_since_last_credit_pull"] = -(df['issue_d_year'] - df['last_credit_pull_d_year']) * 12 + (df['issue_d_month'] - df['last_credit_pull_d_month'])
  df["loan_age_at_last_payment"] = (df['last_pymnt_d_year'] - df['issue_d_year']) * 12 + (df['last_pymnt_d_month'] - df['issue_d_month'])

  return df

train = pd.read_csv('/content/loan-10k.lrn.csv')
train = featureEnginering(train,True)

feature_names = list(
    filter(lambda x: x not in ['grade', 'ID', 'kfold'], train.columns))



## Dataset

In [None]:
class Dataset:
    def __init__(self, features, labels):
        self.features = torch.tensor(features, dtype=torch.float32)
        self.labels = torch.tensor(labels, dtype=torch.float32)

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        return self.features[idx], self.labels[idx]

X = train[feature_names].values
y = train['grade'].values
dataset = Dataset(X, y)

# model

In [137]:
class SimpleNN(nn.Module):
    def __init__(self, input_size, output_size, layer_sizes, activation_funcs):
        super(SimpleNN, self).__init__()

        self.layers = nn.ModuleList()
        self.batch_norms = nn.ModuleList()
        self.dropouts = nn.ModuleList()
        self.activation_funcs = activation_funcs

        self.batch_norm1 = nn.BatchNorm1d(input_size)
        # input layer
        self.layers.append(nn.utils.weight_norm(nn.Linear(input_size, layer_sizes[0])))
        self.batch_norms.append(nn.BatchNorm1d(layer_sizes[0]))
        self.dropouts.append(nn.Dropout(0.2))

        # Hidden layers
        for i in range(1, len(layer_sizes)):
            self.layers.append(nn.utils.weight_norm(nn.Linear(layer_sizes[i-1], layer_sizes[i])))
            if i < len(layer_sizes) - 1:
                self.batch_norms.append(nn.BatchNorm1d(layer_sizes[i]))
            self.dropouts.append(nn.Dropout(0.5))

        # Output layer
        self.output = nn.utils.weight_norm(nn.Linear(layer_sizes[-1], output_size))


    def forward(self, x):
        x = self.batch_norm1(x)
        x = self.dropouts[0](x)
        x = self.layers[0](x)
        x = self.activation_funcs[0](x)

        for i in range(1, len(self.layers)):
            if i < len(self.batch_norms):
                x = self.batch_norms[i](x)
            x = self.dropouts[i](x)
            x = self.layers[i](x)
            if i < len(self.activation_funcs):
                x = self.activation_funcs[i](x)

        x = self.output(x)
        return x

## Search space

In [155]:
def grid_search(configs, dataset):

    best_config = None

    for config in configs:
        best_score = 0
        layer_size = config['layer_sizes']
        activation = config['activations']

        model = SimpleNN(
            input_size = INPUT_SIZE, output_size = OUTPUT_SIZE,
            layer_sizes = config['layer_sizes'],  activation_funcs = config['activations']
        )

        print(f"Layer sizes: {layer_size}, Activations: {[type(act).__name__ for act in activation]}")

        score, loss = train_and_evaluate_model(model, dataset)

        if score > best_score:
            best_score = score
            best_config = config

    print(f"Best F1 Score: {best_score:.2f}, Layer sizes: {layer_size}, Activations: {[type(act).__name__ for act in activation]}")
    return best_config, best_score



# Run the Grid Search
configs = [
    {'layer_sizes': [64, 64], 'activations': [nn.ReLU(), nn.ReLU()]},
    {'layer_sizes': [128, 64], 'activations': [nn.Tanh(), nn.Tanh()]},
    {'layer_sizes': [64, 64, 32], 'activations': [nn.ReLU(), nn.Tanh(), nn.ReLU()]}]

best_configuration, best_score = grid_search(configs, dataset)

Layer sizes: [64, 64], Activations: ['ReLU', 'ReLU']
Completed training fold 0, and best f1 score is 0.82
Completed training fold 1, and best f1 score is 0.81
Completed training fold 2, and best f1 score is 0.77
Completed training fold 3, and best f1 score is 0.70
Completed training fold 4, and best f1 score is 0.69
Layer sizes: [128, 64], Activations: ['Tanh', 'Tanh']
Completed training fold 0, and best f1 score is 0.83
Completed training fold 1, and best f1 score is 0.84
Completed training fold 2, and best f1 score is 0.85
Completed training fold 3, and best f1 score is 0.83
Completed training fold 4, and best f1 score is 0.84
Layer sizes: [64, 64, 32], Activations: ['ReLU', 'Tanh', 'ReLU']
Completed training fold 0, and best f1 score is 0.81
Completed training fold 1, and best f1 score is 0.83
Completed training fold 2, and best f1 score is 0.85
Completed training fold 3, and best f1 score is 0.84
Completed training fold 4, and best f1 score is 0.84
Best F1 Score: 0.83, Layer sizes:

In [None]:
def local_search(base_config, dataset, iterations=10):
    def get_neighbors(configuration):
      neighbors = []
      # Example: Add a layer
      neighbors.append({'layer_sizes': configuration['layer_sizes'] + [64],
                        'activations': configuration['activations'] + [nn.ReLU()]})
      # Example: Remove a layer
      if len(configuration['layer_sizes']) > 1:
          neighbors.append({'layer_sizes': configuration['layer_sizes'][:-1],
                            'activations': configuration['activations'][:-1]})
      return neighbors

    def get_model(congfig):
      layer_size = congfig['layer_sizes']
      activation = congfig['activations']
      model = SimpleNN( INPUT_SIZE, OUTPUT_SIZE, layer_size, activation)
      return model

    current_configuration = base_config
    model = get_model(current_configuration)
    current_score, current_loss = train_and_evaluate_model(model, dataset)

    while True:
        neighbors = get_neighbors(current_configuration)
        any_improvement = False

        for neighbor in neighbors:
          neighbor = get_model(neighbor)
          score , loss = train_and_evaluate_model(neighbor, dataset)

          if score > current_score:
              current_configuration = neighbor
              current_score = score
              any_improvement = True
              break  # Move to the first improving neighbor

        if not any_improvement:
            break  # No improvement found, stop the search

    return current_configuration, current_score


# Run the Local Search
base_config = {'layer_sizes': [64, 64], 'activations': [nn.ReLU(), nn.ReLU()]}
best_configuration, best_score = local_search(base_config, dataset)


## Train

In [156]:
# HyperParameters

INPUT_SIZE=len(train[feature_names].columns)
OUTPUT_SIZE=len(train['grade'].unique())

DEVICE = ('cuda' if torch.cuda.is_available() else 'cpu')
EPOCHS = 25
BATCH_SIZE = 128
LEARNING_RATE = 1e-3
WEIGHT_DECAY = 1e-5
EARLY_STOPPING_STEPS = 10
EARLY_STOP = False

In [None]:
def train_fn(model, optimizer, scheduler, loss_fn, dataloader):
    model.train()
    final_loss = 0

    for data, target in dataloader:
        optimizer.zero_grad()
        output = model(data)
        loss = loss_fn(output, target.long())
        loss.backward()
        optimizer.step()
        scheduler.step()

        final_loss += loss.item()
    final_loss /= len(dataloader)

    return final_loss

def valid_fn(model, loss_fn, dataloader):
    model.eval()
    final_loss = 0
    final_f1_score = 0

    for data, target in dataloader:
        outputs = model(data)
        loss = loss_fn(outputs, target.long())
        final_loss += loss.item()

        preds = outputs.argmax(dim=1).detach().cpu().numpy()
        final_f1_score += (f1_score(target.cpu().numpy(), preds.round(), average='weighted'))

    final_f1_score /= len(dataloader)
    final_loss /= len(dataloader)

    return final_loss, final_f1_score

In [150]:
# Function to create and train a network
def train_and_evaluate_model(model, dataset, n_splits=5):


    labels = [label for _, label in dataset]
    kfold = StratifiedKFold(n_splits=n_splits, shuffle=True)

    for fold, (train_ids, valid_ids) in enumerate(kfold.split(range(len(dataset)), labels)):

        # Splitting the dataset
        train_subsampler = torch.utils.data.SubsetRandomSampler(train_ids)
        valid_subsampler = torch.utils.data.SubsetRandomSampler(valid_ids)

        # Creating data loaders
        train_loader = DataLoader(dataset, batch_size=BATCH_SIZE, sampler=train_subsampler)
        valid_loader = DataLoader(dataset, batch_size=BATCH_SIZE, sampler=valid_subsampler)

        # initialization
        model.to(DEVICE)

        optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
        scheduler = optim.lr_scheduler.OneCycleLR(optimizer=optimizer, pct_start=0.1, div_factor=1e3,
                                              max_lr=1e-2, epochs=EPOCHS, steps_per_epoch=len(train_loader))
        loss_fn = nn.CrossEntropyLoss()
        early_stopping_steps = EARLY_STOPPING_STEPS
        early_step = 0


        # Training loop
        best_loss = np.inf
        for epoch in range(EPOCHS):

            train_loss = train_fn(model, optimizer,scheduler, loss_fn, train_loader)
            valid_loss, score = valid_fn(model, loss_fn, valid_loader)

            if valid_loss < best_loss:
              best_loss = valid_loss
              f1_score = score
              torch.save(model.state_dict(), f"fold{fold}_.pth")

            elif(EARLY_STOP == True):
              early_step += 1
              if (early_step >= early_stopping_steps):
                  break


        print(f"Completed training fold {fold}, and best f1 score is {f1_score:.2f}")

    return best_score, best_loss