In [1]:
!git clone https://github.com/RiskModellingResearch/DeepLearning_Autumn22.git

Cloning into 'DeepLearning_Autumn22'...
remote: Enumerating objects: 71, done.[K
remote: Counting objects: 100% (71/71), done.[K
remote: Compressing objects: 100% (58/58), done.[K
remote: Total 71 (delta 20), reused 62 (delta 11), pack-reused 0[K
Unpacking objects: 100% (71/71), done.


In [None]:
!pip install torchmetrics

Collecting torchmetrics
  Downloading torchmetrics-0.7.2-py3-none-any.whl (397 kB)
[?25l[K     |▉                               | 10 kB 16.0 MB/s eta 0:00:01[K     |█▋                              | 20 kB 21.4 MB/s eta 0:00:01[K     |██▌                             | 30 kB 14.6 MB/s eta 0:00:01[K     |███▎                            | 40 kB 10.7 MB/s eta 0:00:01[K     |████▏                           | 51 kB 3.8 MB/s eta 0:00:01[K     |█████                           | 61 kB 4.5 MB/s eta 0:00:01[K     |█████▊                          | 71 kB 4.8 MB/s eta 0:00:01[K     |██████▋                         | 81 kB 4.5 MB/s eta 0:00:01[K     |███████▍                        | 92 kB 5.0 MB/s eta 0:00:01[K     |████████▎                       | 102 kB 4.3 MB/s eta 0:00:01[K     |█████████                       | 112 kB 4.3 MB/s eta 0:00:01[K     |██████████                      | 122 kB 4.3 MB/s eta 0:00:01[K     |██████████▊                     | 133 kB 4.3 MB/s eta 

In [None]:
import numpy as np
import pandas as pd
import pickle

import torch
print(torch.__version__)

import torch.nn as nn
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
from torchmetrics import Accuracy
from torch.utils.data import Dataset, DataLoader

1.10.0+cu111


In [None]:
class CustomDataset(Dataset):

    def __init__(self, dataset_path):
        with open(dataset_path, 'rb') as f:
            data, self.nrof_emb_categories, self.unique_categories = pickle.load(f)

        self.embedding_columns = ['workclass_cat', 'education_cat', 'marital-status_cat', 'occupation_cat',
                                  'relationship_cat', 'race_cat', 'sex_cat', 'native-country_cat']
        self.nrof_emb_categories = {key + '_cat': val for key, val in self.nrof_emb_categories.items()}
        self.numeric_columns = ['age', 'fnlwgt', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week']

        self.columns = self.embedding_columns + self.numeric_columns
        self.X = data[self.columns].reset_index(drop=True)
        self.y = np.asarray([0 if el == '<50k' else 1 for el in data['salary'].values], dtype=np.int32)
        return

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        row = self.X.take([idx], axis=0)
        row = {col: torch.tensor(row[col].values, dtype=torch.float32) for i, col in enumerate(self.columns)}
        return row, np.float32(self.y[idx])

In [None]:
class DenseFeatureLayer(nn.Module):

    def __init__(self, nrof_cat, emb_dim, emb_columns, numeric_columns):
        super(DenseFeatureLayer, self).__init__()

        self.emb_columns = emb_columns
        self.numeric_columns = numeric_columns
        self.numeric_feature_bn = torch.nn.BatchNorm1d(len(numeric_columns))

        input_size = len(emb_columns) + len(numeric_columns)
        self.first_feature_bn = torch.nn.BatchNorm1d(input_size)
        self.second_feature_bn = torch.nn.BatchNorm1d(input_size)

        # first order feature interactions
        self.first_order_embd = nn.ModuleDict()
        for i, col in enumerate(self.emb_columns):
            self.first_order_embd[col] = torch.nn.Embedding(nrof_cat[col], 1)
        self.first_order_scalar = nn.ParameterDict({})
        for i, col in enumerate(numeric_columns):
            self.first_order_scalar[col] = nn.Parameter(torch.nn.init.xavier_uniform_(torch.empty(1,1)))

        # second order feature interactions
        self.second_order_embd = nn.ModuleDict({})
        for i, col in enumerate(self.emb_columns):
            self.second_order_embd[col] = torch.nn.Embedding(nrof_cat[col], emb_dim)
        self.second_order_scalar = nn.ParameterDict({})
        for i, col in enumerate(numeric_columns):
            self.second_order_scalar[col] = nn.Parameter(torch.nn.init.xavier_uniform_(torch.empty(emb_dim, 1)))
        return

    def forward(self, input_data):
        numeric_features = torch.stack([input_data[col] for col in self.numeric_columns], dim=1)
        numeric_features = self.numeric_feature_bn(numeric_features)

        # first order feature interactions
        # categorical_columns
        first_order_embd_output = None
        for i, col in enumerate(self.emb_columns):
            if first_order_embd_output is None:
                first_order_embd_output = self.first_order_embd[col](
                    torch.tensor(input_data[self.emb_columns[i]], dtype=torch.int64))
            else:
                first_order_embd_output = torch.cat(
                    [first_order_embd_output, self.first_order_embd[col](
                        torch.tensor(input_data[self.emb_columns[i]], dtype=torch.int64))], dim=1)

        # numeric_columns
        first_order_embd_output = torch.squeeze(first_order_embd_output, dim=2)
        for i, col in enumerate(self.numeric_columns):
            if first_order_embd_output is None:
                first_order_embd_output = torch.mul(numeric_features[i], self.first_order_scalar[col])
            else:
                first_order_embd_output = torch.cat(
                    [first_order_embd_output, torch.mul(numeric_features[:, i], self.first_order_scalar[col])], dim=1)
                
        # second order feature interactions
        # categorical_columns
        second_order_embd_output = None
        for i, col in enumerate(self.emb_columns):
            if second_order_embd_output is None:
                second_order_embd_output = self.second_order_embd[col](
                    torch.tensor(input_data[self.emb_columns[i]], dtype=torch.int64))
            else:
                second_order_embd_output = torch.cat(
                    [second_order_embd_output, self.second_order_embd[col](
                        torch.tensor(input_data[self.emb_columns[i]], dtype=torch.int64))], dim=1)

        # numeric_columns
        for i, col in enumerate(self.numeric_columns):
            if second_order_embd_output is None:
                second_order_embd_output = torch.mul(numeric_features[i], self.second_order_scalar[col])
            else:
                second_order_embd_output = torch.cat(
                    [second_order_embd_output, torch.unsqueeze(torch.mul(
                        numeric_features[:, i], torch.squeeze(
                            torch.stack([self.second_order_scalar[col]] * len(numeric_features)), 2)), 1)], dim=1)

        first_order_embd_output = self.first_feature_bn(first_order_embd_output)
        second_order_embd_output = self.second_feature_bn(second_order_embd_output)
        return first_order_embd_output, second_order_embd_output

In [None]:
class FMLayer(nn.Module):

    def __init__(self, ):
        super(FMLayer, self).__init__()
        return

    def forward(self, first_order_embd, second_order_embd):
        # sum_square part
        summed_features_embd = torch.sum(second_order_embd, dim=1)
        summed_features_embd_square = torch.square(summed_features_embd)

        # square_sum part
        squared_features_embd = torch.square(second_order_embd)
        squared_sum_features_embd = torch.sum(squared_features_embd, dim=1)

        # second order
        second_order = 0.5 * torch.sub(summed_features_embd_square, squared_sum_features_embd)
        return first_order_embd, second_order

In [None]:
class MLPLayer(nn.Module):

    def __init__(self, input_size, nrof_layers, nrof_neurons, output_size):
        super(MLPLayer, self).__init__()
        all_input_sizes = [input_size]
        [all_input_sizes.append(nrof_neurons) for i in range(nrof_layers - 1)]
        list_layers = []
        [list_layers.extend([torch.nn.Linear(all_input_sizes[i], nrof_neurons),
                             torch.nn.BatchNorm1d(nrof_neurons),
                             torch.nn.ReLU()]) for i in range(nrof_layers - 1)]
        self.deep_block = torch.nn.Sequential(*list_layers)
        self.output_layer = torch.nn.Linear(nrof_neurons, output_size)

    def init_weights(self, m):
        if type(m) == nn.Linear:
            torch.nn.init.xavier_uniform(m.weight)
            # m.bias.data.fill_(0.001)

    def forward(self, input_data):
        output = self.deep_block(input_data)
        output = self.output_layer(output)
        return output

In [None]:
class DeepFMNet(nn.Module):

    def __init__(self, nrof_cat, emb_dim, emb_columns, numeric_columns,
                 nrof_layers, nrof_neurons, output_size, nrof_out_classes):
        super(DeepFMNet, self).__init__()
        self.emb_dim = emb_dim
        self.emb_columns = emb_columns
        self.numeric_columns = numeric_columns

        self.features_embd = DenseFeatureLayer(nrof_cat, emb_dim, emb_columns, numeric_columns)
        self.FM = FMLayer()

        input_size = (len(emb_columns) + len(numeric_columns)) * emb_dim
        self.MLP = MLPLayer(input_size, nrof_layers, nrof_neurons, output_size)

        input_size = len(emb_columns) + len(numeric_columns) + emb_dim + output_size
        self.dense_layer = nn.Linear(input_size, nrof_out_classes)

    def forward(self, input_data):
        first_order_embd, second_order_embd = self.features_embd(input_data)
        FM_first_order, FM_second_order = self.FM(first_order_embd, second_order_embd)

        second_order_embd = torch.reshape(second_order_embd, 
                                          [-1, (len(self.emb_columns) + len(self.numeric_columns)) * self.emb_dim])
        Deep = self.MLP(second_order_embd)

        concat_output = torch.cat([FM_first_order, FM_second_order, Deep], dim=1)
        output = self.dense_layer(concat_output)
        output = torch.squeeze(output, 1)
        return output

In [None]:
EPOCHS = 500
EMBEDDING_SIZE = 5
BATCH_SIZE = 512
NROF_LAYERS = 3
NROF_NEURONS = 50
DEEP_OUTPUT_SIZE = 50
NROF_OUT_CLASSES = 1
LEARNING_RATE = 3e-4
TRAIN_PATH = 'DeepLearning_Autumn22/week_05/data/train_adult.pickle'
VALID_PATH = 'DeepLearning_Autumn22/week_05/data/valid_adult.pickle'

In [None]:
class DeepFM:
    def __init__(self):
        self.train_dataset = CustomDataset(TRAIN_PATH)
        self.train_loader = DataLoader(dataset=self.train_dataset, batch_size=BATCH_SIZE, shuffle=True)
        self.build_model()
        self.log_params()
        self.train_writer = SummaryWriter('./logs/train')
        self.valid_writer = SummaryWriter('./logs/valid')
        return

    def build_model(self):
        self.network = DeepFMNet(nrof_cat=self.train_dataset.nrof_emb_categories, 
                                 emb_dim=EMBEDDING_SIZE,
                                 emb_columns=self.train_dataset.embedding_columns,
                                 numeric_columns=self.train_dataset.numeric_columns,
                                 nrof_layers=NROF_LAYERS, nrof_neurons=NROF_NEURONS,
                                 output_size=DEEP_OUTPUT_SIZE,
                                 nrof_out_classes=NROF_OUT_CLASSES)

        self.loss = torch.nn.BCEWithLogitsLoss()
        self.accuracy = Accuracy()
        self.optimizer = optim.Adam(self.network.parameters(), lr=LEARNING_RATE)
        return

    def log_params(self):
        return

    def load_model(self, restore_path=''):
        if restore_path == '':
            self.step = 0
        else:
            pass
        return

    def run_train(self):
        print('Run train ...')
        self.load_model()
        for epoch in range(EPOCHS):
            self.network.train()
            
            for features, label in self.train_loader:
                # Reset gradients
                self.optimizer.zero_grad()
                output = self.network(features)

                # Calculate error and backpropagate
                loss = self.loss(output, label)
                output = torch.sigmoid(output)
                loss.backward()
                acc = self.accuracy(output, torch.tensor(label, dtype=torch.int64)).item()

                # Update weights with gradients
                self.optimizer.step()
                self.train_writer.add_scalar('CrossEntropyLoss', loss, self.step)
                self.train_writer.add_scalar('Accuracy', acc, self.step)
                self.step += 1

                if self.step % 50 == 0:
                    print('EPOCH %d STEP %d : train_loss: %f train_acc: %f' %(epoch, self.step, loss.item(), acc))

            # self.train_writer.add_histogram('hidden_layer', self.network.linear1.weight.data, self.step)
            # Run validation
            #TODO
        return

In [None]:
deep_fm = DeepFM()
deep_fm.run_train()

Run train ...




EPOCH 0 STEP 50 : train_loss: nan train_acc: 0.748047
EPOCH 1 STEP 100 : train_loss: nan train_acc: 0.765625
EPOCH 2 STEP 150 : train_loss: nan train_acc: 0.783203
EPOCH 3 STEP 200 : train_loss: nan train_acc: 0.759766
EPOCH 4 STEP 250 : train_loss: nan train_acc: 0.761719
EPOCH 5 STEP 300 : train_loss: nan train_acc: 0.742188
EPOCH 6 STEP 350 : train_loss: nan train_acc: 0.753906
EPOCH 7 STEP 400 : train_loss: nan train_acc: 0.744141
EPOCH 8 STEP 450 : train_loss: nan train_acc: 0.720703
EPOCH 9 STEP 500 : train_loss: nan train_acc: 0.744141
EPOCH 10 STEP 550 : train_loss: nan train_acc: 0.748047
EPOCH 11 STEP 600 : train_loss: nan train_acc: 0.746094
EPOCH 12 STEP 650 : train_loss: nan train_acc: 0.755859
EPOCH 13 STEP 700 : train_loss: nan train_acc: 0.724609
EPOCH 14 STEP 750 : train_loss: nan train_acc: 0.744141
EPOCH 15 STEP 800 : train_loss: nan train_acc: 0.710938
EPOCH 16 STEP 850 : train_loss: nan train_acc: 0.718750
EPOCH 17 STEP 900 : train_loss: nan train_acc: 0.750000
EPO

# Домашка
1. Пофиксить лосс nan (поиграться с фичами в CustomDataset)
2. Добавить в run_train проверку на valid