# Neural Networks

In [81]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.linear_model import (LinearRegression , LogisticRegression , Lasso)
from sklearn.preprocessing import StandardScaler 
from sklearn.model_selection import KFold
from sklearn.pipeline import Pipeline
from ISLP import load_data
from ISLP.models import ModelSpec as MS 
from sklearn.model_selection import (train_test_split , GridSearchCV)

from ISLP.models import (ModelSpec as MS, summarize , poly)
import statsmodels.api as sm

import torch.nn.functional as F #for one hot encoding  

import torch
from torch import nn
from torch.optim import RMSprop
from torch.utils.data import TensorDataset
from torchmetrics import (MeanAbsoluteError , R2Score, MeanSquaredError)
from torchinfo import summary
from torchvision.io import read_image

from pytorch_lightning import Trainer
from pytorch_lightning.loggers import CSVLogger
from pytorch_lightning import seed_everything
seed_everything(0, workers=True) 
torch.use_deterministic_algorithms(True, warn_only=True)

from ISLP.torch import (SimpleDataModule , SimpleModule , ErrorTracker , rec_num_workers)

from ISLP.torch.imdb import (load_lookup ,load_tensor , load_sparse ,load_sequential)
from sklearn.metrics import accuracy_score 

Seed set to 0


Load data and standardize

In [None]:
import os
PathToRepo = os.path.normpath(os.getcwd() + os.sep + os.pardir)
Df = pd.read_csv(PathToRepo + '\\Data\\Faellesdata_cleaned.csv')

X = Df.drop(columns = 'IsMigratorInt')
scaler = StandardScaler(with_mean=True, with_std=True)
#Scale all columns in X
X_scaled = scaler.fit_transform(X)


x_train, x_test, y_train, y_test = train_test_split(X_scaled, Df['IsMigratorInt'], test_size = 0.2, random_state = 42)

Prepare the tensors

In [83]:
x_train_t = torch.tensor(x_train.astype(np.float32))
y_train_t = torch.tensor(y_train.to_numpy().astype(np.float32))
train_set = TensorDataset(x_train_t , y_train_t)

x_test_t = torch.tensor(x_test.astype(np.float32))
y_test_t = torch.tensor(y_test.to_numpy().astype(np.float32))
test_set = TensorDataset(x_test_t , y_test_t)

Now lets set up our neural network.

In [84]:
class modelDenseNN(nn.Module):


    def __init__(self, input_size): 
        super(modelDenseNN, self).__init__()         #This calls the constructor of the parent class (nn.Module) to initialize its internal mechanics.
        self.flatten = nn.Flatten()                  #This flattens the input tensor to a 1D tensor.
        self.sequential = nn.Sequential(             #sequence of layers that will be executed in order.
            nn.Linear(input_size, 64),               #Fully connected layer with input_size neurons as input and 10 neurons as output.
            nn.ReLU(),                               #Activation function using ReLU  
            nn.Dropout(0.4),                         #Dropout layer that randomly sets 40% of the neurons to zero during training to prevent overfitting.
            nn.Linear(64, 10),   
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(10, 2), 
            nn.Softmax(dim=1))                        #Fully connected layer with 50 neurons as input and 1 neuron as output. 
                                                      #Activation function using softmax
            
    
    #Defines the forward computation logic for the model. It determines how input data flows through the layers.
    def forward(self, x):
        x = self.flatten(x)
        return self.sequential(x)

In [85]:
migrator_model_nn = modelDenseNN(x_train.shape[1])  
summary(migrator_model_nn, input_size = x_train.shape, col_names=['input_size', 'output_size', 'num_params'])

Layer (type:depth-idx)                   Input Shape               Output Shape              Param #
modelDenseNN                             [7999, 15]                [7999, 2]                 --
├─Flatten: 1-1                           [7999, 15]                [7999, 15]                --
├─Sequential: 1-2                        [7999, 15]                [7999, 2]                 --
│    └─Linear: 2-1                       [7999, 15]                [7999, 64]                1,024
│    └─ReLU: 2-2                         [7999, 64]                [7999, 64]                --
│    └─Dropout: 2-3                      [7999, 64]                [7999, 64]                --
│    └─Linear: 2-4                       [7999, 64]                [7999, 10]                650
│    └─ReLU: 2-5                         [7999, 10]                [7999, 10]                --
│    └─Dropout: 2-6                      [7999, 10]                [7999, 10]                --
│    └─Linear: 2-7             

We fit the network

In [None]:
max_num_workers = rec_num_workers()

migrator_dm = SimpleDataModule(train_set, test_set, batch_size=32, num_workers=min(4, max_num_workers), validation=test_set)

migrator_module = SimpleModule.classification(migrator_model_nn, num_classes=2)

migrator_logger = CSVLogger('logs', name='migrator')

migrator_trainer = Trainer(deterministic=True, max_epochs=100, log_every_n_steps=10, logger=migrator_logger, callbacks=[ErrorTracker()])
migrator_trainer.fit(migrator_module, datamodule=migrator_dm)   #datamodule the part connected to train and test data via himigrator_dmt_dm

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs



  | Name  | Type             | Params | Mode 
---------------------------------------------------
0 | model | modelDenseNN     | 1.7 K  | train
1 | loss  | CrossEntropyLoss | 0      | train
---------------------------------------------------
1.7 K     Trainable params
0         Non-trainable params
1.7 K     Total params
0.007     Total estimated model params size (MB)
12        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=100` reached.


Now deactivate the dropout layers and test it

In [89]:
migrator_model_nn.eval()
y_hat = migrator_model_nn(x_test_t) 
y_hat = torch.argmax(y_hat, dim=1)
y_hat = y_hat.detach().numpy()
print('Test accuracy:', accuracy_score(y_test, y_hat))

Test accuracy: 0.849
