In [14]:
import os
import torch
print("torch module path:", torch.__file__)
print(torch.cuda.is_available())
from torch import nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import numpy as np
import pandas as pd
import random
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.metrics import make_scorer, accuracy_score, roc_auc_score, log_loss

torch module path: c:\Users\Daria\anaconda3\Lib\site-packages\torch\__init__.py
False


In [15]:

# Read the scaled feature table and discard its first column by position
# (presumably an index column written out with the CSV -- TODO confirm).
data = pd.read_csv('Data/Final_data/final_depr_one_scaled').iloc[:, 1:]
data.head()

Unnamed: 0,821,328813,823,817,818,239935,28116,351091,28118,1161942,...,78344,1692,68274,40091,1393,28133,183,29346,349096,Depression
0,1.439593,-0.405899,-0.592464,-0.294858,-0.629663,-0.388186,0.060858,-0.638728,-0.158473,-0.274041,...,-0.255388,-0.258027,-0.19843,2.305337,-0.153823,-0.106539,-0.177886,-0.184357,-0.063034,0
1,-0.746323,-0.497882,0.149193,-0.318254,-0.611975,-0.337264,-0.599826,0.072958,-0.54131,2.875357,...,-0.255388,-0.258027,-0.19843,-0.209176,-0.153823,-0.106539,-0.177886,-0.184357,-0.077118,1
2,0.491225,0.320783,-0.668681,-0.139988,-0.220985,-0.293335,3.686425,-0.317174,-0.216365,-0.257336,...,-0.255388,-0.258027,-0.19843,-0.209176,-0.153823,-0.106539,-0.177886,0.329359,-0.077118,0
3,0.234604,-0.341134,-0.547374,-0.308704,0.088972,-0.388365,0.077232,1.987442,-0.475624,-0.26277,...,-0.255388,-0.258027,-0.19843,-0.209176,-0.153823,-0.106539,-0.177886,-0.184357,-0.077118,0
4,0.933617,-0.362895,-0.236814,-0.193425,-0.01209,-0.175105,0.195748,-0.18602,0.265365,-0.033088,...,-0.255388,-0.258027,-0.19843,-0.209176,-0.153823,-0.106539,-0.177886,-0.184357,-0.077118,1


In [16]:
class Model(nn.Module):
    """Fully connected network: three ReLU-activated hidden layers and a linear head.

    Args:
        input_size: Number of features in each input row.
        h1: Width of the first hidden layer.
        h2: Width of the second hidden layer.
        h3: Width of the third hidden layer.
        output_size: Number of values produced per row (defaults to 2).
    """

    def __init__(self, input_size, h1, h2, h3, output_size = 2):
        super().__init__()
        # The attribute names below are also the prefixes of the state_dict
        # keys, so they are kept exactly as they were.
        self.fc_1 = nn.Linear(in_features=input_size, out_features=h1)
        self.fc_2 = nn.Linear(in_features=h1, out_features=h2)
        self.fc_3 = nn.Linear(in_features=h2, out_features=h3)
        self.output = nn.Linear(in_features=h3, out_features=output_size)

    def forward(self, x):
        """Return the output layer's raw values for ``x`` (no softmax is applied)."""
        hidden = x
        for layer in (self.fc_1, self.fc_2, self.fc_3):
            hidden = F.relu(layer(hidden))
        return self.output(hidden)

In [17]:
class SklearnWrapper(BaseEstimator, ClassifierMixin):
    """scikit-learn style classifier that trains the three-hidden-layer ``Model``.

    The constructor arguments are the tunable hyper-parameters, so the
    estimator can be cloned and searched by tools such as ``GridSearchCV``.

    Args:
        input_size: Number of input features per row.
        h1, h2, h3: Widths of the three hidden layers.
        lr: Learning rate passed to the Adam optimiser.
        num_epochs: Number of full passes over the training data.

    Attributes:
        device: ``cuda`` when available, otherwise ``cpu``.
        model: The trained ``Model`` instance, or ``None`` before ``fit``.
        classes_: Class indices the network can emit (set by ``fit``).
    """

    def __init__(self, input_size=10, h1=32, h2=16, h3=16, lr=0.01, num_epochs=100):
        self.input_size = input_size
        self.h1 = h1
        self.h2 = h2
        self.h3 = h3
        self.lr = lr
        self.num_epochs = num_epochs
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = None

    def _to_device(self, values, dtype):
        """Turn a tensor or array-like into a ``dtype`` tensor on ``self.device``."""
        if not torch.is_tensor(values):
            # Covers NumPy arrays, pandas objects and plain sequences.
            values = np.asarray(values)
        return torch.as_tensor(values, dtype=dtype).to(self.device)

    def _logits(self, X):
        """Run the fitted network on ``X`` in eval mode without tracking gradients."""
        if self.model is None:
            # NotFittedError subclasses both ValueError and AttributeError, so
            # callers that caught the previous AttributeError keep working.
            from sklearn.exceptions import NotFittedError
            raise NotFittedError(
                "This SklearnWrapper instance is not fitted yet; call fit() first."
            )
        self.model.eval()
        with torch.no_grad():
            return self.model(self._to_device(X, torch.float32))

    def fit(self, X, y):
        """Train a fresh network on ``X`` / ``y`` and return ``self``.

        Args:
            X: Feature rows as a tensor or array-like of shape (n_samples, input_size).
            y: Integer class indices, one per row; they are fed directly to
                ``CrossEntropyLoss``.
        """
        from torch.utils.data import TensorDataset, DataLoader

        # Inputs may arrive as NumPy arrays or tensors and must live on the
        # same device as the model, otherwise the forward pass fails on CUDA.
        X_tensor = self._to_device(X, torch.float32)
        y_tensor = self._to_device(y, torch.long)
        dataset = TensorDataset(X_tensor, y_tensor)
        loader = DataLoader(dataset, batch_size=32, shuffle=True)

        self.model = Model(self.input_size, self.h1, self.h2, self.h3).to(self.device)
        # predict() returns argmax indices over the output units, so those
        # indices are the classes probability-based scorers should see.
        self.classes_ = np.arange(self.model.output.out_features)
        optimizer = torch.optim.Adam(self.model.parameters(), lr=self.lr)
        criterion = nn.CrossEntropyLoss()

        self.model.train()
        for epoch in range(self.num_epochs):
            for xb, yb in loader:
                pred = self.model(xb)
                loss = criterion(pred, yb)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
        return self

    def predict_proba(self, X):
        """Return softmax class probabilities as an (n_samples, n_outputs) NumPy array."""
        return torch.softmax(self._logits(X), dim=1).cpu().numpy()

    def predict(self, X):
        """Return the index of the largest output per row as a NumPy array."""
        return self._logits(X).argmax(dim=1).cpu().numpy()


In [18]:
# Hyper-parameter search space: every hidden layer is tried at the same six
# widths, combined with three learning rates and seven epoch counts.
param_grid = {
    **{layer: [16, 32, 64, 128, 160, 192] for layer in ('h1', 'h2', 'h3')},
    'lr': [0.01, 0.005, 0.001],
    'num_epochs': list(range(40, 161, 20)),
}

In [19]:
# Multi-metric scoring spec for GridSearchCV / cross_validate, written with
# scikit-learn's built-in scorer names. The previous
# make_scorer(..., needs_proba=True) form relies on a keyword that newer
# scikit-learn releases deprecated and later removed; the names below work
# across versions. 'roc_auc_ovr' needs an estimator with predict_proba.
scoring = {
    'accuracy': 'accuracy',
    'roc_auc': 'roc_auc_ovr',
    # TODO: add 'neg_log_loss' once the loss calculation has been checked.
}

In [20]:
# Separate the feature columns from the 'Depression' label and move to NumPy.
X = data.drop('Depression', axis = 1).values
y = data['Depression'].values

# A fixed seed keeps the same rows in the hold-out set on every run, so test
# results stay comparable across kernel restarts.
SPLIT_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=SPLIT_SEED
)

# float32 features for the linear layers; int64 labels for CrossEntropyLoss.
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)

y_train = torch.tensor(y_train, dtype=torch.long)
y_test = torch.tensor(y_test, dtype=torch.long)

In [21]:
# Seed PyTorch's global RNG so weight initialisation and the shuffled
# mini-batches drawn during each fit are repeatable between runs.
torch.manual_seed(42)

# Exhaustive search over param_grid with 5-fold cross-validation, ranked by
# accuracy. Every candidate is trained once per fold, so this cell is slow.
grid = GridSearchCV(
    estimator=SklearnWrapper(input_size=X_train.shape[1]),
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
)
grid.fit(X_train, y_train)

print("Best parameters found:", grid.best_params_)
print("Best score:", grid.best_score_)

Best parameters found: {'h1': 192, 'h2': 64, 'h3': 32, 'lr': 0.01, 'num_epochs': 60}
Best score: 0.9337474120082815


In [22]:
#ModelWrapper(input_size = X_train.shape[1], h1=192, h2=128, lr=0.01, num_epochs=60)

In [23]:
# Show the layer layout of the network held by the best estimator from the search.
print(grid.best_estimator_.model)

Model(
  (fc_1): Linear(in_features=556, out_features=192, bias=True)
  (fc_2): Linear(in_features=192, out_features=64, bias=True)
  (fc_3): Linear(in_features=64, out_features=32, bias=True)
  (output): Linear(in_features=32, out_features=2, bias=True)
)


In [24]:
# Keep a handle on the winning wrapper; its trained network is `best_model.model`.
best_model = grid.best_estimator_

In [25]:
# Score the tuned network on the held-out rows one sample at a time, printing
# the raw outputs, the true label and the predicted label for each row.
correct = 0
with torch.no_grad():
    # The loop variable is `sample`, not `data`, so it does not overwrite the
    # DataFrame named `data` that earlier cells read from.
    for i, sample in enumerate(X_test):
        y_val = best_model.model(sample)
        predicted = y_val.argmax().item()

        print(f'{i+1}.)  {str(y_val)} \t {y_test[i]} \t {predicted}')

        # Correct or not (compare plain Python ints, not an int with a tensor)
        if predicted == y_test[i].item():
            correct += 1

print(f'We got {correct} correct!')

1.)  tensor([-7.4057, 16.2615]) 	 1 	 1
2.)  tensor([2.2461, 2.2439]) 	 0 	 0
3.)  tensor([12.8032, -4.9961]) 	 1 	 0
4.)  tensor([ 43.8987, -16.4034]) 	 0 	 0
5.)  tensor([-5.1705, 11.1622]) 	 1 	 1
6.)  tensor([-4.3749, 11.0912]) 	 1 	 1
7.)  tensor([-17.3865,  39.8627]) 	 1 	 1
8.)  tensor([ 48.2528, -17.9782]) 	 0 	 0
9.)  tensor([-14.8727,  33.0279]) 	 1 	 1
10.)  tensor([-2.6871,  8.1762]) 	 0 	 1
11.)  tensor([ 38.1992, -15.2455]) 	 0 	 0
12.)  tensor([20.5657, -8.2328]) 	 0 	 0
13.)  tensor([ 5.3230, -1.9138]) 	 0 	 0
14.)  tensor([-27.1832,  60.9161]) 	 1 	 1
15.)  tensor([-2.0106,  4.9181]) 	 1 	 1
16.)  tensor([16.7826, -6.2067]) 	 0 	 0
17.)  tensor([-3.1547,  7.5314]) 	 0 	 1
18.)  tensor([-0.3356,  2.1552]) 	 1 	 1
19.)  tensor([ 4.5418, -1.6395]) 	 0 	 0
20.)  tensor([-3.1806,  7.4217]) 	 1 	 1
21.)  tensor([-14.9069,  33.1035]) 	 1 	 1
22.)  tensor([-1.5418,  4.2976]) 	 1 	 1
23.)  tensor([20.6018, -7.6351]) 	 0 	 0
24.)  tensor([-7.5379, 17.5241]) 	 1 	 1
25.)  tensor(

In [26]:
#model = Model(input_size = 556, h1 =20, h2 =20)

In [35]:
# Persist only the learned weights (state_dict) of the tuned network. The
# target folder is created first because torch.save does not create
# missing directories.
os.makedirs('Models', exist_ok=True)
torch.save(best_model.model.state_dict(), 'Models/3_layer_nn_192_64_32')

In [33]:
# Rebuild the architecture with the same layer widths that were saved, then
# restore the weights from the file the save cell writes. The previous path,
# 'Models/3_layer_nn', is not produced anywhere in this notebook, so a fresh
# run would load a stale or missing file.
model = Model(input_size=X_train.shape[1], h1=192, h2=64, h3=32, output_size=2)
model.load_state_dict(
    torch.load('Models/3_layer_nn_192_64_32', map_location='cpu')
)
model.eval()

Model(
  (fc_1): Linear(in_features=556, out_features=192, bias=True)
  (fc_2): Linear(in_features=192, out_features=64, bias=True)
  (fc_3): Linear(in_features=64, out_features=32, bias=True)
  (output): Linear(in_features=32, out_features=2, bias=True)
)

In [None]:
# Repeat the hold-out evaluation with the network restored from disk, to
# check that the reloaded weights give the same predictions.
correct = 0
with torch.no_grad():
    # The loop variable is `sample`, not `data`, so it does not overwrite the
    # DataFrame named `data` that earlier cells read from.
    for i, sample in enumerate(X_test):
        y_val = model(sample)
        predicted = y_val.argmax().item()

        print(f'{i+1}.)  {str(y_val)} \t {y_test[i]} \t {predicted}')

        # Correct or not (compare plain Python ints, not an int with a tensor)
        if predicted == y_test[i].item():
            correct += 1

print(f'We got {correct} correct!')

1.)  tensor([-7.4057, 16.2615]) 	 1 	 1
2.)  tensor([2.2461, 2.2439]) 	 0 	 0
3.)  tensor([12.8032, -4.9961]) 	 1 	 0
4.)  tensor([ 43.8987, -16.4034]) 	 0 	 0
5.)  tensor([-5.1705, 11.1622]) 	 1 	 1
6.)  tensor([-4.3749, 11.0912]) 	 1 	 1
7.)  tensor([-17.3865,  39.8627]) 	 1 	 1
8.)  tensor([ 48.2528, -17.9782]) 	 0 	 0
9.)  tensor([-14.8727,  33.0279]) 	 1 	 1
10.)  tensor([-2.6871,  8.1762]) 	 0 	 1
11.)  tensor([ 38.1992, -15.2455]) 	 0 	 0
12.)  tensor([20.5657, -8.2328]) 	 0 	 0
13.)  tensor([ 5.3230, -1.9138]) 	 0 	 0
14.)  tensor([-27.1832,  60.9161]) 	 1 	 1
15.)  tensor([-2.0106,  4.9181]) 	 1 	 1
16.)  tensor([16.7826, -6.2067]) 	 0 	 0
17.)  tensor([-3.1547,  7.5314]) 	 0 	 1
18.)  tensor([-0.3356,  2.1552]) 	 1 	 1
19.)  tensor([ 4.5418, -1.6395]) 	 0 	 0
20.)  tensor([-3.1806,  7.4217]) 	 1 	 1
21.)  tensor([-14.9069,  33.1035]) 	 1 	 1
22.)  tensor([-1.5418,  4.2976]) 	 1 	 1
23.)  tensor([20.6018, -7.6351]) 	 0 	 0
24.)  tensor([-7.5379, 17.5241]) 	 1 	 1
25.)  tensor(