In [87]:
import pandas as pd
import os
from Utils.FileUtils import file_exists
from torch.utils.data import Dataset
from itertools import chain

In [7]:
# Relative path to the diabetes CSV used throughout this notebook. The same string is
# displayed by EnviromentParameters.DiabetesDataset.file_path in the last cell.
diabetes_dataset_path = './datasets/medical/diabetes.csv'

In [10]:
# Fail fast with a specific error type (still an Exception subclass) and include the
# offending path so the failure is actionable.
if not file_exists(diabetes_dataset_path):
    raise FileNotFoundError(
        f"This dataset doesn't exist on local: {diabetes_dataset_path}"
    )

In [11]:
# Load the whole diabetes CSV into a DataFrame (the previous cell raises if the file is missing).
df = pd.read_csv(diabetes_dataset_path)

In [14]:
# Peek at the first rows (head() defaults to 5 rows).
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [19]:
# Take the first two rows as Series to mimic what a DataLoader hands to a collate function.
input_data = [df.iloc[row_pos] for row_pos in (0, 1)]

In [28]:
# Stack the sampled row Series back into a DataFrame (one row per Series).
input_df = pd.DataFrame(input_data)

In [31]:
# `feature_names` is not assigned in any earlier cell, so this cell only worked with
# leftover kernel state. Derive it here: every column except the "Outcome" label.
feature_names = [col for col in input_df.columns if col != "Outcome"]
data = input_df[feature_names]

In [34]:
import torch
import numpy as np

array([[  6.   , 148.   ,  72.   ,  35.   ,   0.   ,  33.6  ,   0.627,
         50.   ],
       [  1.   ,  85.   ,  66.   ,  29.   ,   0.   ,  26.6  ,   0.351,
         31.   ]])

In [36]:
# Convert the selected feature columns to a tensor via NumPy; the result keeps the
# float64 dtype of the array (see the output below).
torch.tensor(np.array(data))

tensor([[  6.0000, 148.0000,  72.0000,  35.0000,   0.0000,  33.6000,   0.6270,
          50.0000],
        [  1.0000,  85.0000,  66.0000,  29.0000,   0.0000,  26.6000,   0.3510,
          31.0000]], dtype=torch.float64)

In [26]:
# Display the feature column names (every column except the label).
feature_names

['Pregnancies',
 'Glucose',
 'BloodPressure',
 'SkinThickness',
 'Insulin',
 'BMI',
 'DiabetesPedigreeFunction',
 'Age']

In [116]:
# Dataset wrapper for the diabetes CSV

class DiabeteDataset(Dataset):
    """Map-style dataset over the diabetes CSV.

    Each item is one row of the CSV returned as a ``pd.Series``. Batches are
    turned into tensors by :meth:`collate_fn`, which is meant to be passed to
    ``DataLoader(..., collate_fn=dataset.collate_fn)``.

    Attributes:
        df: The full table read from ``file_path``.
        target_col_name: Name of the label column (``"Outcome"``).
        feature_names: Every column of ``df`` except the label, in file order.
    """

    def __init__(self, file_path: str):
        """Read the CSV at ``file_path`` and work out the feature columns."""
        self.df = pd.read_csv(file_path)
        self.target_col_name = "Outcome"
        # Derive the feature list from the frame that was just loaded. The previous
        # version read the notebook-global `input_data`, which made the class fail
        # (or silently pick up unrelated columns) outside that kernel session.
        self.feature_names = [
            col for col in self.df.columns if col != self.target_col_name
        ]

    def __len__(self) -> int:
        """Number of rows in the dataset."""
        return len(self.df)

    def num_features(self) -> int:
        """Number of feature columns (the label column is excluded)."""
        return len(self.feature_names)

    def __getitem__(self, index: int) -> pd.Series:
        """Return the row at positional ``index`` (features and label together)."""
        return self.df.iloc[index]

    def collate_fn(self, data: list[pd.Series]) -> tuple[torch.Tensor, torch.Tensor]:
        """Assemble a list of row Series into ``(features, targets)`` tensors.

        Args:
            data: Rows as returned by ``__getitem__``.

        Returns:
            A pair ``(features, targets)``: ``features`` has one row per item and
            one column per entry of ``feature_names``; ``targets`` holds the
            matching ``target_col_name`` values. Dtypes follow the NumPy arrays
            built from the DataFrame (no cast is applied here).
        """
        # Rebuild a frame so columns can be selected by name.
        batch_df = pd.DataFrame(data)
        batch_features = batch_df[self.feature_names]
        batch_targets = batch_df[self.target_col_name]
        return torch.tensor(np.array(batch_features)), torch.tensor(np.array(batch_targets))


   
    


In [49]:
# Build the dataset from the CSV path defined at the top of the notebook.
diabete_dataset = DiabeteDataset(diabetes_dataset_path)

In [50]:
from torch.utils.data import DataLoader

In [51]:
# Shuffled mini-batches of 8 rows; the dataset's own collate_fn turns the row Series
# into (features, targets) tensors.
diabete_loader = DataLoader(
    dataset=diabete_dataset,
    batch_size=8,
    shuffle=True,
    collate_fn=diabete_dataset.collate_fn,
)

In [52]:
# Pull one batch to inspect it. Use the built-in next(): the `.next()` method on loader
# iterators is a Python-2-style alias that newer PyTorch releases no longer provide.
next(iter(diabete_loader))

(tensor([[3.0000e+00, 9.6000e+01, 7.8000e+01, 3.9000e+01, 0.0000e+00, 3.7300e+01,
          2.3800e-01, 4.0000e+01],
         [3.0000e+00, 1.2300e+02, 1.0000e+02, 3.5000e+01, 2.4000e+02, 5.7300e+01,
          8.8000e-01, 2.2000e+01],
         [1.0000e+00, 1.1700e+02, 6.0000e+01, 2.3000e+01, 1.0600e+02, 3.3800e+01,
          4.6600e-01, 2.7000e+01],
         [9.0000e+00, 1.5400e+02, 7.8000e+01, 3.0000e+01, 1.0000e+02, 3.0900e+01,
          1.6400e-01, 4.5000e+01],
         [1.0000e+00, 1.8000e+02, 0.0000e+00, 0.0000e+00, 0.0000e+00, 4.3300e+01,
          2.8200e-01, 4.1000e+01],
         [9.0000e+00, 8.9000e+01, 6.2000e+01, 0.0000e+00, 0.0000e+00, 2.2500e+01,
          1.4200e-01, 3.3000e+01],
         [1.0000e+00, 7.7000e+01, 5.6000e+01, 3.0000e+01, 5.6000e+01, 3.3300e+01,
          1.2510e+00, 2.4000e+01],
         [1.0000e+01, 1.1500e+02, 9.8000e+01, 0.0000e+00, 0.0000e+00, 2.4000e+01,
          1.0220e+00, 3.4000e+01]], dtype=torch.float64),
 tensor([0., 0., 0., 0., 1., 0., 0., 0.],

In [54]:
import torch.nn as nn

In [99]:
class BaseNNModel(nn.Module):
    """Fully connected network ending in a single sigmoid output.

    Every hidden width in ``hidden_dim`` contributes a
    ``Linear -> BatchNorm1d -> LeakyReLU`` group; the last ``Linear`` maps to one
    unit and is followed by ``Sigmoid``.

    Args:
        num_input_features: Width of the input vectors.
        hidden_dim: Hidden layer widths, in order. ``None`` (the default) or an
            empty sequence yields a single ``Linear -> Sigmoid`` model.
    """

    def __init__(self, num_input_features, hidden_dim: "list[int] | None" = None):
        super().__init__()
        output_dim = 1

        # `None` replaces the former shared mutable `[]` default; list() also lets
        # callers pass a tuple of widths.
        all_dim = [num_input_features] + list(hidden_dim or []) + [output_dim]

        layers = []
        final_idx = len(all_dim) - 2  # index of the last (output) Linear layer
        for idx, (in_dim, out_dim) in enumerate(zip(all_dim, all_dim[1:])):
            layers.append(nn.Linear(in_dim, out_dim))
            if idx != final_idx:
                # Only hidden layers get normalisation and the activation.
                layers.append(nn.BatchNorm1d(out_dim))
                layers.append(nn.LeakyReLU(inplace=True))
        layers.append(nn.Sigmoid())

        self.model = nn.Sequential(*layers)

        self.apply(BaseNNModel.weight_init)

    def forward(self, input: torch.Tensor) -> torch.Tensor:
        """Run ``input`` through the sequential stack and return the sigmoid output."""
        return self.model(input)

    def num_all_params(self) -> int:
        """Return the total number of elements across all parameters of the model."""
        return sum(param.nelement() for param in self.parameters())

    @staticmethod
    def weight_init(m) -> None:
        """Initialise the weights of ``m`` in place; meant for ``nn.Module.apply``.

        Conv/Linear modules get Kaiming-normal weights; LSTM modules get
        Xavier-normal weights and normally distributed biases. Other module types
        are left untouched.
        """
        # Exact type match (not isinstance) is kept on purpose so subclasses are
        # not touched, as before.
        if type(m) in (nn.Conv2d, nn.ConvTranspose2d, nn.Linear, nn.Conv1d):
            # NOTE(review): a=0.2 assumes a LeakyReLU slope of 0.2, while the
            # LeakyReLU layers built in __init__ use the default slope (0.01).
            # Left unchanged to keep initial weights identical; confirm intent.
            nn.init.kaiming_normal_(m.weight, a=0.2, nonlinearity='leaky_relu')
        elif type(m) in (nn.LSTM,):
            for name, value in m.named_parameters():
                if 'weight' in name:
                    nn.init.xavier_normal_(value.data)
                if 'bias' in name:
                    value.data.normal_()

In [85]:
# Scratch check: a nested list of layers can be flattened and unpacked into nn.Sequential.
seq = nn.Sequential(
    nn.Linear(10, 10),
    *(layer for group in [[nn.Linear(5, 10), nn.BatchNorm1d(10)]] for layer in group),
)

In [72]:
# Example hidden-layer widths for the layer-building experiment below.
hidden_dim = [4, 6, 7]

In [70]:
# NOTE(review): this cell was executed (In [70]) before the `[4, 6, 7]` assignment
# (In [72]) but sits after it. On a top-to-bottom run this value wins, and the
# outputs shown below (which reflect [4, 6, 7]) will no longer match.
hidden_dim = [4]*10

In [75]:
# Show the hidden widths as a plain list (the enumerate index was unused).
list(hidden_dim)

[4, 6, 7]

In [76]:
# Full list of layer widths: 10 inputs, then the hidden widths, then a single output.
all_dim = [10] + hidden_dim + [1]

In [96]:
# One group per consecutive width pair: hidden groups are Linear + BatchNorm1d + LeakyReLU,
# the final group is a bare Linear.
[
    [nn.Linear(n_in, n_out)]
    if pos == len(all_dim) - 2
    else [nn.Linear(n_in, n_out), nn.BatchNorm1d(n_out), nn.LeakyReLU(inplace=True)]
    for pos, (n_in, n_out) in enumerate(zip(all_dim, all_dim[1:]))
]

[[Linear(in_features=10, out_features=4, bias=True),
  BatchNorm1d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
  LeakyReLU(negative_slope=0.01, inplace=True)],
 [Linear(in_features=4, out_features=6, bias=True),
  BatchNorm1d(6, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
  LeakyReLU(negative_slope=0.01, inplace=True)],
 [Linear(in_features=6, out_features=7, bias=True),
  BatchNorm1d(7, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
  LeakyReLU(negative_slope=0.01, inplace=True)],
 [Linear(in_features=7, out_features=1, bias=True)]]

In [86]:
# Display the flattened layers of the scratch Sequential.
seq

Sequential(
  (0): Linear(in_features=10, out_features=10, bias=True)
  (1): Linear(in_features=5, out_features=10, bias=True)
  (2): BatchNorm1d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)

In [102]:
# Input width comes from the dataset's feature count; two hidden layers of 8 and 10 units.
model = BaseNNModel(diabete_loader.dataset.num_features(), hidden_dim=[8, 10])

In [106]:
# Smoke-test the forward pass on one batch. Features come out of collate_fn as float64,
# so cast to float32 to match the model parameters. Uses the built-in next() because the
# `.next()` alias on loader iterators is not available in newer PyTorch releases.
model(next(iter(diabete_loader))[0].float())

tensor([[0.6683],
        [0.7679],
        [0.1999],
        [0.6972],
        [0.7641],
        [0.7285],
        [0.6900],
        [0.7090]], grad_fn=<SigmoidBackward>)

In [101]:
# The feature names are reachable through the loader's underlying dataset.
diabete_loader.dataset.feature_names

['Pregnancies',
 'Glucose',
 'BloodPressure',
 'SkinThickness',
 'Insulin',
 'BMI',
 'DiabetesPedigreeFunction',
 'Age']

In [107]:
# Batch size the loader was configured with.
diabete_loader.batch_size

8

In [109]:
from Parameters import EnviromentParameters

In [113]:
# The project's Parameters module exposes the dataset path too; it displays the same
# string as the `diabetes_dataset_path` constant defined at the top of the notebook.
EnviromentParameters.DiabetesDataset.file_path

'./datasets/medical/diabetes.csv'