<b>Dependencies:</b> <br>
    import pandas as pd <br>
    import random <br><br>
    from sklearn.preprocessing import StandardScaler <br>
    from sklearn.model_selection import train_test_split <br>
    from sklearn.metrics import accuracy_score <br>
    from sklearn.metrics import precision_score <br>
    from sklearn.metrics import recall_score <br>
    from sklearn.metrics import f1_score <br>
    import torch <br>
    import torch.nn as nn <br>
    import torch.nn.functional as F <br>

In [3]:
# ANN 75-25

import pandas as pd

from sklearn.preprocessing import StandardScaler

# Evaluation methods
from sklearn.model_selection import train_test_split

# tp: True Positive
# fp: False Positive
# tn: True Negative
# fn: false negative

# Number of correct predictions / Total number of predictions
from sklearn.metrics import accuracy_score

# tp / (tp + fp) -> Important when the cost of False Positive is high
from sklearn.metrics import precision_score

# tp / (tp + fn) -> Important when the cost of False Negative is high
from sklearn.metrics import recall_score

# (2* precision * recall) / (precision + recall) -> When looking for a balance between Precision and Recall AND
#                                                   there is an uneven class distribution (large number of negatives)
from sklearn.metrics import f1_score

# ANN libraries
import torch
# Base class for all neural network modules
import torch.nn as nn
import torch.nn.functional as F


class Custom_ANN_Model(nn.Module):
    """ Artificial Neural Network model used
    """

    def __init__(self, in_features=8, neurons_first_layer=20, neurons_second_layer=20, out_features=2):
        """ Inits Custom_ANN_Model hidden layers.

        :param in_features: int Number of input features
        :params neurons_X_layer: int Number of neurons in the X hidden layer
        :param out_features: int Number of output features
        """

        super(Custom_ANN_Model, self).__init__()
        # Definition of the two hidden layers
        self.f_connected1 = nn.Linear(in_features=in_features, out_features=neurons_first_layer)
        self.f_connected2 = nn.Linear(in_features=neurons_first_layer, out_features=neurons_second_layer)
        self.out = nn.Linear(in_features=neurons_second_layer, out_features=out_features)

    def forward(self, h):
        """ Defines the computation performed at every call.
        """

        # Layers activation
        h = F.relu(self.f_connected1(h))
        h = F.relu(self.f_connected2(h))
        h = self.out(h)
        return h


diabetes_cleaned= pd.read_csv('../datasets/diabetes_cleaned.csv')
features = ['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness',
            'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age']
X = diabetes_cleaned[features]
y = diabetes_cleaned.Outcome
random_seed = 3

# Standardization of the columns formed by different values (e.g. Age [21-81] and Glucose [44-199])
# so that they can use a common scale
scaler= StandardScaler()
scaled= scaler.fit_transform(X)

X_train,X_val,y_train,y_val= train_test_split(scaled, y, random_state=random_seed)
# Lists to tensors
X_train= torch.FloatTensor(X_train)
X_val= torch.FloatTensor(X_val)
y_train= torch.LongTensor(y_train.values)
y_val= torch.LongTensor(y_val.values)


torch.manual_seed(random_seed)
model=Custom_ANN_Model()

# Common loss function used in classification tasks
loss_function= nn.CrossEntropyLoss()
# lr: Learning Rate
optimizer= torch.optim.Adam(model.parameters(), lr=0.01)

epochs = 900
for i in range(1, epochs):
    y_pred = model.forward(X_train)
    loss = loss_function(y_pred,y_train)
    # if i%10==1:
        # print("Epoch number: {} \t\tLoss: {}".format(i, loss.item()))

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

predictions=[]
with torch.no_grad():
    for i,data in enumerate(X_val):
        y_pred = model(data)
        predictions.append(y_pred.argmax().item())


accuracy= accuracy_score(y_val, predictions)
precision= precision_score(y_val, predictions)
recall= recall_score(y_val,predictions)
f1= f1_score(y_val,predictions)


results_ANN = pd.DataFrame({'Model': ["ANN"],
                            'Accuracy': [accuracy],
                            'Precision ': [precision],
                            'Recall': [recall],
                            'f1': [f1],})

print("\n\n", "ANN\n", results_ANN, "\n\n")





 ANN
   Model  Accuracy  Precision     Recall        f1
0   ANN  0.646409    0.514286  0.545455  0.529412 




In [4]:
# AVERAGE ANN 75-25

import pandas as pd
import random

from sklearn.preprocessing import StandardScaler

# Evaluation methods
from sklearn.model_selection import train_test_split

# tp: True Positive
# fp: False Positive
# tn: True Negative
# fn: false negative

# Number of correct predictions / Total number of predictions
from sklearn.metrics import accuracy_score

# tp / (tp + fp) -> Important when the cost of False Positive is high
from sklearn.metrics import precision_score

# tp / (tp + fn) -> Important when the cost of False Negative is high
from sklearn.metrics import recall_score

# (2* precision * recall) / (precision + recall) -> When looking for a balance between Precision and Recall AND
#                                                   there is an uneven class distribution (large number of negatives)
from sklearn.metrics import f1_score

# ANN libraries
import torch
# Base class for all neural network modules
import torch.nn as nn
import torch.nn.functional as F


class Custom_ANN_Model(nn.Module):
    """ Artificial Neural Network model used
    """

    def __init__(self, in_features=8, neurons_first_layer=20, neurons_second_layer=20, out_features=2):
        """ Inits Custom_ANN_Model hidden layers.

        :param in_features: int Number of input features
        :params neurons_X_layer: int Number of neurons in the X hidden layer
        :param out_features: int Number of output features
        """

        super(Custom_ANN_Model, self).__init__()
        # Definition of the two hidden layers
        self.f_connected1 = nn.Linear(in_features=in_features, out_features=neurons_first_layer)
        self.f_connected2 = nn.Linear(in_features=neurons_first_layer, out_features=neurons_second_layer)
        self.out = nn.Linear(in_features=neurons_second_layer, out_features=out_features)

    def forward(self, h):
        """ Defines the computation performed at every call.
        """

        # Layers activation
        h = F.relu(self.f_connected1(h))
        h = F.relu(self.f_connected2(h))
        h = self.out(h)
        return h


diabetes_cleaned= pd.read_csv('../datasets/diabetes_cleaned.csv')
features = ['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness',
            'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age']
X = diabetes_cleaned[features]
y = diabetes_cleaned.Outcome
accuracy = []
precision = []
recall = []
f1 = []

# Standardization of the columns formed by different values (e.g. Age [21-81] and Glucose [44-199])
# so that they can use a common scale
scaler= StandardScaler()
scaled= scaler.fit_transform(X)

# Common loss function used in classification tasks
loss_function= nn.CrossEntropyLoss()
epochs = 900

times_repeated = 100
for _ in range(times_repeated):
    random_seed = random.randint(0, 1000)

    X_train,X_val,y_train,y_val= train_test_split(scaled, y, random_state=random_seed)
    # Lists to tensors
    X_train= torch.FloatTensor(X_train)
    X_val= torch.FloatTensor(X_val)
    y_train= torch.LongTensor(y_train.values)
    y_val= torch.LongTensor(y_val.values)

    torch.manual_seed(random_seed)
    model=Custom_ANN_Model()

    # lr: Learning Rate
    optimizer= torch.optim.Adam(model.parameters(), lr=0.01)

    for i in range(1, epochs):
        y_pred = model.forward(X_train)
        loss = loss_function(y_pred,y_train)
        # if i%10==1:
            # print("Epoch number: {} \t\tLoss: {}".format(i, loss.item()))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    predictions=[]
    with torch.no_grad():
        for i,data in enumerate(X_val):
            y_pred = model(data)
            predictions.append(y_pred.argmax().item())

    accuracy.append(accuracy_score(y_val, predictions))
    precision.append(precision_score(y_val, predictions))
    recall.append(recall_score(y_val,predictions))
    f1.append(f1_score(y_val,predictions))


results_ANN = pd.DataFrame({'Model': ["ANN"],
                            'Accuracy': [sum(accuracy)/len(accuracy)],
                            'Precision ': [sum(precision)/len(precision)],
                            'Recall': [sum(recall)/len(recall)],
                            'f1': [sum(f1)/len(f1)],})

print("\n\n", "AVERAGE ANN\n", results_ANN, "\n\n")





 AVERAGE ANN
   Model  Accuracy  Precision    Recall        f1
0   ANN  0.700331    0.569363  0.56232  0.563627 




In [5]:
# ANN WITH FEATURE SELECTION 75-25

import pandas as pd

from sklearn.preprocessing import StandardScaler

# Evaluation methods
from sklearn.model_selection import train_test_split

# tp: True Positive
# fp: False Positive
# tn: True Negative
# fn: false negative

# Number of correct predictions / Total number of predictions
from sklearn.metrics import accuracy_score

# tp / (tp + fp) -> Important when the cost of False Positive is high
from sklearn.metrics import precision_score

# tp / (tp + fn) -> Important when the cost of False Negative is high
from sklearn.metrics import recall_score

# (2* precision * recall) / (precision + recall) -> When looking for a balance between Precision and Recall AND
#                                                   there is an uneven class distribution (large number of negatives)
from sklearn.metrics import f1_score

# ANN libraries
import torch
# Base class for all neural network modules
import torch.nn as nn
import torch.nn.functional as F


class Custom_ANN_Model(nn.Module):
    """ Artificial Neural Network model used
    """

    def __init__(self, in_features=4, neurons_first_layer=20, neurons_second_layer=20, out_features=2):
        """ Inits Custom_ANN_Model hidden layers.

        :param in_features: int Number of input features
        :params neurons_X_layer: int Number of neurons in the X hidden layer
        :param out_features: int Number of output features
        """

        super(Custom_ANN_Model, self).__init__()
        # Definition of the two hidden layers
        self.f_connected1 = nn.Linear(in_features=in_features, out_features=neurons_first_layer)
        self.f_connected2 = nn.Linear(in_features=neurons_first_layer, out_features=neurons_second_layer)
        self.out = nn.Linear(in_features=neurons_second_layer, out_features=out_features)

    def forward(self, h):
        """ Defines the computation performed at every call.
        """

        # Layers activation
        h = F.relu(self.f_connected1(h))
        h = F.relu(self.f_connected2(h))
        h = self.out(h)
        return h


diabetes_cleaned= pd.read_csv('../datasets/diabetes_cleaned.csv')
selected_features = ['Glucose', 'BMI', 'DiabetesPedigreeFunction', 'Age']
X = diabetes_cleaned[selected_features]
y = diabetes_cleaned.Outcome
random_seed = 3

# Standardization of the columns formed by different values (e.g. Age [21-81] and Glucose [44-199])
# so that they can use a common scale
scaler= StandardScaler()
scaled= scaler.fit_transform(X)

X_train,X_val,y_train,y_val= train_test_split(scaled, y, random_state=random_seed)
# Lists to tensors
X_train= torch.FloatTensor(X_train)
X_val= torch.FloatTensor(X_val)
y_train= torch.LongTensor(y_train.values)
y_val= torch.LongTensor(y_val.values)


torch.manual_seed(random_seed)
model=Custom_ANN_Model()

# Common loss function used in classification tasks
loss_function= nn.CrossEntropyLoss()
# lr: Learning Rate
optimizer= torch.optim.Adam(model.parameters(), lr=0.01)

epochs = 900
for i in range(1, epochs):
    y_pred = model.forward(X_train)
    loss = loss_function(y_pred,y_train)
    # if i%10==1:
        # print("Epoch number: {} \t\tLoss: {}".format(i, loss.item()))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

predictions=[]
with torch.no_grad():
    for i,data in enumerate(X_val):
        y_pred = model(data)
        predictions.append(y_pred.argmax().item())


accuracy= accuracy_score(y_val, predictions)
precision= precision_score(y_val, predictions)
recall= recall_score(y_val,predictions)
f1= f1_score(y_val,predictions)


results_ANN = pd.DataFrame({'Model': ["ANN"],
                            'Accuracy': [accuracy],
                            'Precision ': [precision],
                            'Recall': [recall],
                            'f1': [f1],})

print("\n\n", "ANN with Feature Selection\n", results_ANN, "\n\n")





 ANN with Feature Selection
   Model  Accuracy  Precision     Recall        f1
0   ANN  0.712707    0.606061  0.606061  0.606061 




In [6]:
# AVERAGE ANN WITH FEATURE SELECTION 75-25

import pandas as pd
import random

from sklearn.preprocessing import StandardScaler

# Evaluation methods
from sklearn.model_selection import train_test_split

# tp: True Positive
# fp: False Positive
# tn: True Negative
# fn: false negative

# Number of correct predictions / Total number of predictions
from sklearn.metrics import accuracy_score

# tp / (tp + fp) -> Important when the cost of False Positive is high
from sklearn.metrics import precision_score

# tp / (tp + fn) -> Important when the cost of False Negative is high
from sklearn.metrics import recall_score

# (2* precision * recall) / (precision + recall) -> When looking for a balance between Precision and Recall AND
#                                                   there is an uneven class distribution (large number of negatives)
from sklearn.metrics import f1_score

# ANN libraries
import torch
# Base class for all neural network modules
import torch.nn as nn
import torch.nn.functional as F


class Custom_ANN_Model(nn.Module):
    """ Artificial Neural Network model used
    """

    def __init__(self, in_features=4, neurons_first_layer=20, neurons_second_layer=20, out_features=2):
        """ Inits Custom_ANN_Model hidden layers.

        :param in_features: int Number of input features
        :params neurons_X_layer: int Number of neurons in the X hidden layer
        :param out_features: int Number of output features
        """

        super(Custom_ANN_Model, self).__init__()
        # Definition of the two hidden layers
        self.f_connected1 = nn.Linear(in_features=in_features, out_features=neurons_first_layer)
        self.f_connected2 = nn.Linear(in_features=neurons_first_layer, out_features=neurons_second_layer)
        self.out = nn.Linear(in_features=neurons_second_layer, out_features=out_features)

    def forward(self, h):
        """ Defines the computation performed at every call.
        """

        # Layers activation
        h = F.relu(self.f_connected1(h))
        h = F.relu(self.f_connected2(h))
        h = self.out(h)
        return h


diabetes_cleaned= pd.read_csv('../datasets/diabetes_cleaned.csv')
selected_features = ['Glucose', 'BMI', 'DiabetesPedigreeFunction', 'Age']
X = diabetes_cleaned[selected_features]
y = diabetes_cleaned.Outcome
accuracy = []
precision = []
recall = []
f1 = []

# Standardization of the columns formed by different values (e.g. Age [21-81] and Glucose [44-199])
# so that they can use a common scale
scaler= StandardScaler()
scaled= scaler.fit_transform(X)

# Common loss function used in classification tasks
loss_function= nn.CrossEntropyLoss()
epochs = 900

times_repeated = 100
for _ in range(times_repeated):
    random_seed = random.randint(0, 1000)

    X_train,X_val,y_train,y_val= train_test_split(scaled, y, random_state=random_seed)
    # Lists to tensors
    X_train= torch.FloatTensor(X_train)
    X_val= torch.FloatTensor(X_val)
    y_train= torch.LongTensor(y_train.values)
    y_val= torch.LongTensor(y_val.values)

    torch.manual_seed(random_seed)
    model=Custom_ANN_Model()

    # lr: Learning Rate
    optimizer= torch.optim.Adam(model.parameters(), lr=0.01)

    for i in range(1, epochs):
        y_pred = model.forward(X_train)
        loss = loss_function(y_pred,y_train)
        # if i%10==1:
            # print("Epoch number: {} \t\tLoss: {}".format(i, loss.item()))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    predictions=[]
    with torch.no_grad():
        for i,data in enumerate(X_val):
            y_pred = model(data)
            predictions.append(y_pred.argmax().item())

    accuracy.append(accuracy_score(y_val, predictions))
    precision.append(precision_score(y_val, predictions))
    recall.append(recall_score(y_val,predictions))
    f1.append(f1_score(y_val,predictions))


results_ANN = pd.DataFrame({'Model': ["ANN"],
                            'Accuracy': [sum(accuracy)/len(accuracy)],
                            'Precision ': [sum(precision)/len(precision)],
                            'Recall': [sum(recall)/len(recall)],
                            'f1': [sum(f1)/len(f1)],})

print("\n\n", "AVERAGE ANN with Feature Selection\n", results_ANN, "\n\n")





 AVERAGE ANN with Feature Selection
   Model  Accuracy  Precision     Recall        f1
0   ANN  0.711105    0.572089  0.571221  0.569484 




In [7]:
# ANN WITH DATA AUGMENTATION 75-25

import pandas as pd

from sklearn.preprocessing import StandardScaler

# Evaluation methods
from sklearn.model_selection import train_test_split

# tp: True Positive
# fp: False Positive
# tn: True Negative
# fn: false negative

# Number of correct predictions / Total number of predictions
from sklearn.metrics import accuracy_score

# tp / (tp + fp) -> Important when the cost of False Positive is high
from sklearn.metrics import precision_score

# tp / (tp + fn) -> Important when the cost of False Negative is high
from sklearn.metrics import recall_score

# (2* precision * recall) / (precision + recall) -> When looking for a balance between Precision and Recall AND
#                                                   there is an uneven class distribution (large number of negatives)
from sklearn.metrics import f1_score

# ANN libraries
import torch
# Base class for all neural network modules
import torch.nn as nn
import torch.nn.functional as F


class Custom_ANN_Model(nn.Module):
    """ Artificial Neural Network model used
    """

    def __init__(self, in_features=8, neurons_first_layer=20, neurons_second_layer=20, out_features=2):
        """ Inits Custom_ANN_Model hidden layers.

        :param in_features: int Number of input features
        :params neurons_X_layer: int Number of neurons in the X hidden layer
        :param out_features: int Number of output features
        """

        super(Custom_ANN_Model, self).__init__()
        # Definition of the two hidden layers
        self.f_connected1 = nn.Linear(in_features=in_features, out_features=neurons_first_layer)
        self.f_connected2 = nn.Linear(in_features=neurons_first_layer, out_features=neurons_second_layer)
        self.out = nn.Linear(in_features=neurons_second_layer, out_features=out_features)

    def forward(self, h):
        """ Defines the computation performed at every call.
        """

        # Layers activation
        h = F.relu(self.f_connected1(h))
        h = F.relu(self.f_connected2(h))
        h = self.out(h)
        return h


diabetes_da_training = pd.read_csv('../datasets/diabetes_train_data_75pc_100times_10.csv')
diabetes_da_test = pd.read_csv('../datasets/diabetes_test_data_25pc.csv')
features = ['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness',
            'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age']
X_train = diabetes_da_training[features]
y_train = diabetes_da_training.Outcome
X_test = diabetes_da_test[features]
y_test = diabetes_da_test.Outcome
random_seed = 3

# Standardization of the columns formed by different values (e.g. Age [21-81] and Glucose [44-199])
# so that they can use a common scale
scaler= StandardScaler()
X_train= scaler.fit_transform(X_train)
X_test= scaler.fit_transform(X_test)


# Lists to tensors
X_train= torch.FloatTensor(X_train)
X_val= torch.FloatTensor(X_test)
y_train= torch.LongTensor(y_train.values)
y_val= torch.LongTensor(y_test.values)


torch.manual_seed(random_seed)
model=Custom_ANN_Model()

# Common loss function used in classification tasks
loss_function= nn.CrossEntropyLoss()
# lr: Learning Rate
optimizer= torch.optim.Adam(model.parameters(), lr=0.01)

epochs = 900
for i in range(1, epochs):
    y_pred = model.forward(X_train)
    loss = loss_function(y_pred,y_train)
    # if i%10==1:
        # print("Epoch number: {} \t\tLoss: {}".format(i, loss.item()))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

predictions=[]
with torch.no_grad():
    for i,data in enumerate(X_val):
        y_pred = model(data)
        predictions.append(y_pred.argmax().item())


accuracy= accuracy_score(y_val, predictions)
precision= precision_score(y_val, predictions)
recall= recall_score(y_val,predictions)
f1= f1_score(y_val,predictions)


results_ANN = pd.DataFrame({'Model': ["ANN"],
                            'Accuracy': [accuracy],
                            'Precision ': [precision],
                            'Recall': [recall],
                            'f1': [f1],})

print("\n\n", "ANN with Data Augmentation\n", results_ANN, "\n\n")





 ANN with Data Augmentation
   Model  Accuracy  Precision     Recall        f1
0   ANN  0.718232    0.615385  0.606061  0.610687 




In [8]:
# ANN WITH FEATURE SELECTION AND DATA AUGMENTATION 75-25

import pandas as pd

from sklearn.preprocessing import StandardScaler

# Evaluation methods
from sklearn.model_selection import train_test_split

# tp: True Positive
# fp: False Positive
# tn: True Negative
# fn: false negative

# Number of correct predictions / Total number of predictions
from sklearn.metrics import accuracy_score

# tp / (tp + fp) -> Important when the cost of False Positive is high
from sklearn.metrics import precision_score

# tp / (tp + fn) -> Important when the cost of False Negative is high
from sklearn.metrics import recall_score

# (2* precision * recall) / (precision + recall) -> When looking for a balance between Precision and Recall AND
#                                                   there is an uneven class distribution (large number of negatives)
from sklearn.metrics import f1_score

# ANN libraries
import torch
# Base class for all neural network modules
import torch.nn as nn
import torch.nn.functional as F


class Custom_ANN_Model(nn.Module):
    """ Artificial Neural Network model used
    """

    def __init__(self, in_features=4, neurons_first_layer=20, neurons_second_layer=20, out_features=2):
        """ Inits Custom_ANN_Model hidden layers.

        :param in_features: int Number of input features
        :params neurons_X_layer: int Number of neurons in the X hidden layer
        :param out_features: int Number of output features
        """

        super(Custom_ANN_Model, self).__init__()
        # Definition of the two hidden layers
        self.f_connected1 = nn.Linear(in_features=in_features, out_features=neurons_first_layer)
        self.f_connected2 = nn.Linear(in_features=neurons_first_layer, out_features=neurons_second_layer)
        self.out = nn.Linear(in_features=neurons_second_layer, out_features=out_features)

    def forward(self, h):
        """ Defines the computation performed at every call.
        """

        # Layers activation
        h = F.relu(self.f_connected1(h))
        h = F.relu(self.f_connected2(h))
        h = self.out(h)
        return h


diabetes_da_training = pd.read_csv('../datasets/diabetes_train_data_75pc_100times_10.csv')
diabetes_da_test = pd.read_csv('../datasets/diabetes_test_data_25pc.csv')
selected_features = ['Glucose', 'BMI', 'DiabetesPedigreeFunction', 'Age']
X_train = diabetes_da_training[selected_features]
y_train = diabetes_da_training.Outcome
X_test = diabetes_da_test[selected_features]
y_test = diabetes_da_test.Outcome
random_seed = 3

# Standardization of the columns formed by different values (e.g. Age [21-81] and Glucose [44-199])
# so that they can use a common scale
scaler= StandardScaler()
X_train= scaler.fit_transform(X_train)
X_test= scaler.fit_transform(X_test)


# Lists to tensors
X_train= torch.FloatTensor(X_train)
X_val= torch.FloatTensor(X_test)
y_train= torch.LongTensor(y_train.values)
y_val= torch.LongTensor(y_test.values)


torch.manual_seed(random_seed)
model=Custom_ANN_Model()

# Common loss function used in classification tasks
loss_function= nn.CrossEntropyLoss()
# lr: Learning Rate
optimizer= torch.optim.Adam(model.parameters(), lr=0.01)

epochs = 900
for i in range(1, epochs):
    y_pred = model.forward(X_train)
    loss = loss_function(y_pred,y_train)
    # if i%10==1:
        # print("Epoch number: {} \t\tLoss: {}".format(i, loss.item()))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

predictions=[]
with torch.no_grad():
    for i,data in enumerate(X_val):
        y_pred = model(data)
        predictions.append(y_pred.argmax().item())


accuracy= accuracy_score(y_val, predictions)
precision= precision_score(y_val, predictions)
recall= recall_score(y_val,predictions)
f1= f1_score(y_val,predictions)


results_ANN = pd.DataFrame({'Model': ["ANN"],
                            'Accuracy': [accuracy],
                            'Precision ': [precision],
                            'Recall': [recall],
                            'f1': [f1],})

print("\n\n", "ANN with Feature Selection and Data Augmentation\n", results_ANN, "\n\n")





 ANN with Feature Selection and Data Augmentation
   Model  Accuracy  Precision     Recall        f1
0   ANN  0.718232    0.631579  0.545455  0.585366 




In [1]:
# ANN WITH FEATURE SELECTION AND DATA AUGMENTATION 75-25; FOUR LAYERS

import pandas as pd

from sklearn.preprocessing import StandardScaler

# Evaluation methods

# tp: True Positive
# fp: False Positive
# tn: True Negative
# fn: false negative

# Number of correct predictions / Total number of predictions
from sklearn.metrics import accuracy_score

# tp / (tp + fp) -> Important when the cost of False Positive is high
from sklearn.metrics import precision_score

# tp / (tp + fn) -> Important when the cost of False Negative is high
from sklearn.metrics import recall_score

# (2* precision * recall) / (precision + recall) -> When looking for a balance between Precision and Recall AND
#                                                   there is an uneven class distribution (large number of negatives)
from sklearn.metrics import f1_score

# ANN libraries
import torch
# Base class for all neural network modules
import torch.nn as nn
import torch.nn.functional as F


class Custom_ANN_Model(nn.Module):
    """ Artificial Neural Network model used
    """

    def __init__(self, in_features=4, neurons_first_layer=10, neurons_second_layer=10, neurons_third_layer=10,
                 neurons_fourth_layer=10, out_features=2):
        """ Inits Custom_ANN_Model hidden layers.

        :param in_features: int Number of input features
        :params neurons_X_layer: int Number of neurons in the X hidden layer
        :param out_features: int Number of output features
        """

        super(Custom_ANN_Model, self).__init__()
        # Definition of the three hidden layers
        self.f_connected1 = nn.Linear(in_features=in_features, out_features=neurons_first_layer)
        self.f_connected2 = nn.Linear(in_features=neurons_first_layer, out_features=neurons_second_layer)
        self.f_connected3 = nn.Linear(in_features=neurons_second_layer, out_features=neurons_third_layer)
        self.f_connected4 = nn.Linear(in_features=neurons_third_layer, out_features=neurons_fourth_layer)
        self.out = nn.Linear(in_features=neurons_fourth_layer, out_features=out_features)

    def forward(self, h):
        """ Defines the computation performed at every call.
        """

        # Layers activation
        h = F.relu(self.f_connected1(h))
        h = F.relu(self.f_connected2(h))
        h = F.relu(self.f_connected3(h))
        h = F.relu(self.f_connected3(h))
        h = self.out(h)
        return h


diabetes_da_training = pd.read_csv('../datasets/diabetes_train_data_75pc_100times_10.csv')
diabetes_da_test = pd.read_csv('../datasets/diabetes_test_data_25pc.csv')
selected_features = ['Glucose', 'BMI', 'DiabetesPedigreeFunction', 'Age']
X_train = diabetes_da_training[selected_features]
y_train = diabetes_da_training.Outcome
X_test = diabetes_da_test[selected_features]
y_test = diabetes_da_test.Outcome
random_seed = 3

# Standardization of the columns formed by different values (e.g. Age [21-81] and Glucose [44-199])
# so that they can use a common scale
scaler= StandardScaler()
X_train= scaler.fit_transform(X_train)
X_test= scaler.fit_transform(X_test)


# Lists to tensors
X_train= torch.FloatTensor(X_train)
X_val= torch.FloatTensor(X_test)
y_train= torch.LongTensor(y_train.values)
y_val= torch.LongTensor(y_test.values)


torch.manual_seed(random_seed)
model=Custom_ANN_Model()

# Common loss function used in classification tasks
loss_function= nn.CrossEntropyLoss()
# lr: Learning Rate
optimizer= torch.optim.Adam(model.parameters(), lr=0.01)

epochs = 900
for i in range(1, epochs):
    y_pred = model.forward(X_train)
    loss = loss_function(y_pred,y_train)
    # if i%10==1:
        # print("Epoch number: {} \t\tLoss: {}".format(i, loss.item()))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

predictions=[]
with torch.no_grad():
    for i,data in enumerate(X_val):
        y_pred = model(data)
        predictions.append(y_pred.argmax().item())


accuracy= accuracy_score(y_val, predictions)
precision= precision_score(y_val, predictions)
recall= recall_score(y_val,predictions)
f1= f1_score(y_val,predictions)


results_ANN = pd.DataFrame({'Model': ["ANN"],
                            'Accuracy': [accuracy],
                            'Precision ': [precision],
                            'Recall': [recall],
                            'f1': [f1],})

print("\n\n", "ANN with Feature Selection and Data Augmentation\n", results_ANN, "\n\n")





 ANN with Feature Selection and Data Augmentation
   Model  Accuracy  Precision     Recall        f1
0   ANN  0.762431    0.676923  0.666667  0.671756 




In [2]:
# ANN WITH FEATURE SELECTION AND DATA AUGMENTATION 75-25; EIGHT LAYERS

import pandas as pd

from sklearn.preprocessing import StandardScaler

# Evaluation methods

# tp: True Positive
# fp: False Positive
# tn: True Negative
# fn: false negative

# Number of correct predictions / Total number of predictions
from sklearn.metrics import accuracy_score

# tp / (tp + fp) -> Important when the cost of False Positive is high
from sklearn.metrics import precision_score

# tp / (tp + fn) -> Important when the cost of False Negative is high
from sklearn.metrics import recall_score

# (2* precision * recall) / (precision + recall) -> When looking for a balance between Precision and Recall AND
#                                                   there is an uneven class distribution (large number of negatives)
from sklearn.metrics import f1_score

# ANN libraries
import torch
# Base class for all neural network modules
import torch.nn as nn
import torch.nn.functional as F


class Custom_ANN_Model(nn.Module):
    """ Artificial Neural Network model used
    """

    def __init__(self, in_features=4, neurons_first_layer=10, neurons_second_layer=10, neurons_third_layer=10,
                 neurons_fourth_layer=10, neurons_fifth_layer=10, neurons_sixth_layer=10, neurons_seventh_layer=10,
                 neurons_eighth_layer=10, out_features=2):
        """ Inits Custom_ANN_Model hidden layers.

        :param in_features: int Number of input features
        :params neurons_X_layer: int Number of neurons in the X hidden layer
        :param out_features: int Number of output features
        """

        super(Custom_ANN_Model, self).__init__()
        # Definition of the three hidden layers
        self.f_connected1 = nn.Linear(in_features=in_features, out_features=neurons_first_layer)
        self.f_connected2 = nn.Linear(in_features=neurons_first_layer, out_features=neurons_second_layer)
        self.f_connected3 = nn.Linear(in_features=neurons_second_layer, out_features=neurons_third_layer)
        self.f_connected4 = nn.Linear(in_features=neurons_third_layer, out_features=neurons_fourth_layer)
        self.f_connected5 = nn.Linear(in_features=neurons_fourth_layer, out_features=neurons_fifth_layer)
        self.f_connected6 = nn.Linear(in_features=neurons_fifth_layer, out_features=neurons_sixth_layer)
        self.f_connected7 = nn.Linear(in_features=neurons_sixth_layer, out_features=neurons_seventh_layer)
        self.f_connected8 = nn.Linear(in_features=neurons_seventh_layer, out_features=neurons_eighth_layer)
        self.out = nn.Linear(in_features=neurons_eighth_layer, out_features=out_features)

    def forward(self, h):
        """ Defines the computation performed at every call.
        """

        # Layers activation
        h = F.relu(self.f_connected1(h))
        h = F.relu(self.f_connected2(h))
        h = F.relu(self.f_connected3(h))
        h = F.relu(self.f_connected3(h))
        h = self.out(h)
        return h


diabetes_da_training = pd.read_csv('../datasets/diabetes_train_data_75pc_100times_10.csv')
diabetes_da_test = pd.read_csv('../datasets/diabetes_test_data_25pc.csv')
selected_features = ['Glucose', 'BMI', 'DiabetesPedigreeFunction', 'Age']
X_train = diabetes_da_training[selected_features]
y_train = diabetes_da_training.Outcome
X_test = diabetes_da_test[selected_features]
y_test = diabetes_da_test.Outcome
random_seed = 3

# Standardization of the columns formed by different values (e.g. Age [21-81] and Glucose [44-199])
# so that they can use a common scale
scaler= StandardScaler()
X_train= scaler.fit_transform(X_train)
X_test= scaler.fit_transform(X_test)


# Lists to tensors
X_train= torch.FloatTensor(X_train)
X_val= torch.FloatTensor(X_test)
y_train= torch.LongTensor(y_train.values)
y_val= torch.LongTensor(y_test.values)


torch.manual_seed(random_seed)
model=Custom_ANN_Model()

# Common loss function used in classification tasks
loss_function= nn.CrossEntropyLoss()
# lr: Learning Rate
optimizer= torch.optim.Adam(model.parameters(), lr=0.01)

epochs = 900
for i in range(1, epochs):
    y_pred = model.forward(X_train)
    loss = loss_function(y_pred,y_train)
    # if i%10==1:
        # print("Epoch number: {} \t\tLoss: {}".format(i, loss.item()))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

predictions=[]
with torch.no_grad():
    for i,data in enumerate(X_val):
        y_pred = model(data)
        predictions.append(y_pred.argmax().item())


accuracy= accuracy_score(y_val, predictions)
precision= precision_score(y_val, predictions)
recall= recall_score(y_val,predictions)
f1= f1_score(y_val,predictions)


results_ANN = pd.DataFrame({'Model': ["ANN"],
                            'Accuracy': [accuracy],
                            'Precision ': [precision],
                            'Recall': [recall],
                            'f1': [f1],})

print("\n\n", "ANN with Feature Selection and Data Augmentation\n", results_ANN, "\n\n")



 ANN with Feature Selection and Data Augmentation
   Model  Accuracy  Precision     Recall        f1
0   ANN  0.762431    0.716981  0.575758  0.638655 


