# Model Training with PyTorch

Train a model that predicts whether or not a patient has diabetes, based on medical features. 

### 1. Import the required libraries and packages.

In [None]:
from typing import List, Dict

import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

### 2. Load the data into a Pandas dataframe.

In [None]:
# Read the CSV file from the local ./data directory into a dataframe
data = pd.read_csv('./data/diabetes.csv')

Split the data into a features dataframe (`X`) and a target series (`y`).

In [None]:
# Target: the 'Outcome' column that the model learns to predict
y = data['Outcome']
# Features: every remaining column
X = data.drop(columns='Outcome')

Inspect the first rows of the features dataframe and of the target series.

In [None]:
X.head()

In [None]:
y.head()

### 3. Prepare the data for training.

Divide the data into training and test data sets.

The `train_test_split` function from scikit-learn splits a data set into random train and test subsets.

In [None]:
# Hold out 20% of the samples for testing; the fixed random_state makes the
# random split reproducible between runs
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Report how many rows ended up in each subset
print(f"Number of samples in training set: {X_train.shape[0]}")
print(f"Number of samples in test set: {X_test.shape[0]}")

Encode the data as PyTorch tensors.

In [None]:
# Features become 32-bit float tensors
X_train = torch.tensor(X_train.values, dtype=torch.float32)
X_test = torch.tensor(X_test.values, dtype=torch.float32)
# Labels become 64-bit integer tensors holding the class index of each sample
y_train = torch.tensor(y_train.values, dtype=torch.long)
y_test = torch.tensor(y_test.values, dtype=torch.long)

Preview the training features tensor and its shape.

In [None]:
X_train

In [None]:
X_train.shape

Preview the training target value tensor and its shape.

In [None]:
y_train

In [None]:
y_train.shape

### 4. Create and train the model.

Define a simple neural network model with PyTorch.
The network takes eight input features and outputs two values, one for each possible outcome: diabetes or no diabetes.
Between the input and the output, the network has two hidden layers with 20 and 10 neurons, respectively.

In [None]:
# Seed for reproducible results
torch.manual_seed(20)


class ANN_model(nn.Module):
    """Feed-forward classifier with two ReLU hidden layers and a linear output.

    The output layer applies no activation, so ``forward`` returns one raw
    score per target for each input row.

    Args:
        num_input_features: Size of each input feature vector.
        num_neurons_layer1: Width of the first hidden layer.
        num_neurons_layer2: Width of the second hidden layer.
        num_targets: Number of output scores per sample.
    """

    def __init__(
        self,
        num_input_features=8,
        num_neurons_layer1=20,
        num_neurons_layer2=10,
        num_targets=2,
    ):
        super().__init__()
        # Fully connected layers, created in input-to-output order
        self.layer1 = nn.Linear(
            in_features=num_input_features, out_features=num_neurons_layer1
        )
        self.layer2 = nn.Linear(
            in_features=num_neurons_layer1, out_features=num_neurons_layer2
        )
        self.out = nn.Linear(
            in_features=num_neurons_layer2, out_features=num_targets
        )

    def forward(self, X):
        """Return the output-layer scores for the input tensor ``X``."""
        activations = X
        # Each hidden layer is followed by a ReLU non-linearity
        for hidden_layer in (self.layer1, self.layer2):
            activations = F.relu(hidden_layer(activations))
        # The output layer is purely linear
        return self.out(activations)

Instantiate the model and define the loss function, the optimizer, and the training epochs.

In [None]:
# Build the network with its default layer sizes
model = ANN_model()

# == Backward Propagation Configuration ==
# CrossEntropyLoss is a common loss function for classification
loss_function = nn.CrossEntropyLoss()
# Use the Adam optimizer with a learning rate of 0.01
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
# Number of training iterations to run
epochs = 500

Train the model.

In [None]:
# Full-batch training: every epoch runs one forward and one backward pass over
# the complete training set.
for epoch in range(epochs):
    # Call the module itself instead of .forward() so that any registered
    # hooks are executed as part of the forward pass
    y_pred = model(X_train)
    loss = loss_function(y_pred, y_train)

    # Clear the gradients of the previous step, backpropagate the new loss,
    # and update the model weights
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Report progress every 10 epochs
    if epoch % 10 == 0:
        print(f"Epoch: {epoch}. Loss: {loss.item()}")

### 5. Evaluate the model metrics.

After the model is trained, evaluate the model against the test set.

In [None]:
# Compute the predictions (y_predictions) given the test data
# Compute the predictions (y_predicted) for the whole test set in a single
# batched forward pass. Gradient tracking is disabled because no backward
# pass is needed for evaluation.
with torch.no_grad():
    test_scores = model(X_test)

# The predicted class of each sample is the index of its largest output score.
# Convert to a NumPy array so scikit-learn receives plain integer labels.
y_predicted = test_scores.argmax(dim=1).numpy()

# Compare the predicted values for the test set (y_predicted)
# against the expected values (y_test)
print("Classification Report:")
print(classification_report(y_test.numpy(), y_predicted))

In this run, the trained model reaches an accuracy of about 79% on the test set.

You can improve the score by retraining the model after more sophisticated data engineering or by tweaking the model's hyperparameters.

### 6. Test the model with sample cases.
Test the model with data from two patients: one patient with diabetes and one patient without diabetes.

In [None]:
# Tuple for textual display of prediction
classes = ('No diabetes', 'Diabetes')


def predict(patients: List[Dict]) -> List[str]:
    """Return the predicted class label for each patient record.

    Args:
        patients: One dict of feature values per patient. The dicts are
            converted to a dataframe and its values are passed to the model
            positionally, so the column order follows the key order of the
            dicts. NOTE(review): the keys are presumably expected in the same
            order as the training feature columns - confirm against the CSV.

    Returns:
        A list with one entry of ``classes`` per patient, in input order.
        An empty input yields an empty list.
    """
    # Nothing to classify; this also avoids passing an empty tensor to the model
    if not patients:
        return []

    inputs_dataframe = pd.DataFrame(patients)
    inputs_tensor = torch.FloatTensor(inputs_dataframe.values)

    # Inference only: skip gradient tracking and score all patients in one batch
    with torch.no_grad():
        predicted_indices = model(inputs_tensor).argmax(dim=1).tolist()

    # Map each predicted class index to its display label
    return [classes[index] for index in predicted_indices]


# Sample record used as the "patient with diabetes" test case
diabetes_patient = {
    "Pregnancies": 6.0,
    "Glucose": 110.0,
    "BloodPressure": 65.0,
    "SkinThickness": 15.0,
    "Insulin": 1.0,
    "BMI": 45.7,
    "DiabetesPedigreeFunction": 0.627,
    "Age": 50
}

# Sample record used as the "patient without diabetes" test case
no_diabetes_patient = {
    "Pregnancies": 0,
    "Glucose": 88.0,
    "BloodPressure": 60.0,
    "SkinThickness": 35.0,
    "Insulin": 1.0,
    "BMI": 45.7,
    "DiabetesPedigreeFunction": 0.27,
    "Age": 20
}

# Classify both sample patients and show the predicted labels
predictions = predict([diabetes_patient, no_diabetes_patient])
print(predictions)