In [11]:
import pandas as pd
import torch

diabetes_df = pd.read_csv("diabetes.csv")
diabetes_df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [12]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

X = diabetes_df.drop('Outcome', axis=1).values
y = diabetes_df['Outcome'].values

# Split into training and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state=42, stratify=y)

# #Standardize
sc= StandardScaler()
X_train=sc.fit_transform(X_train)
X_test=sc.fit_transform(X_test)

In [13]:
import torch.nn as nn
import torch.nn.functional as F #this has activation functions

# Creating tensors
X_train = torch.FloatTensor(X_train)
X_test = torch.FloatTensor(X_test)

y_train = torch.LongTensor(y_train)
y_test = torch.LongTensor(y_test)

print(X_train)

tensor([[-0.8514, -0.9801, -0.4048,  ..., -0.6077,  0.3108, -0.7922],
        [ 0.3566,  0.1614,  0.4654,  ..., -0.3021, -0.1164,  0.5610],
        [-0.5494, -0.5045, -0.6223,  ...,  0.3726, -0.7649, -0.7076],
        ...,
        [-0.8514, -0.7582,  0.0303,  ...,  0.7800, -0.7861, -0.2847],
        [ 1.8665, -0.3142,  0.0303,  ..., -0.5695, -1.0194,  0.5610],
        [ 0.0546,  0.7322, -0.6223,  ..., -0.3149, -0.5770,  0.3073]])


In [14]:
class ANN_Model(nn.Module):
    def __init__(self, input_features=8, hidden1=20, hidden2=20, out_features =2):
        super().__init__()
        self.layer_1_connection = nn.Linear(input_features, hidden1)
        self.layer_2_connection = nn.Linear(hidden1, hidden2)
        self.out = nn.Linear(hidden2, out_features)
    
    def forward(self, x):
        #apply activation functions
        x = F.relu(self.layer_1_connection(x))
        x = F.relu(self.layer_2_connection(x))
        x = self.out(x)
        return x

In [22]:
torch.manual_seed(42)

#instantiate the model
model = ANN_Model()

In [23]:
# loss function
loss_function = nn.CrossEntropyLoss()

#optimizer
optimizer = torch.optim.Adam(model.parameters(), lr = 0.01)

#optimizer #2: SGD:
#optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

#optimizer #3: SGD without momentum specified:
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

In [24]:
#run model through multiple epochs/iterations
final_loss = []
n_epochs = 500
for epoch in range(n_epochs):
    y_pred = model.forward(X_train)
    loss = loss_function(y_pred, y_train)
    final_loss.append(loss)
    
    if epoch % 10 == 1:
        print(f'Epoch number: {epoch} with loss: {loss.item()}')
    
    optimizer.zero_grad() #zero the gradient before running backwards propagation
    loss.backward() #for backward propagation 
    optimizer.step() #performs one optimization step each epoch
    

Epoch number: 1 with loss: 0.6611752510070801
Epoch number: 11 with loss: 0.659355103969574
Epoch number: 21 with loss: 0.657654881477356
Epoch number: 31 with loss: 0.6560633182525635
Epoch number: 41 with loss: 0.6545655727386475
Epoch number: 51 with loss: 0.6531487107276917
Epoch number: 61 with loss: 0.6517978310585022
Epoch number: 71 with loss: 0.6505092978477478
Epoch number: 81 with loss: 0.6492682695388794
Epoch number: 91 with loss: 0.6480698585510254
Epoch number: 101 with loss: 0.6468980312347412
Epoch number: 111 with loss: 0.6457501649856567
Epoch number: 121 with loss: 0.6446250677108765
Epoch number: 131 with loss: 0.6435220241546631
Epoch number: 141 with loss: 0.642434298992157
Epoch number: 151 with loss: 0.6413562893867493
Epoch number: 161 with loss: 0.6402857899665833
Epoch number: 171 with loss: 0.6392180919647217
Epoch number: 181 with loss: 0.6381511688232422
Epoch number: 191 with loss: 0.6370807886123657
Epoch number: 201 with loss: 0.6360084414482117
Epoch 

In [25]:
#predictions
y_pred = []

with torch.no_grad():
    for i, data in enumerate(X_test):
        prediction = model(data)
        y_pred.append(prediction.argmax().item())



In [26]:
from sklearn.metrics import accuracy_score
a_score = accuracy_score(y_test, y_pred)
print(a_score)

0.6493506493506493


In [27]:
from sklearn.metrics import classification_report

print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.65      1.00      0.79       100
           1       0.00      0.00      0.00        54

    accuracy                           0.65       154
   macro avg       0.32      0.50      0.39       154
weighted avg       0.42      0.65      0.51       154



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## In Class Exercise Starts Here

1. Look up the Adam optimization functions in PyTorch https://pytorch.org/docs/stable/optim.html . How does it work? Try at least one other optimization function with the diabetes dataset shown in class. How does the model perform with the new optimizer? Did it perform better or worse than Adam? Why do you think that is?


The goal of all optimizers in PyTorch is to minimize the loss function (error) of a model during the training phases. The loss function is minimized through changing the weights assigned at each step of the process. The way this is accomplished varies dependent upon the specific optimizer.

The Adam optimization function works to find the lowest (optimized) values for its loss functions. Adam does this through adjusting the learning rate (a model parameter) throughout the model iterations instead of keeping it as a constant value. It also takes advantage of momentum. The advantage of using momentum is that the optimization will continue even if the model encounters a low point in the loss function value. This enables the model to continue to the "true" minimum value of the loss function. Adam is a very popular and commonly used optimizer.

I decided to also apply the SGD (Stochastic Gradient) optimizer because it is one of the most commonly used optimizers currently available. As a note, Adam and SGD are based on some of the same principles so they may not be as different as some other optimizers available. 

|      Optimizer     |  accuracy     |         precision       |       recall     |
|--------------------|---------------|-------------------------|------------------|
|       Adam:        |    0.6948     |            0.70         |         0.69     |
|        SGD:        |    0.7273     |            0.73         |         0.73     |
|  SGD (no momentum) |    0.6494     |            0.42         |         0.65     |

In the end, the SGD optimizer performed slightly better for this dataset in all metrics (accuracy, precision, and recall). Of particular note, the optimized SGD performance occurred when momentum was specified for the optimizer. Without the use of momentum for SGD (so just specifying the learning rate parameter), performance dropped significantly for all 3 metrics and most precipitously for precision. Basically, you need to make sure you optimize your optimizer and when comparing optimizers make sure you compare apples to apples.


2. Write a function that lists and counts the number of divisors for an input value. Example 1: Input: 5 Output: “There are 2 divisors: 1 and 5” Example 2: Input: 40 Output: “There are 8 divisors: 1, 2, 4, 5, 8, 10, 20, and 40”

In [21]:
def divisors(num):
    divisors_list = []
    for n in range (1, num + 1):
        if num % n == 0:
            divisors_list.append(n)
   
    number_divisors = len(divisors_list)
    divisors_string = [str(element) for element in divisors_list]
    divisors_string_list = " and ".join([", ".join(divisors_string[:-1]), divisors_string[-1]] 
                                       if len(divisors_string)>2 else divisors_string)
    return(print("There are ", number_divisors, " divisors:", divisors_string_list ))

#Test Cases:
divisors(40)
divisors(1)
divisors(2)

There are  8  divisors: 1, 2, 4, 5, 8, 10, 20 and 40
There are  1  divisors: 1
There are  2  divisors: 1 and 2
