In [1]:
import pandas as pd
import torch

diabetes_df = pd.read_csv("diabetes.csv")
diabetes_df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [36]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

X = diabetes_df.drop('Outcome', axis=1).values
y = diabetes_df['Outcome'].values

# Split into training and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state=42, stratify=y)

# #Standardize
sc= StandardScaler()
X_train=sc.fit_transform(X_train)
X_test=sc.fit_transform(X_test)

In [37]:
import torch.nn as nn
import torch.nn.functional as F #this has activation functions

# Creating tensors
X_train = torch.FloatTensor(X_train)
X_test = torch.FloatTensor(X_test)

y_train = torch.LongTensor(y_train)
y_test = torch.LongTensor(y_test)

print(X_train)

tensor([[-0.8514, -0.9801, -0.4048,  ..., -0.6077,  0.3108, -0.7922],
        [ 0.3566,  0.1614,  0.4654,  ..., -0.3021, -0.1164,  0.5610],
        [-0.5494, -0.5045, -0.6223,  ...,  0.3726, -0.7649, -0.7076],
        ...,
        [-0.8514, -0.7582,  0.0303,  ...,  0.7800, -0.7861, -0.2847],
        [ 1.8665, -0.3142,  0.0303,  ..., -0.5695, -1.0194,  0.5610],
        [ 0.0546,  0.7322, -0.6223,  ..., -0.3149, -0.5770,  0.3073]])


In [38]:
class ANN_Model(nn.Module):
    def __init__(self, input_features=8, hidden1=20, hidden2=20, out_features =2):
        super().__init__()
        self.layer_1_connection = nn.Linear(input_features, hidden1)
        self.layer_2_connection = nn.Linear(hidden1, hidden2)
        self.out = nn.Linear(hidden2, out_features)
    
    def forward(self, x):
        #apply activation functions
        x = F.relu(self.layer_1_connection(x))
        x = F.relu(self.layer_2_connection(x))
        x = self.out(x)
        return x

In [39]:
torch.manual_seed(42)

#instantiate the model
model = ANN_Model()

In [40]:
# loss function
loss_function = nn.CrossEntropyLoss()

#optimizer
optimizer = torch.optim.Rprop(model.parameters(), lr = 0.01)

In [41]:
#run model through multiple epochs/iterations
final_loss = []
n_epochs = 500
for epoch in range(n_epochs):
    y_pred = model.forward(X_train)
    loss = loss_function(y_pred, y_train)
    final_loss.append(loss)
    
    if epoch % 10 == 1:
        print(f'Epoch number: {epoch} with loss: {loss.item()}')
    
    optimizer.zero_grad() #zero the gradient before running backwards propagation
    loss.backward() #for backward propagation 
    optimizer.step() #performs one optimization step each epoch
    

Epoch number: 1 with loss: 0.6474142670631409
Epoch number: 11 with loss: 0.45198854804039
Epoch number: 21 with loss: 0.3870345950126648
Epoch number: 31 with loss: 0.3395462930202484
Epoch number: 41 with loss: 0.30658769607543945
Epoch number: 51 with loss: 0.2752005457878113
Epoch number: 61 with loss: 0.25215259194374084
Epoch number: 71 with loss: 0.22651627659797668
Epoch number: 81 with loss: 0.20444045960903168
Epoch number: 91 with loss: 0.18579360842704773
Epoch number: 101 with loss: 0.17048318684101105
Epoch number: 111 with loss: 0.1541174203157425
Epoch number: 121 with loss: 0.13827119767665863
Epoch number: 131 with loss: 0.12396775931119919
Epoch number: 141 with loss: 0.11416137963533401
Epoch number: 151 with loss: 0.10690709203481674
Epoch number: 161 with loss: 0.10016005486249924
Epoch number: 171 with loss: 0.09251974523067474
Epoch number: 181 with loss: 0.08521822094917297
Epoch number: 191 with loss: 0.07992039620876312
Epoch number: 201 with loss: 0.07561808

In [42]:
#predictions
y_pred = []

with torch.no_grad():
    for i, data in enumerate(X_test):
        prediction = model(data)
        y_pred.append(prediction.argmax().item())



In [43]:
from sklearn.metrics import accuracy_score
a_score = accuracy_score(y_test, y_pred)
print(a_score)

0.7142857142857143


In [44]:
from sklearn.metrics import classification_report

print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.79      0.77      0.78       100
           1       0.59      0.61      0.60        54

    accuracy                           0.71       154
   macro avg       0.69      0.69      0.69       154
weighted avg       0.72      0.71      0.72       154



 Look up the Adam optimization functions in PyTorch https://pytorch.org/docs/stable/optim.html . How does it work? Try at least one other optimization function with the diabetes dataset shown in class. How does the model perform with the new optimizer? Did it perform better or worse than Adam? Why do you think that is?


Adam combines the AdaGrad and RMSProp algorithms

I tried Rprop(model.parameters(), lr = 0.01) and got a slightly better model score.   It's a backpropagation algorithm.

I think since Adam is already a mashup algorithm, it covers a lot of the good points and is probably a good default.  


This was all pretty confusing and I don't remember what a gradient is.


2. Write a function that lists and counts the number of divisors for an input value.
Example 1:
Input: 5
Output: “There are 2 divisors: 1 and 5”
Example 2:
Input: 40
Output: “There are 8 divisors: 1, 2, 4, 5, 8, 10, 20, and 40”

In [64]:
def divisors(n):
    ### a function that returns a statement with the divisors of a number
    result = []
    for i in range(1,n+1):
        if n%i == 0:
            result.append(str(i)) 
            
    # alternative code, but going through the list twice instead of once?
    # import numpy as np
    # result = np.arange(1, n+1, 1)
    # result = [x for x in result if n%x==0]
    
    # for only two divisors
    output = 'There are ' + str(len(result)) + ' divisors: '
    if len(result) == 2:
        output += result[0] + ' and ' + result[1]
        return output
    
    # for 3 or more divisors
    for j in range(len(result)):
        if j < len(result) - 1:
            output += result[j] + ', '
        else:
            output += 'and ' + result[j]
     
    return output




In [70]:
divisors(47)

'There are 2 divisors: 1 and 47'

In [77]:
divisors(5)

'There are 2 divisors: 1 and 5'

In [78]:
divisors(40)

'There are 8 divisors: 1, 2, 4, 5, 8, 10, 20, and 40'

In [79]:
divisors(99)

'There are 6 divisors: 1, 3, 9, 11, 33, and 99'