# Week 18
## Intro to Neural Networks

In [1]:
# Dependencies and Modules:

import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report

# diabetes.csv file:
diabetes_path = "C:/Users/Nik/Documents/diabetes.csv"
diabetes_df = pd.read_csv(diabetes_path)
diabetes_df.head(1)

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1


## 1. Look up the Adam optimization functions in PyTorch https://pytorch.org/docs/stable/optim.html. 
How does it work? Try at least one other optimization function with the diabetes dataset shown in class. How does the model perform with the new optimizer? Did it perform better or worse than Adam? Why do you think that is?

In [5]:
# preparing my training and test sets:
X = diabetes_df.drop('Outcome', axis=1).values
y = diabetes_df['Outcome'].values

X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3, random_state=42, stratify=y)

sc=StandardScaler()
X_train = sc.fit_transform(X_train)
X_test=sc.fit_transform(X_test)

In [6]:
# converting the sets into tensors for the optimizers:
#convert to tensors
X_train = torch.FloatTensor(X_train) 
X_test = torch.FloatTensor(X_test)

y_train = torch.LongTensor(y_train) 
y_test = torch.LongTensor(y_test)

In [7]:
# Creating a class for the Advanced Neural Network module:
class ANN_Model(nn.Module):
    
    #I checked documentation to choose best parameters:
    def __init__(self, input_features=8, hidden1=20, hidden2=20, out_features=2):
        
        # I use the keyword 'super' to iolates changes and ensure
        # children are calling the right parents:
        super().__init__() 
        
        self.layer_1_connection = nn.Linear(input_features, hidden1)
        self.layer_2_connection = nn.Linear(hidden1, hidden2)
        self.out = nn.Linear(hidden2, out_features)
        
    def forward(self, x):
        #apply the functions:
        x = F.relu(self.layer_1_connection(x))
        x = F.relu(self.layer_2_connection(x))
        x = self.out(x)
        return x

In [9]:
# Here we go!

torch.manual_seed(42)

# Instantiate the model:
ann = ANN_Model()

# Define a loss function:
loss_function = nn.CrossEntropyLoss()

# Create a list of all the optimizers I want to check:
optimizer_list = [torch.optim.Adadelta, torch.optim.Adagrad, torch.optim.Adam, torch.optim.AdamW,
                  torch.optim.Adamax, torch.optim.ASGD, torch.optim.NAdam, torch.optim.RAdam, 
                  torch.optim.RMSprop,torch.optim.Rprop, torch.optim.SGD]

# Here is the for loop to test the different optimizers:
for x in optimizer_list:
    
    optimizer = x(ann.parameters(), lr=0.1)
    
    #run model through multiple iterations (or epochs)
    final_loss = []
    n_epochs = 501
    for epoch in range(n_epochs):
        y_pred = ann.forward(X_train)
        loss = loss_function(y_pred, y_train)
        final_loss.append(loss)
        # Printing loss value at final epoch:
        if epoch == 500:
            print(f'Optimizer {x}: Epoch number {epoch} with loss, {loss}')

        # Fire up each optimizer with .zero_grad which will zero the gradient before 
        # running backwards propagation for meaningful comparison:
        optimizer.zero_grad()
        loss.backward()
        
        optimizer.step()
        
        if epoch == 500:
        # Make the predictions: 
            y_pred = []

            with torch.no_grad():
                for i, data in enumerate(X_test):
                    prediction = ann(data)
                    y_pred.append(prediction.argmax())
                    
# Print a report for each optimizer after final epoch:
            print(classification_report(y_test, y_pred))

Optimizer <class 'torch.optim.adadelta.Adadelta'>: Epoch number 500 with loss, 0.43025681376457214
              precision    recall  f1-score   support

           0       0.77      0.85      0.81       150
           1       0.66      0.54      0.59        81

    accuracy                           0.74       231
   macro avg       0.72      0.69      0.70       231
weighted avg       0.73      0.74      0.73       231

Optimizer <class 'torch.optim.adagrad.Adagrad'>: Epoch number 500 with loss, 0.11430498212575912
              precision    recall  f1-score   support

           0       0.75      0.75      0.75       150
           1       0.54      0.54      0.54        81

    accuracy                           0.68       231
   macro avg       0.65      0.65      0.65       231
weighted avg       0.68      0.68      0.68       231

Optimizer <class 'torch.optim.adam.Adam'>: Epoch number 500 with loss, 0.043085530400276184
              precision    recall  f1-score   support

   

### The report above is of the classification report and loss function determination of loss for every optimizer it made sense for us to try. The only optimizers that can be said to have outperformed Adam in some way are: AdaDelta, which had better statistical metrics. AdamW and Adamax, which had half the loss as Adam.

### I think AdaDelta had better metrics on its report because (almost identical to AdaGrad) it uses a moving window of gradient updates and keeps learning longer than other optimizers. So it makes sense that it's model had better stats.

### I think AdamW and Adamax had smaller loss than Adam because they have updated weight decay algorithms to improve on stock Adam. 

## 2. Write a function that lists and counts the number of divisors for an input value.
**Example 1:**\
Input: 5\
Output: “There are 2 divisors: 1 and 5”

**Example 2:**\
Input: 40\
Output: “There are 8 divisors: 1, 2, 4, 5, 8, 10, 20, and 40”

In [2]:
# Assigning the separator variable
separator = ", "

# Defining the function:
def factor_fun(num):
    factors = []
    for i in np.arange(1, num+1):
        if num % i == 0:
            factors.append(i)
    else:
        pass
    return f'There are {len(factors)} divisors in {num}. They are: {(separator.join(map(str, factors[:-1])))}, and {factors[-1]}'
factor_fun(12)

'There are 6 divisors in 12. They are: 1, 2, 3, 4, 6, and 12'

In [3]:
factor_fun(5)

'There are 2 divisors in 5. They are: 1, and 5'

In [4]:
factor_fun(40)

'There are 8 divisors in 40. They are: 1, 2, 4, 5, 8, 10, 20, and 40'