#Perceptron - Pima Indians Diabetes Classification

##Prepare Packages and Google Colab

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score, precision_score, roc_auc_score

In [None]:
#Mount Google Drive at /content/gdrive so that files stored there can be read from this notebook
#force_remount=True asks Colab to mount again even if the drive is already mounted
from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)

Mounted at /content/gdrive


##Initialise Dataset

###Real World Dataset

In [None]:
#Load Dataset
from pathlib import Path

#Look for the CSV in the working directory first, then in the root of the mounted Google Drive
#NOTE(review): assumes the file sits directly under MyDrive - adjust the second path if it lives in a sub-folder
csv_candidates = [Path('diabetes.csv'), Path('/content/gdrive/MyDrive/diabetes.csv')]
#If neither location exists fall back to the original relative path so read_csv still raises FileNotFoundError
csv_path = next((path for path in csv_candidates if path.exists()), csv_candidates[0])
data = pd.read_csv(csv_path)
#Preview the first rows to confirm the file was parsed as expected
data.head()

FileNotFoundError: ignored

In [None]:
#Create a series holding only the classification (target) column
classification = data["Outcome"]
#Remove the classification column so only the feature columns remain
variables = data.drop("Outcome", axis = 1)
#Normalize the features with MinMaxScaler and turn the result back into a dataframe,
#keeping the original column names and index so the features stay identifiable
#NOTE(review): the scaler is fitted on the whole dataset before the train/test split,
#so the test rows influence the scaling - fit on the training rows only if that matters
variables = pd.DataFrame(
    MinMaxScaler().fit_transform(variables),
    columns=variables.columns,
    index=variables.index,
)
variables.head()

In [None]:
#Preview the first rows of the target column
classification.head()

In [None]:
#Hold out 20% of the rows as the test set (random_state fixed so the split is reproducible)
train_X, test_X, train_Y, test_Y = train_test_split(
    variables,
    classification,
    test_size=0.2,
    random_state=0,
)
#Print the shape of each of the four subsets, one per line, to check their sizes
print(*(subset.shape for subset in (train_X, test_X, train_Y, test_Y)), sep="\n")

In [None]:
#Convert the four train/test subsets into numpy arrays in one pass
train_X, test_X, train_Y, test_Y = (
    np.array(subset) for subset in (train_X, test_X, train_Y, test_Y)
)

###Toy Dataset

For the toy dataset, the iris dataset was used, with some data preparation demonstrated by Jun Li in his week 10 demonstration (https://colab.research.google.com/drive/1sdyzxla7RjCCrRWmlHVUXnIRtfyK4gCs?usp=sharing). The goal is to simplify the iris dataset into a binary dataset with only 100 data points, so that the Perceptron can be tested on an easy dataset to validate that the algorithm works as expected.

In [None]:
#Load the iris dataset
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
data = load_iris()
#Keep only the first 100 samples and the first 2 attributes of each sample
X = data.data[:100, :2]
#Take the matching first 100 target labels
y = data.target[:100]
#Split into training and testing sets with 20% of the samples going to the test side
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

##Design the Perceptron

In [None]:
class Perceptron:
  """Single-unit perceptron for binary (0/1) classification.

  A sample x is classified as 1 when ``dot(x, weights) + bias >= 0`` and as 0
  otherwise. ``fit`` learns ``weights`` and ``bias`` with the perceptron rule:
  each misclassified sample moves both towards the correct side by
  ``learning_rate``.
  """

  #Create the constructor for the Perceptron and initialise parameters
  def __init__(self, learning_rate = 0.1, epochs = 10):
    """Store the hyper-parameters; weights and bias are created by fit().

    learning_rate: step size applied on every misclassified sample.
    epochs: number of full passes over the training data made by fit().
    """
    self.learning_rate = learning_rate
    self.epochs = epochs
    self.weights = None
    self.bias = None

  #create an activation function
  def activation_func(self, x):
    """Step activation: return 1 if the net input x is >= 0, otherwise 0."""
    #the bias is already included in the net input (see weighted_sum), so the threshold is
    #fixed at 0 - comparing against self.bias would cancel the bias out of the decision
    return 1 if x >= 0 else 0

  #create the weighted sum for the hypothesis space
  def weighted_sum(self, x):
    """Return the net input for one sample: dot(x, weights) + bias."""
    return np.dot(x, self.weights) + self.bias

  #Fit the data to the model
  def fit(self, X, y):
    """Train on samples X (2-D, one row per sample) with 0/1 labels y.

    Resets weights and bias to zero, then makes exactly ``self.epochs`` passes
    over the data, printing the accuracy measured during each pass. Returns None.
    """
    #convert the inputs to numpy arrays so a DataFrame/Series is iterated row by row
    X = np.asarray(X)
    y_array = np.asarray(y).ravel()
    #make the weights initially all zero based on the number of features in the data
    self.weights = np.zeros(X.shape[1])
    #initialise the bias to be 0
    self.bias = 0.0

    #range(self.epochs) gives exactly `epochs` passes numbered from 0
    for epoch in range(self.epochs):
      #number of samples predicted correctly during the current epoch
      n_correct = 0
      for sample, target in zip(X, y_array):
        #calculate the predicted value for this sample with the current parameters
        predicted_value = self.activation_func(self.weighted_sum(sample))

        if predicted_value == target:
          n_correct += 1
        #false negative: raise the net input by increasing the weights and the bias
        elif target == 1 and predicted_value == 0:
          self.weights = self.weights + self.learning_rate * sample
          self.bias = self.bias + self.learning_rate
        #false positive: lower the net input by decreasing the weights and the bias
        elif target == 0 and predicted_value == 1:
          self.weights = self.weights - self.learning_rate * sample
          self.bias = self.bias - self.learning_rate

      #accuracy of the predictions made during the current epoch (0.0 when there are no samples)
      curr_accuracy = n_correct / len(X) if len(X) else 0.0
      print("No. Epoch:", epoch, ",", "Accuracy:", curr_accuracy)

  #create the predict function
  def predict(self, X):
    """Return a numpy array with the predicted 0/1 label for every row of X."""
    #np.asarray makes a DataFrame iterate over rows instead of column names
    return np.array([
      self.activation_func(self.weighted_sum(sample))
      for sample in np.asarray(X)
    ])
  



##Test the Perceptron on the Toy Dataset

In [None]:
#construct perceptron with the default hyper-parameters (learning_rate = 0.1, epochs = 10)
perceptron = Perceptron()
#fit the toy training split to the perceptron; fit prints the accuracy of every epoch
perceptron.fit(X_train, y_train)

In [None]:
#run the perceptron's predict function on the toy test data
pred = perceptron.predict(X_test)
#show the predicted values and actual values side by side for a visual comparison
print("predictions", pred)
print("actual outcomes", y_test)
#display the accuracy of the model on the toy test set
print("Accuracy:", accuracy_score(y_test, pred))

##Train the Perceptron on the Real-World Dataset

In [None]:
#build a new perceptron that trains for 100 epochs instead of the default 10
perceptron = Perceptron(learning_rate=0.1, epochs=100)
#train it on the diabetes training split
perceptron.fit(train_X, train_Y)

##Evaluation of the Perceptron


In [None]:
#run the perceptron's predict function on the real-world test data
pred = perceptron.predict(test_X)
#check the shape of the prediction array to ensure the prediction function produced the right format
#(predict returns one label per row of its input)
pred.shape

In [None]:
#compare the predicted results with the true results
print("predictions", pred)
print("actual outcomes", test_Y)
print("confusion matrix:")
#left as the last expression of the cell so the notebook displays the matrix
confusion_matrix(test_Y, pred)

In [None]:
#report every evaluation metric on the test set, in a fixed order, from one table
for metric_label, metric_fn in (
    ("Accuracy:", accuracy_score),
    ("Precision:", precision_score),
    ("F1 score:", f1_score),
    ("Area Under the Curve Score:", roc_auc_score),
):
    print(metric_label, metric_fn(test_Y, pred))