<a href="https://colab.research.google.com/github/Lifeisforty2/Lifeisforty2.github.io/blob/main/StochasticGD.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
'''
Machine Learning, Stochastic Gradient Descent
'''

' \nMachine Learning, Stochastic Gradient Descent\n'

In [2]:
# import all required libraries
import pandas as pd
from google.colab import drive
import numpy as np
import matplotlib.pyplot as plt

In [3]:

# Mount Google Drive into the Colab runtime so the data files can be read by path
drive.mount('/content/drive')
# Drive folder (note the trailing slash) that the data file names are appended to
basePath = "/content/drive/My Drive/Colab Notebooks/Ai/"

Mounted at /content/drive


In [4]:
# Full paths of the training and testing data files inside the Drive folder.
# NOTE: the names `train` and `test` are re-bound to functions by later cells, so the
# cell that reads the files must run before those function definitions are executed.
train = f"{basePath}gd-train.dat"
test = f"{basePath}gd-test.dat"
# The string literal below is an inert debugging snippet (it lists the Drive folder when
# pasted into a cell); as the last expression of this cell its repr is echoed as output.
'''
import os
folder_path = '/content/drive/My Drive/Colab Notebooks/Ai/'
file_list = os.listdir(folder_path)
print(file_list)
'''

"\nimport os\nfolder_path = '/content/drive/My Drive/Colab Notebooks/Ai/'\nfile_list = os.listdir(folder_path)\nprint(file_list)\n"

In [5]:
# Read the training and testing data files (whitespace-separated columns).
# The separator is a raw string so the regex escape \s is not treated as an (invalid)
# Python string escape, which newer interpreters warn about.
# header=None keeps every line of the file as a data row; the train/test functions skip
# row 0 themselves (presumably a header line - confirm against the data files).
# NOTE: `train` and `test` must still be the path strings here; later cells re-bind
# both names to functions, so re-running this cell after them will fail.
trainData = pd.read_csv(train, sep=r'\s+', header=None)
testData = pd.read_csv(test, sep=r'\s+', header=None)

In [6]:
# Activation Function - Sigmoid
def activation_function(h):
    """Return the logistic sigmoid ``1 / (1 + exp(-h))`` of the net input ``h``.

    Parameters
    ----------
    h : scalar or array-like of numbers
        Net input value(s).

    Returns
    -------
    z : numpy scalar or ndarray
        Sigmoid of ``h``, element-wise, between 0 and 1.
    """
    # Clamp the input before exponentiating: np.exp overflows (RuntimeWarning, inf) for
    # arguments above ~709. The sigmoid is already 0 or 1 to double precision far inside
    # +/-500, and inside that range the result is exactly what the unclamped formula gives.
    h = np.clip(h, -500, 500)
    z = 1 / (1 + np.exp(-h))
    return z

In [7]:
# Train the model on the given training dataset with the given learning rate
# and return the learnt weights.
def train(train_data, learning_rate=0.05):
    """Run one stochastic-gradient pass over ``train_data`` and return the weights.

    Parameters
    ----------
    train_data : pandas.DataFrame
        One instance per row; every column except the last is a feature and the
        last column is the class label. Values must be convertible to float.
    learning_rate : float, default 0.05
        Step size applied to every weight update.

    Returns
    -------
    numpy.ndarray
        Float array with one entry per feature column. The last entry is also
        the value used as the bias term (see the review note in the loop).
    """
    samples = train_data.to_numpy()
    # One weight per feature column (all columns but the trailing label), all starting at 0
    weights = np.zeros(samples.shape[1] - 1).astype(float)

    # Row 0 is never used for learning - presumably it holds the header text that
    # read_csv(header=None) keeps as data; confirm against the data files.
    for sample in samples[1:]:
        # 'features' is one multi-variate instance, 'label' its ground-truth class
        features = sample[:-1].astype(float)
        label = float(sample[-1])

        # NOTE(review): there is no separate bias slot. weights[-1] is the weight of the
        # last feature AND the bias, so it enters h twice and is updated twice below.
        # Giving the bias its own entry would change the returned shape, which test()
        # relies on, so the behaviour is kept as is.
        net_input = np.dot(weights, features) + weights[-1]
        prediction = activation_function(net_input)

        # Gradient step: every weight moves by learning_rate * (y - z) * its input ...
        step = learning_rate * (label - prediction)
        weights += step * features
        # ... and the bias entry moves once more with a constant input of 1
        weights[-1] += step * 1

    # final learnt weights
    return weights


In [8]:
# Test the model (learnt weights) on the given dataset and return the accuracy.
def test(test_data, weights, threshold):
    """Classify every instance of ``test_data`` and return the fraction classified correctly.

    Parameters
    ----------
    test_data : pandas.DataFrame
        One instance per row; every column except the last is a feature and the
        last column is the class label. Values must be convertible to float.
    weights : numpy.ndarray
        Weights as returned by ``train``; the last entry doubles as the bias term.
    threshold : float
        Sigmoid outputs >= ``threshold`` are classified as 1, all others as 0.

    Returns
    -------
    float
        Correct predictions divided by all evaluated instances (between 0 and 1).

    Raises
    ------
    ZeroDivisionError
        If ``test_data`` has no rows after the first one.
    """
    records = test_data.to_numpy()
    correct = 0
    incorrect = 0

    # Row 0 is not evaluated, mirroring how train() skips it
    for record in records[1:]:
        features = record[:-1].astype(float)
        label = float(record[-1])

        # Same net input as in training: weights[-1] is used as a feature weight and as the bias
        score = activation_function(np.dot(weights, features) + weights[-1])

        # Convert the sigmoid output to a hard 0/1 label so it can be compared with the ground truth
        predicted = 1 if score >= threshold else 0

        if predicted == label:
            correct += 1
        else:
            incorrect += 1

    # accuracy over all evaluated instances
    return correct / (correct + incorrect)


In [9]:
# Gradient Descent driver: train once, then score the learnt weights on both datasets
def gradient_descent(df_train, df_test, learning_rate=0.05, threshold=0.5):
    """Train on ``df_train`` and report the accuracy on the training and testing data.

    Parameters
    ----------
    df_train : pandas.DataFrame
        Training data, passed to ``train`` and also scored with ``test``.
    df_test : pandas.DataFrame
        Testing data, scored with ``test``.
    learning_rate : float, default 0.05
        Learning rate forwarded to ``train``.
    threshold : float, default 0.5
        Classification threshold forwarded to ``test``.

    Returns
    -------
    tuple
        ``(trainAccuracy, testAccuracy)`` as returned by ``test`` for each dataset.
    """
    learnt_weights = train(df_train, learning_rate)
    # The training set is scored first, then the testing set, both with the same weights
    return (
        test(df_train, learnt_weights, threshold),
        test(df_test, learnt_weights, threshold),
    )

In [10]:
# Decision threshold applied to the sigmoid output when testing:
# an instance is classified as 1 if the output is >= 0.5, otherwise as 0.
threshold = 0.5


In [11]:
# Main algorithm loop
if __name__ == "__main__":
    # Sweep the learning rates 0.05, 0.10, ..., 1.00 (20 values).
    # np.linspace with an explicit count is used because np.arange with a fractional
    # step can gain or lose the last element through floating-point rounding.
    learning_rates = np.linspace(0.05, 1.0, 20)
    # Round to 2 decimal places so each rate prints as a clean two-decimal value
    learning_rates = np.round(learning_rates, 2)
    # For each learning rate, train from scratch and obtain the train and test accuracy values
    for learning_rate in learning_rates:
        trainAccuracy, testAccuracy = gradient_descent(trainData, testData, learning_rate, threshold)
        # Print both accuracies as "Accuracy for LR of 0.1 on Training set = x %" and
        # "Accuracy for LR of 0.1 on Testing set = x %"
        print("Accuracy for LR of", learning_rate, "on Training set =", trainAccuracy * 100, "%")
        print("Accuracy for LR of", learning_rate, "on Testing set =", testAccuracy * 100, "%")



Accuracy for LR of 0.05 on Training set = 68.0 %
Accuracy for LR of 0.05 on Testing set = 72.25 %
Accuracy for LR of 0.1 on Training set = 68.0 %
Accuracy for LR of 0.1 on Testing set = 72.25 %
Accuracy for LR of 0.15 on Training set = 68.0 %
Accuracy for LR of 0.15 on Testing set = 72.0 %
Accuracy for LR of 0.2 on Training set = 68.0 %
Accuracy for LR of 0.2 on Testing set = 71.75 %
Accuracy for LR of 0.25 on Training set = 69.0 %
Accuracy for LR of 0.25 on Testing set = 71.25 %
Accuracy for LR of 0.3 on Training set = 69.0 %
Accuracy for LR of 0.3 on Testing set = 71.75 %
Accuracy for LR of 0.35 on Training set = 69.0 %
Accuracy for LR of 0.35 on Testing set = 71.0 %
Accuracy for LR of 0.4 on Training set = 71.0 %
Accuracy for LR of 0.4 on Testing set = 70.25 %
Accuracy for LR of 0.45 on Training set = 69.0 %
Accuracy for LR of 0.45 on Testing set = 70.0 %
Accuracy for LR of 0.5 on Training set = 69.0 %
Accuracy for LR of 0.5 on Testing set = 69.25 %
Accuracy for LR of 0.55 on Traini