In [1]:
#importing the libraries

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# reading the data and shuffling its rows

# Fixed seed: without it the row order, and therefore the train/test split and
# every error reported further down, would differ on each run of the notebook.
SHUFFLE_SEED = 42

df = pd.read_csv("FoDS-A1.csv")
df = df.sample(frac = 1, random_state = SHUFFLE_SEED)

In [3]:
# min-max normalisation: shift every column by its minimum and divide by its range

col_min = df.min()
col_range = df.max() - col_min
myData = (df - col_min) / col_range

X = myData.iloc[:, :-1]    # every column except the last one
y = myData.iloc[:, -1:]    # the last column, kept as a one-column DataFrame

In [4]:
# the first 70% of the rows become training data, the remaining rows testing data

splitData = int(0.7 * len(myData))
train_X, test_X = X.iloc[:splitData], X.iloc[splitData:]
train_y, test_y = y.iloc[:splitData], y.iloc[splitData:]

# coeff_list[deg][m][n] receives the fitted weight of the x1**m * x2**n term
# of the degree-`deg` model (filled in by the training function)
coeff_list = np.zeros((10, 11, 11))

In [5]:
def stocasticGD(deg, X, y, learning_rate, iterations):
    """Fit a two-variable polynomial of total degree ``deg`` by stochastic gradient descent.

    The model is the sum of ``A[m][n] * x1**m * x2**n`` over all ``m + n <= deg``,
    where ``x1`` and ``x2`` are the first and second column of ``X``.  Weights
    start as standard-normal draws and are updated after every single sample
    with step size ``learning_rate / len(X)``.

    Parameters
    ----------
    deg : int
        Maximum total degree of the polynomial.
    X : pandas.DataFrame or 2-D array-like
        Input rows; only the first two columns are read.
    y : pandas.DataFrame or array-like
        Targets, one per row of ``X``; for 2-D input the first column is used.
    learning_rate : float
        Learning rate before the division by the number of rows.
    iterations : int
        Number of full passes over the data; must be at least 1.

    Returns
    -------
    float
        Sum over all rows of ``0.5 * residual**2`` accumulated during the last
        pass, each residual being measured just before that row's update.

    Raises
    ------
    ValueError
        If ``iterations`` is smaller than 1 (there would be no error to report).

    Side effects
    ------------
    Prints the last-pass error and writes the fitted weights into the
    module-level array ``coeff_list[deg]``.
    """
    if iterations < 1:
        raise ValueError("iterations must be at least 1")

    # Convert once to plain arrays: integer indexing such as ``row[0]`` on a
    # label-indexed Series is deprecated in pandas, and per-row ``.iloc``
    # lookups are slow inside the training loop.
    inputs = np.asarray(X, dtype=float)
    targets = np.asarray(y, dtype=float)
    if targets.ndim > 1:
        targets = targets[:, 0]
    n_samples = len(inputs)

    # in_model[m, n] is True for the terms x1**m * x2**n with m + n <= deg
    exponents = np.arange(deg + 1)
    in_model = np.add.outer(exponents, exponents) <= deg

    # basis[z, m, n] = x1**m * x2**n for row z; it does not depend on the
    # weights, so it is computed once instead of on every pass.
    x1_powers = inputs[:, 0][:, None] ** exponents
    x2_powers = inputs[:, 1][:, None] ** exponents
    basis = np.where(in_model, x1_powers[:, :, None] * x2_powers[:, None, :], 0.0)

    # assigning random values for weights (one scalar draw per term, m outer, n inner)
    A = np.zeros((deg + 1, deg + 1))
    for m in range(deg + 1):
        for n in range(deg + 1 - m):
            A[m][n] = np.random.randn()

    # guard keeps an empty data set from dividing by zero; no update runs in that case
    step = learning_rate / n_samples if n_samples else 0.0

    sumItrError = 0.0
    for itr in range(iterations):
        sumItrError = 0.0

        for z in range(n_samples):    # for each row in input data
            # prediction minus target: (w0 + w1*x1 + w2*x2 + ...) - yn
            residual = np.sum(A * basis[z]) - targets[z]

            # gradient step for every weight; basis is 0 outside the model,
            # so those entries of A stay 0
            A -= step * residual * basis[z]

            sumItrError += 0.5 * residual ** 2

    print("Error of last iteration of degree ", deg, " :  " ,sumItrError)

    # store only the terms that belong to this degree's model
    coeff_list[deg, :deg + 1, :deg + 1][in_model] = A[in_model]
    return sumItrError

#     cost_history = np.zeros(iterations)

In [6]:
def testing_SGD(deg, X, y, learning_rate, iterations):
                
    for itr in range(iterations):
        sumItrError = 0
        
        for z in range(len(X)):    # for each row in input data
            row = X.iloc[z]              # contains strength, temperature
            column = y.iloc[z]       # contains pressure
            
            dotP_wx = 0                # variable for calculating error in each iteration
            
            for m in range(deg+1):                    # power of x1
                for n in range(deg+1-m):            # power of x2
                    dotP_wx += coeff_list[deg][m][n] * (row[0]**m) * (row[1]**n)    # summation of (w0+ w1*x1 + w2*x1^2 + w3*x1*x2 + w4*x2^2 ...)
                    
            dotP_wx -= column[0]                    # yn - (w0 + w1*x1 + w2*x1^2 + w3*x1*x2 + w4*x2^2 ...)
                    
            sumItrError +=  (dotP_wx**2)*0.5  
            
        if(itr == iterations - 1):
            print("Error of last iteration of degree ", deg, " :  " ,sumItrError)
            
    return sumItrError

In [7]:
#For training data: fit each degree 0-9 and remember the one with the smallest last-pass error
# NOTE(review): the degree is chosen from the training error only - confirm that is intended.

# Start from +infinity rather than a fixed number: with a finite sentinel,
# minErrorDeg would stay at -1 whenever every fit's error exceeded it.
minError = float("inf")
minErrorDeg = -1
for i in range(10):
    SGD_Error = stocasticGD(i, train_X, train_y, 0.5, 200)
    if(SGD_Error <= minError):    # "<=" keeps the higher degree on an exact tie
        minError = SGD_Error
        minErrorDeg = i
print("Degree ", minErrorDeg, " gives minimum error.")


Error of last iteration of degree  0  :   67.78127100269224
Error of last iteration of degree  1  :   34.32915728033017
Error of last iteration of degree  2  :   19.67286238206615
Error of last iteration of degree  3  :   25.91154087750624
Error of last iteration of degree  4  :   15.230666842286196
Error of last iteration of degree  5  :   19.806599801988195
Error of last iteration of degree  6  :   20.485939155481415
Error of last iteration of degree  7  :   21.182644123917676


In [None]:
# for testing data: score the selected degree on the held-out rows
test_error = testing_SGD(minErrorDeg, test_X, test_y, 0.5, 300)
print("Testing error: ", test_error)
