In [9]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn

## Perceptron Algorithm
Perceptron class implementing the algorithm

In [10]:
def step(z):
    return np.where(z > 0 , 1, 0)

In [11]:
class Perceptron:
    #initialising the class perceptron with parameters learning rate and n_iters denoting the epochs or no of iterations
    def __init__(self, learning_rate=0.01 , n_iters = 100):
        self.lr = learning_rate 
        self.n_iters = n_iters
        self.activation_func = step
        self.weights = None #defining the weights and bias attributes assigning them as None 
        self.bias = None

    # Fitting function of the class perceptron
    def fit(self,X,y):
        n_samples= X.shape #defining number of samples equal to the number of rows in X(i.e no of training samples)
        n_features = X.shape[1] #defining number of features equal to the number of columns in X
        
        #initialising parameters(i.e W(weights) and w_0(bias)) and setting them equal to 0 or 0 vector
        self.weights = np.zeros(n_features)
        self.bias = 0
        
        y_ = step(y)
        
        #learning weights
        for _ in range(self.n_iters): #A loop for no of iterations
                for idx,x_i in enumerate(X): 
                    linear_output = np.dot(x_i, self.weights) + self.bias #evaluating weighted sum 
                    y_predicted = self.activation_func(linear_output) #activating using step function
                
                    #Update Rule
                    update = self.lr * (y_[idx] - y_predicted) 
                    self.weights += update * x_i
                    self.bias += update #Updating bias 
    
    
    def predict(self,X):
        linear_output = np.dot(X, self.weights) + self.bias #predicting the trained dataset
        y_predicted = self.activation_func(linear_output)
        return y_predicted


In [12]:
#Confusion matrix calculation for the dataset
def confusion_matrix(y_test,y_pred):
    mat = np.zeros((2,2))
    for i in range(len(y_test)):
        if(y_test[i] == 0 and y_pred[i] == 0):
            mat[0][0] += 1
        if(y_test[i] == 0 and y_pred[i] == 1):
            mat[0][1] += 1
        if(y_test[i] == 1 and y_pred[i] == 0):
            mat[1][0] += 1
        if(y_test[i] == 1 and y_pred[i] == 1):
            mat[1][1] += 1
    return mat

## Importing Data and Data Analysis

In [13]:
df=pd.read_csv("Dsata Set for Assignment 1 (1).csv")
df.drop("id",axis=1,inplace=True)
df["diagnosis"].replace('M',1,inplace=True)
df["diagnosis"].replace('B',0,inplace=True)
columns = df.loc[:,df.columns!="diagnosis"].columns
for i in columns:
    df[i].fillna(df[i].mean(),inplace=True)
for i in columns:
    df[i] = (df[i] - df[i].mean())/(np.std(df[i]))
df

Unnamed: 0,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
0,1,1.103392,-2.073335,1.269505,0.984375,1.568466,3.283515,2.653170,2.532475,2.217515,...,1.886690,-1.359293,2.303601,2.000417,1.307686,2.616665,2.109228,2.296076,2.750622,1.937015
1,1,1.838251,-0.353632,1.686122,1.908708,-0.826962,-0.487072,-0.025439,0.548144,0.001392,...,1.805927,-0.369203,1.535126,1.889634,-0.375612,-0.430444,-0.148098,1.087084,-0.243890,0.281190
2,1,1.587601,0.456187,1.566499,1.558884,0.942210,1.052926,1.362864,2.037231,0.939685,...,1.511870,-0.023974,1.347475,1.455295,0.527407,1.082932,0.854091,1.955000,1.152255,0.201391
3,1,-0.767935,0.253732,-0.595781,-0.764464,3.283553,3.402909,1.915673,1.451707,2.867383,...,-0.281464,0.133984,-0.249939,-0.551632,3.394275,3.893397,1.989234,2.175786,6.046041,4.935010
4,1,1.758499,-1.151816,1.776870,1.826229,0.280372,0.539340,1.370402,1.428493,-0.009560,...,1.298575,-1.466770,1.338539,1.219662,0.220556,-0.313395,0.612183,0.729259,-0.868353,-0.397100
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
564,1,2.120232,0.721473,2.061489,2.343856,1.041842,0.219060,1.947082,2.320965,-0.312589,...,1.901185,0.117700,1.752563,2.014484,0.378365,-0.273318,0.663541,1.629151,-1.360158,-0.709091
565,1,1.712926,2.085134,1.615998,1.723842,0.102458,-0.017833,0.691955,1.263669,-0.217664,...,1.536720,2.047399,1.421940,1.493981,-0.691230,-0.394820,0.235402,0.733827,-0.531855,-0.973978
566,1,0.707480,2.045574,0.671393,0.577953,-0.840484,-0.038680,0.045044,0.105777,-0.809117,...,0.561361,1.374854,0.579001,0.426597,-0.809587,0.350735,0.325638,0.414069,-1.104549,-0.318409
567,1,1.846796,2.336457,1.983115,1.735218,1.525767,3.272144,3.297694,2.658866,2.137194,...,1.961239,2.237926,2.303601,1.652242,1.430427,3.904848,3.197813,2.289985,1.919083,2.219635


## Splitting the Data into train set and test set

In [14]:
acc = 0
pre = 0
rec = 0
for i in range(10):
    df_train = df.sample(frac = 0.67)
    df_test = df.drop(df_train.index)
    
    #Splitting into target and training values
    X_cols = df.loc[:,df.columns!="diagnosis"].columns
    X_train = df_train[X_cols]
    X_train = X_train.to_numpy()
    X_test = df_test[X_cols]
    X_test = X_test.to_numpy()
    y_train = df_train["diagnosis"]
    y_train = y_train.to_numpy()
    y_test = df_test["diagnosis"]
    y_test = y_test.to_numpy()
    
    #Implementing perceptron
    p = Perceptron(learning_rate = 0.01,n_iters = 1000)
    p.fit(X_train,y_train)
    predictions = p.predict(X_test)
    
    #Confusion Matrix calculations for accuracy,precision and recall
    c_mat = confusion_matrix(y_test,predictions)
    acc = acc + (c_mat[0][0] + c_mat[1][1])*100 /(c_mat[0][0] + c_mat[0][1] + c_mat[1][0] + c_mat[1][1])
    pre = pre + (c_mat[0][0]*100) / (c_mat[0][1] + c_mat[0][0])
    rec = rec + (c_mat[0][0])*100 / (c_mat[0][0] + c_mat[1][0])
    
print("Accuracy : ", acc/10)
print("Precision :", pre/10)
print("Recall :", rec/10)

Accuracy :  94.30851063829786
Precision : 94.24345991558924
Recall : 96.747001489827


## Differences between PM1 and PM3

By building the model on normalized data the accuracy is increased from around 85-90% to 90-95%
,precision is also increased by around 5% and recall is almost the same