In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
df = pd.read_csv('dataset.csv')

In [3]:
df.head()

Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
0,842302,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,842517,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,84300903,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,84348301,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,84358402,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


# Feature Engineering Task 1

In [4]:
for i in df.columns[df.isnull().any(axis=0)]:
    mean = df[i].mean()
    print(f'Filling\t{df[i].isna().sum()} null value(s) in {i} with mean value -->\t{mean}')
    df[i].fillna(mean , inplace = True)

Filling	1 null value(s) in radius_mean with mean value -->	14.116125000000011
Filling	1 null value(s) in perimeter_mean with mean value -->	92.02346830985917
Filling	1 null value(s) in concavity_mean with mean value -->	0.08892480757042255
Filling	1 null value(s) in compactness_se with mean value -->	0.02546582922535212
Filling	2 null value(s) in area_worst with mean value -->	881.4024691358021
Filling	1 null value(s) in concavity_worst with mean value -->	0.27245536443661955


# SPLITTING INTO TRAIN-TEST SPLIT

In [5]:
df = df.sample(frac = 1)
train_size=int(0.67*len(df))
X = df.drop(columns = ['diagnosis', 'id']).values
#X = np.c_[np.ones(len(X)), X]
y = df['diagnosis'].values
y[y == 'M'] = 1
y[y == 'B'] = -1

X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]
#if you do not want to shuffle, simply do not run this again

# NORMALIZING -- FEATURE ENGINEERING TASK 2

In [6]:
X_train = (X_train - X_train.mean(axis=0))/(X_train.std(axis=0))
X_test = (X_test - X_test.mean(axis=0))/(X_test.std(axis=0)) 

# PERCEPTRON MODEL

In [7]:
class perceptronNode:
    
    def __init__(self , iters):
        self.iters = iters
        self.w = None
    
    def fit(self , X , y):
        flag = True
        c = 0
        w = np.zeros(X.shape[1])
        for _ in range(self.iters):
            for i in range(0 , len(X)):
                if(c == 80):
                    flag = False
                    break;
                if(y[i] * np.dot(w , X[i]) <= 0):
                    flag = True
                    c = 0
                    w += (y[i] * X[i])
        self.w = w
        return self.w
    
    def activation_func(self, x):
        return np.where(x>=0, 1, -1)
    
    def predict(self , X_test):
        y_vals = np.dot(X_test, self.w)
        y_predicted = np.where(y_vals>=0, 1, -1)
        
        return y_predicted
    
    def accuracy(self , y_actual , y_out):
        acc = np.sum(y_actual == y_out)/len(y_out)
        return acc
    
    def metricscore(self, y_actual, y_out):
        truepos = 0;
        falsepos = 0;
        trueneg = 0;
        falseneg = 0;
        for i in range(len(y_actual)):
            if(y_actual[i] == y_out[i]):
                if(y_actual[i] == -1):
                    trueneg = trueneg + 1;
                else:
                    truepos = truepos + 1;
            else:
                if(y_out[i] == -1):
                    falseneg = falseneg + 1;
                else:
                    falsepos = falsepos + 1;
        return truepos, falsepos, trueneg, falseneg
    
    def precision(self, y_actual, y_out):
        tp, fp, tn, fn = self.metricscore(y_actual, y_out)
        prec = tp/(tp + fp)
        return prec
    
    def recall(self, y_actual, y_out):
        tp, fp, tn, fn = self.metricscore(y_actual, y_out)
        rec = tp/(tp + fn)
        return rec

In [8]:
PM3 = perceptronNode(10000)
PM3.fit(X_train , y_train)
#train_acc1 = PM3.accuracy(y_train, PM3.predict(X_train))
test_acc1 = PM3.accuracy(y_test , PM3.predict(X_test))
#train_prec1 = PM3.precision(y_train, PM3.predict(X_train))
test_prec1 = PM3.precision(y_test , PM3.predict(X_test))
#train_rec1 = PM3.precision(y_train, PM3.predict(X_train))
test_rec1 = PM3.recall(y_test , PM3.predict(X_test))
#print('Training Accuracy ',train_acc1)
print('Testing Accuracy ',test_acc1)
#print('Training Precision ',train_prec1)
print('Testing Precision ',test_prec1)
#print('Training Recall ',train_rec1)
print('Testing Recall ',test_rec1)

Testing Accuracy  0.9361702127659575
Testing Precision  0.9710144927536232
Testing Recall  0.8701298701298701


# 