In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split as tts

from sklearn.metrics import accuracy_score, recall_score, precision_score

# Load the raw table and discard its first column before any further processing.
data = pd.read_csv('data.csv')
data = data.drop(columns=data.columns[0])

# SVM targets must be +1 / -1 (not 1 / 0): malignant 'M' -> +1, benign 'B' -> -1.
diagnosis_mapping = {'M': 1, 'B': -1}
data['diagnosis'] = data['diagnosis'].map(diagnosis_mapping)

# Column 0 of the remaining table is taken as the target and everything after it
# as features (presumably column 0 is 'diagnosis' -- verify against the CSV layout).
y = data.iloc[:, 0]
x = data.iloc[:, 1:]


In [None]:
def remove_correlated_features(X, correlation_threshold=0.9):
  """Drop every column that is highly correlated with an earlier column.

  For each pair of columns (i, j) with i < j, column j is dropped when the
  pairwise correlation reported by ``X.corr()`` is strictly greater than
  ``correlation_threshold``. Only large *positive* correlations trigger a
  drop; strongly negative correlations are left alone.

  Parameters
  ----------
  X : pandas.DataFrame
    Feature table. It is modified in place.
  correlation_threshold : float, default 0.9
    Columns whose correlation with an earlier column exceeds this value
    are removed.

  Returns
  -------
  pandas.DataFrame
    The same object that was passed in, with the redundant columns removed.
  """
  corr = X.corr()

  # Boolean matrix of "too correlated" pairs; NaN entries compare as False.
  too_correlated = corr.to_numpy() > correlation_threshold

  # Keep only the strict upper triangle (i < j) so a column is never compared
  # with itself and it is always the later column of a pair that gets dropped.
  drop_columns = np.triu(too_correlated, k=1).any(axis=0)

  # Take the names from the correlation matrix itself so the mask length always
  # matches, even if corr() did not return one row per column of X.
  columns_to_be_dropped = corr.columns[drop_columns]
  X.drop(columns_to_be_dropped, axis=1, inplace=True)   # in place: callers may hold a reference to X

  return X

In [None]:
# Remove highly correlated feature columns before building the design matrix.
X = remove_correlated_features(x)

# Feature matrix as a fresh, C-ordered float array: one row per sample and one
# column per remaining feature, in the same order as X.columns.
list_ = np.array(X.to_numpy(), dtype=float, order='C')

# Target vector as a float array, taken from the mapped 'diagnosis' column.
label = np.array(data['diagnosis'], dtype=float)

In [None]:
def normalize_data_with_bias(my_data):
  """Standardise each column of ``my_data`` and prepend a column of ones.

  Parameters
  ----------
  my_data : array-like, 2-D
    One row per sample, one column per feature.

  Returns
  -------
  mu_arr : numpy.ndarray, shape (1, D)
    Per-column mean of ``my_data``.
  sig_arr : numpy.ndarray, shape (1, D)
    Per-column (population) standard deviation of ``my_data``.
  normalized_data_with_bias : numpy.ndarray, shape (N, D + 1)
    Column 0 is the constant bias term 1; the remaining columns are
    ``(my_data - mu) / sigma``. A column with zero standard deviation is
    mapped to all zeros instead of producing NaN.
  """
  my_data = np.asarray(my_data, dtype=float)

  # keepdims=True yields 1xD arrays that broadcast against the NxD input,
  # so no explicit repetition of the statistics is needed.
  mu_arr = np.mean(my_data, axis=0, keepdims=True)
  sig_arr = np.std(my_data, axis=0, keepdims=True)

  # Avoid 0/0 on constant columns: dividing their zero numerator by 1 gives 0.
  safe_sig = np.where(sig_arr == 0, 1.0, sig_arr)
  normalized_data = (my_data - mu_arr) / safe_sig

  # Size the bias column from the argument itself, not from any global table,
  # so the function works for any input (e.g. a train or test subset).
  bias_column = np.ones((my_data.shape[0], 1))
  normalized_data_with_bias = np.append(bias_column, normalized_data, axis=1)

  return (mu_arr, sig_arr, normalized_data_with_bias)


In [None]:
def gradient_descent(d_train, label, weight, learning_rate,max_iteration):
  """Run a fixed number of full-batch gradient-descent steps.

  Parameters
  ----------
  d_train : training samples, passed through to the gradient and cost helpers.
  label : targets for ``d_train``, passed through unchanged.
  weight : initial weight vector. It is not modified; each step builds a new array.
  learning_rate : step size multiplied with the gradient.
  max_iteration : number of update steps to perform.

  Returns
  -------
  The weight vector after ``max_iteration`` updates. The final cost is printed
  once before returning.
  """
  for _ in range(max_iteration):
    gradient = calculate_weight_using_gutter(d_train, label, weight)
    weight = weight - gradient * learning_rate

  print("Final Cost Is",calculate_cost_after_every_iteration(d_train,label,weight))

  return weight


In [None]:
def calculate_weight_using_gutter(train_data,label,weight, penalty=None):
  """Average gradient of the soft-margin objective over all samples.

  A sample violates the margin when ``1 - y_i * (x_i . w) > 0``. Its
  contribution is ``w - penalty * y_i * x_i``; every other sample contributes
  ``w``. The mean over the N samples is therefore
  ``w - penalty * sum_{violating i}(y_i * x_i) / N``, which is what is returned.

  Parameters
  ----------
  train_data : array-like, shape (N, D)
  label : array-like, shape (N,)
  weight : array-like, shape (D,)
  penalty : float, optional
    Weight of the hinge term. When omitted, the module-level
    ``lagrange_multiplier`` is used.

  Returns
  -------
  numpy.ndarray, shape (D,)

  Raises
  ------
  ValueError
    If ``train_data`` contains no samples.
  """
  # Fall back to the notebook-level constant so existing three-argument calls keep working.
  strength = lagrange_multiplier if penalty is None else penalty

  train_data = np.asarray(train_data, dtype=float)
  label = np.asarray(label, dtype=float)
  weight = np.asarray(weight, dtype=float)

  N = train_data.shape[0]
  if N == 0:
    raise ValueError("train_data must contain at least one sample")

  distances = 1 - label * np.dot(train_data, weight)
  violating = distances > 0   # NaN distances compare False, i.e. count as non-violating

  # Sum of y_i * x_i over the violating samples; zeros when there are none.
  hinge_term = np.dot(label[violating], train_data[violating])

  return weight - strength * hinge_term / N


In [None]:
def calculate_cost_after_every_iteration(X,Y,W, penalty=None):
  """Regularised hinge-loss cost of weight vector ``W`` on ``(X, Y)``.

  Computes ``0.5 * (W . W) / N + penalty * mean(max(0, 1 - Y * (X . W)))``
  where N is the number of entries in ``Y``.

  Parameters
  ----------
  X : array-like, shape (N, D)
  Y : array-like, shape (N,)
  W : array-like, shape (D,)
  penalty : float, optional
    Weight of the hinge term. When omitted, the module-level
    ``lagrange_multiplier`` is used.

  Returns
  -------
  float
  """
  # Fall back to the notebook-level constant so existing three-argument calls keep working.
  strength = lagrange_multiplier if penalty is None else penalty

  Y = np.asarray(Y, dtype=float)
  no_of_training_examples = Y.shape[0]

  # Hinge distances, clipped at zero (samples outside the margin cost nothing).
  distances = np.maximum(0, 1 - Y * np.dot(X, W))
  hinge_loss = strength * (np.sum(distances) / no_of_training_examples)

  # NOTE(review): the L2 term here is divided by the sample count, whereas the
  # gradient in calculate_weight_using_gutter uses the un-scaled weight vector,
  # so this value is not exactly the objective being descended -- confirm which
  # scaling is intended.
  cost = 0.5 * (np.dot(W, W) / no_of_training_examples) + hinge_loss
  return cost

  

In [None]:
# Standardise the features and prepend the bias column.
# NOTE(review): mu/sigma are computed on the full data set before the split
# below, so the held-out rows influence the normalisation -- consider fitting
# the statistics on the training split only.
mu, sigma, d_train = normalize_data_with_bias(list_)
print(d_train.shape)

# Training hyper-parameters. lagrange_multiplier is read as a module-level name
# by the gradient and cost helpers, so it must be defined before training runs.
weight=np.zeros(d_train.shape[1])
learning_rate= 0.000009
max_iteration=2000
lagrange_multiplier=10000

print("splitting dataset into train and test sets...")
X_train, X_test, y_train, y_test = tts(d_train, label, test_size=0.2, random_state=42)
wt=gradient_descent(X_train,y_train,weight,learning_rate,max_iteration)
print(wt)

# Predicted class is the sign of the linear decision value, for all rows at once.
y_train_predicted = np.sign(np.dot(X_train, wt))

print("*********************************************************")
print("*********************************************************")
print("*********************************************************")

# Metrics on the training split (labelled as such, and precision uses precision_score).
print("accuracy on train dataset: {}".format(accuracy_score(y_train, y_train_predicted)))
print("recall on train dataset: {}".format(recall_score(y_train, y_train_predicted)))
print("precision on train dataset: {}".format(precision_score(y_train, y_train_predicted)))

print("*********************************************************")
print("*********************************************************")
print("*********************************************************")

y_test_predicted = np.sign(np.dot(X_test, wt))

# Metrics on the held-out split.
print("accuracy on test dataset: {}".format(accuracy_score(y_test, y_test_predicted)))
print("recall on test dataset: {}".format(recall_score(y_test, y_test_predicted)))
print("precision on test dataset: {}".format(precision_score(y_test, y_test_predicted)))
