# **Preprocessing & Evaluation**

> ### Mitul Agrawal

Drive Link : [PRML Lab 1 Drive](https://drive.google.com/drive/folders/1s-SjO2uPZO5FS-Wx7AxTVdquwn8q5JDf?usp=sharing)

In [None]:
# Mount Google Drive inside the Colab runtime so the lab's data files can be read
from google.colab import drive
drive.mount('/content/drive/')

# Folder prefix that is prepended to the data file names when they are loaded
path = '/content/drive/My Drive/PRML/Lab 1/'

Mounted at /content/drive/


## Preprocessing

#### Importing

In [None]:
import pandas as pd

# Load the Cars93 data set from the mounted Drive folder
df = pd.read_csv(path+'Cars93.csv')

# Preview the first three rows
df.head(3)

Unnamed: 0,Manufacturer,Model,Type,Min.Price,Price,Max.Price,MPG.city,MPG.highway,AirBags,DriveTrain,Cylinders,EngineSize,Horsepower,RPM,Rev.per.mile,Man.trans.avail,Fuel.tank.capacity,Passengers,Length,Wheelbase,Width,Turn.circle,Rear.seat.room,Luggage.room,Weight,Origin
0,Acura,Integra,Small,12.9,15.9,18.8,25,31,,Front,4,1.8,140,6300,2890,Yes,13.2,5,177,102,68,37,26.5,11.0,2705,non-USA
1,Acura,Legend,Midsize,29.2,33.9,38.7,18,25,Driver & Passenger,Front,6,3.2,200,5500,2335,Yes,18.0,5,195,115,71,38,30.0,15.0,3560,non-USA
2,Audi,90,Compact,25.9,29.1,32.3,20,26,Driver only,Front,6,2.8,172,5500,2280,Yes,16.9,5,180,102,67,37,28.0,14.0,3375,non-USA


### Cleaning Data

In [None]:
def remove_missing(df) : 
  """Drop, in place, every row of df that contains at least one missing value.

  df : DataFrame, modified in place.

  Returns None.  The surviving rows keep their original index labels.
  """
  # dropna() removes exactly the rows that hold a NaN; a label-based drop
  # would also remove valid rows that share an index label with such a row
  df.dropna(axis=0, how='any', inplace=True)

# Print the row count before and after dropping the rows with missing values
print("Number of Rows Before :",df.shape[0])

remove_missing(df)

print("Number of Rows After  :",df.shape[0])

Number of Rows Before : 93
Number of Rows After  : 82


In [None]:
def remove_mismatch(df) : 
  """Drop, in place, rows whose cell type disagrees with the rest of its column.

  A cell counts as "integer-like" when int(cell) succeeds.  For every column
  the majority kind (integer-like or not) is determined and the rows holding
  the minority kind are removed; on a tie the integer-like rows are kept.

  df : DataFrame, modified in place.  Its index is renumbered 0..n-1.

  Returns None.
  """
  def not_int(cell) :
    # True when the cell cannot be converted with int()
    try : int(cell)
    except (ValueError, TypeError, OverflowError) : return True
    return False

  for col in df : 
    # positions computed below are used as labels by drop(), so the index
    # has to be 0..n-1 before every column is inspected
    df.reset_index(drop=True, inplace=True)
    flags = [not_int(cell) for cell in df[col]]
    majority = flags.count(True) > flags.count(False)
    remove = [i for i,flag in enumerate(flags) if flag!=majority]
    df.drop(remove,axis=0,inplace=True)
  # leave a gap-free index behind: the encoders defined later in this file
  # address rows by position
  df.reset_index(drop=True, inplace=True)
      
# Print the row count before and after dropping the rows with mismatched cell types
print("Number of Rows Before :",df.shape[0])

remove_mismatch(df)

print("Number of Rows After  :",df.shape[0])

Number of Rows Before : 82
Number of Rows After  : 75


### Encoding

In [None]:
# Problem 3 - 4


# Encode Label (ordinal type) for 1 column
def encode_label(df,col,order) : 
  """Replace the labels of an ordinal column by their position in `order`.

  df    : DataFrame, modified in place.
  col   : name of the column to encode.
  order : list of the labels, lowest rank first.

  A label that does not appear in `order` is encoded as -1.  Returns None.
  """
  def rank(label) :
    try : return order.index(label)
    except ValueError : return -1

  # map() works on the values themselves, so the result does not depend on
  # the DataFrame having a 0..n-1 index
  df[col] = df[col].map(rank)

# Encode Label (ordinal type) for multiple columns  
def encode_labels(df,cols) : 
  """Label-encode several ordinal columns of df in place.

  cols : dict mapping a column name to the ordered list of its labels.
  """
  for name in cols :
    ordering = cols[name]
    encode_label(df,name,ordering)

# Encode One Hot (nominal type) for 1 column
def encode_onehot(df,col) :
  """One-hot encode a nominal column of df in place.

  For every distinct value of df[col] (in order of first appearance) a new
  0/1 column named after that value is appended, then `col` itself is
  dropped.

  df  : DataFrame, modified in place.
  col : name of the column to encode.

  Returns None.
  """
  values = df[col]
  for category in values.unique() :
    # assign a plain array so the new column is set by position and does
    # not depend on the DataFrame's index labels
    df[category] = (values == category).astype('int64').to_numpy()
  # `axis` must be passed by keyword: pandas 2.x no longer accepts it positionally
  df.drop(col,axis=1,inplace=True)

# Encode One Hot (nominal type) for multiple columns
def encode_onehots(df,cols) :
  """One-hot encode, in place, every column of df named in `cols`."""
  for name in cols :
    encode_onehot(df,name)

# Encode both types (if value of dict[column] is None then nominal 
# otherwise ordinal type) for multiple columns
def encode(df,cols) : 
  """Encode several columns of df in place, choosing the scheme per column.

  cols : dict mapping a column name either to None (nominal column, one-hot
         encoded) or to the ordered list of its labels (ordinal column,
         label encoded).
  """
  for name, ordering in cols.items() :
    if ordering is None :
      encode_onehot(df,name)
    else :
      encode_label(df,name,ordering)

# Column -> ordered label list (ordinal) or None (nominal, one-hot encoded)
labels = {
  'AirBags' : ['None','Driver only','Driver & Passenger'],
  'DriveTrain' : ['Front','Rear','4WD'],
  'Man.trans.avail' : ['No','Yes'],
  'Origin' : ['non-USA','USA'],
  'Type' : None,
}

print("Shape Before :",df.shape)

# Manufacturer and Model are not encoded: they have a lot of unique values,
# so one-hot encoding them might suppress the weight of the actual car
# specification and lead to over-fitting on just the car model and manufacturer

encode(df,labels)

print("Shape After  :",df.shape)

df.head(3)

Shape Before : (75, 26)
Shape After  : (75, 30)


Unnamed: 0,Manufacturer,Model,Min.Price,Price,Max.Price,MPG.city,MPG.highway,AirBags,DriveTrain,Cylinders,EngineSize,Horsepower,RPM,Rev.per.mile,Man.trans.avail,Fuel.tank.capacity,Passengers,Length,Wheelbase,Width,Turn.circle,Rear.seat.room,Luggage.room,Weight,Origin,Small,Midsize,Large,Compact,Sporty
0,Acura,Integra,12.9,15.9,18.8,25,31,0,0,4,1.8,140,6300,2890,1,13.2,5,177,102,68,37,26.5,11.0,2705,0,1,0,0,0,0
1,Acura,Legend,29.2,33.9,38.7,18,25,2,0,6,3.2,200,5500,2335,1,18.0,5,195,115,71,38,30.0,15.0,3560,0,0,1,0,0,0
2,BMW,535i,23.7,30.0,36.2,22,30,1,1,4,3.5,208,5700,2545,1,21.1,4,186,109,69,39,27.0,13.0,3640,0,0,1,0,0,0


### Normalization

In [None]:
# convert to ints or floats if saved as str 
def str_to_num(df) : 
  """Convert, in place, every column of df that pd.to_numeric can parse.

  Columns that cannot be converted (e.g. free text) are left untouched.
  Returns None.
  """
  for col in df : 
    try :
      df[col] = pd.to_numeric(df[col])
    except (ValueError, TypeError) :
      # best effort by design: a non-numeric column keeps its values
      pass

# Dividing by largest
def normalize_dbl(df,cols,round=None) : 
  """Divide each given column of df, in place, by that column's largest value.

  cols  : a column name or a list of column names.
  round : number of decimals to round the result to; None keeps full precision.
  """
  names = cols if type(cols) is list else [cols]
  for name in names :
    scaled = df[name].div(max(df[name]))
    if round is not None :
      scaled = scaled.round(round)
    df[name] = scaled

# Dividing by constant
def normalize_dbc(df,cols,round=None,c=1) :
  """Divide each given column of df, in place, by the constant c.

  cols  : a column name or a list of column names.
  round : number of decimals to round the result to; None keeps full precision.
  c     : the divisor shared by all columns.
  """
  if(type(cols)!=list) : cols = [cols]
  for col in cols : 
    # both branches divide by c (the unrounded branch used to reference an
    # undefined name and raised NameError)
    scaled = df[col].div(c)
    if round is None : df[col] = scaled
    else : df[col] = scaled.round(round)

# Dividing by constant x largest
def normalize_dblc(df,cols,round=None,c=1) :
  """Divide each given column of df, in place, by (its largest value * c).

  cols  : a column name or a list of column names.
  round : number of decimals to round the result to; None keeps full precision.
  c     : constant multiplied onto the column maximum to form the divisor.
  """
  names = cols if type(cols) is list else [cols]
  for name in names :
    divisor = max(df[name]) * c
    scaled = df[name].div(divisor)
    if round is not None :
      scaled = scaled.round(round)
    df[name] = scaled

# min-max normalization
def normalize_rescale(df,cols,round=None) :
  """Min-max scale each given column of df in place: (x - min) / (max - min).

  cols  : a column name or a list of column names.
  round : number of decimals to round the result to; None keeps full precision.
  """
  names = cols if type(cols) is list else [cols]
  for name in names :
    highest = max(df[name])
    lowest = min(df[name])
    shifted = df[name] - lowest
    df[name] = shifted
    scaled = shifted.div(highest - lowest)
    df[name] = scaled if round is None else scaled.round(round)

# mean normalization
def normalize_mean(df,cols,round=None) :
  """Mean-normalize each given column of df in place: (x - mean) / (max - min).

  The mean is the column sum divided by the number of rows.

  cols  : a column name or a list of column names.
  round : number of decimals to round the result to; None keeps full precision.
  """
  if(type(cols)!=list) : cols = [cols]
  for col in cols : 
    values = df[col]
    spread = max(values) - min(values)
    mean = values.sum()/values.shape[0]
    # whole-column arithmetic: independent of the index labels and safe for
    # integer columns, unlike writing floats back cell by cell
    scaled = (values - mean).div(spread)
    if round is None : df[col] = scaled
    else : df[col] = scaled.round(round)

# Combining above normalization functions
def normalize(df,cols=None,kinds=None,round=None,c=1) :
  """Normalize columns of df in place, dispatching on the requested kind.

  cols  : a column name, a list of names, or None to select every numeric
          column that has a value above 1 or below -1.
  kinds : one kind applied to all columns, or a list with one kind per
          column.  Recognised: 'dbl', 'dbc', 'dblc', 'mean' and
          'min-max' / 'rescale' / 'scale'.  None means 'dbl'.  Any other
          value leaves the column unchanged.
  round : number of decimals to round to; None keeps full precision.
  c     : constant used by the 'dbc' and 'dblc' kinds.
  """
  if cols is None :
    cols = [
      col for col in df
      if pd.api.types.is_numeric_dtype(df[col])
      and (max(df[col])>1 or min(df[col])<-1)
    ]
  if type(cols) is not list :
    cols = [cols]
  if kinds is None :
    kinds = 'dbl'
  if type(kinds) is not list :
    kinds = [kinds]*len(cols)
  for position, kind in enumerate(kinds) :
    if kind=='dbl' :
      normalize_dbl(df,cols[position],round)
    elif kind=='dbc' :
      normalize_dbc(df,cols[position],round,c)
    elif kind=='dblc' :
      normalize_dblc(df,cols[position],round,c)
    elif kind in ['min-max','rescale','scale'] :
      normalize_rescale(df,cols[position],round)
    elif kind=='mean' :
      normalize_mean(df,cols[position],round)

# Make sure numeric-looking columns really have a numeric dtype first
str_to_num(df)
# Divide the three price columns by one shared constant (the largest
# Max.Price) and round to 4 decimals
normalize(df,['Min.Price','Price','Max.Price'],'dbc',round=4,c=max(df['Max.Price'])) 
# Every remaining numeric column with a value above 1 or below -1 is divided
# by its own maximum (the default 'dbl' kind)
normalize(df)

df.head(3)

Unnamed: 0,Manufacturer,Model,Min.Price,Price,Max.Price,MPG.city,MPG.highway,AirBags,DriveTrain,Cylinders,EngineSize,Horsepower,RPM,Rev.per.mile,Man.trans.avail,Fuel.tank.capacity,Passengers,Length,Wheelbase,Width,Turn.circle,Rear.seat.room,Luggage.room,Weight,Origin,Small,Midsize,Large,Compact,Sporty
0,Acura,Integra,0.1612,0.1988,0.235,0.543478,0.62,0.0,0.0,0.5,0.315789,0.466667,0.969231,0.76964,1,0.573913,0.833333,0.808219,0.871795,0.871795,0.822222,0.736111,0.5,0.658952,0,1,0,0,0,0
1,Acura,Legend,0.365,0.4238,0.4838,0.391304,0.5,1.0,0.0,0.75,0.561404,0.666667,0.846154,0.621838,1,0.782609,0.833333,0.890411,0.982906,0.910256,0.844444,0.833333,0.681818,0.867235,0,0,1,0,0,0
2,BMW,535i,0.2962,0.375,0.4525,0.478261,0.6,0.5,0.5,0.5,0.614035,0.693333,0.876923,0.677763,1,0.917391,0.666667,0.849315,0.931624,0.884615,0.866667,0.75,0.590909,0.886724,0,0,1,0,0,0


### Train-Validation-Test Split (using sklearn)

In [None]:
from sklearn.model_selection import train_test_split

# Features: everything except the target and the two un-encoded text columns
x = df.drop(['Price','Manufacturer','Model'], axis=1)
# Target: the Price column
y = df['Price']

def train_val_test(x,y,train_size=-1,val_size=-1,test_size=-1) : 
  """Split x and y into train, validation and test subsets.

  train_size, val_size, test_size : fractions of the whole data set.  At
  most one of them may be left at -1; it is derived as 1 minus the other two.

  Both splits use random_state=42.

  Returns x_train, x_val, x_test, y_train, y_val, y_test.
  Raises ValueError when more than one fraction is omitted.
  """
  if (train_size,val_size,test_size).count(-1) > 1 :
    raise ValueError("at most one of train_size, val_size, test_size may be omitted")

  if(train_size==-1) : train_size = 1-val_size-test_size
  # the derived value belongs to val_size; test_size needs no derivation
  # because the test set is whatever the second split leaves over
  if(val_size==-1) : val_size = 1-train_size-test_size

  x_train,x_rest,y_train,y_rest = train_test_split(x,y,train_size=train_size,random_state=42)

  # val_size is a fraction of the whole data set; rescale it to a fraction of
  # the rows that remain after the training rows were taken out
  x_val,x_test,y_val,y_test = train_test_split(x_rest,y_rest,train_size=(val_size/(1-train_size)),random_state=42)

  return x_train,x_val,x_test,y_train,y_val,y_test

# 70 % train and 10 % test; the validation fraction is left for the function to derive
x_train,x_val,x_test,y_train,y_val,y_test = train_val_test(x,y,train_size=0.7,test_size=0.1)  

# Show the resulting (rows, columns) of each feature subset
print("Training Data   :",x_train.shape)
print("Validation Data :",x_val.shape)
print("Test Data       :",x_test.shape)

Training Data   : (52, 27)
Validation Data : (15, 27)
Test Data       : (8, 27)


## Evaluation

#### Training

In [None]:
# Importing Libraries & Data

from sklearn.model_selection import train_test_split as tts
from sklearn.linear_model import LogisticRegression as LR
from sklearn.tree import DecisionTreeClassifier as DTC
from sklearn.metrics import confusion_matrix as cm
from sklearn.metrics import precision_score as ps
from sklearn.metrics import recall_score as rs
from sklearn.metrics import f1_score as f1s
from sklearn.metrics import accuracy_score

from warnings import simplefilter
from sklearn.exceptions import ConvergenceWarning
simplefilter("ignore", category=ConvergenceWarning)

# Load the Breast Cancer data set from the mounted Drive folder
data = pd.read_csv(path+"Breast Cancer.csv")

# String Labels to Numeric

# Boolean row masks are computed before any value is overwritten
condition_M = data.diagnosis == "M"
condition_B = data.diagnosis == "B"

# Encoding used throughout the evaluation below: "M" -> 0, "B" -> 1
data.loc[condition_M,"diagnosis"]=0
data.loc[condition_B,"diagnosis"]=1

data

Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,fractal_dimension_mean,radius_se,texture_se,perimeter_se,area_se,smoothness_se,compactness_se,concavity_se,concave points_se,symmetry_se,fractal_dimension_se,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst,Unnamed: 32
0,842302,0,17.99,10.38,122.80,1001.0,0.11840,0.27760,0.30010,0.14710,0.2419,0.07871,1.0950,0.9053,8.589,153.40,0.006399,0.04904,0.05373,0.01587,0.03003,0.006193,25.380,17.33,184.60,2019.0,0.16220,0.66560,0.7119,0.2654,0.4601,0.11890,
1,842517,0,20.57,17.77,132.90,1326.0,0.08474,0.07864,0.08690,0.07017,0.1812,0.05667,0.5435,0.7339,3.398,74.08,0.005225,0.01308,0.01860,0.01340,0.01389,0.003532,24.990,23.41,158.80,1956.0,0.12380,0.18660,0.2416,0.1860,0.2750,0.08902,
2,84300903,0,19.69,21.25,130.00,1203.0,0.10960,0.15990,0.19740,0.12790,0.2069,0.05999,0.7456,0.7869,4.585,94.03,0.006150,0.04006,0.03832,0.02058,0.02250,0.004571,23.570,25.53,152.50,1709.0,0.14440,0.42450,0.4504,0.2430,0.3613,0.08758,
3,84348301,0,11.42,20.38,77.58,386.1,0.14250,0.28390,0.24140,0.10520,0.2597,0.09744,0.4956,1.1560,3.445,27.23,0.009110,0.07458,0.05661,0.01867,0.05963,0.009208,14.910,26.50,98.87,567.7,0.20980,0.86630,0.6869,0.2575,0.6638,0.17300,
4,84358402,0,20.29,14.34,135.10,1297.0,0.10030,0.13280,0.19800,0.10430,0.1809,0.05883,0.7572,0.7813,5.438,94.44,0.011490,0.02461,0.05688,0.01885,0.01756,0.005115,22.540,16.67,152.20,1575.0,0.13740,0.20500,0.4000,0.1625,0.2364,0.07678,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
564,926424,0,21.56,22.39,142.00,1479.0,0.11100,0.11590,0.24390,0.13890,0.1726,0.05623,1.1760,1.2560,7.673,158.70,0.010300,0.02891,0.05198,0.02454,0.01114,0.004239,25.450,26.40,166.10,2027.0,0.14100,0.21130,0.4107,0.2216,0.2060,0.07115,
565,926682,0,20.13,28.25,131.20,1261.0,0.09780,0.10340,0.14400,0.09791,0.1752,0.05533,0.7655,2.4630,5.203,99.04,0.005769,0.02423,0.03950,0.01678,0.01898,0.002498,23.690,38.25,155.00,1731.0,0.11660,0.19220,0.3215,0.1628,0.2572,0.06637,
566,926954,0,16.60,28.08,108.30,858.1,0.08455,0.10230,0.09251,0.05302,0.1590,0.05648,0.4564,1.0750,3.425,48.55,0.005903,0.03731,0.04730,0.01557,0.01318,0.003892,18.980,34.12,126.70,1124.0,0.11390,0.30940,0.3403,0.1418,0.2218,0.07820,
567,927241,0,20.60,29.33,140.10,1265.0,0.11780,0.27700,0.35140,0.15200,0.2397,0.07016,0.7260,1.5950,5.772,86.22,0.006522,0.06158,0.07117,0.01664,0.02324,0.006185,25.740,39.42,184.60,1821.0,0.16500,0.86810,0.9387,0.2650,0.4087,0.12400,


In [None]:
# DF in np array 

# Target vector: the 0/1 diagnosis codes as an int array
Y = data.diagnosis.to_numpy().astype('int')                                     

# Feature matrix: every column except the id, the target and "Unnamed: 32"
X_data = data.drop(columns=["id","diagnosis","Unnamed: 32"])
X = X_data.to_numpy()                  

# Splitting Data

user_prompt = 0.3      # fraction of the rows held out for testing
user_enable = False    # whether the rows are shuffled before splitting

x_train,x_test,y_train,y_test = tts(X,Y,test_size=user_prompt,shuffle=user_enable)

# Training

logistic_model = LR()
logistic_model.fit(x_train,y_train)
logistic_pred = logistic_model.predict(x_test)

decision_model = DTC()
decision_model.fit(x_train,y_train)
decision_pred = decision_model.predict(x_test)
# ndarray.copy() rather than np.copy(): this notebook never imports numpy
# under the name `np`, so the latter fails on a fresh runtime
Actual = y_test.copy()

print("Actual : \n",Actual)
print("Predicted (Logistic Regression) : \n",logistic_pred)
print("Predicted (Decision Tree)       : \n",decision_pred)

Actual : 
 [1 1 0 1 1 1 1 1 1 1 0 1 1 1 1 1 0 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 0 1
 0 1 1 1 1 1 0 1 1 0 1 0 1 1 0 1 0 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 0 1 1 1
 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 0 1 0 1 1 0 1 1 1 1 1 0 0 1 0 1 0 1 1 1 1 1
 0 1 1 0 1 0 1 0 0 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 0 1 0 0 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 1]
Predicted (Logistic Regression) : 
 [1 1 0 1 1 1 1 1 1 1 0 1 1 1 1 0 0 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 0 1
 0 1 1 1 1 1 0 1 1 0 1 0 1 0 0 1 0 1 1 1 0 0 1 1 1 0 0 1 1 1 0 1 1 0 1 1 1
 1 1 1 1 0 1 1 0 1 0 1 1 1 1 1 0 1 0 1 0 0 1 1 1 1 1 0 0 1 0 1 0 1 1 1 1 1
 0 1 1 0 1 0 1 0 0 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 0 1 0 1 1 1 1 1 0 0 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 1]
Predicted (Decision Tree)       : 
 [1 1 0 1 1 1 1 1 1 1 0 1 1 1 1 0 0 1 1 0 1 1 1 0 1 1 0 1 1 1 1 1 0 1 0 0 1
 0 1 1 1 1 1 0 1 1 0 1 0 1 0 0 1 0 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 0 0 1 0
 1 1 1 1 0 1 1 0 1 1 1 1 0 1 0 0 1 0 1 1 0 1 1 1 1 1 0 0 1 0 1 0 0 0 1 

### Evaluation Metrics (Inbuilt v/s Scratch)

In [None]:
from time import time as t

def micro(now) : 
  """Return a duration given in seconds as a string of whole microseconds (truncated)."""
  microseconds = int(now * 10**6)
  return f"{microseconds}"
def acc(accuracy) :
  """Format a fraction as a percentage string such as "93.56 %".

  The value is truncated (not rounded) to two decimals of a percent.
  """
  hundredths_of_percent = int(accuracy * 10**4)
  return f"{hundredths_of_percent / 100} %"

# ANSI escape sequences used to style the printed reports
bold = '\033[1m'    # start bold text
end = '\033[0m'     # reset all styling
green = '\033[92m'  # bright green foreground
red = '\033[91m'    # bright red foreground

def print_accuracy(inbuilt,scratch,decision) :
  """Print the inbuilt and the from-scratch score of one model side by side.

  inbuilt, scratch : the two scores, as fractions.
  decision         : truthy for the decision tree, falsy for logistic regression.

  The higher score is printed in green and the lower in red; equal scores
  are printed without colour.
  """
  heading = "\nDecision Tree -" if decision else "\nLogistic Regression -"
  print(heading)
  inb = scr = ''
  if inbuilt>scratch :
    inb,scr = green,red
  elif inbuilt<scratch :
    inb,scr = red,green
  print(f"\n\t Inbuilt : {inb}{acc(inbuilt)}{end} \n\t Scratch : {scr}{acc(scratch)}{end}")

def print_time(inbuilt,scratch) :
  """Print the run time of the inbuilt and the from-scratch implementation.

  inbuilt, scratch : elapsed times in seconds; shown in microseconds.

  The shorter time is printed in green and the longer in red; equal times
  are printed without colour.
  """
  inb = scr = ''
  if inbuilt>scratch :
    inb,scr = red,green
  elif inbuilt<scratch :
    inb,scr = green,red
  print(f"\nTime Taken -\n \n\t Inbuilt : {inb}{micro(inbuilt)}{end} µs \n\t Scratch : {scr}{micro(scratch)}{end} µs")  

#### 1) Average Accuracy & Class Wise Average Accuracy

In [None]:
# Average 

def average(actual,prediction):
  """Return the fraction of positions where prediction equals actual.

  actual, prediction : arrays of class labels; actual.size positions are compared.
  """
  total = actual.size
  hits = sum(int(actual[i]==prediction[i]) for i in range(total))
  return hits/total

# Time sklearn's accuracy_score on the predictions of both models
now = t()
inbuilt_acc_logistic = accuracy_score(y_test,logistic_pred)
inbuilt_acc_decision = accuracy_score(y_test,decision_pred)
inbuilt = t()-now

# Time the from-scratch average() on the same predictions
now = t()
scratch_acc_logistic = average(y_test,logistic_pred)
scratch_acc_decision = average(y_test,decision_pred)
scratch = t()-now

# Report the scores per model, then the two elapsed times
print(bold+green+"Average"+end)
print_accuracy(inbuilt_acc_logistic,scratch_acc_logistic,False)
print_accuracy(inbuilt_acc_decision,scratch_acc_decision,True)
print_time(inbuilt,scratch)

[1m[92mAverage[0m

Logistic Regression -

	 Inbuilt : 93.56 %[0m 
	 Scratch : 93.56 %[0m

Decision Tree -

	 Inbuilt : 88.88 %[0m 
	 Scratch : 88.88 %[0m

Time Taken -
 
	 Inbuilt : [91m2343[0m µs 
	 Scratch : [92m579[0m µs


In [None]:
# Class Wise Average

def class_wise_average(actual,prediction):
  """Return [accuracy on class 0, accuracy on class 1].

  actual, prediction : arrays of 0/1 class labels.  Each entry of the result
  is the share of samples of that actual class that were predicted correctly.
  """
  correct = [0,0]
  seen = [0,0]
  for i in range(actual.size) : 
    label = actual[i]
    seen[label] += 1
    correct[label] += int(actual[i]==prediction[i])
  return [correct[0]/seen[0], correct[1]/seen[1]]

# Time the inbuilt route: row-normalise the confusion matrix so that its
# diagonal holds the per-class accuracy.  keepdims=True keeps the row sums
# as a column vector without needing `np.newaxis` (numpy is never imported
# as `np` in this notebook).
now = t()
cmx = cm(y_test,logistic_pred)
cmx = cmx.astype('float') / cmx.sum(axis=1, keepdims=True)
inbuilt_cwa_logistic = cmx.diagonal()
cmx = cm(y_test,decision_pred)
cmx = cmx.astype('float') / cmx.sum(axis=1, keepdims=True)
inbuilt_cwa_decision = cmx.diagonal()
inbuilt = t()-now

# Time the from-scratch class_wise_average() on the same predictions
now = t()
scratch_cwa_logistic = class_wise_average(y_test,logistic_pred)
scratch_cwa_decision = class_wise_average(y_test,decision_pred)
scratch = t()-now

# Report class 0 (Malignant) and class 1 (Benign) separately, then the times
print(bold+green+"Class-Wise Average"+end)
print(f"\n{red}Malignant{end}")
print_accuracy(inbuilt_cwa_logistic[0],scratch_cwa_logistic[0],False)
print_accuracy(inbuilt_cwa_decision[0],scratch_cwa_decision[0],True)
print(f"\n{red}Benign{end}")
print_accuracy(inbuilt_cwa_logistic[1],scratch_cwa_logistic[1],False)
print_accuracy(inbuilt_cwa_decision[1],scratch_cwa_decision[1],True)
print()
print_time(inbuilt,scratch)

[1m[92mClass-Wise Average[0m

[91mMalignant[0m

Logistic Regression -

	 Inbuilt : 97.43 %[0m 
	 Scratch : 97.43 %[0m

Decision Tree -

	 Inbuilt : 97.43 %[0m 
	 Scratch : 97.43 %[0m

[91mBenign[0m

Logistic Regression -

	 Inbuilt : 92.42 %[0m 
	 Scratch : 92.42 %[0m

Decision Tree -

	 Inbuilt : 86.36 %[0m 
	 Scratch : 86.36 %[0m


Time Taken -
 
	 Inbuilt : [91m5867[0m µs 
	 Scratch : [92m487[0m µs


#### 2) Precision

In [None]:
# Precision

def precision(actual,prediction):
  """Return the precision for class 1: true positives / predicted positives.

  actual, prediction : arrays of 0/1 class labels.
  """
  true_positives = 0
  for i in range(actual.size) : 
    if actual[i]==1 and prediction[i]==1 :
      true_positives += 1
  # with 0/1 labels the sum of the predictions is the number of predicted positives
  return true_positives/sum(prediction)

# Time sklearn's precision_score on the predictions of both models
now = t()
inbuilt_ps_logistic = ps(y_test,logistic_pred)
inbuilt_ps_decision = ps(y_test,decision_pred)
inbuilt = t()-now

# Time the from-scratch precision() on the same predictions
now = t()
scratch_ps_logistic = precision(y_test,logistic_pred)
scratch_ps_decision = precision(y_test,decision_pred)
scratch = t()-now

# Report the scores per model, then the two elapsed times
print(bold+green+"Precision"+end)
print_accuracy(inbuilt_ps_logistic,scratch_ps_logistic,False)
print_accuracy(inbuilt_ps_decision,scratch_ps_decision,True)
print_time(inbuilt,scratch)

[1m[92mPrecision[0m

Logistic Regression -

	 Inbuilt : 99.18 %[0m 
	 Scratch : 99.18 %[0m

Decision Tree -

	 Inbuilt : 99.13 %[0m 
	 Scratch : 99.13 %[0m

Time Taken -
 
	 Inbuilt : [91m3421[0m µs 
	 Scratch : [92m505[0m µs


#### 3) Recall

In [None]:
# Recall

def recall(actual,prediction):
  """Return the recall for class 1: true positives / actual positives.

  actual, prediction : arrays of 0/1 class labels.
  """
  true_positives = 0
  for i in range(actual.size) : 
    if actual[i]==1 and prediction[i]==1 :
      true_positives += 1
  # with 0/1 labels the sum of the actual labels is the number of actual positives
  return true_positives/sum(actual)

# Time sklearn's recall_score on the predictions of both models
now = t()
inbuilt_rs_logistic = rs(y_test,logistic_pred)
inbuilt_rs_decision = rs(y_test,decision_pred)
inbuilt = t()-now

# Time the from-scratch recall() on the same predictions
now = t()
scratch_rs_logistic = recall(y_test,logistic_pred)
scratch_rs_decision = recall(y_test,decision_pred)
scratch = t()-now

# Report the scores per model, then the two elapsed times
print(bold+green+"Recall"+end)
print_accuracy(inbuilt_rs_logistic,scratch_rs_logistic,False)
print_accuracy(inbuilt_rs_decision,scratch_rs_decision,True)
print_time(inbuilt,scratch)

[1m[92mRecall[0m

Logistic Regression -

	 Inbuilt : 92.42 %[0m 
	 Scratch : 92.42 %[0m

Decision Tree -

	 Inbuilt : 86.36 %[0m 
	 Scratch : 86.36 %[0m

Time Taken -
 
	 Inbuilt : [91m4978[0m µs 
	 Scratch : [92m458[0m µs


#### 4) F1-Score

In [None]:
# F1-Score

def F1(actual,prediction):
  """Return the F1 score for class 1: 2 * precision * recall / (precision + recall).

  actual, prediction : arrays of 0/1 class labels.
  """
  true_positives = 0
  for i in range(actual.size) : 
    if actual[i]==1 and prediction[i]==1 :
      true_positives += 1
  precision_value = true_positives/sum(prediction)
  recall_value = true_positives/sum(actual)
  return (2*precision_value*recall_value)/(precision_value+recall_value)

# Time sklearn's f1_score on the predictions of both models
now = t()
inbuilt_f1s_logistic = f1s(y_test,logistic_pred)
inbuilt_f1s_decision = f1s(y_test,decision_pred)
inbuilt = t()-now

# Time the from-scratch F1() on the same predictions
now = t()
scratch_f1s_logistic = F1(y_test,logistic_pred)
scratch_f1s_decision = F1(y_test,decision_pred)
scratch = t()-now

# Report the scores per model, then the two elapsed times
print(bold+green+"F1-Score"+end)
print_accuracy(inbuilt_f1s_logistic,scratch_f1s_logistic,False)
print_accuracy(inbuilt_f1s_decision,scratch_f1s_decision,True)
print_time(inbuilt,scratch)

[1m[92mF1-Score[0m

Logistic Regression -

	 Inbuilt : 95.68 %[0m 
	 Scratch : 95.68 %[0m

Decision Tree -

	 Inbuilt : 92.3 %[0m 
	 Scratch : 92.3 %[0m

Time Taken -
 
	 Inbuilt : [91m4408[0m µs 
	 Scratch : [92m1008[0m µs


#### 5) Sensitivity

In [None]:
# Sensitivity

def sensitivity(actual,prediction):
  """Return the sensitivity (true-positive rate) for class 1.

  actual, prediction : arrays of 0/1 class labels.  The result is the number
  of samples that are 1 in both arrays divided by the number of actual 1s.
  """
  true_positives = sum(
    int(actual[i]==1 and prediction[i]==1) for i in range(actual.size)
  )
  actual_positives = sum(actual)
  return true_positives/actual_positives

# The inbuilt reference for sensitivity is sklearn's recall_score (class 1)
now = t()
inbuilt_se_logistic = rs(y_test,logistic_pred)
inbuilt_se_decision = rs(y_test,decision_pred)
inbuilt = t()-now

# Time the from-scratch sensitivity() on the same predictions
now = t()
scratch_se_logistic = sensitivity(y_test,logistic_pred)
scratch_se_decision = sensitivity(y_test,decision_pred)
scratch = t()-now

# Report the scores per model, then the two elapsed times
print(bold+green+"Sensitivity"+end)
print_accuracy(inbuilt_se_logistic,scratch_se_logistic,False)
print_accuracy(inbuilt_se_decision,scratch_se_decision,True)
print_time(inbuilt,scratch)

[1m[92mSensitivity[0m

Logistic Regression -

	 Inbuilt : 92.42 %[0m 
	 Scratch : 92.42 %[0m

Decision Tree -

	 Inbuilt : 86.36 %[0m 
	 Scratch : 86.36 %[0m

Time Taken -
 
	 Inbuilt : [91m8678[0m µs 
	 Scratch : [92m1204[0m µs


#### 6) Specificity

In [None]:
# Specificity

def specificity(actual,prediction):
  """Return the specificity (true-negative rate): true negatives / actual negatives.

  actual, prediction : arrays of 0/1 class labels.
  """
  true_negatives = 0
  for i in range(actual.size) : 
    if actual[i]==0 and prediction[i]==0 :
      true_negatives += 1
  # with 0/1 labels, the negatives are the samples not counted by sum(actual)
  actual_negatives = len(actual)-sum(actual)
  return true_negatives/actual_negatives

# The inbuilt reference for specificity is sklearn's recall_score with
# class 0 treated as the positive label
now = t()
inbuilt_sp_logistic = rs(y_test,logistic_pred,pos_label=0)
inbuilt_sp_decision = rs(y_test,decision_pred,pos_label=0)
inbuilt = t()-now

# Time the from-scratch specificity() on the same predictions
now = t()
scratch_sp_logistic = specificity(y_test,logistic_pred)
scratch_sp_decision = specificity(y_test,decision_pred)
scratch = t()-now

# Report the scores per model, then the two elapsed times
print(bold+green+"Specificity"+end)
print_accuracy(inbuilt_sp_logistic,scratch_sp_logistic,False)
print_accuracy(inbuilt_sp_decision,scratch_sp_decision,True)
print_time(inbuilt,scratch)

[1m[92mSpecificity[0m

Logistic Regression -

	 Inbuilt : 97.43 %[0m 
	 Scratch : 97.43 %[0m

Decision Tree -

	 Inbuilt : 97.43 %[0m 
	 Scratch : 97.43 %[0m

Time Taken -
 
	 Inbuilt : [91m6130[0m µs 
	 Scratch : [92m783[0m µs
