In [1]:
import pandas as analytics
import numpy as maths
import os
from matplotlib import pyplot as graph
import warnings
from math import log
from general_functions import *
from logistic_regression import *
warnings.filterwarnings("ignore")

In [33]:
def preprocess(df_raw):
    """Rename, normalise and encode a raw data frame into the (x1, ..., xn, y) layout.

    This function is data dependent: when the data set changes, the
    preprocessing has to change with it. What matters is the shape of the
    output, which should look like (x1, x2, x3, ..., xn, y).

    Steps, in order:
      1. Rename the columns to x1..xn, with the last column named 'y'.
      2. Pass the feature columns through ``normalize``. Remember to normalise
         the data *before* replacing the categorical values.
      3. Encode the categorical attributes 'x3', 'x13' and the target 'y'
         with integer codes.
      4. Drop every row that still contains a missing value.

    Parameters
    ----------
    df_raw : pandas.DataFrame
        Raw frame whose last column is the target. It is not modified; the
        work is done on a copy.

    Returns
    -------
    pandas.DataFrame
        The renamed, normalised and encoded frame without rows containing NaN.
    """
    # Work on a copy so the caller's frame keeps its original column names.
    df = df_raw.copy()
    df.columns = ['x' + str(i + 1) for i in range(len(df.columns) - 1)] + ['y']

    # NOTE(review): `normalize` is not defined in this notebook (it is expected
    # to come from a star-import); presumably it leaves the string-valued
    # columns usable for the replacements below - confirm.
    X = normalize(df[df.columns[:-1]])
    df = analytics.concat([X, df['y']], axis=1)

    # Integer codes for the categorical features and the Yes/No target.
    df['x3'] = df['x3'].replace({'nontypical':0,'asymptomatic':1,'nonanginal':2,'typical':3})
    df['x13'] = df['x13'].replace({'normal':0,'fixed':1,'reversable':2})
    df['y'] = df['y'].replace({'No':0,'Yes':1})

    return df.dropna()

In [34]:
# Location of the input CSV, expressed relative to the working directory.
filename = "Heart.csv"
data_path = "data/heart"

# Full path handed to the CSV reader in the next cell.
filepath = os.path.join(data_path, filename)

In [46]:
# Read the CSV and push it straight through the notebook's preprocessing.
df_raw = preprocess(analytics.read_csv(filepath))
# get_summary(df_raw)

# Settings passed to the model.
# NOTE(review): their exact meaning is defined by logistic_regression, which is
# not visible here; weights_lb / weights_ub presumably bound the initial
# weights (the recorded initial weights all lie in [0.1, 0.3]) - confirm.
hyperparameters = dict(
    alpha=0.2,
    weights_lb=0.1,
    weights_ub=0.3,
    tolerance=1e-2,
    max_epoch=100,
)

log_reg = logistic_regression(data=df_raw, hyperparameters=hyperparameters)
print("\n")
log_reg.train()

# Keep the split frames and the fitted weights around for the cells below.
df_train, df_test, weights = log_reg.df_train, log_reg.df_test, log_reg.weights





Data seems as preprocessed and normalised !! Ready to train...


Initial Weights: [[0.15306122 0.27142857 0.26326531 0.23469388 0.15306122 0.13265306
  0.26734694 0.23469388 0.14081633 0.18979592 0.10408163 0.1122449
  0.25918367]] 

Epoch # 001 :: Loss :0.116
Epoch # 002 :: Loss :0.101
Epoch # 003 :: Loss :0.095
Epoch # 004 :: Loss :0.091
Epoch # 005 :: Loss :0.087
Epoch # 006 :: Loss :0.083
Epoch # 007 :: Loss :0.080
Epoch # 008 :: Loss :0.077
Epoch # 009 :: Loss :0.075
Epoch # 010 :: Loss :0.073
Epoch # 011 :: Loss :0.071
Epoch # 012 :: Loss :0.069
Epoch # 013 :: Loss :0.067

Final Weights: [[-0.66152282  1.45656353 -0.34935731  2.37602929  0.11257901 -0.95314387
   0.73893915 -5.211516    1.06946157  1.8127382   0.32178399  3.9707594
   1.0121312 ]]


In [47]:
# Scatter the training points and draw the fitted decision boundary.
# The whole cell is skipped unless the weights object has exactly two entries
# along its first axis, i.e. the 2-D case that can be drawn on a plane.
if weights.shape[0] == 2:
    # Keep the first three columns.
    # NOTE(review): assumed to be x1, x2 and y - confirm against the model's df_train.
    df_train = df_train[df_train.columns[:weights.shape[0]+1]]

    if df_train.shape[1] == 3:
        # Boundary value is m * x1 with m = -w0 / w1 (no intercept term is used here).
        # .item() turns a 0-d or 1-element result into a plain Python float.
        m = maths.asarray(-weights[0]/weights[1], dtype=float).item()
        # Vectorised over the first column (positional access made explicit
        # with .iloc) instead of inserting one value per row into a list.
        decision_boundary = (m * df_train.iloc[:, 0].astype(float)).tolist()
    else:
        # Fewer than three columns available: fall back to a flat line at 0.
        decision_boundary = [0] * len(df_train)

    pos_df = df_train[df_train['y'] == 1]
    neg_df = df_train[df_train['y'] == 0]

    graph.scatter(pos_df['x1'], pos_df['x2'], color='blue', label='positive')
    graph.scatter(neg_df['x1'], neg_df['x2'], color='red', label='negative')
    graph.legend()
    graph.plot(df_train['x1'], decision_boundary)

In [48]:
# Label values treated as the positive / negative class by the metric helper;
# they match the 1 / 0 codes the target column is encoded with.
pos_class, neg_class = 1, 0

# Prefix the helper's own output with the split name, on the same line.
print("TRAIN", end=" ")
# NOTE(review): in the recorded output the precision (0.448) looks low next to
# the reported sensitivity and specificity - worth verifying the helper.
check_performance(df_train, weights, pos_class, neg_class)

TRAIN METRICS
Accuracy : 0.848
Sensitivity :0.826
Specificity :0.867
Precision :0.448
FMeasure :0.581


In [49]:
# Report the same metrics on the held-out split, reusing the weights fitted on
# the training data. The label is printed without a newline so it prefixes the
# helper's own output on the same line.
print("TEST", end = " ")
# check_performance is not defined in this notebook; judging by the recorded
# output it prints accuracy, sensitivity, specificity, precision and F-measure.
check_performance(df_test,weights, pos_class, neg_class)

TEST METRICS
Accuracy : 0.850
Sensitivity :0.929
Specificity :0.781
Precision :0.510
FMeasure :0.658
