In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline


In [2]:
data=pd.read_csv('heart.csv')
data.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [3]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 303 entries, 0 to 302
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       303 non-null    int64  
 1   sex       303 non-null    int64  
 2   cp        303 non-null    int64  
 3   trestbps  303 non-null    int64  
 4   chol      303 non-null    int64  
 5   fbs       303 non-null    int64  
 6   restecg   303 non-null    int64  
 7   thalach   303 non-null    int64  
 8   exang     303 non-null    int64  
 9   oldpeak   303 non-null    float64
 10  slope     303 non-null    int64  
 11  ca        303 non-null    int64  
 12  thal      303 non-null    int64  
 13  target    303 non-null    int64  
dtypes: float64(1), int64(13)
memory usage: 33.3 KB


In [4]:
#all are int values
#checking fn to confirm about nan values
data.isnull().values.any()

False

In [5]:
np.random.seed(42)
data=data.sample(frac=1)

In [6]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 303 entries, 179 to 102
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       303 non-null    int64  
 1   sex       303 non-null    int64  
 2   cp        303 non-null    int64  
 3   trestbps  303 non-null    int64  
 4   chol      303 non-null    int64  
 5   fbs       303 non-null    int64  
 6   restecg   303 non-null    int64  
 7   thalach   303 non-null    int64  
 8   exang     303 non-null    int64  
 9   oldpeak   303 non-null    float64
 10  slope     303 non-null    int64  
 11  ca        303 non-null    int64  
 12  thal      303 non-null    int64  
 13  target    303 non-null    int64  
dtypes: float64(1), int64(13)
memory usage: 35.5 KB


In [7]:
data=data.reset_index()
data.head()

Unnamed: 0,index,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,179,57,1,0,150,276,0,0,112,1,0.6,1,1,1,0
1,228,59,1,3,170,288,0,0,159,0,0.2,1,0,3,0
2,111,57,1,2,150,126,1,1,173,0,0.2,2,1,3,1
3,246,56,0,0,134,409,0,0,150,1,1.9,1,2,3,0
4,60,71,0,2,110,265,1,0,130,0,0.0,2,1,2,1


In [8]:
data=data.drop('index',axis=1)


In [9]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 303 entries, 0 to 302
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       303 non-null    int64  
 1   sex       303 non-null    int64  
 2   cp        303 non-null    int64  
 3   trestbps  303 non-null    int64  
 4   chol      303 non-null    int64  
 5   fbs       303 non-null    int64  
 6   restecg   303 non-null    int64  
 7   thalach   303 non-null    int64  
 8   exang     303 non-null    int64  
 9   oldpeak   303 non-null    float64
 10  slope     303 non-null    int64  
 11  ca        303 non-null    int64  
 12  thal      303 non-null    int64  
 13  target    303 non-null    int64  
dtypes: float64(1), int64(13)
memory usage: 33.3 KB


In [10]:
data.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,57,1,0,150,276,0,0,112,1,0.6,1,1,1,0
1,59,1,3,170,288,0,0,159,0,0.2,1,0,3,0
2,57,1,2,150,126,1,1,173,0,0.2,2,1,3,1
3,56,0,0,134,409,0,0,150,1,1.9,1,2,3,0
4,71,0,2,110,265,1,0,130,0,0.0,2,1,2,1


# converting data in np arrays

In [11]:
data_array=np.array(data)

In [12]:
data.shape

(303, 14)

In [13]:
X=data_array[:,0:13]

In [14]:
X.shape

(303, 13)

In [15]:
Y=data_array[:,13:]
print(Y.shape)

(303, 1)


In [16]:
X_test=np.append(X,np.ones((303,1)),axis=1)

In [17]:
print(X_test.shape)
print(X_test[0,0:14])

(303, 14)
[ 57.    1.    0.  150.  276.    0.    0.  112.    1.    0.6   1.    1.
   1.    1. ]


# standardising data

In [18]:
def standardise(num):
    mean_data=np.mean(num,axis=0)
    std_data=np.std(num,axis=0)
    D=(num-mean_data)/std_data
    
    print(mean_data)
    print(std_data)
    
    return D


In [19]:
temp=standardise(X)

[5.43663366e+01 6.83168317e-01 9.66996700e-01 1.31623762e+02
 2.46264026e+02 1.48514851e-01 5.28052805e-01 1.49646865e+02
 3.26732673e-01 1.03960396e+00 1.39933993e+00 7.29372937e-01
 2.31353135e+00]
[ 9.06710164  0.46524119  1.03034803 17.50917807 51.74515101  0.3556096
  0.52499112 22.86733258  0.46901859  1.15915747  0.61520843  1.0209175
  0.61126531]


In [20]:
standardise(temp)

[ 5.56943564e-17 -1.64151787e-16 -5.27630744e-17 -6.74194840e-16
 -9.81979441e-17 -3.22441010e-17  1.64151787e-16 -5.97981510e-16
 -8.20758936e-17  1.64151787e-16 -1.28976404e-16  2.34502553e-17
 -2.13983580e-16]
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]


array([[ 0.29046364,  0.68100522, -0.93851463, ..., -0.64911323,
         0.26508221, -2.14887271],
       [ 0.5110413 ,  0.68100522,  1.97312292, ..., -0.64911323,
        -0.71442887,  1.12302895],
       [ 0.29046364,  0.68100522,  1.00257707, ...,  0.97635214,
         0.26508221,  1.12302895],
       ...,
       [ 1.61392956,  0.68100522,  1.97312292, ..., -0.64911323,
         0.26508221, -0.51292188],
       [-0.92271345,  0.68100522, -0.93851463, ...,  0.97635214,
        -0.71442887,  1.12302895],
       [ 0.9521966 , -1.46841752,  0.03203122, ...,  0.97635214,
         1.24459328, -0.51292188]])

In [21]:
X_test=np.append(temp,np.ones((303,1)),axis=1)
print(X_test.shape)

(303, 14)


In [22]:
print(temp[0:2])

[[ 0.29046364  0.68100522 -0.93851463  1.04952029  0.57466203 -0.41763453
  -1.00583187 -1.6463164   1.43548113 -0.37924438 -0.64911323  0.26508221
  -2.14887271]
 [ 0.5110413   0.68100522  1.97312292  2.19177836  0.80656782 -0.41763453
  -1.00583187  0.40901733 -0.69663055 -0.7243226  -0.64911323 -0.71442887
   1.12302895]]


# performing logistic regression

In [23]:
X_test=X_test.T

In [24]:
print(X_test.shape)

(14, 303)


In [25]:
Y=Y.T

In [26]:
print(Y.shape)

(1, 303)


In [27]:
W=np.ones((1,14))
print(W.shape)

(1, 14)


In [28]:
#bias=np.ones((1,303))
bias=1

In [29]:
def z_function(w,x,b):
    z=np.dot(w,x)+b
    
    return z

In [30]:
def activate(value):
    A=1/(1+np.exp(-value))
    
    return A

In [31]:
activate(z_function(W,X_test,bias)).shape

(1, 303)

In [32]:
def loss(y,y_hat):
    l=-((y*(np.log(y_hat+0.000001)))+((1-y)*(np.log(1-y_hat+0.000001))))
    l=np.sum(l)/Y.shape[1]
    return l

In [33]:
tempor_235=z_function(W,X_test,bias)
tempor_678=activate(tempor_235)
loss(Y,tempor_678)

2.2571602855816133

In [34]:
def derivatives(y,y_hat,z,x):
    helper=((y/(y_hat+0.000001))+((1-y)/(1-y_hat)))*y_hat*(y_hat-1+0.000001)
    helper=-(helper)/y.shape[1]
    
    temp_dict={}
    for i in range(0,14):
        temp_dict['dw'+str(i)]=np.sum(helper*x[i])
        
        #temp_list.append(dw+str(i))
        
    db=np.sum(helper)
    
    temp_dict['der_b']=db
    
    return temp_dict
        
    

In [35]:
tempor_393=derivatives(Y,tempor_678,tempor_235,X_test)
print(tempor_393)

{'dw0': -0.0007380261789565797, 'dw1': -0.03803217339156757, 'dw2': -0.11633978603322859, 'dw3': 0.0020625411710725114, 'dw4': -0.0025796239969741314, 'dw5': -0.037504267168443184, 'dw6': -0.03573346158859188, 'dw7': -0.06506852159421524, 'dw8': 0.07878659026384313, 'dw9': 0.10033907053669935, 'dw10': -0.10906232820825967, 'dw11': 0.06050646684173246, 'dw12': 0.05076823183367378, 'dw13': 0.5792879253530728, 'der_b': 0.5792879253530728}


In [36]:
def gradient_descent(w,x,b,y):
    Z=z_function(w,x,b)
    Y_hat=activate(Z)
    cost=loss(y,Y_hat)
    
    ders=derivatives(y,Y_hat,Z,x)
    
    dB=ders['der_b']
    dW=[]
    
    for i in range(0,14):
        dW.append(ders['dw'+str(i)])
    
    dW=np.array(dW,ndmin=2)
    alpha=0.0001
    
    w=w-(alpha*dW)
    b=b-(alpha*dB)
    
    update_param={
        'W_s':w,
        'b_s':b,
        'c_s':cost
    }
    
    return update_param
    
    

In [37]:
def logit_regressor(w,x,b,y):
    
    tempor_list=[]
    
    for i in range(10001):
        
        grad=gradient_descent(w,x,b,y)
        
        w=grad['W_s']
        b=grad['b_s']
        cost_present=grad['c_s']
        
        if i%1000==0:
            print(cost_present)
            tempor_list.append(cost_present)
            print(w)
            
    cache={
        'cost_list':tempor_list,
        'final_weights':w,
        'bias_value':b
    }
    
    return cache

In [38]:
code=logit_regressor(W,X_test,bias,Y)

2.2571602855816133
[[1.00000007 1.0000038  1.00001163 0.99999979 1.00000026 1.00000375
  1.00000357 1.00000651 0.99999212 0.99998997 1.00001091 0.99999395
  0.99999492 0.99994207]]
2.2301614010475337
[[1.00014038 1.00390671 1.01145075 0.99983182 1.00022915 1.00374972
  1.00350377 1.00628639 0.99228065 0.99009781 1.01075479 0.99408595
  0.99502528 0.94200153]]
2.2049545787984473
[[1.00041816 1.00800328 1.02249515 0.99974266 1.00040218 1.00748871
  1.00685527 1.01211088 0.98491026 0.98049854 1.02116626 0.98846818
  0.99027836 0.88404148]]
2.1815772786000074
[[1.00084005 1.01228522 1.03313694 0.9997353  1.00052265 1.01122342
  1.01005454 1.01747699 0.97789151 0.97120689 1.03123321 0.98314812
  0.98576685 0.82606737]]
2.1600647110033884
[[1.00141223 1.01674421 1.04336849 0.9998123  1.00059388 1.01495633
  1.01309844 1.02238163 0.97123481 0.9622379  1.04094339 0.97813294
  0.98150318 0.76808409]]
2.140449771915611
[[1.00214047 1.02137185 1.05318241 0.99997594 1.00061917 1.01868972
  1.01598

In [39]:
final_W=code['final_weights']

In [40]:
print(final_W)

[[1.00828853 1.04674301 1.09574915 1.00217191 1.00016566 1.03743266
  1.02798122 1.04193586 0.93942826 0.91608051 1.09101493 0.95483087
  0.96177255 0.42018964]]


In [41]:
final_bias=code['bias_value']
print(final_bias)

0.4201896353970752


In [42]:
tempor3434=z_function(final_W,X_test,final_bias)

In [43]:
Y_hat=activate(tempor3434)

In [44]:
print(Y_hat.shape)

(1, 303)


In [48]:
prob_list=[]
new_Y=[]
for i in range(303):
    if Y_hat[0][i]<0.5:
        prob_list.append(0)
    else:
        prob_list.append(1)
        
    new_Y.append(int(Y[0][i]))
        
print(len(prob_list))
print(len(new_Y))

303
303


In [50]:
temp=0
for i in range(303):
    if prob_list[i]!=new_Y[i]:
        temp=temp+1
               
    

In [52]:
print(temp/303)

0.5841584158415841


## hence 58% accuracy