In [50]:
import numpy as np 
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split 
import matplotlib.pyplot as plt

#Logistic Regression
class Logistic_Regression() :
    """Binary logistic regression fitted with batch gradient descent.

    Parameters
    ----------
    X : array-like with a 2-D ``shape`` of (samples, features)
        Only ``X.shape`` is read here; it sizes the weight vector.
    lr : float
        Learning rate applied to every gradient step.
    epochs : int
        ``train`` performs ``epochs + 1`` gradient steps and returns one
        cost value per step.
    """

    # Logits are clipped to +/- this value before np.exp; exp(700) is still
    # finite in float64, so the sigmoid never overflows.
    _MAX_LOGIT = 700.0
    # Probabilities are clipped to [_EPS, 1 - _EPS] before taking logs so a
    # saturated prediction cannot turn the cost into inf/nan.
    _EPS = 1e-15

    def __init__(self,X, lr, epochs):
        self.lr = lr
        self.epochs = epochs
        self.samples, self.features = X.shape
        self.weights = np.zeros(self.features)
        self.bias = 0

    #Sigmoid function
    def sigmoid(self,z):
        """Return 1 / (1 + exp(-z)) element-wise without overflowing."""
        z = np.clip(np.asarray(z, dtype=float), -self._MAX_LOGIT, self._MAX_LOGIT)
        return 1 / (1 + np.exp(-z))

    #Hypothesis function
    def hypothesis(self, X):
        """Return the predicted probability of class 1 for every row of X.

        X is converted to a float ndarray, so the result is an ndarray even
        when a DataFrame is passed.
        """
        return self.sigmoid(np.asarray(X, dtype=float).dot(self.weights) + self.bias)

    #Gradient descent function
    def gradientDescent(self, hyp):
        """Apply one gradient step using ``self.X`` / ``self.Y`` and ``hyp``.

        ``hyp`` holds the current predictions for ``self.X``. Returns ``self``.
        """
        features = np.asarray(self.X, dtype=float)
        # Flatten so an (n, 1) label column cannot broadcast against (n,) predictions.
        labels = np.asarray(self.Y, dtype=float).ravel()
        error = np.asarray(hyp, dtype=float).ravel() - labels
        # Average over the rows actually being trained on.
        n_rows = features.shape[0]

        # calculate gradients
        dw = 1 / n_rows * np.dot(features.T, error)
        db = 1 / n_rows * np.sum(error)

        #Update weight values
        self.weights -= self.lr*dw
        self.bias -= self.lr*db

        return self

    #Training function
    def train(self, X, Y):
        """Fit weights and bias on (X, Y); return the list of per-step costs.

        The returned list has ``epochs + 1`` entries. Each entry is the
        cross-entropy cost computed before that step's update.

        Raises
        ------
        ValueError
            If X is not 2-D with ``self.features`` columns, is empty, or Y
            does not contain one label per row of X.
        """
        self.X = np.asarray(X, dtype=float)
        self.Y = np.asarray(Y, dtype=float).ravel()
        if self.X.ndim != 2 or self.X.shape[1] != self.features:
            raise ValueError(
                f'X must be 2-D with {self.features} feature column(s), got shape {self.X.shape}'
            )
        if self.X.shape[0] == 0:
            raise ValueError('cannot train on an empty dataset')
        if self.Y.shape[0] != self.X.shape[0]:
            raise ValueError(
                f'Y has {self.Y.shape[0]} label(s) but X has {self.X.shape[0]} row(s)'
            )
        # Normalise by the training rows, not by the rows seen in __init__.
        self.samples = self.X.shape[0]

        cost_list = []
        for _ in range( self.epochs + 1 ) :
            #Hypothesis
            hyp = self.hypothesis(self.X)

            #Cost function - Cross entropy (probabilities clipped away from 0 and 1)
            prob = np.clip(hyp, self._EPS, 1 - self._EPS)
            cost = -1 / self.samples * np.sum(self.Y * np.log(prob) + (1-self.Y) * np.log(1 - prob))

            #Gradient descent (uses the unclipped predictions)
            self.gradientDescent(hyp)

            cost_list.append(cost)

        return cost_list

    def predict(self, X):
        """Return an array of 0/1 labels: 1 where the probability exceeds 0.5."""
        hyp = self.hypothesis(X)
        predict = np.where(hyp > 0.5, 1, 0)
        return predict



Get Dataset

In [24]:

# Raw string keeps the Windows backslashes literal; the non-raw form relied on
# invalid escape sequences (\R, \I, \h) that newer Python versions warn about.
# NOTE(review): this is an absolute local path - point it at your copy of heart.csv.
path = r'D:\Repos\Intelligent-Systems-Technologies\heart.csv'
df = pd.read_csv(path)
df.head(5)


Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


Check null values

In [None]:
# Missing values per column, followed by the class balance of the target.
null_counts = df.isnull().sum()
print(null_counts)
sns.countplot(data=df, x='target')

Check correlations

In [None]:
# Annotated heatmap of the pairwise correlations between all columns of df.
plt.figure(figsize=(16,10))
sns.heatmap(df.corr(), annot=True)
# plt.show must be called; the bare attribute only echoes the function object.
plt.show()

Define features
Split the data into 80% training and 20% testing.


In [25]:
# Raw string keeps the Windows backslashes literal; the non-raw form relied on
# invalid escape sequences (\R, \I, \h) that newer Python versions warn about.
# NOTE(review): this is an absolute local path - point it at your copy of heart.csv.
path = r'D:\Repos\Intelligent-Systems-Technologies\heart.csv'
df = pd.read_csv(path)

# Feature columns used by the model and the binary target as a NumPy array.
x = df[['sex','cp','fbs','exang','oldpeak','slope','ca','thal']]
y = df.target.values

# 80/20 split; random_state fixes the shuffle so the split is reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(x, y,test_size=0.20, random_state=5)

Training

In [51]:
# Hyper-parameters for gradient descent.
lr=0.001
epochs = 3000
model = Logistic_Regression( X_train, lr,epochs)
cost = model.train(X_train,Y_train)

# train() returns one cost per iteration, i.e. epochs + 1 values.
plt.plot(np.arange(epochs + 1), cost)
plt.xlabel('Epoch')
plt.ylabel('Cross-entropy cost')
plt.title('Training cost')
plt.show()


0.1
10000
242
7
     cp  thalach  exang  oldpeak  slope  ca  thal
62    3      190      0      0.0      1   0     1
127   2      172      0      0.0      2   1     2
111   2      173      0      0.2      2   1     3
287   1      164      0      0.0      2   1     2
108   1      162      0      1.1      2   0     2
..   ..      ...    ...      ...    ...  ..   ...
203   2      150      1      1.6      1   0     3
255   0      147      1      0.0      1   3     3
72    1      202      0      0.0      2   0     2
235   0      173      1      1.6      2   0     3
37    2      165      0      1.6      2   0     3

[242 rows x 7 columns]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]]
Cost after epochs 0: target    0.693147
dtype: float64
     cp  thalach  exang  oldpeak  slope  ca  thal
62    3      190      0      0.0      1   0     1
127   2      172      0      0.0      2   1     2
111   2      173      0      0.2      2   1     3
287   1      164      0      0.0      2   1     2
108   1     

ValueError: Length of passed values is 242, index implies 1.

Testing

In [None]:
# Predict on the held-out rows and report the share of correct labels.
Y_pred = model.predict( X_test )

# Number of positions where the prediction matches the true label.
accuracy = sum(
    1 for idx in range( np.size( Y_pred ) ) if Y_test[idx] == Y_pred[idx]
)
print(f'Accuracy :  { accuracy / len(Y_pred) * 100} ')