In [2]:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler 
from sklearn.linear_model import LogisticRegression 
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix 
from sklearn.metrics import classification_report

In [3]:
data = pd.read_csv("heart.csv")

In [4]:
data.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [5]:
#create features and labels column
y=data['target']
x=data.drop('target',axis=1) #The axis=1 argument specifies that a column (not a row) is being removed.

In [7]:
x.head

<bound method NDFrame.head of      age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  \
0     63    1   3       145   233    1        0      150      0      2.3   
1     37    1   2       130   250    0        1      187      0      3.5   
2     41    0   1       130   204    0        0      172      0      1.4   
3     56    1   1       120   236    0        1      178      0      0.8   
4     57    0   0       120   354    0        1      163      1      0.6   
..   ...  ...  ..       ...   ...  ...      ...      ...    ...      ...   
298   57    0   0       140   241    0        1      123      1      0.2   
299   45    1   3       110   264    0        1      132      0      1.2   
300   68    1   0       144   193    1        1      141      0      3.4   
301   57    1   0       130   131    0        1      115      1      1.2   
302   57    0   1       130   236    0        0      174      0      0.0   

     slope  ca  thal  
0        0   0     1  
1        0 

In [12]:
'''
Split the data
We need to split the data before scaler Coz:because i need scaler to fit to training data not test data

StandardScaler standardizes features by removing the mean and scaling to unit variance. 
After scaling:
Each feature will have a mean of 0.
Each feature will have a standard deviation of 1.

Use it:Helps improve the performance of machine learning models that are sensitive to feature scales
'''
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state= 42)

#we will use a standard scaler to scale the features for preprocessing
scaler=StandardScaler()
scale=scaler.fit(X_train) #Calculates the mean and standard deviation for each feature in the training set.
X_train=scale.transform(X_train) #Applies the scaling (standardization) to the training data using the mean and standard deviation computed in Previous step
X_test=scale.transform(X_test) #Ensures the testing data is scaled using the same parameters (mean and standard deviation) as the training data.

In [13]:
#creating Logistic Regression Model
model = LogisticRegression() 
model.fit(X_train,y_train)
pred = model.predict(X_test)

In [14]:
pred

array([0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0,
       0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1,
       1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0], dtype=int64)

In [16]:
score =  accuracy_score(y_test,pred)
score

0.8524590163934426

In [17]:
#creating a confusion matrix
confusion_matrix(y_test,pred)

array([[25,  4],
       [ 5, 27]], dtype=int64)

In [18]:
#extracting TN, TP, FP, FN
tn, fp, fn, tp = confusion_matrix (y_test, pred).ravel() 
(tn, fp, fn, tp)

(25, 4, 5, 27)

In [21]:
#Confusion Matrix Report
matrix = classification_report(y_test, pred)
print("classification report: \n",matrix)

classification report: 
               precision    recall  f1-score   support

           0       0.83      0.86      0.85        29
           1       0.87      0.84      0.86        32

    accuracy                           0.85        61
   macro avg       0.85      0.85      0.85        61
weighted avg       0.85      0.85      0.85        61

