# Importing the Dependencies

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import metrics 
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

# Data Collection and Preprocessing

In [2]:
df=pd.read_csv("heart1.csv")
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [3]:
df.shape

(303, 14)

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 303 entries, 0 to 302
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       303 non-null    int64  
 1   sex       303 non-null    int64  
 2   cp        303 non-null    int64  
 3   trestbps  303 non-null    int64  
 4   chol      303 non-null    int64  
 5   fbs       303 non-null    int64  
 6   restecg   303 non-null    int64  
 7   thalach   303 non-null    int64  
 8   exang     303 non-null    int64  
 9   oldpeak   303 non-null    float64
 10  slope     303 non-null    int64  
 11  ca        303 non-null    int64  
 12  thal      303 non-null    int64  
 13  target    303 non-null    int64  
dtypes: float64(1), int64(13)
memory usage: 33.3 KB


In [5]:
df.isnull().sum()

age         0
sex         0
cp          0
trestbps    0
chol        0
fbs         0
restecg     0
thalach     0
exang       0
oldpeak     0
slope       0
ca          0
thal        0
target      0
dtype: int64

In [6]:
#checking the distribution of target variables

In [7]:
df['target'].value_counts()

1    165
0    138
Name: target, dtype: int64

# Data Separation class and labels 

1---> Defective Heart
0---> Healthy Heart

In [8]:
x=df.drop(columns='target',axis=1)
y=df['target']

In [9]:
x

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...
298,57,0,0,140,241,0,1,123,1,0.2,1,0,3
299,45,1,3,110,264,0,1,132,0,1.2,1,0,3
300,68,1,0,144,193,1,1,141,0,3.4,1,2,3
301,57,1,0,130,131,0,1,115,1,1.2,1,1,3


In [10]:
y

0      1
1      1
2      1
3      1
4      1
      ..
298    0
299    0
300    0
301    0
302    0
Name: target, Length: 303, dtype: int64

# Splitting the training and testing data

In [11]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=2)

In [12]:
print(x.shape,x_train.shape,x_test.shape)

(303, 13) (242, 13) (61, 13)


# Model Training

In [13]:
lr=LogisticRegression()
lr.fit(x_train,y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


LogisticRegression()

# Model Evaluation

In [14]:
#evaluating training data
x_training_data=lr.predict(x_train)
training_accuracy_data=metrics.accuracy_score(y_train,x_training_data)
print("Accuracy score of training dataset:",training_accuracy_data)

Accuracy score of training dataset: 0.8305785123966942


In [15]:
#evaluating testing data
x_testing_data=lr.predict(x_test)
testing_accuracy_data=metrics.accuracy_score(y_test,x_testing_data)
print("Accuracy score of training dataset:",testing_accuracy_data)

Accuracy score of training dataset: 0.9016393442622951


# Model Making

    Building a predictive system

In [16]:
input_data=(56,1,1,120,236,0,1,178,0,0.8,2,0,2)
input_data_as_numpy_array=np.array(input_data)
input_data_reshaped=input_data_as_numpy_array.reshape(1,-1)
prediction=lr.predict(input_data_reshaped)
print(prediction)
if (prediction[0]==1):
    print("The person has Defective Heart")
else:
    print("The person has Healthy Heart")

[1]
The person has Defective Heart




# Saving the trained model

In [17]:
import pickle

In [18]:
filename="Heartdisease_model.sav"
pickle.dump(lr,open(filename,'wb'))

In [20]:
#loading the saved model
loaded_model=pickle.load(open("Heartdisease_model.sav",'rb'))

In [21]:
input_data=(56,1,1,120,236,0,1,178,0,0.8,2,0,2)
input_data_as_numpy_array=np.array(input_data)
input_data_reshaped=input_data_as_numpy_array.reshape(1,-1)
prediction=loaded_model.predict(input_data_reshaped)
print(prediction)
if (prediction[0]==1):
    print("The person has Defective Heart")
else:
    print("The person has Healthy Heart")

[1]
The person has Defective Heart


