# Heart Disease Predictor

##### Supervised Learning -> Regression -> Logistic Regression -> Here we predict  whether a person having heart disease or not

### Importing the libraries

In [9]:
import numpy as np # To work with array
import pandas as pd # To work with data

### Importing Sk learn Libraries / Modules

In [10]:
# This module used to split the data into training and testing.
from sklearn.model_selection import train_test_split

# This module used to build the model.
from sklearn.linear_model import LogisticRegression

# This module used for Evaluation.
from sklearn.metrics import accuracy_score

### Importing the data

In [11]:
df = pd.read_csv("Data/heartDiseaseDummyData.csv")

#### First 5 Records

In [12]:
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,58,0,1,128,228,1,0,157,0,0.568679,2,1,3,0
1,69,1,1,105,284,0,0,108,1,4.266273,2,0,7,0
2,76,0,3,98,252,1,1,186,0,1.460071,2,2,6,0
3,73,0,2,90,199,1,1,125,0,3.098133,1,2,6,0
4,65,0,3,114,195,1,1,105,1,0.056961,1,1,3,1


### Last 5 Records

In [13]:
df.tail()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
995,66,0,3,93,195,0,0,199,0,0.857678,2,1,6,0
996,75,1,2,133,269,0,0,155,1,4.776033,2,2,6,0
997,51,1,1,97,141,0,0,128,0,3.147192,1,1,6,1
998,65,1,3,118,180,0,0,100,0,1.000076,1,0,7,0
999,61,0,1,117,212,1,1,159,0,1.884997,1,1,3,0


### Dimention of our Data

In [14]:
df.shape

(1000, 14)

In [15]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       1000 non-null   int64  
 1   sex       1000 non-null   int64  
 2   cp        1000 non-null   int64  
 3   trestbps  1000 non-null   int64  
 4   chol      1000 non-null   int64  
 5   fbs       1000 non-null   int64  
 6   restecg   1000 non-null   int64  
 7   thalach   1000 non-null   int64  
 8   exang     1000 non-null   int64  
 9   oldpeak   1000 non-null   float64
 10  slope     1000 non-null   int64  
 11  ca        1000 non-null   int64  
 12  thal      1000 non-null   int64  
 13  target    1000 non-null   int64  
dtypes: float64(1), int64(13)
memory usage: 109.5 KB


### Missing values

In [16]:
df.isnull().sum()

age         0
sex         0
cp          0
trestbps    0
chol        0
fbs         0
restecg     0
thalach     0
exang       0
oldpeak     0
slope       0
ca          0
thal        0
target      0
dtype: int64

### Descriptive statistics

In [17]:
df.describe()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
count,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0
mean,54.613,0.516,1.962,113.958,209.043,0.484,0.498,151.242,0.492,2.58208,1.506,0.977,5.283,0.285
std,14.390216,0.499994,0.816837,14.127839,51.599609,0.499994,0.500246,29.392369,0.500186,1.467204,0.500214,0.809399,1.713431,0.45164
min,30.0,0.0,1.0,90.0,120.0,0.0,0.0,100.0,0.0,0.007479,1.0,0.0,3.0,0.0
25%,42.75,0.0,1.0,102.0,165.0,0.0,0.0,126.0,0.0,1.330126,1.0,0.0,3.0,0.0
50%,55.0,1.0,2.0,113.0,208.0,0.0,0.0,153.0,0.0,2.61998,2.0,1.0,6.0,0.0
75%,67.0,1.0,3.0,126.0,254.0,1.0,1.0,177.0,1.0,3.804209,2.0,2.0,7.0,1.0
max,79.0,1.0,3.0,139.0,299.0,1.0,1.0,199.0,1.0,4.995281,2.0,2.0,7.0,1.0


- 1 $\rightarrow$ Heart Disease
- 0 $\rightarrow$ Healthy Patient

In [18]:
df['target'].value_counts()

target
0    715
1    285
Name: count, dtype: int64

### Splitting Features and Target

In [19]:
x = df.drop(columns='target', axis=1)
x

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal
0,58,0,1,128,228,1,0,157,0,0.568679,2,1,3
1,69,1,1,105,284,0,0,108,1,4.266273,2,0,7
2,76,0,3,98,252,1,1,186,0,1.460071,2,2,6
3,73,0,2,90,199,1,1,125,0,3.098133,1,2,6
4,65,0,3,114,195,1,1,105,1,0.056961,1,1,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,66,0,3,93,195,0,0,199,0,0.857678,2,1,6
996,75,1,2,133,269,0,0,155,1,4.776033,2,2,6
997,51,1,1,97,141,0,0,128,0,3.147192,1,1,6
998,65,1,3,118,180,0,0,100,0,1.000076,1,0,7


In [20]:
y = df['target']
y

0      0
1      0
2      0
3      0
4      1
      ..
995    0
996    0
997    1
998    0
999    0
Name: target, Length: 1000, dtype: int64

### Training and Testing

In [21]:
x_train, x_test, y_train, y_test = train_test_split(x,y, test_size=0.2, stratify=y, random_state=42)

In [22]:
x.shape

(1000, 13)

In [23]:
x_train.shape

(800, 13)

In [24]:
x_test.shape

(200, 13)

In [25]:
y_train.shape

(800,)

In [26]:
y_test.shape

(200,)

### Model Building
$\rightarrow$ LogisticRegression is used to create the model where end of the it is a binary classification.


In [27]:
model = LogisticRegression()

### Train the Model

In [28]:
model.fit(x_train, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


###### Testing data score >> Training data score then under fit the model
###### Testing data score << Training data score then over fit the model

### Evaluation
$\rightarrow$ Accuracy score on training data

In [29]:
x_train_prediction = model.predict(x_train)
training_accuracy = accuracy_score(x_train_prediction, y_train)
print(f"The training accuracy is {training_accuracy * 100}%")


The training accuracy is 71.5%


### Evaluation
$\rightarrow$ Accuracy score on testing data

In [30]:
x_test_prediction = model.predict(x_test)
test_accuracy = accuracy_score(x_test_prediction, y_test)
print(f"The testing accuracy is {test_accuracy * 100}%")

The testing accuracy is 71.5%


### Building the Predicting system

In [31]:
# Tuple of feature data --> which is my input
input_data = (46,1,2,118,253,0,0,144,0,0.3187415348860684,1,0,6) # Having target=0

# Converting it into numpy array
input_data = np.asarray(input_data)

# Reshaping into required input form
input_data = input_data.reshape(1,-1)

# With the input i am prediction
prediction = model.predict(input_data)

if prediction[0] == 0:
    print("Patient no hart disease")
else:
    print("Patient go to hospital")

Patient no hart disease


