# Importing 

In [1]:
import numpy as np 
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score 

# Data collection and processing

In [2]:
heart_data = pd.read_csv('dataset_heart.csv')

In [3]:
heart_data.head()

Unnamed: 0,age,sex,chest pain type,resting blood pressure,serum cholestoral,fasting blood sugar,resting electrocardiographic results,max heart rate,exercise induced angina,oldpeak,ST segment,major vessels,thal,heart disease
0,70,1,4,130,322,0,2,109,0,2.4,2,3,3,2
1,67,0,3,115,564,0,2,160,0,1.6,2,0,7,1
2,57,1,2,124,261,0,0,141,0,0.3,1,0,7,2
3,64,1,4,128,263,0,0,105,1,0.2,2,1,7,1
4,74,0,2,120,269,0,2,121,1,0.2,1,1,3,1


In [4]:
heart_data.tail()

Unnamed: 0,age,sex,chest pain type,resting blood pressure,serum cholestoral,fasting blood sugar,resting electrocardiographic results,max heart rate,exercise induced angina,oldpeak,ST segment,major vessels,thal,heart disease
265,52,1,3,172,199,1,0,162,0,0.5,1,0,7,1
266,44,1,2,120,263,0,0,173,0,0.0,1,0,7,1
267,56,0,2,140,294,0,2,153,0,1.3,2,0,3,1
268,57,1,4,140,192,0,0,148,0,0.4,2,0,6,1
269,67,1,4,160,286,0,2,108,1,1.5,2,3,3,2


In [5]:
heart_data.shape

(270, 14)

# Data Analyzing

In [6]:
heart_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 270 entries, 0 to 269
Data columns (total 14 columns):
 #   Column                                Non-Null Count  Dtype  
---  ------                                --------------  -----  
 0   age                                   270 non-null    int64  
 1   sex                                   270 non-null    int64  
 2   chest pain type                       270 non-null    int64  
 3   resting blood pressure                270 non-null    int64  
 4   serum cholestoral                     270 non-null    int64  
 5   fasting blood sugar                   270 non-null    int64  
 6   resting electrocardiographic results  270 non-null    int64  
 7   max heart rate                        270 non-null    int64  
 8   exercise induced angina               270 non-null    int64  
 9   oldpeak                               270 non-null    float64
 10  ST segment                            270 non-null    int64  
 11  major vessels      

In [7]:
heart_data.isnull().sum()

age                                     0
sex                                     0
chest pain type                         0
resting blood pressure                  0
serum cholestoral                       0
fasting blood sugar                     0
resting electrocardiographic results    0
max heart rate                          0
exercise induced angina                 0
oldpeak                                 0
ST segment                              0
major vessels                           0
thal                                    0
heart disease                           0
dtype: int64

In [8]:
heart_data.describe()

Unnamed: 0,age,sex,chest pain type,resting blood pressure,serum cholestoral,fasting blood sugar,resting electrocardiographic results,max heart rate,exercise induced angina,oldpeak,ST segment,major vessels,thal,heart disease
count,270.0,270.0,270.0,270.0,270.0,270.0,270.0,270.0,270.0,270.0,270.0,270.0,270.0,270.0
mean,54.433333,0.677778,3.174074,131.344444,249.659259,0.148148,1.022222,149.677778,0.32963,1.05,1.585185,0.67037,4.696296,1.444444
std,9.109067,0.468195,0.95009,17.861608,51.686237,0.355906,0.997891,23.165717,0.470952,1.14521,0.61439,0.943896,1.940659,0.497827
min,29.0,0.0,1.0,94.0,126.0,0.0,0.0,71.0,0.0,0.0,1.0,0.0,3.0,1.0
25%,48.0,0.0,3.0,120.0,213.0,0.0,0.0,133.0,0.0,0.0,1.0,0.0,3.0,1.0
50%,55.0,1.0,3.0,130.0,245.0,0.0,2.0,153.5,0.0,0.8,2.0,0.0,3.0,1.0
75%,61.0,1.0,4.0,140.0,280.0,0.0,2.0,166.0,1.0,1.6,2.0,1.0,7.0,2.0
max,77.0,1.0,4.0,200.0,564.0,1.0,2.0,202.0,1.0,6.2,3.0,3.0,7.0,2.0


In [9]:
heart_data['heart disease'].value_counts()

1    150
2    120
Name: heart disease, dtype: int64

In [10]:
X = heart_data.drop(columns = 'heart disease', axis = 1)
Y = heart_data['heart disease']

In [11]:
print(X)

     age  sex   chest pain type  resting blood pressure  serum cholestoral  \
0     70     1                4                     130                322   
1     67     0                3                     115                564   
2     57     1                2                     124                261   
3     64     1                4                     128                263   
4     74     0                2                     120                269   
..   ...   ...              ...                     ...                ...   
265   52     1                3                     172                199   
266   44     1                2                     120                263   
267   56     0                2                     140                294   
268   57     1                4                     140                192   
269   67     1                4                     160                286   

     fasting blood sugar  resting electrocardiographic results 

In [12]:
print(Y)

0      2
1      1
2      2
3      1
4      1
      ..
265    1
266    1
267    1
268    1
269    2
Name: heart disease, Length: 270, dtype: int64


In [13]:
# Split data into a train data and test data
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size= 0.2, stratify= Y, random_state = 2)

In [14]:
print(X.shape, x_train.shape, x_test.shape)

(270, 13) (216, 13) (54, 13)


# Train the LogisticRegression model

In [15]:
model = LogisticRegression()

In [16]:
model.fit(x_train, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [17]:
# Model Evaluation
x_train_prediction = model.predict(x_train)
train_data_accuracy = accuracy_score(x_train_prediction, y_train)

In [18]:
print('accuracy on training data = ', train_data_accuracy)

accuracy on training data =  0.8796296296296297


In [19]:
x_test_prediction = model.predict(x_test)
test_data_accuracy = accuracy_score(x_test_prediction, y_test)

In [20]:
print('accuracy on testing data = ', test_data_accuracy)

accuracy on testing data =  0.8333333333333334


# Build a prediction system

In [21]:
# Target is = 1, that means Helthy Heart 
# Target is = 2, that means Defective Heart
input_data = (45,0,2,130,234,0,2,175,0,0.6,2,0,3) # Expected output is = 1
input_data_as_numpy_array = np.array(input_data)
input_data_reshape = input_data_as_numpy_array.reshape(1, -1)
prediction = model.predict(input_data_reshape)

if(prediction[0] == 1):
    print('The Person does not have a Heart Disease')
else:
    print('The Person has a Heart Disease')

The Person does not have a Heart Disease


