# Heart Disease Detector

In [1]:
import pandas as pd

In [2]:
# Load the raw dataset from the CSV file that sits next to the notebook.
df = pd.read_csv(filepath_or_buffer='data.csv')

In [3]:
# Display the loaded frame; pandas abbreviates the middle rows in the rendering.
df

Unnamed: 0,bloodPreasure,Age,disProb
0,128,12,0
1,139,46,0
2,135,67,0
3,127,81,1
4,147,5,0
...,...,...,...
1019,136,51,1
1020,139,43,0
1021,147,3,1
1022,133,53,0


In [4]:
# Peek at the first five rows.
df.head(n=5)

Unnamed: 0,bloodPreasure,Age,disProb
0,128,12,0
1,139,46,0
2,135,67,0
3,127,81,1
4,147,5,0


In [5]:
# Peek at the last five rows.
df.tail(n=5)

Unnamed: 0,bloodPreasure,Age,disProb
1019,136,51,1
1020,139,43,0
1021,147,3,1
1022,133,53,0
1023,137,35,1


In [6]:
# Print column names, non-null counts, dtypes and the frame's memory footprint.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1024 entries, 0 to 1023
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype
---  ------         --------------  -----
 0   bloodPreasure  1024 non-null   int64
 1   Age            1024 non-null   int64
 2   disProb        1024 non-null   int64
dtypes: int64(3)
memory usage: 24.1 KB


In [7]:
# Per-column summary statistics (count, mean, std, min, quartiles, max).
df.describe()

Unnamed: 0,bloodPreasure,Age,disProb
count,1024.0,1024.0,1024.0
mean,134.495117,50.848633,0.515625
std,8.779808,29.119223,0.5
min,120.0,1.0,0.0
25%,127.0,25.0,0.0
50%,134.0,52.0,1.0
75%,142.0,76.0,1.0
max,150.0,100.0,1.0


# Train Test Splitting

In [8]:
import numpy as np

In [9]:
def split_data(data, ratio, seed=42):
    """Shuffle ``data`` reproducibly and split it into train and test parts.

    Parameters
    ----------
    data : object supporting ``len()`` and ``.iloc``
        The rows to split (a pandas DataFrame in this notebook). Rows are
        selected positionally, so the original index labels are preserved.
    ratio : float
        Fraction of rows, between 0 and 1 inclusive, assigned to the test
        set. The test set holds ``int(len(data) * ratio)`` rows.
    seed : int, optional
        Seed for the shuffle. The default of 42 yields the same permutation
        as ``np.random.seed(42)`` followed by ``np.random.permutation``.

    Returns
    -------
    tuple
        ``(train, test)``: two disjoint positional selections of ``data``
        that together contain every row exactly once.

    Raises
    ------
    ValueError
        If ``ratio`` lies outside ``[0, 1]``.
    """
    if not 0 <= ratio <= 1:
        raise ValueError(f"ratio must be between 0 and 1, got {ratio!r}")

    # A private generator keeps the shuffle reproducible without reseeding
    # NumPy's global RNG as a hidden side effect of calling this helper.
    rng = np.random.RandomState(seed)
    shuffled = rng.permutation(len(data))

    test_set_size = int(len(data) * ratio)
    test_indices = shuffled[:test_set_size]
    train_indices = shuffled[test_set_size:]
    return data.iloc[train_indices], data.iloc[test_indices]

In [10]:
# Hold out 20% of the rows for testing; the remainder is used for training.
train, test = split_data(data=df, ratio=0.2)

In [11]:
# Inspect the training partition returned by split_data.
train

Unnamed: 0,bloodPreasure,Age,disProb
745,140,92,0
137,133,9,1
377,134,21,0
388,147,85,0
824,132,70,0
...,...,...,...
106,148,76,0
270,146,33,0
860,141,79,0
435,129,8,0


In [12]:
# Inspect the test partition returned by split_data.
test

Unnamed: 0,bloodPreasure,Age,disProb
525,137,95,1
357,136,69,1
444,138,15,1
31,120,83,1
618,133,17,1
...,...,...,...
429,126,43,0
542,137,9,0
903,141,68,0
528,129,9,0


In [13]:
# Build the 2-column feature matrices (bloodPreasure, Age) for both partitions
# in one pass so the column selection cannot drift between train and test.
X_train, X_test = (
    partition[['bloodPreasure', 'Age']].to_numpy()
    for partition in (train, test)
)

In [14]:
# Select the label column as a Series so to_numpy() already yields a 1-D
# vector. This avoids reshaping with hard-coded row counts (820 / 204), which
# would raise as soon as the dataset size or the split ratio changes.
Y_train = train['disProb'].to_numpy()
Y_test = test['disProb'].to_numpy()

In [15]:
# Inspect the training feature matrix (columns: bloodPreasure, Age).
X_train

array([[140,  92],
       [133,   9],
       [134,  21],
       ...,
       [141,  79],
       [129,   8],
       [139,  74]], dtype=int64)

In [16]:
# Inspect the test feature matrix (columns: bloodPreasure, Age).
# NOTE(review): this prints every row and produces a very long output;
# consider displaying a slice such as the first few rows instead.
X_test

array([[137,  95],
       [136,  69],
       [138,  15],
       [120,  83],
       [133,  17],
       [131,  67],
       [134,  47],
       [147,  66],
       [131,   1],
       [124,  20],
       [126,  24],
       [137,  61],
       [150,  74],
       [129,  10],
       [123,   7],
       [150,  49],
       [141,  27],
       [140,  13],
       [126,  94],
       [122,  86],
       [139,  97],
       [135,  61],
       [135,  48],
       [142,  36],
       [126,  56],
       [146,  10],
       [137,  89],
       [133,  47],
       [135,  99],
       [125,  15],
       [132,  13],
       [125,  22],
       [123,  29],
       [141,  23],
       [136,  58],
       [126,  28],
       [121,  26],
       [125,  40],
       [137,  22],
       [129,  40],
       [120,  52],
       [129,  55],
       [146,  11],
       [123,  29],
       [140,  68],
       [127,  53],
       [124,  96],
       [138,  93],
       [138,  81],
       [140,  77],
       [120,  24],
       [150,  84],
       [130,

In [17]:
# Inspect the test labels (the disProb column as a 1-D array).
Y_test

array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1,
       1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0,
       0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0,
       1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1,
       1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1,
       1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0,
       0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1,
       1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1,
       1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1,
       0, 0, 0, 0, 0, 1], dtype=int64)

In [18]:
# Inspect the training labels (the disProb column as a 1-D array).
Y_train

array([0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0,
       0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1,
       1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1,
       0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0,
       1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1,
       1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1,
       1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0,
       0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0,
       1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1,
       1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0,
       0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0,
       1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0,
       1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1,

In [19]:
from sklearn.linear_model import LogisticRegression

In [20]:
# Fit a logistic-regression classifier with default settings on the training
# features and labels. fit() is left as the last expression so the fitted
# estimator is echoed as the cell output.
clf = LogisticRegression()
clf.fit(X=X_train, y=Y_train)

LogisticRegression()

In [21]:
# One sample to score, in the same column order used for training:
# [bloodPreasure, Age].
inputFeatures = [120, 45]
# predict_proba returns one row per sample; take row 0, column 1, i.e. the
# probability of the second class (label 1, assuming labels are {0, 1}).
infProb = clf.predict_proba([inputFeatures])[0, 1]


In [22]:
# Show the predicted probability computed for the sample input.
infProb

0.5053335645208221