# Analyzing cardiac data and building a model that predicts, from an individual's cardiac measurements, whether that person has heart disease

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import cufflinks as cf
import plotly.express as px

In [2]:
# Seaborn defaults: enable the short colour codes, then switch to the plain 'white' style.
sns.set_theme(color_codes=True)
sns.set_style('white')
# Render matplotlib figures inline in the notebook.
%matplotlib inline
# Set up plotly's notebook mode and put cufflinks in offline mode for the .iplot() calls below.
init_notebook_mode(connected=True)
cf.go_offline()

In [3]:
# Load the dataset; 'heart.csv' is resolved relative to the notebook's working directory.
df = pd.read_csv('heart.csv')

In [4]:
# Preview the first rows to check the columns and value ranges.
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [5]:
# Count rows per label value to check the class balance.
df['target'].value_counts()

1    165
0    138
Name: target, dtype: int64

In [None]:
# Correlation of every column with the label. The last entry is the label's
# correlation with itself, so it is dropped before sorting and plotting.
target_corr = df.corr()['target']
target_corr.iloc[:-1].sort_values().iplot(kind='bar')

In [None]:
# Interactive bar chart of the full pairwise correlation matrix.
df.corr().iplot(kind='bar')

In [None]:
# Heatmap of the null mask: any missing value would show up as a contrasting cell.
sns.heatmap(df.isnull())

In [None]:
# Column dtypes and non-null counts.
df.info()

In [None]:
# Summary statistics for the numeric columns.
df.describe()

In [None]:
# Interactive histogram of the label values.
df['target'].iplot(kind='hist')

In [None]:
# Static count plot of the label values.
sns.countplot(data=df, x='target')

In [None]:
# Column subset (four measurements plus the label) used for the pair plot.
pairplot_columns = ['age', 'trestbps', 'chol', 'thalach', 'target']
pp_df = df[pairplot_columns]

In [None]:
# Pairwise scatter/density plots of the selected columns, coloured by label.
sns.pairplot(data=pp_df, hue='target')

In [None]:
# Annotated heatmap of the pairwise correlation matrix on a wide figure.
corr_matrix = df.corr()
fig, ax = plt.subplots(figsize=(20, 8))
sns.heatmap(corr_matrix, annot=True, cmap='viridis', ax=ax)

In [None]:
# Feature matrix: every column except the label.
X = df.drop(columns='target')

In [None]:
# Label vector.
y = df['target']

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 10% of the rows as a test set; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=101)

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
# Scaler that is fitted on the training split and reused for the test split.
scaler = StandardScaler()

In [None]:
# Fit the scaler on the training split only, then replace X_train with its scaled values.
# NOTE: this rebinds X_train, so re-running the cell refits the scaler on already-scaled data.
X_train = scaler.fit_transform(X_train)

In [None]:
# Apply the training-fitted scaler to the test split (no refit, so no test-set leakage).
# NOTE: this rebinds X_test, so re-running the cell would scale already-scaled data.
X_test = scaler.transform(X_test)

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
#help(LogisticRegression)

In [None]:
# The default 'lbfgs' solver supports only the 'l2' penalty (or none), but the grid
# search also tries 'l1' and 'elasticnet'; with lbfgs those candidates fail to fit.
# 'saga' supports all three penalties. max_iter is raised well above the default
# to give the solver room to converge.
base_model = LogisticRegression(solver='saga', max_iter=5000)

In [None]:
from sklearn.model_selection import GridSearchCV

In [None]:
# Search space: penalty type, elastic-net mixing ratio, and C (inverse regularisation strength).
penalty = ['l1', 'l2', 'elasticnet']
l1_ratio = np.linspace(0, 1, 10)
C = [100, 10, 1.0, 0.1, 0.01]

# l1_ratio only has an effect when penalty='elasticnet'. Crossing it with 'l1' and 'l2'
# in a single grid refits the same model once per l1_ratio value and triggers a warning
# each time. GridSearchCV accepts a list of grids, so l1_ratio is varied only where it matters.
param_grid = [
    {'penalty': ['l1', 'l2'], 'C': C},
    {'penalty': ['elasticnet'], 'l1_ratio': l1_ratio, 'C': C},
]

In [None]:
# 5-fold cross-validated search over param_grid, starting from base_model.
grid_model = GridSearchCV(
    estimator=base_model,
    param_grid=param_grid,
    cv=5,
    verbose=1,
)

In [None]:
# Run the cross-validated grid search on the scaled training split.
grid_model.fit(X_train, y_train)

In [None]:
# Hyper-parameter combination selected by the grid search.
grid_model.best_params_

In [None]:
# Predicted labels for the held-out (scaled) test split.
grid_pred = grid_model.predict(X_test)

In [None]:
from sklearn.metrics import (
    ConfusionMatrixDisplay,
    RocCurveDisplay,
    classification_report,
    confusion_matrix,
)

# plot_confusion_matrix and plot_roc_curve were deprecated in scikit-learn 1.0 and
# removed in 1.2, so importing them raises ImportError on current releases.
# The Display.from_estimator class methods take the same leading (estimator, X, y)
# arguments and return the same Display objects, so they are bound to the old names
# as aliases for any cell that still calls them.
plot_confusion_matrix = ConfusionMatrixDisplay.from_estimator
plot_roc_curve = RocCurveDisplay.from_estimator

In [None]:
# classification_report returns a plain string; print it so the table layout is kept.
report_text = classification_report(y_test, grid_pred)
print(report_text)

In [None]:
# plot_confusion_matrix was removed in scikit-learn 1.2; ConfusionMatrixDisplay.from_estimator
# is its replacement. Imported here so this cell does not rely on the removed name.
from sklearn.metrics import ConfusionMatrixDisplay

# normalize='all' shows each cell as a fraction of all test samples.
ConfusionMatrixDisplay.from_estimator(grid_model, X_test, y_test, normalize='all')

In [None]:
# Raw confusion-matrix counts (rows: true label, columns: predicted label).
confusion_matrix(y_test,grid_pred)

In [None]:
# plot_roc_curve was removed in scikit-learn 1.2; RocCurveDisplay.from_estimator
# is its replacement. Imported here so this cell does not rely on the removed name.
from sklearn.metrics import RocCurveDisplay

RocCurveDisplay.from_estimator(grid_model, X_test, y_test)

In [None]:
# Per-class predicted probabilities for every row of the test split.
grid_model.predict_proba(X_test)

In [None]:
# One hypothetical patient in raw feature units, in the same column order as X.
patient_raw = pd.DataFrame(
    [[54., 1., 0., 122., 286., 0., 0., 116., 1., 3.2, 1., 2., 2.]],
    columns=X.columns,
)

# grid_model was trained on StandardScaler output, so the row must pass through the
# same fitted scaler before prediction. Feeding raw values (e.g. chol=286) to a model
# fitted on standardised features gives a meaningless prediction.
patient = scaler.transform(patient_raw)

In [None]:
# Predicted label for the single patient row.
# NOTE(review): grid_model was fit on StandardScaler output, so `patient` must be
# expressed in that same scaled feature space for this prediction to be meaningful.
grid_model.predict(patient)

In [None]:
# Per-class predicted probabilities for the single patient row.
# NOTE(review): grid_model was fit on StandardScaler output, so `patient` must be
# expressed in that same scaled feature space for these probabilities to be meaningful.
grid_model.predict_proba(patient)