In [None]:
import io
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn import metrics
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.figure_factory as ff
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import log_loss,roc_auc_score,precision_score,f1_score,recall_score,roc_curve,auc
from sklearn.metrics import classification_report, confusion_matrix,accuracy_score,fbeta_score,matthews_corrcoef

# Load the dataset: open the Colab upload dialog so the CSV can be selected.
# NOTE(review): `df` is not a DataFrame yet. The next cell reads it through
# .keys() and len(df[fn]), so it is presumably a mapping of uploaded file name
# to file content; it is rebound to a real DataFrame by pd.read_csv afterwards.
from google.colab import files
df = files.upload()

In [None]:
# Report the name and size of every file received from the upload dialog.
for uploaded_name, payload in df.items():
    upload_size = len(payload)
    print('User upload file "{name}" with {length} bytes'.format(name=uploaded_name, length=upload_size))

In [None]:
# Parse the CSV from the working directory into a DataFrame (this rebinds `df`,
# which held the upload result until now) and preview the first rows.
df=pd.read_csv("heart_disease_uci.csv")
df.head()

In [None]:
# Drop every row that has at least one missing value, then build a shuffled
# copy (`data`) that the exploratory plots below read from.
df.dropna(inplace = True)
from sklearn.utils import shuffle
# A fixed seed keeps the row order identical across Restart & Run All;
# 5 matches the seed used for the train/test split further down.
data = shuffle(df, random_state=5)
data.info()

In [None]:
# Box plots of age: all patients, then split by the 'sex' column.
# Each entry is (trace name, ages to plot, marker colour).
age_groups = [
    ('Age', data['age'], 'green'),
    ('Male only', data[data['sex'] == 'Male']['age'], 'blue'),
    ('Female only', data[data['sex'] == 'Female']['age'], 'red'),
]

fig = go.Figure()
for trace_name, ages, colour in age_groups:
    fig.add_trace(go.Box(y=ages.values, name=trace_name, marker_color=colour, boxmean=True))

fig.update_layout(title='Age Distribution(all)', yaxis_title='Age', title_x=0.5)
# Draw a black frame around the plot area on both axes.
for update_axes in (fig.update_xaxes, fig.update_yaxes):
    update_axes(showline=True, linewidth=2, linecolor='black', mirror=True)
fig.show()

In [None]:
# Distribution plot of patient age built with plotly's figure_factory.
group_labels = ['Age Distribution'] # name of the dataset
fig = ff.create_distplot([data['age']], group_labels)
# Fixed the misspelled y-axis title ('propotion').
fig.update_layout(title='Age Distribution(all)', yaxis_title='Proportion', xaxis_title='Age', title_x=0.5)
fig.show()

In [None]:
# Donut chart of how many rows fall into each 'sex' category.
# rename_axis + reset_index(name=...) yields the columns ['sex', 'count'] on
# every pandas version. The previous rename({'index': ..., 'sex': 'count'})
# produced two 'count' columns on pandas >= 2.0, where value_counts() already
# names the result 'count'.
gdf = df['sex'].value_counts().rename_axis('sex').reset_index(name='count')
# Labels come from the data, so they always line up with their counts instead
# of relying on 'Male' being the most frequent value.
fig = go.Figure([go.Pie(labels=gdf['sex'], values=gdf['count'], hole=0.5)])
fig.update_traces(hoverinfo='label+percent', textinfo='value+percent', textfont_size=15, insidetextorientation='radial')
fig.update_layout(title="Male to Female ratio in the study", title_x=0.5)
fig.show()

In [None]:
# Donut chart of how many rows fall into each 'restecg' category.
# rename_axis + reset_index(name=...) yields the columns ['restecg', 'count']
# on every pandas version. The previous rename() call left no 'restecg' column
# on pandas >= 2.0, so the label lookup raised KeyError.
gdf = df['restecg'].value_counts().rename_axis('restecg').reset_index(name='count')
fig = go.Figure([go.Pie(labels=gdf['restecg'], values=gdf['count'], hole=0.5)])
fig.update_traces(hoverinfo='label+percent', textinfo='value+percent', textfont_size=15, insidetextorientation='radial')
fig.update_layout(title="ECG in Rest", title_x=0.5)
fig.show()

In [None]:
# Donut chart of how many rows fall into each 'thal' category.
# rename_axis + reset_index(name=...) yields the columns ['thal', 'count'] on
# every pandas version. The previous rename() call left no 'thal' column on
# pandas >= 2.0, so the label lookup raised KeyError.
gdf = df['thal'].value_counts().rename_axis('thal').reset_index(name='count')
fig = go.Figure([go.Pie(labels=gdf['thal'], values=gdf['count'], hole=0.5)])
fig.update_traces(hoverinfo='label+percent', textinfo='value+percent', textfont_size=15, insidetextorientation='radial')
# The old title ("maximum heart rate achieved") described a different
# measurement; this chart shows the categories of the 'thal' column.
fig.update_layout(title="Thal Categories", title_x=0.5)
fig.show()

In [None]:
# Donut chart of how many rows fall into each chest-pain ('cp') category.
# rename_axis + reset_index(name=...) yields the columns ['cp', 'count'] on
# every pandas version. The previous rename() call left no 'cp' column on
# pandas >= 2.0, so the label lookup raised KeyError.
gdf = df['cp'].value_counts().rename_axis('cp').reset_index(name='count')
fig = go.Figure([go.Pie(labels=gdf['cp'], values=gdf['count'], hole=0.5)])
fig.update_traces(hoverinfo='label+percent', textinfo='value+percent', textfont_size=15, insidetextorientation='radial')
fig.update_layout(title="Chest Pain Conditions", title_x=0.5)
fig.show()

In [None]:
# Drop rows with missing values and rebuild the shuffled copy `data`.
# NOTE(review): this repeats an earlier cell verbatim; kept so the notebook's
# cell sequence is unchanged.
df.dropna(inplace = True)
from sklearn.utils import shuffle
# A fixed seed keeps the row order identical across Restart & Run All;
# 5 matches the seed used for the train/test split further down.
data = shuffle(df, random_state=5)
data.info()

In [None]:
# Peek at five randomly chosen rows.
df.sample(n=5)

In [None]:
# Number of missing values in each column.
df.isnull().sum()

In [None]:
# Column names, non-null counts and dtypes of the current frame.
df.info()

In [None]:
# Number of rows that are exact duplicates of an earlier row.
df.duplicated().sum()

In [None]:
# Remove rows containing any missing value (in place), then show the
# per-column null counts to confirm none remain.
df.dropna(inplace=True)
df.isna().sum()

In [None]:
# Summary statistics, transposed so each column of the frame becomes a row.
df.describe().T

In [None]:
# Row count per 'sex' label before the column is encoded.
df['sex'].value_counts()

In [None]:
# Encode 'sex' as integers: 'Male' -> 0, every other label -> 1.
# The guard makes the cell safe to re-run: once the column is numeric, no value
# equals 'Male' any more, so an unguarded re-run would turn every row into 1.
if not pd.api.types.is_numeric_dtype(df['sex']):
    sex_codes = {'Male': 0}
    df['sex'] = df['sex'].map(lambda label: sex_codes.get(label, 1))
df['sex'].value_counts()

In [None]:
# Row count per 'dataset' label before the column is encoded.
df['dataset'].value_counts()

In [None]:
# Encode 'dataset' as integers: 'Cleveland' -> 0, 'Hungary' -> 1,
# every other label -> 2.
# The guard makes the cell safe to re-run: once the column is numeric, an
# unguarded re-run would send every row to the fallback code 2.
if not pd.api.types.is_numeric_dtype(df['dataset']):
    dataset_codes = {'Cleveland': 0, 'Hungary': 1}
    df['dataset'] = df['dataset'].map(lambda label: dataset_codes.get(label, 2))
df['dataset'].value_counts()

In [None]:
# Row count per chest-pain ('cp') label before the column is encoded.
df['cp'].value_counts()

In [None]:
# Encode 'cp' as integers: 'typical angina' -> 0, 'asymptomatic' -> 1,
# 'non-anginal' -> 2, every other label -> 3.
# The guard makes the cell safe to re-run: once the column is numeric, an
# unguarded re-run would send every row to the fallback code 3.
if not pd.api.types.is_numeric_dtype(df['cp']):
    cp_codes = {'typical angina': 0, 'asymptomatic': 1, 'non-anginal': 2}
    df['cp'] = df['cp'].map(lambda label: cp_codes.get(label, 3))
df['cp'].value_counts()

In [None]:
# Row count per 'fbs' value before the column is converted.
df['fbs'].value_counts()

In [None]:
# Convert the 'fbs' flag to 0/1 integers.
# get_dummies(..., drop_first=True) returns a DataFrame, so assigning it to a
# single column only worked while exactly one dummy column was left; it fails
# when the column holds one or more than two distinct values. A direct cast
# gives the same True -> 1 / False -> 0 indicator and is safe to re-run.
# NOTE(review): assumes 'fbs' holds booleans; confirm with the value_counts
# output. astype(int) raises on non-numeric strings rather than guessing.
df['fbs'] = df['fbs'].astype(int)

In [None]:
# Row count per 'exang' value before the column is converted.
df['exang'].value_counts()

In [None]:
# Convert the 'exang' flag to 0/1 integers.
# get_dummies(..., drop_first=True) returns a DataFrame, so assigning it to a
# single column only worked while exactly one dummy column was left; it fails
# when the column holds one or more than two distinct values. A direct cast
# gives the same True -> 1 / False -> 0 indicator and is safe to re-run.
# NOTE(review): assumes 'exang' holds booleans; confirm with the value_counts
# output. astype(int) raises on non-numeric strings rather than guessing.
df['exang'] = df['exang'].astype(int)

In [None]:
# Row count per 'restecg' label before the column is encoded.
df['restecg'].value_counts()

In [None]:
# Encode 'restecg' as integers: 'lv hypertrophy' -> 0, 'normal' -> 1,
# every other label -> 2.
# The guard makes the cell safe to re-run: once the column is numeric, an
# unguarded re-run would send every row to the fallback code 2.
if not pd.api.types.is_numeric_dtype(df['restecg']):
    restecg_codes = {'lv hypertrophy': 0, 'normal': 1}
    df['restecg'] = df['restecg'].map(lambda label: restecg_codes.get(label, 2))
df['restecg'].value_counts()

In [None]:
# Row count per 'slope' label before the column is encoded.
df['slope'].value_counts()

In [None]:
# Encode 'slope' as integers: 'downsloping' -> 0, 'flat' -> 1,
# every other label -> 2.
# The guard makes the cell safe to re-run: once the column is numeric, an
# unguarded re-run would send every row to the fallback code 2.
if not pd.api.types.is_numeric_dtype(df['slope']):
    slope_codes = {'downsloping': 0, 'flat': 1}
    df['slope'] = df['slope'].map(lambda label: slope_codes.get(label, 2))
df['slope'].value_counts()

In [None]:
# Row count per 'thal' label before the column is encoded.
df['thal'].value_counts()

In [None]:
# Encode 'thal' as integers: 'fixed defect' -> 0, 'normal' -> 1,
# every other label -> 2.
# The guard makes the cell safe to re-run: once the column is numeric, an
# unguarded re-run would send every row to the fallback code 2.
if not pd.api.types.is_numeric_dtype(df['thal']):
    thal_codes = {'fixed defect': 0, 'normal': 1}
    df['thal'] = df['thal'].map(lambda label: thal_codes.get(label, 2))
df['thal'].value_counts()

In [None]:
# Pairwise correlation of all columns, drawn as an annotated heatmap and then
# printed as a plain table.
corr = df.corr()
heatmap_fig, heatmap_ax = plt.subplots(figsize=(14, 14))
sns.heatmap(
    corr,
    annot=True,
    fmt='.2f',
    annot_kws={'size': 15},
    cmap='coolwarm',
    ax=heatmap_ax,
)
plt.show()
print(corr)

In [None]:
# Separate the target ('num') from the feature matrix.
# 'id' is also dropped when present: a row identifier carries no information
# about the patient and only lets the models fit the row order.
# errors='ignore' keeps this working for files without an 'id' column.
X = df.drop(columns=['num', 'id'], errors='ignore')
y = df['num']

In [None]:
# Hold out 20% of the rows for testing, keeping the class proportions of `y`
# equal in both parts; the fixed seed makes the split repeatable.
split_options = dict(stratify=y, test_size=0.2, shuffle=True, random_state=5)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

In [None]:
# Fit three logistic-regression variants that differ in their optimiser and
# record each one's accuracy (in percent) on the held-out test split.
# saga and SGD are stochastic, so both get a fixed random_state to make the
# reported accuracies reproducible. Each accuracy is computed once from the
# stored predictions and reused for both the printout and the AG/NM/DM value.
# NOTE(review): saga and SGD usually converge better on standardised
# features; consider scaling X before fitting.

# 1. Accelerated Gradient Methods
model_accelerated = LogisticRegression(solver='saga', max_iter=10000, penalty='elasticnet', l1_ratio=0.5, random_state=5)
model_accelerated.fit(X_train, y_train)
y_pred_accelerated = model_accelerated.predict(X_test)
AG = accuracy_score(y_test, y_pred_accelerated)*100
print("Test Accuracy of Accelerated Gradient Methods: {}%".format(round(AG,2)))

# 2. Newton Methods
model_newton = LogisticRegression(solver='newton-cg', max_iter=10000)
model_newton.fit(X_train, y_train)
y_pred_newton = model_newton.predict(X_test)
NM = accuracy_score(y_test, y_pred_newton)*100
print("Test Accuracy of Newton Methods: {}%".format(round(NM,2)))

# 3. Descent Methods
# loss='log' was renamed to 'log_loss' in scikit-learn 1.1 and the old
# spelling was removed in 1.3, where it raises an error.
model_descent = SGDClassifier(loss='log_loss', max_iter=10000, random_state=5)
model_descent.fit(X_train, y_train)
y_pred_descent = model_descent.predict(X_test)
DM = accuracy_score(y_test, y_pred_descent)*100
print("Test Accuracy of Descent Methods: {}%".format(round(DM,2)))

In [None]:
import matplotlib.pyplot as plt

# Bar chart comparing the test accuracy (in percent) of the three optimisers.
# plt.subplots() keeps the default margins, so the title and axis labels stay
# inside the figure; add_axes([0, 0, 1, 1]) stretched the axes over the whole
# canvas and left no room for them.
fig, ax = plt.subplots()
ax.set_title('Test Accuracy Difference')
ax.set_ylabel('Scores')
algos = ['Accelerated Gradient', 'Newton Methods', 'Descent Methods']
Accurarcy = [AG,NM,DM]
acc = ax.bar(algos,Accurarcy)

# Label each bar with its accuracy rounded to two decimals, matching the
# printed scores; int(height) truncated e.g. 59.9 down to 59.
for rect in acc:
    height = rect.get_height()
    ax.text(rect.get_x() + rect.get_width()/2., height,
            '{:.2f}%'.format(height), ha='center', va='bottom', color='g')
plt.show()