In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pylab as plt
import plotly.express as px
import plotly.graph_objs as go
import warnings
# Suppress every warning for the rest of the session; this also hides
# deprecation notices emitted by the libraries used in later cells.
warnings.filterwarnings("ignore")


In [None]:
# Load the heart-failure clinical records CSV into `df`
# (the path is relative to the notebook's working directory).
df=pd.read_csv('../input/heart-failure-clinical-data/heart_failure_clinical_records_dataset.csv')

In [None]:
# Preview the first eight rows.
df.head(8)

In [None]:
# Column names, dtypes and non-null counts.
df.info()

In [None]:
# (rows, columns) of the loaded frame.
df.shape

In [None]:
# Number of missing values in each column.
df.isnull().sum()

In [None]:
# Number of rows that are exact duplicates of an earlier row.
df.duplicated().sum()

In [None]:
# Keep the column index around under the name `features`.
features = df.columns

# Print how many distinct (non-null) values each column holds.
for column_name, distinct_count in df.nunique().items():
    print(f'{column_name}--->{distinct_count}')

In [None]:
# Summary statistics, one row per column, ordered by descending mean.
summary = df.describe().T.sort_values(by='mean', ascending=False)

# Shade the whole table, then overlay in-cell bars on the std and mean columns.
(
    summary.style
    .background_gradient(cmap='BuGn')
    .bar(subset=['std'], color='red')
    .bar(subset=['mean'], color='blue')
)

In [None]:
# Pairwise correlation matrix of the columns, shaded with the viridis colour map.
df.corr().style.background_gradient(cmap='viridis')

In [None]:
# Bar chart of DEATH_EVENT counts for every distinct value of `age`.
pd.crosstab(df['age'], df['DEATH_EVENT']).plot(kind='bar')

In [None]:
# List the column names for reference.
df.columns

In [None]:
# 2x3 grid of count plots, one panel per selected feature.
f, ax = plt.subplots(nrows=2, ncols=3, figsize=(15, 15))

# (column, (row, col)) pairs; the layout matches the original hand-written calls.
panels = [
    ('anaemia', (0, 0)),
    ('diabetes', (1, 0)),
    ('ejection_fraction', (0, 1)),
    ('high_blood_pressure', (1, 1)),
    ('sex', (0, 2)),
    ('smoking', (1, 2)),
]
for column, position in panels:
    sns.countplot(x=column, data=df, ax=ax[position])

# Render the figure without echoing the last Axes repr into the cell output.
plt.show()


In [None]:


# Box plot of age, grouped by smoking status and coloured by DEATH_EVENT;
# every patient is also drawn as a point, with all columns in the hover tooltip.
fig = px.box(df, y="age", x="smoking", color="DEATH_EVENT", points="all",  hover_data=df.columns)
# The title names the variables that are actually plotted (age and smoking).
fig.update_layout(title_text="Death dependence on age and smoking")
fig.show()

In [None]:
# Distribution of patient age: density-normalised histogram with a KDE overlay.
# `histplot(..., kde=True, stat='density')` replaces the deprecated `distplot`.
sns.histplot(df['age'], kde=True, stat='density')
plt.show()

In [None]:
#df.groupby('age')[features].mean()

In [None]:
# Split patients on the `anaemia` column (the original filtered on `sex`,
# which contradicted the variable names and the chart title).
true_anaemia = df[df["anaemia"]==1]
false_anaemia = df[df["anaemia"]==0]

# Mask each subgroup with its own DEATH_EVENT column so that the boolean
# mask and the frame being filtered share the same index.
true_anaemia_nodeath = true_anaemia[true_anaemia["DEATH_EVENT"]==0]
true_anaemia_death = true_anaemia[true_anaemia["DEATH_EVENT"]==1]
false_anaemia_nodeath = false_anaemia[false_anaemia["DEATH_EVENT"]==0]
false_anaemia_death = false_anaemia[false_anaemia["DEATH_EVENT"]==1]

# Donut chart of the four (anaemia, outcome) group sizes.
labels = ['true_anaemia_nodeath','true_anaemia_death', 'false_anaemia_nodeath', 'false_anaemia_death']
values = [len(true_anaemia_nodeath), len(true_anaemia_death),
          len(false_anaemia_nodeath), len(false_anaemia_death)]
fig = go.Figure(data=[go.Pie(labels=labels, values=values, hole=.4)])
fig.update_layout(
    title_text="Anaemia analysis")
fig.show()

In [None]:
# Split patients on the `smoking` column (the original filtered on `sex`,
# which contradicted the variable names and the chart title).
true_smoking = df[df["smoking"]==1]
false_smoking = df[df["smoking"]==0]

# Mask each subgroup with its own DEATH_EVENT column so that the boolean
# mask and the frame being filtered share the same index.
true_smoking_nodeath = true_smoking[true_smoking["DEATH_EVENT"]==0]
true_smoking_death = true_smoking[true_smoking["DEATH_EVENT"]==1]
false_smoking_nodeath = false_smoking[false_smoking["DEATH_EVENT"]==0]
false_smoking_death = false_smoking[false_smoking["DEATH_EVENT"]==1]

# Donut chart of the four (smoking, outcome) group sizes.
labels = ['true_smoking_nodeath','true_smoking_death', 'false_smoking_nodeath', 'false_smoking_death']
values = [len(true_smoking_nodeath), len(true_smoking_death),
          len(false_smoking_nodeath), len(false_smoking_death)]
fig = go.Figure(data=[go.Pie(labels=labels, values=values, hole=.4)])
fig.update_layout(
    title_text="smoking analysis")
fig.show()

In [None]:
# Mean and standard deviation of the `time` column, shown as a tuple.
df.time.mean(),df.time.std()

In [None]:
# One-standard-deviation band around the mean of `time`.
# These limits are not referenced anywhere else in the visible notebook.
time_mean, time_std = df.time.mean(), df.time.std()
upperlimit = time_mean + time_std
lowerlimit = time_mean - time_std

In [None]:
# List the column names again before choosing which ones to scale.
df.columns

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Columns that are rescaled; every other column is left as loaded.
cols_to_scale = [
    'age', 'creatinine_phosphokinase', 'ejection_fraction', 'platelets',
    'serum_creatinine', 'serum_sodium', 'time',
]

# Fit the scaler on the selected columns and write the scaled values back
# into `df` in place.
# NOTE(review): the scaler is fitted on the full dataset, before any
# train/test split - confirm that this is acceptable for the evaluation.
scaler = MinMaxScaler()
scaled_block = scaler.fit_transform(df[cols_to_scale])
df[cols_to_scale] = scaled_block

In [None]:
# Target vector: the DEATH_EVENT column; feature matrix: everything else.
TARGET = 'DEATH_EVENT'
X = df.drop(columns=[TARGET])
y = df[TARGET]

In [None]:
# (rows, feature columns) of the feature matrix.
X.shape

In [None]:
# Length of the target vector.
y.shape

In [None]:
from imblearn.over_sampling import SMOTE

# Oversample the minority class with SMOTE. A fixed `random_state` makes the
# synthetic samples (and everything computed from them) reproducible.
smote = SMOTE(sampling_strategy='minority', random_state=5)
X_sm, y_sm = smote.fit_resample(X, y)

# Class counts after resampling.
y_sm.value_counts()

In [None]:
from sklearn.model_selection import train_test_split

# Split the ORIGINAL records first so the hold-out set contains only real
# patients. Splitting after SMOTE would place synthetic points, interpolated
# from training neighbours, in the test set and inflate the scores.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=5, stratify=y)

# Balance the classes in the training fold only.
X_train, y_train = SMOTE(sampling_strategy='minority', random_state=5).fit_resample(X_train, y_train)

# Machine learning analysis

In [None]:
from sklearn.model_selection import ShuffleSplit
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from sklearn import svm
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB


# Candidate estimators and the hyper-parameter grid searched for each.
# The dict's insertion order is the order in which the candidates are visited.
# NOTE(review): `multi_class` is reported as deprecated in recent scikit-learn
# releases - confirm against the installed version.
model_params = dict(
    random_forest=dict(
        model=RandomForestClassifier(),
        params={'n_estimators': [1, 5, 10]},
    ),
    logistic_regression=dict(
        model=LogisticRegression(solver='liblinear', multi_class='auto'),
        params={'C': [1, 5, 10]},
    ),
    svm=dict(
        model=svm.SVC(gamma='auto'),
        params={'C': [1, 10, 20], 'kernel': ['rbf', 'linear']},
    ),
    naive_bayes_gaussian=dict(
        model=GaussianNB(),
        params={},
    ),
    naive_bayes_multinomial=dict(
        model=MultinomialNB(),
        params={},
    ),
    decision_tree=dict(
        model=DecisionTreeClassifier(),
        params={'criterion': ['gini', 'entropy']},
    ),
    XGB_Classifier=dict(
        model=XGBClassifier(),
        params={'base_score': [0.5]},
    ),
)


In [None]:
from sklearn.model_selection import GridSearchCV

# Run a grid search for every candidate in `model_params` and record the
# best cross-validated score and the parameters that achieved it.
scores = []
cv = ShuffleSplit(n_splits=5, test_size=0.2, random_state=0)
for model_name, mp in model_params.items():
    clf =  GridSearchCV(mp['model'], mp['params'], cv=cv, return_train_score=False)
    # NOTE(review): the search runs on the full (X, y), not on the training
    # split used by the later cells - confirm that this is intended.
    clf.fit(X,y)
    scores.append({
        'model': model_name,
        'best_score': clf.best_score_,
        'best_params': clf.best_params_
    })

# Store the results under their own name: binding them to `df` would replace
# the dataset and break every earlier cell on re-run.
scores_df = pd.DataFrame(scores,columns=['model','best_score','best_params'])
scores_df

In [None]:
# Train a support-vector classifier on the training split.
# It is built with default hyper-parameters; the `best_params_` collected by
# the grid search are not passed in here.
model=svm.SVC()
model.fit(X_train, y_train)


In [None]:
# Score of the SVC on the held-out split.
model.score( X_test, y_test)

In [None]:
# SVC predictions for the held-out split.
y_predicted = model.predict(X_test)

In [None]:
# First five predictions ...
y_predicted[:5]

In [None]:
# ... and the first five entries of `y_test`, for comparison.
y_test[:5]

In [None]:
from sklearn.metrics import confusion_matrix , classification_report

# Classification report for the SVC predictions on the held-out split.
print(classification_report(y_test,y_predicted))

In [None]:
from sklearn.metrics import mean_squared_error

# Mean squared error between the true and predicted labels.
mse = mean_squared_error(y_test,y_predicted)
# Print the metric's name as text; interpolating the function object itself
# would print its repr (`<function mean_squared_error at 0x...>`).
print(f'mean_squared_error: {mse}')

In [None]:
from sklearn.metrics import confusion_matrix

# Confusion matrix of the SVC on the held-out split, drawn as an annotated heat map.
y_predicted = model.predict(X_test)
cm = confusion_matrix(y_test, y_predicted)

cm_fig, cm_ax = plt.subplots(figsize=(10, 7))
sns.heatmap(cm, annot=True, ax=cm_ax)
cm_ax.set_xlabel('Predicted')
cm_ax.set_ylabel('Truth')

# Deep learning analysis

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
# Fully connected binary classifier. It is bound to the name `mode`;
# `model` already refers to the SVC trained earlier.
hidden_units = [64, 32, 32, 32, 16]

stack = []
for depth, units in enumerate(hidden_units):
    if depth == 0:
        # Only the first layer declares the input width (12 feature columns).
        dense = keras.layers.Dense(units, input_dim=12, activation='relu')
    else:
        dense = keras.layers.Dense(units, activation='relu')
    stack.append(dense)
    # Each hidden layer is followed by a dropout layer with rate 0.5.
    stack.append(keras.layers.Dropout(0.5))

# Single sigmoid unit for the binary DEATH_EVENT target.
stack.append(keras.layers.Dense(1, activation='sigmoid'))

mode = keras.Sequential(stack)
mode.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train on the training split only; no validation data is passed to `fit`.
history = mode.fit(X_train, y_train, epochs=100, verbose=2, batch_size=4)

In [None]:
# Plot the per-epoch training curves recorded by `mode.fit`, one figure per metric.
# `fit` was called without validation data, so `history` holds training
# metrics only and the legend names the single 'Train' series.
for metric in ('loss', 'accuracy'):
    plt.plot(history.history[metric])
    plt.title(f'model {metric}')
    plt.ylabel(metric)
    plt.xlabel('epoch')
    plt.legend(['Train'], loc='upper left')
    plt.show()

In [None]:
# Loss and accuracy (the metric set in `compile`) of the network on the held-out split.
mode.evaluate(X_test, y_test)

In [None]:
# Raw network outputs for the held-out split, flattened to one dimension.
y_predicated=mode.predict(X_test).reshape(-1)


In [None]:
# Round the network outputs to the nearest integer to obtain hard labels.
y_predicated=np.round(y_predicated)
#y_predicated

In [None]:
# Classification report for the labels currently held in `y_predicated`.
print(classification_report(y_test, y_predicated))

In [None]:
y_predicated = model.predict(X_test)
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_predicated)
plt.figure(figsize = (7,7))
sns.heatmap(cm, annot=True)
plt.xlabel('Predicted')
plt.ylabel('Truth')

In [None]:
xp = model.predict(X_train)

In [None]:
Y_pre = np.append(xp, y_predicated)
model_resul = pd.DataFrame(Y_pre)


In [None]:
# Histogram of the pooled predicted labels stored in `model_resul`.
model_resul.plot.hist(figsize=(7,6))

plt.ylabel('cases_number')

In [None]:
def predict_death(anaemia,high_blood_pressure,serum_creatinine,serum_sodium,smoking):
    """Run the Keras network `mode` for one patient described by five measurements.

    The network was trained on every column of ``X``, so a full feature row is
    required. Columns that are not parameters of this function are filled with
    the column medians of ``X``.

    Parameters
    ----------
    anaemia, high_blood_pressure, smoking
        Flag values, written into the row unchanged (these columns are not in
        ``cols_to_scale``).
    serum_creatinine, serum_sodium
        Raw, unscaled measurements. They are mapped with the already fitted
        ``scaler`` so they match the scaled training data.

    Returns
    -------
    The network output for the single row, i.e. ``mode.predict(row)[0]``.
    """
    # Baseline patient: the median of every (already scaled) feature column.
    patient = X.median()

    # Columns that were never scaled are used as given.
    patient['anaemia'] = anaemia
    patient['high_blood_pressure'] = high_blood_pressure
    patient['smoking'] = smoking

    # MinMaxScaler transforms a column as value * scale_ + min_; apply the
    # fitted per-column coefficients to the two raw lab values.
    for column, raw_value in (('serum_creatinine', serum_creatinine),
                              ('serum_sodium', serum_sodium)):
        idx = cols_to_scale.index(column)
        patient[column] = raw_value * scaler.scale_[idx] + scaler.min_[idx]

    # Single-row batch, in the column order of X.
    x = patient.to_numpy(dtype='float32').reshape(1, -1)
    return mode.predict(x)[0]

death = predict_death(1, 1, 1.9, 137, 1)

In [None]:
# Show the value returned by `predict_death` for the example patient.
print(death) 