# Simple Machine Learning
For more information, see https://www.kaggle.com/ronitf/heart-disease-uci/

# Import Dependencies

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import ipywidgets as widgets
from ipywidgets import interact
from IPython.display import display, HTML

In [None]:
# Read the heart-disease CSV from the local data directory into a DataFrame
# that the remaining cells use as `df_heart`.
df_heart = pd.read_csv(filepath_or_buffer="data/heart.csv")

In [None]:
# Display the dtype of every column in the loaded table.
df_heart.dtypes

In [None]:
# Preview the first three rows of the table.
df_heart.head(n=3)

In [None]:
# Plot one histogram per selected column: 'chol', 'age' and 'cp'.
df_heart.hist(column=['chol', 'age', 'cp'])

In [None]:
# Dropdowns for picking the two columns to plot against each other.
# Both list every column of df_heart; X starts on the first column and
# Y on the second.
wgt_col_select_x = widgets.Dropdown(
    options=list(df_heart.columns),
    value=df_heart.columns[0],
    description='Column X',
    disabled=False,
)
wgt_col_select_y = widgets.Dropdown(
    options=list(df_heart.columns),
    value=df_heart.columns[1],
    description='Column Y',
    disabled=False,
)

# `box_layout` was used below without being defined in any cell, which made
# this cell fail with NameError. A plain horizontal flex row is used here.
# NOTE(review): if a specific layout was intended, adjust these properties.
box_layout = widgets.Layout(
    display='flex',
    flex_flow='row',
    align_items='center',
)

# Place the two dropdowns side by side.
ui_3 = widgets.HBox([wgt_col_select_x, wgt_col_select_y], layout=box_layout)


def show_lmplot(x, y):
    """Draw a seaborn ``lmplot`` of column ``y`` against column ``x``.

    Args:
        x: Name of the ``df_heart`` column for the horizontal axis.
        y: Name of the ``df_heart`` column for the vertical axis.
    """
    # seaborn is not imported by the notebook's import cell, so `sns` was an
    # undefined name here; import it locally so the callback can run.
    import seaborn as sns

    # The returned grid object is not needed; the figure itself is the output.
    sns.lmplot(x=x, y=y, data=df_heart)


# Set up the interactive widgets: show_lmplot is invoked with the current
# dropdown values, keyed by its parameter names.
out_3 = widgets.interactive_output(
    show_lmplot,
    {
        'x': wgt_col_select_x,
        'y': wgt_col_select_y,
    },
)

# Show the widgets: the controls first, then the plot output area.
display(ui_3, out_3)

In [None]:
# `sklearn.externals.six` has been removed from scikit-learn, so importing
# StringIO from it raises ImportError on current releases. The standard
# library's io.StringIO is the drop-in replacement.
from io import StringIO

from IPython.display import Image
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import export_graphviz

# NOTE: If using conda, install graphviz with, 'conda install graphviz'
import pydotplus

In [None]:
def train(features, *, max_depth=4, random_state=1):
    """Fit a decision tree on ``df_heart`` and score it on a held-out split.

    Args:
        features: Column name, or sequence of column names, of ``df_heart``
            to use as predictors. The label is always the ``'target'``
            column.
        max_depth: Maximum depth of the tree. Defaults to 4, the value that
            was previously hard-coded.
        random_state: Seed used for both the train/test split and the
            classifier. Defaults to 1, the split seed that was previously
            hard-coded. Seeding the classifier as well makes the fitted
            tree reproducible between runs.

    Returns:
        A tuple ``(accuracy, clf)``: the accuracy score on the test split
        and the fitted ``DecisionTreeClassifier``.
    """
    # A bare string would select a 1-D Series, which `fit` rejects; wrap it
    # so the predictors are always a 2-D DataFrame.
    columns = [features] if isinstance(features, str) else list(features)

    model = DecisionTreeClassifier(
        max_depth=max_depth,
        random_state=random_state,
    )

    # Split the table into label and predictor columns.
    y = df_heart['target']
    x = df_heart[columns]

    X_train, X_test, y_train, y_test = train_test_split(
        x, y, random_state=random_state
    )

    # fit() returns the estimator itself, so `clf` is the fitted model.
    clf = model.fit(X_train, y_train)
    y_predicted = clf.predict(X_test)

    # Model accuracy: how often is the classifier correct on unseen rows?
    return (metrics.accuracy_score(y_test, y_predicted), clf)

# Explore Model Accuracy

In [None]:
# Columns used as predictors for this experiment.
features = ['sex', 'chol', 'ca', 'thal', 'slope']

# `results` is (accuracy, fitted classifier); it is kept under this name
# because the plotting cell reads results[1].
results = train(features)
accuracy, fitted_tree = results
print(f"Accuracy: {accuracy}")

# Importances come back in the same order as the feature columns, so pair
# them up directly instead of indexing.
print("Feature Importance")
for feat, feat_imp in zip(features, fitted_tree.feature_importances_):
    print("{:>8} - {:>4}".format(feat, feat_imp))

# Plot Decision Tree

In [None]:
# `sklearn.externals.six` has been removed from scikit-learn; io.StringIO
# from the standard library provides the same in-memory text buffer.
from io import StringIO

from IPython.display import Image
from sklearn.tree import export_graphviz
import pydotplus

# Export the fitted tree (results[1]) as Graphviz DOT text into memory.
dot_data = StringIO()
export_graphviz(
    results[1],
    out_file=dot_data,
    filled=True,
    rounded=True,
    special_characters=True,
    feature_names=features,
    class_names=['0', '1'],
)

# Build the graph from the DOT text, save it as a PNG next to the notebook,
# and display the rendered image as the cell output.
graph = pydotplus.graph_from_dot_data(dot_data.getvalue())
graph.write_png('heart_disease.png')
Image(graph.create_png())