In [15]:
import pandas as pd
from sklearn import linear_model
from time import time
from sklearn.model_selection import train_test_split
from sklearn import metrics
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import tree
from sklearn.metrics import accuracy_score,confusion_matrix, classification_report
from sklearn import svm
import networkx as nx
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import normalize
from sklearn.decomposition import PCA
from sklearn.ensemble import BaggingClassifier, AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from ipywidgets import widgets
from IPython.display import display, clear_output

# Shared output area: the button callbacks defined in this notebook are
# decorated with `output.capture`, so everything they show lands in this widget.
output = widgets.Output()

# Dataset used by every analysis in this notebook.
# (A bare `df` expression in the middle of a cell displays nothing, so it was dropped.)
df = pd.read_csv("heart.csv")

# Page title.
title = widgets.HTML(value = "<h1 style='color:red;'>ML PROJECT</h1>")
display(title)

HTML(value='<h1>ML PROJECT</h1>')

In [16]:
# Project overview: topic, dataset name and a horizontal divider.

sub_label_1 = widgets.HTML(value="<h3>Topic: Healthcare</h3>")
sub_label_2 = widgets.HTML(value="<h3>Dataset chosen: heart.csv</h3>")
sub_label_3 = widgets.HTML(value="<br/><hr/>")

# display() accepts several objects and renders them in the order given.
display(sub_label_1, sub_label_2, sub_label_3)

# Column-by-column reference for the heart dataset.
sub_label_4 = widgets.HTML(value="<h3>Dataset parameters</h3>")
ul_s = widgets.HTML(value='''
    <ul>
        <li>age: Age of the person in years</li>
        <li>sex: Male(1) or Female(0)</li>
        <li>cp: Chest pain type (0-3)</li>
        <li>trestbps: Resting blood pressure (in mm/Hg on admission to hospital)</li>
        <li>chol: Serum cholestrol in mg/dl</li>
        <li>fbs: Fasting blood sugar (>120mg/dl => 1: True, 0:False) </li>
        <li>restecg: Resting electrocardiographic results (0-2)</li>
        <li>thalach: Maximum heart rate achieved</li>
        <li>exang: Exercise induced angina (1: Yes, 0: No)</li>
        <li>oldpeak: ST depression rate induced by exercise relative to rest</li>
        <li>slope: The slope of the peak exercise ST segment (0-2)</li>
        <li>ca: Number of major vessels coloured by fluoroscopy (0-3)</li>
        <li>thal: 1=normal, 2=fixed, 3=reversible defect</li>
        <li>target: 1 or 0</li>
    </ul>
''')
display(sub_label_4, ul_s)

HTML(value='<h3>Topic: Healthcare</h3>')

HTML(value='<h3>Dataset chosen: heart.csv</h3>')

HTML(value='<br/><hr/>')

HTML(value='<h3>Dataset parameters</h3>')

HTML(value='\n    <ul>\n        <li>age: Age of the person in years</li>\n        <li>sex: Male(1) or Female(0…

In [17]:
@output.capture(clear_output=True,wait=True)
def linear_regression(b):
    """Fit a linear regression on the heart dataset and show a prediction form.

    The model is fitted on the 13 feature columns against ``target``; its
    coefficients and intercept are displayed, followed by one text box per
    feature and a button that predicts the target for the entered values.

    Parameters
    ----------
    b : the button instance passed in by ``Button.on_click`` (unused).
    """
    feature_cols = ['age','sex','cp','trestbps','chol','fbs','restecg','thalach','exang','oldpeak','slope','ca','thal']

    # Fill missing resting blood pressure with the median. This writes back to the
    # shared `df`, exactly as before, so later analyses see the filled column too.
    df['trestbps'] = df['trestbps'].fillna(df['trestbps'].median())

    reg = linear_model.LinearRegression()
    reg.fit(df[feature_cols], df.target)

    display(widgets.Label(f'Coefficient: {reg.coef_}'))
    display(widgets.Label(f'Intercept: {reg.intercept_}'))

    # Placeholder text for each feature's input box, keyed by feature name.
    placeholders = {
        'age': "Enter age",
        'sex': "Enter 0 or 1",
        'cp': "Enter Chest pain type (4 values)",
        'trestbps': "Resting blood pressure",
        'chol': "Cholestrol in mg/dl",
        'fbs': "Fasting blood sugar > 120 mg/dl",
        'restecg': "Resting ECG value",
        'thalach': "Maximum heart rate acheived",
        'exang': "Exercise Induced Angina",
        'oldpeak': "ST Depression induced by exercise relative to rest",
        'slope': "Slope of the peak exercise ST Segment",
        'ca': "Number of major vessels coloured by fluoroscopy",
        'thal': "Thal",
    }
    inputs = {col: widgets.Text(placeholder=placeholders[col]) for col in feature_cols}
    for col in feature_cols:
        display(inputs[col])

    # clear_output=False: the prediction is appended below the form instead of replacing it.
    @output.capture(clear_output=False,wait=True)
    def calc_target(b):
        """Read the form, predict the target and display it."""
        try:
            # float(), not int(): features such as oldpeak take decimal values,
            # and int("2.3") raises ValueError.
            values = [float(inputs[col].value) for col in feature_cols]
        except ValueError:
            display(widgets.Label('Please enter a numeric value in every field.'))
            return
        # Predict on a named one-row frame so the columns line up with the
        # DataFrame the model was fitted on.
        sample = pd.DataFrame([values], columns=feature_cols)
        target = reg.predict(sample)
        display(widgets.Label(f'Target value: {target}'))

    calc_btn = widgets.Button(description="Calculate Target")
    calc_btn.on_click(calc_target)
    display(calc_btn)
    

    
                               
    

In [18]:
# helper function
def plot_confusionmatrix(y_train_pred,y_train,classes,dom):
    """Print a heading and draw a confusion-matrix heat map.

    Parameters
    ----------
    y_train_pred : predicted labels.
    y_train : true labels.
    classes : tick labels for both axes, in class order.
    dom : name of the data split (e.g. 'Train' or 'Test'), used in the heading.
    """
    print(f'{dom} Confusion matrix')
    # sklearn's signature is confusion_matrix(y_true, y_pred): rows are the true
    # classes and columns the predicted ones. The arguments used to be swapped,
    # which transposed the matrix.
    cf = confusion_matrix(y_train,y_train_pred)
    ax = sns.heatmap(cf,annot=True,yticklabels=classes,xticklabels=classes,cmap='Blues', fmt='g')
    ax.set_xlabel('Predicted label')
    ax.set_ylabel('True label')
    plt.tight_layout()
    plt.show()

@output.capture(clear_output=True,wait=True)
def CART_analysis(b):
    """Train a decision tree on a reduced feature set and visualise it.

    Shows the fitted tree, prints train/test accuracy and draws a confusion
    matrix for each split.

    Parameters
    ----------
    b : the button instance passed in by ``Button.on_click`` (unused).
    """
    # Every column listed here is dropped; whatever is left is the predictor set
    # (presumably just 'sex' and 'fbs' for the documented heart.csv schema - confirm).
    X = df.drop(columns=['target','age','trestbps','chol','thalach','oldpeak','cp','slope','thal','ca','exang','restecg'])
    y = df['target']
    # Seed the split so repeated clicks give the same tree and scores.
    x_train,x_test,y_train,y_test = train_test_split(X,y,stratify=y,random_state=0)
    clf = tree.DecisionTreeClassifier(random_state=0)
    clf.fit(x_train,y_train)
    y_train_pred = clf.predict(x_train)
    y_test_pred = clf.predict(x_test)

    # Print decision tree.
    plt.figure(figsize=(20,20))
    # Label nodes with the columns the tree was actually trained on. Passing
    # df.columns (all 14 names) shifted every label onto the wrong feature.
    features = list(X.columns)
    classes = ['Not heart disease','heart disease']
    tree.plot_tree(clf,feature_names=features,class_names=classes,filled=True)
    plt.show()

    # Matrix diagram (heat map)
    print(f'Train score: {accuracy_score(y_train,y_train_pred)}')
    print(f'Test score: {accuracy_score(y_test,y_test_pred)}')
    plot_confusionmatrix(y_train_pred,y_train,classes,dom='Train')
    plot_confusionmatrix(y_test_pred,y_test,classes,dom='Test')



In [19]:
@output.capture(clear_output=True,wait=True)
def heart_svm(b):
    """Fit a linear-kernel SVM on the heart data and report test metrics.

    Holds out 30% of the rows (fixed seed), prints accuracy, precision and
    recall on that hold-out set, then plots the test features against the
    predicted labels.

    Parameters
    ----------
    b : the button instance passed in by ``Button.on_click`` (unused).
    """
    labels = df["target"]
    features = df.drop(columns=["target"])
    split = train_test_split(features, labels, test_size=0.3, random_state=109)
    train_features, test_features, train_labels, test_labels = split

    classifier = svm.SVC(kernel='linear')
    classifier.fit(train_features, train_labels)
    predictions = classifier.predict(test_features)

    # Print out metrics.
    reports = (
        ("Accuracy:", metrics.accuracy_score),
        ("Precision:", metrics.precision_score),
        ("Recall:", metrics.recall_score),
    )
    for heading, score_fn in reports:
        print(heading, score_fn(test_labels, predictions))

    # Plot data
    plt.plot(test_features, predictions, "*")
    plt.show()

In [20]:
@output.capture(clear_output=True,wait=True)
def graph_clustering(b):
    """Draw a graph that links each cholesterol value to its target class.

    Parameters
    ----------
    b : the button instance passed in by ``Button.on_click`` (unused).
    """
    # One (chol, target) edge per row. zip pairs the columns by position, so this
    # does not depend on the frame having a default 0..n-1 index. nx.Graph keeps
    # a single copy of repeated nodes and edges.
    edges = list(zip(df["chol"], df["target"]))

    G = nx.Graph()
    G.add_edges_from(edges)
    plt.figure(figsize=(12,12))
    nx.draw(G,with_labels=True, node_color='green')
    plt.show()

In [21]:
@output.capture(clear_output=True,wait=True)
def DBSCAN_algo(b):
    """Cluster the heart data with DBSCAN and plot the clusters in PCA space.

    The frame is standardised, each row is rescaled to unit norm, the result is
    projected onto two principal components, and DBSCAN is run on that 2-D
    projection. Each cluster gets its own colour; noise points (label -1) are
    drawn in black.

    Parameters
    ----------
    b : the button instance passed in by ``Button.on_click`` (unused).
    """
    # Scaling the data to bring all the attributes to a comparable level
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(df)

    # Rescale every sample (row) to unit norm. Note that `normalize` does not
    # make the data Gaussian; it only fixes the length of each row vector.
    X_normalized = pd.DataFrame(normalize(X_scaled))

    # Project onto the first two principal components for clustering and plotting.
    pca = PCA(n_components = 2)
    X_principal = pd.DataFrame(pca.fit_transform(X_normalized), columns=['P1', 'P2'])

    labels = DBSCAN(eps = 0.0375, min_samples = 3).fit(X_principal).labels_

    cluster_colours = ['r', 'g', 'b', 'c', 'y', 'm']
    noise_colour = 'k'

    # Create the figure first so everything is drawn on a single canvas.
    plt.figure(figsize =(9, 9))

    # Plot one scatter per label that actually occurs, listing noise (-1) last.
    for label in sorted(set(labels), key=lambda value: (value == -1, value)):
        members = labels == label
        # Handle -1 explicitly: in Python -1 % 6 == 5, which would colour noise
        # like cluster 5 instead of black.
        if label == -1:
            colour = noise_colour
        else:
            colour = cluster_colours[label % len(cluster_colours)]
        plt.scatter(
            X_principal.loc[members, 'P1'], X_principal.loc[members, 'P2'],
            marker ='o', color = colour, label = f'Label {label}')

    plt.legend(scatterpoints = 1,
               loc ='upper left',
               ncol = 3,
               fontsize = 8)
    plt.show()

In [22]:
def _plot_ensemble_scores(make_classifier, estimator_range, X_train, X_test, y_train, y_test, title):
    """Fit one classifier per ensemble size, then print and plot test accuracy.

    Parameters
    ----------
    make_classifier : callable taking ``n_estimators`` and returning an unfitted classifier.
    estimator_range : ensemble sizes to try.
    X_train, X_test, y_train, y_test : the train/test split to fit and score on.
    title : plot title.

    Returns
    -------
    list of accuracy scores, one per entry in ``estimator_range``.
    """
    scores = []
    for n_estimators in estimator_range:
        clf = make_classifier(n_estimators)
        clf.fit(X_train, y_train)
        scores.append(accuracy_score(y_true = y_test, y_pred = clf.predict(X_test)))

    # Generate the plot of scores against number of estimators
    plt.figure(figsize=(9,6))
    plt.plot(estimator_range, scores)

    # Adjust labels and font (to make visible)
    plt.title(title, fontsize = 18)
    plt.xlabel("n_estimators", fontsize = 18)
    plt.ylabel("score", fontsize = 18)
    plt.tick_params(labelsize = 16)

    print(scores)
    plt.show()
    return scores


@output.capture(clear_output=True,wait=True)
def ensemble_learning(b):
    """Compare bagging and boosting accuracy across ensemble sizes.

    Splits the heart data 75/25 with a fixed seed, then trains a
    BaggingClassifier and an AdaBoostClassifier for each ensemble size. For each
    method the test accuracies are printed and plotted against ``n_estimators``.

    Parameters
    ----------
    b : the button instance passed in by ``Button.on_click`` (unused).
    """
    y = df["target"]
    x = df.drop(columns=["target"])

    X_train, X_test, y_train, y_test = train_test_split(x, y, test_size = 0.25, random_state = 22)

    estimator_range = [2,4,6,8,10,12,14,16]

    # Bagging plots
    _plot_ensemble_scores(
        lambda n_estimators: BaggingClassifier(n_estimators = n_estimators, random_state=22),
        estimator_range, X_train, X_test, y_train, y_test,
        title = "Bagging")

    # Boosting plots (seeded like the bagging run so repeated clicks agree)
    _plot_ensemble_scores(
        lambda n_estimators: AdaBoostClassifier(n_estimators = n_estimators, random_state=22),
        estimator_range, X_train, X_test, y_train, y_test,
        title = "Boosting (AdaBoost)")

In [23]:
# Environment setup via shell commands: enable the widgetsnbextension notebook
# extension and the voila server extension (both with --sys-prefix).
# NOTE(review): this looks like one-time setup rather than part of the analysis - confirm
# whether it needs to run on every execution of the notebook.
!jupyter nbextension enable --py --sys-prefix widgetsnbextension 
!jupyter serverextension enable voila --sys-prefix

Enabling notebook extension jupyter-js-widgets/extension...
      - Validating: ok
Enabling: voila
- Writing config: C:\Users\adiun\Github\ML_Mini_Project\ml_venv\etc\jupyter
    - Validating...
      voila 0.3.6 ok


In [30]:
def setup_ui(data_frame):
    """Render ``data_frame`` inside a new Output widget and return that widget."""
    frame_view = widgets.Output()
    # Anything displayed inside the `with` block is captured by the widget.
    with frame_view:
        display(data_frame)
    return frame_view

# clear_output=True, like the other button handlers: the table replaces whatever
# was shown before instead of stacking another copy on every click.
@output.capture(clear_output=True,wait=True)
def show_table(b):
    """Show the dataset table in the shared output area.

    Parameters
    ----------
    b : the button instance passed in by ``Button.on_click`` (unused).
    """
    op_1 = setup_ui(df)
    main_box = widgets.VBox([op_1])
    display(main_box)
    
@output.capture(clear_output=True,wait=True)
def clear_outs(b):
    """Empty the shared output area.

    Parameters
    ----------
    b : the button instance passed in by ``Button.on_click`` (unused).
    """
    # With wait=True the decorator's clear is deferred until new output arrives.
    # This immediate clear, issued inside the capture context, is what empties the widget.
    clear_output(wait=False)


# Heading above the button row.
label_title = widgets.HTML(value = "<h2>Choose any of your buttons</h2>")
display(label_title)

# One button per analysis.
tab_disp_btn = widgets.Button(description="Show Table")
lin_reg_btn = widgets.Button(description="Linear Regression")
cart_an_btn = widgets.Button(description="CART Analysis")
svm_btn = widgets.Button(description="SVM")
gc_btn = widgets.Button(description="Graph Clustering")
dbscan_btn = widgets.Button(description="DBSCAN Algorithm")
ens_btn = widgets.Button(description="Ensemble Learning")

# Attach each button to the callback that renders its analysis.
for _button, _handler in (
    (tab_disp_btn, show_table),
    (lin_reg_btn, linear_regression),
    (cart_an_btn, CART_analysis),
    (svm_btn, heart_svm),
    (gc_btn, graph_clustering),
    (dbscan_btn, DBSCAN_algo),
    (ens_btn, ensemble_learning),
):
    _button.on_click(_handler)

# All analysis buttons sit side by side in a single row.
models_hbox = widgets.HBox([tab_disp_btn,lin_reg_btn,cart_an_btn,svm_btn,gc_btn,dbscan_btn, ens_btn])
display(models_hbox)

# Clear output button, shown under a divider.
clr_btn = widgets.Button(description="Clear button")
clr_btn.on_click(clear_outs)
display(sub_label_3, clr_btn)

op_label = widgets.HTML(value = "<h2>View your output here: </h2>")
display(op_label)

# Last expression of the cell: renders the shared output widget.
output

HTML(value='<h2>Choose any of your buttons</h2>')

HBox(children=(Button(description='Show Table', style=ButtonStyle()), Button(description='Linear Regression', …

HTML(value='<br/><hr/>')

Button(description='Clear button', style=ButtonStyle())

HTML(value='<h2>View your output here: </h2>')

Output()