In [1]:
import math
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [2]:
df = pd.read_csv('heart.csv')

In [3]:
df.slope.unique()

array([2, 0, 1], dtype=int64)

In [4]:
df.dropna()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,52,1,0,125,212,0,1,168,0,1.0,2,2,3,0
1,53,1,0,140,203,1,0,155,1,3.1,0,0,3,0
2,70,1,0,145,174,0,1,125,1,2.6,0,0,3,0
3,61,1,0,148,203,0,1,161,0,0.0,2,1,3,0
4,62,0,0,138,294,1,1,106,0,1.9,1,3,2,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1020,59,1,1,140,221,0,1,164,1,0.0,2,0,2,1
1021,60,1,0,125,258,0,0,141,1,2.8,1,1,3,0
1022,47,1,0,110,275,0,0,118,1,1.0,1,1,2,0
1023,50,0,0,110,254,0,0,159,0,0.0,2,0,2,1


In [5]:
df.groupby(df.target).count()

Unnamed: 0_level_0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal
target,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
0,499,499,499,499,499,499,499,499,499,499,499,499,499
1,526,526,526,526,526,526,526,526,526,526,526,526,526


In [6]:
df['age'].nunique()

41

In [7]:
columns = df.columns[1:-1]
y = df.columns[-1:]
columns

Index(['sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach', 'exang',
       'oldpeak', 'slope', 'ca', 'thal'],
      dtype='object')

In [8]:
target = df['target']

In [9]:
#Create dummies
to_produce = []
for col in columns: 
    to_produce.append(pd.get_dummies(df[col], drop_first=False, prefix=col, dtype=int))
data = pd.concat(to_produce, axis = 1).sort_index()

In [10]:
data['target'] = target 
data

Unnamed: 0,sex_0,sex_1,cp_0,cp_1,cp_2,cp_3,trestbps_94,trestbps_100,trestbps_101,trestbps_102,...,ca_0,ca_1,ca_2,ca_3,ca_4,thal_0,thal_1,thal_2,thal_3,target
0,0,1,1,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,1,0
1,0,1,1,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,1,0
2,0,1,1,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,1,0
3,0,1,1,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,1,0
4,1,0,1,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1020,0,1,0,1,0,0,0,0,0,0,...,1,0,0,0,0,0,0,1,0,1
1021,0,1,1,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,1,0
1022,0,1,1,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,1,0,0
1023,1,0,1,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,1,0,1


In [11]:
#Train models
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split

In [12]:
X = data.drop(['target'],axis=1)
y = data['target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=66)
rfcModel =  RandomForestClassifier(n_estimators=20, max_depth=100,max_features=5)

In [13]:
rfcModel.fit(X_train,y_train)

RandomForestClassifier(max_depth=100, max_features=5, n_estimators=20)

In [14]:
rfcModel_pred = rfcModel.predict(X_test)

In [15]:
print(confusion_matrix(y_test,rfcModel_pred))
print(classification_report(y_test,rfcModel_pred))

[[121   0]
 [  3 133]]
              precision    recall  f1-score   support

           0       0.98      1.00      0.99       121
           1       1.00      0.98      0.99       136

    accuracy                           0.99       257
   macro avg       0.99      0.99      0.99       257
weighted avg       0.99      0.99      0.99       257



In [16]:
#Let's do a grid search
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler

In [17]:
pip = make_pipeline(StandardScaler(), RandomForestClassifier(criterion='gini', random_state=1))
param_grid_rfc = [{
    'randomforestclassifier__max_depth':[2,5,10,100,200],
    'randomforestclassifier__max_features':[2, 3, 4, 5, 6,8,10],
    'randomforestclassifier__n_estimators':[1,5,10,20,100]
}]

gs = GridSearchCV(estimator=pip,
                     param_grid = param_grid_rfc,
                     scoring='accuracy',
                     cv=10,
                     refit=True,
                     n_jobs=1)

In [18]:
#Test the model 

#Comment out when running app

#gs = gs.fit(X_train, y_train)
# Print the training score of the best model
#print(gs.best_score_)

# Print the model parameters of the best model
#print(gs.best_params_)

# Print the test score of the best model
#clfRFC = gs.best_estimator_
#print('Test accuracy: %.3f' % clfRFC.score(X_test, y_test))

In [19]:
columns

Index(['sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach', 'exang',
       'oldpeak', 'slope', 'ca', 'thal'],
      dtype='object')

In [20]:
new_input = [[1,2,130,10,130,0,90,1,0,1,0,0]]
input_df = pd.DataFrame(new_input, columns = columns)

In [21]:
input_df

Unnamed: 0,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal
0,1,2,130,10,130,0,90,1,0,1,0,0


In [22]:
#Create dummies
to_produce_input = []
for col in columns: 
    to_produce_input.append(pd.get_dummies(input_df[col], drop_first=False, prefix=col, dtype=int))
data_input = pd.concat(to_produce_input, axis = 1).sort_index()

In [23]:
data_input

Unnamed: 0,sex_1,cp_2,trestbps_130,chol_10,fbs_130,restecg_0,thalach_90,exang_1,oldpeak_0,slope_1,ca_0,thal_0
0,1,1,1,1,1,1,1,1,1,1,1,1


In [24]:
#Create an empty df to have all columns
to_fill = pd.DataFrame(columns = X.columns)
to_fill

Unnamed: 0,sex_0,sex_1,cp_0,cp_1,cp_2,cp_3,trestbps_94,trestbps_100,trestbps_101,trestbps_102,...,slope_2,ca_0,ca_1,ca_2,ca_3,ca_4,thal_0,thal_1,thal_2,thal_3


In [25]:
data_input

Unnamed: 0,sex_1,cp_2,trestbps_130,chol_10,fbs_130,restecg_0,thalach_90,exang_1,oldpeak_0,slope_1,ca_0,thal_0
0,1,1,1,1,1,1,1,1,1,1,1,1


In [26]:
final_input_df = data_input.join(to_fill[to_fill.columns.difference(data_input.columns)])
final_input_df = final_input_df.fillna(0)
final_input_df = final_input_df.reindex(columns=to_fill.columns)

In [27]:
final_input_df

Unnamed: 0,sex_0,sex_1,cp_0,cp_1,cp_2,cp_3,trestbps_94,trestbps_100,trestbps_101,trestbps_102,...,slope_2,ca_0,ca_1,ca_2,ca_3,ca_4,thal_0,thal_1,thal_2,thal_3
0,0,1,0,0,1,0,0,0,0,0,...,0,1,0,0,0,0,1,0,0,0


In [28]:
to_feed = final_input_df.iloc[0].to_numpy()
to_feed

array([0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [29]:
rfcModel.predict(to_feed.reshape(1,-1))

array([0], dtype=int64)

In [30]:
#'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach', 'exang','oldpeak', 'slope', 'ca', 'thal'

In [31]:
# WebApp starts here
from pywebio.input import input, FLOAT
from pywebio.output import put_text
from pywebio.output import *

input_data = []
def heart():
    #Sex input start
    put_table([
        ['Sex', 'Code'],
        ['Male', '1'],
        ['Female', '0'],
    ]).show
    Sex = input("Input your sex：", type=FLOAT)
    input_data.append(Sex)
    #Sex input end
    
    #Chest pain input start
    put_table([
        ['Code', 'Explanation'],
        ['0', 'Typical angina'],
        ['1', 'Atypical angina'],
        ['2', 'Non-anginal pain'],
        ['3','Asymptomatic']
    ]).show()
    Chest_pain = input("Input chest pain type:" , type=FLOAT)
    input_data.append(Chest_pain)
    #Chest pain input end
    
    #Resting blood pressure start
    Resting_blood_pressure = input("Input resting blood pressure (in mm Hg): ",type=FLOAT)
    input_data.append(Resting_blood_pressure)
    #Resting blood pressure end
    
    #Cholesterol input start
    Cholesterol = input("Input serum cholestoral in mg/dl: ", type=FLOAT)
    input_data.append(Cholesterol)
    #Cholesterol input end
    
    #Fasting blood sugar start 
    put_table([
        ['Fasting blood sugar > 120 mg/dl', 'Code'],
        ['True', '1'],
        ['False', '0'],
    ]).show
    Fasting_Blood_Sugar = input("Input fasting blood sugar (fasting blood sugar > 120 mg/dl)", type=FLOAT)
    input_data.append(Fasting_Blood_Sugar)
    #Fasting blood sugar end
    
    #Resting_electrocardiographic start
    put_table([
        ['Code', 'Explanation'],
        ['0', 'Normal'],
        ['1', 'Having ST-T wave abnormality (T wave inversions and/or ST elevation or depression of > 0.05 mV)'],
        ['2', 'Showing probable or definite left ventricular hypertrophy by Estes\' criteria']
    ]).show()
    
    Resting_electrocardiographic = input("Input resting electrocardiographic results :", type=FLOAT)
    input_data.append(Resting_electrocardiographic)
    #Resting_electrocardiographic end 
    
    #Max heart rate start
    Maximum_heart_rate_achieved = input("Input maximum heart rate achieved: ",type=FLOAT)
    input_data.append(Maximum_heart_rate_achieved)
    #Max heart rate end
    
    #Exercise_induced_angina start
    Exercise_induced_angina = radio("Input exercise induced angina: ", options=['Yes','No'])
    if Exercise_induced_angina == 'Yes': 
        Exercise_induced_angina = 1
    elif Exercise_induced_angina == 'No': 
        Exercise_induced_angina = 0
    input_data.append(Exercise_induced_angina)
    #Exercise_induced_angina end
    
    Oldpeak = input("Input ST depression induced by exercise relative to rest: ",type=FLOAT)
    input_data.append(Oldpeak)
    
    Slope = input("Input the slope of the peak exercise ST segment: ", type=FLOAT)
    input_data.append(Slope)
    
    Major_Vessels = input("Input number of major vessels (0-3) colored by flourosopy: ", type=FLOAT)
    input_data.append(Major_Vessels)
    
    Thalessemia = input("Input thalessemia level: \n\
                        0 = normal\n\
                        1 = fixed defect\n \
                        2 = reversable defect",type=FLOAT)
    input_data.append(Thalessemia)
    input_data_res = [input_data]
    print(input_data_res)
    
if __name__ == '__main__':
    heart()

NameError: name 'radio' is not defined