In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

df= pd.read_csv('C://Users//User//Desktop//MSc Westminster//Dissertation//DataSets//Heart_Attack_Prediction.csv')

In [2]:
columns=df.columns

for column in columns:
    if df[column].dtype=="int32":
        df[column]=df[column].astype("int16")
    elif df[column].dtype=="float64":
        df[column]=df[column].astype("float16")
    elif df[column].dtype=="object":
        df[column]=df[column].astype("category")

In [3]:
df['Sex']= df['Sex'].map({'Female': 0, 'Male': 1})
df['Sex']= pd.to_numeric(df['Sex'])

df['Diet']= df['Diet'].map({'Healthy': 0, 'Average': 1, 'Unhealthy':2})
df['Diet']= pd.to_numeric(df['Diet'])

df[['HBP', 'LBP']]= df['Blood Pressure'].str.split('/', expand= True)
df['HBP']= pd.to_numeric(df['HBP'])
df['LBP']= pd.to_numeric(df['LBP'])

df['Diabetes'] = df['Diabetes'].map({0: 1, 1: 0})

df['Exercise Hours Per Week']= round(df['Exercise Hours Per Week'], 0)

df['Sedentary Hours Per Day']= round(df['Sedentary Hours Per Day'], 0)

df['Income']= round(df['Income'], 0)

df['BMI']= round(df['BMI'], 0)

df = df.drop(columns=['Patient ID', 'Blood Pressure', 'Country', 'Continent', 'Hemisphere'])

In [4]:
X= df.drop(['Heart Attack Risk'], axis= 1)
y= df['Heart Attack Risk']

In [5]:
def fisher_score(X, y):
    labels= np.unique(y)
    mean_overall= np.mean(X, axis=0)
    fisher_scores= []
    for i in range(X.shape[1]):
        numerator= 0
        denominator= 0
        for label in labels:
            X_label= X[y== label]
            mean_label= np.mean(X_label[:, i])
            numerator += len(X_label) * (mean_label - mean_overall[i])**2
            denominator += np.sum((X_label[:, i] - mean_label)**2)
        fisher_scores.append(numerator / (denominator + 1e-10))
    return np.array(fisher_scores)

In [6]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test= train_test_split(X, y, test_size= 0.2, random_state= 15, stratify= y)

from sklearn.preprocessing import RobustScaler
scaler= RobustScaler()
scaler.fit(X_train)

X_train= scaler.transform(X_train)
X_test= scaler.transform(X_test)

In [7]:
from imblearn.under_sampling import ClusterCentroids
from imblearn.over_sampling import SMOTE
from sklearn.cluster import KMeans

sm= ClusterCentroids(random_state= 15, estimator= KMeans(n_init= 10))
tl= SMOTE(random_state= 15)

X_sm, y_sm= sm.fit_resample(X_train, y_train)
X_sm_tl, y_sm_tl= tl.fit_resample(X_sm, y_sm)

In [8]:
fisher_scores= fisher_score(X_sm_tl, y_sm_tl)
k= 10
top_k_indices= np.argsort(fisher_scores)[-k:]

X_sm_tl_selected= X_sm_tl[:, top_k_indices]
X_sm_tl_test_selected= X_test[:, top_k_indices]

In [9]:
feature_names = X.columns
fisher_df= pd.DataFrame({'Feature': feature_names,
                          'Fisher Score': fisher_scores})

fisher_df= fisher_df.sort_values(by= 'Fisher Score', ascending= False)
fisher_df

Unnamed: 0,Feature,Fisher Score
1,Sex,0.004284135
4,Diabetes,0.002631912
6,Smoking,0.001661873
19,Sleep Hours Per Day,0.0008869668
2,Cholesterol,0.0006476622
0,Age,0.0005621014
18,Physical Activity Days Per Week,0.000525105
20,HBP,0.0004697889
3,Heart Rate,0.0003971215
13,Stress Level,0.0003748206


--- LogisticRegression ---

In [10]:
from sklearn.metrics import confusion_matrix, classification_report, precision_score, roc_auc_score, accuracy_score
from sklearn.linear_model import LogisticRegression
lr= LogisticRegression(random_state= 15)
lr.fit(X_sm_tl_selected, y_sm_tl)
y_pred_lr= lr.predict(X_sm_tl_test_selected)
y_pred_prob_lr= lr.predict_proba(X_sm_tl_test_selected)[:,1]

cm= confusion_matrix(y_test, y_pred_lr, labels= lr.classes_)
print(classification_report(y_test, y_pred_lr, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_lr, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_lr))
print('AUC:', roc_auc_score(y_test, y_pred_prob_lr))

              precision    recall  f1-score   support

           0       0.64      0.46      0.54      1125
           1       0.36      0.54      0.43       628

    accuracy                           0.49      1753
   macro avg       0.50      0.50      0.49      1753
weighted avg       0.54      0.49      0.50      1753

Precision: 0.36046511627906974
Accuracy: 0.49115801483171706
AUC: 0.5009610757254069


--- DecisionTreeClassifier ---

In [11]:
from sklearn.tree import DecisionTreeClassifier
dt= DecisionTreeClassifier(random_state=15)
dt.fit(X_sm_tl_selected, y_sm_tl)
y_pred_dt= dt.predict(X_sm_tl_test_selected)
y_pred_prob_dt= dt.predict_proba(X_sm_tl_test_selected)[:,1]

cm= confusion_matrix(y_test, y_pred_dt, labels= dt.classes_)
print(classification_report(y_test, y_pred_dt, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_dt, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_dt))
print('AUC:', roc_auc_score(y_test, y_pred_prob_dt))

              precision    recall  f1-score   support

           0       0.66      0.45      0.54      1125
           1       0.37      0.59      0.46       628

    accuracy                           0.50      1753
   macro avg       0.52      0.52      0.50      1753
weighted avg       0.56      0.50      0.51      1753

Precision: 0.3747474747474748
Accuracy: 0.5002852253280091
AUC: 0.5202710544939845


--- Tuned - DecisionTreeClassifier ---

In [12]:
from sklearn.model_selection import GridSearchCV
param_grid= {
             'max_depth': [None, 10, 20, 30, 40, 50],
             'min_samples_split': [2, 10, 20],
             'min_samples_leaf': [1, 5, 10],
             'max_features': [None, 'sqrt', 'log2'],
             'criterion': ['gini', 'entropy']
            }

gs_dt= GridSearchCV(estimator= dt, param_grid= param_grid, cv= 5, scoring= 'precision')
gs_dt.fit(X_sm_tl_selected, y_sm_tl)

print("Best Parameters:", gs_dt.best_params_)
print("Best Precision Score:", gs_dt.best_score_)

Best Parameters: {'criterion': 'gini', 'max_depth': None, 'max_features': None, 'min_samples_leaf': 10, 'min_samples_split': 2}
Best Precision Score: 0.581723857134946


In [13]:
tuned_dt= gs_dt.best_estimator_
y_pred_tuned_dt= tuned_dt.predict(X_sm_tl_test_selected)
y_pred_prob_tuned_dt= tuned_dt.predict_proba(X_sm_tl_test_selected)[:,1]

cm= confusion_matrix(y_test, y_pred_tuned_dt, labels= tuned_dt.classes_)
print(classification_report(y_test, y_pred_tuned_dt, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_tuned_dt, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_tuned_dt))
print('AUC:', roc_auc_score(y_test, y_pred_prob_tuned_dt))

              precision    recall  f1-score   support

           0       0.64      0.48      0.55      1125
           1       0.36      0.53      0.43       628

    accuracy                           0.50      1753
   macro avg       0.50      0.50      0.49      1753
weighted avg       0.54      0.50      0.51      1753

Precision: 0.36095965103598693
Accuracy: 0.49629207073588133
AUC: 0.4997572540693559


--- KNeighborsClassifier ---

In [14]:
from sklearn.neighbors import KNeighborsClassifier
knn= KNeighborsClassifier()
knn.fit(X_sm_tl_selected, y_sm_tl)
y_pred_knn= knn.predict(X_sm_tl_test_selected)
y_pred_prob_knn= knn.predict_proba(X_sm_tl_test_selected)[:,1]

cm= confusion_matrix(y_test, y_pred_knn, labels= knn.classes_)
print(classification_report(y_test, y_pred_knn, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_knn, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_knn))
print('AUC:', roc_auc_score(y_test, y_pred_prob_knn))

              precision    recall  f1-score   support

           0       0.64      0.48      0.55      1125
           1       0.36      0.52      0.43       628

    accuracy                           0.50      1753
   macro avg       0.50      0.50      0.49      1753
weighted avg       0.54      0.50      0.51      1753

Precision: 0.3599562363238512
Accuracy: 0.4957216200798631
AUC: 0.5070481245576787


--- Tuned - KNeighborsClassifier ---

In [15]:
param_grid= {
             'n_neighbors': np.arange(1,40),
             'weights': ['uniform', 'distance'],
             'metric': ['euclidean', 'manhattan', 'minkowski']
            }

gs_knn= GridSearchCV(estimator= knn, param_grid= param_grid, cv=5, scoring= 'precision')
gs_knn.fit(X_sm_tl_selected, y_sm_tl)
print("Best Parameters:", gs_knn.best_params_)
print("Best Precision Score:", gs_knn.best_score_)

Best Parameters: {'metric': 'manhattan', 'n_neighbors': 16, 'weights': 'distance'}
Best Precision Score: 0.5307938135711778


In [16]:
tuned_knn= gs_knn.best_estimator_
y_pred_tuned_knn= tuned_knn.predict(X_sm_tl_test_selected)
y_pred_prob_tuned_knn= tuned_knn.predict_proba(X_sm_tl_test_selected)[:,1]

cm= confusion_matrix(y_test, y_pred_tuned_knn, labels= tuned_knn.classes_)
print(classification_report(y_test, y_pred_tuned_knn, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_tuned_knn, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_tuned_knn))
print('AUC:', roc_auc_score(y_test, y_pred_prob_tuned_knn))

              precision    recall  f1-score   support

           0       0.65      0.42      0.51      1125
           1       0.36      0.60      0.45       628

    accuracy                           0.48      1753
   macro avg       0.51      0.51      0.48      1753
weighted avg       0.55      0.48      0.49      1753

Precision: 0.36443148688046645
Accuracy: 0.48260125499144324
AUC: 0.5084401981599433


--- GaussianNB ---

In [17]:
from sklearn.naive_bayes import GaussianNB
nb= GaussianNB()
nb.fit(X_sm_tl_selected, y_sm_tl)
y_pred_nb= nb.predict(X_sm_tl_test_selected)
y_pred_prob_nb= nb.predict_proba(X_sm_tl_test_selected)[:,1]

cm= confusion_matrix(y_test, y_pred_nb, labels= nb.classes_)
print(classification_report(y_test, y_pred_nb, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_nb, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_nb))
print('AUC:', roc_auc_score(y_test, y_pred_prob_nb))

              precision    recall  f1-score   support

           0       0.67      0.33      0.44      1125
           1       0.37      0.70      0.48       628

    accuracy                           0.46      1753
   macro avg       0.52      0.52      0.46      1753
weighted avg       0.56      0.46      0.46      1753

Precision: 0.3692564745196324
Accuracy: 0.4632059326868226
AUC: 0.5060523708421798


--- SVM ---

In [18]:
from sklearn.svm import SVC
svc= SVC(kernel= 'rbf',probability= True, gamma= 1, random_state=15)
svc.fit(X_sm_tl_selected, y_sm_tl)
y_pred_svc= svc.predict(X_sm_tl_test_selected)
y_pred_prob_svc= svc.predict_proba(X_sm_tl_test_selected)[:,1]

cm= confusion_matrix(y_test, y_pred_svc, labels= svc.classes_)
print(classification_report(y_test, y_pred_svc, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_svc, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_svc))
print('AUC:', roc_auc_score(y_test, y_pred_prob_svc))

              precision    recall  f1-score   support

           0       0.65      0.46      0.54      1125
           1       0.36      0.55      0.44       628

    accuracy                           0.49      1753
   macro avg       0.50      0.50      0.49      1753
weighted avg       0.54      0.49      0.50      1753

Precision: 0.3617245005257624
Accuracy: 0.4917284654877353
AUC: 0.508342533616419


--- Random Forest ---

In [19]:
from sklearn.ensemble import RandomForestClassifier
rf= RandomForestClassifier(random_state=15)
rf.fit(X_sm_tl_selected, y_sm_tl)
y_pred_rf= rf.predict(X_sm_tl_test_selected)
y_pred_prob_rf= rf.predict_proba(X_sm_tl_test_selected)[:,1]

In [20]:
cm= confusion_matrix(y_test, y_pred_rf, labels= rf.classes_)
print(classification_report(y_test, y_pred_rf, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_rf, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_rf))
print('AUC:', roc_auc_score(y_test, y_pred_prob_rf))

              precision    recall  f1-score   support

           0       0.62      0.30      0.41      1125
           1       0.35      0.67      0.46       628

    accuracy                           0.43      1753
   macro avg       0.49      0.49      0.43      1753
weighted avg       0.52      0.43      0.43      1753

Precision: 0.3488759367194005
Accuracy: 0.43468339988590987
AUC: 0.5011542816702053


In [21]:
param_grid= {
             'n_estimators': [100, 200, 300],
             'max_depth': [None, 10, 20, 30],
             'min_samples_split': [2, 5, 10],
             'min_samples_leaf': [1, 2, 4],
            }

gs_tuned_rf= GridSearchCV(estimator= rf, param_grid= param_grid, cv= 5, scoring= 'precision', n_jobs= -1, verbose= 2)
gs_tuned_rf.fit(X_sm_tl_selected, y_sm_tl)
print("Best Parameters:", gs_tuned_rf.best_params_)
print("Best Precision Score:", gs_tuned_rf.best_score_)

Fitting 5 folds for each of 108 candidates, totalling 540 fits
Best Parameters: {'max_depth': 20, 'min_samples_leaf': 1, 'min_samples_split': 10, 'n_estimators': 300}
Best Precision Score: 0.6310587007489618


In [22]:
tuned_rf= gs_tuned_rf.best_estimator_
y_pred_tuned_rf= tuned_rf.predict(X_sm_tl_test_selected)
y_pred_prob_tuned_rf= tuned_rf.predict_proba(X_sm_tl_test_selected)[:,1]

cm= confusion_matrix(y_test, y_pred_tuned_rf, labels= tuned_rf.classes_)
print(classification_report(y_test, y_pred_tuned_rf, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_tuned_rf, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_tuned_rf))
print('AUC:', roc_auc_score(y_test, y_pred_prob_tuned_rf))

              precision    recall  f1-score   support

           0       0.64      0.26      0.37      1125
           1       0.36      0.73      0.48       628

    accuracy                           0.43      1753
   macro avg       0.50      0.50      0.42      1753
weighted avg       0.54      0.43      0.41      1753

Precision: 0.3560371517027864
Accuracy: 0.4295493439817456
AUC: 0.5081924982307149


--- AdaBoost ---

In [23]:
from sklearn.ensemble import AdaBoostClassifier
ada= AdaBoostClassifier(random_state=15)
ada.fit(X_sm_tl_selected, y_sm_tl)
y_pred_ada= ada.predict(X_sm_tl_test_selected)
y_pred_prob_ada= ada.predict_proba(X_sm_tl_test_selected)[:,1]

In [24]:
cm= confusion_matrix(y_test, y_pred_ada, labels= ada.classes_)
print(classification_report(y_test, y_pred_ada, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_ada, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_ada))
print('AUC:', roc_auc_score(y_test, y_pred_prob_ada))

              precision    recall  f1-score   support

           0       0.63      0.35      0.45      1125
           1       0.35      0.64      0.46       628

    accuracy                           0.45      1753
   macro avg       0.49      0.49      0.45      1753
weighted avg       0.53      0.45      0.45      1753

Precision: 0.354295837023915
Accuracy: 0.4540787221905305
AUC: 0.5015916489738146


In [25]:
param_grid= {'n_estimators': [50, 100, 200]}

gs_ada= GridSearchCV(estimator= ada, param_grid= param_grid, cv= 5, scoring= 'precision', n_jobs= -1, verbose= 2)
gs_ada.fit(X_sm_tl_selected, y_sm_tl)
print("Best Parameters:", gs_ada.best_params_)
print("Best Precision Score:", gs_ada.best_score_)

Fitting 5 folds for each of 3 candidates, totalling 15 fits
Best Parameters: {'n_estimators': 200}
Best Precision Score: 0.6495492153252731


In [26]:
tuned_ada= gs_ada.best_estimator_
y_pred_tuned_ada= tuned_ada.predict(X_sm_tl_test_selected)
y_pred_prob_tuned_ada= tuned_ada.predict_proba(X_sm_tl_test_selected)[:,1]

cm= confusion_matrix(y_test, y_pred_tuned_ada, labels= tuned_ada.classes_)
print(classification_report(y_test, y_pred_tuned_ada, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_tuned_ada, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_tuned_ada))
print('AUC:', roc_auc_score(y_test, y_pred_prob_tuned_ada))

              precision    recall  f1-score   support

           0       0.66      0.23      0.34      1125
           1       0.36      0.79      0.50       628

    accuracy                           0.43      1753
   macro avg       0.51      0.51      0.42      1753
weighted avg       0.55      0.43      0.40      1753

Precision: 0.3635693215339233
Accuracy: 0.4306902452937821
AUC: 0.4995661712668082


--- GradientBoosting ---

In [27]:
from sklearn.ensemble import GradientBoostingClassifier
grb= GradientBoostingClassifier(random_state=15)
grb.fit(X_sm_tl_selected, y_sm_tl)
y_pred_grb= grb.predict(X_sm_tl_test_selected)
y_pred_prob_grb= grb.predict_proba(X_sm_tl_test_selected)[:,1]


cm= confusion_matrix(y_test, y_pred_grb, labels= grb.classes_)
print(classification_report(y_test, y_pred_grb, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_grb, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_grb))
print('AUC:', roc_auc_score(y_test, y_pred_prob_grb))

              precision    recall  f1-score   support

           0       0.65      0.15      0.25      1125
           1       0.36      0.85      0.51       628

    accuracy                           0.40      1753
   macro avg       0.50      0.50      0.38      1753
weighted avg       0.55      0.40      0.34      1753

Precision: 0.3595430107526882
Accuracy: 0.4033086138049059
AUC: 0.4979426751592357


In [28]:
param_grid= {'learning_rate': [0.01, 0.1, 0.2]}

gs_grb= GridSearchCV(estimator= grb, param_grid= param_grid, cv= 5, scoring= 'precision', n_jobs= -1, verbose= 2)
gs_grb.fit(X_sm_tl_selected, y_sm_tl)

print("Best Parameters:", gs_grb.best_params_)
print("Best Precision Score:", gs_grb.best_score_)

Fitting 5 folds for each of 3 candidates, totalling 15 fits
Best Parameters: {'learning_rate': 0.2}
Best Precision Score: 0.6680168108218085


In [29]:
tuned_grb= gs_grb.best_estimator_
y_pred_tuned_grb= tuned_grb.predict(X_sm_tl_test_selected)
y_pred_prob_tuned_grb= tuned_grb.predict_proba(X_sm_tl_test_selected)[:,1]

cm= confusion_matrix(y_test, y_pred_tuned_grb, labels= tuned_grb.classes_)
print(classification_report(y_test, y_pred_tuned_grb, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_tuned_grb, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_tuned_grb))
print('AUC:', roc_auc_score(y_test, y_pred_prob_tuned_grb))

              precision    recall  f1-score   support

           0       0.65      0.17      0.28      1125
           1       0.36      0.83      0.50       628

    accuracy                           0.41      1753
   macro avg       0.51      0.50      0.39      1753
weighted avg       0.55      0.41      0.36      1753

Precision: 0.3606331727460427
Accuracy: 0.4107244723331432
AUC: 0.4954451521585279


--- XGB ---

In [30]:
from xgboost import XGBClassifier
xgb= XGBClassifier(random_state=15)
xgb.fit(X_sm_tl_selected, y_sm_tl)
y_pred_xgb= xgb.predict(X_sm_tl_test_selected)
y_pred_prob_xgb= xgb.predict_proba(X_sm_tl_test_selected)[:,1]

cm= confusion_matrix(y_test, y_pred_xgb, labels= xgb.classes_)
print(classification_report(y_test, y_pred_xgb, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_xgb, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_xgb))
print('AUC:', roc_auc_score(y_test, y_pred_prob_xgb))

              precision    recall  f1-score   support

           0       0.65      0.27      0.38      1125
           1       0.36      0.74      0.49       628

    accuracy                           0.44      1753
   macro avg       0.51      0.50      0.43      1753
weighted avg       0.55      0.44      0.42      1753

Precision: 0.3610248447204969
Accuracy: 0.4375356531660011
AUC: 0.4949355980184006


In [31]:
params_XGBoost= {'learning_rate': [0.01, 0.1, 1.0]}

gs_xgb= GridSearchCV(estimator= xgb, param_grid= params_XGBoost, cv= 5, scoring= 'precision', n_jobs= -1, verbose= 2)
gs_xgb.fit(X_sm_tl_selected, y_sm_tl)

print("Best Parameters:", gs_xgb.best_params_)
print("Best Precision Score:", gs_xgb.best_score_)

tuned_xgb= gs_xgb.best_estimator_
y_pred_tuned_xgb= tuned_xgb.predict(X_sm_tl_test_selected)
y_pred_prob_tuned_xgb= tuned_xgb.predict_proba(X_sm_tl_test_selected)[:,1]

cm= confusion_matrix(y_test, y_pred_tuned_xgb, labels= tuned_xgb.classes_)
print(classification_report(y_test, y_pred_tuned_xgb, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_tuned_xgb, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_tuned_xgb))
print('AUC:', roc_auc_score(y_test, y_pred_prob_tuned_xgb))

Fitting 5 folds for each of 3 candidates, totalling 15 fits
Best Parameters: {'learning_rate': 0.1}
Best Precision Score: 0.6685314183964889
              precision    recall  f1-score   support

           0       0.64      0.20      0.30      1125
           1       0.36      0.80      0.50       628

    accuracy                           0.42      1753
   macro avg       0.50      0.50      0.40      1753
weighted avg       0.54      0.42      0.37      1753

Precision: 0.35891968727789625
Accuracy: 0.4152880775812892
AUC: 0.4962915782024062


--- LGBM ---

In [32]:
from lightgbm import LGBMClassifier
lgm= LGBMClassifier(random_state=15)
lgm.fit(X_sm_tl_selected, y_sm_tl)
y_pred_lgm= lgm.predict(X_sm_tl_test_selected)
y_pred_prob_lgm= lgm.predict_proba(X_sm_tl_test_selected)[:,1]

cm= confusion_matrix(y_test, y_pred_lgm, labels= lgm.classes_)
print(classification_report(y_test, y_pred_lgm, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_lgm, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_lgm))
print('AUC:', roc_auc_score(y_test, y_pred_prob_lgm))

[LightGBM] [Info] Number of positive: 2511, number of negative: 2511
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000247 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1180
[LightGBM] [Info] Number of data points in the train set: 5022, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
              precision    recall  f1-score   support

           0       0.67      0.24      0.35      1125
           1       0.37      0.79      0.50       628

    accuracy                           0.44      1753
   macro avg       0.52      0.52      0.43      1753
weighted avg       0.56      0.44      0.40      1753

Precision: 0.3671817512877116
Accuracy: 0.43582430119794635
AUC: 0.49873743807501764


In [33]:
params_LGB= {'learning_rate': [0.001, 0.01, 0.1, 1.0],
             'num_leaves': [31, 127],
             'reg_alpha': [0.1, 0.5],
             'min_data_in_leaf': [30, 50, 100, 300, 400]}

gs_lgm= GridSearchCV(estimator= lgm, param_grid= params_LGB, cv=5, scoring='precision', n_jobs=-1, verbose=2)
gs_lgm.fit(X_sm_tl_selected, y_sm_tl)

print("Best Parameters:", gs_lgm.best_params_)
print("Best Precision Score:", gs_lgm.best_score_)

tuned_lgm= gs_lgm.best_estimator_
y_pred_tuned_lgm= tuned_lgm.predict(X_sm_tl_test_selected)
y_pred_prob_tuned_lgm= tuned_lgm.predict_proba(X_sm_tl_test_selected)[:,1]

cm= confusion_matrix(y_test, y_pred_tuned_lgm, labels= tuned_lgm.classes_)
print(classification_report(y_test, y_pred_tuned_lgm, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_tuned_lgm, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_tuned_lgm))
print('AUC:', roc_auc_score(y_test, y_pred_prob_tuned_lgm))

Fitting 5 folds for each of 80 candidates, totalling 400 fits
[LightGBM] [Info] Number of positive: 2511, number of negative: 2511
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002460 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1180
[LightGBM] [Info] Number of data points in the train set: 5022, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
Best Parameters: {'learning_rate': 0.1, 'min_data_in_leaf': 30, 'num_leaves': 31, 'reg_alpha': 0.5}
Best Precision Score: 0.6622917058783154
              precision    recall  f1-score   support

           0       0.68      0.24      0.36      1125
           1       0.37      0.79      0.50       628

    accuracy                           0.44      1753
   macro avg       0.52      0.52      0.43      1753
weighted avg       0.57  

--- CatBoost ---

In [34]:
from catboost import CatBoostClassifier
cat= CatBoostClassifier(random_state=15)
cat.fit(X_sm_tl_selected, y_sm_tl)
y_pred_cat= cat.predict(X_sm_tl_test_selected)
y_pred_prob_cat= cat.predict_proba(X_sm_tl_test_selected)[:,1]

cm= confusion_matrix(y_test, y_pred_cat, labels= cat.classes_)
print(classification_report(y_test, y_pred_cat, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_cat, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_cat))
print('AUC:', roc_auc_score(y_test, y_pred_prob_cat))

Learning rate set to 0.020521
0:	learn: 0.6880937	total: 145ms	remaining: 2m 24s
1:	learn: 0.6836401	total: 149ms	remaining: 1m 14s
2:	learn: 0.6791661	total: 153ms	remaining: 51s
3:	learn: 0.6758419	total: 159ms	remaining: 39.7s
4:	learn: 0.6723116	total: 164ms	remaining: 32.6s
5:	learn: 0.6693265	total: 167ms	remaining: 27.7s
6:	learn: 0.6663557	total: 174ms	remaining: 24.6s
7:	learn: 0.6637406	total: 182ms	remaining: 22.6s
8:	learn: 0.6610097	total: 188ms	remaining: 20.7s
9:	learn: 0.6585413	total: 195ms	remaining: 19.3s
10:	learn: 0.6567063	total: 200ms	remaining: 18s
11:	learn: 0.6547797	total: 206ms	remaining: 16.9s
12:	learn: 0.6531633	total: 210ms	remaining: 15.9s
13:	learn: 0.6510752	total: 215ms	remaining: 15.1s
14:	learn: 0.6491442	total: 221ms	remaining: 14.5s
15:	learn: 0.6473821	total: 226ms	remaining: 13.9s
16:	learn: 0.6457143	total: 230ms	remaining: 13.3s
17:	learn: 0.6438659	total: 236ms	remaining: 12.9s
18:	learn: 0.6425695	total: 241ms	remaining: 12.5s
19:	learn: 0.

161:	learn: 0.5586907	total: 1.02s	remaining: 5.3s
162:	learn: 0.5580046	total: 1.03s	remaining: 5.29s
163:	learn: 0.5577863	total: 1.05s	remaining: 5.36s
164:	learn: 0.5574659	total: 1.06s	remaining: 5.37s
165:	learn: 0.5573262	total: 1.07s	remaining: 5.36s
166:	learn: 0.5569224	total: 1.07s	remaining: 5.36s
167:	learn: 0.5566632	total: 1.08s	remaining: 5.37s
168:	learn: 0.5563068	total: 1.09s	remaining: 5.38s
169:	learn: 0.5559281	total: 1.11s	remaining: 5.4s
170:	learn: 0.5557358	total: 1.12s	remaining: 5.41s
171:	learn: 0.5553798	total: 1.13s	remaining: 5.43s
172:	learn: 0.5552114	total: 1.14s	remaining: 5.44s
173:	learn: 0.5549787	total: 1.14s	remaining: 5.43s
174:	learn: 0.5546184	total: 1.15s	remaining: 5.41s
175:	learn: 0.5542964	total: 1.15s	remaining: 5.4s
176:	learn: 0.5540624	total: 1.16s	remaining: 5.39s
177:	learn: 0.5538228	total: 1.16s	remaining: 5.37s
178:	learn: 0.5535788	total: 1.17s	remaining: 5.37s
179:	learn: 0.5533191	total: 1.18s	remaining: 5.36s
180:	learn: 0.5

342:	learn: 0.5101601	total: 2.08s	remaining: 4s
343:	learn: 0.5099520	total: 2.1s	remaining: 4s
344:	learn: 0.5097659	total: 2.1s	remaining: 3.99s
345:	learn: 0.5096313	total: 2.11s	remaining: 3.98s
346:	learn: 0.5093602	total: 2.11s	remaining: 3.98s
347:	learn: 0.5090855	total: 2.12s	remaining: 3.97s
348:	learn: 0.5088517	total: 2.13s	remaining: 3.97s
349:	learn: 0.5086023	total: 2.13s	remaining: 3.96s
350:	learn: 0.5082319	total: 2.14s	remaining: 3.95s
351:	learn: 0.5079906	total: 2.14s	remaining: 3.94s
352:	learn: 0.5075726	total: 2.15s	remaining: 3.93s
353:	learn: 0.5074293	total: 2.15s	remaining: 3.92s
354:	learn: 0.5071826	total: 2.16s	remaining: 3.92s
355:	learn: 0.5069331	total: 2.16s	remaining: 3.91s
356:	learn: 0.5067607	total: 2.17s	remaining: 3.9s
357:	learn: 0.5065666	total: 2.17s	remaining: 3.9s
358:	learn: 0.5063057	total: 2.18s	remaining: 3.89s
359:	learn: 0.5061140	total: 2.18s	remaining: 3.88s
360:	learn: 0.5058887	total: 2.19s	remaining: 3.87s
361:	learn: 0.5057429	

501:	learn: 0.4746188	total: 2.98s	remaining: 2.95s
502:	learn: 0.4743650	total: 2.99s	remaining: 2.96s
503:	learn: 0.4741185	total: 3s	remaining: 2.95s
504:	learn: 0.4739471	total: 3.01s	remaining: 2.95s
505:	learn: 0.4737296	total: 3.01s	remaining: 2.94s
506:	learn: 0.4734971	total: 3.02s	remaining: 2.94s
507:	learn: 0.4732470	total: 3.02s	remaining: 2.93s
508:	learn: 0.4729653	total: 3.03s	remaining: 2.92s
509:	learn: 0.4727059	total: 3.04s	remaining: 2.92s
510:	learn: 0.4724479	total: 3.04s	remaining: 2.91s
511:	learn: 0.4722500	total: 3.05s	remaining: 2.9s
512:	learn: 0.4720434	total: 3.05s	remaining: 2.9s
513:	learn: 0.4718316	total: 3.06s	remaining: 2.89s
514:	learn: 0.4716219	total: 3.06s	remaining: 2.89s
515:	learn: 0.4713910	total: 3.07s	remaining: 2.88s
516:	learn: 0.4711667	total: 3.08s	remaining: 2.87s
517:	learn: 0.4709881	total: 3.08s	remaining: 2.87s
518:	learn: 0.4706352	total: 3.09s	remaining: 2.86s
519:	learn: 0.4704273	total: 3.09s	remaining: 2.85s
520:	learn: 0.470

686:	learn: 0.4366213	total: 4.04s	remaining: 1.84s
687:	learn: 0.4364256	total: 4.04s	remaining: 1.83s
688:	learn: 0.4362459	total: 4.05s	remaining: 1.83s
689:	learn: 0.4360620	total: 4.06s	remaining: 1.82s
690:	learn: 0.4358950	total: 4.07s	remaining: 1.82s
691:	learn: 0.4357469	total: 4.07s	remaining: 1.81s
692:	learn: 0.4354441	total: 4.08s	remaining: 1.81s
693:	learn: 0.4352472	total: 4.08s	remaining: 1.8s
694:	learn: 0.4350580	total: 4.09s	remaining: 1.79s
695:	learn: 0.4349098	total: 4.09s	remaining: 1.79s
696:	learn: 0.4347016	total: 4.1s	remaining: 1.78s
697:	learn: 0.4345042	total: 4.1s	remaining: 1.77s
698:	learn: 0.4343317	total: 4.11s	remaining: 1.77s
699:	learn: 0.4340590	total: 4.11s	remaining: 1.76s
700:	learn: 0.4338529	total: 4.12s	remaining: 1.76s
701:	learn: 0.4336863	total: 4.12s	remaining: 1.75s
702:	learn: 0.4335327	total: 4.13s	remaining: 1.74s
703:	learn: 0.4333206	total: 4.13s	remaining: 1.74s
704:	learn: 0.4331483	total: 4.14s	remaining: 1.73s
705:	learn: 0.4

845:	learn: 0.4102560	total: 4.94s	remaining: 900ms
846:	learn: 0.4100907	total: 4.95s	remaining: 895ms
847:	learn: 0.4099127	total: 4.96s	remaining: 889ms
848:	learn: 0.4097073	total: 4.96s	remaining: 883ms
849:	learn: 0.4095002	total: 4.97s	remaining: 877ms
850:	learn: 0.4092696	total: 4.97s	remaining: 871ms
851:	learn: 0.4091131	total: 4.98s	remaining: 865ms
852:	learn: 0.4089150	total: 4.99s	remaining: 859ms
853:	learn: 0.4088176	total: 4.99s	remaining: 854ms
854:	learn: 0.4086834	total: 5s	remaining: 847ms
855:	learn: 0.4085988	total: 5s	remaining: 841ms
856:	learn: 0.4084527	total: 5.01s	remaining: 836ms
857:	learn: 0.4082719	total: 5.01s	remaining: 830ms
858:	learn: 0.4080644	total: 5.02s	remaining: 824ms
859:	learn: 0.4078659	total: 5.02s	remaining: 818ms
860:	learn: 0.4077198	total: 5.03s	remaining: 812ms
861:	learn: 0.4075462	total: 5.03s	remaining: 806ms
862:	learn: 0.4073942	total: 5.04s	remaining: 800ms
863:	learn: 0.4072837	total: 5.05s	remaining: 795ms
864:	learn: 0.4071

In [35]:
params_CatBoost= {
                  'depth': [3,5,10],
                  'learning_rate' : [0.01,0.1,1],
                  'iterations' : [5,10,50,100]
                 }

gs_cat= GridSearchCV(estimator= cat, param_grid= params_CatBoost, cv=5, scoring='precision', n_jobs=-1, verbose=2)
gs_cat.fit(X_sm_tl_selected, y_sm_tl)

print("Best Parameters:", gs_cat.best_params_)
print("Best Precision Score:", gs_cat.best_score_)

tuned_cat= gs_cat.best_estimator_
y_pred_tuned_cat= tuned_cat.predict(X_sm_tl_test_selected)
y_pred_prob_tuned_cat= tuned_cat.predict_proba(X_sm_tl_test_selected)[:,1]

cm= confusion_matrix(y_test, y_pred_tuned_cat, labels= tuned_cat.classes_)
print(classification_report(y_test, y_pred_tuned_cat, zero_division=0))
print('Precision:', precision_score(y_test, y_pred_tuned_cat, zero_division=0))
print('Accuracy:', accuracy_score(y_test, y_pred_tuned_cat))
print('AUC:', roc_auc_score(y_test, y_pred_prob_tuned_cat))

Fitting 5 folds for each of 36 candidates, totalling 180 fits
0:	learn: 0.6456624	total: 2.41ms	remaining: 118ms
1:	learn: 0.6341558	total: 4.22ms	remaining: 101ms
2:	learn: 0.6226136	total: 5.83ms	remaining: 91.3ms
3:	learn: 0.6157894	total: 7.26ms	remaining: 83.5ms
4:	learn: 0.6055397	total: 8.95ms	remaining: 80.5ms
5:	learn: 0.5992766	total: 10.5ms	remaining: 77ms
6:	learn: 0.5962342	total: 12ms	remaining: 73.9ms
7:	learn: 0.5956362	total: 13.7ms	remaining: 71.8ms
8:	learn: 0.5850860	total: 16.5ms	remaining: 75ms
9:	learn: 0.5762825	total: 18.9ms	remaining: 75.5ms
10:	learn: 0.5740568	total: 20.4ms	remaining: 72.3ms
11:	learn: 0.5721422	total: 22ms	remaining: 69.7ms
12:	learn: 0.5636169	total: 23.7ms	remaining: 67.5ms
13:	learn: 0.5582266	total: 25.3ms	remaining: 65.1ms
14:	learn: 0.5527603	total: 26.9ms	remaining: 62.8ms
15:	learn: 0.5480721	total: 28.6ms	remaining: 60.8ms
16:	learn: 0.5463645	total: 31.5ms	remaining: 61.2ms
17:	learn: 0.5430575	total: 33.9ms	remaining: 60.2ms
18:	

--- Visualisation ---

In [36]:
precision_scores= {
                    'Logistic Regression Precision:': precision_score(y_test, y_pred_lr, zero_division= 0),
                    'Decision Tree Precision:': precision_score(y_test, y_pred_dt, zero_division= 0),
                    'Tuned Decision Tree Precision:': precision_score(y_test, y_pred_tuned_dt, zero_division= 0),
                    'KNeighborsClassifier Precision:': precision_score(y_test, y_pred_knn, zero_division= 0),
                    'Tuned KNeighborsClassifier Precision:': precision_score(y_test, y_pred_tuned_knn, zero_division= 0),
                    'GaussianNB Precision:': precision_score(y_test, y_pred_nb, zero_division= 0),
                    'SVM Precision:': precision_score(y_test, y_pred_svc, zero_division= 0),
                    'Random Forest Precision:': precision_score(y_test, y_pred_rf, zero_division= 0),
                    'Tuned Random Forest Precision:': precision_score(y_test, y_pred_tuned_rf, zero_division= 0),
                    'AdaBoost Precision:': precision_score(y_test, y_pred_ada, zero_division= 0),
                    'Tuned AdaBoost Precision:': precision_score(y_test, y_pred_tuned_ada, zero_division= 0),
                    'GradientBoosting Precision:': precision_score(y_test, y_pred_grb, zero_division= 0),
                    'Tuned GradientBoosting Precision:': precision_score(y_test, y_pred_tuned_grb, zero_division= 0),
                    'XGB Precision:': precision_score(y_test, y_pred_xgb, zero_division= 0),
                    'Tuned XGB Precision:': precision_score(y_test, y_pred_tuned_xgb, zero_division= 0),
                    'LGBM Precision:': precision_score(y_test, y_pred_lgm, zero_division= 0),
                    'Tuned LGBM Precision:': precision_score(y_test, y_pred_tuned_lgm, zero_division= 0),
                    'CatBoost Precision:': precision_score(y_test, y_pred_cat, zero_division= 0),
                    'Tuned CatBoost Precision:': precision_score(y_test, y_pred_tuned_cat, zero_division= 0)
                  }

annova_precision= pd.DataFrame(list(precision_scores.items()), columns= ['Model', 'Precision Score'])
annova_precision= annova_precision.sort_values(by= 'Precision Score', ascending=False)
print(annova_precision)

                                    Model  Precision Score
1                Decision Tree Precision:         0.374747
5                   GaussianNB Precision:         0.369256
16                  Tuned LGBM Precision:         0.368968
15                        LGBM Precision:         0.367182
4   Tuned KNeighborsClassifier Precision:         0.364431
10              Tuned AdaBoost Precision:         0.363569
17                    CatBoost Precision:         0.362752
6                          SVM Precision:         0.361725
13                         XGB Precision:         0.361025
2          Tuned Decision Tree Precision:         0.360960
12      Tuned GradientBoosting Precision:         0.360633
0          Logistic Regression Precision:         0.360465
3         KNeighborsClassifier Precision:         0.359956
11            GradientBoosting Precision:         0.359543
14                   Tuned XGB Precision:         0.358920
18              Tuned CatBoost Precision:         0.3571

In [37]:
accuracy_scores= {
                    'Logistic Regression Accuracy:': accuracy_score(y_test, y_pred_lr),
                    'Decision Tree Accuracy:': accuracy_score(y_test, y_pred_dt),
                    'Tuned Decision Tree Accuracy:': accuracy_score(y_test, y_pred_tuned_dt),
                    'KNeighborsClassifier Accuracy:': accuracy_score(y_test, y_pred_knn),
                    'Tuned KNeighborsClassifier Accuracy:': accuracy_score(y_test, y_pred_tuned_knn),
                    'GaussianNB Accuracy:': accuracy_score(y_test, y_pred_nb),
                    'SVM Accuracy:': accuracy_score(y_test, y_pred_svc),
                    'Random Forest Accuracy:': accuracy_score(y_test, y_pred_rf),
                    'Tuned Random Forest Accuracy:': accuracy_score(y_test, y_pred_tuned_rf),
                    'AdaBoost Accuracy:': accuracy_score(y_test, y_pred_ada),
                    'Tuned AdaBoost Accuracy:': accuracy_score(y_test, y_pred_tuned_ada),
                    'GradientBoosting Accuracy:': accuracy_score(y_test, y_pred_grb),
                    'Tuned GradientBoosting Accuracy:': accuracy_score(y_test, y_pred_tuned_grb),
                    'XGB Accuracy:': accuracy_score(y_test, y_pred_xgb),
                    'Tuned XGB Accuracy:': accuracy_score(y_test, y_pred_tuned_xgb),
                    'LGBM Accuracy:': accuracy_score(y_test, y_pred_lgm),
                    'Tuned LGBM Accuracy:': accuracy_score(y_test, y_pred_tuned_lgm),
                    'CatBoost Accuracy:': accuracy_score(y_test, y_pred_cat),
                    'Tuned CatBoost Accuracy:': accuracy_score(y_test, y_pred_tuned_cat)
                  }

annova_accuracy= pd.DataFrame(list(accuracy_scores.items()), columns= ['Model', 'Accuracy Score'])
annova_accuracy= annova_accuracy.sort_values(by= 'Accuracy Score', ascending=False)
print(annova_accuracy)

                                   Model  Accuracy Score
1                Decision Tree Accuracy:        0.500285
2          Tuned Decision Tree Accuracy:        0.496292
3         KNeighborsClassifier Accuracy:        0.495722
6                          SVM Accuracy:        0.491728
0          Logistic Regression Accuracy:        0.491158
4   Tuned KNeighborsClassifier Accuracy:        0.482601
5                   GaussianNB Accuracy:        0.463206
9                     AdaBoost Accuracy:        0.454079
16                  Tuned LGBM Accuracy:        0.440388
13                         XGB Accuracy:        0.437536
15                        LGBM Accuracy:        0.435824
7                Random Forest Accuracy:        0.434683
10              Tuned AdaBoost Accuracy:        0.430690
8          Tuned Random Forest Accuracy:        0.429549
17                    CatBoost Accuracy:        0.416429
14                   Tuned XGB Accuracy:        0.415288
18              Tuned CatBoost 

In [38]:
auc_scores= {
                    'Logistic Regression AUC:': roc_auc_score(y_test, y_pred_prob_lr),
                    'Decision Tree AUC:': roc_auc_score(y_test, y_pred_prob_dt),
                    'Tuned Decision Tree AUC:': roc_auc_score(y_test, y_pred_prob_tuned_dt),
                    'KNeighborsClassifier AUC:': roc_auc_score(y_test, y_pred_prob_knn),
                    'Tuned KNeighborsClassifier AUC:': roc_auc_score(y_test, y_pred_prob_tuned_knn),
                    'GaussianNB AUC:': roc_auc_score(y_test, y_pred_prob_nb),
                    'SVM AUC:': roc_auc_score(y_test, y_pred_prob_svc),
                    'Random Forest AUC:': roc_auc_score(y_test, y_pred_prob_rf),
                    'Tuned Random Forest AUC:': roc_auc_score(y_test, y_pred_prob_tuned_rf),
                    'AdaBoost AUC:': roc_auc_score(y_test, y_pred_prob_ada),
                    'Tuned AdaBoost AUC:': roc_auc_score(y_test, y_pred_prob_tuned_ada),
                    'GradientBoosting AUC:': roc_auc_score(y_test, y_pred_prob_grb),
                    'Tuned GradientBoosting AUC:': roc_auc_score(y_test, y_pred_prob_tuned_grb),
                    'XGB AUC:': roc_auc_score(y_test, y_pred_prob_xgb),
                    'Tuned XGB AUC:': roc_auc_score(y_test, y_pred_prob_tuned_xgb),
                    'LGBM AUC:': roc_auc_score(y_test, y_pred_prob_lgm),
                    'Tuned LGBM AUC:': roc_auc_score(y_test, y_pred_prob_tuned_lgm),
                    'CatBoost AUC:': roc_auc_score(y_test, y_pred_prob_cat),
                    'Tuned CatBoost AUC:': roc_auc_score(y_test, y_pred_prob_tuned_cat)
                  }

annova_auc= pd.DataFrame(list(auc_scores.items()), columns= ['Model', 'AUC Score'])
annova_auc= annova_auc.sort_values(by= 'AUC Score', ascending=False)
print(annova_auc)

                              Model  AUC Score
1                Decision Tree AUC:   0.520271
4   Tuned KNeighborsClassifier AUC:   0.508440
6                          SVM AUC:   0.508343
8          Tuned Random Forest AUC:   0.508192
16                  Tuned LGBM AUC:   0.508031
3         KNeighborsClassifier AUC:   0.507048
17                    CatBoost AUC:   0.506269
5                   GaussianNB AUC:   0.506052
9                     AdaBoost AUC:   0.501592
7                Random Forest AUC:   0.501154
0          Logistic Regression AUC:   0.500961
2          Tuned Decision Tree AUC:   0.499757
18              Tuned CatBoost AUC:   0.499640
10              Tuned AdaBoost AUC:   0.499566
15                        LGBM AUC:   0.498737
11            GradientBoosting AUC:   0.497943
14                   Tuned XGB AUC:   0.496292
12      Tuned GradientBoosting AUC:   0.495445
13                         XGB AUC:   0.494936
