# Final Degree Project

## Prediction of cardiovascular and bleeding events in patients on dual antiplatelet therapy using machine learning models.

##### Pablo Pérez Sánchez
---
# 2. Model

In [None]:
# Import libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Algorithms / graphics
from sklearn.metrics import roc_auc_score, confusion_matrix, accuracy_score
from sklearn import preprocessing
from sklearn.model_selection import cross_val_score
# from sklearn.linear_model import LogisticRegression
# from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from yellowbrick.classifier import ClassificationReport, DiscriminationThreshold, ROCAUC, ConfusionMatrix, PrecisionRecallCurve

%pylab inline

In [None]:
# Load the preprocessed dataset and drop the 'Unnamed: 0' column
# (presumably a row index written out by an earlier export -- TODO confirm).
df = (
    pd.read_csv('../../data/PACS_DAPT_preprocess.csv')
    .drop(columns=['Unnamed: 0'])
)

In [None]:
# Predictor variables fed to the models. Only columns without missing values are
# listed, which is why BMI is not included.
var_pred = [
    'Age',
    'Sex',
    'Diabetes',
    'Hypertension',
    'Dyslipemia',
    'PeripheralArteryDisease',
    'PriorMyocardialInfarction',
    'PriorPCI',
    'PriorCABG',
    'PriorStroke',
    'Cancer',
    'Prior_Bleeding',
    'STEMI',
    'UnstableAngina',
    'Killip2orMore',
    'LVEFless40',
    'LVEFless50',
    'LVEFafter50',
    'Hemoglobin_Admission',
    'Creatinine_Admission',
    'Femoral_Access',
    'Multivessel',
    'DrugElutingStent',
    'NoStent',
    'Revascularization_Complete',
    'ReAMI_inhospital',
    'Bleeding_inhospital',
    'Clopidogrel',
    'Ticagrelor',
    'Prasugrel',
    'OralAnticoagulation',
    'Betablockers',
    'ACEI_ARB',
    'Statin',
]

# Bleeding (hemorragia)

In [None]:
# Split by registry: BLEEMACS and RENAMI rows form the training set,
# Vigo_Arritxaca rows form the test set. Target: 'bleedingless12Dapt'.
train_mask = df['Registro'].isin(['BLEEMACS', 'RENAMI'])
test_mask = df['Registro'] == 'Vigo_Arritxaca'

X_train = df.loc[train_mask, var_pred]
y_train = df.loc[train_mask, 'bleedingless12Dapt']

X_test = df.loc[test_mask, var_pred]
y_test = df.loc[test_mask, 'bleedingless12Dapt']

### Random Forest

In [None]:
# Cross-validation: 10-fold ROC AUC of a shallow random forest on the training set.
#
# max_features="sqrt" replaces the legacy "auto" alias. For classifiers "auto" meant
# sqrt(n_features); it was deprecated in scikit-learn 1.1 and removed in 1.3, where
# passing it raises an error. The fitted model is unchanged by this substitution.
clf = RandomForestClassifier(max_depth=2, max_features="sqrt", random_state=1234)
scores = cross_val_score(clf, X_train, y_train, cv=10, scoring='roc_auc')
print(scores, '\n\n')   # per-fold AUC
print(scores.mean())    # mean AUC across folds

In [None]:
# Fit on the whole training set, then print ROC AUC on the test registry.
clf.fit(X_train, y_train)   # fit() returns the estimator itself, so no rebinding is needed
y_prob = clf.predict_proba(X_test)
# Column 1 of predict_proba is the probability of clf.classes_[1].
print(roc_auc_score(y_test, y_prob[:, 1]))

In [None]:
# Show the importances labelled with their predictor names and sorted from most to
# least important; the raw array alone cannot be read without cross-referencing the
# column order of X_train.
pd.Series(clf.feature_importances_, index=X_train.columns, name='importance').sort_values(ascending=False)

In [None]:
# ROC curves for the bleeding model, scored on the test set.
visualizer = ROCAUC(clf, classes=["No bleeding", "bleeding"])

visualizer.fit(X_train, y_train)        # Fit the training data to the visualizer
visualizer.score(X_test, y_test)        # Evaluate the model on the test data
# show() replaces poof(), which is deprecated since yellowbrick 1.0; this also matches
# the other visualizer cells in this notebook. The trailing ';' hides the returned repr.
visualizer.show();

In [None]:
# visualizer = PrecisionRecallCurve(clf, classes=['no bleeding', 'bleeding'])

# visualizer.fit(X_train, y_train)        # Fit the training data to the visualizer
# visualizer.score(X_test, y_test)        # Evaluate the model on the test data
# visualizer.show()

In [None]:
# Confusion matrix of the bleeding model on the test set.
visualizer = ConfusionMatrix(
    clf,
    classes=['no bleeding', 'bleeding'],
)

# Train through the visualizer, then score on the held-out data and draw the figure.
visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
visualizer.show()

In [None]:
# Classification report of the bleeding model on the test set.
visualizer = ClassificationReport(
    clf,
    classes=['no bleeding', 'bleeding'],
)

# Train through the visualizer, then score on the held-out data and draw the figure.
visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
visualizer.show()

---
# Reinfarction (reinfarto)

In [None]:
# Split by registry: BLEEMACS and RENAMI rows form the training set,
# Vigo_Arritxaca rows form the test set. Target: 'reAMIless12Dapt'.
train_mask = df['Registro'].isin(['BLEEMACS', 'RENAMI'])
test_mask = df['Registro'] == 'Vigo_Arritxaca'

X_train = df.loc[train_mask, var_pred]
y_train = df.loc[train_mask, 'reAMIless12Dapt']

X_test = df.loc[test_mask, var_pred]
y_test = df.loc[test_mask, 'reAMIless12Dapt']

### Random Forest

In [None]:
# Cross-validation: 10-fold ROC AUC of a shallow random forest on the training set.
# max_features=1 makes each split consider a single candidate feature.
clf = RandomForestClassifier(
    random_state=1234,
    max_features=1,
    max_depth=2,
)
scores = cross_val_score(estimator=clf, X=X_train, y=y_train, scoring='roc_auc', cv=10)
print(scores, '\n\n')   # per-fold AUC
print(scores.mean())    # mean AUC across folds

In [None]:
# Fit on the whole training set, then print ROC AUC on the test registry.
clf.fit(X_train, y_train)   # fit() returns the estimator itself, so no rebinding is needed
y_prob = clf.predict_proba(X_test)
# Column 1 of predict_proba is the probability of clf.classes_[1].
print(roc_auc_score(y_test, y_prob[:, 1]))

In [None]:
# Show the importances labelled with their predictor names and sorted from most to
# least important; the raw array alone cannot be read without cross-referencing the
# column order of X_train.
pd.Series(clf.feature_importances_, index=X_train.columns, name='importance').sort_values(ascending=False)

In [None]:
# ROC curves for the reAMI model, scored on the test set.
visualizer = ROCAUC(clf, classes=["No reAMI", "reAMI"])

visualizer.fit(X_train, y_train)        # Fit the training data to the visualizer
visualizer.score(X_test, y_test)        # Evaluate the model on the test data
# show() replaces poof(), which is deprecated since yellowbrick 1.0; this also matches
# the other visualizer cells in this notebook. The trailing ';' hides the returned repr.
visualizer.show();

In [None]:
# visualizer = PrecisionRecallCurve(clf, classes=['no reAMI', 'reAMI'])

# visualizer.fit(X_train, y_train)        # Fit the training data to the visualizer
# visualizer.score(X_test, y_test)        # Evaluate the model on the test data
# visualizer.show()

In [None]:
# Confusion matrix of the reAMI model on the test set.
visualizer = ConfusionMatrix(
    clf,
    classes=['no reAMI', 'reAMI'],
)

# Train through the visualizer, then score on the held-out data and draw the figure.
visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
visualizer.show()

In [None]:
# Classification report of the reAMI model on the test set.
visualizer = ClassificationReport(
    clf,
    classes=['no reAMI', 'reAMI'],
)

# Train through the visualizer, then score on the held-out data and draw the figure.
visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
visualizer.show()