In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import recall_score, precision_score, confusion_matrix
from sklearn.model_selection import train_test_split

# Dataset

In [2]:
data = pd.read_excel('./data/Coeur.xlsx')
df = data.copy()

df.head()

Unnamed: 0,AGE,SEXE,TDT,PAR,CHOLESTEROL,GAJ,ECG,FCMAX,ANGINE,DEPRESSION,PENTE,CŒUR
0,40,homme,AA,140,289,0,Normal,172,Non,0.0,Ascendant,0
1,49,femme,DNA,160,180,0,Normal,156,Non,1.0,Plat,1
2,37,homme,AA,130,283,0,ST,98,Non,0.0,Ascendant,0
3,48,femme,ASY,138,214,0,Normal,108,Oui,1.5,Plat,1
4,54,homme,DNA,150,195,0,Normal,122,Non,0.0,Ascendant,0


# Normalisation des variables continue

In [3]:
for col in df.drop(['CŒUR'], axis=1).select_dtypes(np.number).columns:
    df[col] = (df[col] - df[col].mean())/ df[col].std()

# Recodage des variables discrète

In [4]:
for col in df.select_dtypes('object').columns:
    df[col] = df[col].astype('category').cat.codes

# Division du dataset

In [5]:
train_set, test_set = train_test_split(df, random_state=0, test_size=0.3)

# Regression Logistique

In [14]:
# Initialisation du modèle
lr = LogisticRegression()

# Entraînement du modèle
lr.fit(train_set.drop(['CŒUR'], axis=1), train_set['CŒUR'])

# Performance sur le train set
lr.score(train_set.drop(['CŒUR'], axis=1), train_set['CŒUR'])

0.8629283489096573

# Arbre de décision

In [16]:
# Initialisation du modèle
tree = DecisionTreeClassifier()

# Entraînement du model
tree.fit(train_set.drop(['CŒUR'], axis=1), train_set['CŒUR'])

# Performance sur le train set
tree.score(train_set.drop(['CŒUR'], axis=1), train_set['CŒUR'])

1.0

# Performance des deux modèles

In [8]:
# Performance Régression Logistique
print(lr.score(test_set.drop(['CŒUR'], axis=1), test_set['CŒUR']))

# Performance Arbre de décision
print(tree.score(test_set.drop(['CŒUR'], axis=1), test_set['CŒUR']))

0.8369565217391305
0.8260869565217391


# Prédiction

In [9]:
# Prédiction régression logistique
lr_pred = lr.predict(test_set.drop(['CŒUR'], axis=1))

# Prédiction arbre de décision
tree_pred = tree.predict(test_set.drop(['CŒUR'], axis=1))

# Matrice de confusion

In [10]:
# Matrice de confusion régression logistique
print(confusion_matrix(test_set['CŒUR'], lr_pred))

print('------------')

# Matrice de confusion arbre de décision
print(confusion_matrix(test_set['CŒUR'], tree_pred))

[[ 92  21]
 [ 24 139]]
------------
[[ 93  20]
 [ 28 135]]


# Sensibilité

In [11]:
# Sensibité régression logistique
print(recall_score(test_set['CŒUR'], lr_pred))

# Sensibilité arbre de décision
print(recall_score(test_set['CŒUR'], tree_pred))

0.852760736196319
0.8282208588957055


# Précision

In [13]:
# Précision régression logistique
print(precision_score(test_set['CŒUR'], lr_pred))

# Précision arbre de décision
print(precision_score(test_set['CŒUR'], tree_pred))

0.86875
0.8709677419354839
