In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import recall_score, precision_score, confusion_matrix
from sklearn.model_selection import train_test_split

# Dataset

In [2]:
# Read the workbook once and keep the result untouched in `data`;
# every later transformation is applied to the independent copy `df`.
data = pd.read_excel('./data/Coeur.xlsx')
df = data.copy(deep=True)

# Preview the first five rows of the working copy.
df.head(5)

Unnamed: 0,AGE,SEXE,TDT,PAR,CHOLESTEROL,GAJ,ECG,FCMAX,ANGINE,DEPRESSION,PENTE,CŒUR
0,40,homme,AA,140,289,0,Normal,172,Non,0.0,Ascendant,0
1,49,femme,DNA,160,180,0,Normal,156,Non,1.0,Plat,1
2,37,homme,AA,130,283,0,ST,98,Non,0.0,Ascendant,0
3,48,femme,ASY,138,214,0,Normal,108,Oui,1.5,Plat,1
4,54,homme,DNA,150,195,0,Normal,122,Non,0.0,Ascendant,0


# Normalisation des variables continues

In [3]:
# Scale every numeric predictor by its column maximum; the target 'CŒUR'
# is excluded so its class labels stay untouched.
# NOTE(review): the maxima are computed on the whole frame, before the
# train/test split performed in a later cell, so held-out rows influence
# the scaling — confirm this is acceptable for the analysis.
numeric_features = df.drop(columns=['CŒUR']).select_dtypes(np.number).columns
for col in numeric_features:
    col_max = df[col].max()
    # Dividing by a zero maximum would fill the column with NaN/inf, which
    # the model fitting step cannot handle; such a column is left as it is.
    if col_max != 0:
        df[col] = df[col] / col_max

df

Unnamed: 0,AGE,SEXE,TDT,PAR,CHOLESTEROL,GAJ,ECG,FCMAX,ANGINE,DEPRESSION,PENTE,CŒUR
0,0.519481,homme,AA,0.70,0.479270,0.0,Normal,0.851485,Non,0.000000,Ascendant,0
1,0.636364,femme,DNA,0.80,0.298507,0.0,Normal,0.772277,Non,0.161290,Plat,1
2,0.480519,homme,AA,0.65,0.469320,0.0,ST,0.485149,Non,0.000000,Ascendant,0
3,0.623377,femme,ASY,0.69,0.354892,0.0,Normal,0.534653,Oui,0.241935,Plat,1
4,0.701299,homme,DNA,0.75,0.323383,0.0,Normal,0.603960,Non,0.000000,Ascendant,0
...,...,...,...,...,...,...,...,...,...,...,...,...
913,0.584416,homme,AT,0.55,0.437811,0.0,Normal,0.653465,Non,0.193548,Plat,1
914,0.883117,homme,ASY,0.72,0.320066,1.0,Normal,0.698020,Non,0.548387,Plat,1
915,0.740260,homme,ASY,0.65,0.217247,0.0,Normal,0.569307,Oui,0.193548,Plat,1
916,0.740260,femme,AA,0.65,0.391376,0.0,LVH,0.861386,Non,0.000000,Plat,1


# Recodage des variables discrètes

In [4]:
# Replace each text column with the integer code of its category.
# Codes follow the sorted order of the distinct values found in the column.
object_columns = df.select_dtypes(include='object').columns
for name in object_columns:
    df[name] = pd.Categorical(df[name]).codes

# Division du dataset

In [5]:
# Hold out 30 % of the rows for evaluation; the fixed seed makes the split
# reproducible. Despite its name, `train_test` is the training portion.
train_test, test_set = train_test_split(
    df,
    test_size=0.3,
    random_state=0,
)


# Instanciation et entraînement du modèle avec les données d'entraînement

In [6]:
# Separate the predictors from the 'CŒUR' target once, then reuse both
# for fitting and for scoring.
X_train = train_test.drop(columns=['CŒUR'])
y_train = train_test['CŒUR']

# fit() returns the estimator itself, so creation and training are chained.
model = LogisticRegression().fit(X_train, y_train)

# Accuracy on the same rows the model was fitted on (not a held-out score).
model.score(X_train, y_train)

0.8660436137071651

# Prédiction

In [7]:
# Predict the class of every held-out row; the 'CŒUR' target column is
# removed so that only the predictors reach the model.
test_features = test_set.drop(columns=['CŒUR'])
pred = model.predict(test_features)

array([1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1,
       1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0,
       1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1,
       1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1,
       1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1,
       0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1,
       1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0,
       1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1,
       0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1,
       1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1,
       1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0,
       1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
       1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1])

# Matrice de confusion

In [8]:
# Rows are the true 'CŒUR' classes, columns are the predicted classes.
confusion_matrix(y_true=test_set['CŒUR'], y_pred=pred)

array([[ 90,  23],
       [ 17, 146]])

# Sensibilité

In [9]:
# Recall (sensitivity) on the held-out rows: share of true positives
# that the model actually flagged.
recall_score(y_true=test_set['CŒUR'], y_pred=pred)

0.8957055214723927

# Précision

In [10]:
# Precision on the held-out rows: share of predicted positives that are
# truly positive.
precision_score(y_true=test_set['CŒUR'], y_pred=pred)

0.863905325443787