In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import os

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier

In [2]:
# Load the heart dataset. A forward slash is used because "\h" is an invalid
# escape sequence in a normal string literal and a backslash separator only
# works on Windows; "/" is accepted on every platform.
df_heart = pd.read_csv("Datasets/heart.csv")

In [3]:
# Preview the first ten rows to check the columns and their value encodings.
df_heart.head(10)


Unnamed: 0,age,sex,cp,trtbps,chol,fbs,restecg,thalachh,exng,oldpeak,slp,caa,thall,output
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1
5,57,1,0,140,192,0,1,148,0,0.4,1,0,1,1
6,56,0,1,140,294,0,0,153,0,1.3,1,0,2,1
7,44,1,1,120,263,0,1,173,0,0.0,2,0,3,1
8,52,1,2,172,199,1,1,162,0,0.5,2,0,3,1
9,57,1,2,150,168,0,1,174,0,1.6,2,0,2,1


In [4]:
# Remove exact duplicate rows before modelling; the original index labels are
# kept (no re-indexing), and the name is rebound instead of mutating in place.
df_heart = df_heart.drop_duplicates()

# Preprocessing et séparation de l'output du df

In [5]:
def preprocessing_inputs(df_heart, scaler):
    """Split the heart frame into a feature matrix and a target vector.

    Parameters
    ----------
    df_heart : pandas.DataFrame
        Frame containing an "output" column plus the feature columns.
        It is not modified.
    scaler : object
        Accepted for interface compatibility but currently NOT used:
        the returned features are the raw, unscaled values.

    Returns
    -------
    (X, y) : tuple
        X is a copy of every column except "output"; y is a copy of the
        "output" column.
    """
    target_column = "output"

    features = df_heart.drop(columns=[target_column]).copy()
    target = df_heart[target_column].copy()

    return features, target



In [6]:
# Separate features (X) from the target (y).
# NOTE(review): the MinMaxScaler instance is passed in but preprocessing_inputs
# never applies it, so X holds the raw, unscaled feature values.
X, y = preprocessing_inputs(df_heart, MinMaxScaler())

In [7]:
# Display the feature matrix (every column except "output").
X

Unnamed: 0,age,sex,cp,trtbps,chol,fbs,restecg,thalachh,exng,oldpeak,slp,caa,thall
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...
298,57,0,0,140,241,0,1,123,1,0.2,1,0,3
299,45,1,3,110,264,0,1,132,0,1.2,1,0,3
300,68,1,0,144,193,1,1,141,0,3.4,1,2,3
301,57,1,0,130,131,0,1,115,1,1.2,1,1,3


In [8]:
# Display the target vector (the "output" column).
y

0      1
1      1
2      1
3      1
4      1
      ..
298    0
299    0
300    0
301    0
302    0
Name: output, Length: 302, dtype: int64

#  split test et training

In [9]:
# 80 % of the rows go to training; the split is stratified on y so that both
# parts keep the same class proportions, and the seed is fixed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.8,
    stratify=y,
    random_state=0,
)

In [10]:
# Sanity check: shapes of the full, training and test feature matrices.
print(X.shape, X_train.shape, X_test.shape)

(302, 13) (241, 13) (61, 13)


#  REGRESSION LOGISTIQUE (88,52%)

In [11]:
# Logistic regression baseline; C is the inverse regularisation strength.
# This is also the estimator the GUI callback `action` uses for predictions.
model = LogisticRegression(C=0.1, solver='liblinear')

In [12]:
# Fit on the training split only; the test split stays unseen until evaluation.
model.fit(X_train, y_train)

LogisticRegression(C=0.1, solver='liblinear')

In [13]:
# Accuracy on the training split. accuracy_score is documented as
# (y_true, y_pred), so the ground truth is passed first.
X_train_prediction = model.predict(X_train)
training_data_accuracy = accuracy_score(y_train, X_train_prediction)

In [14]:
# Show the logistic-regression accuracy on the training split.
training_data_accuracy

0.8506224066390041

In [15]:
# Accuracy on the held-out test split. accuracy_score is documented as
# (y_true, y_pred), so the ground truth is passed first.
X_test_prediction = model.predict(X_test)
testing_data_accuracy = accuracy_score(y_test, X_test_prediction)

In [16]:
# Show the logistic-regression accuracy on the held-out test split.
testing_data_accuracy

0.8852459016393442

In [17]:
# Report the test accuracy as a percentage (print's default separator adds the
# extra spaces around the number).
print("le score d'accuracy est de : ", testing_data_accuracy*100, "%")

le score d'accuracy est de :  88.52459016393442 %


#  DECISION TREE (73,77%)

In [18]:
# Decision tree with default (unconstrained) settings; the seed makes the
# fitted tree reproducible.
model_2 = DecisionTreeClassifier(random_state=0)

In [19]:
# Fit the decision tree on the training split only.
model_2.fit(X_train, y_train)

DecisionTreeClassifier(random_state=0)

In [20]:
# Training accuracy of the decision tree. accuracy_score is documented as
# (y_true, y_pred), so the ground truth is passed first.
X_train_prediction_2 = model_2.predict(X_train)
training_data_accuracy_2 = accuracy_score(y_train, X_train_prediction_2)

In [21]:
# Show the decision-tree training accuracy; compare it with the test accuracy
# below to judge how much the tree overfits.
training_data_accuracy_2

1.0

In [22]:
# Test accuracy of the decision tree. accuracy_score is documented as
# (y_true, y_pred), so the ground truth is passed first.
X_test_prediction_2 = model_2.predict(X_test)
testing_data_accuracy_2 = accuracy_score(y_test, X_test_prediction_2)

In [23]:
# Show the decision-tree accuracy on the held-out test split.
testing_data_accuracy_2

0.7377049180327869

# RANDOM FOREST (83,61%)

In [24]:
# Random forest with default settings; the seed fixes the bootstrap samples and
# feature sub-sampling so results are reproducible.
model_3 = RandomForestClassifier(random_state = 0)

In [25]:
# Fit the random forest on the training split only.
model_3.fit(X_train, y_train)

RandomForestClassifier(random_state=0)

In [26]:
# Training accuracy of the random forest. accuracy_score is documented as
# (y_true, y_pred), so the ground truth is passed first.
X_train_prediction_3 = model_3.predict(X_train)
training_data_accuracy_3 = accuracy_score(y_train, X_train_prediction_3)
training_data_accuracy_3

1.0

In [27]:
# Test accuracy of the random forest. accuracy_score is documented as
# (y_true, y_pred), so the ground truth is passed first.
X_test_prediction_3 = model_3.predict(X_test)
testing_data_accuracy_3 = accuracy_score(y_test, X_test_prediction_3)
testing_data_accuracy_3

0.8360655737704918

# K-NEAREST NEIGHBOURS (73,77%)

In [28]:
# k-nearest-neighbours classifier voting over the 4 closest training rows.
# NOTE(review): the features are never scaled in this notebook (the
# MinMaxScaler passed to preprocessing_inputs is unused), and KNN distances are
# dominated by large-valued columns — consider scaling before fitting.
model_4 = KNeighborsClassifier(n_neighbors = 4)

In [29]:
# Fit the KNN classifier on the training split only.
model_4.fit(X_train, y_train)

KNeighborsClassifier(n_neighbors=4)

In [30]:
# Training accuracy of the KNN classifier. accuracy_score is documented as
# (y_true, y_pred), so the ground truth is passed first.
X_train_prediction_4 = model_4.predict(X_train)
training_data_accuracy_4 = accuracy_score(y_train, X_train_prediction_4)
training_data_accuracy_4

0.7468879668049793

In [31]:
# Test accuracy of the KNN classifier. accuracy_score is documented as
# (y_true, y_pred), so the ground truth is passed first.
X_test_prediction_4 = model_4.predict(X_test)
testing_data_accuracy_4 = accuracy_score(y_test, X_test_prediction_4)
testing_data_accuracy_4

0.7377049180327869

# Optimisation avec GridSearch


In [32]:
# Hyper-parameter grid for logistic regression: both penalties supported by the
# liblinear solver, crossed with regularisation strengths from 1e-3 to 1e3.
log_reg_params = dict(
    penalty=['l1', 'l2'],
    C=[0.001, 0.01, 0.1, 1, 10, 100, 1000],
    solver=["liblinear"],
)

# Exhaustive search over the grid using GridSearchCV's default
# cross-validation and scoring.
model_opti = GridSearchCV(estimator=LogisticRegression(), param_grid=log_reg_params)

In [33]:
# Run the cross-validated grid search on the training split only, so the test
# split is never used for model selection.
model_opti.fit(X_train, y_train)


# Mean cross-validated score obtained by the best parameter combination.
model_opti.best_score_

0.8259353741496598

In [34]:
# Keep the best estimator found by the search (refit by GridSearchCV on the
# whole training split by default).
log_reg = model_opti.best_estimator_

In [35]:
# Display the selected estimator and its hyper-parameters.
log_reg

LogisticRegression(C=0.1, solver='liblinear')

# Système prédictif

In [36]:
from tkinter import *
import tkinter as tk
from PIL import ImageTk, Image

In [37]:
def action():
    """Tk button callback: read the form, run the classifier, show the verdict.

    Reads the module-level entry widgets ``entry1`` .. ``entry13`` (in the same
    order as the training feature columns), predicts with the module-level
    ``model`` and writes the outcome into ``entry_final_text``.

    If any field is empty or not a number, a short error message is shown in
    the result field instead of letting the ValueError escape into Tk's
    callback handler (where the user would see nothing). A decimal comma is
    accepted as well as a decimal point.

    NOTE(review): predicted class 0 is displayed as "maladie cardiovasculaire"
    and class 1 as "pas de maladie cardiovasculaire". This mapping is kept as
    written; confirm it against the meaning of the dataset's `output` column.
    """
    fields = (entry1, entry2, entry3, entry4, entry5, entry6, entry7,
              entry8, entry9, entry10, entry11, entry12, entry13)

    # Clear any previous verdict before doing anything else.
    entry_final_text.delete(0, tk.END)

    try:
        raw_values = [field.get().strip().replace(",", ".") for field in fields]
        features = np.asarray(raw_values, dtype='float64')
    except ValueError:
        entry_final_text.insert(0, "saisie invalide")
        return

    # scikit-learn expects a 2-D array of shape (n_samples, n_features).
    prediction = model.predict(features.reshape(1, -1))

    if prediction[0] == 0:
        entry_final_text.insert(0, "maladie cardiovasculaire  ")
    else:
        entry_final_text.insert(0, "pas de maladie cardiovasculaire")
        
    

In [70]:


# --- Main window -----------------------------------------------------------
fen = tk.Tk()
fen.title("Programme de prédiction de maladie cardiovasculaire")
fen.geometry("600x650")
try:
    fen.iconbitmap('Datasets/heart.ico')
except tk.TclError:
    # .ico bitmaps are only accepted by Tk on Windows; the icon is purely
    # cosmetic, so the window is still usable without it.
    pass

# `img` must stay referenced for as long as the window lives, otherwise the
# picture disappears when the PhotoImage object is garbage-collected.
img = ImageTk.PhotoImage(Image.open("Datasets/Téléchargement.png"))
panel = tk.Label(fen, image=img)
panel.place(x=180, y=458)

# --- Input form --------------------------------------------------------------
# One prompt per feature, in the same order as the training columns
# (age, sex, cp, trtbps, chol, fbs, restecg, thalachh, exng, oldpeak, slp,
# caa, thall). The binary prompts advertise 0/1 because that is how these
# columns are encoded in the training data.
FIELD_LABELS = (
    "Entrer L'age : ",
    "Entrer le Sexe : 1 = Masculin , 0 = féminin : ",
    "Entrer le type de chest pain : ",
    "Entrer la pression sanguine : ",
    "Entrer le taux de cholestérol : ",
    "Diabète?   1 = oui , 0 = non : ",
    "Entrer les résultats de l'électrocardiographe au repos:",
    "Entrer le battement de coeur max atteint:",
    "Angine à l'effort?   1 = oui , 0 = non:",
    "Entrer la valeur de l'oldpeak :",
    "Entrer le slp :",
    "Entrer le caa :",
    "Entrer le thall :",
)
LABEL_X = 50        # left edge of the prompts
ENTRY_X = 400       # left edge of the input boxes
FIRST_ROW_Y = 50    # vertical position of the first row
ROW_STEP = 20       # vertical distance between two rows

entries = []
for row_index, label_text in enumerate(FIELD_LABELS):
    row_y = FIRST_ROW_Y + row_index * ROW_STEP
    tk.Label(fen, text=label_text).place(x=LABEL_X, y=row_y)
    field_entry = tk.Entry(fen)
    field_entry.place(x=ENTRY_X, y=row_y)
    entries.append(field_entry)

# The `action` callback reads these thirteen module-level names.
(entry1, entry2, entry3, entry4, entry5, entry6, entry7,
 entry8, entry9, entry10, entry11, entry12, entry13) = entries

# --- Submit button and result field -----------------------------------------
Validation = tk.Button(fen, text="Envoyer les données à l'IA", command=action)
Validation.place(x=400, y=330)

# Result field filled in by `action`.
entry_final_text = tk.Entry(fen, bg="light Grey", justify=tk.CENTER, bd=1,
                            font=("Calibri", 20), width=25)
entry_final_text.place(x=125, y=420, height=40)

# Blocks until the window is closed.
fen.mainloop()

