In [111]:
from cgitb import reset
import pandas as pd
import numpy as np

# Fixed seed so the synthetic patients (and every table derived from them)
# are the same on each Restart & Run All.
SEED = 42
N_ROWS = 5
rng = np.random.default_rng(SEED)

# Synthetic patient records; categorical features are stored as integer codes.
data = {
        'Age': rng.integers(40, 80, N_ROWS),
        'Sex': rng.choice([0, 1], size=N_ROWS),
        'ChestPainType': rng.choice([0, 1, 2, 3], size=N_ROWS),
        'RestingBP': rng.integers(90, 200, N_ROWS),
        'Cholesterol': rng.integers(120, 600, N_ROWS),
        'FastingBS': rng.choice([0, 1], size=N_ROWS),
        'RestingECG': rng.choice([0, 1, 2], size=N_ROWS),
        'MaxHR': rng.integers(70, 210, N_ROWS),
        'ExerciseAngina': rng.choice([0, 1], size=N_ROWS),
        'Oldpeak': rng.uniform(0.0, 7.0, N_ROWS),
        'ST_Slope': rng.choice([0, 1, 2], size=N_ROWS),
        'ca': rng.choice([0, 1, 2, 3, 4], size=N_ROWS),
        'thal': rng.choice([0, 1, 2, 3], size=N_ROWS)
        }

df = pd.DataFrame(data)
print(df)

# For each categorical feature, the label at position i names integer code i.
CATEGORY_LABELS = {
    'Sex': ['F', 'M'],
    'ChestPainType': ['ASY', 'ATA', 'NAP', 'TA'],
    'RestingECG': ['LVH', 'Normal', 'ST'],
    'ExerciseAngina': ['N', 'Y'],
    'ST_Slope': ['Down', 'Flat', 'Up'],
}


def one_hot_fixed(series, prefix, labels):
    """One-hot encode an integer-coded column against a fixed set of levels.

    Parameters
    ----------
    series : pd.Series
        Column holding integer codes ``0 .. len(labels) - 1``.
    prefix : str
        Prefix of the generated column names.
    labels : list of str
        Human-readable name of each code; ``labels[i]`` names code ``i``.

    Returns
    -------
    pd.DataFrame
        Boolean frame indexed like ``series`` with exactly one column
        ``f'{prefix}_{label}'`` per label, in label order. A level that never
        occurs in ``series`` still gets its (all-False) column, and a value
        outside the known codes yields an all-False row.
    """
    # Declaring the full category set up front makes get_dummies emit every
    # level. The `columns=` argument of get_dummies cannot do this: it selects
    # DataFrame columns to encode and is ignored when a Series is passed.
    codes = list(range(len(labels)))
    categorical = series.astype(pd.CategoricalDtype(categories=codes))
    # dtype=bool keeps the output dtype identical across pandas versions.
    dummies = pd.get_dummies(categorical, prefix=prefix, dtype=bool)
    dummies.columns = [f'{prefix}_{label}' for label in labels]
    return dummies


# ChestPainType
chest_pain_dummies = one_hot_fixed(df['ChestPainType'], 'ChestPainType', CATEGORY_LABELS['ChestPainType'])

# RestingECG
resting_ecg_dummies = one_hot_fixed(df['RestingECG'], 'RestingECG', CATEGORY_LABELS['RestingECG'])

# ExerciseAngina
exerciseAngina_dummies = one_hot_fixed(df['ExerciseAngina'], 'ExerciseAngina', CATEGORY_LABELS['ExerciseAngina'])

# ST_Slope
st_slope_dummies = one_hot_fixed(df['ST_Slope'], 'ST_Slope', CATEGORY_LABELS['ST_Slope'])

# Sex
sex_dummies = one_hot_fixed(df['Sex'], 'Sex', CATEGORY_LABELS['Sex'])

# Numeric features first, then the encoded groups; 'ca' and 'thal' are not
# carried over into the final frame.
numeric_features = df[['Age', 'RestingBP', 'Cholesterol', 'FastingBS', 'MaxHR', 'Oldpeak']]
result = pd.concat([numeric_features, sex_dummies, chest_pain_dummies, resting_ecg_dummies, exerciseAngina_dummies, st_slope_dummies], axis=1)
result


   Age  Sex  ChestPainType  RestingBP  Cholesterol  FastingBS  RestingECG  \
0   51    1              0        169          553          1           0   
1   46    0              3        137          570          0           2   
2   67    0              2        119          218          1           0   
3   73    1              0        173          183          0           1   
4   41    0              0        185          268          1           2   

   MaxHR  ExerciseAngina   Oldpeak  ST_Slope  ca  thal  
0    122               1  6.033454         1   3     2  
1    176               1  3.589269         0   2     0  
2     96               0  4.382584         1   3     0  
3    147               0  2.908545         2   4     3  
4    108               1  4.255713         0   1     2  


Unnamed: 0,Age,RestingBP,Cholesterol,FastingBS,MaxHR,Oldpeak,Sex_F,Sex_M,ChestPainType_ASY,ChestPainType_ATA,ChestPainType_NAP,ChestPainType_TA,RestingECG_LVH,RestingECG_Normal,RestingECG_ST,ExerciseAngina_N,ExerciseAngina_Y,ST_Slope_Down,ST_Slope_Flat,ST_Slope_Up
0,51,169,553,1,122,6.033454,False,True,True,False,False,False,True,False,False,False,True,False,True,False
1,46,137,570,0,176,3.589269,True,False,False,False,False,True,False,False,True,False,True,True,False,False
2,67,119,218,1,96,4.382584,True,False,False,False,True,False,True,False,False,True,False,False,True,False
3,73,173,183,0,147,2.908545,False,True,True,False,False,False,False,True,False,True,False,False,False,True
4,41,185,268,1,108,4.255713,True,False,True,False,False,False,False,False,True,False,True,True,False,False


In [136]:
import joblib

# Load the two persisted models to compare. joblib.load unpickles the file,
# which can execute arbitrary code -- only load model files you trust.
model = joblib.load('../data/randomforest_model.joblib')
old_model = joblib.load('../data/modele_log_reg_2.joblib')

# Draw a small sample of the test set. The fixed random_state keeps the same
# 10 rows on every run, so the comparison table below is reproducible.
test = pd.read_csv('tests_data.csv').sample(n=10, random_state=42)
x_test = test.drop(columns=['HeartDisease'])

new_predictions = model.predict(x_test)
old_predictions = old_model.predict(x_test)
# Despite the name, these are the ground-truth labels read from the CSV,
# not model output.
real_predictions = test['HeartDisease']

# Side-by-side table. 'Real Predictions' is a Series, so the frame takes the
# sampled rows' original index; 'Features Tests' repeats that index as a column.
df_predictions = pd.DataFrame({
    'Features Tests': test.index,  
    'Old Predictions': old_predictions,
    'New Predictions': new_predictions,
    'Real Predictions': real_predictions
})

df_predictions

Unnamed: 0,Features Tests,Old Predictions,New Predictions,Real Predictions
193,193,0,0,0
181,181,0,0,0
51,51,0,0,0
156,156,1,1,1
185,185,1,0,0
89,89,1,1,1
127,127,0,0,0
194,194,1,1,1
48,48,0,0,0
55,55,0,0,0
