# Apprentissage Automatique (Machine Learning)

## Classification (Diagnostic)

In [None]:
# Load CSV file with Pandas
from pandas import read_csv
# Scikit-learn
from sklearn.model_selection import train_test_split # Split data
from sklearn.linear_model import LogisticRegression # Algorithme d'apprentissage
# Read the CSV (the file has no header row, so column names are supplied here)
filename = 'datasets/pima-indians-diabetes.data.csv'
col_names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
data = read_csv(filename, names=col_names)
# Label each row patient_1 .. patient_N. N comes from the frame itself, so the
# index assignment cannot fail with a length mismatch if the row count changes.
patient = [f'patient_{x}' for x in range(1, len(data) + 1)]
data.index = patient
print(f'Shape: {data.shape}')
print(f'Dataframe\n----------\n{data}')

: 

In [None]:
# Convert the dataframe to a 2-D NumPy array (rows = patients, columns = col_names)
array = data.to_numpy()

In [None]:
# Separate inputs from output:
#   X = every column except the last, Y = the last column
X, Y = array[:, :-1], array[:, -1]

In [None]:
# Partition the samples into a training set and a test set
test_proportion = 0.30  # fraction of the rows placed in the test set
seed = 11  # fixed random state so the train/test partition is reproducible
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=test_proportion,
    random_state=seed,
)

In [None]:
# Build the logistic-regression classifier and fit it on the training split.
# fit() returns the estimator itself, so construction and training can be chained.
model = LogisticRegression(solver='newton-cg').fit(X_train, Y_train)
model  # last expression of the cell: shows the fitted estimator

In [None]:
# Evaluate the model on the held-out test split
result = model.score(X_test, Y_test)
# Use the built-in round() rather than the .round() method: a plain Python
# float has no .round(), so the method form fails unless score() happens to
# return a NumPy scalar. round() works for both and prints the same text.
print(f'Accuracy: {round(result * 100, 2)}%')

Accuracy: 78.35%


In [None]:
# Predict the class of every test sample, then list each sample next to its
# predicted class and its recorded diagnosis.
prediction = model.predict(X_test)
for instance, pred, diag in zip(
    X_test,      # feature vector of the sample
    prediction,  # class predicted by the model
    Y_test,      # class recorded in the dataset
):
    print(f'{instance} -> Pred: {pred} vs Diag: {diag}')
    

[ 2.   81.   60.   22.    0.   27.7   0.29 25.  ] -> Pred: 0.0 vs Diag: 0.0
[  1.    130.     70.     13.    105.     25.9     0.472  22.   ] -> Pred: 0.0 vs Diag: 0.0
[  0.    107.     60.     25.      0.     26.4     0.133  23.   ] -> Pred: 0.0 vs Diag: 0.0
[  7.    114.     76.     17.    110.     23.8     0.466  31.   ] -> Pred: 0.0 vs Diag: 0.0
[ 1.    80.    74.    11.    60.    30.     0.527 22.   ] -> Pred: 0.0 vs Diag: 0.0
[ 2.    84.     0.     0.     0.     0.     0.304 21.   ] -> Pred: 0.0 vs Diag: 0.0
[ 1.    95.    74.    21.    73.    25.9    0.673 36.   ] -> Pred: 0.0 vs Diag: 0.0
[  5.    114.     74.      0.      0.     24.9     0.744  57.   ] -> Pred: 0.0 vs Diag: 0.0
[  1.    153.     82.     42.    485.     40.6     0.687  23.   ] -> Pred: 0.0 vs Diag: 0.0
[  1.    146.     56.      0.      0.     29.7     0.564  29.   ] -> Pred: 0.0 vs Diag: 0.0
[  4.    114.     64.      0.      0.     28.9     0.126  24.   ] -> Pred: 0.0 vs Diag: 0.0
[ 1.    99.    58.    10.   

## Live Prediction

In [None]:
# Save the trained model to local disk so an application can reuse it
import pickle
model_name = 'diabetes_model.pkl'
# The context manager closes (and flushes) the file even if dump() raises;
# a bare open() call would leave the handle open.
with open(model_name, 'wb') as model_file:
    pickle.dump(model, model_file)


In [None]:
# Load the saved model back from disk
# NOTE: pickle.load can execute arbitrary code while unpickling; only load
# model files that come from a trusted source.
model_name = 'diabetes_model.pkl'
# The context manager guarantees the file handle is closed after reading.
with open(model_name, 'rb') as model_file:
    loaded_model = pickle.load(model_file)

In [None]:
# New data: classify three unseen patients and show the full probability matrix
import numpy
patient1 = [11,119,0,30,0,39.9,0.47,32]
patient2 = [10,67,74,40,54,35.3,0.378,38]
patient3 = [1,184,76,36,249,36.5,0.775,27]
patient1_array, patient2_array, patient3_array = (
    numpy.array(values) for values in (patient1, patient2, patient3)
)
for patient_array in (patient1_array, patient2_array, patient3_array):
    # predict() returns a 1-element array for a single sample; take element 0
    # before int(), because converting an array with ndim > 0 to a scalar is
    # deprecated in NumPy.
    pred = int(loaded_model.predict([patient_array])[0])
    # Kept as returned by predict_proba: one row per sample, one column per class
    proba = loaded_model.predict_proba([patient_array])
    print(f'Class: {pred}, Probability: {proba}')


Class: 1, Probability: [[0.18819645 0.81180355]]
Class: 0, Probability: [[0.87684293 0.12315707]]
Class: 1, Probability: [[0.24564393 0.75435607]]


In [None]:
# Same three patients; this time print only the probability row of each sample
patient1 = [11,119,0,30,0,39.9,0.47,32]
patient2 = [10,67,74,40,54,35.3,0.378,38]
patient3 = [1,184,76,36,249,36.5,0.775,27]
patient1_array, patient2_array, patient3_array = (
    numpy.array(values) for values in (patient1, patient2, patient3)
)
for patient_array in (patient1_array, patient2_array, patient3_array):
    # predict() returns a 1-element array for a single sample; take element 0
    # before int(), because converting an array with ndim > 0 to a scalar is
    # deprecated in NumPy.
    pred = int(loaded_model.predict([patient_array])[0])
    # [0] selects the single sample's row of class probabilities
    proba = loaded_model.predict_proba([patient_array])[0]
    print(f'Class: {pred}, Probability: {proba}')


Class: 1, Probability: [0.18819645 0.81180355]
Class: 0, Probability: [0.87684293 0.12315707]
Class: 1, Probability: [0.24564393 0.75435607]


In [None]:
# Same three patients; print only the probability of the predicted class
patient1 = [11,119,0,30,0,39.9,0.47,32]
patient2 = [10,67,74,40,54,35.3,0.378,38]
patient3 = [1,184,76,36,249,36.5,0.775,27]
patient1_array, patient2_array, patient3_array = (
    numpy.array(values) for values in (patient1, patient2, patient3)
)
for patient_array in (patient1_array, patient2_array, patient3_array):
    # predict() returns a 1-element array for a single sample; take element 0
    # before int(), because converting an array with ndim > 0 to a scalar is
    # deprecated in NumPy.
    pred = int(loaded_model.predict([patient_array])[0])
    # [0] selects the single sample's row of class probabilities
    proba = loaded_model.predict_proba([patient_array])[0]
    # The predicted class (0 or 1) is used as the index into the probability row
    print(f'Class: {pred}, Probability: {proba[pred]}')


Class: 1, Probability: 0.8118035502642829
Class: 0, Probability: 0.8768429270572059
Class: 1, Probability: 0.7543560676146516
