In [1]:
import time
import numpy as np
import pandas as pd
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn import svm, datasets
from sklearn.svm import SVC 
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.tree import plot_tree
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import  mean_squared_error

from sklearn.decomposition import PCA

from sklearn.tree import plot_tree
from sklearn.metrics import f1_score
import time

#Data cleaning and Spliting

In [2]:
ionosphere = pd.read_csv("ionosphere_data.csv", header=None)


# Séparer les données en variables d'apprentissage et de test

le = LabelEncoder()
for col in ionosphere.columns:
    if ionosphere[col].dtype == 'object':
        ionosphere[col] = le.fit_transform(ionosphere[col])
X = ionosphere.drop(0,axis=0)
Y = ionosphere.drop(0,axis=0)

X_train, X_test, y_train, y_test = train_test_split(X.drop(34, axis=1), Y[34], test_size=0.3, random_state=42)


#Decision Trees

In [3]:
start_time = time.time()
clf = DecisionTreeClassifier()
clf.fit(X_train, y_train)
dt_time = time.time() -  start_time
y_pred = clf.predict(X_test)
confusion = confusion_matrix(y_test, y_pred)
print("confusion matrix :\n", confusion)
# Calculer le taux d'erreur
err = 1 - np.diag(confusion).sum() / confusion.sum()
print("error with best params : ",err)
#f_score = f1_score(y, y_pred)
print("Decision Tree execution time: {:.4f} seconds".format(dt_time))
#print(" F score: {:.2f}%".format(f_score))

confusion matrix :
 [[34  5]
 [ 1 66]]
error with best params :  0.05660377358490565
Decision Tree execution time: 0.0114 seconds


#Adaboost

In [4]:
#Train an AdaBoost classifier with decision tree base estimator
start_time = time.time()
ada_clf = AdaBoostClassifier(base_estimator=DecisionTreeClassifier(max_depth=1), n_estimators=50, random_state=42)
ada_clf.fit(X_train, y_train)
ada_time = time.time() - start_time
# Make predictions on the testing set
y_pred_ada = ada_clf.predict(X_test)

confusion = confusion_matrix(y_test, y_pred_ada)
print("confusion matrix :\n", confusion)
# Calculer le taux d'erreur
err = 1 - np.diag(confusion).sum() / confusion.sum()
print("error with best params : ",err)
#f_score = f1_score(y, y_pred)
print("Adaboost execution time: {:.4f} seconds".format(dt_time))
#print(" F score: {:.2f}%".format(f_score))


confusion matrix :
 [[32  7]
 [ 4 63]]
error with best params :  0.10377358490566035
Adaboost execution time: 0.0114 seconds




#SVM

In [7]:
start_time = time.time()
param_grid = {'C': [1], 'kernel': ['linear'], 'gamma': [ 1]}
svm_classifier = svm.SVC()

clf = GridSearchCV(svm_classifier, param_grid)
clf.fit(X_train, y_train)
end_time = time.time()
print('Best hyperparameters:',clf.best_params_)
# predict on test set to give probability
y_pred = clf.predict(X_test)

confusion = confusion_matrix(y_test, y_pred)
print("confusion matrix :\n", confusion)
# Calculer le taux d'erreur
err = 1 - np.diag(confusion).sum() / confusion.sum()
print("error with best params : ",err)
#f_score = f1_score(y, y_pred)
print("SVM execution time: {:.4f} seconds".format(dt_time))
#print(" F score: {:.2f}%".format(f_score))


Best hyperparameters: {'C': 1, 'gamma': 1, 'kernel': 'linear'}
confusion matrix :
 [[27 12]
 [ 4 63]]
error with best params :  0.15094339622641506
SVM execution time: 4.7239 seconds


#Neural Networks

In [5]:

start_time = time.time()
RN = MLPClassifier(hidden_layer_sizes=(5,), alpha= 10, activation = 'logistic')
RN.fit(X_train, y_train)

dt_time = time.time() -  start_time

pred = RN.predict(X_test)

confusion = confusion_matrix(y_test, pred)
print("confusion matrix :\n", confusion)
# Calculer le taux d'erreur
err = 1 - np.diag(confusion).sum() / confusion.sum()
print("error with best params : ",err)

print("Neural Networks execution time: {:.4f} seconds".format(dt_time))

confusion matrix :
 [[ 9 30]
 [ 0 67]]
error with best params :  0.28301886792452835
Neural Networks execution time: 0.1542 seconds




#Neural Networks  with PCA

In [6]:
# Réduction de dimensionnalité avec PCA
start_time = time.time()
pca = PCA(n_components=15)
Appren_pca = pca.fit_transform(X_train)
Test_pca = pca.transform(X_test)


# Utiliser GridSearchCV pour ajuster les hyperparamètres
parameters = {'hidden_layer_sizes': [(4,), (5,)], 'alpha': [0.001, 0.01, 0.1, 1, 10]}
tuned_model = GridSearchCV(MLPClassifier(), parameters, cv=5)
tuned_model.fit(Appren_pca, y_train)
print("Meilleurs hyperparamètres:", tuned_model.best_params_)

# Entraîner un nouveau modèle avec les meilleurs hyperparamètres
RN_best = MLPClassifier(hidden_layer_sizes=tuned_model.best_params_['hidden_layer_sizes'], alpha=tuned_model.best_params_['alpha'])
RN_best.fit(Appren_pca, y_train)
dt_time = time.time() -  start_time

# Faire des prédictions sur les données de test avec le meilleur modèle
pred_best = RN_best.predict(Test_pca)

# Calculer la matrice de confusion avec le meilleur modèle
confusion_best = confusion_matrix(y_test, pred_best)
print("Matrice de confusion (Meilleur modèle):")
print(confusion_best)

# Calculer le taux d'erreur avec le meilleur modèle
err_best = 1 - np.diag(confusion_best).sum() / confusion_best.sum()
print("Taux d'erreur (Meilleur modèle):", err_best)

print("Neural Networks wwith PCA and GridSearch execution time: {:.4f} seconds".format(dt_time))




Meilleurs hyperparamètres: {'alpha': 1, 'hidden_layer_sizes': (5,)}
Matrice de confusion (Meilleur modèle):
[[22 17]
 [ 2 65]]
Taux d'erreur (Meilleur modèle): 0.17924528301886788
Neural Networks wwith PCA and GridSearch execution time: 4.7239 seconds


