In [7]:
import numpy as np
from sklearn.datasets import make_moons

# from sklearn.datasets import fetch_openml
# mnist = fetch_openml('mnist_784', version=1)

# X, y = mnist["data"], mnist["target"]
# n_test = 10000
# n_validation = 10000

X, y = make_moons(n_samples = 10000, noise = 0.4)
n_test = 0.2
n_validation = 0.2

In [8]:
import matplotlib.pyplot as plt

print(f"X shape: {X.shape}")
print(f"y example: {y[0]}")

# plt.figure()
# plt.imshow(X[0].reshape(28, 28), cmap="binary")
# plt.show()

X shape: (10000, 2)
y example: 0


In [9]:
from sklearn.model_selection import train_test_split, GridSearchCV

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = n_test, random_state=42)
X_train, X_validation, y_train, y_validation = train_test_split(X_train, y_train, test_size = n_validation, random_state=42)
print(X_train.shape)


(6000, 2)


In [10]:
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, AdaBoostClassifier
from sklearn.model_selection import GridSearchCV

# Find best params for SVC
params_grid = [
    {
        "gamma":  [0.1, 1, 10, 100],
        "C" : [0.1, 1, 10, 100]
    }
]
gaussian = SVC(kernel="rbf")
gaussian_search = GridSearchCV(gaussian, cv=5, param_grid=params_grid, refit=True)
gaussian_search.fit(X_train, y_train)
print(f"Best params : {gaussian_search.best_params_}")
print(f"Best score : {gaussian_search.best_score_}")




Best params : {'C': 100, 'gamma': 1}
Best score : 0.8688333333333333


In [11]:
# Find best params for RandomForest
params_randomForest = [
    {
        "n_estimators": [10, 100, 1000],
        "max_depth" : [10, 100, 1000],
        "max_leaf_nodes": [10 ,100, 1000]
    }
]
randomForest = RandomForestClassifier(n_jobs=-1)
randomForest_search = GridSearchCV(randomForest, cv=5, param_grid=params_randomForest, refit=True)
randomForest_search.fit(X_train, y_train)
print(f"Best params : {randomForest_search.best_params_}")
print(f"Best score : {randomForest_search.best_score_}")


Best params : {'max_depth': 10, 'max_leaf_nodes': 100, 'n_estimators': 1000}
Best score : 0.8658333333333333


In [17]:
# Find best params for Adaboost
params_adaboost = [
    {
        "n_estimators": [10, 100, 1000],
    }
]
adaboost = AdaBoostClassifier(base_estimator = None)
adaboost_search = GridSearchCV(adaboost, cv=5, param_grid=params_adaboost, refit=True)
adaboost_search.fit(X_train, y_train)
print(f"Best params : {adaboost_search.best_params_}")
print(f"Best score : {adaboost_search.best_score_}")

Best params : {'n_estimators': 100}
Best score : 0.8608333333333333


In [23]:
# On combine les différents modèles pour faire un modèle à vote
from sklearn.ensemble import VotingClassifier

svm_votant = SVC(kernel="rbf", **gaussian_search.best_params_)
random_forest_votant = RandomForestClassifier(n_jobs=-1, **randomForest_search.best_params_)
adaboost_votant = AdaBoostClassifier(**adaboost_search.best_params_)

voting_clf = VotingClassifier(
    estimators = [
        ('svc', svm_votant),
        ('random_forest', random_forest_votant),
        ('adaboost', adaboost_votant)
    ],
    voting='hard'
)
# voting_clf = VotingClassifier(
#     estimators = [
#         ('svc', gaussian_search.best_estimator_),
#         ('random_forest', randomForest_search.best_estimator_),
#         ('adaboost',adaboost_search.best_estimator_)
#     ],
#     voting='hard'
# )
voting_clf.fit(X_train, y_train)
# voting_clf.score(X_validation, y_validation)


VotingClassifier(estimators=[('svc',
                              SVC(C=100, cache_size=200, class_weight=None,
                                  coef0=0.0, decision_function_shape='ovr',
                                  degree=3, gamma=1, kernel='rbf', max_iter=-1,
                                  probability=False, random_state=None,
                                  shrinking=True, tol=0.001, verbose=False)),
                             ('random_forest',
                              RandomForestClassifier(bootstrap=True,
                                                     class_weight=None,
                                                     criterion='gini',
                                                     max_depth=10,
                                                     max_featu...
                                                     min_impurity_split=None,
                                                     min_samples_leaf=1,
                                      

In [25]:
estimators = [gaussian_search, randomForest_search, adaboost_search, voting_clf]
scores = [estimator.score(X_test, y_test) for estimator in estimators]

print(f"Précisions sur le jeu de test: {scores}")

Précisions sur le jeu de test: [0.8455, 0.8435, 0.8445, 0.8455]
