In [141]:
import numpy as np
import pandas as pd
from sklearn import svm
from sklearn.model_selection import train_test_split, cross_val_score, loss_curve
from sklearn.metrics import accuracy_score, precision_score, recall_score, classification_report

In [142]:
df_data = pd.read_csv('../data/train.csv')
df_data.head(1)

Unnamed: 0,id,species,margin1,margin2,margin3,margin4,margin5,margin6,margin7,margin8,...,texture55,texture56,texture57,texture58,texture59,texture60,texture61,texture62,texture63,texture64
0,1,Acer_Opalus,0.007812,0.023438,0.023438,0.003906,0.011719,0.009766,0.027344,0.0,...,0.007812,0.0,0.00293,0.00293,0.035156,0.0,0.0,0.004883,0.0,0.025391


In [143]:
df_data.shape

(990, 194)

In [144]:
X = df_data.copy().drop(['id', 'species'],axis='columns').values

species = df_data['species'].unique()

# Création des vecteurs one-hot pour la matrice de targets
t = np.zeros(df_data.shape[0])
for i in range(species.size):
    t[df_data['species'] == species[i]] = i

In [145]:
X_train, X_test, t_train, t_test = train_test_split(X, t, test_size=0.3, random_state=0)

# SVM linéaire

In [165]:
#C = 1
classifieur_linear = svm.SVC(kernel='linear') # noyau lineaire
classifieur_linear.fit(X_train, t_train)

In [167]:
classifieur_linear.loss_curve
        

AttributeError: 'SVC' object has no attribute 'loss_curve'

In [147]:
print("Score X_train, t_train : ", classifieur_linear.score(X_train, t_train))
print("Score X_test, t_test : ",classifieur_linear.score(X_test, t_test))
t_pred = classifieur_linear.predict(X_test)
#print(classification_report(t_test,t_pred))

Score X_train, t_train :  0.14285714285714285
Score X_test, t_test :  0.02356902356902357


In [148]:
all_values_C = [1,2,3,4,5,6,7,8,9,10]
# un petit C fera que certain point seront en dessous de la marge et un grand C fera que les point seront sur le vecteur de support
models_linear = []
nb_cv = 3
results_linear = np.empty((0, nb_cv))
for val_C in all_values_C:
    model_linear = svm.SVC(kernel='linear', C= val_C) # noyau lineaire
    models_linear.append(model_linear)
    result_linear = cross_val_score(model_linear, X_train, t_train, cv=nb_cv)
    results_linear = np.vstack((results_linear, result_linear))

In [149]:
max_index = np.unravel_index(np.argmax(results_linear, axis=None), results_linear.shape)
# Afficher le résultat
print("Indice du résultat le plus élevé :", max_index)
print("Valeur du résultat le plus élevé :", results_linear[max_index])

Indice du résultat le plus élevé : (9, 1)
Valeur du résultat le plus élevé : 0.7186147186147186


In [150]:
best_linear = models_linear[max_index[0]]
best_linear # pour moi il choisit toujours le plus grand C 

In [151]:
best_linear.fit(X_train, t_train)
print("Score best X_train, t_train : ", best_linear.score(X_train, t_train))
print("Score best X_test, t_test : ", best_linear.score(X_test, t_test))

Score best X_train, t_train :  0.9191919191919192
Score best X_test, t_test :  0.7407407407407407


# SVM noyau rbf

In [152]:
all_values_C = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
# un petit C fera que certain point seront en dessous de la marge et un grand C fera que les point seront sur le vecteur de support
all_values_Gamma = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
all_values_Degree = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

models_rbf = []
nb_cv = 5
results_rbf = np.empty((0, nb_cv))
for val_C in all_values_C:
    for val_gamma in all_values_Gamma:
        model_rbf = svm.SVC(kernel='rbf',gamma=val_gamma, C=val_C)
        models_rbf.append(model_rbf)
        result_rbf = cross_val_score(model_rbf, X_train, t_train, cv=nb_cv)
        results_rbf = np.vstack((results_rbf, result_rbf))




In [155]:
max_index = np.unravel_index(np.argmax(results_rbf, axis=None), results_rbf.shape)
# Afficher le résultat
print("Indice du résultat le plus élevé :", max_index)
print("Valeur du résultat le plus élevé :", results_rbf[max_index])

Indice du résultat le plus élevé : (99, 4)
Valeur du résultat le plus élevé : 0.9130434782608695


In [163]:
best_rbf = models_rbf[max_index[0]]
best_rbf  # pour moi il choisit toujours le plus grand C 

In [164]:
best_rbf.fit(X_train, t_train)
print("Score best X_train, t_train : ", best_rbf.score(X_train, t_train))
print("Score best X_test, t_test : ", best_rbf.score(X_test, t_test))

Score best X_train, t_train :  0.9797979797979798
Score best X_test, t_test :  0.8619528619528619


# SVM Noyau Poly

In [158]:
all_values_C = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
all_values_Degree = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

models_poly = []
nb_cv = 5
results_poly = np.empty((0, nb_cv))
for val_C in all_values_C:
    for val_deg in all_values_Degree:
        model_poly = svm.SVC(kernel="poly", degree=val_deg, gamma="auto", C=val_C)
        models_poly.append(model_poly)
        result_poly = cross_val_score(model_poly, X_train, t_train, cv=nb_cv)
        results_poly = np.vstack((results_poly, result_poly))



In [159]:
max_index = np.unravel_index(np.argmax(results_rbf, axis=None), results_rbf.shape)
# Afficher le résultat
print("Indice du résultat le plus élevé :", max_index)
print("Valeur du résultat le plus élevé :", results_poly[max_index])

Indice du résultat le plus élevé : (99, 4)
Valeur du résultat le plus élevé : 0.09420289855072464


In [161]:
best_poly = models_rbf[max_index[0]]
best_poly  # pour moi il choisit toujours le plus grand C 

In [162]:
best_poly .fit(X_train, t_train)
print("Score best X_train, t_train : ", best_poly .score(X_train, t_train))
print("Score best X_test, t_test : ", best_poly .score(X_test, t_test))

Score best X_train, t_train :  0.9797979797979798
Score best X_test, t_test :  0.8619528619528619
