# Scikit Learn

In [267]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split # permette di splittare i dati in train e test
from sklearn.metrics import accuracy_score # permette di calcolare l'accuratezza del modello
from sklearn.preprocessing import LabelEncoder # permette di convertire delle stringhe in numeri
from sklearn import datasets # permette di caricare alcuni datasets

## <span style='background :yellow'>KNN

In [268]:
from sklearn.neighbors import KNeighborsClassifier # algoritmo KNN (classificazione)

### <span style='background :yellow'>Import dei dati

In [269]:
df = pd.read_csv('Dataset/car.data')
df.columns = ['Buying', 'Maint', 'Doors', 'Persons', 'Lug boot', 'Safety', 'Evalutation']
df

Unnamed: 0,Buying,Maint,Doors,Persons,Lug boot,Safety,Evalutation
0,vhigh,vhigh,2,2,small,med,unacc
1,vhigh,vhigh,2,2,small,high,unacc
2,vhigh,vhigh,2,2,med,low,unacc
3,vhigh,vhigh,2,2,med,med,unacc
4,vhigh,vhigh,2,2,med,high,unacc
...,...,...,...,...,...,...,...
1722,low,low,5more,more,med,med,good
1723,low,low,5more,more,med,high,vgood
1724,low,low,5more,more,big,low,unacc
1725,low,low,5more,more,big,med,good


### <span style='background :yellow'>Preparazione dei dati
Questo step prevede:
- eliminazione di valori vuoti, nulli, duplicati (se presenti)
- estrazione di X ed y

In [270]:
X = df[['Buying', 'Maint', 'Safety']].values
y = df['Evalutation']

In [271]:
X

array([['vhigh', 'vhigh', 'med'],
       ['vhigh', 'vhigh', 'high'],
       ['vhigh', 'vhigh', 'low'],
       ...,
       ['low', 'low', 'low'],
       ['low', 'low', 'med'],
       ['low', 'low', 'high']], dtype=object)

In [272]:
y

0       unacc
1       unacc
2       unacc
3       unacc
4       unacc
        ...  
1722     good
1723    vgood
1724    unacc
1725     good
1726    vgood
Name: Evalutation, Length: 1727, dtype: object

### <span style='background :yellow'>Conversione dei valori testuali in numeri

In [273]:
label_encoder = LabelEncoder()

# X
for i in range(len(X[0])): # ciclo per colonne
    X[:,i] = label_encoder.fit_transform(X[:,i]) # trasformo ogni colonna

# y
y_mapping = {'unacc':0, 'acc':1, 'good':2, 'vgood':3}
y = np.array(y.map(y_mapping))

In [274]:
X

array([[3, 3, 2],
       [3, 3, 0],
       [3, 3, 1],
       ...,
       [1, 1, 1],
       [1, 1, 2],
       [1, 1, 0]], dtype=object)

In [275]:
y

array([0, 0, 0, ..., 0, 2, 3], dtype=int64)

### <span style='background :yellow'>Addestramento e predizione

In [216]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2) # split dei dati in train (80%) e test (20%)

model = KNeighborsClassifier(n_neighbors=25) # creazione del modello
model.fit(X_train,y_train) # train del modello
predictions = model.predict(X_test) # predizioni del modello

score = accuracy_score(y_test, predictions) # valutazione accuratezza
print(score)

print(f'{y_test}\n')
print(f'{predictions}\n')

0.7138728323699421
[2 1 0 1 0 1 1 0 1 0 1 0 0 0 0 1 1 1 1 1 0 0 0 2 2 0 0 0 0 0 0 2 0 0 1 2 0
 0 1 1 0 0 1 0 1 0 0 1 0 0 0 0 0 0 0 1 2 3 0 1 0 3 0 3 0 0 1 0 1 0 0 0 0 0
 1 0 1 3 1 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 2 0 0 1 0 1 2 0 1 0 3
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 3 0 0 0 0 0 1 0 0 0 0
 0 0 1 0 0 0 0 0 1 0 0 0 1 0 1 0 0 0 2 1 0 0 1 0 0 0 2 0 1 0 0 1 0 0 0 2 0
 3 0 0 0 0 1 0 0 1 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 1 1 1 0 0 1 0 0 0 0 0 0 0
 0 0 0 2 0 0 0 1 0 0 0 1 0 2 0 0 2 0 0 1 0 0 1 0 0 1 0 0 3 1 0 1 0 0 1 1 2
 0 0 0 0 1 0 0 1 0 0 0 0 0 0 1 0 3 0 1 0 0 0 0 0 0 0 3 1 1 0 0 2 0 0 0 0 0
 0 0 2 0 3 1 0 0 1 1 0 0 0 0 0 1 1 0 0 1 0 0 2 0 0 0 0 0 1 0 1 0 0 0 0 0 0
 0 3 0 3 1 0 0 0 0 1 1 0 0]

[0 1 0 1 0 0 1 0 0 3 1 0 0 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
 0 0 0 0 0 1 0 1 0 1 0 0 0 0 0 0 0 0 1 3 0 0 0 0 0 0 0 0 0 1 0 1 1 0 0 3 0
 1 0 1 0 0 3 0 0 1 0 0 0 0 0 0 0 3 0 0 0 0 1 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 1 0 0 0 0 3 0 0 0 0 0 1 1 0 0 0 0

## <span style='background :yellow'>SVM

In [230]:
from sklearn.svm import SVC

### <span style='background :yellow'>Import dei dati

In [257]:
df = datasets.load_iris()
X = df.data
y = df.target

classes_map = {0:'Setosa', 1:'Versicolour', 2:'Virginica'}


### <span style='background :yellow'>Addestramento e predizione

In [293]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2) # split dei dati in train (80%) e test (20%)

model = SVC() # creazione del modello
model.fit(X_train,y_train) # train del modello
predictions = model.predict(X_test) # predizioni del modello

score = accuracy_score(y_test, predictions) # valutazione accuratezza
print(score)

0.7398843930635838


In [294]:
print(f'result:     {y_test}')
print(f'prediction: {predictions}')

result:     [0 0 1 0 0 2 2 0 0 2 0 0 3 1 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 1 3 1 0 0 0 1
 0 0 2 1 0 0 3 0 0 0 0 1 2 0 0 0 0 0 0 0 0 0 2 0 0 0 0 1 0 0 1 1 0 0 0 0 3
 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 3 0 0 0 1 1 0 0 0 0 0 0 0 1 0 0
 1 0 0 0 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 3 0 0 0 1
 0 1 0 0 1 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 1 0 3 0 0 0 0 0 1 0 0 0 0 1 0 0 0
 0 0 0 0 1 0 1 0 0 0 0 1 1 0 3 0 0 0 3 1 1 0 0 0 0 1 0 1 0 0 0 0 1 0 0 0 1
 0 0 0 0 0 0 0 1 1 0 0 1 0 0 1 0 1 0 1 0 0 0 0 0 0 0 1 2 2 0 0 0 1 0 0 0 0
 3 0 1 0 1 0 1 0 0 1 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 1 1 0 0 1 0 0 0 0
 1 0 0 1 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 1 0 0 1 0 0 1 1 0 0 3 0
 1 1 0 1 0 1 0 2 0 0 0 0 0]
prediction: [0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0
 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 1 0 0 0
 0 0 1 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0

In [295]:
y_test_label = pd.Series(y_test).map(classes_map) # conversione label in stringa
predictions_label = pd.Series(predictions).map(classes_map) # conversione delle predizioni in stringa

print(f'result:     {y_test_label}')
print(f'prediction: {y_test_label}')

result:     0           Setosa
1           Setosa
2      Versicolour
3           Setosa
4           Setosa
          ...     
341         Setosa
342         Setosa
343         Setosa
344         Setosa
345         Setosa
Length: 346, dtype: object
prediction: 0           Setosa
1           Setosa
2      Versicolour
3           Setosa
4           Setosa
          ...     
341         Setosa
342         Setosa
343         Setosa
344         Setosa
345         Setosa
Length: 346, dtype: object
