This notebook can be run on colab: [here](https://colab.research.google.com/github/DataScience4Geoscience/Toulouse2020/blob/master/Notebooks/Classification/N2_Importance_of_scaling.ipynb)

In [None]:
# Import modules
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Generate data
# Two-feature toy problem: both features are standard-normal draws, then
# feature 0 is multiplied by 0.1 and feature 1 by 100, so their scales differ
# by three orders of magnitude. The label depends on the sign of feature 0 only.
#
# NumPy is called directly: `sp.random.*` and `sp.ones` were NumPy aliases
# re-exported by SciPy; they are deprecated and unavailable in recent SciPy
# releases. The legacy global seed is kept (rather than `default_rng`) so the
# generated samples are identical to those produced by the old aliases.
np.random.seed(10)
X = np.random.randn(200, 2)
X[:, 0] *= 0.1   # small-scale feature (the informative one)
X[:, 1] *= 100   # large-scale feature (pure noise w.r.t. the label)
y = np.ones((200,))
y[X[:, 0] < 0] = 0   # class 0 where feature 0 is negative, class 1 otherwise

# Plot
# Scatter feature 0 against feature 1, colouring each sample by its label.
feature_0, feature_1 = X[:, 0], X[:, 1]
plt.scatter(feature_0, feature_1, c=y)

In [None]:
from sklearn.svm import SVC

# Keep 40% of the samples for training (stratified on the labels) and use the
# remainder for testing; the split is fixed by random_state.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.40, random_state=0, stratify=y
)

# Fit an RBF-kernel SVM on the raw, unscaled features.
model = SVC(kernel='rbf', gamma="auto", C=10).fit(X_train, y_train)
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Classification accuracy before scaling: {}".format(accuracy))

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Scale data
# The per-feature min/max are learned on the training split only and the same
# mapping is then applied to both splits. Training features end up in [0, 1];
# test values can fall slightly outside that range because the scaler never
# sees the test split.
sc = MinMaxScaler()
sc.fit(X_train)
X_train_s = sc.transform(X_train)
X_test_s = sc.transform(X_test)

In [None]:
# Same RBF-kernel SVM as before, now trained and evaluated on the scaled
# features held in X_train_s / X_test_s.
model = SVC(kernel='rbf', gamma="auto", C=10).fit(X_train_s, y_train)
y_pred = model.predict(X_test_s)
accuracy = accuracy_score(y_test, y_pred)
print("Classification accuracy after scaling: {}".format(accuracy))

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardize the data
# The scaler statistics are estimated on the training split only and reused
# for the test split. Note that this rebinds sc, X_train_s and X_test_s.
sc = StandardScaler()
sc.fit(X_train)
X_train_s = sc.transform(X_train)
X_test_s = sc.transform(X_test)

# Refit the RBF-kernel SVM on the standardized features.
model = SVC(kernel='rbf', gamma="auto", C=10).fit(X_train_s, y_train)
y_pred = model.predict(X_test_s)
accuracy = accuracy_score(y_test, y_pred)
print("Classification accuracy after standardization: {}".format(accuracy))

The same comparison can be repeated with a different kernel, for instance a polynomial kernel (here with `degree=1`).

In [None]:
# Degree-1 polynomial-kernel SVM trained on the raw, unscaled features.
model = SVC(kernel='poly', gamma="auto", degree=1, C=10).fit(X_train, y_train)
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Classification accuracy before scaling with polynomial kernel: {}".format(accuracy))

In [None]:
# Degree-1 polynomial-kernel SVM trained on the scaled features.
# X_train_s / X_test_s are whatever the most recently executed scaling cell
# produced; in a top-to-bottom run that is the StandardScaler output.
model = SVC(kernel='poly', gamma="auto", degree=1, C=10).fit(X_train_s, y_train)
y_pred = model.predict(X_test_s)
accuracy = accuracy_score(y_test, y_pred)
print("Classification accuracy after scaling with polynomial kernel: {}".format(accuracy))