# Support Vector Machine - Test Features
This notebook tests sklearn implementations used for SVM

author = Caroline Magg <br>
date = 20 August 2020 <br> 

______________________________________
history: <br>
2020-08-20 first SVM test <br>
2020-08-29 SVM brain test <br>
2020-08-30 K-fold cross validation <br>
2020-09-04 Test how to make prediction with trained SVR <br>

In [None]:
import os
import sys
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import cv2
import scipy
import logging as log
import skimage.segmentation as segmentation
from scipy.spatial.distance import cdist

In [None]:
log.basicConfig(format='%(levelname)s:%(message)s', level=log.INFO)

### Add dependencies

In [None]:
# add KidsBrainProject main folder to paths
sys.path.append(os.path.abspath('../../'))
sys.path.append(os.path.abspath('../utils/'))

In [None]:
# add path to data here
path_data = "../../Data/" 

data_brain = pd.read_csv("../../Data/data_Brain.csv")

# Chi-squared test

In [None]:
from sklearn.feature_selection import chi2

In [None]:
features = pd.read_csv("../../Data/features/features_Brain.csv", sep=';')
errors = pd.read_csv("../../Data/features/error_metrics_Brain.csv", sep=';')
print(len(features), len(errors))

X = np.array(features[:1000])
X_test = np.array(features[1000:])
y = errors[:1000]
y_test = errors[1000:]
np.shape(X), np.shape(X_test), np.shape(y), np.shape(y_test)

In [None]:
y_ = np.array(y['jaccard_dist']).reshape(-1, 1)
y_f = np.array(y['dice_coeff'])
np.shape(y_), np.shape(y_f), y_f.dtype, y_.dtype

# K-Fold cross validation

In [None]:
from sklearn.model_selection import KFold

In [None]:
X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])
y = np.array([1, 2, 3, 4])

In [None]:
kf = KFold(n_splits=2)
kf.get_n_splits(X)

In [None]:
# Walk through the folds; each iteration yields the row indices of the
# training part and of the held-out part.
for train_index, test_index in kf.split(X):
    print("TRAIN:", train_index, "TEST:", test_index)
    # Select the rows of this fold. The arrays are overwritten on every
    # iteration, so only the last fold's split remains after the loop.
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

In [None]:
features = pd.read_csv("../../Data/features/features_Brain.csv", sep=';')
errors = pd.read_csv("../../Data/features/error_metrics_Brain.csv", sep=';')
print(len(features), len(errors))

X = np.array(features[:1000])
X_test = np.array(features[1000:])
y = np.array(errors[:1000])
y_test = np.array(errors[1000:])
np.shape(X), np.shape(X_test), np.shape(y), np.shape(y_test)

In [None]:
kf = KFold(n_splits=10)
print(kf)

In [None]:
for train_index, test_index in kf.split(X):
    print("TRAIN:", np.shape(train_index), "TEST:", np.shape(test_index))

# SVM Tests

In [None]:
from sklearn import svm

### Regression

#### Synthetic data

In [None]:
# Generate sample data
# Generate sample data: 40 sorted inputs in [0, 5) with a sine target.
X = np.sort(5 * np.random.rand(40, 1), axis=0)
y = np.sin(X).ravel()
# Add noise to every 5th target. The number of noise values is taken from
# the slice itself so the cell keeps working if the sample count changes.
y[::5] += 3 * (0.5 - np.random.rand(y[::5].size))
np.shape(X), np.shape(y)

In [None]:
# plt.subplots returns a (figure, axes) pair; unpack it so that `fig` really
# is the figure. `sharey` is dropped because there is only a single axes.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(5, 5))
ax.scatter(X, y, facecolor="none", edgecolor="k", s=50, label='training data')
# Draw the legend, otherwise the label above is never displayed.
ax.legend()
plt.show()

In [None]:
regr = svm.SVR(kernel='rbf', C=100, epsilon=0.1, degree=3)

In [None]:
regr.fit(X, y)

In [None]:
regr.score(X,y)

In [None]:
# plt.subplots returns a (figure, axes) pair; unpack it so that `fig` really
# is the figure. `sharey` is dropped because there is only a single axes.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(5, 5))
ax.scatter(X, y, facecolor="none", edgecolor="k", s=50, label='training data')
# Refit on the training data and overlay the model output for the same inputs.
ax.scatter(X, regr.fit(X, y).predict(X), label='SVR prediction')
# Draw the legend, otherwise the labels above are never displayed.
ax.legend()
plt.show()

#### Prediction

In [None]:
# Collect the fitted parameters that are passed to the manual prediction
# function further down.
sv = regr.support_vectors_
b = regr.intercept_
dual_coeff = regr.dual_coef_
# NOTE(review): `_gamma` is a private scikit-learn attribute - presumably the
# numeric kernel coefficient resolved during fit (the public `regr.gamma` is
# inspected separately below); confirm it exists in the installed version.
gamma = regr._gamma

In [None]:
gamma, b, sv, dual_coeff

In [None]:
regr.gamma

In [None]:
idx = 12
X_test = X[idx].reshape(1,-1)
y_test = y[idx]
np.shape(X_test), np.shape(y_test)

In [None]:
X_test  = np.array(0.5).reshape(1,-1)

In [None]:
def make_prediction(X, dual_coeff, sv, gamma, b):
    """Evaluate an RBF-kernel SVR decision function by hand for one sample.

    Computes ``sum_i dual_coeff[i] * exp(-gamma * ||sv[i] - X||^2) + b``.

    Parameters
    ----------
    X : array-like, shape (n_features,) or (1, n_features)
        The single sample to predict.
    dual_coeff : array-like, shape (1, n_sv) or (n_sv,)
        Dual coefficients of the support vectors.
    sv : array-like, shape (n_sv, n_features)
        Support vectors.
    gamma : float
        RBF kernel coefficient.
    b : float or array-like
        Intercept; added to the weighted kernel sum as-is.

    Returns
    -------
    The weighted kernel sum plus ``b`` (an array if ``b`` is an array).
    """
    support = np.asarray(sv, dtype=float)
    sample = np.asarray(X, dtype=float)
    # The RBF kernel depends on the squared Euclidean distance, so the squared
    # differences must be summed over the feature axis *before* taking exp.
    # Applying exp per feature is only correct when there is a single feature.
    sq_dist = np.sum((support - sample) ** 2, axis=-1)
    kernel = np.exp(-gamma * sq_dist)
    return np.sum(np.ravel(dual_coeff) * kernel) + b

In [None]:
make_prediction(X_test, dual_coeff, sv, gamma, b)

In [None]:
regr.predict(X_test)

#### Real data

In [None]:
features = pd.read_csv("../../Data/features/features_Brain.csv", sep=';')
errors = pd.read_csv("../../Data/features/error_metrics_Brain.csv", sep=';')
print(len(features), len(errors))

X = np.array(features[:1000])
X_test = np.array(features[1000:])
y = np.array(errors[:1000])
y_test = np.array(errors[1000:])
np.shape(X), np.shape(X_test), np.shape(y), np.shape(y_test)

In [None]:
regr = svm.SVR(kernel='rbf', C=100, epsilon=0.1, degree=3)

In [None]:
regr.fit(X,y[:,0])

In [None]:
regr.score(X,y[:,0])

In [None]:
regr.dual_coef_, np.shape(regr.dual_coef_)

In [None]:
np.shape(regr.support_vectors_)

In [None]:
regr.intercept_

In [None]:
errors_pred = regr.predict(X_test)
np.mean(np.abs(errors_pred - y_test[:,0]))

In [None]:
# Compare the predictions with the target they belong to: column 0, the same
# column used for fitting and for the mean absolute error computed above.
plt.scatter(y_test[:,0], errors_pred)

### Multioutput Regressor

In [None]:
from sklearn.multioutput import MultiOutputRegressor
from sklearn.datasets import make_regression

#### Synthetic data

In [None]:
model = svm.SVR()
wrapper = MultiOutputRegressor(model)

In [None]:
wrapper

In [None]:
X, y = make_regression(n_samples=100, n_features=10, n_informative=5, n_targets=4, random_state=1, noise=0.5)

In [None]:
wrapper.fit(X, y)

In [None]:
row = [0.21947749, 0.32948997, 0.81560036, 0.440956, -0.0606303, -0.29257894, -0.2820059, -0.00290545, 0.96402263, 0.04992249]
yhat = wrapper.predict([row])
yhat

#### Predict

In [None]:
wrapper.estimators_

In [None]:
def make_prediction(X, sv, dual_coeff, gamma, b):
    """Evaluate an RBF-kernel SVR decision function by hand for one sample.

    Computes ``sum_i dual_coeff[i] * exp(-gamma * ||sv[i] - X||^2) + b``.

    Parameters
    ----------
    X : array-like, shape (n_features,) or (1, n_features)
        The single sample to predict.
    sv : array-like, shape (n_sv, n_features)
        Support vectors.
    dual_coeff : array-like, shape (1, n_sv) or (n_sv,)
        Dual coefficients of the support vectors.
    gamma : float
        RBF kernel coefficient.
    b : float or array-like
        Intercept; added to the weighted kernel sum as-is.

    Returns
    -------
    The weighted kernel sum plus ``b`` (an array if ``b`` is an array).
    """
    support = np.asarray(sv, dtype=float)
    sample = np.asarray(X, dtype=float)
    # The RBF kernel depends on the squared Euclidean distance, so the squared
    # differences must be summed over the feature axis *before* taking exp.
    # Applying exp per feature gives wrong results for multi-feature samples.
    sq_dist = np.sum((support - sample) ** 2, axis=-1)
    kernel = np.exp(-gamma * sq_dist)
    return np.sum(np.ravel(dual_coeff) * kernel) + b

In [None]:
# Gather, for every fitted per-target estimator of the wrapper, the
# parameters needed for a manual prediction (one list entry per target).
sv = [est.support_vectors_ for est in wrapper.estimators_]
b = [est.intercept_ for est in wrapper.estimators_]
dual_coeff = [est.dual_coef_ for est in wrapper.estimators_]
gamma = [est._gamma for est in wrapper.estimators_]

In [None]:
row = np.array([0.21947749, 0.32948997, 0.81560036, 0.440956, -0.0606303, -0.29257894, -0.2820059, -0.00290545, 0.96402263, 0.04992249])
wrapper.predict([row])

In [None]:
wrapper.estimators_[0].predict(row.reshape(1,-1))

In [None]:
# Reproduce wrapper.predict by hand: one manual SVR evaluation per target.
result = np.zeros(len(wrapper.estimators_))
for idx, (sv_, b_, dc_, g_) in enumerate(zip(sv, b, dual_coeff, gamma)):
    # The intercept is stored as an array, so the manual prediction comes back
    # as a size-1 array. Extract the scalar explicitly: NumPy deprecates the
    # implicit conversion of size-1 arrays to scalars on item assignment.
    result[idx] = np.ravel(make_prediction(row, sv_, dc_, g_, b_))[0]
result

#### Real data

In [None]:
# Load features and error metrics from the same data folder that the other
# loading cells of this notebook use (bare file names only resolve if copies
# happen to sit next to the notebook).
features = pd.read_csv("../../Data/features/features_Brain.csv", sep=';')
errors = pd.read_csv("../../Data/features/error_metrics_Brain.csv", sep=';')
print(len(features), len(errors))

# First 1000 rows are used for training, the remainder for testing.
X = np.array(features[:1000])
X_test = np.array(features[1000:])
y = np.array(errors[:1000])
y_test = np.array(errors[1000:])
np.shape(X), np.shape(X_test), np.shape(y), np.shape(y_test)

In [None]:
model = svm.SVR()
wrapper = MultiOutputRegressor(model)

In [None]:
y_ = y
wrapper.fit(X, y_)

In [None]:
np.shape(y_)

In [None]:
wrapper.score(X,y_)

In [None]:
errors_pred = wrapper.predict(X_test)

In [None]:
plt.scatter(y_test[:,0], errors_pred[:,0])

In [None]:
plt.scatter(y_test[:,1], errors_pred[:,1])

In [None]:
plt.scatter(y_test[:,2], errors_pred[:,2])

In [None]:
plt.scatter(y_test[:,3], errors_pred[:,3])

In [None]:
plt.scatter(y_test[:,4], errors_pred[:,4])

### Classifier

In [None]:
X = [[0, 0], [1, 1], [1.5, 1.5], [3, 3], [-1,-1]]
y = [0, 1, 1, 1,0]
X, y

In [None]:
clf = svm.SVC()

In [None]:
clf.fit(X, y)

In [None]:
clf.predict([[2., 2.]])

In [None]:
clf.support_vectors_, clf.n_support_

In [None]:
clf = svm.NuSVC(gamma='auto')

In [None]:
clf.fit(X, y)

In [None]:
clf.predict([[2., 2.]])

In [None]:
clf.support_vectors_, clf.n_support_