# CustomSVR for censored datasets

In [1]:
import pandas as pd
import numpy as np
import customsvr
from datasets import *
from training import *

from sklearn.preprocessing import StandardScaler, QuantileTransformer
from sklearn.neighbors import LocalOutlierFactor

## Standard CustomSVR

In [3]:
# Hold-out grid search of customsvr.StandardCensSVR on the dogs dataset.
# NOTE(review): NApolicy / censSVR semantics are defined by the loader, which is
# star-imported (presumably from `datasets`) — confirm there.
dogs = load_skl_dogs_2016(NApolicy='normal', censSVR=True)
print("Custom SVR on dogs dataset\n")

X = dogs.data
y = dogs.target

# Hyper-parameter values handed to the search; only the polynomial kernel is listed.
param_grid = [
    {
        'C': [1, 5, 10],
        'epsilon': [0.01, 0.1, 1, 10, 50],
        'gamma': [0.01, 0.001, 0.0001],
        'degree': [2, 3],
        'coef0': [0, 1, 10, 100],
        'kernel': ['poly'],
    },
]

# The positional 10 and 15 are forwarded unchanged; their meaning is fixed by
# SVR_gridsearch_holdout's signature — TODO confirm before tuning them.
result = SVR_gridsearch_holdout(
    X,
    y,
    customsvr.StandardCensSVR,
    param_grid,
    10,
    15,
    runs=5,
    scaler=StandardScaler,                  # handed over as a class
    outlier_detector=LocalOutlierFactor(),  # handed over as an instance
    censSVR=True,
    custom_metric="c-index",
)

# result[0] is reported as the selected parameters, result[1] as the test score.
print("Best parameters set found on development set:")
print(result[0])
print("Test score: %f" % result[1])

Custom SVR on dogs dataset

Best parameters set found on development set:
{'C': 5, 'coef0': 100, 'degree': 2, 'epsilon': 0.01, 'gamma': 0.01, 'kernel': 'poly'}
Test score: 0.607143


## SVCR

In [4]:
# Reload the dogs dataset and display the shape of its `data` attribute.
# The following SVCR cell reads `dogs` from the kernel, so the name must stay.
dogs = load_skl_dogs_2016(
    NApolicy='normal',
    censSVR=True,
)
dogs.data.shape

(161, 17)

In [5]:
# Hold-out grid search of customsvr.SVCR on the dogs dataset.
# This cell does not load the data itself: `dogs` must already exist in the kernel.
print("SVCR on dogs dataset\n")

X = dogs.data
y = dogs.target

# Hyper-parameter values handed to the search; only the polynomial kernel is listed.
param_grid = [
    {
        'C': [1, 5, 10],
        'gamma': [0.01, 0.001, 0.0001],
        'degree': [2, 3],
        'coef0': [0, 1, 10, 100],
        'kernel': ['poly'],
    },
]

# The positional 10 and 15 are forwarded unchanged; their meaning is fixed by
# SVR_gridsearch_holdout's signature — TODO confirm before tuning them.
result = SVR_gridsearch_holdout(
    X,
    y,
    customsvr.SVCR,
    param_grid,
    10,
    15,
    runs=5,
    scaler=StandardScaler,                  # handed over as a class
    outlier_detector=LocalOutlierFactor(),  # handed over as an instance
    censSVR=True,
    custom_metric="c-index",
)

# result[0] is reported as the selected parameters, result[1] as the test score.
print("Best parameters set found on development set:")
print(result[0])
print("Test score: %f" % result[1])

SVCR on dogs dataset

Best parameters set found on development set:
{'C': 5, 'coef0': 1, 'degree': 2, 'gamma': 0.001, 'kernel': 'poly'}
Test score: 0.750000


## SVRC

In [2]:
# Hold-out grid search of customsvr.SVRC on the dogs dataset.
# NOTE(review): NApolicy / censSVR semantics are defined by the loader, which is
# star-imported (presumably from `datasets`) — confirm there.
dogs = load_skl_dogs_2016(NApolicy='normal', censSVR=True)
print("SVRC on dogs dataset\n")

X = dogs.data
y = dogs.target

# Hyper-parameter values handed to the search. 'C' and 'CC' candidates are
# 2-tuples here; how SVRC interprets each pair is not visible in this notebook.
param_grid = [
    {
        'C': [(1, 5), (1, 10), (5, 10)],
        'CC': [(5, 1), (10, 1), (10, 5)],
        'epsilon': [0.1, 1, 10],
        'epsilonC': [0.1, 1, 10],
        'gamma': [0.01, 0.001, 0.0001],
        'degree': [2, 3],
        'coef0': [0, 1, 10, 100],
        'kernel': ['poly'],
    },
]

# The positional 10 and 15 are forwarded unchanged; their meaning is fixed by
# SVR_gridsearch_holdout's signature — TODO confirm before tuning them.
# Note that this cell uses runs=1, unlike the other estimators in the notebook.
result = SVR_gridsearch_holdout(
    X,
    y,
    customsvr.SVRC,
    param_grid,
    10,
    15,
    runs=1,
    scaler=StandardScaler,                  # handed over as a class
    outlier_detector=LocalOutlierFactor(),  # handed over as an instance
    censSVR=True,
    custom_metric="c-index",
)

# result[0] is reported as the selected parameters, result[1] as the test score.
print("Best parameters set found on development set:")
print(result[0])
print("Test score: %f" % result[1])

SVRC on dogs dataset

Best parameters set found on development set:
{'C': (1, 5), 'CC': (10, 5), 'coef0': 100, 'degree': 2, 'epsilon': 1, 'epsilonC': 0.1, 'gamma': 0.01, 'kernel': 'poly'}
Test score: 0.681818


## RankSVMC (Simplified constraints)

In [3]:
# Hold-out grid search of customsvr.SimpleRankSVMC on the dogs dataset.
# NOTE(review): NApolicy / censSVR semantics are defined by the loader, which is
# star-imported (presumably from `datasets`) — confirm there.
dogs = load_skl_dogs_2016(NApolicy='normal', censSVR=True)
print("RankSVMC on dogs dataset\n")

X = dogs.data
y = dogs.target

# Hyper-parameter values handed to the search; only the polynomial kernel is listed.
param_grid = [
    {
        'C': [1, 5, 10],
        'gamma': [0.01, 0.001, 0.0001],
        'degree': [2, 3],
        'coef0': [0, 1, 10, 100],
        'kernel': ['poly'],
    },
]

# The positional 10 and 15 are forwarded unchanged; their meaning is fixed by
# SVR_gridsearch_holdout's signature — TODO confirm before tuning them.
result = SVR_gridsearch_holdout(
    X,
    y,
    customsvr.SimpleRankSVMC,
    param_grid,
    10,
    15,
    runs=5,
    scaler=StandardScaler,                  # handed over as a class
    outlier_detector=LocalOutlierFactor(),  # handed over as an instance
    censSVR=True,
    custom_metric="c-index",
)

# result[0] is reported as the selected parameters, result[1] as the test score.
print("Best parameters set found on development set:")
print(result[0])
print("Test score: %f" % result[1])

RankSVMC on dogs dataset

Best parameters set found on development set:
{'C': 10, 'coef0': 0, 'degree': 3, 'gamma': 0.01, 'kernel': 'poly'}
Test score: 0.464286


## Model 1 (from Van Belle et al.)

In [4]:
# Hold-out grid search of customsvr.Model1 on the dogs dataset.
# NOTE(review): NApolicy / censSVR semantics are defined by the loader, which is
# star-imported (presumably from `datasets`) — confirm there.
dogs = load_skl_dogs_2016(NApolicy='normal', censSVR=True)
print("Model 1 on dogs dataset\n")

X = dogs.data
y = dogs.target

# Hyper-parameter values handed to the search; only the polynomial kernel is listed.
param_grid = [
    {
        'C': [1, 5, 10],
        'gamma': [0.01, 0.001, 0.0001],
        'degree': [2, 3],
        'coef0': [0, 1, 10, 100],
        'kernel': ['poly'],
    },
]

# The positional 10 and 15 are forwarded unchanged; their meaning is fixed by
# SVR_gridsearch_holdout's signature — TODO confirm before tuning them.
result = SVR_gridsearch_holdout(
    X,
    y,
    customsvr.Model1,
    param_grid,
    10,
    15,
    runs=5,
    scaler=StandardScaler,                  # handed over as a class
    outlier_detector=LocalOutlierFactor(),  # handed over as an instance
    censSVR=True,
    custom_metric="c-index",
)

# result[0] is reported as the selected parameters, result[1] as the test score.
print("Best parameters set found on development set:")
print(result[0])
print("Test score: %f" % result[1])

Model 1 on dogs dataset

Best parameters set found on development set:
{'C': 10, 'coef0': 100, 'degree': 2, 'gamma': 0.01, 'kernel': 'poly'}
Test score: 0.548387


## Model 2 (from Van Belle et al.)

In [2]:
# Hold-out grid search of customsvr.Model2 on the dogs dataset.
# NOTE(review): NApolicy / censSVR semantics are defined by the loader, which is
# star-imported (presumably from `datasets`) — confirm there.
dogs = load_skl_dogs_2016(NApolicy='normal', censSVR=True)
print("Model 2 on dogs dataset\n")

X = dogs.data
y = dogs.target

# Hyper-parameter values handed to the search. Model2 takes two separate
# regularisation keys ('C_rank', 'C_pred') instead of a single 'C'.
param_grid = [
    {
        'C_rank': [1, 5, 10],
        'C_pred': [1, 5, 10],
        'gamma': [0.01, 0.001, 0.0001],
        'degree': [2, 3],
        'coef0': [0, 1, 10, 100],
        'kernel': ['poly'],
    },
]

# The positional 10 and 15 are forwarded unchanged; their meaning is fixed by
# SVR_gridsearch_holdout's signature — TODO confirm before tuning them.
# Note that this cell uses runs=2, unlike most other estimators in the notebook.
result = SVR_gridsearch_holdout(
    X,
    y,
    customsvr.Model2,
    param_grid,
    10,
    15,
    runs=2,
    scaler=StandardScaler,                  # handed over as a class
    outlier_detector=LocalOutlierFactor(),  # handed over as an instance
    censSVR=True,
    custom_metric="c-index",
)

# result[0] is reported as the selected parameters, result[1] as the test score.
print("Best parameters set found on development set:")
print(result[0])
print("Test score: %f" % result[1])

Model 2 on dogs dataset

Best parameters set found on development set:
{'C_pred': 5, 'C_rank': 1, 'coef0': 0, 'degree': 2, 'gamma': 0.001, 'kernel': 'poly'}
Test score: 0.533333
