# CustomSVR for censored datasets

In [1]:
import pandas as pd
import numpy as np
import customsvr
from datasets import *
from training import *

from sklearn.preprocessing import StandardScaler, QuantileTransformer
from sklearn.neighbors import LocalOutlierFactor

## Standard CustomSVR

In [6]:
# Standard censored SVR on the dogs dataset: repeated grid search on a
# hold-out split, then evaluation of the selected parameters on random splits.
# NOTE(review): no random seed is set in this cell, so the printed numbers are
# presumably not reproducible run-to-run — confirm against `training`.
dogs = load_skl_dogs_2016(censSVR=True, NApolicy='normal')
print("Custom SVR on dogs dataset\n")

X = dogs.data
y = dogs.target

# Polynomial-kernel search space handed to the grid-search helper.
param_grid = [
    dict(
        C=[1, 5, 10],
        epsilon=[0.01, 0.1, 1, 10, 50],
        gamma=[0.01, 0.001, 0.0001],
        degree=[2, 3],
        coef0=[0, 1, 10, 100],
        kernel=['poly'],
    ),
]

# Sentinel pair: index 0 is a placeholder, index 1 is a score that any finite
# score beats.
best_result = (0, -np.inf)

# Run the search five times and keep the run whose element [1] is strictly the
# largest (on ties the earlier run is kept).
for i in range(5):
    curr_result = SVR_gridsearch_holdout(
        X,
        y,
        customsvr.StandardCensSVR,
        param_grid,
        10,  # NOTE(review): meaning of the positional 10 and 15 is defined in `training` — TODO confirm
        15,
        custom_metric="c-index",
        censSVR=True,
        outlier_detector=LocalOutlierFactor(),  # a fresh detector instance for every call
        scaler=StandardScaler,
    )
    best_result = curr_result if curr_result[1] > best_result[1] else best_result

# NOTE(review): the run is selected by the same score that is reported as
# "Test score", so that figure may be optimistic; the mean below is computed
# separately by random_split_tests.
print("Best parameters set found on development set:", best_result[0], sep="\n")
print("Test score: %f" % best_result[1])

mean_score = random_split_tests(
    X,
    y,
    customsvr.StandardCensSVR,
    best_result[0],
    25,  # NOTE(review): presumably the number of random splits — TODO confirm
    custom_metric="c-index",
    censSVR=True,
    outlier_detector=LocalOutlierFactor(),
    scaler=StandardScaler,
)
print("Mean score of model on random test splits: %f" % mean_score)

Custom SVR on dogs dataset

Best parameters set found on development set:
{'C': 1, 'coef0': 1, 'degree': 3, 'epsilon': 1, 'gamma': 0.001, 'kernel': 'poly'}
Test score: 0.727273
Mean score of model on random test splits: 0.630220


## SVCR

In [7]:
# Reload the dogs dataset and show the shape of its feature matrix as the
# cell's output (the bare last expression is what the notebook displays).
dogs = load_skl_dogs_2016(censSVR=True, NApolicy='normal')
dogs.data.shape

(161, 17)

In [8]:
# SVCR on the dogs dataset: repeated grid search on a hold-out split, then
# evaluation of the selected parameters on random splits.
# This cell does not load the data itself; `dogs` must already exist in the
# kernel (it is assigned by the preceding cell).
print("SVCR on dogs dataset\n")

X = dogs.data
y = dogs.target

# Polynomial-kernel search space (this grid has no epsilon entry).
param_grid = [
    dict(
        C=[1, 5, 10],
        gamma=[0.01, 0.001, 0.0001],
        degree=[2, 3],
        coef0=[0, 1, 10, 100],
        kernel=['poly'],
    ),
]

# Sentinel pair: index 0 is a placeholder, index 1 is a score that any finite
# score beats.
best_result = (0, -np.inf)

# Run the search five times and keep the run whose element [1] is strictly the
# largest (on ties the earlier run is kept).
for i in range(5):
    curr_result = SVR_gridsearch_holdout(
        X,
        y,
        customsvr.SVCR,
        param_grid,
        10,  # NOTE(review): meaning of the positional 10 and 15 is defined in `training` — TODO confirm
        15,
        custom_metric="c-index",
        censSVR=True,
        outlier_detector=LocalOutlierFactor(),  # a fresh detector instance for every call
        scaler=StandardScaler,
    )
    best_result = curr_result if curr_result[1] > best_result[1] else best_result

# NOTE(review): the run is selected by the same score that is reported as
# "Test score", so that figure may be optimistic.
print("Best parameters set found on development set:", best_result[0], sep="\n")
print("Test score: %f" % best_result[1])

mean_score = random_split_tests(
    X,
    y,
    customsvr.SVCR,
    best_result[0],
    25,  # NOTE(review): presumably the number of random splits — TODO confirm
    custom_metric="c-index",
    censSVR=True,
    outlier_detector=LocalOutlierFactor(),
    scaler=StandardScaler,
)
print("Mean score of model on random test splits: %f" % mean_score)

SVCR on dogs dataset

Best parameters set found on development set:
{'C': 5, 'coef0': 100, 'degree': 3, 'gamma': 0.0001, 'kernel': 'poly'}
Test score: 0.676471
Mean score of model on random test splits: 0.615029


## SVRC

In [3]:
# SVRC on the dogs dataset: grid search on a hold-out split, then evaluation
# of the selected parameters on random splits.
dogs = load_skl_dogs_2016(censSVR=True, NApolicy='normal')
print("SVRC on dogs dataset\n")

X = dogs.data
y = dogs.target

# Polynomial-kernel search space. C / CC take pairs and epsilon / epsilonC are
# searched separately.
# NOTE(review): presumably the paired values are the penalties for the two
# sides of the loss and C vs. CC distinguish uncensored from censored
# samples — confirm against customsvr.SVRC.
param_grid = [
    dict(
        C=[(1, 5), (1, 10), (5, 10)],
        CC=[(5, 1), (10, 1), (10, 5)],
        epsilon=[0.1, 1, 10],
        epsilonC=[0.1, 1, 10],
        gamma=[0.01, 0.001, 0.0001],
        degree=[2, 3],
        coef0=[0, 1, 10, 100],
        kernel=['poly'],
    ),
]

# Sentinel pair: index 0 is a placeholder, index 1 is a score that any finite
# score beats.
best_result = (0, -np.inf)

# Only a single search pass is made here (range(1)); the loop form mirrors the
# other experiment cells.
# NOTE(review): presumably limited to one pass because this grid is much
# larger — confirm.
for i in range(1):
    curr_result = SVR_gridsearch_holdout(
        X,
        y,
        customsvr.SVRC,
        param_grid,
        10,  # NOTE(review): meaning of the positional 10 and 15 is defined in `training` — TODO confirm
        15,
        custom_metric="c-index",
        censSVR=True,
        outlier_detector=LocalOutlierFactor(),  # a fresh detector instance for every call
        scaler=StandardScaler,
    )
    best_result = curr_result if curr_result[1] > best_result[1] else best_result

print("Best parameters set found on development set:", best_result[0], sep="\n")
print("Test score: %f" % best_result[1])

mean_score = random_split_tests(
    X,
    y,
    customsvr.SVRC,
    best_result[0],
    25,  # NOTE(review): presumably the number of random splits — TODO confirm
    custom_metric="c-index",
    censSVR=True,
    outlier_detector=LocalOutlierFactor(),
    scaler=StandardScaler,
)
print("Mean score of model on random test splits: %f" % mean_score)

SVRC on dogs dataset

Best parameters set found on development set:
{'C': (1, 10), 'CC': (10, 1), 'coef0': 100, 'degree': 2, 'epsilon': 1, 'epsilonC': 1, 'gamma': 0.001, 'kernel': 'poly'}
Test score: 0.714286
Mean score of model on random test splits: 0.590623


## RankSVMC (Simplified constraints)

In [3]:
# RankSVMC (customsvr.SimpleRankSVMC) on the dogs dataset: repeated grid search
# on a hold-out split, then evaluation of the selected parameters on random
# splits.
dogs = load_skl_dogs_2016(censSVR=True, NApolicy='normal')
print("RankSVMC on dogs dataset\n")

X = dogs.data
y = dogs.target

# Polynomial-kernel search space handed to the grid-search helper.
param_grid = [
    dict(
        C=[1, 5, 10],
        gamma=[0.01, 0.001, 0.0001],
        degree=[2, 3],
        coef0=[0, 1, 10, 100],
        kernel=['poly'],
    ),
]

# Sentinel pair: index 0 is a placeholder, index 1 is a score that any finite
# score beats.
best_result = (0, -np.inf)

# Run the search five times and keep the run whose element [1] is strictly the
# largest (on ties the earlier run is kept).
for i in range(5):
    curr_result = SVR_gridsearch_holdout(
        X,
        y,
        customsvr.SimpleRankSVMC,
        param_grid,
        10,  # NOTE(review): meaning of the positional 10 and 15 is defined in `training` — TODO confirm
        15,
        custom_metric="c-index",
        censSVR=True,
        outlier_detector=LocalOutlierFactor(),  # a fresh detector instance for every call
        scaler=StandardScaler,
    )
    best_result = curr_result if curr_result[1] > best_result[1] else best_result

# NOTE(review): the run is selected by the same score that is reported as
# "Test score", so that figure may be optimistic.
print("Best parameters set found on development set:", best_result[0], sep="\n")
print("Test score: %f" % best_result[1])

mean_score = random_split_tests(
    X,
    y,
    customsvr.SimpleRankSVMC,
    best_result[0],
    25,  # NOTE(review): presumably the number of random splits — TODO confirm
    custom_metric="c-index",
    censSVR=True,
    outlier_detector=LocalOutlierFactor(),
    scaler=StandardScaler,
)
print("Mean score of model on random test splits: %f" % mean_score)

RankSVMC on dogs dataset

Best parameters set found on development set:
{'C': 5, 'coef0': 1, 'degree': 3, 'gamma': 0.01, 'kernel': 'poly'}
Test score: 0.692308
Mean score of model on random test splits: 0.511362


## Model 1 (from Van Belle et al.)

In [2]:
# Model 1 (customsvr.Model1) on the dogs dataset: repeated grid search on a
# hold-out split, then evaluation of the selected parameters on random splits.
dogs = load_skl_dogs_2016(censSVR=True, NApolicy='normal')
print("Model 1 on dogs dataset\n")

X = dogs.data
y = dogs.target

# Polynomial-kernel search space handed to the grid-search helper.
param_grid = [
    dict(
        C=[1, 5, 10],
        gamma=[0.01, 0.001, 0.0001],
        degree=[2, 3],
        coef0=[0, 1, 10, 100],
        kernel=['poly'],
    ),
]

# Sentinel pair: index 0 is a placeholder, index 1 is a score that any finite
# score beats.
best_result = (0, -np.inf)

# Run the search five times and keep the run whose element [1] is strictly the
# largest (on ties the earlier run is kept).
for i in range(5):
    curr_result = SVR_gridsearch_holdout(
        X,
        y,
        customsvr.Model1,
        param_grid,
        10,  # NOTE(review): meaning of the positional 10 and 15 is defined in `training` — TODO confirm
        15,
        custom_metric="c-index",
        censSVR=True,
        outlier_detector=LocalOutlierFactor(),  # a fresh detector instance for every call
        scaler=StandardScaler,
    )
    best_result = curr_result if curr_result[1] > best_result[1] else best_result

# NOTE(review): the run is selected by the same score that is reported as
# "Test score", so that figure may be optimistic.
print("Best parameters set found on development set:", best_result[0], sep="\n")
print("Test score: %f" % best_result[1])

mean_score = random_split_tests(
    X,
    y,
    customsvr.Model1,
    best_result[0],
    25,  # NOTE(review): presumably the number of random splits — TODO confirm
    custom_metric="c-index",
    censSVR=True,
    outlier_detector=LocalOutlierFactor(),
    scaler=StandardScaler,
)
print("Mean score of model on random test splits: %f" % mean_score)

Model 1 on dogs dataset

Best parameters set found on development set:
{'C': 5, 'coef0': 10, 'degree': 3, 'gamma': 0.01, 'kernel': 'poly'}
Test score: 0.645161
Mean score of model on random test splits: 0.554633


## Model 2 (from Van Belle et al.)

In [2]:
# Model 2 (customsvr.Model2) on the dogs dataset: grid search on a hold-out
# split, then evaluation of the selected parameters on random splits.
dogs = load_skl_dogs_2016(censSVR=True, NApolicy='normal')
print("Model 2 on dogs dataset\n")

X = dogs.data
y = dogs.target

# Polynomial-kernel search space; this model is tuned over two separate
# penalties, C_rank and C_pred.
# NOTE(review): presumably these weight a ranking term and a prediction term
# respectively — confirm against customsvr.Model2.
param_grid = [
    dict(
        C_rank=[1, 5, 10],
        C_pred=[1, 5, 10],
        gamma=[0.01, 0.001, 0.0001],
        degree=[2, 3],
        coef0=[0, 1, 10, 100],
        kernel=['poly'],
    ),
]

# Sentinel pair: index 0 is a placeholder, index 1 is a score that any finite
# score beats.
best_result = (0, -np.inf)

# Only a single search pass is made here (range(1)); the loop form mirrors the
# other experiment cells.
# NOTE(review): presumably limited to one pass because of run time — confirm.
for i in range(1):
    curr_result = SVR_gridsearch_holdout(
        X,
        y,
        customsvr.Model2,
        param_grid,
        10,  # NOTE(review): meaning of the positional 10 and 15 is defined in `training` — TODO confirm
        15,
        custom_metric="c-index",
        censSVR=True,
        outlier_detector=LocalOutlierFactor(),  # a fresh detector instance for every call
        scaler=StandardScaler,
    )
    best_result = curr_result if curr_result[1] > best_result[1] else best_result

print("Best parameters set found on development set:", best_result[0], sep="\n")
print("Test score: %f" % best_result[1])

mean_score = random_split_tests(
    X,
    y,
    customsvr.Model2,
    best_result[0],
    25,  # NOTE(review): presumably the number of random splits — TODO confirm
    custom_metric="c-index",
    censSVR=True,
    outlier_detector=LocalOutlierFactor(),
    scaler=StandardScaler,
)
print("Mean score of model on random test splits: %f" % mean_score)

Model 2 on dogs dataset

Best parameters set found on development set:
{'C_pred': 5, 'C_rank': 1, 'coef0': 100, 'degree': 2, 'gamma': 0.0001, 'kernel': 'poly'}
Test score: 0.642857
Mean score of model on random test splits: 0.598332
