# CustomSVR for censored datasets

In [1]:
import pandas as pd
import numpy as np
import customsvr
from datasets import *
from training import *

from sklearn.preprocessing import StandardScaler, QuantileTransformer
from sklearn.neighbors import LocalOutlierFactor

## Standard CustomSVR

In [6]:
# Load the dogs dataset through the project loader and announce the experiment.
dogs = load_skl_dogs_2016(NApolicy='normal', censSVR=True)
print("Custom SVR on dogs dataset\n")

X, y = dogs.data, dogs.target

# One polynomial-kernel hyper-parameter grid handed to SVR_gridsearch_holdout.
param_grid = [
    {
        'C': [1, 5, 10],
        'epsilon': [0.01, 0.1, 1, 10, 50],
        'gamma': [0.01, 0.001, 0.0001],
        'degree': [2, 3],
        'coef0': [0, 1, 10, 100],
        'kernel': ['poly'],
    },
]

# Sentinel pair: index 0 is a placeholder, index 1 is -inf so that any finite
# score returned by the search replaces it.
best_result = (0, -np.inf)

# Run the hold-out grid search five times and keep the run whose element [1]
# (printed below as the test score) is strictly the largest seen so far.
# The positional 10 and 15 are forwarded as-is; their meaning is defined by
# SVR_gridsearch_holdout, which is not visible here.
# NOTE(review): the reported "Test score" is the maximum over the five runs,
# so it is presumably an optimistic estimate -- compare with the mean below.
for i in range(5):
    curr_result = SVR_gridsearch_holdout(
        X, y, customsvr.StandardCensSVR, param_grid, 10, 15,
        scaler=StandardScaler,
        outlier_detector=LocalOutlierFactor(),
        censSVR=True,
        custom_metric="c-index",
    )
    best_result = curr_result if curr_result[1] > best_result[1] else best_result

print("Best parameters set found on development set:")
print(best_result[0])
print("Test score: %f" % best_result[1])

# Re-evaluate the selected parameters with random_split_tests.
# The positional 25 is presumably the number of random splits -- TODO confirm.
mean_score = random_split_tests(
    X, y, customsvr.StandardCensSVR, best_result[0], 25,
    scaler=StandardScaler,
    outlier_detector=LocalOutlierFactor(),
    censSVR=True,
    custom_metric="c-index",
)
print("Mean score of model on random test splits: %f" % mean_score)

Custom SVR on dogs dataset

Best parameters set found on development set:
{'C': 1, 'coef0': 1, 'degree': 3, 'epsilon': 1, 'gamma': 0.001, 'kernel': 'poly'}
Test score: 0.727273
Mean score of model on random test splits: 0.630220


## SVCR

In [7]:
# Reload the dogs dataset with the same loader options used for the previous
# experiment, so this section does not rely on state left by earlier cells.
dogs = load_skl_dogs_2016(censSVR=True, NApolicy='normal')

# Bare last expression: the notebook displays the shape of the data attribute.
dogs.data.shape

(161, 17)

In [8]:
print("SVCR on dogs dataset\n")

# `dogs` is the object loaded in the preceding cell.
X, y = dogs.data, dogs.target

# One polynomial-kernel hyper-parameter grid handed to SVR_gridsearch_holdout.
# Unlike the StandardCensSVR grid, this one has no 'epsilon' entry.
param_grid = [
    {
        'C': [1, 5, 10],
        'gamma': [0.01, 0.001, 0.0001],
        'degree': [2, 3],
        'coef0': [0, 1, 10, 100],
        'kernel': ['poly'],
    },
]

# Sentinel pair: index 0 is a placeholder, index 1 is -inf so that any finite
# score returned by the search replaces it.
best_result = (0, -np.inf)

# Run the hold-out grid search five times and keep the run whose element [1]
# (printed below as the test score) is strictly the largest seen so far.
# The positional 10 and 15 are forwarded as-is; their meaning is defined by
# SVR_gridsearch_holdout, which is not visible here.
# NOTE(review): the reported "Test score" is the maximum over the five runs,
# so it is presumably an optimistic estimate -- compare with the mean below.
for i in range(5):
    curr_result = SVR_gridsearch_holdout(
        X, y, customsvr.SVCR, param_grid, 10, 15,
        scaler=StandardScaler,
        outlier_detector=LocalOutlierFactor(),
        censSVR=True,
        custom_metric="c-index",
    )
    best_result = curr_result if curr_result[1] > best_result[1] else best_result

print("Best parameters set found on development set:")
print(best_result[0])
print("Test score: %f" % best_result[1])

# Re-evaluate the selected parameters with random_split_tests.
# The positional 25 is presumably the number of random splits -- TODO confirm.
mean_score = random_split_tests(
    X, y, customsvr.SVCR, best_result[0], 25,
    scaler=StandardScaler,
    outlier_detector=LocalOutlierFactor(),
    censSVR=True,
    custom_metric="c-index",
)
print("Mean score of model on random test splits: %f" % mean_score)

SVCR on dogs dataset

Best parameters set found on development set:
{'C': 5, 'coef0': 100, 'degree': 3, 'gamma': 0.0001, 'kernel': 'poly'}
Test score: 0.676471
Mean score of model on random test splits: 0.615029
