# In [None]:
# Copyright (c) Roman Lutz. All rights reserved.
# The use and distribution terms for this software are covered by the
# Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
# which can be found in the file LICENSE.md at the root of this distribution.
# By using this software in any fashion, you are agreeing to be bound by
# the terms of this license.
# You must not remove this notice, or any other, from this software.

import pdb
import numpy as np
from sklearn.svm import SVR
from sklearn import preprocessing
from sklearn import cross_validation
from sklearn import feature_selection
from sklearn.metrics import mean_squared_error, mean_absolute_error
from create_datasets import test_players
import time
from metrics import mean_relative_error
from plots import histogram
import sklearn.model_selection
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import GridSearchCV
import sklearn
import threading


"""Values for the SVR"""
# SVR kernel names.
kernels = ['rbf', 'linear', 'sigmoid']
# Polynomial degrees used in the 'poly' kernel grid.
degrees = [2]
# gamma candidates: 0.05 * k for k = 1..3
gamma_values = [0.05 * step for step in range(1, 4)]
# C (regularization) candidates: 0.25 * k for k = 1..4
C_values = [0.25 * step for step in range(1, 5)]
# epsilon candidates: 0.05 * k for k = 1..5
epsilon_values = [0.05 * step for step in range(1, 6)]


# Best parameter dictionaries found by hyperparameter(), one entry per kernel
# search that was run, in the order the searches were executed.
overall_best_params = []


def _grid_search_kernel(x, y, label, param_grid):
    """Run one cross-validated SVR grid search and print its results.

    Args:
        x: training feature matrix handed to GridSearchCV.fit.
        y: training targets handed to GridSearchCV.fit.
        label: heading printed (followed by a colon) before the scores.
        param_grid: dict mapping SVR parameter names to candidate values.

    Returns:
        The best parameter dictionary reported by the search
        (GridSearchCV.best_params_).
    """
    # Since we are predicting scores and using regression, use SVR().
    svr = sklearn.svm.SVR()
    clf = sklearn.model_selection.GridSearchCV(
        svr, param_grid, verbose=5, scoring='neg_mean_absolute_error')
    clf.fit(x, y)

    cvres = clf.cv_results_
    print(label + ":")
    for mean_score, params in zip(cvres["mean_test_score"], cvres["params"]):
        print(mean_score, params)
    print("Best: " + str(clf.best_params_))
    return clf.best_params_


def hyperparameter(x, y):
    """Select SVR hyperparameters by cross-validated grid search.

    Runs a GridSearchCV (scored with 'neg_mean_absolute_error') for each
    enabled kernel grid, prints the mean test score of every candidate and
    the best candidate, and appends the best parameter dictionary to the
    module-level list overall_best_params.

    Only the polynomial-kernel search is currently enabled; the grids for
    the linear, rbf and sigmoid kernels are listed below as comments.

    Args:
        x: training feature matrix.
        y: training targets.

    Returns:
        None. Results are accumulated in overall_best_params.
    """
    # Disabled searches, kept for reference. To re-enable one, move its
    # entry into `searches` below.
    #   ("LINEAR", {'kernel': ['linear'], 'C': C_values,
    #               'gamma': gamma_values, 'epsilon': epsilon_values}),
    #   ("RBF", {'kernel': ['rbf'], 'C': C_values,
    #            'gamma': gamma_values, 'epsilon': epsilon_values}),
    #   ("SIGMOID", {'kernel': ['sigmoid'], 'C': C_values,
    #                'gamma': gamma_values, 'epsilon': epsilon_values}),
    searches = [
        ("POLYNOMIAL", {'kernel': ['poly'], 'degree': degrees}),
    ]

    for label, param_grid in searches:
        best_params = _grid_search_kernel(x, y, label, param_grid)
        # overall_best_params is a list: use append (lists have no .add()).
        overall_best_params.append(best_params)
    
    
   


# --- Pipeline switches ---------------------------------------------------
# Feature selection: only one of the two methods can be chosen.
FEATURE_SELECTION = False          # recursive feature elimination (RFECV)
MANUAL_FEATURE_SELECTION = False   # hand-picked column subset

# Standardize the features before fitting.
FEATURE_NORMALIZATION = False

# Run the grid search over SVR hyperparameters.
HYPERPARAMETER_SELECTION = True

# Histogram switch.
HISTOGRAM = True


# load data
train = np.load('train.npy')
test = np.load('test.npy')

# Column indices of the loaded arrays are
#   0: QB id
#   1: QB name
#   2: QB age
#   3: QB years pro
#   4: (not described)
#   5-16: last game QB stats
#   17-28: last 10 games QB stats
#   29-32: last game defense stats
#   33-36: last 10 games defense stats
#   37: actual fantasy score = target
#
# Columns 2-36 are used as features and column 37 as the regression target.
# The builtin float is used as the dtype instead of the np.float alias, which
# was deprecated in NumPy 1.20 and removed in NumPy 1.24; the resulting dtype
# is the same.
train_x = train[:, 2:37].astype(float)
train_y = train[:, 37].astype(float)
test_x = test[:, 2:37].astype(float)
test_y = test[:, 37].astype(float)


# Feature Normalization
if FEATURE_NORMALIZATION:
    # Single-argument print() emits the same text on Python 2 and Python 3.
    print('started feature normalization %s' % time.time())
    # Remember the split point before train_x is rebound below.
    n_train = len(train_x)
    # NOTE: train and test rows are scaled together, so the scaling
    # statistics are computed over both sets.
    x = np.concatenate((train_x, test_x), axis=0)
    x = preprocessing.scale(x)
    train_x = x[:n_train]
    test_x = x[n_train:]


# Recursive Feature Elimination with cross-validation (RFECV)
if FEATURE_SELECTION:
    # Single-argument print() emits the same text on Python 2 and Python 3.
    print('started feature selection %s' % time.time())
    selector = feature_selection.RFECV(estimator=SVR(kernel='linear'), step=3, cv=5)
    # The selector is fitted on the training data only and then applied to
    # both splits so they keep the same columns.
    selector.fit(train_x, train_y)
    train_x = selector.transform(train_x)
    test_x = selector.transform(test_x)
    print(selector.ranking_)
elif MANUAL_FEATURE_SELECTION:  # leave out the two point attempts
    # Indices are relative to the feature matrix (i.e. after the first two
    # raw columns were dropped when the features were sliced out).
    manual_indices = [0, 1, 2, 3, 4, 5, 8, 9, 10, 13, 14, 15, 16, 17, 20, 21, 22, 25, 26, 27, 28, 29, 30, 31, 32, 33]
    train_x = train_x[:, manual_indices]
    test_x = test_x[:, manual_indices]


# Hyperparameter Selection: grid-search SVR settings on the training data.
if HYPERPARAMETER_SELECTION:
    hyperparameter(x=train_x, y=train_y)


