In [1]:
import json

import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score
from sklearn.model_selection import ShuffleSplit

In [2]:
%matplotlib notebook
plt.style.use('seaborn-whitegrid')

In [3]:
def split_xy(df):
    """Separate a frame into predictors and the ``Spo_decr`` target.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a ``Spo_decr`` column.

    Returns
    -------
    tuple
        ``(x, y)``: ``df`` without the ``Spo_decr`` column, and the
        ``Spo_decr`` column itself. ``df`` is not modified.
    """
    target = df.Spo_decr
    predictors = df.drop(columns='Spo_decr')
    return predictors, target

In [8]:
def get_correlation_and_r2_patfold(gt, pred, mae):
    """Correlation matrix and R^2 between ground truth and predictions of one fold.

    Parameters
    ----------
    gt : array-like
        Ground-truth values (Series, ndarray or plain list).
    pred : array-like
        Predicted values (ndarray or plain list).
    mae : object
        Not used by this function; kept so existing callers keep working.

    Returns
    -------
    ccorr : pandas.DataFrame
        Result of ``DataFrame.corr()`` on the two columns placed side by side;
        the first row/column is the ground truth, the second the prediction
        (both columns carry the label ``0``).
    r2 : float
        ``r2_score(gt, pred)``.
    """
    # np.asarray lets plain lists through (e.g. values reloaded from JSON),
    # which have neither ``.shape`` nor ``.reshape``.
    gt_values = np.asarray(gt)
    pred_values = np.asarray(pred)

    gt = pd.DataFrame(gt_values.reshape(gt_values.shape[0], 1))
    pred = pd.DataFrame(pred_values.reshape(pred_values.shape[0], 1))
    ccorr = pd.concat([gt, pred], axis=1).corr()
    r2 = r2_score(gt, pred)
    return ccorr, r2


def get_correlation_and_r2(gt, pred, mae):
    """Correlation matrix and R^2 for the entry with the smallest MAE.

    Parameters
    ----------
    gt : sequence of array-like
        Ground-truth values, one entry per run.
    pred : sequence of array-like
        Predictions, one entry per run, aligned with ``gt``.
    mae : array-like
        One error value per run; ``np.argmin(mae)`` selects which entry of
        ``gt`` / ``pred`` is evaluated.

    Returns
    -------
    ccorr : pandas.DataFrame
        Result of ``DataFrame.corr()`` on the selected ground truth (first
        row/column) and prediction (second row/column); both columns carry
        the label ``0``.
    r2 : float
        ``r2_score`` of the selected ground truth against the selected
        prediction.
    """
    less_mae_idx = np.argmin(mae)

    # np.asarray lets plain lists through (e.g. values reloaded from JSON),
    # which have neither ``.shape`` nor ``.reshape``.
    best_pred = np.asarray(pred[less_mae_idx])
    best_gt = np.asarray(gt[less_mae_idx])

    # Both columns are shaped from the prediction length, as before.
    new_shape = (best_pred.shape[0], 1)
    a = pd.DataFrame(best_pred.reshape(new_shape))
    b = pd.DataFrame(best_gt.reshape(new_shape))
    ccorr = pd.concat([b, a], axis=1).corr()
    r2 = r2_score(b, a)
    return ccorr, r2

# NOTE(review): this repeats the ``split_xy`` definition from cell In [3];
# whichever cell runs last is the one in effect.
def split_xy(df):
    """Return ``(x, y)``: ``df`` minus the ``Spo_decr`` column, and that column."""
    target = df.Spo_decr
    predictors = df.drop(columns='Spo_decr')
    return predictors, target

In [5]:
# Load the three CSV inputs from the current working directory.
x_all, y_all_unscaled, pat_indexes = [
    pd.read_csv(csv_name)
    for csv_name in ('x_all.csv', 'y_all_unscaled.csv', 'pat_indexes.csv')
]

In [6]:
# Starts out empty; nothing in the visible cells appends to it.
models_list = list()

In [7]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.svm import SVR

from sklearn.model_selection import ShuffleSplit
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold

from sklearn.metrics import mean_absolute_error

In [13]:
def _patient_boundary(pat_indexes, position):
    """Return the row boundary stored at ``position`` of ``pat_indexes`` as an int."""
    # ``.iloc[position]`` is a scalar for a Series but a one-element row for a
    # single-column DataFrame; calling int() directly on such a row is
    # deprecated in recent pandas, so unwrap the single value explicitly.
    values = np.asarray(pat_indexes.iloc[position]).reshape(-1)
    if values.size != 1:
        raise TypeError(
            'pat_indexes must hold exactly one boundary per entry, got {} '
            'values at position {}'.format(values.size, position))
    return int(values[0])


def train_model_testpat(model, X, y, pat_indexes, val_pat, splitter):
    """Fit ``model`` with one patient held out and score it on that patient.

    Rows of ``X``/``y`` are treated as four consecutive patient blocks whose
    boundaries are read positionally from ``pat_indexes``:

    * patient 1: rows ``[0, pat_indexes[1])``
    * patient 2: rows ``[pat_indexes[1], pat_indexes[2])``
    * patient 3: rows ``[pat_indexes[2], pat_indexes[3])``
    * patient 4: rows ``[pat_indexes[3], end)``

    Parameters
    ----------
    model : estimator
        Object with ``fit(X, y)`` and ``predict(X)``; it is fitted in place.
    X, y : pandas objects
        Concatenated column-wise before the rows are split.
    pat_indexes : pandas.Series or single-column pandas.DataFrame
        Row boundaries, looked up with ``.iloc``.
    val_pat : int
        Held-out patient, one of 1, 2, 3, 4.
    splitter : callable
        Maps the combined frame back to ``(X, y)``.

    Returns
    -------
    tuple
        ``(y_test, y_preds, mae)`` for the held-out patient.

    Raises
    ------
    ValueError
        If ``val_pat`` is not in 1..4.
    TypeError
        If an entry of ``pat_indexes`` holds more than one value.
    """
    if val_pat not in range(1, 5):
        raise ValueError(
            'val_pat must be 1, 2, 3 or 4, got {!r}'.format(val_pat))

    xy_data = pd.concat([X, y], axis=1)
    if val_pat == 1:
        # First patient: everything before the first boundary is the test set.
        test_limit = _patient_boundary(pat_indexes, val_pat)
        test_data = xy_data.iloc[:test_limit, :]
        train_data = xy_data.iloc[test_limit:, :]
    elif val_pat == 4:
        # Last patient: everything from the last boundary on is the test set.
        test_limit = _patient_boundary(pat_indexes, val_pat - 1)
        test_data = xy_data.iloc[test_limit:, :]
        train_data = xy_data.iloc[:test_limit, :]
    else:
        # Middle patient: cut its block out and train on both sides of it.
        test_limit_low = _patient_boundary(pat_indexes, val_pat - 1)
        test_limit_up = _patient_boundary(pat_indexes, val_pat)
        test_data = xy_data.iloc[test_limit_low:test_limit_up, :]
        train_data = pd.concat(
            [xy_data.iloc[:test_limit_low, :], xy_data.iloc[test_limit_up:, :]],
            axis=0)

    X_train, y_train = splitter(train_data)
    X_test, y_test = splitter(test_data)

    model.fit(X_train, y_train)
    y_preds = model.predict(X_test)
    # scikit-learn's signature is (y_true, y_pred).
    mae = mean_absolute_error(y_test, y_preds)

    return y_test, y_preds, mae

In [12]:
svr = SVR(degree=3, gamma='scale')
# Fixed seed so that re-running the notebook refits the same boosted model.
gb = GradientBoostingRegressor(random_state=0)

In [23]:
# One run per held-out patient (1..4): all SVR folds first, then all
# gradient-boosting folds. Each result is a (y_test, y_preds, mae) tuple.
s1, s2, s3, s4 = [
    train_model_testpat(svr, x_all, y_all_unscaled, pat_indexes, held_out, split_xy)
    for held_out in (1, 2, 3, 4)
]

g1, g2, g3, g4 = [
    train_model_testpat(gb, x_all, y_all_unscaled, pat_indexes, held_out, split_xy)
    for held_out in (1, 2, 3, 4)
]


In [39]:
# Nested results: model name -> held-out patient ('1'..'4') ->
# {'gt': [...], 'preds': [...], 'mae': value}.
models_dict = {
    model_name: {
        str(patient): {
            'gt': list(fold[0]),
            'preds': list(fold[1]),
            'mae': fold[2],
        }
        for patient, fold in enumerate(folds, start=1)
    }
    for model_name, folds in (
        ('svr', (s1, s2, s3, s4)),
        ('gb', (g1, g2, g3, g4)),
    )
}

In [40]:
class NumpyEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy scalars and arrays to built-in types.

    Pass it as ``cls=NumpyEncoder`` to ``json.dump`` / ``json.dumps``.
    """

    def default(self, obj):
        """Return a JSON-serialisable equivalent of ``obj``.

        NumPy integers become ``int``, NumPy floats ``float``, NumPy booleans
        ``bool`` and arrays nested lists. Anything else is passed to the base
        class, which raises ``TypeError``.
        """
        # The abstract scalar types cover every concrete width and avoid
        # aliases such as ``np.float_`` that were removed in NumPy 2.0.
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return json.JSONEncoder.default(self, obj)

# Serialise the results to a JSON string, converting NumPy values on the way.
dumped = NumpyEncoder().encode(models_dict)

In [41]:
# ``dumped`` is already a JSON document, so write it verbatim. Passing it
# through json.dump again would store the whole document as one JSON *string*.
with open('data_gbsvr.json', 'w') as fp:
    fp.write(dumped)
with open('data_gbsvr.json', 'r') as fp:
    f = json.load(fp)
# Files written by earlier revisions of this cell are double-encoded; decode
# the inner document too so both layouts load to the same dict.
if isinstance(f, str):
    f = json.loads(f)


In [13]:
def visul_shnn(gt, preds):
    """Scatter-plot ``preds`` (y axis) against ``gt`` (x axis) on a new figure.

    Both axes are limited to the range 0 to 50.
    """
    # Open a fresh figure so the scatter is not drawn onto an existing one.
    plt.figure()
    plt.scatter(gt, preds)
    # Same limits on both axes.
    plt.xlim([0, 50])
    plt.ylim([0, 50])