In [1]:
import torch
import torch.optim as optim
import torch.nn as nn
from parse_data import get_data, get_modified_values, get_binary_values, make_data_scalar

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Using device: ", device)



def make_data(df, device, seq_len):

    x_train, y_train = [], []
    prev = []
    m = df.max()[0]
    #print(df)
    for row in df.values:
        
        if len(prev) < seq_len:
            before = [0]*(seq_len - len(prev))
            for a in prev:
                before.append(a)
            #print(before)
            x_train.append(before)
        else:   
            x_train.append(prev[-seq_len:])
        y_train.append(row[0]/m)
        prev.append(row[0]/m)
    return x_train,y_train

X, y = make_data(get_data(), None, 100)
#print(X)

Using device:  cuda


In [2]:
# gradient boosting for classification in scikit-learn
from numpy import mean
from numpy import std
from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_squared_error, mean_absolute_error
from matplotlib import pyplot
# define dataset
#X, y = make_classification(n_samples=1000, n_features=10, n_informative=5, n_redundant=5, random_state=1)
# evaluate the model
#model = GradientBoostingRegression()
#cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
#n_scores = cross_val_score(model, X, y, scoring='accuracy', cv=cv, n_jobs=-1, error_score='raise')
#print('Accuracy: %.3f (%.3f)' % (mean(n_scores), std(n_scores)))
# fit the model on the whole dataset
X = X[200:]
y = y[200:]
model = GradientBoostingRegressor()
model.fit(X, y)
# make a single prediction
row = X[200:]
#print(row)
yhat = model.predict(X)
print('Prediction: %f' % yhat[65], "Actual: ", y[65])

correct = 0
count = 0
for y_val, yh in zip(y, yhat):
    count += 1 
    if yhat[0] == yh:
        correct += 1
print(correct, count)
    

Prediction: 0.075499 Actual:  0.06655574043261231
1 7952


In [None]:
def try_model(x,y, lr, estimators, max_depth, loss="squared_error"):
    model = GradientBoostingRegressor(loss=loss,learning_rate=lr, n_estimators=estimators, max_depth=max_depth)
    
    model.fit(x,y)
    
    return model

def model_score(model,x,y):
    yhat = model.predict(x)
    return mean_absolute_error(y,yhat)

learning_rate = [0.001, 0.01, 0.1, 0.002, 0.02, 0.2, 0.005, 0.05, 0.5]
max_depth = [1, 3, 5, 10, None]
estimators = [50, 100, 200, 500, 1000]


best_m = None
best_loss = 10000000000000000000000
for lr in learning_rate:
    for depth in max_depth:
        for est in estimators:
            m = try_model(X,y, lr, est, depth, loss="absolute_error")
            loss = model_score(m,X,y)
            if loss < best_loss:
                print("New best model.\n\tNew loss:", loss, "\n\tOld loss:", best_loss)
                best_loss = loss
                best_m = m
                print(m.get_params())
            #else:
                #print("New model not better.\n\tNew loss:", loss, "\n\tOld loss:", best_loss)

yhat = best_m.predict(X)


New best model.
	New loss: 0.040854188505200915 
	Old loss: 10000000000000000000000
{'alpha': 0.9, 'ccp_alpha': 0.0, 'criterion': 'friedman_mse', 'init': None, 'learning_rate': 0.001, 'loss': 'absolute_error', 'max_depth': 1, 'max_features': None, 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 50, 'n_iter_no_change': None, 'random_state': None, 'subsample': 1.0, 'tol': 0.0001, 'validation_fraction': 0.1, 'verbose': 0, 'warm_start': False}
New best model.
	New loss: 0.04077000017426615 
	Old loss: 0.040854188505200915
{'alpha': 0.9, 'ccp_alpha': 0.0, 'criterion': 'friedman_mse', 'init': None, 'learning_rate': 0.001, 'loss': 'absolute_error', 'max_depth': 1, 'max_features': None, 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 100, 'n_iter_no_change': None, 'random_state': None, 'subsam

In [None]:
import matplotlib.pyplot as plt

def eval_model(y,yhat):
   
    fig, ax = plt.subplots(2)
    
    ax[0].plot(range(1,51), yhat[:50])
    ax[0].plot( range(1,51), y[:50])

    ax[1].plot(range(2000,2050), yhat[2000:2050])
    
    ax[1].plot(range(2000,2050), y[2000:2050] )
    fig.suptitle("Result when feeding correct values as input")
    fig.savefig("teacher_forcing.png")



eval_model(y,yhat)