In [None]:
import nbimporter
import stats_helper as sh
import preprocessing as pp

import time
import math
import numpy as np
from matplotlib import pyplot as plt
from sklearn.metrics import mean_squared_error, accuracy_score, confusion_matrix, cohen_kappa_score

Cohen’s Kappa - Kappa, or Cohen’s Kappa, is like classification accuracy, except that it is normalized against the baseline of random chance on your dataset. It is a more useful measure on problems with imbalanced classes (e.g. with a 70-30 split between classes 0 and 1, you can achieve 70% accuracy simply by predicting that every instance belongs to class 0).  
https://machinelearningmastery.com/machine-learning-evaluation-metrics-in-r/  

The kappa statistic is a number between -1 and 1. The maximum value means complete agreement; zero means agreement no better than chance, and negative values mean agreement worse than chance.

# Damped

In [None]:
class Damp:
    """
    Exponential smoothing with a damped trend.

        alpha - float [0.0, 1.0], smoothing parameter for the level
        beta - float [0.0, 1.0], smoothing parameter for the trend
        theta - float [0.0, 1.0], damping parameter applied to the trend

    `result` holds the values produced by the most recent `fit` or `predict`
    call: the first training observation followed by one
    `level + theta*trend` value per processed step.
    """

    def __init__(self, alpha, beta, theta):
        self.alpha = alpha
        self.beta = beta
        self.theta = theta
        self.result = []

    def _update(self, value, level, trend):
        """Advance the recursion by one observation and return (level, trend)."""
        new_level = self.alpha*value + (1 - self.alpha)*(level + self.theta*trend)
        new_trend = self.beta*(new_level - level) + (1 - self.beta)*self.theta*trend
        return new_level, new_trend

    def fit(self, data_train):
        """
        Smooth `data_train` and store one extra forecasted point.

            data_train - indexable sequence with at least two observations
                         (the initial trend is data_train[1] - data_train[0])

        Returns the smoothed values, one per training observation; the extra
        forecasted point is available through `predict_one`.
        """
        self.data_train = data_train

        # Start from a clean slate: without this, fitting the same instance
        # again would return the values of the previous fit as well.
        # First value is same as series
        self.result = [data_train[0]]

        # Initialising level and trend
        level, trend = data_train[0], data_train[1] - data_train[0]

        for n in range(1, len(data_train) + 1):
            # Forecasting the point ahead: past the end of the series the
            # latest forecast is fed back in as if it had been observed
            if n >= len(data_train):
                value = self.result[-1]
            else:
                value = data_train[n]

            level, trend = self._update(value, level, trend)
            self.result.append(level + self.theta*trend)

        # Returning the smoothed values (without the forecast)
        return self.result[:-1]

    # Returns the last value stored in `result`; right after `fit` this is the
    # point forecasted past the end of the training data
    def predict_one(self):
        return self.result[-1]

    def predict(self, data_test, is_classification):
        """
        Replay the training data, then walk through `data_test`.

            data_test - indexable sequence of observations following the training data
            is_classification - when True the predictions are converted with
                                sh.data_daily_returns and
                                sh.output_to_binary_indicators before being returned

        Requires a previous call to `fit` (reads `self.data_train`).
        Returns a list with one prediction per test observation, or the output
        of the `sh` helpers in classification mode.

        NOTE(review): each appended prediction is obtained by feeding the
        current one-step forecast back through the recursion *before* the
        matching test observation is used, so it may sit one step ahead of the
        test point it is later compared with - confirm the intended alignment.
        """
        train = self.data_train
        n_train = len(train)
        predictions = []

        # Rebuild `result` from scratch so repeated calls do not pile new
        # values on top of the ones stored by `fit` or an earlier `predict`.
        self.result = [train[0]]
        level, trend = train[0], train[1] - train[0]

        for n in range(1, n_train + len(data_test)):
            if n >= n_train: # we are forecasting
                value = data_test[n - n_train]

                predict_value = self.result[-1]

                # Adding it for generating binary returns: the extra leading
                # element gives the returns helper a starting point
                if n == n_train and is_classification:
                    predictions.append(predict_value)

                predict_level, predict_trend = self._update(predict_value, level, trend)
                predictions.append(predict_level + self.theta*predict_trend)
            else:
                value = train[n]

            # The real observation (train or test) always drives the state
            level, trend = self._update(value, level, trend)
            self.result.append(level + self.theta*trend)

        if is_classification:
            return sh.output_to_binary_indicators(sh.data_daily_returns(predictions))

        return predictions

## Finding optimal parameters

In [None]:
def print_metric_values(metric_values, top_n=5):
    """
    Print the first rows of a parameter-search table.

        metric_values - sequence of rows shaped [alpha, beta, theta, metric]
        top_n - maximum number of rows to print (default 5); None prints all rows

    Fewer than `top_n` rows are printed as-is instead of raising IndexError.
    """
    print('Alpha \t\t\t Beta \t\t\t Theta \t\t\t Metric')
    for row in metric_values[:top_n]:
        print(row[0], '\t\t\t', row[1], '\t\t\t', row[2], '\t\t\t', row[3])
    print()

In [None]:
def find_optimal_parameters(data_train, num_splits, alphas, betas, thetas, is_classification):
    """
    Grid-search alpha, beta and theta using sh.statTimeSeriesCV.

        data_train - training series handed to sh.statTimeSeriesCV
        num_splits - number of splits handed to sh.statTimeSeriesCV
        alphas, betas, thetas - iterables of candidate values
        is_classification - True: highest metric wins; False: lowest metric wins

    Returns the (alpha, beta, theta) triple with the best metric.
    Raises ValueError when the grid contains no combination.
    """
    if is_classification:
        print('Using Accuracy for CV')
    else:
        print('Using Mean Squared Error for CV')

    metric_values = []
    for alpha in alphas:
        for beta in betas:
            for theta in thetas:
                damp = Damp(alpha, beta, theta)
                metric_value = sh.statTimeSeriesCV(data_train, num_splits, damp, is_classification)
                metric_values.append([alpha, beta, theta, metric_value])

    if not metric_values:
        raise ValueError('alphas, betas and thetas must each contain at least one value')

    # Sort on the metric, i.e. the last column of every row. Indexing the row
    # with the number of rows (as was done before) only works by coincidence
    # when the grid has exactly four combinations.
    metric_values.sort(reverse=is_classification, key=lambda row: row[-1])
    print_metric_values(metric_values)

    best_alpha, best_beta, best_theta = metric_values[0][:3]
    return best_alpha, best_beta, best_theta

## Using Damp for Prediction

In [None]:
def forecast(data_train, data_test, alpha, beta, theta, is_classification):
    """
    Fit a Damp model on `data_train`, predict over `data_test` and report metrics.

        data_train, data_test - consecutive parts of the series; in
                                classification mode they are joined with `+`
        alpha, beta, theta - parameters passed to Damp
        is_classification - True: sh.classification_metrics on binary indicators
                            False: sh.regression_metrics on the raw values

    Nothing is returned; the `sh` metric helpers are called for their effect.
    """
    model = Damp(alpha, beta, theta)

    print('Fitting...')
    model.fit(data_train)

    print('Predicting...')
    predicted = model.predict(data_test, is_classification)

    if not is_classification:
        sh.regression_metrics(data_test, predicted)
        return

    # The last training point is prepended so that the first test observation
    # has a predecessor when the returns are computed.
    actual_series = data_train[-1:] + data_test
    actual_indicators = sh.output_to_binary_indicators(sh.data_daily_returns(actual_series))
    sh.classification_metrics(actual_indicators, predicted)

In [None]:
def get_data_ready(symbol_name, alphas, betas, thetas, is_classification):
    """
    Load the series for `symbol_name` and tune the Damp parameters on its training part.

        symbol_name - identifier passed to sh.prepare_data
        alphas, betas, thetas - candidate values for the grid search
        is_classification - selects the metric used by find_optimal_parameters

    Returns (data_train, data_test, alpha, beta, theta).
    The printed duration covers data preparation as well as the search,
    because the timer starts before sh.prepare_data is called.
    """
    started_at = time.time()
    n_folds = 10

    print('Data Prep')
    train_series, test_series = sh.prepare_data(symbol_name, train_ratio=0.8)
    best_alpha, best_beta, best_theta = find_optimal_parameters(
        train_series, n_folds, alphas, betas, thetas, is_classification
    )
    elapsed = time.time() - started_at

    print('Time taken for Cross Validation:', elapsed)
    return train_series, test_series, best_alpha, best_beta, best_theta

In [None]:
def run_Damp(symbol_name, cv_is_classification=False, forecast_is_classification=True):
    """
    Tune a Damp model for `symbol_name` and report its metrics on the test split.

        symbol_name - identifier passed on to get_data_ready
        cv_is_classification - mode used while searching for alpha, beta and theta
        forecast_is_classification - mode used for the final forecast and its metrics

    The defaults keep the previous hard-coded behaviour: parameters are tuned
    in regression mode while the forecast is evaluated in classification mode.
    NOTE(review): that mismatch looks unintentional - pass the same value for
    both flags to tune and evaluate on the same task.
    """
    # Candidate values 0.0, 0.1, ..., 0.9 for each parameter (1.0 is excluded
    # because np.arange leaves out the stop value)
    alphas = np.arange(0, 1, 0.1)
    betas = np.arange(0, 1, 0.1)
    thetas = np.arange(0, 1, 0.1)

    data_train, data_test, alpha, beta, theta = get_data_ready(
        symbol_name, alphas, betas, thetas, cv_is_classification
    )
    forecast(data_train, data_test, alpha, beta, theta, is_classification=forecast_is_classification)

In [None]:
# Run the full tune-and-forecast pipeline for the AMZN symbol
run_Damp(symbol_name='AMZN')