In [1]:
import nbimporter
import stats_helper as sh
import preprocessing as pp

import time
import math
import numpy as np
from matplotlib import pyplot as plt
from sklearn.metrics import mean_squared_error, accuracy_score, cohen_kappa_score, confusion_matrix

Importing Jupyter notebook from stats_helper.ipynb
Importing Jupyter notebook from preprocessing.ipynb


# Simple Exponential Smoothing (SES)

In [2]:
class SES:
    """
        data - dataset with timestamps
        alpha - float [0.0, 1.0], smoothing parameter
    """
    
    def __init__(self, alpha):
        self.alpha = alpha
        self.result = []
    
    
    def fit(self, data_train):
        self.data_train = data_train
        self.result.append(data_train[0]) # first value is same as data
        for n in range(1, len(data_train)):
            self.result.append(self.alpha * data_train[n] + (1 - self.alpha) * self.result[n-1])
        return self.result
    
    
    # Predicts one point ahead once the data if 
    def predict_one(self):
        return self.result[-1]
    
    
    def predict(self, data_test, is_classification):
        data = self.data_train + data_test
        
        self.result.append(data[0]) # first value is same as data
        for n in range(1, len(data)):
            self.result.append(self.alpha * data[n] + (1 - self.alpha) * self.result[n-1])
        
        if is_classification:
            binary_returns = sh.output_to_binary_indicators(sh.data_daily_returns(self.result))
            return binary_returns[-len(data_test) - 1: -1]
        
        return self.result[-len(data_test) - 1: -1]

## Plotting SES

In [3]:
def plotExponentialSmoothing(series, alphas):
    """
        Plots exponential smoothing with different alphas
        
        series - dataset with timestamps
        alphas - list of floats, smoothing parameters
        
    """
    with plt.style.context('seaborn-white'):    
        plt.figure(figsize=(15, 7))
        for alpha in alphas:
            ses = SES(alpha)
            plt.plot(ses.fit(series), label="Alpha {}".format(alpha))
        plt.plot(series, "c", label = "Actual")
        plt.legend(loc="best")
        plt.axis('tight')
        plt.title("Exponential Smoothing")
        plt.grid(True);
        
# plotExponentialSmoothing(values[:100], [0.1, 0.5, 0.8, 0.9])

In [4]:
def plotSES(original_series, smoothed_series):
    plt.figure(figsize=(20, 7))
    plt.plot(range(len(original_series)), original_series, 'bo-')
    plt.plot(range(len(smoothed_series)), smoothed_series, 'r^--')
    plt.show()

## Finding optimal parameters

In [5]:
def print_metric_values(metric_values):
    print('Alpha \t\t\t Metric')
    for i in range(5): #range(len(metric_values)):
        print(metric_values[i][0], '\t\t\t', metric_values[i][1])
    print()

In [6]:
def find_optimal_parameters(symbol_name, num_splits, alphas, is_classification):
    metric_values = list()
    if is_classification:
        print('Using Accuracy for CV')
    else:
        print('Using Mean Squared Error for CV')
        
    data_train, data_test = sh.prepare_data(symbol_name, train_ratio = 0.75)
        
    for alpha in alphas:
        print("Alpha {}".format(alpha))
        ses = SES(alpha)
        metric_value = sh.statTimeSeriesCV(data_train, num_splits, ses, is_classification)
        metric_values.append([alpha, metric_value])
    
#     print_metric_values(metric_values)
    
    if is_classification:
        # Sorting by decreasing Accuracy Values
        metric_values.sort(reverse=True, key=lambda x: x[len(metric_values) - 1])
    else:
        # Sorting by increasing error Values
        metric_values.sort(reverse=False, key=lambda x: x[len(metric_values) - 1])
        
    print_metric_values(metric_values)
    
    return metric_values[0][0]

## Using SES for Prediction

In [7]:
def forecast(data_train, data_test, alpha, is_classification):
    print('Alpha: ', alpha)
    
    ses = SES(alpha)
    print('Fitting...')
    ses.fit(data_train)
    
    print('Predicting...') 
    predictions = ses.predict(data_test, is_classification)
    
    if is_classification:
        bin_data_test = sh.output_to_binary_indicators(sh.data_daily_returns(data_train[-1:] + data_test))
        sh.classification_metrics(bin_data_test, predictions)
    else:
        sh.regression_metrics(data_test, predictions)

In [8]:
def get_data_ready(symbol_name, alphas, is_classification):
    start_time = time.time()
    num_splits = 10
    alpha = find_optimal_parameters(symbol_name, num_splits, alphas, is_classification)
    end_time = time.time()
    print('Time taken for Cross Validation:', end_time - start_time)
    
    data_train, data_test = sh.prepare_data(symbol_name, train_ratio = 0.8)
    return data_train, data_test, alpha

In [11]:
def run_SES(symbol_name):
    is_classification = True
    alphas = np.arange(0, 1, 0.01)
    
    data_train, data_test, alpha = get_data_ready(symbol_name, alphas, is_classification)
    forecast(data_train, data_test, alpha, is_classification = True)

In [12]:
run_SES(symbol_name = 'MSFT')

Using Accuracy for CV
Alpha 0.0
Mean Metric Value:  0.5096371882086168 

Alpha 0.01
Mean Metric Value:  0.48809523809523814 

Alpha 0.02
Mean Metric Value:  0.4886621315192744 

Alpha 0.03
Mean Metric Value:  0.49121315192743764 

Alpha 0.04
Mean Metric Value:  0.4883786848072562 

Alpha 0.05
Mean Metric Value:  0.4841269841269841 

Alpha 0.06
Mean Metric Value:  0.483843537414966 

Alpha 0.07
Mean Metric Value:  0.4858276643990929 

Alpha 0.08
Mean Metric Value:  0.4852607709750567 

Alpha 0.09
Mean Metric Value:  0.4863945578231292 

Alpha 0.1
Mean Metric Value:  0.4861111111111111 

Alpha 0.11
Mean Metric Value:  0.4835600907029478 

Alpha 0.12
Mean Metric Value:  0.4832766439909297 

Alpha 0.13
Mean Metric Value:  0.4849773242630385 

Alpha 0.14
Mean Metric Value:  0.48582766439909303 

Alpha 0.15
Mean Metric Value:  0.48781179138321995 

Alpha 0.16
Mean Metric Value:  0.48781179138321995 

Alpha 0.17
Mean Metric Value:  0.48809523809523814 

Alpha 0.18
Mean Metric Value:  0.488378

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
  'precision', 'predicted', average, warn_for)
