In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from functools import reduce
from tslearn.preprocessing import TimeSeriesScalerMeanVariance, TimeSeriesResampler
import plotly.graph_objects as go
import math
from scipy.stats import zscore

Install h5py to use hdf5 features: http://docs.h5py.org/
  warn(h5py_msg)


In [6]:
# Load data: read the price sheet, then derive one price series and one
# change series per asset.

asset_pchange_list = []
asset_price_list = []
asset_name_list = []

# sheet_name=4 selects the sheet by zero-based position.
df = pd.read_excel('PCA1.xlsx', sheet_name=4)
# The first column holds the dates; np.flip reverses the sheet's row order.
date_list = np.flip(df.iloc[:, 0].to_numpy().astype('datetime64[D]'))
# Counts every column of the sheet, including the date column.
num_of_asset = len(df.columns)
for i in range(1, num_of_asset):
    asset_array = df.iloc[:, i].to_numpy()
    # NOTE(review): NaNs are dropped per asset before flipping, so a column
    # with gaps becomes shorter than date_list and its positions no longer
    # line up with the dates - confirm the sheet has no interior gaps.
    mask = ~np.isnan(asset_array)
    asset_array_no_na = asset_array[mask]
    asset_price_list.append(np.flip(asset_array_no_na))
    asset_name_list.append(df.columns[i])

# A change series has one element fewer than its price series.
p_date_list = date_list[1:]
for i in range(len(asset_price_list)):
    asset_price = asset_price_list[i]
    print(asset_name_list[i], i)
    # Assets at positions 0-11 (and 12-18 when more than 20 assets are loaded)
    # use relative changes; every other asset uses plain first differences.
    if i < 12 or (len(asset_price_list) > 20 and i < 19):
        try:
            diff_list = np.diff(asset_price) / asset_price[:-1]
        except (TypeError, ValueError) as exc:
            # Fail loudly: silently continuing would append the previous
            # asset's changes under this asset's position.
            raise ValueError(
                f"cannot compute relative changes for asset {i} ({asset_name_list[i]})"
            ) from exc
    else:
        diff_list = np.diff(asset_price)
    asset_pchange_list.append(diff_list)

# training_asset_pchange_list = np.array(asset_pchange_list)[:, :960]
# testing_asset_pchange_list = np.array(asset_pchange_list)[:, 960:]

# training_p_date_list = np.array(p_date_list)[:960]
# testing_p_date_list = np.array(p_date_list)[960:]

# training_asset_price_list = np.array(asset_price_list)[:,:960]
# testing_asset_price_list = np.array(asset_price_list)[:,960:]

# training_date_list = np.array(date_list)[:960]
# testing_date_list = np.array(date_list)[960:]



SPX index 0


In [29]:
class All_TSMD():
    """Fixed-window pattern matcher that forecasts several assets at once.

    Every asset's change series is cut at ``target_index``. The ``windows``
    values starting there form the pattern to match; everything before it is
    searched for the ``n_closest`` most similar windows (distances are summed
    over all assets). The changes that followed each match are then replayed
    on top of the price at ``target_index + windows``.

    Typical use: ``fit()`` -> ``transform(n)`` -> ``score()`` / ``plot()``.
    """

    def __init__(self, price_lists, price_date_list, data_lists, data_date_list, asset_name_list, target_index, windows=10, n_closest=5, is_p_change = True):
        """Store the inputs and split the change series around the target.

        Args:
            price_lists: one price series per asset.
            price_date_list: dates indexed like the price series.
            data_lists: one change series per asset; converted with
                ``np.array`` and sliced in 2-D, so all series must have the
                same length.
            data_date_list: dates for ``data_lists`` (stored, not used here).
            asset_name_list: one plot title per asset.
            target_index: start of the target pattern inside ``data_lists``.
            windows: length of the target pattern and of every candidate.
            n_closest: number of most similar windows to keep.
            is_p_change: True compares windows with a position-weighted
                squared error; False scales them with
                ``TimeSeriesScalerMeanVariance`` and uses the Euclidean norm.
        """
        self.__price_lists = price_lists
        self.__price_date_list = price_date_list
        self.__data_lists = data_lists
        self.__data_date_list = data_date_list
        self.__target_index = target_index
        self.__windows = windows
        self.__n_closest = n_closest
        self.__is_p_change = is_p_change
        self.__asset_name_list = asset_name_list

        data_array = np.array(data_lists)
        self.__before_target_data_lists = data_array[:, :target_index]
        self.__target_data_lists = data_array[:, target_index:target_index+windows]

    @staticmethod
    def _weighted_distance(first, second):
        """Return sqrt(sum((first - second)**2 * [1, 2, ..., n]))."""
        squared_error = np.square(np.array(first) - np.array(second))
        factors = np.arange(len(squared_error)) + 1
        return np.sqrt(np.sum(squared_error * factors))

    def fit(self, sd = True):
        """Find the ``n_closest`` historical windows most similar to the target.

        Args:
            sd: when True (and ``is_p_change`` is True) both the candidate
                window and the target are z-scored before comparison.

        Sets:
            cumulative_asset_e_dist_list: per-window distance summed over assets.
            n_smallest_index: ascending start positions of the selected
                windows; at most ``n_closest`` entries, NaN distances excluded.
            n_smallest_e_dist_list: distances at ``n_smallest_index``.
        """
        windows = self.__windows
        is_p_change = self.__is_p_change
        n_closest = self.__n_closest

        all_asset_e_dist_list = []
        for asset_data, raw_target in zip(self.__before_target_data_lists, self.__target_data_lists):
            # The target is normalised once per asset rather than once per
            # candidate window.
            if is_p_change:
                target_data = zscore(raw_target) if sd else raw_target
            else:
                target_data = TimeSeriesScalerMeanVariance().fit_transform([raw_target])[0]

            e_dist_list = []
            for j in range(len(asset_data) - windows + 1):
                compare_data = asset_data[j:j+windows]
                if is_p_change:
                    if sd:
                        compare_data = zscore(compare_data)
                    e_dist_list.append(self._weighted_distance(compare_data, target_data))
                else:
                    compare_data = TimeSeriesScalerMeanVariance().fit_transform([compare_data])[0]
                    e_dist_list.append(np.linalg.norm(np.array(compare_data) - target_data))
            all_asset_e_dist_list.append(e_dist_list)

        cumulative_asset_e_dist_list = np.asarray(reduce(np.add, all_asset_e_dist_list), dtype=float)

        # A stable argsort yields exactly n_closest positions even when
        # distances tie (earliest window wins) and tolerates fewer than
        # n_closest candidates. NaN distances (e.g. z-score of a constant
        # window) are never selected.
        order = np.argsort(cumulative_asset_e_dist_list, kind="stable")
        order = order[~np.isnan(cumulative_asset_e_dist_list[order])][:n_closest]
        n_smallest_index = np.sort(order)

        self.cumulative_asset_e_dist_list = cumulative_asset_e_dist_list
        self.n_smallest_e_dist_list = cumulative_asset_e_dist_list[n_smallest_index]
        self.n_smallest_index = n_smallest_index

    def plot(self, show_mean = True, show_all = True):
        """Show one plotly figure per asset; requires ``fit`` and ``transform``.

        Each figure contains the price series, the matched windows (red), the
        target window (yellow) and, depending on the flags, every individual
        forecast (gray) and the mean forecast (green).
        """
        asset_price_lists = self.__price_lists
        asset_price_date_list = self.__price_date_list
        target_index = self.__target_index
        n_smallest_index = self.n_smallest_index
        asset_name_list = self.__asset_name_list
        all_prediction_list = self.__all_prediction_list
        new_date_list = self.__new_date_list

        # A window of n changes spans n + 1 prices.
        span = self.__windows + (1 if self.__is_p_change else 0)

        for i in range(len(asset_price_lists)):
            asset_price_list = asset_price_lists[i]
            fig = go.Figure()
            fig.add_trace(go.Scatter(x=asset_price_date_list[:len(asset_price_list)], y=asset_price_list, name='index',  mode='lines'))
            for j in range(len(n_smallest_index)):
                index = n_smallest_index[j]
                fig.add_trace(go.Scatter(x=asset_price_date_list[index:index+span], y=asset_price_list[index:index+span], name= f"similar {j}", line = dict(color='red'),  mode='lines'))
                if show_all:
                    fig.add_trace(go.Scatter(x=new_date_list, y=all_prediction_list[i][j], name= f"predict {j}", line = dict(color='gray'),  mode='lines'))

            mean_list = self.__all_predict_mean_list[i]
            if show_mean:
                fig.add_trace(go.Scatter(x=new_date_list, y=mean_list, name= f"mean predict", line = dict(color='green'),  mode='lines'))
            fig.add_trace(go.Scatter(x=asset_price_date_list[target_index:target_index+span], y=asset_price_list[target_index:target_index+span], name= f"search target", line = dict(color='yellow'),  mode='lines'))

            fig.update_layout(title=asset_name_list[i],
                   xaxis_title='Date',
                   yaxis_title='Value')
            fig.show()

    def transform(self, n_data_points):
        """Build ``n_data_points``-long forecasts from the matched windows.

        For every asset and every matched window, the changes that followed
        the match are applied to the price at ``target_index + windows``.
        Forecast dates advance in 7-day steps from the date at that position.

        Args:
            n_data_points: forecast length, including the starting price.
        """
        self.__n_data_points = n_data_points
        n_smallest_index = self.n_smallest_index
        data_lists = self.__data_lists
        price_lists = self.__price_lists
        asset_price_date_list = self.__price_date_list
        windows = self.__windows
        anchor = self.__target_index + windows

        # The date axis is identical for every asset and match, so build it once.
        latest_date = asset_price_date_list[anchor]
        new_date_list = [latest_date]
        for _ in range(n_data_points - 1):
            latest_date = latest_date + np.timedelta64(7, 'D')
            new_date_list.append(latest_date)

        all_prediction_list = []
        all_predict_mean_list = []
        all_predict_percentage_change_list = []

        for i in range(len(data_lists)):
            data_list = data_lists[i]
            price_list = price_lists[i]
            # Same asset split as the loading cell, but based on this
            # instance's own price_lists rather than a notebook global.
            compounds = i < 12 or (len(price_lists) > 20 and i < 19)

            asset_prediction_list = []
            predict_percentage_change_list = []
            for index in n_smallest_index:
                follow_start = index + windows
                latest_price = price_list[anchor]
                new_data_list = [latest_price]
                for j in range(n_data_points - 1):
                    step = data_list[follow_start + j]
                    if compounds:
                        latest_price *= (1 + step)
                    else:
                        latest_price += step
                    new_data_list.append(latest_price)
                asset_prediction_list.append(new_data_list)
                predict_percentage_change_list.append(data_list[follow_start:follow_start+n_data_points])

            all_predict_mean_list.append(np.mean(np.array(asset_prediction_list), axis=0))
            all_prediction_list.append(asset_prediction_list)
            all_predict_percentage_change_list.append(np.mean(np.array(predict_percentage_change_list), axis=0))

        self.__all_prediction_list = all_prediction_list
        self.__new_date_list = np.array(new_date_list).astype('datetime64[D]')
        self.__all_predict_mean_list = all_predict_mean_list
        self.__all_predict_percentage_change_list = np.array(all_predict_percentage_change_list)

    def plot_e_dist(self):
        """Plot the cumulative distance of every candidate window; requires ``fit``."""
        cumulative_asset_e_dist_list = self.cumulative_asset_e_dist_list
        price_date_list = self.__price_date_list

        fig = go.Figure()
        fig.add_trace(go.Scatter(x=price_date_list[:len(cumulative_asset_e_dist_list)], y=cumulative_asset_e_dist_list, name='similarity'))
        fig.update_layout(title='Similarity',
                   xaxis_title='Date',
                   yaxis_title='1/similarity')
        fig.show()

    def score(self):
        """Compare the mean predicted changes with the realised ones.

        Requires ``transform``. For each asset the position-weighted distance
        between the mean predicted changes and the changes that start at
        ``target_index + windows`` is computed.

        Sets:
            total_sse: sum of the per-asset distances.
            sse_per_n_forecast: ``total_sse`` divided by the forecast length.
        """
        all_predict_percentage_change_list = self.__all_predict_percentage_change_list
        data_lists = self.__data_lists
        start = self.__target_index + self.__windows

        total_sse = 0
        for i in range(len(data_lists)):
            mean = all_predict_percentage_change_list[i]
            actual = data_lists[i][start:start+len(mean)]
            total_sse += self._weighted_distance(mean, actual)
        self.total_sse = total_sse
        self.sse_per_n_forecast = total_sse/self.__n_data_points
    

In [15]:
# Walk-forward evaluation of All_TSMD over target indices 700..1179:
# 14-step pattern, 5 closest matches, 16-step forecast.
indexs = list(range(700, 1180))
result_details = []
for index in indexs:
    all_tsmd = All_TSMD(asset_price_list, date_list, asset_pchange_list, p_date_list, asset_name_list, index, 14, 5, True)
    all_tsmd.fit()
    all_tsmd.transform(16)
    all_tsmd.score()
    result_details.append({"index":index, "total_sse": all_tsmd.total_sse, "average_sse": all_tsmd.sse_per_n_forecast})

# Flat views of the per-index scores, in sweep order.
total_sse_result = [entry["total_sse"] for entry in result_details]
average_sse = [entry["average_sse"] for entry in result_details]
np_total_sse_result = np.asarray(total_sse_result)
np_average_sse = np.asarray(average_sse)

sum_total_sse_result = np_total_sse_result.sum()
sum_average_sse = np_average_sse.sum()

print(sum_total_sse_result)

81325.60808592642


In [23]:
# Repeat the walk-forward evaluation for several pattern lengths and keep the
# summed total_sse of each length in `result` (4-step forecast, 5 matches).
sizes = [4, 5, 6, 7, 8]
result = []
for size in sizes:
    indexs = list(range(700, 1180))
    result_details = []
    for index in indexs:
        all_tsmd = All_TSMD(asset_price_list, date_list, asset_pchange_list, p_date_list, asset_name_list, index, size, 5, True)
        all_tsmd.fit()
        all_tsmd.transform(4)
        all_tsmd.score()
        result_details.append({"index":index, "total_sse": all_tsmd.total_sse, "average_sse": all_tsmd.sse_per_n_forecast})

    total_sse_result = [entry["total_sse"] for entry in result_details]
    average_sse = [entry["average_sse"] for entry in result_details]
    np_total_sse_result = np.asarray(total_sse_result)
    np_average_sse = np.asarray(average_sse)

    sum_total_sse_result = np_total_sse_result.sum()
    sum_average_sse = np_average_sse.sum()
    result.append(sum_total_sse_result)

In [39]:
# Compare matching with z-scored windows (fit(True)) against raw windows
# (fit(False)) over target indices 500..1099, collecting total_sse per index.
total_ssr = 0
total = []
nosdtotal_ssr = 0
nosdtotal = []
for i in range(500, 1100):
    for use_sd in (True, False):
        all_tsmd = All_TSMD(asset_price_list, date_list, asset_pchange_list, p_date_list, asset_name_list, i, 14, 5, True)
        all_tsmd.fit(use_sd)
        all_tsmd.transform(16)
        all_tsmd.score()
        if use_sd:
            total_ssr += all_tsmd.total_sse
            total.append(all_tsmd.total_sse)
        else:
            nosdtotal_ssr += all_tsmd.total_sse
            nosdtotal.append(all_tsmd.total_sse)

In [57]:
# NOTE(review): despite the variable name, this first selection keeps the
# three LARGEST values of `total` (kth=-3, last three) - confirm that the
# worst cases are what is wanted here.
# The printed numbers are positions inside the list, not target indices.
n_smallest_e_dist_list = np.partition(total, -3)[-3:]
n_smallest_index = np.flatnonzero(np.isin(total, n_smallest_e_dist_list))
print(n_smallest_index)

# Three smallest values of the non-standardised run.
n_smallest_e_dist_list = np.partition(nosdtotal, 3)[:3]
n_smallest_index = np.flatnonzero(np.isin(nosdtotal, n_smallest_e_dist_list))
print(n_smallest_index)

[527 528 529]
[369 370 393]


In [55]:
# Single run at target index 527 with z-scored matching, then plot every asset.
all_tsmd = All_TSMD(
    price_lists=asset_price_list,
    price_date_list=date_list,
    data_lists=asset_pchange_list,
    data_date_list=p_date_list,
    asset_name_list=asset_name_list,
    target_index=527,
    windows=14,
    n_closest=5,
    is_p_change=True,
)
all_tsmd.fit(sd=True)
all_tsmd.transform(n_data_points=16)
all_tsmd.plot(show_mean=True, show_all=True)

In [58]:
# Single run at target index 369 without z-scoring, then plot every asset.
all_tsmd = All_TSMD(
    price_lists=asset_price_list,
    price_date_list=date_list,
    data_lists=asset_pchange_list,
    data_date_list=p_date_list,
    asset_name_list=asset_name_list,
    target_index=369,
    windows=14,
    n_closest=5,
    is_p_change=True,
)
all_tsmd.fit(sd=False)
all_tsmd.transform(n_data_points=16)
all_tsmd.plot(show_mean=True, show_all=True)

In [59]:
# Score whichever `all_tsmd` instance is currently bound in the kernel (it must
# already have been fitted and transformed) and show its summed weighted error.
all_tsmd.score()
print(all_tsmd.total_sse)

0.21761593175428365


In [38]:
# Accumulated total_sse of the fit(False) runs from the range(500, 1100) sweep.
# NOTE(review): this cell's execution count is lower than the sweep cell's, so
# the captured output may come from an earlier version of the sweep - re-run.
print(nosdtotal_ssr)

149.22926067302822


In [19]:
# Single run at target index 1200 with z-scored matching, then plot every asset
# with both the individual and the mean forecasts.
all_tsmd = All_TSMD(
    price_lists=asset_price_list,
    price_date_list=date_list,
    data_lists=asset_pchange_list,
    data_date_list=p_date_list,
    asset_name_list=asset_name_list,
    target_index=1200,
    windows=14,
    n_closest=5,
    is_p_change=True,
)
all_tsmd.fit(sd=True)
all_tsmd.transform(n_data_points=16)
all_tsmd.plot(show_mean=True, show_all=True)

[ 0.45306405 -0.29510047 -1.21352393  1.15805612 -0.5221882  -0.6006892
 -0.1398047   1.37905164 -0.15237614  1.21620824  1.06196069 -0.35625383
  0.34839675 -2.33680102]
[-0.34238611 -1.2104033   1.0310162  -0.55701048 -0.63120306 -0.1956135
  1.23988273 -0.20749498  1.08597674  0.94019483 -0.40018316  0.26579375
 -2.27203105  1.25346139]
[-1.2578031   0.98899409 -0.60284264 -0.67721323 -0.24057859  1.19836173
 -0.25248857  1.04408649  0.89795481 -0.44563905  0.22193569 -2.32197794
  1.21197297  0.23523733]
[ 0.97792668 -0.70899181 -0.78780462 -0.32508942  1.19980003 -0.3377108
  1.0363098   0.88144955 -0.54239832  0.16505123 -2.53081254  1.21422428
  0.17914739 -0.42110143]
[-0.64103672 -0.72277775 -0.24287082  1.3386744  -0.25596113  1.16910985
  1.00849592 -0.46825362  0.26548053 -2.53054541  1.35363457  0.28010041
 -0.34245006 -0.21160017]
[-0.76031582 -0.27415321  1.32800778 -0.28741415  1.15623292  0.99352535
 -0.50247391  0.2408246  -2.5916481   1.34316296  0.25563506 -0.375030

In [32]:
# Show the sweep entries whose total_sse is among the three smallest values.
n_smallest_e_dist_list = np.partition(np_total_sse_result, 3)[:3]
n_smallest_index = np.flatnonzero(np.isin(np_total_sse_result, n_smallest_e_dist_list))
for rank in range(3):
    position = n_smallest_index[rank]
    print(result_details[position])

{'index': 978, 'total_sse': 77.93886648123873, 'average_sse': 4.871179155077421}
{'index': 979, 'total_sse': 77.0957915829112, 'average_sse': 4.81848697393195}
{'index': 982, 'total_sse': 81.46317086258279, 'average_sse': 5.091448178911424}


In [3]:
class New_TSMD():
    """Pattern matcher that also searches over several window lengths.

    Candidate windows of every length in ``search_windows_range`` are
    resampled to the target length with ``TimeSeriesResampler`` before being
    compared, so matches may be stretched or compressed versions of the
    target. The changes that followed each match are resampled by the same
    ratio and replayed on top of the latest target price.

    Typical use: ``fit(...)`` -> ``transform(n)`` -> ``score()`` / ``plot()``.
    """

    def __init__(self, asset_price_lists, asset_price_date_list, asset_percentage_change_lists, asset_percentage_change_date_list, asset_name_list):
        """Store the price series, change series, their dates and asset names."""
        self.__asset_price_lists = asset_price_lists
        self.__asset_price_date_list = asset_price_date_list
        self.__asset_percentage_change_lists = asset_percentage_change_lists
        self.__asset_percentage_change_date_list = asset_percentage_change_date_list
        self.__asset_name_list = asset_name_list

    @staticmethod
    def _weighted_distance(first, second):
        """Return sqrt(sum((first - second)**2 * [1, 2, ..., n]))."""
        squared_error = np.square(np.array(first) - np.array(second))
        factors = np.arange(len(squared_error)) + 1
        return np.sqrt(np.sum(squared_error * factors))

    def fit(self, target_index, target_windows, n_similar, search_windows_range):
        """Select the ``n_similar`` candidate windows closest to the target.

        Args:
            target_index: start of the target pattern in the change series.
            target_windows: length of the target pattern.
            n_similar: number of candidates to keep.
            search_windows_range: iterable of candidate window lengths.

        The error of a candidate is the unweighted Euclidean distance to the
        target, summed over all assets. The selected candidates are printed
        and kept as dicts with the keys ``search_windows``, ``index`` and
        ``sum_of_error``.
        """
        self.__target_index = target_index
        self.__target_windows = target_windows

        change_array = np.array(self.__asset_percentage_change_lists)
        before_target_data_lists = change_array[:, :target_index]
        target_data_lists = change_array[:, target_index:target_index+target_windows]
        self.__before_target_data_lists = before_target_data_lists
        self.__target_data_lists = target_data_lists

        num_of_data = len(before_target_data_lists[0])

        # Plain lists instead of np.append, which copies the array on every call.
        all_sum_of_error_list = []
        all_details_list = []

        for search_windows in search_windows_range:
            for i in range(num_of_data - search_windows + 1):
                sum_of_error = 0
                for target_data, asset_data in zip(target_data_lists, before_target_data_lists):
                    search_data = asset_data[i:i+search_windows]
                    resized_search_data = np.ravel(TimeSeriesResampler(sz=target_windows).fit_transform(search_data)[0])
                    squared_error = np.square(np.array(target_data) - np.array(resized_search_data))
                    sum_of_error += np.sqrt(np.sum(squared_error))
                all_sum_of_error_list.append(sum_of_error)
                all_details_list.append({"search_windows": search_windows, "index": i, "sum_of_error": sum_of_error})

        # A stable argsort returns exactly n_similar candidates even when
        # errors tie (earliest candidate wins) and tolerates fewer than
        # n_similar candidates. NaN errors are never selected.
        errors = np.asarray(all_sum_of_error_list, dtype=float)
        order = np.argsort(errors, kind="stable")
        order = order[~np.isnan(errors[order])][:n_similar]
        n_smallest_index = np.sort(order)

        self.__details_list = np.take(all_details_list, n_smallest_index)
        print(self.__details_list)

    def transform(self, n_data_points):
        """Build ``n_data_points``-long forecasts from the selected candidates.

        For each candidate, the changes that follow it (``n_data_points``
        scaled by ``search_windows / target_windows``, rounded up) are
        resampled to ``n_data_points`` values; all but the last are applied to
        the price at ``target_index + target_windows``. Forecast dates advance
        in 7-day steps from the date at that position.
        """
        details_list = self.__details_list
        asset_price_lists = self.__asset_price_lists
        asset_percentage_change_lists = self.__asset_percentage_change_lists
        asset_price_date_list = self.__asset_price_date_list
        target_windows = self.__target_windows
        anchor = self.__target_index + target_windows
        self.__n_data_points = n_data_points

        # The date axis is identical for every asset and candidate.
        latest_date = asset_price_date_list[anchor]
        new_date_list = [latest_date]
        for _ in range(n_data_points - 1):
            latest_date = latest_date + np.timedelta64(7, 'D')
            new_date_list.append(latest_date)

        all_prediction_list = []
        all_predict_mean_list = []
        all_predict_percentage_change_list = []

        for i in range(len(asset_percentage_change_lists)):
            percentage_change_list = asset_percentage_change_lists[i]
            price_list = asset_price_lists[i]
            # Based on this instance's own price lists rather than a notebook
            # global: assets 0-11 (and 12-18 when more than 20 assets are
            # given) compound, the rest accumulate additively.
            compounds = i < 12 or (len(asset_price_lists) > 20 and i < 19)

            asset_prediction_lists = []
            predict_percentage_change_list = []
            for details in details_list:
                start_index = details["index"]
                search_windows = details["search_windows"]
                scale = search_windows/target_windows
                follow_start = start_index + search_windows
                search_percentage_change_list = percentage_change_list[follow_start:follow_start+math.ceil(n_data_points*scale)]
                resized_percentage_change_list = np.ravel(TimeSeriesResampler(sz=n_data_points).fit_transform(search_percentage_change_list)[0])

                latest_price = price_list[anchor]
                new_data_list = [latest_price]
                for resized_percentage_change in resized_percentage_change_list[:-1]:
                    if compounds:
                        latest_price *= (1 + resized_percentage_change)
                    else:
                        latest_price += resized_percentage_change
                    new_data_list.append(latest_price)

                asset_prediction_lists.append(np.array(new_data_list))
                predict_percentage_change_list.append(resized_percentage_change_list)

            all_predict_mean_list.append(np.mean(np.array(asset_prediction_lists), axis=0))
            all_prediction_list.append(asset_prediction_lists)
            all_predict_percentage_change_list.append(np.mean(np.array(predict_percentage_change_list), axis=0))

        self.__all_prediction_list = all_prediction_list
        self.__new_date_list = np.array(new_date_list).astype('datetime64[D]')
        self.__all_predict_mean_list = np.array(all_predict_mean_list)
        self.__all_predict_percentage_change_list = np.array(all_predict_percentage_change_list)

    def plot(self, show_mean = True, show_all = True):
        """Show one plotly figure per asset; requires ``fit`` and ``transform``.

        Each figure contains the price series, the matched windows (red), the
        target window (yellow) and, depending on the flags, every individual
        forecast (gray) and the mean forecast (green).
        """
        details_list = self.__details_list
        asset_price_lists = self.__asset_price_lists
        asset_price_date_list = self.__asset_price_date_list
        target_index = self.__target_index
        target_end = target_index + self.__target_windows + 1
        asset_name_list = self.__asset_name_list
        all_prediction_list = self.__all_prediction_list
        new_date_list = self.__new_date_list

        for i in range(len(asset_price_lists)):
            asset_price_list = asset_price_lists[i]
            fig = go.Figure()
            fig.add_trace(go.Scatter(x=asset_price_date_list[:len(asset_price_list)], y=asset_price_list, name='index',  mode='lines'))
            for j in range(len(details_list)):
                details = details_list[j]
                index = details["index"]
                # A window of n changes spans n + 1 prices.
                stop = index + details["search_windows"] + 1
                fig.add_trace(go.Scatter(x=asset_price_date_list[index:stop], y=asset_price_list[index:stop], name= f"similar {j}", line = dict(color='red'),  mode='lines'))
                if show_all:
                    fig.add_trace(go.Scatter(x=new_date_list, y=all_prediction_list[i][j], name= f"predict {j}", line = dict(color='gray'),  mode='lines'))

            mean_list = self.__all_predict_mean_list[i]
            if show_mean:
                fig.add_trace(go.Scatter(x=new_date_list, y=mean_list, name= f"mean predict", line = dict(color='green'),  mode='lines'))
            fig.add_trace(go.Scatter(x=asset_price_date_list[target_index:target_end], y=asset_price_list[target_index:target_end], name= f"search target", line = dict(color='yellow'),  mode='lines'))

            fig.update_layout(title=asset_name_list[i],
                   xaxis_title='Date',
                   yaxis_title='Value')
            fig.show()

    def score(self):
        """Compare the mean predicted changes with the realised ones.

        Requires ``transform``. For each asset the position-weighted distance
        between the mean predicted changes and the ``n_data_points`` changes
        starting at ``target_index + target_windows`` is computed.

        Sets:
            total_sse: sum of the per-asset distances.
            sse_per_n_forecast: ``total_sse`` divided by ``n_data_points``.
        """
        all_predict_percentage_change_list = self.__all_predict_percentage_change_list
        n_data_points = self.__n_data_points
        asset_percentage_change_lists = self.__asset_percentage_change_lists
        start = self.__target_index + self.__target_windows

        total_sse = 0
        for i in range(len(all_predict_percentage_change_list)):
            target_data = asset_percentage_change_lists[i][start:start+n_data_points]
            total_sse += self._weighted_distance(target_data, all_predict_percentage_change_list[i])
        self.total_sse = total_sse
        self.sse_per_n_forecast = total_sse/n_data_points
        

In [None]:
# Walk-forward evaluation of New_TSMD for several target pattern lengths; the
# candidate window lengths are the same for every run.
sizes = [12, 13, 15, 16]
windows_range = [12, 13, 14, 15]
result = []
for size in sizes:
    indexs = list(range(700, 1180))
    result_details = []
    for index in indexs:
        newtsm = New_TSMD(asset_price_list, date_list, asset_pchange_list, p_date_list, asset_name_list)
        newtsm.fit(index, size, 5, windows_range)
        newtsm.transform(16)
        newtsm.score()
        result_details.append({"index":index, "total_sse": newtsm.total_sse, "average_sse": newtsm.sse_per_n_forecast})

    total_sse_result = [entry["total_sse"] for entry in result_details]
    average_sse = [entry["average_sse"] for entry in result_details]
    np_total_sse_result = np.asarray(total_sse_result)
    np_average_sse = np.asarray(average_sse)

    sum_total_sse_result = np_total_sse_result.sum()
    sum_average_sse = np_average_sse.sum()
    result.append(sum_total_sse_result)
    print(sum_total_sse_result, sum_average_sse)

In [36]:
# Plot the total_sse of the most recent sweep against the dates at the swept
# positions. The offset comes from the sweep's own `indexs` list (assigned in
# the same cells that assign `np_total_sse_result`) instead of a hard-coded
# 700, so the x-axis stays aligned if the sweep range changes.
sweep_start = indexs[0]
fig = go.Figure()
fig.add_trace(go.Scatter(x = date_list[sweep_start:sweep_start+len(np_total_sse_result)],y=np_total_sse_result, name='sse result'))
fig.update_layout(title='sse result',
            xaxis_title='Date',
            yaxis_title='sse')
fig.show()

In [14]:
# Single New_TSMD run at target index 1200: 14-step target, candidates of
# 13-16 steps, 5 matches, 16-step forecast, then plot every asset.
newtsm = New_TSMD(
    asset_price_lists=asset_price_list,
    asset_price_date_list=date_list,
    asset_percentage_change_lists=asset_pchange_list,
    asset_percentage_change_date_list=p_date_list,
    asset_name_list=asset_name_list,
)
windows = 14
windows_range = [13, 14, 15, 16]
newtsm.fit(target_index=1200, target_windows=windows, n_similar=5, search_windows_range=windows_range)
newtsm.transform(n_data_points=16)
newtsm.plot(show_mean=True, show_all=True)

[{'search_windows': 13, 'index': 79, 'sum_of_error': 79.6095364717148}
 {'search_windows': 13, 'index': 934, 'sum_of_error': 86.745430271728}
 {'search_windows': 13, 'index': 1152, 'sum_of_error': 85.22953772654267}
 {'search_windows': 15, 'index': 77, 'sum_of_error': 87.3397532573639}
 {'search_windows': 16, 'index': 77, 'sum_of_error': 74.32265039660047}]


In [26]:
# Toy demonstration: three series with the same shape but different amplitude
# are far apart in raw squared error and coincide once z-scored.
A = np.array([0, 10, 0, 10, 0])
B = np.array([0, 30, 0, 30, 0])
C = np.array([0, 20, 0, 20, 0])

# Raw comparison: A vs C, then B vs C.
for lhs, rhs in ((A, C), (B, C)):
    sse = np.sum((lhs - rhs) ** 2)
    print(sse)

A, B, C = (zscore(series) for series in (A, B, C))

# Standardised comparison: A vs B, then B vs C.
for lhs, rhs in ((A, B), (B, C)):
    sse = np.sum((lhs - rhs) ** 2)
    print(sse)

200
200
1.355854680848614e-31
1.355854680848614e-31
