In [8]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from functools import reduce
from tslearn.preprocessing import TimeSeriesScalerMeanVariance, TimeSeriesResampler


Install h5py to use hdf5 features: http://docs.h5py.org/
  warn(h5py_msg)


In [9]:
#loading data

# Workbook read by this cell. Sheet index 2 fills ``asset_pchange_list`` and
# sheet index 1 fills ``asset_price_list``.
WORKBOOK_PATH = 'PCA.xlsx'
# Number of leading observations (after reversal) assigned to the training split.
TRAIN_SIZE = 960


def load_asset_sheet(path, sheet_index):
    """Read one worksheet and return ``(sheet, dates, series)``.

    Args:
        path: Excel workbook to read.
        sheet_index: Value forwarded to ``pd.read_excel(sheet_name=...)``.

    Returns:
        sheet: the raw DataFrame as read.
        dates: column 0 converted to ``datetime64[D]`` and reversed.
        series: one 1-D array per remaining column, with NaN entries dropped
            and the order reversed.

    Note:
        The reversal presumably turns a newest-first sheet into chronological
        order -- confirm against the workbook. Dates are NOT filtered by the
        per-asset NaN masks, so ``dates`` can be longer than a series.
    """
    sheet = pd.read_excel(path, sheet_name=sheet_index)
    dates = np.flip(sheet.iloc[:, 0].to_numpy().astype('datetime64[D]'))
    series = []
    for column in range(1, len(sheet.columns)):
        values = sheet.iloc[:, column].to_numpy()
        series.append(np.flip(values[~np.isnan(values)]))
    return sheet, dates, series


def _stack_series(series, label):
    """Stack equal-length 1-D series into a 2-D (asset, time) array.

    Raises:
        ValueError: if the series differ in length after NaN removal; a
            rectangular array (and therefore the column split below) is then
            impossible.
    """
    lengths = sorted({len(values) for values in series})
    if len(lengths) > 1:
        raise ValueError(
            f"{label}: assets have different numbers of non-NaN observations {lengths}; "
            "cannot build a rectangular array"
        )
    return np.array(series)


df, p_date_list, asset_pchange_list = load_asset_sheet(WORKBOOK_PATH, 2)
df, date_list, asset_price_list = load_asset_sheet(WORKBOOK_PATH, 1)
# Kept for backward compatibility with cells that may read it: column count of
# the last sheet loaded (date column included).
num_of_asset = len(df.columns)

pchange_matrix = _stack_series(asset_pchange_list, 'asset_pchange_list')
training_asset_pchange_list = pchange_matrix[:, :TRAIN_SIZE]
testing_asset_pchange_list = pchange_matrix[:, TRAIN_SIZE:]

training_p_date_list = np.array(p_date_list)[:TRAIN_SIZE]
testing_p_date_list = np.array(p_date_list)[TRAIN_SIZE:]

price_matrix = _stack_series(asset_price_list, 'asset_price_list')
training_asset_price_list = price_matrix[:, :TRAIN_SIZE]
testing_asset_price_list = price_matrix[:, TRAIN_SIZE:]

training_date_list = np.array(date_list)[:TRAIN_SIZE]
testing_date_list = np.array(date_list)[TRAIN_SIZE:]

In [19]:
class TSMD():
    """Sliding-window nearest-match search over a single 1-D series.

    ``search`` slides a window the length of the query across ``data_array``,
    records the Euclidean distance of every window to the query, and keeps the
    ``n`` closest window start positions.

    Attributes (populated by ``search``):
        search_target: the query passed to the last ``search`` call.
        target_window: length of that query.
        e_dist_list: list with one distance per window start position.
        n_smallest_index: start positions of the ``n`` closest windows, in
            ascending position order.
        n_smallest_e_dist_list: distances of those windows, aligned with
            ``n_smallest_index``.
    """

    def __init__(self, data_array):
        self.data_array = data_array
        self.e_dist_list = []
        self.n_smallest_e_dist_list = []
        self.n_smallest_index = []
        self.target_window = 0

    @staticmethod
    def _prepare_window(window, p_change):
        """Return the window as compared: raw if ``p_change``, else z-normalised."""
        if p_change:
            return np.array(window)
        return TimeSeriesScalerMeanVariance().fit_transform([window])[0]

    @staticmethod
    def _n_smallest(distances, n):
        """Return ``(positions, values)`` of the ``n`` smallest distances.

        Unlike ``np.partition(distances, n)[:n]`` followed by ``np.isin``, this
        works when ``n`` is at least the number of distances (or there are
        none), and returns exactly ``min(n, len(distances))`` positions even
        when distances tie (earlier positions win).
        """
        values = np.asarray(distances, dtype=float)
        keep = max(0, min(int(n), values.size))
        # Stable sort makes tie-breaking deterministic; the final sort restores
        # ascending position order, matching what np.where used to produce.
        positions = np.sort(np.argsort(values, kind='stable')[:keep])
        return positions, values[positions]

    def search(self, search_target, n=3, p_change = True):
        """Find the ``n`` windows of ``data_array`` closest to ``search_target``.

        Args:
            search_target: 1-D query sequence; its length sets the window size.
            n: number of closest windows to keep.
            p_change: if True, windows and query are compared as given; if
                False, each window and the query are first rescaled with
                ``TimeSeriesScalerMeanVariance`` so only shape is compared.

        Returns:
            None. Results are stored on the instance (see class docstring).
            If ``data_array`` is shorter than the query there are no windows
            and the result attributes are empty.
        """
        self.search_target = search_target
        target_window = len(search_target)
        self.target_window = target_window
        data_array = self.data_array

        # The query is loop-invariant, so prepare (and possibly scale) it once.
        target = self._prepare_window(search_target, p_change)
        e_dist_list = [
            np.linalg.norm(
                self._prepare_window(data_array[start:start + target_window], p_change) - target
            )
            for start in range(len(data_array) - target_window + 1)
        ]
        self.e_dist_list = e_dist_list

        n_smallest_index, n_smallest_e_dist_list = self._n_smallest(e_dist_list, n)
        self.n_smallest_e_dist_list = n_smallest_e_dist_list
        self.n_smallest_index = n_smallest_index

def plot(index_data_arr, index_date_arr, p_data_arr, p_date_arr, n_smallest_index, target_window, target_index):
    """Plot a series and highlight the matched windows and the target window.

    Args:
        index_data_arr: values drawn as the base line.
        index_date_arr: x-axis values; only the first ``len(index_data_arr)``
            entries are used for the base line.
        p_data_arr: accepted for interface compatibility; not used here.
        p_date_arr: accepted for interface compatibility; not used here.
        n_smallest_index: start positions of matched windows (drawn in red).
        target_window: number of points in every highlighted window.
        target_index: start position of the target window (drawn in yellow).
    """
    def window_at(start):
        # Slice covering `target_window` points beginning at `start`.
        return slice(start, start + target_window)

    _, ax = plt.subplots(figsize=(26, 9))
    ax.plot(index_date_arr[:len(index_data_arr)], index_data_arr)

    for match_start in n_smallest_index:
        span = window_at(match_start)
        ax.plot(index_date_arr[span], index_data_arr[span], 'r-')

    # The target is drawn last so it stays visible on top of any overlapping match.
    target_span = window_at(target_index)
    ax.plot(index_date_arr[target_span], index_data_arr[target_span], 'y-')

    ax.set_xlabel('Date')
    ax.set_ylabel('Value')
    ax.set_title('Time Series Data')
    plt.show()

In [28]:
# Query: `window` points of the test split beginning at `start_index`;
# keep the `n` closest training windows.
start_index, window, n = 1, 10, 5
per = False  # NOTE(review): not referenced in the visible cells -- confirm it is still needed

# One distance curve per asset, each computed against that asset's own query.
tsdm_result = []
for i, training_series in enumerate(training_asset_pchange_list):
    query = testing_asset_pchange_list[i][start_index:start_index + window]
    tsmd = TSMD(training_series)
    tsmd.search(query, n)
    tsdm_result.append(tsmd.e_dist_list)

# Sum the curves position-wise so a window start scores well only if it is
# close to the query for the assets taken together.
e_dist_list = reduce(np.add, tsdm_result)
n_smallest_e_dist_list = np.partition(e_dist_list, n)[:n]
n_smallest_index = np.where(
    np.isin(e_dist_list, n_smallest_e_dist_list)
)[0]
print(n_smallest_index)
# Print the result
# for i in range(len(asset_pchange_list)):
#     target_index = 960+start_index
#     plot(asset_price_list[i], date_list, asset_pchange_list[i], p_date_list, n_smallest_index, window, target_index)


[301 334 336 354 360]


In [None]:
# Same pooled search as the percent-change cell, but on prices with
# p_change=False, so every window is z-normalised before comparison.
# Relies on `start_index`, `window` and `n` already defined in the kernel.
tsdm_result = []
for i, training_series in enumerate(training_asset_price_list):
    query = testing_asset_price_list[i][start_index:start_index + window]
    tsmd = TSMD(training_series)
    tsmd.search(query, n, False)
    tsdm_result.append(tsmd.e_dist_list)

e_dist_list = reduce(np.add, tsdm_result)
n_smallest_e_dist_list = np.partition(e_dist_list, n)[:n]
n_smallest_index = np.where(
    np.isin(e_dist_list, n_smallest_e_dist_list)
)[0]
# Print the result
# The query lives in the test split, which starts at position 960 of the full series.
for i, _ in enumerate(asset_pchange_list):
    target_index = 960 + start_index
    plot(
        asset_price_list[i],
        date_list,
        asset_pchange_list[i],
        p_date_list,
        n_smallest_index,
        window,
        target_index,
    )

In [53]:
# Debug check: show the container type of the pooled distances (`e_dist_list`)
# left in the kernel by a previous search cell.
print(type(e_dist_list))

<class 'numpy.ndarray'>


In [None]:
# Single-asset comparison of the two search modes.
# NOTE: this cell rebinds `start_index`, `window` and `n`, which other cells read.
start_index, window, n = 1140, 4, 12
i = 0

p_asset, asset = asset_pchange_list[i], asset_price_list[i]

# The query is cut from the same series that is searched, so the query's own
# window has distance 0 and is always among the matches.
p_tsdm = TSMD(p_asset)
p_tsdm.search(search_target=p_asset[start_index:start_index + window], n=n)
plot(
    index_data_arr=asset,
    index_date_arr=date_list,
    p_data_arr=p_asset,
    p_date_arr=p_date_list,
    n_smallest_index=p_tsdm.n_smallest_index,
    target_window=window,
    target_index=start_index,
)

# Same query position, but matched on z-normalised price windows.
tsdm = TSMD(asset)
tsdm.search(search_target=asset[start_index:start_index + window], n=n, p_change=False)
plot(
    index_data_arr=asset,
    index_date_arr=date_list,
    p_data_arr=p_asset,
    p_date_arr=p_date_list,
    n_smallest_index=tsdm.n_smallest_index,
    target_window=window,
    target_index=start_index,
)

In [17]:
class All_TSMD():
    """Nearest-window search pooled across several equally long series.

    The query for each series is the ``windows`` points starting at
    ``target_index``; the search space is everything before ``target_index``.
    Per-series Euclidean distance curves are summed position-wise and the
    ``n_closest`` window start positions with the smallest sum are kept.

    Attributes (populated by ``fit``):
        cumulative_e_dist_list: summed distance per window start position.
        n_smallest_index: the ``n_closest`` best start positions, ascending.
        n_smallest_e_dist_list: summed distances aligned with
            ``n_smallest_index``.
    """

    def __init__(self, price_lists, price_date_list, data_lists, data_date_list, target_index, windows=10, n_closest=5, is_p_change = True):
        """Store the inputs and split ``data_lists`` at ``target_index``.

        Args:
            price_lists, price_date_list, data_date_list: stored but not read
                by ``fit``.
            data_lists: 2-D array-like (series, time) that is searched.
            target_index: column where the query window starts.
            windows: query/window length.
            n_closest: number of best positions to keep.
            is_p_change: if True compare windows as given; if False rescale
                each window and the query with ``TimeSeriesScalerMeanVariance``.
        """
        self.__price_lists = price_lists
        self.__price_date_list = price_date_list
        self.__data_lists = data_lists
        self.__data_date_list = data_date_list
        self.__target_index = target_index
        self.__windows = windows
        self.__n_closest = n_closest
        self.__is_p_change = is_p_change

        data_matrix = np.array(data_lists)
        self.__before_target_data_lists = data_matrix[:, :target_index]
        self.__target_data_lists = data_matrix[:, target_index:target_index+windows]

        self.cumulative_e_dist_list = np.array([], dtype=float)
        self.n_smallest_e_dist_list = np.array([], dtype=float)
        self.n_smallest_index = np.array([], dtype=int)

    def _prepare_window(self, window):
        """Return the window as compared: raw for p-change data, else z-normalised."""
        if self.__is_p_change:
            return np.array(window)
        return TimeSeriesScalerMeanVariance().fit_transform([window])[0]

    @staticmethod
    def _n_smallest(distances, n):
        """Return ``(positions, values)`` of the ``n`` smallest distances.

        Works when ``n`` is at least the number of distances and returns
        exactly ``min(n, len(distances))`` positions even when values tie,
        which ``np.partition(d, n)[:n]`` + ``np.isin`` did not.
        """
        values = np.asarray(distances, dtype=float)
        keep = max(0, min(int(n), values.size))
        # Stable sort => deterministic tie-breaking; final sort => ascending positions.
        positions = np.sort(np.argsort(values, kind='stable')[:keep])
        return positions, values[positions]

    def fit(self):
        """Compute pooled distances, store and print the closest positions.

        Returns:
            None. Results are stored on the instance (see class docstring);
            the closest positions are also printed.

        Raises:
            ValueError: if fewer than ``windows`` points exist at
                ``target_index``, i.e. the query window is truncated.
        """
        windows = self.__windows
        before_target_data_lists = self.__before_target_data_lists
        target_data_lists = self.__target_data_lists

        # A truncated query would either fail deep inside numpy or, at length
        # 1, be silently broadcast against every window.
        if target_data_lists.shape[1] != windows:
            raise ValueError(
                f"target window starting at {self.__target_index} has "
                f"{target_data_lists.shape[1]} points, expected {windows}"
            )

        all_asset_e_dist_list = []
        for asset_data, target_data in zip(before_target_data_lists, target_data_lists):
            # The query is the same for every window, so prepare it once per asset.
            target = self._prepare_window(target_data)
            e_dist_list = [
                np.linalg.norm(self._prepare_window(asset_data[j:j+windows]) - target)
                for j in range(len(asset_data) - windows + 1)
            ]
            all_asset_e_dist_list.append(e_dist_list)

        cumulative_asset_e_dist_list = reduce(np.add, all_asset_e_dist_list)
        n_smallest_index, n_smallest_e_dist_list = self._n_smallest(
            cumulative_asset_e_dist_list, self.__n_closest
        )

        self.cumulative_e_dist_list = np.asarray(cumulative_asset_e_dist_list, dtype=float)
        self.n_smallest_e_dist_list = n_smallest_e_dist_list
        self.n_smallest_index = n_smallest_index
        print(n_smallest_index)


        


In [29]:
# Pooled percent-change search; the query window starts at position 961 of
# the full series and everything before it is searched.
all_tsmd = All_TSMD(
    price_lists=asset_price_list,
    price_date_list=date_list,
    data_lists=asset_pchange_list,
    data_date_list=p_date_list,
    target_index=961,
    windows=10,
    n_closest=5,
    is_p_change=True,
)
all_tsmd.fit()

[301 334 336 354 360]


In [2]:
# Scratch check of slice semantics: [start:start+k] yields k items beginning at `start`.
print([1,2,3,4,5,6,7][1:1+2])

[2, 3]
