In [62]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [63]:
price_df = pd.read_csv("/Users/shurui/python/sparseDictionaryTimeSeries/Data/Dow Jones Industrial Average Historical Data - Daily.csv")
price_df.columns = price_df.columns.map(lambda x: x.lower())
price_df.loc[:, "price"] = price_df["price"].map(lambda x: x.replace(",", ""))
price_df.loc[:, "price"] = price_df["price"].astype("float")
time_series = np.array(price_df["price"])

In [64]:
class data_maker():
    def __init__(self, data, task) -> None:
        self.data = data.astype("float")
        self.task = task
        self.data_diff = np.diff(data)
        self.data_return = np.diff(data)/data[:-1]

    def create_sliding_windows(self, data_type, window_size, stride):
        if data_type == "origin":
            data = self.data
        if data_type=="return":
            data = self.data_return
        if data_type=="diff":
             data = self.data_diff
        num_samples = len(data)
        num_windows = ((num_samples - window_size) // stride) + 1
        X_matrix = np.zeros((num_windows, window_size))

        for i in range(num_windows):
            start = i * stride
            end = start + window_size
            window = data[start:end]
            X_matrix[i, :] = window.T

        return X_matrix
    
    def classifiction_pm(self, data_type, window_size, stride, test_size):
        if data_type == "origin":
            data = self.data
        if data_type=="return":
            data = self.data_return
        if data_type=="diff":
             data = self.data_diff
        num_samples = len(data)
        num_windows = ((num_samples - window_size) // stride) + 1
        X_matrix_m = []
        X_matrix_p = []
        if data_type == "origin":
            for i in range(num_windows-1):
                start = i * stride
                end = start + window_size
                window = data[start:end]
                if data[end] >= window[-1]:
                    X_matrix_p.append(window.T)
                else:
                    X_matrix_m.append(window.T)
        else:
            for i in range(num_windows-1):
                start = i * stride
                end = start + window_size
                window = data[start:end]
                if data[end] >= 0:
                    X_matrix_p.append(window.T)
                else:
                    X_matrix_m.append(window.T)

        return np.array(X_matrix_p[:-test_size]), np.array(X_matrix_p[-test_size:]), np.array(X_matrix_m[:-test_size]), np.array(X_matrix_m[-test_size:])

In [65]:
class alternated_resolution():
    
    def __init__(self, time_series, K, K0) -> None:
        self.X = time_series.T
        self.K = K
        self.K0 = K0
        self.num = time_series.shape[0]
        self.length = time_series.shape[1]
        np.random.seed(123)
        self.D = np.random.randn(self.length, self.K)

    def projection(self, z):
        res = z.copy()
        sort = np.sort(res, axis=1)[:, -self.K0]
        res = np.where(res - sort[:,np.newaxis]>=0, res, 0)
        return res
    
    def projection_D(self, y=None):
        if y is None:
            norm = np.linalg.norm(self.D, axis=0)
            norm = np.where(norm > 1, norm**2, 1)
            self.D = self.D/norm
            return self.D
        else:
            norm = np.linalg.norm(y, axis=0)
            norm = np.where(norm > 1, norm**2, 1)
            y = y/norm
            return y

    def sparse_coding(self, tol, max_iter, time_series=None):
        projection = False
        if time_series is None:
            time_series = self.X
            projection = True
        L = np.sort(np.linalg.eigvalsh(np.matmul(self.D.T, self.D)))[-1]
        step_size = 1 / L
        sparse_code = np.zeros((self.K, time_series.shape[1]))
        sparse_code_next = sparse_code
        sparse_code_change = 1
        count = 0
        while sparse_code_change > tol and count < max_iter:
            sparse_code = sparse_code_next
            count += 1
            sparse_code_next = sparse_code - step_size * np.matmul(self.D.T, np.matmul(self.D, sparse_code) - time_series)
            if projection:
                sparse_code_next = self.projection(sparse_code_next)
            sparse_code_change = np.max(np.abs(sparse_code_next - sparse_code))
        return sparse_code_next
    
    def dictionary_learning(self, tol, max_iter, z):
        L = np.sort(np.linalg.eigvalsh(np.matmul(z, z.T)))[-1]
        step_size = 1 / L
        D_change = 1
        D_next = self.D
        count = 0
        while D_change > tol and count < max_iter:
            self.D = D_next
            count += 1
            self.D = self.D - step_size * (np.matmul(np.matmul(self.D, z) - self.X, z.T))
            D_next = self.projection_D()
            D_change = np.max(np.abs(D_next - self.D))
        return self.D 
    
    def alternated_resolution(self, tol, max_iter, iter):
        for i in range(iter):
            print(i)
            Z = self.sparse_coding(tol, max_iter)
            self.dictionary_learning(tol, max_iter, Z)
        return self.D, Z

In [66]:
model_data = data_maker(time_series, "classification_pm")
X_re_p, X_test_p, X_re_m, X_test_m = model_data.classifiction_pm("return", 50, 1, 100)

In [67]:
model_m = alternated_resolution(X_re_m.astype("float"), K=150, K0=15)
_, code_m = model_m.alternated_resolution(1e-6, 1000, 100)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99


In [73]:
code_m[:,4]

array([0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.     