# Time Series (TS) Processing

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from importlib import reload

In [2]:
import TimeSeriesGenerator as TSG
import TimeSeries as TS
import DWMYTransformator as T
import DWMYGroupTransformator as GT
import DWMYDFTransformator as DFT

import TSToIndexTransformator as TST
import PipelineTSToIndex as PI

In [3]:
# Create one instance of each helper class; the cells below reuse these
# notebook-level names (tsg, ts, dft, gt).
tsg = TSG.TimeSeriesGenerator()
ts = TS.TimeSeries()
t = T.DWMYTransformator()  # NOTE(review): `t` is not referenced in the cells shown here
gt = GT.DWMYGroupTransformator()
dft = DFT.DWMYDFTransformator()

## Data Generator

def generate_df(ts_lengths=[20, 30, 40, 50], int_data=True):
    """Build a DataFrame holding one generated time series per row.

    Parameters
    ----------
    ts_lengths : iterable of int
        Length ``n`` requested for each series; one row is produced per entry.
        The default list is only read, never mutated.
    int_data : bool
        Forwarded to ``tsg.generate_day_data``. (Previously this argument was
        accepted but ignored, and ``False`` was always passed.)

    Returns
    -------
    pandas.DataFrame
        Column ``ID`` holds 0, 1, 2, ... in input order; column ``DATA`` holds
        whatever ``tsg.generate_day_data`` returned for the matching length.
    """
    ids = []
    series = []
    for idx, n in enumerate(ts_lengths):
        ids.append(idx)
        # Relies on the notebook-level generator instance `tsg`.
        series.append(tsg.generate_day_data(int_data=int_data, n=n))

    df = pd.DataFrame()
    df["ID"] = ids
    df["DATA"] = series

    return df

In [4]:
# Generate the sample frame; per the saved output it has an ID column and a
# DATA column whose cells hold a list of (datetime, value) tuples.
df_all = tsg.generate_sample_ts_df()
df_all

Unnamed: 0,ID,DATA
0,10,"[(2019-08-09 00:00:00, 1), (2019-09-09 00:00:0..."
1,20,"[(2019-08-09 00:00:00, 1), (2019-09-09 00:00:0..."
2,30,"[(2019-08-09 00:00:00, 1), (2019-09-09 00:00:0..."
3,40,"[(2019-08-09 00:00:00, 1), (2019-09-09 00:00:0..."


In [5]:
# Pick the DATA cell of the first row (row 0, column 1) as the working series.
# NOTE(review): a later cell repeats this exact assignment, so one of the two is redundant.
ts_list = df_all.iloc[0,1]

## One TS

### Metaparameters

In [6]:
# Parameters shared by the single-series steps and the pipeline calls below.
atr_names = ["TIME", "VALUE"]  # column names handed to set_ts_list / fit
time_type = "d"  # presumably day granularity (the transformed column is named TIME_d) -- confirm
group_fun = sum  # Python's built-in sum, passed on as the grouping function

### Step by step

#### TS as a list

In [7]:
# First row's DATA cell again; preview the first two (datetime, value) tuples.
ts_list = df_all.iloc[0,1]
ts_list[0:2]

[(datetime.datetime(2019, 8, 9, 0, 0), 1),
 (datetime.datetime(2019, 9, 9, 0, 0), 1)]

#### Build a DataFrame from it

In [8]:
# Load the list into the TimeSeries helper under the given column names and
# read it back as a DataFrame.
# NOTE(review): the saved preview starts at 2019-08-01 while the list preview
# starts at 2019-08-09 -- presumably rows get reordered along the way; confirm.
ts.set_ts_list(ts_list, atr_names)
df = ts.get_ts_df()
df.head(2)

Unnamed: 0,TIME,VALUE
0,2019-08-01,1
1,2019-08-01,1


#### Transform the time column

In [9]:
# Replace the TIME column with its `time_type` encoding; the saved output shows
# columns VALUE and TIME_d with integer-looking values such as 20190801.
# NOTE(review): `df` is rebound to the result, so running this cell a second time
# passes the already transformed frame back into fit.
df = dft.fit(df, time_type, atr_names)
df.head()

Unnamed: 0,VALUE,TIME_d
0,1,20190801
1,1,20190801
2,1,20190805
3,1,20190809
4,1,20190813


#### Grouping

In [10]:
# Aggregate VALUE per TIME_d with group_fun; in the saved output the two
# 20190801 rows (each VALUE 1) become a single row with VALUE 2.
df_out = gt.fit(df, time_type, atr_names, group_fun)
df_out.head()

Unnamed: 0,TIME_d,VALUE
0,20190801,2
1,20190805,1
2,20190809,1
3,20190813,2
4,20190822,1


#### Convert to an index

In [11]:
# Reload the module first so that edits to its source are picked up without
# restarting the kernel, then create a fresh transformer from it.
reload(TST)
tst = TST.TSToIndexTransformator()

In [12]:
# Feed the grouped VALUE column (as a pandas array) to fitpredict; the saved
# output is a single float.
a = tst.fitpredict(df_out["VALUE"].array)
a

0.6643574057122259

### Pipeline

In [13]:
# Reload the pipeline module (picks up source edits) and instantiate the pipeline.
reload(PI)
p = PI.PipelineTSToIndex()

In [14]:
# Run the raw list through the pipeline in one call; the printed value equals the
# step-by-step result above (0.6643574057122259).
# NOTE(review): this reaches into an underscore-prefixed method of the pipeline.
print(p._create_index_for_ts_list(ts_list, time_type, atr_names, group_fun))

0.6643574057122259


## TS DF

In [15]:
# Show the full sample frame again before processing every row.
df_all

Unnamed: 0,ID,DATA
0,10,"[(2019-08-09 00:00:00, 1), (2019-09-09 00:00:0..."
1,20,"[(2019-08-09 00:00:00, 1), (2019-09-09 00:00:0..."
2,30,"[(2019-08-09 00:00:00, 1), (2019-09-09 00:00:0..."
3,40,"[(2019-08-09 00:00:00, 1), (2019-09-09 00:00:0..."


In [16]:
# NOTE(review): same reload + instantiation as the earlier pipeline cell; `p` is rebound.
reload(PI)
p = PI.PipelineTSToIndex()

In [17]:
atr_names_df = ["ID", "DATA"]  # column names of the outer frame (df_all)
atr_names_ts = ["TIME", "VALUE"]  # column names for each series; same values as atr_names

In [18]:
# Process the whole frame with fun_type="entropy".
# presumably yields one index value per row of df_all -- the saved result below has one row per ID
entropy = p.execute(df_all, time_type, atr_names_df, atr_names_ts, group_fun, fun_type="entropy")

In [19]:
# Saved output: one row per ID with the computed value in DATA; ID 10's 0.664357
# matches the single-series result obtained earlier.
entropy

Unnamed: 0,DATA,ID
0,0.664357,10
1,0.515273,20
2,0.63955,30
3,0.621226,40
