# Get Descriptive Table of Multiple Series

## Dependencies

In [1]:
import os

import pandas as pd

from src.utils.paths import *
from src.utils import csv_exporter, latex_exporter

Loading helper functions...
Loading paths...


In [2]:
# Define datasets you are interested in

In [22]:
# Names of the simulated series; reused below so the lists cannot drift apart.
simulated_series = ["RW", "TrendSeasRW", "WeakSARIMA", "StrongSARIMA", "SARIMAX"]

# Datasets to look up on disk: every simulated series plus "eurusd".
dataset_names = [*simulated_series, "eurusd"]

# Column names of interest; the listing order here is also the desired row order.
filter_variables = [*simulated_series, "x1", "x2", "x3", "bid_close"]

In [30]:
# Build one descriptive-statistics row per selected column across all requested
# dataset files found (recursively) under DATA_DIR.

# Accept dataset names with or without the ".csv" suffix. Previously a name that
# already ended in ".csv" was never added and its file was silently skipped.
dataset_csv = [
    name if name.endswith(".csv") else name + ".csv" for name in dataset_names
]

# Frequency aliases treated as monthly; "ME" is the month-end alias reported by
# newer pandas releases in place of "M".
monthly_aliases = ("M", "MS", "ME")

# Collect per-file summaries and concatenate once at the end instead of growing
# a DataFrame inside the loop.
summaries = []
for dirpath, _dirnames, files in os.walk(DATA_DIR):
    for file in files:
        if file not in dataset_csv:
            continue

        df = pd.read_csv(os.path.join(dirpath, file), index_col=0)
        # Keep only the columns listed in filter_variables.
        df = df.loc[:, df.columns.isin(filter_variables)]
        df.index = pd.DatetimeIndex(df.index)

        # The inferred frequency only decides how start/end are formatted.
        # The former discarded `df.index.to_period()` call was dropped: its
        # result was unused and it raises when no frequency can be inferred.
        inferred_freq = pd.infer_freq(df.index)
        if inferred_freq in monthly_aliases:
            fmt = "%Y-%m"
        else:  # e.g. daily / business-day / intraday / irregular data
            fmt = "%Y-%m-%d"

        # First/last index entries are used as-is (no sorting is applied), and
        # only those two timestamps are formatted rather than the whole index.
        start = df.index[0].strftime(fmt)
        end = df.index[-1].strftime(fmt)
        start_end_df = pd.DataFrame(
            {"start": [start] * len(df.columns), "end": [end] * len(df.columns)},
            index=df.columns,
        )

        # describe() is transposed so each series becomes one row.
        transposed_summary = pd.concat([start_end_df, df.describe().T], axis=1)
        summaries.append(transposed_summary)

descriptives = pd.concat(summaries, axis=0) if summaries else pd.DataFrame()
descriptives = descriptives.rename_axis("Data")

# Sort
descriptives = descriptives.reindex(filter_variables)
                

In [31]:
# Render the assembled summary table (one row per series) as rich notebook output.
display(descriptives)

Unnamed: 0_level_0,start,end,count,mean,std,min,25%,50%,75%,max
Data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
RW,2004-01,2023-12,240.0,68.215495,10.103961,40.0,61.588426,68.492171,74.799536,90.0
TrendSeasRW,2004-01,2023-12,240.0,65.212735,9.860455,40.0,59.442427,65.278618,70.045723,90.0
WeakSARIMA,2004-01,2023-12,240.0,65.434632,9.932379,40.0,59.695437,65.626215,70.117469,90.0
StrongSARIMA,2004-01,2023-12,240.0,65.665527,10.656031,40.0,58.53135,66.647129,70.413462,90.0
SARIMAX,2004-01,2023-12,240.0,66.78597,12.470082,40.0,56.642862,68.105724,75.746263,90.0
x1,2004-01,2023-12,240.0,360.268079,73.697749,200.0,308.950805,381.153821,418.607367,500.0
x2,2004-01,2023-12,240.0,57.956553,8.979728,40.0,50.990507,56.779285,64.344842,80.0
x3,2004-01,2023-12,240.0,923.1683,12.484585,900.0,914.459089,921.004521,933.86099,950.0
bid_close,2020-01-01,2024-02-06,101921.0,1.114271,0.062135,0.95386,1.07325,1.10153,1.17454,1.23462


In [32]:
# Hand the summary table to the project's csv_exporter helper.
# NOTE(review): presumably writes a CSV file under TABLE_DIR — confirm in src.utils.
csv_exporter(TABLE_DIR, descriptives)

In [33]:
# Caption text passed to latex_exporter for the summary table.
caption = "Descriptive Statistics"

In [34]:
# Preview the generated LaTeX: the call is the cell's last expression, so the
# string it returns is shown as the cell output.
latex_exporter(descriptives, caption=caption)

'\\begin{table}\n\\caption{Descriptive Statistics}\n\\begin{tabular}{lllrrrrrrrr}\n\\toprule\n & start & end & count & mean & std & min & 25% & 50% & 75% & max \\\\\nData &  &  &  &  &  &  &  &  &  &  \\\\\n\\midrule\nRW & 2004-01 & 2023-12 & 240.000000 & 68.215495 & 10.103961 & 40.000000 & 61.588426 & 68.492171 & 74.799536 & 90.000000 \\\\\nTrendSeasRW & 2004-01 & 2023-12 & 240.000000 & 65.212735 & 9.860455 & 40.000000 & 59.442427 & 65.278618 & 70.045723 & 90.000000 \\\\\nWeakSARIMA & 2004-01 & 2023-12 & 240.000000 & 65.434632 & 9.932379 & 40.000000 & 59.695437 & 65.626215 & 70.117469 & 90.000000 \\\\\nStrongSARIMA & 2004-01 & 2023-12 & 240.000000 & 65.665527 & 10.656031 & 40.000000 & 58.531350 & 66.647129 & 70.413462 & 90.000000 \\\\\nSARIMAX & 2004-01 & 2023-12 & 240.000000 & 66.785970 & 12.470082 & 40.000000 & 56.642862 & 68.105724 & 75.746263 & 90.000000 \\\\\nx1 & 2004-01 & 2023-12 & 240.000000 & 360.268079 & 73.697749 & 200.000000 & 308.950805 & 381.153821 & 418.607367 & 500.000

In [35]:
# Save the LaTeX table to "descriptives.tex" in the current working directory.
# The filename has no placeholders, so the f-string prefix was dropped; an explicit
# encoding makes the written bytes independent of the platform default.
with open("descriptives.tex", "w", encoding="utf-8") as f:
    f.write(latex_exporter(descriptives, caption=caption))