# Get Descriptive Table of Multiple Series

## Dependencies

In [1]:
from src.utils.paths import *
from src.utils import csv_exporter, latex_exporter
import pandas as pd

Loading helper functions...
Loading paths...


In [2]:
# Define the dataset files and the variables (columns) to include in the descriptive table

In [3]:
# File names (not paths) of the CSV datasets to summarise; matched against the
# files found while walking the data directory.
dataset_names = [
    "noisy_simdata.csv",
    "eurusd.csv",
]

# Column names to keep from each dataset; columns not listed here are ignored.
filter_variables = [
    "y",
    "x1",
    "x2",
    "x3",
    "bid_close",
]

In [25]:
# Frequency aliases pandas may infer for monthly data. 'ME' is the month-end
# alias reported by pandas >= 2.2 (older versions report 'M').
_MONTHLY_FREQS = ("M", "MS", "ME")


def _summarize_dataset(csv_path, variables):
    """Build the per-variable descriptive summary for one CSV dataset.

    The first CSV column is used as the index and converted to a
    ``DatetimeIndex``. Only columns whose name is in ``variables`` are kept.

    Parameters
    ----------
    csv_path : str
        Path of the CSV file to read.
    variables : list of str
        Column names to include in the summary.

    Returns
    -------
    pandas.DataFrame or None
        One row per kept column with the columns ``start`` and ``end``
        (first and last index label, formatted as ``%Y-%m`` for monthly data
        and ``%Y-%m-%d`` otherwise) followed by the ``DataFrame.describe()``
        statistics. ``None`` if the file has no matching columns or no rows.
    """
    df = pd.read_csv(csv_path, index_col=0)
    df = df.loc[:, df.columns.isin(variables)]
    if df.empty:
        # describe() raises on a frame without columns, and there is no
        # first/last label without rows, so there is nothing to summarise.
        return None

    df.index = pd.DatetimeIndex(df.index)
    try:
        inferred_freq = pd.infer_freq(df.index)
    except ValueError:
        # infer_freq needs at least 3 timestamps; fall back to daily labels.
        inferred_freq = None

    fmt = "%Y-%m" if inferred_freq in _MONTHLY_FREQS else "%Y-%m-%d"
    # Format only the first and last label instead of the whole index.
    start, end = df.index[[0, -1]].strftime(fmt)
    span = pd.DataFrame({"start": start, "end": end}, index=df.columns)
    return pd.concat([span, df.describe().T], axis=1)


# Collect one summary per matching file and concatenate once at the end;
# files are processed in the order os.walk yields them.
# `os` is not imported explicitly in this notebook; it is presumably provided
# by the star import from src.utils.paths -- TODO confirm.
summaries = []
for dirpath, _dirnames, files in os.walk(DATA_DIR):
    for file in files:
        if file not in dataset_names:
            continue
        csv_path = os.path.join(dirpath, file)
        summary = _summarize_dataset(csv_path, filter_variables)
        if summary is None:
            print(f"Skipping {csv_path}: no rows or none of the requested variables found")
            continue
        summaries.append(summary)

if summaries:
    descriptives = pd.concat(summaries, axis=0).rename_axis("Data")
else:
    # No matching dataset found: keep the original empty-table result.
    descriptives = pd.DataFrame()
                

In [26]:
# Render the combined summary table (one row per variable) as rich notebook output.
display(descriptives)

Unnamed: 0_level_0,start,end,count,mean,std,min,25%,50%,75%,max
Data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
y,2004-01,2023-12,240.0,62.787547,11.356034,39.396929,55.35138,60.395866,73.004331,90.553934
x1,2004-01,2023-12,240.0,359.903917,69.397088,200.041616,303.80216,379.083912,410.068355,499.870667
x2,2004-01,2023-12,240.0,58.225344,7.809754,40.583377,54.532484,57.414637,62.259925,80.801414
x3,2004-01,2023-12,240.0,925.39593,12.362736,899.510058,917.52724,922.147737,934.202909,949.660801
bid_close,2020-01-01,2024-02-06,101921.0,1.114271,0.062135,0.95386,1.07325,1.10153,1.17454,1.23462


In [27]:
# Hand the summary table to the project's csv_exporter helper together with TABLE_DIR.
# NOTE(review): TABLE_DIR is not defined in this notebook; presumably it comes from the
# star import of src.utils.paths and is the output directory -- confirm.
csv_exporter(TABLE_DIR, descriptives)

In [28]:
# Caption passed to latex_exporter for the generated LaTeX table.
caption = "Descriptive Statistics"

In [29]:
# Preview the LaTeX source for the summary table: as the last expression of the
# cell, the string returned by latex_exporter is echoed as the cell output.
latex_exporter(descriptives, caption=caption)

'\\begin{table}\n\\caption{Descriptive Statistics}\n\\begin{tabular}{lllrrrrrrrr}\n\\toprule\n & start & end & count & mean & std & min & 25% & 50% & 75% & max \\\\\nData &  &  &  &  &  &  &  &  &  &  \\\\\n\\midrule\ny & 2004-01 & 2023-12 & 240.000000 & 62.787547 & 11.356034 & 39.396929 & 55.351380 & 60.395866 & 73.004331 & 90.553934 \\\\\nx1 & 2004-01 & 2023-12 & 240.000000 & 359.903917 & 69.397088 & 200.041616 & 303.802160 & 379.083912 & 410.068355 & 499.870667 \\\\\nx2 & 2004-01 & 2023-12 & 240.000000 & 58.225344 & 7.809754 & 40.583377 & 54.532484 & 57.414637 & 62.259925 & 80.801414 \\\\\nx3 & 2004-01 & 2023-12 & 240.000000 & 925.395930 & 12.362736 & 899.510058 & 917.527240 & 922.147737 & 934.202909 & 949.660801 \\\\\nbid_close & 2020-01-01 & 2024-02-06 & 101921.000000 & 1.114271 & 0.062135 & 0.953860 & 1.073250 & 1.101530 & 1.174540 & 1.234620 \\\\\n\\bottomrule\n\\end{tabular}\n\\end{table}\n'

In [30]:
# Write the LaTeX table to descriptives.tex (relative path, i.e. the current
# working directory). The encoding is set explicitly so the file does not depend
# on the platform default.
# NOTE(review): the string returned by latex_exporter contains unescaped '%' in
# the percentile headers (25%, 50%, 75%). LaTeX treats '%' as the start of a
# comment, so the header row will likely not compile as intended -- consider
# escaping inside latex_exporter.
with open("descriptives.tex", "w", encoding="utf-8") as f:
    f.write(latex_exporter(descriptives, caption=caption))