In [1]:
import os
import string
import warnings

import numpy as np
import pandas as pd

def shorten(name, join="-"):
    """Turn a dash-separated name into a compact label.

    ``name`` is split on "-". The first piece is passed through
    ``str.capitalize``; every piece is stripped of surrounding whitespace,
    and pieces that are at most three characters long (measured before
    stripping) are upper-cased, so a token such as "gdp" becomes "GDP".

    Parameters
    ----------
    name : str
        Dash-separated name to normalise.
    join : str, optional
        Separator placed between the pieces of the result (default "-").

    Returns
    -------
    str
        The normalised pieces joined with ``join``.
    """
    items = name.split("-")
    items[0] = items[0].capitalize()
    # str.join is used instead of string.join(), which exists only in
    # Python 2 and raises AttributeError on Python 3.
    return join.join(
        item.strip().upper() if len(item) <= 3 else item.strip()
        for item in items
    )

In [2]:
# Directory that holds the raw per-indicator CSV exports.
RAW_DIR = "../data/raw/investing"

# Collect every CSV below RAW_DIR together with the directory it lives in, so
# files found in sub-folders by os.walk are opened from the right location.
# Sorting by base name keeps the same ordering as sorting the bare file names.
csv_paths = sorted(
    (
        os.path.join(root, fname)
        for root, _subs, files in os.walk(RAW_DIR)
        for fname in files
        if fname.endswith(".csv")
    ),
    key=os.path.basename,
)
file_list = [os.path.basename(path) for path in csv_paths]

# One DataFrame per keyword. The keyword is the file name without ".csv" and
# without its last "-"-separated token; a later file with the same keyword
# replaces an earlier one.
tables = {}
for path in csv_paths:
    stem = os.path.basename(path).replace(".csv", "")
    name = "-".join(stem.split("-")[:-1])

    table = pd.read_csv(path, index_col="Datetime").sort_index()
    if "gdp" in name:
        # GDP files carry an extra "Quarter" column that is discarded; the
        # remaining values are then moved three rows earlier.
        # NOTE(review): the reason for the -3 shift is not recorded here -- confirm.
        table = table.drop(columns="Quarter").shift(-3)
    tables[name] = table

if not tables:
    raise ValueError("no CSV files found under {!r}".format(RAW_DIR))

# Outer-join all tables on their index. `tables` is consumed with popitem(),
# so it is empty once the loop finishes.
raw_table = None
while tables:
    kw, other = tables.popitem()
    # Suffix every column with the shortened keyword to keep names unique.
    other.columns = ["{}-{}".format(col, shorten(kw)) for col in other.columns]
    other.index.name = "Date"
    if raw_table is None:
        raw_table = other
        continue
    try:
        raw_table = raw_table.join(other, how="outer")
    except Exception as err:
        # Report the failure and carry on with the remaining tables instead of
        # silently abandoning everything that has not been merged yet.
        warnings.warn("could not join table {!r}; skipped: {}".format(kw, err))

raw_table.index = pd.DatetimeIndex(raw_table.index)

In [3]:
# Only rows strictly after this date go into the "short" exports.
CUTOFF_DATE = "1998-01-01"

# save raw
# Drop rows with no values at all and normalise the column labels. List
# comprehensions are used instead of map(), which returns a lazy iterator
# on Python 3 rather than a list.
raw_table = raw_table.dropna(how="all", axis=0)
raw_table.columns = [shorten(col) for col in raw_table.columns]
raw_table.reset_index().to_csv("../data/processed/filter-analysis/economic-indicators-raw.csv", index=False)

# Rows after the cutoff, shared by both "short" exports below.
recent_table = raw_table.loc[raw_table.index > CUTOFF_DATE, :]

# save short
# TODO: use resample instead of interpolation
short_table = recent_table.copy()
# Same labels as the raw export, but with spaces instead of dashes.
short_table.columns = [shorten(col, " ") for col in short_table.columns]
short_table = short_table.dropna(how="all")
short_table.reset_index().to_csv("../data/processed/filter-analysis/economic-indicators-short.csv", index=False)

# save actual for predictions
# Keep only the columns whose label contains "Actual-" and strip that marker
# from the exported label.
actual_table = recent_table.filter(like="Actual-")
actual_table.columns = [shorten(col.replace("Actual-", ""), " ") for col in actual_table.columns]
actual_table = actual_table.dropna(how="all")
actual_table.reset_index().to_csv("../data/processed/filter-analysis/economic-indicators-short-actual.csv", index=False)