In [1]:
import numpy as np
import pandas as pd
import os, string

def shorten(name):
    """Normalise the casing of a space-separated label.

    The first word is passed through ``str.capitalize`` (first character
    upper-cased, the rest lower-cased). After that, every word of three
    characters or fewer is upper-cased, while longer words are left as they
    are. The words are then re-joined with single spaces.

    Parameters
    ----------
    name : str
        Label whose words are separated by single spaces.

    Returns
    -------
    str
        The re-cased label, e.g. ``"core cpi"`` -> ``"Core CPI"``.
    """
    items = name.split(" ")
    items[0] = items[0].capitalize()
    items = [item.upper() if len(item) <= 3 else item for item in items]
    # str.join replaces string.join(), which no longer exists in Python 3;
    # the method form behaves the same on Python 2.
    return " ".join(items)

In [2]:
# Locate every CSV below the raw "investing" directory. os.walk descends into
# sub-directories, so the full path of each file is remembered and the file is
# read from where it was actually found. When a name occurs more than once, the
# first hit wins (os.walk is top-down, so the top-level directory is preferred).
csv_paths = {}
for root, subs, files in os.walk("../data/raw/investing"):
    for file in files:
        if file.endswith(".csv"):
            csv_paths.setdefault(file, os.path.join(root, file))
file_list = sorted(csv_paths)

# One table per file, keyed by the file name without its extension and without
# its last "-"-separated component.
tables = {}
for file in file_list:
    key = "-".join(file.replace(".csv", "").split("-")[:-1])
    tables[key] = pd.read_csv(csv_paths[file], index_col="Datetime").sort_index()

if not tables:
    raise ValueError("no .csv files found under ../data/raw/investing")

# Seed the combined table with one entry, then outer-join the rest on the index.
# Every column is renamed to "<column>-<shortened keyword>" first.
kw, raw_table = tables.popitem()
raw_table.columns = ["{}-{}".format(col, shorten(kw)) for col in raw_table.columns]
raw_table.index.name = "Date"
while tables:
    # No blanket try/except here: `while tables` already guarantees popitem()
    # succeeds, so any exception is a real problem (e.g. a failing join) and
    # must surface instead of silently discarding all remaining tables.
    kw, other = tables.popitem()
    other.columns = ["{}-{}".format(col, shorten(kw)) for col in other.columns]
    other.index.name = "Date"
    raw_table = raw_table.join(other, how="outer")

raw_table.index = pd.DatetimeIndex(raw_table.index)

In [3]:
# save raw
# Rows in which every column is missing are removed from raw_table itself
# (in place); the result is written out with the index as an ordinary column.
raw_csv_path = "../data/processed/filter-analysis/economic-indicators-raw.csv"
raw_table.dropna(axis=0, how="all", inplace=True)
raw_export = raw_table.reset_index()
raw_export.to_csv(raw_csv_path, index=False)

# save short
# The "Quarter-GDP" column is set aside before interpolation and put back
# afterwards at its original column position, so only the other columns are
# time-interpolated.
ncol = "Quarter-GDP"
icol = raw_table.columns.tolist().index(ncol)
xcol = raw_table[ncol]

without_gdp = raw_table.drop(columns=ncol)
after_cutoff = without_gdp.index > "1998-01-01"
short_table = (
    without_gdp.loc[after_cutoff, :]
    .dropna(how="all")
    .interpolate(method="time")
)
short_table.insert(loc=icol, column=ncol, value=xcol)

short_csv_path = "../data/processed/filter-analysis/economic-indicators-short.csv"
short_table.reset_index().to_csv(short_csv_path, index=False)

# save actual for predictions
# Keeps only the columns whose name contains "Actual" (rows after 1998-01-01),
# strips the "Actual-" prefix, time-interpolates them, and places the
# un-interpolated "Quarter-GDP" column first.
ncol = "Quarter-GDP"
icol = raw_table.columns.tolist().index(ncol)
xcol = raw_table[ncol]

indicators = raw_table.drop(columns=ncol)
recent_rows = indicators.index > "1998-01-01"
actual_table = indicators.loc[recent_rows, :].filter(like="Actual")

renamed_columns = []
for col in actual_table.columns:
    renamed_columns.append(shorten(col.replace("Actual-", "")))
actual_table.columns = renamed_columns

actual_table = actual_table.dropna(how="all").interpolate(method="time")
actual_table.insert(loc=0, column="Quarter-GDP", value=raw_table.get("Quarter-GDP"))

actual_csv_path = "../data/processed/filter-analysis/economic-indicators-short-actual.csv"
actual_table.reset_index().to_csv(actual_csv_path, index=False)