In [2]:
import pandas as pd
import numpy as np

# Raise the display limit so that up to 10000 rows are shown before truncation.
pd.options.display.max_rows = 10000

In [3]:
from datetime import datetime

def cpi_dataframe():
    """Expand the raw CPI tables to a one-minute grid and write one CSV per series.

    Reads three files from ``../data/external/cpi/``:

    * ``CPI_2018-2020.csv`` -- its ``TIME``/``Value`` columns are renamed to
      ``Time``/``CPI`` and fixed positional row ranges are cut out for the
      ``can``, ``jpy``, ``chf``, ``gbp``, ``usd`` and ``eur`` series.
    * ``AUD_CPI_2018-2020.csv`` -- rows 25..36, columns ``Quarter`` and
      ``Change from previous quarter (%)``.
    * ``NZD_CPI_2018-2020.csv`` -- rows 4 onward, columns ``Year ended`` and
      ``Percentage change``.

    Every series is left-joined onto a one-minute timestamp grid running from
    ``2018-01-01 22:00:00`` to ``2020-12-31 21:59:00`` (via ``create_cpi_csv``),
    seeded with the first CPI value of the series, forward-filled, and written
    to ``../data/processed/cpi/<code>_cpi_processed.csv`` without the index.

    Returns:
        None. The function is called for its file-writing side effect.

    NOTE(review): the row offsets below are hard-coded against the layout of
    the source CSVs (one row is skipped between consecutive ranges and rows
    185..222 are not used) -- confirm them if the input files change.
    """
    timestamp_format = "%Y-%m-%d %H:%M:%S"

    def _reformat(value, source_format):
        # Parse `value` with `source_format`, re-emit it as a full timestamp string.
        return datetime.strptime(value, source_format).strftime(timestamp_format)

    def _reformat_short_year(value):
        # Insert "20" after the first four characters so that a two-digit year
        # can be parsed with %Y (presumably "Mar-18" -> "Mar-2018"; verify
        # against the AUD/NZD source files).
        return _reformat(value[:4] + "20" + value[4:], "%b-%Y")

    cpi_data = pd.read_csv("../data/external/cpi/CPI_2018-2020.csv")
    cpi_data = cpi_data.rename(columns={"TIME": "Time", "Value": "CPI"})
    # Select with a list, not a set: set indexers have no defined column order
    # and raise TypeError on pandas >= 2.0. The copy avoids chained-assignment
    # warnings on the column write below.
    cpi_data = cpi_data[["Time", "CPI"]].copy()
    cpi_data["Time"] = cpi_data["Time"].map(lambda time: _reformat(time, "%Y-%m"))

    # "1min" is the non-deprecated spelling of the former "1T" alias.
    time_frame = pd.date_range(
        start="2018-01-01 22:00:00", freq="1min", end="2020-12-31 21:59:00"
    )
    time_frame = pd.DataFrame(time_frame, columns=["Time"])
    # Timestamps are kept as strings so they join against the string "Time"
    # columns of the CPI tables.
    time_frame["Time"] = time_frame["Time"].dt.strftime(timestamp_format)

    can_cpi = cpi_data[1:37]
    jpy_cpi = cpi_data[38:74]
    chf_cpi = cpi_data[75:111]
    gbp_cpi = cpi_data[112:148]
    usd_cpi = cpi_data[149:185]
    eur_cpi = cpi_data[223:259]

    australia_cpi_data = pd.read_csv("../data/external/cpi/AUD_CPI_2018-2020.csv")
    new_zealand_cpi_data = pd.read_csv("../data/external/cpi/NZD_CPI_2018-2020.csv")
    aud_cpi = australia_cpi_data[25:37]
    nzd_cpi = new_zealand_cpi_data[4:]
    nzd_cpi = nzd_cpi.rename(columns={"Year ended": "Time", "Percentage change": "CPI"})
    aud_cpi = aud_cpi.rename(
        columns={"Quarter": "Time", "Change from previous quarter (%)": "CPI"}
    )
    aud_cpi["Time"] = aud_cpi["Time"].map(_reformat_short_year)
    nzd_cpi["Time"] = nzd_cpi["Time"].map(_reformat_short_year)

    # Insertion order fixes the order in which the output files are written.
    series_by_code = {
        "aud": aud_cpi,
        "can": can_cpi,
        "jpy": jpy_cpi,
        "chf": chf_cpi,
        "gbp": gbp_cpi,
        "usd": usd_cpi,
        "eur": eur_cpi,
        "nzd": nzd_cpi,
    }
    for code, series in series_by_code.items():
        # Seed the grid with the series' first value so the minutes preceding
        # the first matching timestamp are not left empty.
        filled = create_cpi_csv(series, time_frame, series["CPI"].iloc[0])
        filled.to_csv(
            "../data/processed/cpi/{}_cpi_processed.csv".format(code), index=False
        )
    
def create_cpi_csv(pair, time, initial):
    """Align a sparse CPI table to a timestamp grid and forward-fill the gaps.

    Args:
        pair: DataFrame with a ``Time`` column and a ``CPI`` column.
        time: DataFrame with a ``Time`` column; every one of its rows is kept
            (left join), so it defines the rows of the result.
        initial: Value written into the ``CPI`` cell of the first row before
            filling. It overwrites whatever the join put there.

    Returns:
        A new DataFrame with the rows of ``time`` and the columns of ``pair``
        joined on ``Time``, where each missing value has been replaced by the
        closest preceding non-missing value in the same column. Neither input
        is modified.
    """
    merged = time.merge(pair, how="left", on="Time")
    # Seed the first row so the forward fill has a value to propagate from.
    merged.iloc[0, merged.columns.get_loc("CPI")] = initial
    # DataFrame.ffill() replaces the deprecated fillna(method="ffill").
    return merged.ffill()

# Run the pipeline: reads the raw CPI CSVs and writes the processed per-series CSV files.
cpi_dataframe()