In [None]:
%matplotlib inline

import pandas as pd

## Read Data

In [None]:
# Load the semicolon-separated export. header=None makes pandas treat the
# first line as data, so the column names are supplied explicitly.
rates = pd.read_csv(
    'input/Export_GBP.csv',
    sep=';',
    header=None,
    names=['month_start', 'month_end', 'exchange', 'rate'],
)
# Cell output: (number of rows, number of columns).
rates.shape

In [None]:
# Peek at the first rows of the freshly loaded frame.
rates.head()

## Fix Types

In [None]:
# Parse month_start as day-first dates, then discard month_end, which no
# later cell in this notebook reads.
rates['month_start'] = pd.to_datetime(rates['month_start'], dayfirst=True)
rates = rates.drop('month_end', axis=1)

In [None]:
# Line chart of the rate against the month it applies to.
rates.plot.line(x='month_start', y='rate')

In [None]:
# Shape of the array of distinct month_start values, i.e. how many different
# months appear in the data.
rates.month_start.unique().shape

In [None]:
# Summary statistics of the month_start column.
rates.month_start.describe()

## Trim

Exclude the current (incomplete) month and the pre-euro data, which predates the start of our datasets.

In [None]:
# Keep only the months that start before 2018-08-01; .copy() gives an
# independent frame rather than a slice of the original.
starts_before_cutoff = rates['month_start'] < '2018-08-01'
rates = rates.loc[starts_before_cutoff].copy()

In [None]:
# Distinct labels present in the exchange column.
rates.exchange.unique()

In [None]:
# Latest month_start among the rows labelled 'ECU/GBP'.
rates.loc[rates['exchange'] == 'ECU/GBP', 'month_start'].max()

In [None]:
# Keep only the EUR/GBP rows; after filtering the exchange column holds a
# single value, so it is dropped.
is_eur_gbp = rates['exchange'] == 'EUR/GBP'
rates = rates.loc[is_eur_gbp].drop('exchange', axis=1).copy()
rates.head()

## Extrapolate

In [None]:
# Show the first rows of the trimmed frame again before extrapolating.
rates.head()

In [None]:
# Average rate over the months starting on or after 2016-07-01; this single
# value is used below as the rate for every future month.
from_mid_2016 = rates['month_start'] >= '2016-07-01'
mean_rate = rates.loc[from_mid_2016, 'rate'].mean()
mean_rate

In [None]:
# Candidate month starts: the first day of every month from 2018-01 to 2025-12.
candidate_months = pd.to_datetime([
    '%4d-%02d-01' % (year, month)
    for year in range(2018, 2026)
    for month in range(1, 13)
])
# Keep only the months that come after the last month in the historical data.
last_known_month = rates.month_start.max()
future_month_starts = candidate_months[candidate_months > last_known_month]
future_month_starts

In [None]:
# One row per future month, each carrying the same mean_rate.
future_rates = pd.DataFrame({'month_start': future_month_starts}).assign(rate=mean_rate)
future_rates.head()

In [None]:
# Stack the historical and the extrapolated months in chronological order.
# The two frames carry unrelated row labels (leftover CSV row positions vs. a
# fresh 0..n range), which can collide after concatenation; rebuild the index
# so the saved frame has unique, ordered labels.
all_rates = (
    pd.concat([rates, future_rates])
    .sort_values('month_start')
    .reset_index(drop=True)
)
all_rates.shape

In [None]:
# First rows of the combined frame (earliest months after sorting).
all_rates.head()

In [None]:
# Last rows of the combined frame (latest months after sorting).
all_rates.tail()

In [None]:
# Line chart of the combined historical and extrapolated rates.
all_rates.plot.line(x='month_start', y='rate')

In [None]:
# Persist the combined frame as a gzip-compressed pickle; the compression is
# spelled out here instead of being inferred from the .gz suffix.
all_rates.to_pickle('output/exchange_rates.pkl.gz', compression='gzip')