In [1]:
import pandas as pd
import numpy as np
from tqdm import tnrange, tqdm_notebook
from fbprophet import Prophet

## Forecasting for January 2016

In [2]:
!ls ../output

df_antibiotics.csv                 df_tengu_imputed.csv
df_antibiotics_imputed.csv         df_tritanium.csv
df_nanite_repair_paste.csv         df_tritanium_imputed.csv
df_nanite_repair_paste_imputed.csv df_veldspar.csv
df_tengu.csv                       df_veldspar_imputed.csv


In [3]:
# Products to forecast, mapped to the CSV file holding their imputed history.
# Every path has the form '../output/df_<product>_imputed.csv', where spaces
# in the product name are replaced by underscores.
imputed_dfs = {
    product: '../output/df_{}_imputed.csv'.format(product.replace(' ', '_'))
    for product in (
        'tritanium',
        'veldspar',
        'antibiotics',
        'nanite repair paste',
        'tengu',
    )
}

In [None]:
def days_to_predict(df, target_date="2016-01-31"):
    """Return the number of whole days from the latest observation to `target_date`.

    Parameters
    ----------
    df : pd.DataFrame
        Frame with a 'time' column that `pd.to_datetime` can parse.
    target_date : str
        Last day that should be covered by the forecast.

    Returns
    -------
    int
        Day difference between `target_date` and the most recent value in
        df['time']. Negative when the data already extends past `target_date`.

    Raises
    ------
    IndexError
        If `df` has no rows.
    """
    if len(df) == 0:
        raise IndexError("cannot compute a forecast horizon from an empty frame")

    # Use the latest timestamp rather than the last row, so the result does
    # not depend on the rows being sorted by time.
    last_example = pd.to_datetime(df['time']).max()
    last_prediction = pd.to_datetime(target_date)

    return (last_prediction - last_example).days

def rename_fix_df(df, ycol):
    """Return a relabelled copy of `df` with missing values replaced by 0.

    The 'time' column is renamed to 'ds' and `ycol` to 'y' (the frames built
    here are the ones handed to `Prophet.fit` in this notebook). Index labels
    are converted to strings. The input frame itself is not modified.
    """
    prophet_names = {
        'time': 'ds',
        ycol: 'y'
    }
    relabelled = df.rename(index=str).rename(columns=prophet_names)
    return relabelled.fillna(0)

def _forecast_column(df_station, ycol, periods):
    """Fit a Prophet model on one column of a station's history and predict.

    Returns None when every value of `ycol` is missing. Otherwise returns the
    frame produced by `predict` on `make_future_dataframe(periods=periods)`.
    """
    # Series.isnull works for any dtype, whereas np.isnan raises on
    # object-typed columns.
    if df_station[ycol].isnull().all():
        return None

    history = rename_fix_df(df_station[[ycol, 'time']], ycol)

    model = Prophet()
    model.fit(history)
    future = model.make_future_dataframe(periods=periods)
    return model.predict(future)


def fit_product_and_station(df, station_id):
    """
    Forecast demand and average price for a single station.

    Parameters
    ----------
    df : pd.DataFrame
        Data of one product; the columns 'stationid', 'time', 'demand' and
        'dem_avg_p' are read.
    station_id
        Value matched against df['stationid'] to select the station's rows.

    Returns
    -------
    (demand_forecast, price_forecast) : (pd.DataFrame, pd.DataFrame)
        A tuple of pandas data frames, where either or both can be None.
        In case the prediction target is entirely missing (all NaN) for the
        station, no forecast is made and the respective component is
        returned as None.
        Consider these forecasts as zero.

    Raises
    ------
    AssertionError
        If no row of `df` belongs to `station_id`.
    """
    df_station = df[df['stationid'] == station_id]

    assert df_station.shape[0] > 0, "no such station in the data"
    target = days_to_predict(df_station)

    # Price is fitted before demand, but the tuple is returned demand-first.
    pforecast = _forecast_column(df_station, 'dem_avg_p', target)
    dforecast = _forecast_column(df_station, 'demand', target)

    return (dforecast, pforecast)

## Forecasts

In [None]:
from tqdm import tqdm_notebook

# Results are kept in two parallel nested lists, one inner list per product
# (in the iteration order of `imputed_dfs`): idx[i][j] is the station id whose
# (demand, price) forecast tuple is stored in fs[i][j].
idx = []
fs = []

for csv_path in tqdm_notebook(imputed_dfs.values(), desc='products'):
    product_df = pd.read_csv(csv_path)
    station_ids = product_df['stationid'].unique()

    station_forecasts = [
        fit_product_and_station(product_df, station_id)
        for station_id in tqdm_notebook(station_ids, desc='stations')
    ]

    idx.append(list(station_ids))
    fs.append(station_forecasts)

Widget Javascript not detected.  It may not be installed properly. Did you enable the widgetsnbextension? If not, then run "jupyter nbextension enable --py --sys-prefix widgetsnbextension"
Widget Javascript not detected.  It may not be installed properly. Did you enable the widgetsnbextension? If not, then run "jupyter nbextension enable --py --sys-prefix widgetsnbextension"


In [100]:
# Header of the forecast CSV: product, station id, then one demand column
# ('d<date>') and one price column ('p<date>') for every day of January 2016.
# The dates come from a fixed range instead of the first station's forecast,
# so the header does not break when that forecast is None and does not depend
# on how datetime values are rendered as strings.
forecast_days = pd.date_range("2016-01-01", "2016-01-31").strftime('%Y-%m-%d')

cols = (
    ['product', 'stationid']
    + ['d' + day for day in forecast_days]
    + ['p' + day for day in forecast_days]
)
# print(','.join(cols))

In [111]:
!rm ../output/forecasts.csv

rm: ../output/forecasts.csv: No such file or directory


In [112]:
# Product names, in the same order in which the forecasts were appended to `fs`.
keys = list(imputed_dfs.keys())

OUT_PATH = '../output/forecasts.csv'
JANUARY_START = pd.to_datetime("2016-01-01")
# `cols` holds 'product' and 'stationid' followed by one demand and one price
# column per forecast day, so half of the remainder is the number of days.
n_days = (len(cols) - 2) // 2


def _january_yhat(forecast):
    """Return the forecast's `yhat` values from JANUARY_START on, as '%.2f' strings.

    A missing forecast (None) is treated as zero and yields one "0.0"
    placeholder per forecast day.
    """
    if forecast is None:
        return ["0.0"] * n_days
    january = forecast[forecast['ds'] >= JANUARY_START]
    return ["%.2f" % x for x in january['yhat'].values.tolist()]


rows = []
for i in tqdm_notebook(range(len(fs)), desc='products'):
    for j in tqdm_notebook(range(len(fs[i])), desc='stations'):
        dfd, dfp = fs[i][j]
        values = [keys[i], idx[i][j]] + _january_yhat(dfd) + _january_yhat(dfp)
        # Pair each value with its column name; columns without a value end up
        # empty in the CSV.
        rows.append(dict(zip(cols, values)))

# Write header and rows in a single pass, replacing any previous file. This
# makes the cell safe to re-run: nothing is appended to stale output, and no
# separate `rm` is needed beforehand.
pd.DataFrame(rows, columns=cols).to_csv(OUT_PATH, index=False)

The installed widget Javascript is the wrong version.
The installed widget Javascript is the wrong version.
The installed widget Javascript is the wrong version.
The installed widget Javascript is the wrong version.
The installed widget Javascript is the wrong version.
The installed widget Javascript is the wrong version.



