In [None]:
import pandas as pd
import numpy as np

# Show every column when a frame is displayed.
pd.set_option("display.max_columns", None)

path_data = "./data/trips.csv"

# Semicolon-separated, Latin-1 encoded file that uses a decimal comma.
data = pd.read_csv(path_data, sep=";", decimal=",", encoding="latin_1")

# The trailing 12 columns are treated as the month columns (wide format);
# every column before them is kept as an identifier column.
col_maintain = list(data.columns[:-12])
col_melt = list(data.columns[-12:])

# Lookup from each month column name to its position 1..12 among those columns.
df_month = pd.DataFrame({"month_name": col_melt, "month_value": list(range(1, 13))})

# Wide -> long: one row per identifier combination and month column,
# then attach the numeric month through the lookup table.
data_long = data.melt(
    id_vars=col_maintain, value_vars=col_melt, var_name="month_name"
).merge(df_month, on="month_name", how="left")
def to_date(year, month):
    """Return the ``pd.Timestamp`` for the first day of ``month`` in ``year``."""
    first_of_month = pd.Timestamp(year=year, month=month, day=1)
    return first_of_month


# Year and month number of every long-format row, as plain numpy arrays.
years = data_long["Anio "].values
month_values = data_long["month_value"].values

# One first-of-month timestamp per row; the helper columns are not needed afterwards.
data_long["time_stamp"] = list(map(to_date, years, month_values))
data_long = data_long.drop(columns=["Anio ", "month_name", "month_value"])

# Keep only the region codes, the value and the month (under shorter names)
# and total the values per origin / destination / month.
data_region = (
    data_long[["CUT Region Origen ", "CUT Region Destino ", "value", "time_stamp"]]
    .rename(columns={"CUT Region Origen ": "region_origin", "CUT Region Destino ": "region_destiny"})
    .groupby(["region_origin", "region_destiny", "time_stamp"])
    .sum()
    .reset_index()
)

# Cast the totals to 64-bit integers and keep only strictly positive ones.
data_region["value"] = data_region["value"].to_numpy().astype(np.int64)
data_region = data_region.query("value > 0")

In [None]:
# Display the per-month origin/destination totals (rows with value > 0) for inspection.
data_region

In [None]:
# Pick ONE origin/destination pair to prototype the model on: the last pair in
# groupby iteration order. `group` is the (origin, destiny) key and `data_group`
# its rows; the following cells read `data_group`.
route_groups = list(data_region.groupby(["region_origin", "region_destiny"]))
if not route_groups:
    # Fail here rather than with a NameError (or stale data) in a later cell.
    raise ValueError("data_region has no origin/destination groups to model")
group, data_group = route_groups[-1]

In [None]:
from sktime.forecasting.tbats import TBATS
from sktime.forecasting.base import ForecastingHorizon
from sktime.forecasting.model_selection import temporal_train_test_split
from sktime.performance_metrics.forecasting import mean_absolute_percentage_error
from sktime.utils.plotting import plot_series
import numpy as np

# Series for the selected group, indexed by calendar month.
month_index = pd.DatetimeIndex(data_group["time_stamp"].values).to_period("M")
y = pd.Series(data_group["value"].values, index=month_index)

# Hold out the final 12 observations as the validation window.
y_train, y_valid = temporal_train_test_split(y, test_size=12)
plot_series(y_train, y_valid, labels=["train", "validation"])


In [None]:
# Absolute horizon: forecast exactly the periods present in the validation split.
fh_valid = ForecastingHorizon(y_valid.index, is_relative=False)

# TBATS settings: Box-Cox transform, ARMA errors, trend term, seasonal period of 12.
params_model = dict(use_box_cox=True, use_arma_errors=True, use_trend=True, sp=12)

forecaster_try = TBATS(**params_model)
forecaster_try.fit(y_train)
y_pred = forecaster_try.predict(fh=fh_valid)

# Mean absolute percentage error of the forecast on the held-out window.
print(mean_absolute_percentage_error(y_valid, y_pred))

plot_series(y_train, y_valid, y_pred, labels=["train", "valid", "valid_prediction"])

In [None]:

# Refit on the complete series (train + validation) before forecasting past the data.
forecaster = TBATS(**params_model)
forecaster.fit(y)

# Relative horizon of 12 steps ahead, matching the 12-observation validation window.
# np.arange excludes its stop value, so the stop must be 13 to cover steps 1..12
# (arange(1, 12) would only produce 11 steps).
fh = ForecastingHorizon(np.arange(1, 13))
y_pred_final = forecaster.predict(fh)
y_pred_invervals = forecaster.predict_interval(fh)

plot_series(y, y_pred_final, labels=["value", "prediction"], pred_interval=y_pred_invervals)


In [None]:
def predict_dataframe(data_grouped: pd.DataFrame, h: int = 12) -> pd.Series:
    """Fit a TBATS model on one group's series and forecast ``h`` steps ahead.

    Parameters
    ----------
    data_grouped : pd.DataFrame
        Rows of a single group. The ``value`` column is used as the observations
        and the ``time_stamp`` column as their dates; the dates are converted to
        monthly periods before fitting.
    h : int, default 12
        Number of steps (months) to forecast after the last observation.

    Returns
    -------
    pd.Series
        The forecaster's predictions for relative steps ``1..h``.

    Raises
    ------
    ValueError
        If ``h`` is smaller than 1.
    """
    if h < 1:
        raise ValueError("h must be a positive number of forecast steps")

    y = pd.Series(
        data_grouped["value"].values,
        index=data_grouped["time_stamp"].values,
    )
    y.index = y.index.to_period("M")

    params_model = {
        "use_box_cox": True,
        "use_arma_errors": True,
        "use_trend": True,
        "sp": 12,
    }

    forecaster = TBATS(**params_model)
    forecaster.fit(y)

    # np.arange excludes its stop value: stop at h + 1 so exactly h steps
    # (1..h) are forecast instead of h - 1.
    fh = ForecastingHorizon(np.arange(1, h + 1))

    return forecaster.predict(fh)
    
    



In [None]:
# Forecast every origin/destination pair; results are keyed by the
# (region_origin, region_destiny) tuple that groupby yields.
dict_pred = {}

grouped_routes = data_region.groupby(["region_origin", "region_destiny"])
for group, data_group in grouped_routes:
    y_pred = predict_dataframe(data_group)
    dict_pred[group] = y_pred

In [None]:
import pickle as p

pred_path = "./data/preds.pkl"

# Persist the per-route forecasts to disk as a pickle.
with open(pred_path, "wb") as file:
    file.write(p.dumps(dict_pred))

In [None]:
# Read the pickled forecasts back from pred_path.
# NOTE: unpickling runs arbitrary code, so only load files this notebook produced.
with open(pred_path, "rb") as file:
    dict_pred_pkl = p.loads(file.read())

In [None]:
# Turn each forecast series into a frame tagged with its origin and destination.
dict_df = {}
for k, v in dict_pred_pkl.items():
    region_origin, region_destiny = k
    df = pd.DataFrame(v).assign(region_origin=region_origin, region_destiny=region_destiny)
    dict_df[k] = df

In [None]:
# Stack the per-route forecast frames row-wise into a single frame.
df_preds = pd.concat(objs=list(dict_df.values()), axis=0)

In [None]:
# The forecast column is labelled 0; give it a descriptive name.
df_preds = df_preds.rename(columns={0: "prediction"})

In [None]:
# Display the stacked per-route forecasts for inspection.
df_preds

In [None]:
# Total forecast per destination region and month (summed over all origins).
# Only "prediction" is aggregated: summing the region_origin identifiers is
# meaningless and misbehaves when those codes are not numeric.
df_preds_destiny = (
    df_preds.reset_index(names=["time_stamp"])
    .groupby(["time_stamp", "region_destiny"], as_index=False)["prediction"]
    .sum()
    .sort_values(["region_destiny", "time_stamp"])
)
# Truncate to whole numbers as int64, the same width used for the observed
# `value` column (int32 would be a narrower, inconsistent dtype).
df_preds_destiny["prediction"] = df_preds_destiny["prediction"].to_numpy().astype(np.int64)
df_preds_destiny

In [None]:
# Observed total per destination region and month (summed over all origins).
# Only "value" is aggregated so the region_origin identifiers are never summed.
df_destiny = (
    data_region.groupby(["time_stamp", "region_destiny"], as_index=False)["value"]
    .sum()
)
# Use monthly periods so the dates line up with the forecast index.
df_destiny["time_stamp"] = df_destiny["time_stamp"].dt.to_period("M")
df_destiny

In [None]:
# Stack observed and forecast totals; each source lacks the other's column
# ("prediction" / "value"), so those cells come out as NaN.
df_destiny_all = pd.concat(objs=[df_destiny, df_preds_destiny], axis=0)
df_destiny_all

In [None]:
# One chart per destination region code 1..16: observed history plus forecast.
for rd in range(1, 17):
    df_region = df_destiny_all[df_destiny_all.region_destiny == rd]
    # Observed rows are the ones without a prediction; forecast rows have no value.
    df_y = df_region[df_region["prediction"].isna()]
    df_y_pred = df_region[df_region["value"].isna()]
    y = pd.Series(df_y["value"].values, index=df_y["time_stamp"].values)
    y_pred = pd.Series(df_y_pred["prediction"].values, index=df_y_pred["time_stamp"].values)
    plot_series(y, y_pred, labels=["value", "prediction"], title=f"Travels to {rd} region")

