In [None]:
import numpy as np
import pandas as pd
import ast
import matplotlib.pyplot as plt
import multiprocessing as mp
from datetime import datetime as dtm
from typing import Optional, Sequence
from darts.dataprocessing.transformers import Scaler

from darts.metrics import mape, smape, mae

from darts.models.forecasting.varima import VARIMA
from darts.timeseries import TimeSeries as TS
from sklearn.model_selection import ParameterGrid as PG

In [None]:
def load_training_data(market_name, data_dir='/home/zqiao/data_flake/imputed data'):
    """Load one market's training CSV and index it by date.

    Parameters
    ----------
    market_name : str
        Market prefix of the file; the file read is
        ``<data_dir>/<market_name>_train_data_new.csv``.
    data_dir : str, optional
        Directory holding the CSV files. Defaults to the location that was
        previously hard-coded, so existing calls behave exactly as before.

    Returns
    -------
    pandas.DataFrame
        The CSV contents (first CSV column consumed as the initial index),
        with the ``date`` column parsed to datetimes and set as the index.
    """
    import os  # local import so the cell does not depend on an extra top-level import

    csv_path = os.path.join(data_dir, '{}_train_data_new.csv'.format(market_name))
    df = pd.read_csv(csv_path, index_col=0)
    df['date'] = pd.to_datetime(df['date'])
    return df.set_index('date')

def get_data_pipeline(market_name: str = None,
                      submkt_id: Optional[Sequence[str]] = None,
                      target = "real_hedonic_rent_submarket",
                      features: list = None,
                      target_rolling: bool = None,
                      a_shift: bool = None,
                      ntest: int = None,
                      nlag: int = None,
                      ):
    """Build target/covariate darts series for one submarket and split off a test tail.

    Parameters
    ----------
    market_name : str, optional
        Market passed to ``load_training_data``. Defaults to ``'pho'``.
    submkt_id : optional
        Value matched against the ``research_submkt_id`` column.
        Defaults to ``'PHO037'``.
    target : str
        Name of the target column; it is combined with ``"avrate"`` to form
        the two-component target series.
    features : list, optional
        Covariate column names. Defaults to the built-in list below.
    target_rolling : bool, optional
        If truthy, the target column is replaced by its 3-period rolling mean.
    a_shift : bool, optional
        If truthy, ``"avrate"`` is shifted by 3 rows and every feature column
        by ``nlag`` rows.
    ntest : int, optional
        Number of trailing time steps held out as the test split. Defaults to 12.
    nlag : int, optional
        Shift applied to the features when ``a_shift`` is truthy. Defaults to 6.

    Returns
    -------
    tuple
        ``(X, Y, X_train, Y_train, X_test, Y_test)`` where ``X`` holds the
        feature columns, ``Y`` holds ``[target, "avrate"]``, and the test
        series are the last ``ntest`` steps of each.

    Raises
    ------
    ValueError
        If no row of the loaded data has the requested ``submkt_id``.
    """
    # Resolve defaults up front so the rest of the body works on concrete values.
    if market_name is None:
        market_name = 'pho'
    if submkt_id is None:
        submkt_id = 'PHO037'
    if ntest is None:
        ntest = 12
    if nlag is None:
        nlag = 6
    if features is None:
        features = [
            "gdp_histfc",
            "manufacturing_employment_histfc",
            "real_ecommerce",
            "spread_3m10y",
            "real_retail_sales_ex_gas",
            "ecomm_pop",
            "weighted_pop_estimate_cryr",
            "weighted_hh_estimate_cryr"]
    features = list(features)  # allow tuples etc. in the concatenation below

    df = load_training_data(market_name)

    # Direct row selection; an unknown id previously left `submkt_df` unbound.
    submkt_df = df[df['research_submkt_id'] == submkt_id]
    if submkt_df.empty:
        raise ValueError(
            "submarket {!r} not found in market {!r}".format(submkt_id, market_name))

    target_cols = [target, "avrate"]
    pdf = submkt_df[target_cols + features].copy()

    if a_shift:
        pdf["avrate"] = pdf["avrate"].shift(3)
        # Fixed: the original referenced the undefined name `nlags` here.
        pdf[features] = pdf[features].shift(nlag)

    if target_rolling:
        pdf[target] = pdf[target].rolling(3).mean()

    # Shifting / rolling introduce leading NaNs; drop incomplete rows before
    # building the series.
    pdf = pdf.dropna()
    X = TS.from_dataframe(pdf[features])
    Y = TS.from_dataframe(pdf[target_cols])
    X_train, X_test = X[:-ntest], X[-ntest:]
    Y_train, Y_test = Y[:-ntest], Y[-ntest:]

    return X, Y, X_train, Y_train, X_test, Y_test

In [None]:
# Build the PHO037 series: 36-step hold-out, no feature lagging, no target smoothing.
X, Y, X_train, Y_train, X_test, Y_test = get_data_pipeline(
    market_name='pho', submkt_id='PHO037',
    target='real_hedonic_rent_submarket',
    features=None,  # None -> the pipeline's built-in feature list
    target_rolling=False, a_shift=False,
    ntest=36, nlag=0,
)

In [None]:
# Fit the scaler on the training targets only, then apply that same fitted
# scaler to the train split, the test split and the full series.
transformer = Scaler()
transformer.fit(Y_train)
Y_train_transformed, Y_test_transformed, series_transformed = (
    transformer.transform(Y_train),
    transformer.transform(Y_test),
    transformer.transform(Y),
)

In [None]:
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from darts.models import RNNModel

# Small vanilla-RNN forecaster; keyword arguments are grouped by concern.
my_model = RNNModel(
    # architecture
    model="RNN",
    input_chunk_length=6,
    output_chunk_length=1,
    hidden_dim=4,
    dropout=0,
    # optimisation
    n_epochs=200,
    optimizer_kwargs={"lr": 1e-1},
    random_state=66,
    # bookkeeping
    model_name="Submkt_RNN",
    save_checkpoints=True,
    force_reset=True,
)

# Train on the scaled training targets; the full covariate series `X` is
# supplied as future covariates.
my_model.fit(Y_train_transformed, future_covariates=X, verbose=True)

In [None]:

# Stop training when the validation loss has not improved by more than 0.01
# (`min_delta`) for 3 consecutive validation checks (`patience`; 3 is the
# Lightning default, written out here so the comment and the code agree).
# NOTE(review): in the cells visible here `pl_trainer_kwargs` is never handed to a
# model, so this stopper has no effect yet; monitoring "val_loss" also presumably
# requires a validation series at fit time — confirm before wiring it in.
my_stopper = EarlyStopping(
    monitor="val_loss",
    min_delta=0.01,
    patience=3,
    mode='min',
)

pl_trainer_kwargs={"callbacks": [my_stopper]}

In [None]:
# NOTE: this cell previously held only the bare, undefined name `sma` (apparently an
# unfinished edit); it raised NameError and stopped "Restart & Run All". Left empty on purpose.

In [None]:
# Forecast as many steps as the hold-out window contains, then map the scaled
# forecast back to original units in the same expression.
Y_test_pred = transformer.inverse_transform(
    my_model.predict(n=len(Y_test_transformed), future_covariates=X)
)

In [None]:
# darts metrics take (actual_series, pred_series); pass the ground truth first.
# sMAPE is symmetric, so the reported value is unchanged by this reordering.
smape(Y_test, Y_test_pred)

In [None]:
# Plot the inverse-transformed RNN forecast, followed by the training history
# (labelled "actual").
Y_test_pred.plot()
Y_train.plot(label="actual")

In [None]:
def eval_model(model, n=12):
    """Forecast `n` steps with `model`, plot against the hold-out data and report MAPE.

    Relies on the notebook globals `X` (future covariates), `Y_test` (unscaled
    hold-out targets) and `transformer` (the scaler fitted on the training targets).

    Parameters
    ----------
    model
        A fitted darts model whose `predict` accepts `future_covariates`.
        Assumed to have been trained on `transformer`-scaled targets, which is
        why its forecast is inverse-transformed before scoring.
    n : int, optional
        Forecast horizon. Defaults to 12, the value that used to be hard-coded.
    """
    pred_scaled = model.predict(n=n, future_covariates=X)
    # Score and plot in original units so the forecast is comparable with `Y_test`.
    pred_series = transformer.inverse_transform(pred_scaled)

    plt.figure(figsize=(8, 5))
    Y_test.plot(label="actual")
    pred_series.plot(label="forecast")
    # Fixed: the original scored against the undefined `val_transformed` and passed
    # the forecast first; darts metrics expect (actual_series, pred_series).
    plt.title("MAPE: {:.2f}%".format(mape(Y_test, pred_series)))
    plt.legend()


eval_model(my_model)

In [None]:
# `X_test_transformed` is never defined in this notebook (the covariates are not
# scaled anywhere), so the cell raised NameError. Show the unscaled test covariates.
X_test

In [None]:
# `X_train_transformed` is never defined in this notebook (the covariates are not
# scaled anywhere), so the cell raised NameError. Show the unscaled training covariates.
X_train

In [None]:
# Inspect the scaled training targets produced by `transformer.fit_transform(Y_train)`.
Y_train_transformed

In [None]:
# Inspect the hold-out targets after applying the scaler fitted on the training targets.
Y_test_transformed

In [None]:
from darts.models import NBEATSModel

# N-BEATS with a 12-step input window and 6-step output window, trained on the
# unscaled training targets with the training covariates as past covariates.
model = NBEATSModel(random_state=66, input_chunk_length=12, output_chunk_length=6)
model.fit(Y_train, past_covariates=X_train, verbose=True, epochs=10)

In [None]:
# Six-step forecast continuing from the end of the training targets.
Y_pred = model.predict(n=6, series=Y_train, past_covariates=X_train)

In [None]:
# Plot the N-BEATS forecast held in `Y_pred`.
Y_pred.plot()


In [None]:
# `model` (N-BEATS) was fit on the unscaled `Y_train`, so `Y_pred` is already in
# original units. Passing it through `transformer.inverse_transform` rescaled real
# values as if they were scaled ones, and because the result overwrote `Y_pred`
# the distortion compounded on every re-run of the cell. The forecast is therefore
# left untouched here.

In [None]:
# Plot the unscaled training targets.
Y_train.plot()

In [None]:
# Plot the N-BEATS forecast held in `Y_pred`.
Y_pred.plot()
