In [None]:
import os
from pprint import pprint
import importlib

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.pipeline import Pipeline
from hts.hierarchy import HierarchyTree

%matplotlib inline
%load_ext autoreload
%autoreload 2


%matplotlib inline

### pho_data

In [None]:
# Location of the pho training extract. Set the PHO_DATA_PATH environment variable
# to read a different copy; when it is unset the original absolute path is used.
pho_csv_path = os.environ.get('PHO_DATA_PATH', '/home/zqiao/data_flake/pho_train_data_new.csv')
# The first CSV column becomes the row index.
pho = pd.read_csv(pho_csv_path, index_col=0)

In [None]:
# Summarise the loaded frame: column dtypes, non-null counts and memory usage.
pho.info()

In [None]:
# Parse the 'date' column into pandas datetimes and store it back on the frame.
parsed_dates = pd.to_datetime(pho['date'])
pho['date'] = parsed_dates

In [None]:
# Show the column labels available in the raw frame.
pho.columns

In [None]:
# Transform the data into the format required by scikit-hts.
# Intended hierarchy (level -> node keys):
#   Level 1:  Phoenix                    Dallas-Fort Worth
#   Level 2:  PHO061 ... (28 nodes)      DAL031 ... (count not given)

In [None]:
# Keep only the identifier and rent columns used downstream.
# The explicit .copy() makes pho_rent an independent frame, so adding a column to it
# later does not raise pandas' SettingWithCopyWarning or depend on whether the
# selection happens to be a view of `pho`.
rent_columns = [
    'date', 'research_market', 'research_submkt_name', 'research_submkt_id',
    'real_hedonic_rent_submarket', 'real_market_level_rent',
    'submkt_mkt_sf_ratio_link', 'submkt_mkt_sf_ratio_cbre',
]
pho_rent = pho[rent_columns].copy()

In [None]:
# Preview the first 20 rows of the rent subset.
pho_rent.head(20)

In [None]:
# Build a market-qualified submarket key: '<research_market>_<research_submkt_id>'.
market_part = pho_rent['research_market']
submkt_part = pho_rent['research_submkt_id']
pho_rent['submkt_id'] = market_part + '_' + submkt_part

In [None]:
# Preview the first rows, now including the 'submkt_id' key.
pho_rent.head()

In [None]:
# Value column(s) to spread across submarkets when pivoting; kept as a list.
target_var = [
    'real_hedonic_rent_submarket',
]

In [None]:
# Reshape long -> wide: one row per date, one column per submarket key.
# Because target_var is a list, the resulting columns form a two-level MultiIndex
# (value name, submkt_id). pivot_table averages duplicate (date, submkt_id) rows by default.
pivot_pho = pd.pivot_table(
    pho_rent,
    values=target_var,
    index='date',
    columns='submkt_id',
)

In [None]:
# Preview the first rows of the wide (date x submarket) table.
pivot_pho.head()

In [None]:
# (rows, columns) of the wide table.
pivot_pho.shape

In [None]:
# Market-level rent with exactly one row per date.
# For each date the value is taken from the first submarket (ordered by
# research_submkt_id) that reports it. De-duplicating on 'date' -- rather than taking
# the first N sorted rows -- still yields one row per date when that first submarket
# is missing some dates, instead of spilling into the next submarket's rows.
df_mkt_rent = (
    pho_rent
    .sort_values(['research_submkt_id', 'date'])
    .drop_duplicates(subset='date', keep='first')
    .sort_values('date')
    .loc[:, ['date', 'real_market_level_rent']]
)

In [None]:
# Display the market-level rent table.
df_mkt_rent

In [None]:
# pivot_pho has two-level column labels while df_mkt_rent has flat ones; pandas >= 2.0
# refuses to merge frames whose column indexes differ in depth (MergeError).
# Flatten the labels to plain tuples first -- the same labels older pandas produced
# implicitly -- then attach the market-level rent by date.
pivot_flat = pivot_pho.copy()
pivot_flat.columns = pivot_flat.columns.to_flat_index()
pivot_pho = pivot_flat.merge(df_mkt_rent, left_on='date', right_on='date')

In [None]:
# Label each submarket column by its submkt_id alone.
# The new name is read from the column's own (value name, submkt_id) tuple instead of
# being zipped positionally against pho_rent['submkt_id'].unique(): the merged frame's
# column order and count need not match that array (e.g. a leading 'date' column),
# which would shift every label. Non-tuple columns are left untouched.
flattened_names = {
    col: col[-1] for col in pivot_pho.columns if isinstance(col, tuple)
}
pivot_pho = pivot_pho.rename(columns=flattened_names)

In [None]:
# Preview the merged frame after the column rename.
pivot_pho.head()

In [None]:
# Submarket columns in ascending code order: Phoenix_PHO037 .. Phoenix_PHO065.
# Code 052 is not part of the list and is skipped explicitly.
desired_columns = [
    f'Phoenix_PHO{code:03d}'
    for code in range(37, 66)
    if code != 52
]

# Reorder the submarket columns, keeping 'date' and the market-level rent when they
# are present: later cells index the frame by 'date' and read 'real_market_level_rent',
# and a reindex restricted to desired_columns would silently drop both.
leading = [col for col in ['date'] if col in pivot_pho.columns]
trailing = [col for col in ['real_market_level_rent'] if col in pivot_pho.columns]
pivot_pho = pivot_pho.reindex(columns=leading + desired_columns + trailing)

In [None]:
# Preview the frame after reordering its columns.
pivot_pho.head()

In [None]:
# Inspect dtypes and non-null counts after the reorder.
pivot_pho.info()

In [None]:
# Display pivot_pho left-joined with the market-level rent on 'date'.
# The merged frame is only shown, not assigned, so pivot_pho itself is unchanged by this cell.
pivot_pho.merge(df_mkt_rent, how='left', on='date')

In [None]:
# Preview pivot_pho again.
pivot_pho.head()

In [None]:
# Write the wide rent table to a CSV file; the relative path resolves against the
# current working directory.
pivot_pho.to_csv('pivot_pho_ol_rent.csv')

In [None]:
# List the column labels as a plain Python list.
pivot_pho.columns.tolist()

In [None]:
# Preview the first rows once more.
pivot_pho.head()

### VARMAX / SARIMAX test

In [None]:
from statsmodels.tsa.statespace.varmax import VARMAX
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error


In [None]:
# Index the frame by parsed date and sort it so rows are in chronological order.
pivot_pho = (
    pivot_pho
    .assign(date=pd.to_datetime(pivot_pho['date']))
    .set_index('date')
    .sort_index()
)

# Separate the features and target variables
features = pivot_pho.drop(columns='real_market_level_rent')
target = pivot_pho['real_market_level_rent']

# Split the data into train and test sets.
# shuffle=False keeps the split chronological: the final 20% of observations form the
# test set, so no future observations leak into training as they would with the
# default random shuffle. random_state has no effect without shuffling and is dropped.
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, shuffle=False)


In [None]:
# Preview the frame, now indexed by date.
pivot_pho.head()

In [None]:
import statsmodels.api as sm


# Specify an AR(4) model with a constant term on the market-level rent series.
market_rent = pivot_pho['real_market_level_rent']
model = sm.tsa.SARIMAX(market_rent, trend='c', order=(4, 0, 0))

# Estimate the parameters and print the fit summary.
results = model.fit()
print(results.summary())


In [None]:
# Print the estimated model parameters.
print(results.params)

In [None]:
# Create diagnostic figures for the fitted model, based on standardized residuals:
#   (1) time series graph
#   (2) histogram
#   (3) Q-Q plot
#   (4) correlogram
# The positional 0 is passed as the first argument of plot_diagnostics
# (presumably the index of the variable to diagnose -- confirm against statsmodels docs).
results.plot_diagnostics(0)

In [None]:
# Forecast the next 12 values
results.forecast(12)

### Hierarchy Tree

In [None]:
# Single-level hierarchy: the root node 'total' has every submarket key as a direct child.
total = {'total': ['total']}
submkt = pho_rent['submkt_id'].unique()
hier = dict(total=list(submkt))

In [None]:
# Build the hierarchy tree from the node mapping and the wide rent frame, rooted at 'total'.
# NOTE(review): no visible cell adds a 'total' column to pivot_pho -- confirm the frame
# contains the series the root node needs before this runs.
tree = HierarchyTree.from_nodes(hier, pivot_pho, root='total')

In [None]:
# Print the text representation of the hierarchy tree.
print(tree)

### Install scikit-hts (pip)

In [None]:
pip install scikit-hts

In [None]:
pip install scikit-hts[geo]

In [None]:
pip install scikit-hts[prophet]

In [None]:
pip install scikit-hts[auto-arima]

In [None]:
pip install scikit-hts[distributed]

In [None]:
pip install scikit-hts[all]

### model

In [None]:
pip install pmdarima

In [None]:
pip install --upgrade pip

In [None]:
pip install scikit-hts

In [None]:
pip install prophet

In [None]:
from hts import HTSRegressor
from pmdarima import auto_arima

In [None]:
# Show the last rows of the wide frame.
pivot_pho.tail()

In [None]:
# Label-based date windows (both endpoints inclusive) for fitting and hold-out evaluation.
train_window = slice('2003-09-01', '2022-03-01')
test_window = slice('2022-04-01', '2023-07-01')

train = pivot_pho.loc[train_window]
test = pivot_pho.loc[test_window]

In [None]:
# Hierarchical time-series regressor configured with the 'auto_arima' base model.
clf = HTSRegressor(model='auto_arima')  # Choose a valid model here
# Fit on the training window using the node hierarchy in `hier`.
# Note: this rebinds `model`, which earlier held the SARIMAX specification.
model = clf.fit(train, hier)

In [None]:
# Predict as many steps ahead as there are rows in the hold-out window.
horizon = len(test)
pred = model.predict(steps_ahead=horizon)

In [None]:
# Display the predictions returned by the regressor.
pred

In [None]:
# Series to draw, one stacked panel each: the root node plus five submarket columns.
PLOT = ['total',
        'Phoenix_PHO037',
        'Phoenix_PHO038',
        'Phoenix_PHO039',
        'Phoenix_PHO040',
        'Phoenix_PHO041'
       ]

# Number of leading observations left out of both the truth and prediction curves.
SKIP_ROWS = 12

# squeeze=False always returns a 2-D axes array, so indexing below also works when
# PLOT holds a single series; ravel() restores the flat one-axis-per-series layout.
fig, ax = plt.subplots(len(PLOT), figsize=(20, 30), sharex=True, squeeze=False)
ax = ax.ravel()

dts = pred[SKIP_ROWS:].index

for i, group in enumerate(PLOT):
    ax[i].plot(pivot_pho[SKIP_ROWS:][group],
               lw=1.1,
               color='#2ecc71',
               alpha=0.8,
               label='Truth')
    ax[i].plot(pred[SKIP_ROWS:][group],
               lw=1.1,
               color='#e74c3c',
               alpha=0.8,
               label='Prediction')
    ax[i].legend()
    # Name the series in the title so the stacked panels can be told apart.
    ax[i].set_title(f'Prediction VS True series: {group}')

### Hierarchical TimeSeries Reconciliation