In [None]:
import numpy as np
import pandas as pd
import ast
import matplotlib.pyplot as plt
import pmdarima as pm
from pmdarima.arima.stationarity import ADFTest
from pmdarima.arima import ndiffs
from sklearn.metrics import mean_squared_error
from pmdarima.metrics import smape
from statsmodels.tsa.arima.model import ARIMA

import multiprocessing as mp
from datetime import datetime as dtm
from typing import Optional, Sequence
import itertools
from sklearn.model_selection import ParameterGrid as PG
import joblib
from sklearn.feature_selection import mutual_info_regression as MIR
import time

In [None]:
def smape(y_true, y_pred):
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)
    numerator = np.abs(y_pred - y_true)
    denominator = (np.abs(y_true) + np.abs(y_pred)) / 2

    return np.mean(numerator / denominator) * 100

def get_important_features(df, feature_space, k, thresh):
    submarket_features_dict = {}
    y = df['real_hedonic_rent_submarket']
    X = df[feature_space]  

    mi_scores = MIR(X, y)
    #print(mi_scores)

    features = []
    if thresh:
        mi_score_selected_index = np.where(mi_scores > thresh)[0]
        features = X.columns[mi_score_selected_index].tolist()
        #print(f"num features above mi thresh for submarket {name}: {len(features)}")

    if k:
        mi_score_selected_index = np.argsort(mi_scores)[::-1][:k]
        features = X.columns[mi_score_selected_index].tolist()

    return features

def _get_list_intersect_size(list1, list2):
    return len(set(list1) & set(list2))


def get_feature_subsets(
    feature_space,
    subset_size,
    include_features=None,
    intersect_size=1,
):
    subset_size = max(1, subset_size)
    subset_size = min(subset_size, len(feature_space))
    subset_li = []

    for k in range(subset_size, 0, -1):
        for subset in itertools.combinations(feature_space, k):
            subset = list(subset)
            if include_features is not None:
                intersect_size = min(k, intersect_size)
                _get_list_intersect_size(subset, include_features)

            subset_li.append(subset)

    return subset_li



def _process_subset(subset):
    return subset

def run_auto_arima_experiment(name, exo, group, params, ntest, min_p, max_p, min_q, max_q):
    adf_test = ADFTest(alpha=0.05)
    p_val, should_diff = adf_test.should_diff(group['real_hedonic_rent_submarket'])
    kpss_diffs = ndiffs(group['real_hedonic_rent_submarket'], alpha=0.05, test='kpss', max_d=6)
    adf_diffs = ndiffs(group['real_hedonic_rent_submarket'], alpha=0.05, test='adf', max_d=6)
    n_diffs = max(adf_diffs, kpss_diffs)
    print(name,': ',p_val,should_diff,n_diffs)

    Y_train = group['real_hedonic_rent_submarket'][:-ntest]
    Y_test = group['real_hedonic_rent_submarket'][-ntest:]
    X_train = exo[params['subset_li']].iloc[:-ntest, :]
    X_test = exo[params['subset_li']].iloc[-ntest:,:]
    auto = pm.auto_arima(Y_train, X_train, d=n_diffs, 
                     suppress_warnings=True, error_action="ignore", 
                     min_p=min_p,min_q=min_q,max_p=max_p,max_q=max_q,
                     stepwise=True, scoring=smape,
                     max_order=None, trace=True)

    model = auto
    y_pred = model.predict(ntest, X_test)

    mse = mean_squared_error(Y_test, y_pred)
    smape_ = smape(Y_test, y_pred)

    n_params = model.order[0] + model.order[1] + model.order[2] + len(params['subset_li'])
    n_obs = len(Y_train)
    aic = n_obs * np.log(mse) + 2 * n_params
    bic = n_obs * np.log(mse) + n_params * np.log(n_obs)

    model_info = {
        "order": model.order,
        "params": params,
        "y_pred": y_pred.tolist(),
        "smape": smape_,
        "mse": mse,
        "aic": aic,
        "bic": bic,
        "arima_model": model,  
    }

    
    return model_info



def run_auto_arima_pipeline(df, ntest, feature_space, k, thresh, min_p, max_p, min_q, max_q):
    results = {}  
    best_models_df = pd.DataFrame()
    
    grouped = df.groupby('research_submkt_id')
    for name, group in grouped:
        group = group.sort_values('date')
        features = get_important_features(group, feature_space, k, thresh)
        exo = group[features]
        subset_li = get_feature_subsets(
            features,  
            subset_size=len(features),
            include_features=None,
            intersect_size=1
        )

        param_vals = {
            "subset_li": subset_li
        }
        param_grid = list(PG(param_vals))
        num_params = len(param_grid)

        best_smape = float('inf')  # Initialize to a high value
        best_model_info = None  # Initialize as None

        pool = mp.Pool(processes=mp.cpu_count())
        for idx, params in enumerate(param_grid):
            print(f"training model {idx}/{num_params - 1}: {params}")

            result = pool.apply_async(
                run_auto_arima_experiment,
                kwds={
                    "name": name,
                    "exo": exo,
                    "group": group,
                    "params": params,
                    "ntest": ntest,
                    "min_p": min_p,
                    "max_p": max_p, 
                    "min_q": min_q, 
                    "max_q": max_q
                },
            )
            params = str(params)
            results[params] = result
            
        pool.close()
        pool.join()
        
        rs_df = pd.DataFrame()
        
        # Iterate over each key-value pair in 'results'
        for key, value in results.items():
            dic = value.get()
            y_pred = dic['y_pred']
            smape_dic = dic['smape']
            aic = dic['aic']
            bic = dic['bic']
            p,d,q = dic['arima_model'].order
            data = {
                'research_submkt_id': name,
                'date': group['date'].iloc[-ntest:],
                'y_test': group['real_hedonic_rent_submarket'].iloc[-ntest:],
                'y_pred': y_pred,
                'smape': smape_dic,
                'p': p,
                'd': d,
                'q': q,
                'atrributes': key,
                'aic': aic,
                'bic': bic                
            }
            rs_part_df = pd.DataFrame(data)

            
            rs_df = pd.concat([rs_df,rs_part_df])
    
        best_models_df = pd.concat([best_models_df, rs_df])
    
    grouped_ = best_models_df.groupby('research_submkt_id')
    
    rs_df_new = pd.DataFrame()
    
    for name, group in grouped_:
        grdf = group[group['smape']==group['smape'].min()]
        rs_df_new = pd.concat([rs_df_new, grdf])
    

    return rs_df_new



In [None]:
df_dal = pd.read_csv('/mnt/container1/zqiao_Workspace/link-research/ad-hoc/zq-sandbox/submkt_data/submkt_train_data/dal_submkt_train_test_data.csv',index_col=0)
df_dal.head()

In [None]:
feature_space = [
   'population_histfc','nominal_retail_sales_histfc', 
   'nominal_earnings_by_residence_histfc',
   'gdp_histfc', 'unemployment_histfc', 'employment_histfc',
   'unemployment_rate_histfc', 'labor_force_histfc',
   'manufacturing_employment_histfc', 'employment_trade_histfc',
   'employment_warehousing_histfc', 'affordability_index_histfc',
   'gdp_transp_and_dist_histfc', 'income_per_capita_histfc',
   'median_sfh_sale_price_histfc', 'tech_employment_histfc',
   'household_count_histfc', 'nominal_earnings_by_workplace_histfc',
   'nominal_proprietors_income_histfc', 'housing_completions_histfc',
   'employment_wholesale_trade_histfc', 'population_20_24_histfc',
   'population_25_29_histfc', 'population_30_34_histfc',
   'population_35_39_histfc', 'population_40_44_histfc',
   'population_45_49_histfc', 'real_retail_sales_histfc',
   'real_earnings_by_residence_histfc',
   'real_earnings_by_workplace_histfc', 'real_proprietors_income_histfc',
   'ecomm_sh', 'real_retail_sales_ex_gas',
   'real_bricks_and_mortar_retail_sales', 'real_ecommerce',
   'ecomm_footprint_adj_sales', 'exports_us', 'imports_us', 'treasury_10y',
   'spread_3m10y','real_market_level_rent','ecomm_pop',
   'ecomm^2_pop', 'weighted_pop_estimate_cryr',
   'weighted_hh_estimate_cryr','total_dock_doors', 'total_car_spaces', 'retailirsa', 'pcedg', 'pcend',
   'pcepilfe', 'rsxfs', 'isratio', 'mrtsir4423xuss', 'whlslrimsa',
   'a333rx1q020sbea']

In [None]:
start_time = time.time()
min_p = 1
max_p = 6
min_q = 1
max_q = 6
#results_dal_f4, best_model_dal_f4 = run_auto_arima_pipeline(df_dal, 24, feature_space, 3, None, min_p, max_p, min_q, max_q)
rs_6_8 = run_auto_arima_pipeline(df_dal, 24, feature_space, 8, None, min_p, max_p, min_q, max_q)

end_time = time.time()
execution_time = end_time - start_time

print(f"Execution time: {execution_time} seconds")

In [None]:
rs_6_8.to_csv('/mnt/container1/zqiao_Workspace/link-research/ad-hoc/zq-sandbox/submkt_forecsat_model/seperate_pipeline_final/arima_result_flake/dal_arima_6_8')

In [None]:
rs_6_8[rs_6_8['date']=='2021-08-01']

In [None]:
start_time = time.time()
min_p = 1
max_p = 12
min_q = 1
max_q = 12
#results_dal_f4, best_model_dal_f4 = run_auto_arima_pipeline(df_dal, 24, feature_space, 3, None, min_p, max_p, min_q, max_q)
rs_12_5 = run_auto_arima_pipeline(df_dal, 24, feature_space, 5, None, min_p, max_p, min_q, max_q)

end_time = time.time()
execution_time = end_time - start_time

print(f"Execution time: {execution_time} seconds")

In [None]:
rs_12_5.to_csv('/mnt/container1/zqiao_Workspace/link-research/ad-hoc/zq-sandbox/submkt_forecsat_model/seperate_pipeline_final/arima_result_flake/dal_arima_12_5')

In [None]:
rs_12_5[rs_12_5['date']=='2021-08-01']

In [3]:
import numpy as np
import pandas as pd
import ast
import matplotlib.pyplot as plt
import pmdarima as pm
from pmdarima.arima.stationarity import ADFTest
from pmdarima.arima import ndiffs
from sklearn.metrics import mean_squared_error
from pmdarima.metrics import smape
from statsmodels.tsa.arima.model import ARIMA

import multiprocessing as mp
from datetime import datetime as dtm
import itertools
from sklearn.model_selection import ParameterGrid as PG
import joblib
from sklearn.feature_selection import mutual_info_regression as MIR
import time
from sklearn.model_selection import TimeSeriesSplit
from pmdarima.arima import auto_arima

def smape(y_true, y_pred):
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)
    numerator = np.abs(y_pred - y_true)
    denominator = (np.abs(y_true) + np.abs(y_pred)) / 2

    return np.mean(numerator / denominator) * 100

def smape_scorer(y_true, y_pred):
    return -smape(y_true, y_pred)

def get_important_features(df, feature_space, k, thresh):
    y = df['real_hedonic_rent_submarket']
    X = df[feature_space]

    mi_scores = MIR(X, y)

    features = []
    if thresh:
        mi_score_selected_index = np.where(mi_scores > thresh)[0]
        features = X.columns[mi_score_selected_index].tolist()

    if k:
        mi_score_selected_index = np.argsort(mi_scores)[::-1][:k]
        features = X.columns[mi_score_selected_index].tolist()

    return features


def _get_list_intersect_size(list1, list2):
    return len(set(list1) & set(list2))


def get_feature_subsets(
        feature_space,
        subset_size,
        include_features=None,
        intersect_size=1,
):
    subset_size = max(1, subset_size)
    subset_size = min(subset_size, len(feature_space))
    subset_li = []

    for k in range(subset_size, 0, -1):
        for subset in itertools.combinations(feature_space, k):
            subset = list(subset)
            if include_features is not None:
                intersect_size = min(k, intersect_size)
                _get_list_intersect_size(subset, include_features)

            subset_li.append(subset)

    return subset_li


def _process_subset(subset):
    return subset


def run_auto_arima_experiment(name, exo, group, params, ntest, min_p, max_p, min_q, max_q, diff):
    Y_train = group['real_hedonic_rent_submarket'][:-ntest]
    Y_test = group['real_hedonic_rent_submarket'][-ntest:]
    X_train = exo[params['subset_li']].iloc[:-ntest, :]
    X_test = exo[params['subset_li']].iloc[-ntest:, :]

    adf_test = ADFTest(alpha=0.05)
    p_val, should_diff = adf_test.should_diff(group['real_hedonic_rent_submarket'])
    kpss_diffs = ndiffs(group['real_hedonic_rent_submarket'], alpha=0.05, test='kpss', max_d=6)
    adf_diffs = ndiffs(group['real_hedonic_rent_submarket'], alpha=0.05, test='adf', max_d=6)
    n_diffs = max(adf_diffs, kpss_diffs)
    print(name, ': ', p_val, should_diff, n_diffs)

    if diff:
        auto = pm.auto_arima(Y_train, X_train, d=n_diffs,
                             suppress_warnings=True, error_action="ignore",
                             min_p=min_p, min_q=min_q, max_p=max_p, max_q=max_q,
                             stepwise=True, scoring=smape_scorer,
                             max_order=None, trace=True)
    else:
        auto = pm.auto_arima(Y_train, X_train, d=0,
                             suppress_warnings=True, error_action="ignore",
                             min_p=min_p, min_q=min_q, max_p=max_p, max_q=max_q,
                             stepwise=True, scoring=smape_scorer,
                             max_order=None, trace=True)

    model = auto
    y_pred = model.predict(ntest, X_test)
    p, d, q = model.order
    
    date = group['date'].iloc[-ntest:].tolist()
    mse_test = mean_squared_error(Y_test, y_pred)
    smape_test = smape(Y_test, y_pred)



    model_info = pd.DataFrame({
        "research_submkt_id": [name] * ntest,  # Repeat the name ntest times to match the length
        "date": date,
        "y_test": Y_test.values,
        "y_pred": y_pred,
        "mse": [mse_test] * ntest,  # Repeat the mse value ntest times to match the length
        "smape": [smape_test] * ntest,  # Repeat the smape value ntest times to match the length
        "p": [p] * ntest,  # Repeat the p value ntest times to match the length
        "d": [d] * ntest,  # Repeat the d value ntest times to match the length
        "q": [q] * ntest,  # Repeat the q value ntest times to match the length
        "best_attributes": [params['subset_li']]* ntest
    })

    return model_info


def run_auto_arima_pipeline(df, ntest, feature_space, k, thresh, min_p, max_p, min_q, max_q, diff):
    
    rs_df = pd.DataFrame()
    best_models_df = pd.DataFrame()

    grouped = df.groupby('research_submkt_id')
    for name, group in grouped:
        features = get_important_features(group, feature_space, k, thresh)
        exo = group[features]
        subset_li = get_feature_subsets(
            features,
            subset_size=len(features),
            include_features=None,
            intersect_size=1
        )

        param_vals = {
            "subset_li": subset_li
        }
        param_grid = list(PG(param_vals))
        num_params = len(param_grid)
        
        results = {}
        pool = mp.Pool(processes=mp.cpu_count())
        for idx, params in enumerate(param_grid):
            print(f"training model {idx}/{num_params - 1}: {params}")

            result = pool.apply_async(
                run_auto_arima_experiment,
                kwds={
                    "name": name,
                    "exo": exo,
                    "group": group,
                    "params": params,
                    "ntest": ntest,
                    "min_p": min_p,
                    "max_p": max_p,
                    "min_q": min_q,
                    "max_q": max_q,
                    "diff": diff
                },
            )
            params = str(params)
            results[params] = result

        pool.close()
        pool.join()
      

        for key, value in results.items():
            dic = value.get()
            date = dic['date']
            y_test = dic['y_test']
            y_pred = dic['y_pred']
            smape_test = dic['smape']
            mse_test = dic['mse']

            data = {
                'research_submkt_id': name,
                'date': date,
                'y_test': y_test,
                'y_pred': y_pred,
                'smape': smape_test,
                'mse': mse_test,
                'attributes': key
            }
            rs_part_df = pd.DataFrame(data)

            rs_df = pd.concat([rs_df, rs_part_df])

        best_models_df = pd.concat([best_models_df, rs_df])

    grouped_ = best_models_df.groupby('research_submkt_id')

    rs_df_new = pd.DataFrame()

    for name, group_ in grouped_:
        grdf = group_[group_['smape'] == group_['smape'].min()]
        rs_df_new = pd.concat([rs_df_new, grdf])

    return rs_df_new







In [4]:
feature_space = [
   'population_histfc']

df_dal = pd.read_csv(
    '/mnt/container1/zqiao_Workspace/link-research/ad-hoc/zq-sandbox/submkt_data/submkt_train_data/dal_submkt_train_test_data.csv',
    index_col=0)

df_pho = pd.read_csv(
    '/mnt/container1/zqiao_Workspace/link-research/ad-hoc/zq-sandbox/submkt_data/submkt_train_data/pho_submkt_train_test_data.csv',
    index_col=0)

min_p = 1
max_p = 2
min_q = 1
max_q = 2

# without cv
start_time = time.time()

rssss = run_auto_arima_pipeline(df_dal, 24, feature_space, 1, None, min_p, max_p, min_q, max_q,
                                                     True)

end_time = time.time()
execution_time = end_time - start_time

print(f"Execution time: {execution_time} seconds")

DAL031 :  0.99 True 2
Performing stepwise search to minimize aic
training model 0/0: {'subset_li': ['population_histfc']}
 ARIMA(2,2,2)(0,0,0)[0]             : AIC=-2112.350, Time=0.37 sec
 ARIMA(0,2,0)(0,0,0)[0]             : AIC=-1976.488, Time=0.05 sec
 ARIMA(1,2,0)(0,0,0)[0]             : AIC=-2043.700, Time=0.10 sec
 ARIMA(0,2,1)(0,0,0)[0]             : AIC=-2107.120, Time=0.14 sec
 ARIMA(1,2,2)(0,0,0)[0]             : AIC=-2106.112, Time=0.24 sec
 ARIMA(2,2,1)(0,0,0)[0]             : AIC=-2118.411, Time=0.28 sec
 ARIMA(1,2,1)(0,0,0)[0]             : AIC=-2118.530, Time=0.21 sec
 ARIMA(0,2,2)(0,0,0)[0]             : AIC=-2116.142, Time=0.24 sec
 ARIMA(2,2,0)(0,0,0)[0]             : AIC=-2061.509, Time=0.13 sec
 ARIMA(1,2,1)(0,0,0)[0] intercept   : AIC=-2116.527, Time=0.30 sec

Best model:  ARIMA(1,2,1)(0,0,0)[0]          
Total fit time: 2.096 seconds


  return get_prediction_index(
  return get_prediction_index(


DAL034-DAL035-DAL041 :  0.9644374565131912 True 2
Performing stepwise search to minimize aic
training model 0/0: {'subset_li': ['population_histfc']}
 ARIMA(2,2,2)(0,0,0)[0]             : AIC=-1949.798, Time=0.30 sec
 ARIMA(0,2,0)(0,0,0)[0]             : AIC=-1800.792, Time=0.07 sec
 ARIMA(1,2,0)(0,0,0)[0]             : AIC=-1873.400, Time=0.09 sec
 ARIMA(0,2,1)(0,0,0)[0]             : AIC=-1954.410, Time=0.11 sec
 ARIMA(1,2,1)(0,0,0)[0]             : AIC=-1953.898, Time=0.17 sec
 ARIMA(0,2,2)(0,0,0)[0]             : AIC=-1953.959, Time=0.15 sec
 ARIMA(1,2,2)(0,0,0)[0]             : AIC=-1951.629, Time=0.20 sec
 ARIMA(0,2,1)(0,0,0)[0] intercept   : AIC=-1953.105, Time=0.17 sec

Best model:  ARIMA(0,2,1)(0,0,0)[0]          
Total fit time: 1.312 seconds


  return get_prediction_index(
  return get_prediction_index(


DAL037-DAL038-DAL039-DAL040 :  0.99 True 2
Performing stepwise search to minimize aic
training model 0/0: {'subset_li': ['population_histfc']}
 ARIMA(2,2,2)(0,0,0)[0]             : AIC=-1349.665, Time=0.29 sec
 ARIMA(0,2,0)(0,0,0)[0]             : AIC=-1151.959, Time=0.07 sec
 ARIMA(1,2,0)(0,0,0)[0]             : AIC=-1238.004, Time=0.04 sec
 ARIMA(0,2,1)(0,0,0)[0]             : AIC=-1354.150, Time=0.12 sec
 ARIMA(1,2,1)(0,0,0)[0]             : AIC=-1353.653, Time=0.18 sec
 ARIMA(0,2,2)(0,0,0)[0]             : AIC=-1353.594, Time=0.15 sec
 ARIMA(1,2,2)(0,0,0)[0]             : AIC=-1350.542, Time=0.17 sec
 ARIMA(0,2,1)(0,0,0)[0] intercept   : AIC=-1353.167, Time=0.18 sec

Best model:  ARIMA(0,2,1)(0,0,0)[0]          
Total fit time: 1.245 seconds


  return get_prediction_index(
  return get_prediction_index(


DAL042-DAL043-DAL044-DAL045-DAL046 :  0.99 True 2
Performing stepwise search to minimize aic
training model 0/0: {'subset_li': ['population_histfc']}
 ARIMA(2,2,2)(0,0,0)[0]             : AIC=-1906.015, Time=0.38 sec
 ARIMA(0,2,0)(0,0,0)[0]             : AIC=-1752.929, Time=0.06 sec
 ARIMA(1,2,0)(0,0,0)[0]             : AIC=-1825.918, Time=0.05 sec
 ARIMA(0,2,1)(0,0,0)[0]             : AIC=-1907.583, Time=0.12 sec
 ARIMA(1,2,1)(0,0,0)[0]             : AIC=-1909.978, Time=0.18 sec
 ARIMA(2,2,1)(0,0,0)[0]             : AIC=-1908.372, Time=0.22 sec
 ARIMA(1,2,2)(0,0,0)[0]             : AIC=-1904.335, Time=0.16 sec
 ARIMA(0,2,2)(0,0,0)[0]             : AIC=-1909.623, Time=0.18 sec
 ARIMA(2,2,0)(0,0,0)[0]             : AIC=-1855.373, Time=0.08 sec
 ARIMA(1,2,1)(0,0,0)[0] intercept   : AIC=-1908.223, Time=0.36 sec

Best model:  ARIMA(1,2,1)(0,0,0)[0]          
Total fit time: 1.832 seconds


  return get_prediction_index(
  return get_prediction_index(


DAL047-DAL048 :  0.9093353462212644 True 2
Performing stepwise search to minimize aic
training model 0/0: {'subset_li': ['population_histfc']}
 ARIMA(2,2,2)(0,0,0)[0]             : AIC=-2500.030, Time=0.25 sec
 ARIMA(0,2,0)(0,0,0)[0]             : AIC=-2414.642, Time=0.07 sec
 ARIMA(1,2,0)(0,0,0)[0]             : AIC=-2446.221, Time=0.11 sec
 ARIMA(0,2,1)(0,0,0)[0]             : AIC=-2493.668, Time=0.13 sec
 ARIMA(1,2,2)(0,0,0)[0]             : AIC=-2504.218, Time=0.16 sec
 ARIMA(0,2,2)(0,0,0)[0]             : AIC=-2506.266, Time=0.28 sec
 ARIMA(1,2,1)(0,0,0)[0]             : AIC=-2466.677, Time=0.16 sec
 ARIMA(0,2,2)(0,0,0)[0] intercept   : AIC=-2505.212, Time=0.25 sec

Best model:  ARIMA(0,2,2)(0,0,0)[0]          
Total fit time: 1.457 seconds


  return get_prediction_index(
  return get_prediction_index(


DAL049 :  0.9786334588319291 True 2
Performing stepwise search to minimize aic
training model 0/0: {'subset_li': ['population_histfc']}
 ARIMA(2,2,2)(0,0,0)[0]             : AIC=-1935.674, Time=0.39 sec
 ARIMA(0,2,0)(0,0,0)[0]             : AIC=-1767.755, Time=0.05 sec
 ARIMA(1,2,0)(0,0,0)[0]             : AIC=-1849.768, Time=0.04 sec
 ARIMA(0,2,1)(0,0,0)[0]             : AIC=-1940.730, Time=0.13 sec
 ARIMA(1,2,1)(0,0,0)[0]             : AIC=-1939.651, Time=0.24 sec
 ARIMA(0,2,2)(0,0,0)[0]             : AIC=-1939.645, Time=0.17 sec
 ARIMA(1,2,2)(0,0,0)[0]             : AIC=inf, Time=0.13 sec
 ARIMA(0,2,1)(0,0,0)[0] intercept   : AIC=-1939.194, Time=0.18 sec

Best model:  ARIMA(0,2,1)(0,0,0)[0]          
Total fit time: 1.376 seconds


  return get_prediction_index(
  return get_prediction_index(


DAL050-DAL051-DAL053-DAL054-DAL055 :  0.8487102033657412 True 2
Performing stepwise search to minimize aic
training model 0/0: {'subset_li': ['population_histfc']}
 ARIMA(2,2,2)(0,0,0)[0]             : AIC=-2581.356, Time=0.44 sec
 ARIMA(0,2,0)(0,0,0)[0]             : AIC=-2515.156, Time=0.08 sec
 ARIMA(1,2,0)(0,0,0)[0]             : AIC=-2523.291, Time=0.06 sec
 ARIMA(0,2,1)(0,0,0)[0]             : AIC=-2543.070, Time=0.10 sec
 ARIMA(1,2,2)(0,0,0)[0]             : AIC=-2568.249, Time=0.18 sec
 ARIMA(2,2,1)(0,0,0)[0]             : AIC=-2580.503, Time=0.28 sec
 ARIMA(1,2,1)(0,0,0)[0]             : AIC=-2567.148, Time=0.23 sec
 ARIMA(2,2,2)(0,0,0)[0] intercept   : AIC=-2564.200, Time=0.29 sec

Best model:  ARIMA(2,2,2)(0,0,0)[0]          
Total fit time: 1.699 seconds


  return get_prediction_index(
  return get_prediction_index(


DAL052-DAL056 :  0.9303352711339744 True 2
Performing stepwise search to minimize aic
training model 0/0: {'subset_li': ['population_histfc']}
 ARIMA(2,2,2)(0,0,0)[0]             : AIC=-2345.027, Time=0.35 sec
 ARIMA(0,2,0)(0,0,0)[0]             : AIC=-2212.646, Time=0.09 sec
 ARIMA(1,2,0)(0,0,0)[0]             : AIC=-2212.471, Time=0.08 sec
 ARIMA(0,2,1)(0,0,0)[0]             : AIC=-2252.714, Time=0.16 sec
 ARIMA(1,2,2)(0,0,0)[0]             : AIC=-2339.155, Time=0.26 sec
 ARIMA(2,2,1)(0,0,0)[0]             : AIC=-2339.210, Time=0.24 sec
 ARIMA(1,2,1)(0,0,0)[0]             : AIC=-2289.326, Time=0.15 sec
 ARIMA(2,2,2)(0,0,0)[0] intercept   : AIC=-2343.943, Time=0.36 sec

Best model:  ARIMA(2,2,2)(0,0,0)[0]          
Total fit time: 1.731 seconds


  return get_prediction_index(
  return get_prediction_index(


DAL057-FTW031 :  0.99 True 2
Performing stepwise search to minimize aic
training model 0/0: {'subset_li': ['population_histfc']}
 ARIMA(2,2,2)(0,0,0)[0]             : AIC=-2222.672, Time=0.34 sec
 ARIMA(0,2,0)(0,0,0)[0]             : AIC=-2088.841, Time=0.05 sec
 ARIMA(1,2,0)(0,0,0)[0]             : AIC=-2154.036, Time=0.11 sec
 ARIMA(0,2,1)(0,0,0)[0]             : AIC=-2225.600, Time=0.13 sec
 ARIMA(1,2,1)(0,0,0)[0]             : AIC=-2226.388, Time=0.14 sec
 ARIMA(2,2,1)(0,0,0)[0]             : AIC=-2224.555, Time=0.26 sec
 ARIMA(1,2,2)(0,0,0)[0]             : AIC=-2223.849, Time=0.18 sec
 ARIMA(0,2,2)(0,0,0)[0]             : AIC=-2226.468, Time=0.15 sec
 ARIMA(0,2,2)(0,0,0)[0] intercept   : AIC=-2223.383, Time=0.16 sec

Best model:  ARIMA(0,2,2)(0,0,0)[0]          
Total fit time: 1.565 seconds


  return get_prediction_index(
  return get_prediction_index(


FTW029 :  0.99 True 2
Performing stepwise search to minimize aic
training model 0/0: {'subset_li': ['population_histfc']}
 ARIMA(2,2,2)(0,0,0)[0]             : AIC=-1777.157, Time=0.37 sec
 ARIMA(0,2,0)(0,0,0)[0]             : AIC=-1610.170, Time=0.06 sec
 ARIMA(1,2,0)(0,0,0)[0]             : AIC=-1677.416, Time=0.04 sec
 ARIMA(0,2,1)(0,0,0)[0]             : AIC=-1778.662, Time=0.10 sec
 ARIMA(1,2,1)(0,0,0)[0]             : AIC=-1780.349, Time=0.15 sec
 ARIMA(2,2,1)(0,0,0)[0]             : AIC=-1778.660, Time=0.17 sec
 ARIMA(1,2,2)(0,0,0)[0]             : AIC=-1777.753, Time=0.26 sec
 ARIMA(0,2,2)(0,0,0)[0]             : AIC=-1780.498, Time=0.23 sec
 ARIMA(0,2,2)(0,0,0)[0] intercept   : AIC=-1778.671, Time=0.23 sec

Best model:  ARIMA(0,2,2)(0,0,0)[0]          
Total fit time: 1.655 seconds


  return get_prediction_index(
  return get_prediction_index(


FTW032-FTW033-FTW034-FTW039 :  0.9893613226313841 True 2
Performing stepwise search to minimize aic
training model 0/0: {'subset_li': ['population_histfc']}
 ARIMA(2,2,2)(0,0,0)[0]             : AIC=-1949.536, Time=0.36 sec
 ARIMA(0,2,0)(0,0,0)[0]             : AIC=-1799.250, Time=0.05 sec
 ARIMA(1,2,0)(0,0,0)[0]             : AIC=-1863.825, Time=0.07 sec
 ARIMA(0,2,1)(0,0,0)[0]             : AIC=-1953.081, Time=0.10 sec
 ARIMA(1,2,1)(0,0,0)[0]             : AIC=-1953.531, Time=0.14 sec
 ARIMA(2,2,1)(0,0,0)[0]             : AIC=-1952.202, Time=0.26 sec
 ARIMA(1,2,2)(0,0,0)[0]             : AIC=-1951.143, Time=0.15 sec
 ARIMA(0,2,2)(0,0,0)[0]             : AIC=-1953.745, Time=0.14 sec
 ARIMA(0,2,2)(0,0,0)[0] intercept   : AIC=-1949.813, Time=0.26 sec

Best model:  ARIMA(0,2,2)(0,0,0)[0]          
Total fit time: 1.565 seconds


  return get_prediction_index(
  return get_prediction_index(


FTW035-FTW036-FTW037-FTW040 :  0.9560986180750723 True 2
Performing stepwise search to minimize aic
training model 0/0: {'subset_li': ['population_histfc']}
 ARIMA(2,2,2)(0,0,0)[0]             : AIC=-2168.953, Time=0.36 sec
 ARIMA(0,2,0)(0,0,0)[0]             : AIC=-2024.462, Time=0.10 sec
 ARIMA(1,2,0)(0,0,0)[0]             : AIC=-2096.582, Time=0.08 sec
 ARIMA(0,2,1)(0,0,0)[0]             : AIC=-2172.548, Time=0.13 sec
 ARIMA(1,2,1)(0,0,0)[0]             : AIC=-2172.930, Time=0.17 sec
 ARIMA(2,2,1)(0,0,0)[0]             : AIC=-2170.849, Time=0.15 sec
 ARIMA(1,2,2)(0,0,0)[0]             : AIC=-2170.139, Time=0.16 sec
 ARIMA(0,2,2)(0,0,0)[0]             : AIC=-2172.920, Time=0.22 sec
 ARIMA(2,2,0)(0,0,0)[0]             : AIC=-2124.962, Time=0.11 sec
 ARIMA(1,2,1)(0,0,0)[0] intercept   : AIC=-2163.846, Time=0.14 sec

Best model:  ARIMA(1,2,1)(0,0,0)[0]          
Total fit time: 1.661 seconds


  return get_prediction_index(
  return get_prediction_index(


FTW038-FTW041-FTW042-FTW043 :  0.4928348986919163 True 2
Performing stepwise search to minimize aic
training model 0/0: {'subset_li': ['population_histfc']}
 ARIMA(2,2,2)(0,0,0)[0]             : AIC=-2701.543, Time=0.42 sec
 ARIMA(0,2,0)(0,0,0)[0]             : AIC=-2626.207, Time=0.12 sec
 ARIMA(1,2,0)(0,0,0)[0]             : AIC=-2627.637, Time=0.12 sec
 ARIMA(0,2,1)(0,0,0)[0]             : AIC=-2643.889, Time=0.17 sec
 ARIMA(1,2,2)(0,0,0)[0]             : AIC=-2695.682, Time=0.33 sec
 ARIMA(2,2,1)(0,0,0)[0]             : AIC=-2702.994, Time=0.09 sec
 ARIMA(1,2,1)(0,0,0)[0]             : AIC=-2665.877, Time=0.11 sec
 ARIMA(2,2,0)(0,0,0)[0]             : AIC=-2672.937, Time=0.16 sec
 ARIMA(2,2,1)(0,0,0)[0] intercept   : AIC=-2703.400, Time=0.27 sec
 ARIMA(1,2,1)(0,0,0)[0] intercept   : AIC=-2664.101, Time=0.09 sec
 ARIMA(2,2,0)(0,0,0)[0] intercept   : AIC=-2671.187, Time=0.11 sec
 ARIMA(2,2,2)(0,0,0)[0] intercept   : AIC=-2700.426, Time=0.36 sec
 ARIMA(1,2,0)(0,0,0)[0] intercept   : A

  return get_prediction_index(
  return get_prediction_index(


Execution time: 25.94173789024353 seconds


In [10]:
aa = rssss[rssss['research_submkt_id']=='DAL031'].iloc[24:48,:]
smape(aa['y_test'],aa['y_pred'])

6.377445448361667

In [None]:
for key, value in rs.items():
    dic = value.get()
    date = dic['p']
len(date)

In [None]:
        for key, value in results.items():
            dic = value.get()
            date = dic['date']
            y_test = dic['y_test']
            y_pred = dic['Y_pred']
            smape_test = dic['smape']
            mse_test = dic['mse']

In [None]:
grouped = df_dal.groupby('research_submkt_id')
for name, group in grouped:
    if name == 'DAL031':
        name = 'DAL031'
        exo = 

In [None]:
a = run_auto_arima_experiment(name, exo, group, params, ntest, min_p, max_p, min_q, max_q, diff):
