In [1]:
import pandas as pd
import numpy as np
import csv

# Convert prediction into quantile

In [2]:
def convert_prediction_to_quantile(df,option):
  pop = pd.read_csv('/content/drive/MyDrive/Flu Forecasting/code/population.csv')
  pop['fips'] = pop['fips'].astype(str).str.zfill(2)
  df = df.merge(pop, on='fips', how='inner')
  if option == 0:
    df['Prediction_1w'] = (df['Prediction_1w'] * df['population'] / 100000).round().astype(int)
    df['Prediction_2w'] = (df['Prediction_2w'] * df['population'] / 100000).round().astype(int)
    df['Prediction_3w'] = (df['Prediction_3w'] * df['population'] / 100000).round().astype(int)
    df['Prediction_4w'] = (df['Prediction_4w'] * df['population'] / 100000).round().astype(int)
    df.drop(columns=['population'], inplace=True)
  result = df
  quantiles = np.array([0.01, 0.025] + list(np.arange(0.05, 0.95+0.05, 0.05)) + [0.975, 0.99])
  quantile_results = []  # Initialize an empty list to store the data frames

  grouped = df.groupby(['fips', 'Week_end'])

  for name, group in grouped:
      fips, week_end = name
      for i, prediction_column in enumerate(['Prediction_1w', 'Prediction_2w', 'Prediction_3w', 'Prediction_4w'], start=0):
          values = group[prediction_column].values
          quantile_values = np.percentile(values, quantiles * 100)
          # Create a DataFrame and append it to the list
          quantile_results.append(pd.DataFrame({
              'fips': [fips] * len(quantiles),
              'Week_end': [week_end] * len(quantiles),
              'horizon': [i] * len(quantiles),
              'Quantile': quantiles,
              'Value': quantile_values
          }))

  # Concatenate all the data frames in the list into a single data frame
  quantile_result = pd.concat(quantile_results, ignore_index=True)
  quantile_result.reset_index(drop=True, inplace=True)
  quantile_result['Week_end'] = quantile_result['Week_end'] + pd.DateOffset(days=7)
  quantile_result1 = quantile_result.copy()
  quantile_result1.rename(columns={
      'fips': 'location',
      'Week_end': 'reference_date',
      'horizon': 'horizon',
      'Quantile': 'output_type_id',
      'Value': 'value'
  }, inplace=True)


  quantile_result1['target'] = 'wk inc flu hosp'

  quantile_result1['horizon'] = quantile_result['horizon'].astype(int)  # Convert 'horizon' to integer if it's not already

  quantile_result1['target_end_date'] = quantile_result1['reference_date'] + quantile_result1['horizon'] * pd.to_timedelta(7, unit='D')
  quantile_result1['location'] = quantile_result1['location'].astype(str).str.zfill(2)
  quantile_result1['location'] = quantile_result1['location'].astype(str)
  quantile_result1['output_type'] = 'quantile'
  quantile_result1['value'] = np.where(quantile_result1['value'] < 0, 0, quantile_result1['value'])
  quantile_result1=quantile_result1[['reference_date', 'target', 'horizon', 'target_end_date', 'location', 'output_type', 'output_type_id', 'value']]
  return quantile_result1

In [3]:
def convert_prediction_to_quantile_adm(df):
  quantiles = np.array([0.01, 0.025] + list(np.arange(0.05, 0.95+0.05, 0.05)) + [0.975, 0.99])
  quantile_results = []  # Initialize an empty list to store the data frames

  grouped = df.groupby(['fips', 'Week_end'])

  for name, group in grouped:
      fips, week_end = name
      for i, prediction_column in enumerate(['Prediction_1w', 'Prediction_2w', 'Prediction_3w', 'Prediction_4w'], start=0):
          values = group[prediction_column].values
          quantile_values = np.percentile(values, quantiles * 100)
          # Create a DataFrame and append it to the list
          quantile_results.append(pd.DataFrame({
              'fips': [fips] * len(quantiles),
              'Week_end': [week_end] * len(quantiles),
              'horizon': [i] * len(quantiles),
              'Quantile': quantiles,
              'Value': quantile_values
          }))

  # Concatenate all the data frames in the list into a single data frame
  quantile_result = pd.concat(quantile_results, ignore_index=True)

  quantile_result['Week_end'] = pd.to_datetime(quantile_result['Week_end']) + pd.DateOffset(days=7)
  quantile_result1 = quantile_result.copy()
  quantile_result1.rename(columns={
      'fips': 'location',
      'Week_end': 'reference_date',
      'horizon': 'horizon',
      'Quantile': 'output_type_id',
      'Value': 'value'
  }, inplace=True)


  quantile_result1['target'] = 'wk inc flu hosp'

  quantile_result1['horizon'] = quantile_result['horizon'].astype(int)  # Convert 'horizon' to integer if it's not already

  quantile_result1['target_end_date'] = quantile_result1['reference_date'] + quantile_result1['horizon'] * pd.to_timedelta(7, unit='D')
  quantile_result1['location'] = quantile_result1['location'].astype(str).str.zfill(2)
  quantile_result1['location'] = quantile_result1['location'].astype(str)
  quantile_result1['output_type'] = 'quantile'
  quantile_result1['value'] = np.where(quantile_result1['value'] < 0, 0, quantile_result1['value'])
  quantile_result1=quantile_result1[['reference_date', 'target', 'horizon', 'target_end_date', 'location', 'output_type', 'output_type_id', 'value']]
  return quantile_result1

### Increase the interval

In [4]:

import math

def adjust_quantile_ranges_inplace(result_df):
    # Define the quantiles and adjustment factors
    quantiles = np.array([0.01, 0.025] + list(np.arange(0.05, 0.95 + 0.05, 0.05)) + [0.975, 0.99])
    adjustment_factors = {
        0: 2.0,
        1: 2.0,
        2: 2.0,
        3: 2.0
    }

    # Copy the DataFrame to avoid modifying the original
    df_copy = result_df.copy()
    df_copy['output_type_id'] = df_copy['output_type_id'].astype(float)

    # Calculate the adjustment factors for each horizon
    df_copy['adjustment_factor'] = df_copy['horizon'].map(adjustment_factors).fillna(1)

    # Get the median values for each location and horizon
    medians = df_copy[df_copy['output_type_id'] == 0.5].set_index(['location', 'horizon'])['value']


    df_copy = df_copy.join(medians, on=['location', 'horizon'], rsuffix='_median')
    df_copy['adjusted_value'] = df_copy['value']


    condition = df_copy['output_type_id'] != 0.5
    df_copy.loc[condition, 'adjusted_value'] = df_copy.loc[condition].apply(
        lambda row: row['value_median'] + (row['value'] - row['value_median']) * row['adjustment_factor'], axis=1
    )


    df_copy['value'] = df_copy['adjusted_value']


    df_copy.drop(columns=['adjustment_factor', 'value_median', 'adjusted_value'], inplace=True)

    df_copy['value'] = np.where(df_copy['value'] < 0, 0, df_copy['value'])
    return df_copy




# Plot

In [5]:
import pandas as pd
import matplotlib.pyplot as plt
from datetime import timedelta
def plot_quantile(quantile_result1):
  hist = pd.read_csv('/content/drive/MyDrive/Flu Forecasting/code/trainingdata.csv')


  quantile_result1['location'] = quantile_result1['location'].astype(str).str.zfill(2)
  quantile_result2 = quantile_result1
  hist['Week_end'] = pd.to_datetime(hist['Week_end'])
  hist['fips'] = hist['fips'].astype(str).str.zfill(2)
  quantile_result2['target_end_date'] = pd.to_datetime(quantile_result2['target_end_date'])
  quantile_result2['reference_date'] = pd.to_datetime(quantile_result2['reference_date'])
  quantiles = [0.025, 0.5, 0.975]
  quantile_data = quantile_result2[quantile_result2['output_type_id'].isin(quantiles)]
  end_date = pd.to_datetime(quantile_result2['reference_date'].iloc[0]) + timedelta(weeks=3)
  filtered_hist = hist[(hist['Week_end'] >= '2023-09-01') & (hist['Week_end'] <= end_date)]
  filtered_quantile_data = quantile_data

  unique_states = quantile_result1['location'].unique()

  for state in unique_states:
      state_hist_data = filtered_hist[filtered_hist['fips'] == state]
      state_quantile_data = filtered_quantile_data[filtered_quantile_data['location'] == state]

      plt.figure(figsize=(12, 6))

      plt.plot(state_hist_data['Week_end'], state_hist_data['total_admissions'], marker='o', linestyle='-', label=f'{state} - Historical')

      for q in quantiles:

          q_data = state_quantile_data[state_quantile_data['output_type_id'] == q]
          plt.plot(q_data['target_end_date'], q_data['value'], marker='x', linestyle='--', label=f'{state} - Quantile {q}')

          # Fill the area between 0.025 and 0.975 quantiles with color
          if q == 0.025:
              lower_q_data = q_data
          else:
              upper_q_data = q_data

      plt.fill_between(lower_q_data['target_end_date'], lower_q_data['value'], upper_q_data['value'], color='gray', alpha=0.5)

      plt.title(f'Total Admissions for {state}')
      plt.xlabel('End Date')
      plt.ylabel('Total Admissions')
      plt.legend()
      plt.grid(True)
      plt.xlim(state_hist_data['Week_end'].min(), state_quantile_data['target_end_date'].max())
      plt.tight_layout()
      plt.show()


In [6]:
df = pd.read_csv('/content/drive/MyDrive/Flu Forecasting/Flusight Ensemble/2024-02-17-FluSight-ensemble.csv')
df = df[(df['location'] == '39') & (df['target'] =='wk inc flu hosp')]
df['reference_date'] = pd.to_datetime(df['reference_date'])
df['target_end_date'] = pd.to_datetime(df['target_end_date'])
df['output_type_id'] = df['output_type_id'].astype(float)
plot_quantile(df)

FileNotFoundError: [Errno 2] No such file or directory: '/content/drive/MyDrive/Flu Forecasting/Flusight Ensemble/2024-02-17-FluSight-ensemble.csv'

# Calculate MAE

In [26]:
def mape_cal(quantile_result1, option):
    if option == 0:
        hist = pd.read_csv('/content/drive/MyDrive/Flu Forecasting/code/smootheddata.csv')
        tgt = 'total_admissions'
    else:
        hist = pd.read_csv('/content/drive/MyDrive/Flu Forecasting/code/smoothedrate.csv')
        tgt = 'rate'

    hist['Week_end'] = pd.to_datetime(hist['Week_end'])
    hist['fips'] = hist['fips'].astype(str).str.zfill(2)
    quantile_result1['location'] = quantile_result1['location'].astype(str).str.zfill(2)
    quantile_result1['reference_date'] = pd.to_datetime(quantile_result1['reference_date'])
    quantile_result1['target_end_date'] = pd.to_datetime(quantile_result1['target_end_date'])

    desired_dates = quantile_result1['target_end_date'].unique()
    hist = hist[hist['Week_end'].isin(desired_dates)]
    hist = hist[['fips', 'Week_end', tgt]]

    regionresult = quantile_result1[quantile_result1['output_type_id'] == 0.500]
    merged_df = pd.merge(hist, regionresult, left_on=['fips', 'Week_end'], right_on=['location', 'target_end_date'])

    # Handle cases where actual values are zero
    merged_df = merged_df[merged_df[tgt] != 0]

    # Calculate the percentage error and MAPE
    merged_df['percentage_error'] = abs((merged_df[tgt] - merged_df['value']) / merged_df[tgt]) * 100
    mape = merged_df['percentage_error'].mean()
    mape_by_horizon = merged_df.groupby('horizon')['percentage_error'].mean()

    # Output the MAPE and return
    merged_df1 = merged_df[['reference_date', 'location', 'horizon', 'percentage_error']]
    print(f'Mean Absolute Percentage Error (MAPE): {mape}')

    return merged_df1, mape, mape_by_horizon


In [9]:
def mae_cal(quantile_result1,option):
  if option == 0:
    hist = pd.read_csv('/content/drive/MyDrive/Flu Forecasting/code/smootheddata.csv')
    tgt = 'total_admissions'
  else:
    hist = pd.read_csv('/content/drive/MyDrive/Flu Forecasting/code/smoothedrate.csv')
    tgt = 'rate'
  hist['Week_end'] = pd.to_datetime(hist['Week_end'])
  hist['fips'] =hist['fips'].astype(str).str.zfill(2)
  quantile_result1['location'] = quantile_result1['location'].astype(str).str.zfill(2)
  quantile_result1['reference_date'] = pd.to_datetime(quantile_result1['reference_date'])
  quantile_result1['target_end_date'] = pd.to_datetime(quantile_result1['target_end_date'])
  desired_dates = quantile_result1['target_end_date'].unique()
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[['fips','Week_end',tgt]]
  regionresult = quantile_result1[quantile_result1['output_type_id']==0.500]
  merged_df = pd.merge(hist, regionresult, left_on=['fips', 'Week_end'], right_on=['location', 'target_end_date'])

  # Calculate the absolute difference between actual and predicted total admissions
  merged_df['absolute_difference'] = abs(merged_df[tgt] - merged_df['value'])

  # Calculate the Mean Absolute Error (MAE)
  mae = merged_df['absolute_difference'].mean()
  mae_by_horizon = merged_df.groupby('horizon')['absolute_difference'].mean()
  # Print the MAE
  merged_df1 = merged_df[['reference_date','location','horizon','absolute_difference']]
  print(f'Mean Absolute Error (MAE): {mae}')
  return merged_df1,mae,mae_by_horizon

In [10]:
def mae_cal1(quantile_result1,option,o):
  if option == 0:
    hist = pd.read_csv('/content/drive/MyDrive/Flu Forecasting/code/smootheddata.csv')
    tgt = 'total_admissions'
  else:
    hist = pd.read_csv('/content/drive/MyDrive/Flu Forecasting/code/smoothedrate.csv')
    tgt = 'rate'
  hist['Week_end'] = pd.to_datetime(hist['Week_end'])
  hist['fips'] =hist['fips'].astype(str).str.zfill(2)
  quantile_result1['location'] = quantile_result1['location'].astype(str).str.zfill(2)
  quantile_result1['reference_date'] = pd.to_datetime(quantile_result1['reference_date'])
  quantile_result1['target_end_date'] = pd.to_datetime(quantile_result1['target_end_date'])
  desired_dates = quantile_result1['target_end_date'].unique()
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[['fips','Week_end',tgt]]
  regionresult = quantile_result1[quantile_result1['output_type_id']==0.500]
  merged_df = pd.merge(hist, regionresult, left_on=['fips', 'Week_end'], right_on=['location', 'target_end_date'])

  # Calculate the absolute difference between actual and predicted total admissions
  merged_df['absolute_difference'] = abs(merged_df[tgt] - merged_df['value'])

  # Calculate the Mean Absolute Error (MAE)
  mae = merged_df['absolute_difference'].mean()
  mae_by_horizon = merged_df.groupby('horizon')['absolute_difference'].mean()
  merged_df1 = merged_df[['reference_date','location','horizon','absolute_difference']]
  # Print the MAE
  if o ==1:
    print(f'Mean Absolute Error (MAE): {mae}')
  return merged_df1,mae,mae_by_horizon

In [11]:
def mae_cal2(quantile_result1,desired_dates,option,o):
  if option == 0:
    hist = pd.read_csv('/content/drive/MyDrive/Flu Forecasting/code/smootheddata.csv')
    tgt = 'total_admissions'
  else:
    hist = pd.read_csv('/content/drive/MyDrive/Flu Forecasting/code/smoothedrate.csv')
    tgt = 'rate'
  hist['Week_end'] = pd.to_datetime(hist['Week_end'])
  hist['fips'] =hist['fips'].astype(str).str.zfill(2)
  quantile_result1['location'] = quantile_result1['location'].astype(str).str.zfill(2)
  quantile_result1['reference_date'] = pd.to_datetime(quantile_result1['reference_date'])
  quantile_result1['target_end_date'] = pd.to_datetime(quantile_result1['target_end_date'])
  #desired_dates = quantile_result1['target_end_date'].unique()
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[['fips','Week_end',tgt]]
  regionresult = quantile_result1[quantile_result1['output_type_id']==0.500]
  merged_df = pd.merge(hist, regionresult, left_on=['fips', 'Week_end'], right_on=['location', 'target_end_date'])

  # Calculate the absolute difference between actual and predicted total admissions
  merged_df['absolute_difference'] = abs(merged_df[tgt] - merged_df['value'])

  # Calculate the Mean Absolute Error (MAE)
  mae = merged_df['absolute_difference'].mean()
  mae_by_horizon = merged_df.groupby('horizon')['absolute_difference'].mean()
  merged_df1 = merged_df[['reference_date','location','horizon','absolute_difference']]
  # Print the MAE
  if o ==1:
    print(f'Mean Absolute Error (MAE): {mae}')
  return merged_df1,mae,mae_by_horizon

In [12]:
import numpy as np
'''
def weighted_interval_score(quantiles, values, actual_value):
    """
    Compute the weighted interval score (WIS) for forecast evaluation.

    Parameters:
    - quantiles (list or np.array): Quantiles at which forecasts were made (e.g., [0.1, 0.5, 0.9]).
    - values (list or np.array): Forecasted values corresponding to the provided quantiles.
    - actual_value (float): The actual observed value.

    Returns:
    - float: The computed weighted interval score.
    """
    if np.isnan(actual_value):
        return np.nan
    not_nan_indices = ~np.isnan(quantiles)
    quantiles = np.array(quantiles)[not_nan_indices]
    values = np.array(values)[not_nan_indices]


    try:
        median_index = np.where(quantiles == 0.5)[0][0]
        median_forecast = values[median_index]
    except IndexError:
        return np.nan

    quantile_losses = []
    for q, v in zip(quantiles, values):
        loss = 2 * max(q * (actual_value - v), (1 - q) * (v - actual_value))
        quantile_losses.append(loss)

    wis = np.mean(quantile_losses)
    return wis
'''
import numpy as np

def quantile_loss(q, q_value, actual_value):
    """
    Compute the quantile loss (pinball loss) for a given quantile.

    Parameters:
    - q (float): The quantile (e.g., 0.1, 0.5, 0.9).
    - q_value (float): The forecasted value at quantile q.
    - actual_value (float): The actual observed value.

    Returns:
    - float: The quantile loss for quantile q.
    """
    if actual_value <= q_value:
        return 2 * (1 - q) * (q_value - actual_value)
    else:
        return 2 * q * (actual_value - q_value)

def weighted_interval_score(quantiles, values, actual_value):
    """
    Compute the weighted interval score (WIS) for forecast evaluation.

    Parameters:
    - quantiles (np.array): Quantiles at which forecasts were made.
    - values (np.array): Forecasted values corresponding to the provided quantiles.
    - actual_value (float): The actual observed value.

    Returns:
    - float: The computed weighted interval score.
    """
    if np.isnan(actual_value):
        return np.nan

    quantile_losses = []
    for q, v in zip(quantiles, values):
        loss = quantile_loss(q, v, actual_value)
        quantile_losses.append(loss)


    interval_scores = []


    for i in range(len(quantiles) - 1):
        lower_idx = i
        upper_idx = len(quantiles) - 1 - i
        interval_score = (quantile_losses[lower_idx] + quantile_losses[upper_idx]) / 2
        interval_scores.append(interval_score)

    wis = np.mean(interval_scores)
    return wis


def cal_wis(quantile_result, option):
    # Load historical data based on the option
    if option == 0:
        hist = pd.read_csv('/content/drive/MyDrive/Flu Forecasting/code/smootheddata.csv')
        tgt = 'total_admissions'
    else:
        hist = pd.read_csv('/content/drive/MyDrive/Flu Forecasting/code/smoothedrate.csv')
        tgt = 'rate'

    hist['Week_end'] = pd.to_datetime(hist['Week_end'])
    hist['fips'] = hist['fips'].astype(str).str.zfill(2)

    quantile_result['location'] = quantile_result['location'].astype(str).str.zfill(2)
    quantile_result['reference_date'] = pd.to_datetime(quantile_result['reference_date'])
    quantile_result['target_end_date'] = pd.to_datetime(quantile_result['target_end_date'])

    desired_dates = quantile_result['target_end_date'].unique()
    hist = hist[hist['Week_end'].isin(desired_dates)]
    hist = hist[['fips', 'Week_end', tgt]]


    merged_df = pd.merge(hist, quantile_result, left_on=['fips', 'Week_end'], right_on=['location', 'target_end_date'])
    grouped = merged_df.groupby(['location', 'target_end_date', 'horizon'])


    wis_scores = []
    wis_by_horizon = {}
    wis_results = []
    for (location, date, horizon), group in grouped:
        actual_value = group[tgt].iloc[0]
        quantiles = group['output_type_id'].unique()
        #print(quantiles)
        values = group['value']
        wis = weighted_interval_score(quantiles, values, actual_value)
        wis_scores.append(wis)
        if horizon not in wis_by_horizon:
            wis_by_horizon[horizon] = []
        wis_by_horizon[horizon].append(wis)
        wis_results.append({
            "location": location,
            "date": date - pd.Timedelta(days=horizon * 7),
            "horizon": horizon,
            "WIS": wis
        })
    # Calculate mean WIS across all groups
    mean_wis = np.nanmean(wis_scores)
    wis_df = pd.DataFrame(wis_results)
    # Calculate mean WIS by horizon
    mean_wis_by_horizon = {h: np.nanmean(scores) for h, scores in wis_by_horizon.items()}

    print(f'Weighted Interval Score (WIS): {mean_wis}')
    #print(f'Mean WIS by Horizon: {mean_wis_by_horizon}')
    return wis_df, mean_wis, mean_wis_by_horizon

def cal_wis2(quantile_result,desired_dates, option):
    # Load historical data based on the option
    if option == 0:
        hist = pd.read_csv('/content/drive/MyDrive/Flu Forecasting/code/smootheddata.csv')
        tgt = 'total_admissions'
    else:
        hist = pd.read_csv('/content/drive/MyDrive/Flu Forecasting/code/smoothedrate.csv')
        tgt = 'rate'

    hist['Week_end'] = pd.to_datetime(hist['Week_end'])
    hist['fips'] = hist['fips'].astype(str).str.zfill(2)
    hist = hist[hist['Week_end'].isin(desired_dates)]
    quantile_result['location'] = quantile_result['location'].astype(str).str.zfill(2)
    quantile_result['reference_date'] = pd.to_datetime(quantile_result['reference_date'])
    quantile_result['target_end_date'] = pd.to_datetime(quantile_result['target_end_date'])

    #desired_dates = quantile_result['target_end_date'].unique()
    hist = hist[hist['Week_end'].isin(desired_dates)]
    hist = hist[['fips', 'Week_end', tgt]]


    merged_df = pd.merge(hist, quantile_result, left_on=['fips', 'Week_end'], right_on=['location', 'target_end_date'])
    grouped = merged_df.groupby(['location', 'target_end_date', 'horizon'])


    wis_scores = []
    wis_by_horizon = {}
    wis_results = []
    for (location, date, horizon), group in grouped:
        actual_value = group[tgt].iloc[0]
        quantiles = group['output_type_id'].unique()
        #print(quantiles)
        values = group['value']
        wis = weighted_interval_score(quantiles, values, actual_value)
        wis_scores.append(wis)
        if horizon not in wis_by_horizon:
            wis_by_horizon[horizon] = []
        wis_by_horizon[horizon].append(wis)
        wis_results.append({
            "location": location,
            "date": date - pd.Timedelta(days=horizon * 7),
            "horizon": horizon,
            "WIS": wis
        })
    # Calculate mean WIS across all groups
    mean_wis = np.nanmean(wis_scores)
    wis_df = pd.DataFrame(wis_results)
    # Calculate mean WIS by horizon
    mean_wis_by_horizon = {h: np.nanmean(scores) for h, scores in wis_by_horizon.items()}

    #print(f'Weighted Interval Score (WIS): {mean_wis}')
    #print(f'Mean WIS by Horizon: {mean_wis_by_horizon}')
    return wis_df, mean_wis, mean_wis_by_horizon


# State Level EXponential Smoothing

In [None]:
from datetime import datetime, timedelta
start_date = datetime.strptime('2023-10-07', '%Y-%m-%d')
end_date = datetime.strptime('2024-02-24', '%Y-%m-%d')
current_date = start_date
dates_list = []

while current_date <= end_date:
    dates_list.append(current_date.strftime('%Y-%m-%d'))
    current_date += timedelta(days=7)

test_weeks = dates_list
df1 = pd.DataFrame()
df2 = pd.DataFrame()
df3 = pd.DataFrame()
option = 1
mae_results = []
for test_week in test_weeks:
    print(test_week)

    file_path = f'/content/drive/MyDrive/Flu Forecasting/ARIMA/state result/{test_week}_quantileresultexp.csv'

    df = pd.read_csv(file_path)

    if option == 1:
      pop = pd.read_csv('/content/drive/MyDrive/Flu Forecasting/code/population.csv')
      merged_df = pd.merge(df, pop, left_on='location', right_on='fips')
      merged_df['value'] = (merged_df['value'] * 100000) / merged_df['population']

      final_df = merged_df.drop(columns=['fips', 'population'])
      df = final_df
      df.to_csv(f'/content/drive/MyDrive/Flu Forecasting/ARIMA/state result/{test_week}_quantileexp.csv',index=False)
      df = adjust_quantile_ranges_inplace(df)

      df.to_csv(f'/content/drive/MyDrive/Flu Forecasting/ARIMA/state result/{test_week}_quantileexplarger.csv',index=False)
      #break
    df1 = pd.concat([df1, df], ignore_index=True)
    mergeddf,mae,maewk = mae_cal(df,option)
    df2 = pd.concat([df2, mergeddf], ignore_index=True)
    me, mean_wis, mean_wis_by_horizon = cal_wis(df,option)
    df3 = pd.concat([df3, me], ignore_index=True)
    mae_results.append({
        'test_week': test_week,
        'horizon': 'Overall',
        'state MAE': mae,
        'state WIS':mean_wis
    })

    '''
    for horizon, horizon_mae in maewk.items():
        mae_results.append({
            'test_week': test_week,
            'horizon': f'{horizon}_w',
            'state MAE': horizon_mae
        })
    for horizon, horizon_wis in mean_wis_by_horizon.items():
        mae_results.append({
            'test_week': test_week,
            'horizon': f'{horizon}_w',
            'state WIS': horizon_wis
        })
    '''
    combined_horizon_results = {}


    for horizon, horizon_mae in maewk.items():
        combined_horizon_results[horizon] = {
            'state MAE': horizon_mae
        }


    for horizon, horizon_wis in mean_wis_by_horizon.items():
        if horizon in combined_horizon_results:
            combined_horizon_results[horizon]['state WIS'] = horizon_wis
        else:
            combined_horizon_results[horizon] = {'state WIS': horizon_wis}


    for horizon, metrics in combined_horizon_results.items():
        metrics.update({
            'test_week': test_week,
            'horizon': f'{horizon}_w'
        })
        mae_results.append(metrics)
mae_df = pd.DataFrame(mae_results)
df2.to_csv('/content/drive/MyDrive/Flu Forecasting/result/result by state/statemaelarger.csv', index=False)
df3.to_csv('/content/drive/MyDrive/Flu Forecasting/result/result by state/statewislarger.csv', index=False)
if option == 1:
  #mae_df.to_csv('/content/drive/MyDrive/Flu Forecasting/result/statemae_ratearima.csv', index=False)
  mae_df.to_csv('/content/drive/MyDrive/Flu Forecasting/result/increase interval/statemae_rateexp.csv', index=False)
else:
  mae_df.to_csv('/content/drive/MyDrive/Flu Forecasting/result/statemae.csv', index=False)


2023-10-07
Mean Absolute Error (MAE): 0.249748908688373
Weighted Interval Score (WIS): 0.17425480583552422
2023-10-14
Mean Absolute Error (MAE): 0.26770764227993027
Weighted Interval Score (WIS): 0.184304367685814
2023-10-21
Mean Absolute Error (MAE): 0.281830931881702
Weighted Interval Score (WIS): 0.19352995717511828
2023-10-28
Mean Absolute Error (MAE): 0.3222462598293772
Weighted Interval Score (WIS): 0.2250940247722661
2023-11-04
Mean Absolute Error (MAE): 0.4086264548328264
Weighted Interval Score (WIS): 0.28435374122747975
2023-11-11
Mean Absolute Error (MAE): 0.5152032435577176
Weighted Interval Score (WIS): 0.3470998667183466
2023-11-18
Mean Absolute Error (MAE): 0.6946983917420073
Weighted Interval Score (WIS): 0.450252048018217
2023-11-25
Mean Absolute Error (MAE): 0.8813758258313888
Weighted Interval Score (WIS): 0.5803254069774918
2023-12-02
Mean Absolute Error (MAE): 1.2342666312896131
Weighted Interval Score (WIS): 0.863349042152856
2023-12-09
Mean Absolute Error (MAE): 

In [None]:
df2.isna().sum()

reference_date         0
location               0
horizon                0
absolute_difference    0
dtype: int64

In [None]:
a,mae,wk = mae_cal(df1,1)
print(wk)

Mean Absolute Error (MAE): 1.268929816972287
horizon
0    0.684034
1    1.093668
2    1.464067
3    1.833950
Name: absolute_difference, dtype: float64


In [None]:
a,wis,wk = cal_wis(df1,option)
print(wk)

Weighted Interval Score (WIS): 0.8932066142906208
{0: 0.46975875050224575, 1: 0.769233664030958, 2: 1.0377815348677277, 3: 1.2960525077615515}


In [None]:
a,mae,wk = mae_cal(df1,1)
print(wk)
a,wis,wk = cal_wis(df1,option)
print(wk)

Mean Absolute Error (MAE): 1.389986261622987
horizon
0    0.685696
1    1.178176
2    1.638696
3    2.057376
Name: absolute_difference, dtype: float64
Weighted Interval Score (WIS): 1.0039272600578086
{0: 0.45943947396811163, 1: 0.8279898721534976, 2: 1.1946785485262155, 3: 1.5336011455834107}


# Regional LSTM

## recent trend 6 weeks


In [None]:
from datetime import datetime, timedelta
start_date = datetime.strptime('2023-09-30', '%Y-%m-%d')
end_date = datetime.strptime('2024-02-17', '%Y-%m-%d')
current_date = start_date
dates_list = []

while current_date <= end_date:
    dates_list.append(current_date.strftime('%Y-%m-%d'))
    current_date += timedelta(days=7)

test_weeks = dates_list
df1 = pd.DataFrame()
df2 = pd.DataFrame()
df3 = pd.DataFrame()
mae_results = []
option = 1
for test_week in test_weeks:
    print(test_week)
    test_week_datetime = datetime.strptime(test_week, '%Y-%m-%d')
    save_week = (test_week_datetime + timedelta(days=7)).strftime('%Y-%m-%d')
    file_path = f'/content/drive/MyDrive/Flu Forecasting/LSTM/region result/recent trend (6 weeks)/1000 times/{test_week}_allresult.pkl'
    #save_path = f'/content/drive/MyDrive/Flu Forecasting/LSTM/region result/recent trend (6 weeks)/quantile result/{save_week}_quantile.csv'
    save_path1 = f'/content/drive/MyDrive/Flu Forecasting/LSTM/region result/recent trend (6 weeks)/quantile result/{save_week}_quantilelarger.csv'
    # Load the summary table, sort it, and filter
    df = pd.read_pickle(file_path)

    qt = convert_prediction_to_quantile(df,option)
    qt = adjust_quantile_ranges_inplace(qt)

    qt.to_csv(save_path1,index = False)

    #qt.to_csv(save_path,index = False)
    df1 = pd.concat([df1, qt], ignore_index=True)
    mergeddf,mae,maewk = mae_cal(qt,option)
    df2 = pd.concat([df2, mergeddf], ignore_index=True)
    me, mean_wis, mean_wis_by_horizon = cal_wis(qt,option)
    df3 = pd.concat([df3, me], ignore_index=True)
    mae_results.append({
        'test_week': save_week,
        'horizon': 'Overall',
        'regional MAE(recent trend)': mae,
        'regional WIS(recent trend)': mean_wis
    })

    '''
    for horizon, horizon_mae in maewk.items():
        mae_results.append({
            'test_week': save_week,
            'horizon': f'{horizon}_w',
            'regional MAE(recent trend)': horizon_mae
        })
    '''
    combined_horizon_results = {}


    for horizon, horizon_mae in maewk.items():
        combined_horizon_results[horizon] = {
            'regional MAE(recent trend)': horizon_mae
        }


    for horizon, horizon_wis in mean_wis_by_horizon.items():
        if horizon in combined_horizon_results:
            combined_horizon_results[horizon]['regional WIS(recent trend)'] = horizon_wis
        else:
            combined_horizon_results[horizon] = {'regional WIS(recent trend)': horizon_wis}


    for horizon, metrics in combined_horizon_results.items():
        metrics.update({
            'test_week': save_week,
            'horizon': f'{horizon}_w'
        })
        mae_results.append(metrics)
mae_df = pd.DataFrame(mae_results)
df2.to_csv('/content/drive/MyDrive/Flu Forecasting/result/result by state/recenttrendmaelarger.csv', index=False)
df3.to_csv('/content/drive/MyDrive/Flu Forecasting/result/result by state/recenttrendwislarger.csv', index=False)

if option == 1:
  #mae_df.to_csv('/content/drive/MyDrive/Flu Forecasting/result/recenttrendmae_rate.csv', index=False)
  mae_df.to_csv('/content/drive/MyDrive/Flu Forecasting/result/increase interval/recenttrendmae_rate.csv', index=False)
else:
  mae_df.to_csv('/content/drive/MyDrive/Flu Forecasting/result/recenttrendmae.csv', index=False)


2023-09-30
Mean Absolute Error (MAE): 0.506180825664213
Weighted Interval Score (WIS): 0.3358722915162579
2023-10-07
Mean Absolute Error (MAE): 0.3281131099061078
Weighted Interval Score (WIS): 0.21915258975680257
2023-10-14
Mean Absolute Error (MAE): 0.34174263762349555
Weighted Interval Score (WIS): 0.22809565917942626
2023-10-21
Mean Absolute Error (MAE): 0.49311586722586487
Weighted Interval Score (WIS): 0.3196550010263253
2023-10-28
Mean Absolute Error (MAE): 0.6497167405977146
Weighted Interval Score (WIS): 0.4358749564870476
2023-11-04
Mean Absolute Error (MAE): 0.9003615477275406
Weighted Interval Score (WIS): 0.6078266285365411
2023-11-11
Mean Absolute Error (MAE): 0.8790537405746389
Weighted Interval Score (WIS): 0.5934952136281549
2023-11-18
Mean Absolute Error (MAE): 1.1850673744728029
Weighted Interval Score (WIS): 0.768144532309966
2023-11-25
Mean Absolute Error (MAE): 1.0344990598256403
Weighted Interval Score (WIS): 0.7685510712359089
2023-12-02
Mean Absolute Error (MAE

In [None]:
a,mae,wk = mae_cal(df1,option)
print(wk)
a,wis,wk = cal_wis(df1,option)
print(wk)

Mean Absolute Error (MAE): 1.243133412331973
horizon
0    0.719151
1    1.105023
2    1.472950
3    1.675410
Name: absolute_difference, dtype: float64
Weighted Interval Score (WIS): 0.8824750229755086
{0: 0.4860427350665976, 1: 0.7661005433208771, 2: 1.0459118233321838, 3: 1.2318449901823763}


In [None]:
a,mae,wk = mae_cal(df1,option)
print(wk)

Mean Absolute Error (MAE): 1.243133412331973
horizon
0    0.719151
1    1.105023
2    1.472950
3    1.675410
Name: absolute_difference, dtype: float64


In [None]:
a,wis,wk = cal_wis(df1,option)
print(wk)

Weighted Interval Score (WIS): 0.9668240292766822
{0: 0.5064599070699679, 1: 0.843043169266353, 2: 1.1606592391774917, 3: 1.357133801592916}


In [None]:
df2

Unnamed: 0,reference_date,location,horizon,absolute_difference
0,2023-10-07,01,0,0.088922
1,2023-10-07,01,1,0.058234
2,2023-10-07,01,2,0.171246
3,2023-10-07,01,3,0.544116
4,2023-10-07,02,0,0.530435
...,...,...,...,...
4035,2024-02-24,55,3,1.021178
4036,2024-02-24,56,0,0.882372
4037,2024-02-24,56,1,0.770545
4038,2024-02-24,56,2,1.701070


## Long trend 3 cluster


In [None]:
from datetime import datetime, timedelta
start_date = datetime.strptime('2023-09-30', '%Y-%m-%d')
end_date = datetime.strptime('2024-02-17', '%Y-%m-%d')
current_date = start_date
dates_list = []

while current_date <= end_date:
    dates_list.append(current_date.strftime('%Y-%m-%d'))
    current_date += timedelta(days=7)

test_weeks = dates_list
df1 = pd.DataFrame()
df2 = pd.DataFrame()
df3 = pd.DataFrame()
mae_results = []
option = 1
for test_week in test_weeks:
    print(test_week)
    test_week_datetime = datetime.strptime(test_week, '%Y-%m-%d')
    save_week = (test_week_datetime + timedelta(days=7)).strftime('%Y-%m-%d')
    file_path = f'/content/drive/MyDrive/Flu Forecasting/LSTM/region result/long trend/1000 times/{test_week}_allresult.pkl'
    save_path = f'/content/drive/MyDrive/Flu Forecasting/LSTM/region result/long trend/quantile result/{save_week}_quantile.csv'
    save_path1 = f'/content/drive/MyDrive/Flu Forecasting/LSTM/region result/long trend/quantile result/{save_week}_quantilelarger.csv'
    # Load the summary table, sort it, and filter
    df = pd.read_pickle(file_path)

    qt = convert_prediction_to_quantile(df,option)
    qt = adjust_quantile_ranges_inplace(qt)
    qt.to_csv(save_path1,index = False)
    #qt.to_csv(save_path,index = False)
    df1 = pd.concat([df1, qt], ignore_index=True)
    mergeddf,mae,maewk = mae_cal(qt,option)
    df2 = pd.concat([df2, mergeddf], ignore_index=True)
    me, mean_wis, mean_wis_by_horizon = cal_wis(qt,option)
    df3 = pd.concat([df3, me], ignore_index=True)
    mae_results.append({
        'test_week': save_week,
        'horizon': 'Overall',
        'regional MAE(long trend)': mae,
        'regional WIS(long trend)':mean_wis
    })

    '''
    for horizon, horizon_mae in maewk.items():
        mae_results.append({
            'test_week': save_week,
            'horizon': f'{horizon}_w',
            'regional MAE(long trend)': horizon_mae
        })
    '''
    combined_horizon_results = {}


    for horizon, horizon_mae in maewk.items():
        combined_horizon_results[horizon] = {
            'regional MAE(long trend)': horizon_mae
        }


    for horizon, horizon_wis in mean_wis_by_horizon.items():
        if horizon in combined_horizon_results:
            combined_horizon_results[horizon]['regional WIS(long trend)'] = horizon_wis
        else:
            combined_horizon_results[horizon] = {'regional WIS(long trend)': horizon_wis}


    for horizon, metrics in combined_horizon_results.items():
        metrics.update({
            'test_week': save_week,
            'horizon': f'{horizon}_w'
        })
        mae_results.append(metrics)
mae_df = pd.DataFrame(mae_results)
df2.to_csv('/content/drive/MyDrive/Flu Forecasting/result/result by state/longtrendmaelarger.csv', index=False)
df3.to_csv('/content/drive/MyDrive/Flu Forecasting/result/result by state/longtrendwislarger.csv', index=False)

if option == 1:
  #mae_df.to_csv('/content/drive/MyDrive/Flu Forecasting/result/longtrendmae_rate.csv', index=False)
  mae_df.to_csv('/content/drive/MyDrive/Flu Forecasting/result/increase interval/longtrendmae_rate.csv', index=False)
else:
  mae_df.to_csv('/content/drive/MyDrive/Flu Forecasting/result/longtrendmae.csv', index=False)
a,mae,wk = mae_cal(df1,option)
print(wk)
a,wis,wk = cal_wis(df1,option)
print(wk)

2023-09-30
Mean Absolute Error (MAE): 0.6009977630745111
Weighted Interval Score (WIS): 0.3895636644548048
2023-10-07
Mean Absolute Error (MAE): 0.46150626242852866
Weighted Interval Score (WIS): 0.2942865193223439
2023-10-14
Mean Absolute Error (MAE): 0.37385086474032686
Weighted Interval Score (WIS): 0.25446645309184573
2023-10-21
Mean Absolute Error (MAE): 0.36843094562973233
Weighted Interval Score (WIS): 0.24537324145268577
2023-10-28
Mean Absolute Error (MAE): 0.4634773401244551
Weighted Interval Score (WIS): 0.3181900692767712
2023-11-04
Mean Absolute Error (MAE): 0.6364733503444465
Weighted Interval Score (WIS): 0.4335321726021634
2023-11-11
Mean Absolute Error (MAE): 0.7246431241907427
Weighted Interval Score (WIS): 0.5019183477705904
2023-11-18
Mean Absolute Error (MAE): 1.1027981967657607
Weighted Interval Score (WIS): 0.7975334794478995
2023-11-25
Mean Absolute Error (MAE): 1.1438133619720159
Weighted Interval Score (WIS): 0.8566518185061062
2023-12-02
Mean Absolute Error (

In [None]:
a,mae,wk = mae_cal(df1,option)
print(wk)

Mean Absolute Error (MAE): 1.2522946690790286
horizon
0    0.779464
1    1.121755
2    1.437199
3    1.670760
Name: absolute_difference, dtype: float64


In [None]:
a,wis,wk = cal_wis(df1,option)
print(wk)

Weighted Interval Score (WIS): 1.0041748318905885
{0: 0.5750439068226845, 1: 0.8788940540497939, 2: 1.1738208510562413, 3: 1.3889405156336347}


## HHS

In [None]:
from datetime import datetime, timedelta
start_date = datetime.strptime('2023-09-30', '%Y-%m-%d')
end_date = datetime.strptime('2024-02-17', '%Y-%m-%d')
current_date = start_date
dates_list = []

while current_date <= end_date:
    dates_list.append(current_date.strftime('%Y-%m-%d'))
    current_date += timedelta(days=7)

test_weeks = dates_list
df1 = pd.DataFrame()
df2 = pd.DataFrame()
df3 = pd.DataFrame()
option = 1
mae_results = []
for test_week in test_weeks:
    print(test_week)
    test_week_datetime = datetime.strptime(test_week, '%Y-%m-%d')
    save_week = (test_week_datetime + timedelta(days=7)).strftime('%Y-%m-%d')
    file_path = f'/content/drive/MyDrive/Flu Forecasting/LSTM/region result/HHS/1000 times/{test_week}_allresult.pkl'
    save_path = f'/content/drive/MyDrive/Flu Forecasting/LSTM/region result/HHS/quantile result/{save_week}_quantile.csv'
    save_path1 = f'/content/drive/MyDrive/Flu Forecasting/LSTM/region result/HHS/quantile result/{save_week}_quantilelarger.csv'
    # Load the summary table, sort it, and filter
    df = pd.read_pickle(file_path)

    qt = convert_prediction_to_quantile(df,option)
    qt = adjust_quantile_ranges_inplace(qt)
    qt.to_csv(save_path1,index = False)
    #qt.to_csv(save_path,index = False)
    df1 = pd.concat([df1, qt], ignore_index=True)
    mergeddf,mae,maewk = mae_cal(qt,option)
    df2 = pd.concat([df2, mergeddf], ignore_index=True)
    me, mean_wis, mean_wis_by_horizon = cal_wis(qt,option)
    df3 = pd.concat([df3, me], ignore_index=True)
    mae_results.append({
        'test_week': save_week,
        'horizon': 'Overall',
        'HHS MAE': mae,
        'HHS WIS': mean_wis
    })

    '''
    for horizon, horizon_mae in maewk.items():
        mae_results.append({
            'test_week': save_week,
            'horizon': f'{horizon}_w',
            'HHS MAE': horizon_mae
        })
    '''
    combined_horizon_results = {}


    for horizon, horizon_mae in maewk.items():
        combined_horizon_results[horizon] = {
            'HHS MAE': horizon_mae
        }


    for horizon, horizon_wis in mean_wis_by_horizon.items():
        if horizon in combined_horizon_results:
            combined_horizon_results[horizon]['HHS WIS'] = horizon_wis
        else:
            combined_horizon_results[horizon] = {'HHS WIS': horizon_wis}


    for horizon, metrics in combined_horizon_results.items():
        metrics.update({
            'test_week': save_week,
            'horizon': f'{horizon}_w'
        })
        mae_results.append(metrics)
mae_df = pd.DataFrame(mae_results)
df2.to_csv('/content/drive/MyDrive/Flu Forecasting/result/result by state/HHSmaelarger.csv', index=False)
df3.to_csv('/content/drive/MyDrive/Flu Forecasting/result/result by state/HHSwislarger.csv', index=False)

if option == 1:
  #mae_df.to_csv('/content/drive/MyDrive/Flu Forecasting/result/HHSmae_rate.csv', index=False)
  mae_df.to_csv('/content/drive/MyDrive/Flu Forecasting/result/increase interval/HHSmae_rate.csv', index=False)
else:
  mae_df.to_csv('/content/drive/MyDrive/Flu Forecasting/result/HHSmae.csv', index=False)
a,mae,wk = mae_cal(df1,option)
print(wk)
a,wis,wk = cal_wis(df1,option)
print(wk)

2023-09-30
Mean Absolute Error (MAE): 0.4905676204145499
Weighted Interval Score (WIS): 0.32584096354102626
2023-10-07
Mean Absolute Error (MAE): 0.4230490728711642
Weighted Interval Score (WIS): 0.28314425410648086
2023-10-14
Mean Absolute Error (MAE): 0.4438655436507916
Weighted Interval Score (WIS): 0.30348738931131114
2023-10-21
Mean Absolute Error (MAE): 0.4666904820948494
Weighted Interval Score (WIS): 0.3065874621244757
2023-10-28
Mean Absolute Error (MAE): 0.6529095343507842
Weighted Interval Score (WIS): 0.43978066225170465
2023-11-04
Mean Absolute Error (MAE): 0.7033334212110915
Weighted Interval Score (WIS): 0.47918925366903375
2023-11-11
Mean Absolute Error (MAE): 0.8443708215141131
Weighted Interval Score (WIS): 0.5878772261178906
2023-11-18
Mean Absolute Error (MAE): 0.9168678546839155
Weighted Interval Score (WIS): 0.6585108377949811
2023-11-25
Mean Absolute Error (MAE): 1.3321851430889087
Weighted Interval Score (WIS): 0.9562836698410397
2023-12-02
Mean Absolute Error (

In [None]:
a,mae,wk = mae_cal(df1,option)
print(wk)

Mean Absolute Error (MAE): 1.3124250865272842
horizon
0    0.911105
1    1.203636
2    1.485225
3    1.649735
Name: absolute_difference, dtype: float64


In [None]:
a,wis,wk = cal_wis(df1,option)
print(wk)

Weighted Interval Score (WIS): 1.069457771724584
{0: 0.6984642676233613, 1: 0.9650205489645588, 2: 1.2277266983128592, 3: 1.3866195719975554}


# US model

In [None]:
test_weeks = dates_list
df1 = pd.DataFrame()
mae_results = []
option = 1
df2 = pd.DataFrame()
df3 = pd.DataFrame()
for test_week in test_weeks:
    print(test_week)
    test_week_datetime = datetime.strptime(test_week, '%Y-%m-%d')
    save_week = (test_week_datetime + timedelta(days=7)).strftime('%Y-%m-%d')
    file_path = f'/content/drive/MyDrive/Flu Forecasting/LSTM/US result/{test_week}_allresult.pkl'
    save_path = f'/content/drive/MyDrive/Flu Forecasting/LSTM/US result/quantile result/{save_week}_quantile.csv'
    save_path1 = f'/content/drive/MyDrive/Flu Forecasting/LSTM/US result/quantile result/{save_week}_quantileexp.csv'
    # Load the summary table, sort it, and filter
    df = pd.read_pickle(file_path)

    qt = convert_prediction_to_quantile(df,option)
    qt = adjust_quantile_ranges_inplace(qt)
    qt.to_csv(save_path1,index = False)
    df1 = pd.concat([df1, qt], ignore_index=True)

    #qt.to_csv(save_path,index = False)

    mergeddf,mae,maewk = mae_cal(qt,option)
    df2 = pd.concat([df2, mergeddf], ignore_index=True)
    me, mean_wis, mean_wis_by_horizon = cal_wis(qt,option)
    df3 = pd.concat([df3, me], ignore_index=True)
    mae_results.append({
        'test_week': save_week,
        'horizon': 'Overall',
        'US MAE': mae,
        'US WIS': mean_wis
    })

    '''
    for horizon, horizon_mae in maewk.items():
        mae_results.append({
            'test_week': save_week,
            'horizon': f'{horizon}_w',
            'US MAE': horizon_mae
        })
    '''
    combined_horizon_results = {}


    for horizon, horizon_mae in maewk.items():
        combined_horizon_results[horizon] = {
            'US MAE': horizon_mae
        }


    for horizon, horizon_wis in mean_wis_by_horizon.items():
        if horizon in combined_horizon_results:
            combined_horizon_results[horizon]['US WIS'] = horizon_wis
        else:
            combined_horizon_results[horizon] = {'US WIS': horizon_wis}


    for horizon, metrics in combined_horizon_results.items():
        metrics.update({
            'test_week': save_week,
            'horizon': f'{horizon}_w'
        })
        mae_results.append(metrics)
mae_df = pd.DataFrame(mae_results)
df2.to_csv('/content/drive/MyDrive/Flu Forecasting/result/result by state/USmaelarger.csv', index=False)
df3.to_csv('/content/drive/MyDrive/Flu Forecasting/result/result by state/USwislarger.csv', index=False)

if option == 1:
  #mae_df.to_csv('/content/drive/MyDrive/Flu Forecasting/result/USmae_rate.csv', index=False)
  mae_df.to_csv('/content/drive/MyDrive/Flu Forecasting/result/increase interval/USmae_rate.csv', index=False)
else:
  mae_df.to_csv('/content/drive/MyDrive/Flu Forecasting/result/USmae.csv', index=False)
a,mae,wk = mae_cal(df1,option)
print(wk)
a,wis,wk = cal_wis(df1,option)
print(wk)

2023-09-30
Mean Absolute Error (MAE): 0.2540676686979792
Weighted Interval Score (WIS): 0.16551483158987546
2023-10-07
Mean Absolute Error (MAE): 0.23446470914152542
Weighted Interval Score (WIS): 0.15535875522987813
2023-10-14
Mean Absolute Error (MAE): 0.2624054138545556
Weighted Interval Score (WIS): 0.17950717475562022
2023-10-21
Mean Absolute Error (MAE): 0.35509264830002024
Weighted Interval Score (WIS): 0.23852964710281158
2023-10-28
Mean Absolute Error (MAE): 0.36472639943574564
Weighted Interval Score (WIS): 0.2619656890712621
2023-11-04
Mean Absolute Error (MAE): 0.6360331753739467
Weighted Interval Score (WIS): 0.4224408104585659
2023-11-11
Mean Absolute Error (MAE): 0.5895186430032189
Weighted Interval Score (WIS): 0.4185917171358591
2023-11-18
Mean Absolute Error (MAE): 0.7954194438158859
Weighted Interval Score (WIS): 0.5593918356700449
2023-11-25
Mean Absolute Error (MAE): 1.0812800930148694
Weighted Interval Score (WIS): 0.8273867217306221
2023-12-02
Mean Absolute Error

In [None]:
a,mae,wk = mae_cal(df1,option)
print(wk)

Mean Absolute Error (MAE): 1.102582406543188
horizon
0    0.619479
1    0.983647
2    1.283851
3    1.523353
Name: absolute_difference, dtype: float64


In [None]:
a,wis,wk = cal_wis(df1,option)
print(wk)

Weighted Interval Score (WIS): 0.8751382055277963
{0: 0.4466368965395251, 1: 0.7717712353246327, 2: 1.036165007965227, 3: 1.2459796822818003}


# Weight Selection

## Directly taking the mean

In [28]:
from datetime import datetime, timedelta
start_date = datetime.strptime('2023-10-07', '%Y-%m-%d')
end_date = datetime.strptime('2024-02-24', '%Y-%m-%d')
current_date = start_date
dates_list = []

while current_date <= end_date:
    dates_list.append(current_date.strftime('%Y-%m-%d'))
    current_date += timedelta(days=7)

test_weeks = dates_list
def ensemble_by_mean(test_weeks,option):
  df0 = pd.DataFrame()
  dfmae = pd.DataFrame()
  dfwis = pd.DataFrame()
  mae_results = []
  for test_week in test_weeks:
    print(test_week)
    if option == 'recent trend':
      file_path1 = f'/content/drive/MyDrive/Flu Forecasting/LSTM/region result/recent trend (6 weeks)/quantile result/{test_week}_quantile.csv'
      save_path1 = f'/content/drive/MyDrive/Flu Forecasting/ensemble/mean/recent trend/{test_week}_quantile.csv'
      save_path2 = f'/content/drive/MyDrive/Flu Forecasting/ensemble/mean/recent trend/{test_week}_quantilelarger.csv'
    elif option =='long trend':
      file_path1 = f'/content/drive/MyDrive/Flu Forecasting/LSTM/region result/long trend/quantile result/{test_week}_quantile.csv'
      save_path1 = f'/content/drive/MyDrive/Flu Forecasting/ensemble/mean/long trend/{test_week}_quantile.csv'
      save_path2 = f'/content/drive/MyDrive/Flu Forecasting/ensemble/mean/long trend/{test_week}_quantilelarger.csv'
    elif option == 'HHS':
      file_path1 = f'/content/drive/MyDrive/Flu Forecasting/LSTM/region result/HHS/quantile result/{test_week}_quantile.csv'
      save_path1 = f'/content/drive/MyDrive/Flu Forecasting/ensemble/mean/HHS/{test_week}_quantile.csv'
      save_path2 = f'/content/drive/MyDrive/Flu Forecasting/ensemble/mean/HHS/{test_week}_quantilelarger.csv'

    df1 = pd.read_csv(file_path1)
    file_path2 = f'/content/drive/MyDrive/Flu Forecasting/LSTM/US result/quantile result/{test_week}_quantile.csv'
    file_path3 = f'/content/drive/MyDrive/Flu Forecasting/ARIMA/state result/{test_week}_quantileexp.csv'
    df2 = pd.read_csv(file_path2)
    df3 = pd.read_csv(file_path3)

    combined_result = pd.concat([df1,df2,df3],ignore_index=True)
    result_df = combined_result.groupby(['reference_date', 'target', 'horizon', 'target_end_date', 'location', 'output_type', 'output_type_id'])['value'].mean().reset_index()
    '''
    merged_df = pd.merge(df1, df2, on=['reference_date', 'target', 'horizon', 'target_end_date', 'location', 'output_type', 'output_type_id'], how='outer', suffixes=('_1', '_2'))
    merged_df = pd.merge(merged_df, df3, on=['reference_date', 'target', 'horizon', 'target_end_date', 'location', 'output_type', 'output_type_id'], how='outer')
    merged_df.rename(columns={'value': 'value_3'}, inplace=True)
    merged_df['value'] = merged_df[['value_1', 'value_2', 'value_3']].mean(axis=1, skipna=True)
    result_df = merged_df.drop(columns=['value_1', 'value_2', 'value_3'])
    '''
    result_df.reset_index(drop=True, inplace=True)
    result_df = adjust_quantile_ranges_inplace(result_df)
    #result_df.to_csv(save_path2,index = False)
    #result_df.to_csv(save_path1,index = False)

    '''
    pop = pd.read_csv('/content/drive/MyDrive/Flu Forecasting/code/population.csv')
    merged_df = pd.merge(result_df, pop, left_on='location', right_on='fips')
    merged_df['value'] = (merged_df['value'] * 100000) / merged_df['population']

    final_df = merged_df.drop(columns=['fips', 'population'])
    result_df = final_df
    '''
    df0 = pd.concat([df0, result_df], ignore_index=True)
    mergeddf,mae,maewk = mae_cal(result_df,1)
    dfmae = pd.concat([dfmae, mergeddf], ignore_index=True)
    me, mean_wis, mean_wis_by_horizon = cal_wis(result_df,1)
    dfwis = pd.concat([dfwis, me], ignore_index=True)
    mae_results.append({
        'test_week': test_week,
        'horizon': 'Overall',
        f'Ensemble MAE(mean,{option})': mae,
        f'Ensemble WIS(mean,{option})':mean_wis
    })

    '''
    for horizon, horizon_mae in maewk.items():
        mae_results.append({
            'test_week': test_week,
            'horizon': f'{horizon}_w',
            f'Ensemble (mean,{option})': horizon_mae
        })
    '''
    combined_horizon_results = {}


    for horizon, horizon_mae in maewk.items():
        combined_horizon_results[horizon] = {
            f'Ensemble MAE(mean,{option})': horizon_mae
        }


    for horizon, horizon_wis in mean_wis_by_horizon.items():
        if horizon in combined_horizon_results:
            combined_horizon_results[horizon][f'Ensemble WIS(mean,{option})'] = horizon_wis
        else:
            combined_horizon_results[horizon] = {f'Ensemble WIS(mean,{option})': horizon_wis}


    for horizon, metrics in combined_horizon_results.items():
        metrics.update({
            'test_week': test_week,
            'horizon': f'{horizon}_w'
        })
        mae_results.append(metrics)
    mae_df = pd.DataFrame(mae_results)
  #dfmae.to_csv(f'/content/drive/MyDrive/Flu Forecasting/result/result by state/Ensemble_mean_{option}_maelarger.csv', index=False)
  #dfwis.to_csv(f'/content/drive/MyDrive/Flu Forecasting/result/result by state/Ensemble_mean_{option}_wislarger.csv', index=False)
  #mae_df.to_csv(f'/content/drive/MyDrive/Flu Forecasting/result/Ensemble_mean_{option}_mae_rate.csv', index=False)
  #mae_df.to_csv(f'/content/drive/MyDrive/Flu Forecasting/result/increase interval/Ensemble_mean_{option}_mae_rate.csv', index=False)
  return df0,result_df

In [29]:
df1,result = ensemble_by_mean(test_weeks,'recent trend')
a,mae,wk = mape_cal(df1,1)
print(wk)
me, mean_wis, mean_wis_by_horizon = cal_wis(df1,1)
print(mean_wis_by_horizon)

2023-10-07
Mean Absolute Error (MAE): 0.30432238601926287
Weighted Interval Score (WIS): 0.19478132856604424
2023-10-14
Mean Absolute Error (MAE): 0.23888963749466918
Weighted Interval Score (WIS): 0.16110214761270059
2023-10-21
Mean Absolute Error (MAE): 0.2540904190270875
Weighted Interval Score (WIS): 0.1757979105726083
2023-10-28
Mean Absolute Error (MAE): 0.35763159055995897
Weighted Interval Score (WIS): 0.2316365057030461
2023-11-04
Mean Absolute Error (MAE): 0.396542888883032
Weighted Interval Score (WIS): 0.27366247922265957
2023-11-11
Mean Absolute Error (MAE): 0.5933541881464423
Weighted Interval Score (WIS): 0.377930614667506
2023-11-18
Mean Absolute Error (MAE): 0.6012127917247108
Weighted Interval Score (WIS): 0.3916025819284036
2023-11-25
Mean Absolute Error (MAE): 0.7334017449801206
Weighted Interval Score (WIS): 0.4908038415500809
2023-12-02
Mean Absolute Error (MAE): 0.9660701668400322
Weighted Interval Score (WIS): 0.6992707420982407
2023-12-09
Mean Absolute Error (M

In [None]:
df1,result = ensemble_by_mean(test_weeks,'long trend')
a,mae,wk = mae_cal(df1,1)
print(wk)
me, mean_wis, mean_wis_by_horizon = cal_wis(df1,1)
print(mean_wis_by_horizon)

2023-10-07
Mean Absolute Error (MAE): 0.33502232430406975
Weighted Interval Score (WIS): 0.21227236358753543
2023-10-14
Mean Absolute Error (MAE): 0.2822948811817897
Weighted Interval Score (WIS): 0.18488531654183946
2023-10-21
Mean Absolute Error (MAE): 0.2681148381405747
Weighted Interval Score (WIS): 0.18110063444361266
2023-10-28
Mean Absolute Error (MAE): 0.3107423870575185
Weighted Interval Score (WIS): 0.20811301402285728
2023-11-04
Mean Absolute Error (MAE): 0.35284141496696025
Weighted Interval Score (WIS): 0.24116725860146473
2023-11-11
Mean Absolute Error (MAE): 0.5207625251621516
Weighted Interval Score (WIS): 0.3370884740413123
2023-11-18
Mean Absolute Error (MAE): 0.559477983042191
Weighted Interval Score (WIS): 0.3704055724495396
2023-11-25
Mean Absolute Error (MAE): 0.6979620205633139
Weighted Interval Score (WIS): 0.46762974012187164
2023-12-02
Mean Absolute Error (MAE): 0.9762614613362177
Weighted Interval Score (WIS): 0.7256317614360543
2023-12-09
Mean Absolute Error

In [None]:
df1,result = ensemble_by_mean(test_weeks,'HHS')
a,mae,wk = mae_cal(df1,1)
print(wk)
me, mean_wis, mean_wis_by_horizon = cal_wis(df1,1)
print(mean_wis_by_horizon)

2023-10-07
Mean Absolute Error (MAE): 0.28231927958321684
Weighted Interval Score (WIS): 0.18169547365764438
2023-10-14
Mean Absolute Error (MAE): 0.2671975040666171
Weighted Interval Score (WIS): 0.1766995303491663
2023-10-21
Mean Absolute Error (MAE): 0.2845936089151851
Weighted Interval Score (WIS): 0.1893254422128461
2023-10-28
Mean Absolute Error (MAE): 0.3333935583160864
Weighted Interval Score (WIS): 0.21539011644768385
2023-11-04
Mean Absolute Error (MAE): 0.3946478484599993
Weighted Interval Score (WIS): 0.2592799133277079
2023-11-11
Mean Absolute Error (MAE): 0.5277973343589368
Weighted Interval Score (WIS): 0.34287335961180815
2023-11-18
Mean Absolute Error (MAE): 0.5901258821640236
Weighted Interval Score (WIS): 0.38420065706386564
2023-11-25
Mean Absolute Error (MAE): 0.6690786003243524
Weighted Interval Score (WIS): 0.4532388995475391
2023-12-02
Mean Absolute Error (MAE): 1.0348736287461684
Weighted Interval Score (WIS): 0.7465829845697936
2023-12-09
Mean Absolute Error (

## weight selected by  reciprocal of 4 weeks mae

In [21]:
from datetime import datetime, timedelta
start_date = datetime.strptime('2023-10-07', '%Y-%m-%d')
end_date = datetime.strptime('2024-02-24', '%Y-%m-%d')
current_date = start_date
dates_list = []

while current_date <= end_date:
    dates_list.append(current_date.strftime('%Y-%m-%d'))
    current_date += timedelta(days=7)

test_weeks = dates_list


def ensemble_by_reci(test_weeks, option):
    df0 = pd.DataFrame()
    dfmae = pd.DataFrame()
    dfwis = pd.DataFrame()
    mae_results = []
    weight_records = []
    for i, test_week in enumerate(test_weeks):
        #if i == 0:
         #   continue
        print(test_week)
        # Path setup based on selected option
        base_path = {
            'recent trend': '/content/drive/MyDrive/Flu Forecasting/LSTM/region result/recent trend (6 weeks)/quantile result/',
            'long trend': '/content/drive/MyDrive/Flu Forecasting/LSTM/region result/long trend/quantile result/',
            'HHS': '/content/drive/MyDrive/Flu Forecasting/LSTM/region result/HHS/quantile result/'
        }[option]

        save_path = {
            'recent trend': '/content/drive/MyDrive/Flu Forecasting/ensemble/reciprocal mae/recent trend/',
            'long trend': '/content/drive/MyDrive/Flu Forecasting/ensemble/reciprocal mae/long trend/',
            'HHS': '/content/drive/MyDrive/Flu Forecasting/ensemble/reciprocal mae/HHS/'
        }[option]

        # Load current week's data
        df1 = pd.read_csv(f'{base_path}{test_week}_quantile.csv')
        df2 = pd.read_csv(f'/content/drive/MyDrive/Flu Forecasting/LSTM/US result/quantile result/{test_week}_quantile.csv')
        df3 = pd.read_csv(f'/content/drive/MyDrive/Flu Forecasting/ARIMA/state result/{test_week}_quantileexp.csv')
        if i == 0:
            combined_result = pd.concat([df1,df2,df3],ignore_index=True)
            result_df = combined_result.groupby(['reference_date', 'target', 'horizon', 'target_end_date', 'location', 'output_type', 'output_type_id'])['value'].mean().reset_index()
            result_df.reset_index(drop=True, inplace=True)
            weight_records.append({'test_week': test_week, 'regional': 0.333333, 'US_level': 0.333333, 'state_level':0.333333})
        else:
            # Calculate MAEs from past weeks
            past_weeks = test_weeks[max(0, i-4):i]
            mae1, mae2, mae3 = [], [], []
            for pw in past_weeks:
                past_df1 = pd.read_csv(f'{base_path}{pw}_quantile.csv')
                past_df2 = pd.read_csv(f'/content/drive/MyDrive/Flu Forecasting/LSTM/US result/quantile result/{pw}_quantile.csv')
                past_df3 = pd.read_csv(f'/content/drive/MyDrive/Flu Forecasting/ARIMA/state result/{pw}_quantileexp.csv')
                week_list = test_weeks[test_weeks.index(pw):i]
                mae1.append(mae_cal2(past_df1,week_list, 1,0)[1])
                mae2.append(mae_cal2(past_df2,week_list, 1,0)[1])
                mae3.append(mae_cal2(past_df3,week_list, 1,0)[1])

            # Average MAE across the period
            mae1_mean, mae2_mean, mae3_mean = np.mean(mae1), np.mean(mae2), np.mean(mae3)

            # Merge dataframes and apply conditional weights
            full_df = df1.merge(df2, on=['reference_date', 'target', 'horizon', 'target_end_date', 'location', 'output_type', 'output_type_id'], how='outer', suffixes=('_1', '_2'))
            full_df = full_df.merge(df3, on=['reference_date', 'target', 'horizon', 'target_end_date', 'location', 'output_type', 'output_type_id'], how='outer', suffixes=('', '_3'))
            full_df.rename(columns={'value': 'value_3'}, inplace=True)

            weights_all = np.reciprocal([mae1_mean, mae2_mean, mae3_mean])
            weights_23 = np.reciprocal([mae2_mean, mae3_mean])

            weights_all /= np.sum(weights_all)
            weights_23 /= np.sum(weights_23)
            weight_records.append({'test_week': test_week, 'regional': weights_all[0], 'US_level': weights_all[1], 'state_level': weights_all[2]})
            # weights_all[0]: regional; weights_all[1]: US level model weight; weights_all[2]: state level weight
            # weights_23[0]: US level; weights_23[1]: state level
            full_df['value'] = full_df.apply(
                lambda x: (x['value_1'] * weights_all[0] + x['value_2'] * weights_all[1] + x['value_3'] * weights_all[2]) if pd.notna(x['value_1'])
                else (x['value_2'] * weights_23[0] + x['value_3'] * weights_23[1]),
                axis=1
            )

            # Save the result
            result_df = full_df[['reference_date', 'target', 'horizon', 'target_end_date', 'location', 'output_type', 'output_type_id', 'value']]

        result_df = adjust_quantile_ranges_inplace(result_df)
        #result_df.to_csv(f'{save_path}{test_week}_quantilelarger.csv', index=False)
        #result_df.to_csv(f'{save_path}{test_week}_quantile.csv', index=False)

        df0 = pd.concat([df0, result_df], ignore_index=True)
        mergeddf,mae,maewk = mae_cal1(result_df,1,1)
        dfmae = pd.concat([dfmae, mergeddf], ignore_index=True)
        me, mean_wis, mean_wis_by_horizon = cal_wis(result_df,1)
        dfwis = pd.concat([dfwis, me], ignore_index=True)
        mae_results.append({
            'test_week': test_week,
            'horizon': 'Overall',
            f'Ensemble MAE(reci,{option})': mae,
            f'Ensemble WIS(reci,{option})': mean_wis
        })

        '''
        for horizon, horizon_mae in maewk.items():
            mae_results.append({
                'test_week': test_week,
                'horizon': f'{horizon}_w',
                f'Ensemble (reci,{option})': horizon_mae
            })
        '''
        combined_horizon_results = {}


        for horizon, horizon_mae in maewk.items():
            combined_horizon_results[horizon] = {
                f'Ensemble MAE(reci,{option})': horizon_mae
            }


        for horizon, horizon_wis in mean_wis_by_horizon.items():
            if horizon in combined_horizon_results:
                combined_horizon_results[horizon][f'Ensemble WIS(reci,{option})'] = horizon_wis
            else:
                combined_horizon_results[horizon] = {f'Ensemble WIS(reci,{option})': horizon_wis}


        for horizon, metrics in combined_horizon_results.items():
            metrics.update({
                'test_week': test_week,
                'horizon': f'{horizon}_w'
            })
            mae_results.append(metrics)
        mae_df = pd.DataFrame(mae_results)
        weight_df = pd.DataFrame(weight_records)

    #dfmae.to_csv(f'/content/drive/MyDrive/Flu Forecasting/result/result by state/Ensemble_reci_{option}_maelarger.csv', index=False)
    #dfwis.to_csv(f'/content/drive/MyDrive/Flu Forecasting/result/result by state/Ensemble_reci_{option}_wislarger.csv', index=False)
    #mae_df.to_csv(f'/content/drive/MyDrive/Flu Forecasting/result/Ensemble_reci_{option}_mae_rate.csv', index=False)

    #mae_df.to_csv(f'/content/drive/MyDrive/Flu Forecasting/result/increase interval/Ensemble_reci_{option}_mae_rate.csv', index=False)
    #weight_df.to_csv(f'/content/drive/MyDrive/Flu Forecasting/result/weight/Ensemble_reci_{option}_weight.csv', index=False)
    return df0,result_df,mae_df


In [22]:
df1,result,mae_df = ensemble_by_reci(test_weeks,'HHS')
a,mae,wk = mape_cal(df1,1)
print(wk)
me, mean_wis, mean_wis_by_horizon = cal_wis(df1,1)
print(mean_wis_by_horizon)

2023-10-07
Mean Absolute Error (MAE): 0.28231927958321684
Weighted Interval Score (WIS): 0.18169547365764438
2023-10-14


  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]


Mean Absolute Error (MAE): 0.24157458992626452
Weighted Interval Score (WIS): 0.1637665280739064
2023-10-21


  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]


Mean Absolute Error (MAE): 0.2650839690319017
Weighted Interval Score (WIS): 0.17853405140117687
2023-10-28


  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]


Mean Absolute Error (MAE): 0.32067416205068006
Weighted Interval Score (WIS): 0.20935113241878534
2023-11-04


  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]


Mean Absolute Error (MAE): 0.36005099064180107
Weighted Interval Score (WIS): 0.24290252169760707
2023-11-11


  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]


Mean Absolute Error (MAE): 0.5137644457326778
Weighted Interval Score (WIS): 0.3335845597750285
2023-11-18


  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]


Mean Absolute Error (MAE): 0.5686294011245109
Weighted Interval Score (WIS): 0.36953263896368377
2023-11-25


  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]


Mean Absolute Error (MAE): 0.6517890526528979
Weighted Interval Score (WIS): 0.44339616935303283
2023-12-02


  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]


ParserError: Error tokenizing data. C error: Calling read(nbytes) on source failed. Try engine='python'.

In [23]:
df1,result,mae_df = ensemble_by_reci(test_weeks,'recent trend')
a,mae,wk = mae_cal(df1,1)
print(wk)
me, mean_wis, mean_wis_by_horizon = cal_wis(df1,1)
print(mean_wis_by_horizon)

2023-10-07
Mean Absolute Error (MAE): 0.30432238601926287
Weighted Interval Score (WIS): 0.19478132856604424
2023-10-14


  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]


Mean Absolute Error (MAE): 0.22988623289421944
Weighted Interval Score (WIS): 0.15636329874459218
2023-10-21


  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]


Mean Absolute Error (MAE): 0.2478465272826712
Weighted Interval Score (WIS): 0.1722213244052203
2023-10-28


  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]


Mean Absolute Error (MAE): 0.34305102037322677
Weighted Interval Score (WIS): 0.22328858965236265
2023-11-04


  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]


Mean Absolute Error (MAE): 0.3704446381360939
Weighted Interval Score (WIS): 0.2589471308828828
2023-11-11


  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]


Mean Absolute Error (MAE): 0.5784532807235532
Weighted Interval Score (WIS): 0.3696579834476383
2023-11-18


  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]


Mean Absolute Error (MAE): 0.5810496854716672
Weighted Interval Score (WIS): 0.37894081132472307
2023-11-25


  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]


Mean Absolute Error (MAE): 0.6931635402507095
Weighted Interval Score (WIS): 0.46772449985239106
2023-12-02


  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]


Mean Absolute Error (MAE): 0.9767508116981548
Weighted Interval Score (WIS): 0.7040178354790149
2023-12-09


  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]


Mean Absolute Error (MAE): 1.6123677542487007
Weighted Interval Score (WIS): 1.2146779776997052
2023-12-16


  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]


Mean Absolute Error (MAE): 2.0055580144557767
Weighted Interval Score (WIS): 1.4728958353613828
2023-12-23


  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]


Mean Absolute Error (MAE): 1.5536073684104585
Weighted Interval Score (WIS): 1.0424460462701333
2023-12-30


  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]


Mean Absolute Error (MAE): 1.7765276639718797
Weighted Interval Score (WIS): 1.139975073658359
2024-01-06


  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]


Mean Absolute Error (MAE): 2.1644474576080137
Weighted Interval Score (WIS): 1.373370498746938
2024-01-13


  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]


Mean Absolute Error (MAE): 1.195111171266631
Weighted Interval Score (WIS): 0.8104224657912039
2024-01-20


  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]


Mean Absolute Error (MAE): 1.0605172119320616
Weighted Interval Score (WIS): 0.7185629420619635
2024-01-27


  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]


Mean Absolute Error (MAE): 1.3516941187425022
Weighted Interval Score (WIS): 0.9099083652780223
2024-02-03


  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]


Mean Absolute Error (MAE): 1.2502428883086112
Weighted Interval Score (WIS): 0.8470723077043079
2024-02-10


  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]


Mean Absolute Error (MAE): 1.1268648913112869
Weighted Interval Score (WIS): 0.7318773935674003
2024-02-17


  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]


Mean Absolute Error (MAE): 1.091899205281902
Weighted Interval Score (WIS): 0.7104352215894576
2024-02-24


  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]
  hist = hist[hist['Week_end'].isin(desired_dates)]


Mean Absolute Error (MAE): 0.8896014738151082
Weighted Interval Score (WIS): 0.594272249379484
Mean Absolute Error (MAE): 1.0192098734382138
horizon
0    0.575297
1    0.918242
2    1.194613
3    1.388687
Name: absolute_difference, dtype: float64
Weighted Interval Score (WIS): 0.6900885323553917
{0: 0.38799431314508037, 1: 0.6108755129329493, 2: 0.8076092866355535, 3: 0.9538750167079839}


In [27]:
a,mae,wk = mape_cal(df1,1)
print(wk)

Mean Absolute Percentage Error (MAPE): 55.386697413760416
horizon
0    35.925931
1    49.988585
2    64.754646
3    70.654118
Name: percentage_error, dtype: float64


In [None]:
df1,result,mae_df = ensemble_by_reci(test_weeks,'long trend')
a,mae,wk = mae_cal(df1,1)
print(wk)
me, mean_wis, mean_wis_by_horizon = cal_wis(df1,1)
print(mean_wis_by_horizon)

2023-10-07
Mean Absolute Error (MAE): 0.33502232430406975
Weighted Interval Score (WIS): 0.21227236358753543
2023-10-14
Mean Absolute Error (MAE): 0.2551750467129461
Weighted Interval Score (WIS): 0.16977074675748705
2023-10-21
Mean Absolute Error (MAE): 0.2561127710774804
Weighted Interval Score (WIS): 0.17444342892284745
2023-10-28
Mean Absolute Error (MAE): 0.3094324721915572
Weighted Interval Score (WIS): 0.20651200205083461
2023-11-04
Mean Absolute Error (MAE): 0.341104186929283
Weighted Interval Score (WIS): 0.23541561886777354
2023-11-11
Mean Absolute Error (MAE): 0.5170244180354813
Weighted Interval Score (WIS): 0.3344278006064858
2023-11-18
Mean Absolute Error (MAE): 0.555486079885283
Weighted Interval Score (WIS): 0.36720135035422347
2023-11-25
Mean Absolute Error (MAE): 0.6894651409369577
Weighted Interval Score (WIS): 0.4624171681284405
2023-12-02
Mean Absolute Error (MAE): 0.9779613095994069
Weighted Interval Score (WIS): 0.7241057669764178
2023-12-09
Mean Absolute Error (

## weight selected by  reciprocal of each week's mae

In [None]:
from datetime import datetime, timedelta
start_date = datetime.strptime('2023-10-07', '%Y-%m-%d')
end_date = datetime.strptime('2024-02-24', '%Y-%m-%d')
current_date = start_date
dates_list = []

while current_date <= end_date:
    dates_list.append(current_date.strftime('%Y-%m-%d'))
    current_date += timedelta(days=7)

test_weeks = dates_list


def ensemble_by_recihorizon(test_weeks, option):
    df0 = pd.DataFrame()
    dfmae = pd.DataFrame()
    dfwis = pd.DataFrame()
    mae_results = []
    weight_records = []
    for i, test_week in enumerate(test_weeks):
        #if i == 0:
         #   continue
        print(test_week)
        weights = {}
        weights23 = {}
        # Path setup based on selected option
        base_path = {
            'recent trend': '/content/drive/MyDrive/Flu Forecasting/LSTM/region result/recent trend (6 weeks)/quantile result/',
            'long trend': '/content/drive/MyDrive/Flu Forecasting/LSTM/region result/long trend/quantile result/',
            'HHS': '/content/drive/MyDrive/Flu Forecasting/LSTM/region result/HHS/quantile result/'
        }[option]

        save_path = {
            'recent trend': '/content/drive/MyDrive/Flu Forecasting/ensemble/reci mae by horizon/recent trend/',
            'long trend': '/content/drive/MyDrive/Flu Forecasting/ensemble/reci mae by horizon/long trend/',
            'HHS': '/content/drive/MyDrive/Flu Forecasting/ensemble/reci mae by horizon/HHS/'
        }[option]

        # Load current week's data
        df1 = pd.read_csv(f'{base_path}{test_week}_quantile.csv')
        df2 = pd.read_csv(f'/content/drive/MyDrive/Flu Forecasting/LSTM/US result/quantile result/{test_week}_quantile.csv')
        df3 = pd.read_csv(f'/content/drive/MyDrive/Flu Forecasting/ARIMA/state result/{test_week}_quantileexp.csv')
        if i <= 3:
            combined_result = pd.concat([df1,df2,df3],ignore_index=True)
            result_df = combined_result.groupby(['reference_date', 'target', 'horizon', 'target_end_date', 'location', 'output_type', 'output_type_id'])['value'].mean().reset_index()
            result_df.reset_index(drop=True, inplace=True)
            weight_records.append({'test_week': test_week,'horizon': 'overall', 'regional': 0.333333, 'US_level': 0.333333, 'state_level':0.333333})
        else:
            # Calculate MAEs from past weeks
            #past_weeks = test_weeks[max(0, i-4):i ]
            mae1, mae2, mae3 = [], [], []
            horizons = [0,1,2,3]
            for horizon in horizons:
                past_weeks = test_weeks[max(0, i-4):i - horizon] if (i - horizon) > 0 else []
                mae1, mae2, mae3 = [], [], []
                for pw in past_weeks:
                    past_df1 = pd.read_csv(f'{base_path}{pw}_quantile.csv').query('horizon == @horizon')
                    past_df2 = pd.read_csv(f'/content/drive/MyDrive/Flu Forecasting/LSTM/US result/quantile result/{pw}_quantile.csv').query('horizon == @horizon')
                    past_df3 = pd.read_csv(f'/content/drive/MyDrive/Flu Forecasting/ARIMA/state result/{pw}_quantileexp.csv').query('horizon == @horizon')
                    week_list = test_weeks[test_weeks.index(pw):i]
                    if not past_df1.empty:
                        mae1.append(mae_cal2(past_df1, week_list, 1, 0)[1])
                    if not past_df2.empty:
                        mae2.append(mae_cal2(past_df2, week_list, 1, 0)[1])
                    if not past_df3.empty:
                        mae3.append(mae_cal2(past_df3, week_list, 1, 0)[1])

                # Calculate average MAE per horizon, ensuring we avoid division by zero
                mae1_mean = np.mean(mae1) if mae1 else np.inf
                mae2_mean = np.mean(mae2) if mae2 else np.inf
                mae3_mean = np.mean(mae3) if mae3 else np.inf

                reciprocal_weights = np.reciprocal([mae1_mean, mae2_mean, mae3_mean])
                normalized_weights = reciprocal_weights / reciprocal_weights.sum()
                weights[horizon] = normalized_weights
                reciprocal_weights23 = np.reciprocal([ mae2_mean, mae3_mean])
                normalized_weights23 = reciprocal_weights23 / reciprocal_weights23.sum()
                weights23[horizon] = normalized_weights23
                # Store the weights for each horizon
                weight_records.append({
                    'test_week': test_week,
                    'horizon': horizon,
                    'regional': normalized_weights[0],
                    'US_level': normalized_weights[1],
                    'state_level': normalized_weights[2]
                })


            mae1_mean, mae2_mean, mae3_mean = np.mean(mae1), np.mean(mae2), np.mean(mae3)

            # Merge dataframes and apply conditional weights
        full_df = pd.merge(pd.merge(df1, df2, on=['reference_date', 'target', 'horizon', 'target_end_date', 'location', 'output_type', 'output_type_id'], how='outer', suffixes=('_1', '_2')), df3, on=['reference_date', 'target', 'horizon', 'target_end_date', 'location', 'output_type', 'output_type_id'], how='outer')
        full_df.rename(columns={'value': 'value_3'}, inplace=True)

        full_df['value'] = full_df.apply(
            lambda x: (
                x['value_1'] * weights.get(x['horizon'], [1/3, 1/3, 1/3])[0] +
                x['value_2'] * weights.get(x['horizon'], [1/3, 1/3, 1/3])[1] +
                x['value_3'] * weights.get(x['horizon'], [1/3, 1/3, 1/3])[2]
            ) if pd.notna(x['value_1']) else (
                x['value_2'] * weights23.get(x['horizon'], [0.5, 0.5])[0] +
                x['value_3'] * weights23.get(x['horizon'], [0.5, 0.5])[1]
            ),
            axis=1
        )

            # Save the result
        result_df = full_df[['reference_date', 'target', 'horizon', 'target_end_date', 'location', 'output_type', 'output_type_id', 'value']]
        result_df = adjust_quantile_ranges_inplace(result_df)
        result_df.to_csv(f'{save_path}{test_week}_quantilelarger.csv', index=False)
        #result_df.to_csv(f'{save_path}{test_week}_quantile.csv', index=False)


        df0 = pd.concat([df0, result_df], ignore_index=True)
        mergeddf,mae,maewk = mae_cal1(result_df,1,1)
        dfmae = pd.concat([dfmae, mergeddf], ignore_index=True)
        me, mean_wis, mean_wis_by_horizon = cal_wis(result_df,1)
        dfwis = pd.concat([dfwis, me], ignore_index=True)
        mae_results.append({
            'test_week': test_week,
            'horizon': 'Overall',
            f'Ensemble MAE(reci by horizon,{option})': mae,
            f'Ensemble WIS(reci by horizon,{option})': mean_wis
        })

        '''
        for horizon, horizon_mae in maewk.items():
            mae_results.append({
                'test_week': test_week,
                'horizon': f'{horizon}_w',
                f'Ensemble (reci by horizon,{option})': horizon_mae
            })
        '''
        combined_horizon_results = {}


        for horizon, horizon_mae in maewk.items():
            combined_horizon_results[horizon] = {
                f'Ensemble MAE(reci by horizon,{option})': horizon_mae
            }


        for horizon, horizon_wis in mean_wis_by_horizon.items():
            if horizon in combined_horizon_results:
                combined_horizon_results[horizon][f'Ensemble WIS(reci by horizon,{option})'] = horizon_wis
            else:
                combined_horizon_results[horizon] = {f'Ensemble WIS(reci by horizon,{option})': horizon_wis}


        for horizon, metrics in combined_horizon_results.items():
            metrics.update({
                'test_week': test_week,
                'horizon': f'{horizon}_w'
            })
            mae_results.append(metrics)
        mae_df = pd.DataFrame(mae_results)
        weight_df = pd.DataFrame(weight_records)
    dfmae.to_csv(f'/content/drive/MyDrive/Flu Forecasting/result/result by state/Ensemble_reci by horizon_{option}_maelarger.csv', index=False)
    dfwis.to_csv(f'/content/drive/MyDrive/Flu Forecasting/result/result by state/Ensemble_reci by horizon_{option}_wislarger.csv', index=False)
    #mae_df.to_csv(f'/content/drive/MyDrive/Flu Forecasting/result/Ensemble_reci by horizon_{option}_mae_rate.csv', index=False)
    mae_df.to_csv(f'/content/drive/MyDrive/Flu Forecasting/result/increase interval/Ensemble_reci by horizon_{option}_mae_rate.csv', index=False)
    #weight_df.to_csv(f'/content/drive/MyDrive/Flu Forecasting/result/weight/Ensemble_reci by horizon_{option}_weight.csv', index=False)
    return df0,result_df,mae_df

In [None]:
df1,result,mae_df = ensemble_by_recihorizon(test_weeks,'HHS')
a,mae,wk = mae_cal(df1,1)
print(wk)
me, mean_wis, mean_wis_by_horizon = cal_wis(df1,1)
print(mean_wis_by_horizon)

2023-10-07
Mean Absolute Error (MAE): 0.2823192795832168
Weighted Interval Score (WIS): 0.18169547365764432
2023-10-14
Mean Absolute Error (MAE): 0.2671975040666171
Weighted Interval Score (WIS): 0.1766995303491663
2023-10-21
Mean Absolute Error (MAE): 0.2845936089151851
Weighted Interval Score (WIS): 0.1893254422128461
2023-10-28
Mean Absolute Error (MAE): 0.3333935583160864
Weighted Interval Score (WIS): 0.21539011644768385
2023-11-04
Mean Absolute Error (MAE): 0.35796767488994274
Weighted Interval Score (WIS): 0.24170739209966124
2023-11-11
Mean Absolute Error (MAE): 0.5143913991849911
Weighted Interval Score (WIS): 0.3342219493704637
2023-11-18
Mean Absolute Error (MAE): 0.5697435702573909
Weighted Interval Score (WIS): 0.3703671905423726
2023-11-25
Mean Absolute Error (MAE): 0.6534873599241535
Weighted Interval Score (WIS): 0.4452304422742959
2023-12-02
Mean Absolute Error (MAE): 1.0137667018321335
Weighted Interval Score (WIS): 0.7352507502385095
2023-12-09
Mean Absolute Error (M

In [None]:
df1,result,mae_df = ensemble_by_recihorizon(test_weeks,'recent trend')
a,mae,wk = mae_cal(df1,1)
print(wk)
me, mean_wis, mean_wis_by_horizon = cal_wis(df1,1)
print(mean_wis_by_horizon)

2023-10-07
Mean Absolute Error (MAE): 0.30432238601926287
Weighted Interval Score (WIS): 0.19478132856604424
2023-10-14
Mean Absolute Error (MAE): 0.23888963749466918
Weighted Interval Score (WIS): 0.16110214761270059
2023-10-21
Mean Absolute Error (MAE): 0.25409041902708746
Weighted Interval Score (WIS): 0.1757979105726083
2023-10-28
Mean Absolute Error (MAE): 0.3576315905599591
Weighted Interval Score (WIS): 0.2316365057030461
2023-11-04
Mean Absolute Error (MAE): 0.362439495073767
Weighted Interval Score (WIS): 0.25401612719687255
2023-11-11
Mean Absolute Error (MAE): 0.5776003294982587
Weighted Interval Score (WIS): 0.3694267726666747
2023-11-18
Mean Absolute Error (MAE): 0.584972105130852
Weighted Interval Score (WIS): 0.38248566060821354
2023-11-25
Mean Absolute Error (MAE): 0.6905183736595862
Weighted Interval Score (WIS): 0.46606687925140716
2023-12-02
Mean Absolute Error (MAE): 0.9757175055846578
Weighted Interval Score (WIS): 0.705028803436748
2023-12-09
Mean Absolute Error (

In [None]:
df1,result,mae_df = ensemble_by_recihorizon(test_weeks,'long trend')
a,mae,wk = mae_cal(df1,1)
print(wk)
me, mean_wis, mean_wis_by_horizon = cal_wis(df1,1)
print(mean_wis_by_horizon)

2023-10-07
Mean Absolute Error (MAE): 0.33502232430406975
Weighted Interval Score (WIS): 0.2122723635875354
2023-10-14
Mean Absolute Error (MAE): 0.28229488118178964
Weighted Interval Score (WIS): 0.18488531654183937
2023-10-21
Mean Absolute Error (MAE): 0.2681148381405747
Weighted Interval Score (WIS): 0.18110063444361263
2023-10-28
Mean Absolute Error (MAE): 0.31074238705751844
Weighted Interval Score (WIS): 0.20811301402285728
2023-11-04
Mean Absolute Error (MAE): 0.3380303898116658
Weighted Interval Score (WIS): 0.2339960998307032
2023-11-11
Mean Absolute Error (MAE): 0.515493744326875
Weighted Interval Score (WIS): 0.33305040143163617
2023-11-18
Mean Absolute Error (MAE): 0.5516644568472059
Weighted Interval Score (WIS): 0.3651387727921394
2023-11-25
Mean Absolute Error (MAE): 0.6882476958918181
Weighted Interval Score (WIS): 0.46088804435515784
2023-12-02
Mean Absolute Error (MAE): 0.9751851272724632
Weighted Interval Score (WIS): 0.7235119735799779
2023-12-09
Mean Absolute Error

## Linear programming

In [13]:
!pip install gurobipy

Collecting gurobipy
  Downloading gurobipy-11.0.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (15 kB)
Downloading gurobipy-11.0.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (13.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.4/13.4 MB[0m [31m89.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: gurobipy
Successfully installed gurobipy-11.0.3


In [14]:
import gurobipy as gp
from gurobipy import GRB
def LP(X,y_true):
    m = gp.Model("ensemble_mae_minimization3")

    n_predictors = X.shape[1]
    n_samples = X.shape[0]


    weights = m.addVars(n_predictors, lb=0, ub=1, name="weights")
    aux_vars = m.addVars(n_samples, lb=0, name="aux")


    m.setObjective(aux_vars.sum() / n_samples, GRB.MINIMIZE)


    m.addConstr(weights.sum() == 1, "sum_of_weights")


    for i in range(n_samples):
        m.addConstr(aux_vars[i] >= gp.quicksum(weights[j] * X[i, j] for j in range(n_predictors)) - y_true[i])
        m.addConstr(aux_vars[i] >= y_true[i] - gp.quicksum(weights[j] * X[i, j] for j in range(n_predictors)))

    m.setParam('OutputFlag', 0)
    m.optimize()

    optimized_weights = [weights[j].X for j in range(n_predictors)]
    return optimized_weights

In [19]:
import gurobipy as gp
from gurobipy import GRB

from datetime import datetime, timedelta
start_date = datetime.strptime('2023-10-07', '%Y-%m-%d')
end_date = datetime.strptime('2024-02-24', '%Y-%m-%d')
current_date = start_date
dates_list = []

while current_date <= end_date:
    dates_list.append(current_date.strftime('%Y-%m-%d'))
    current_date += timedelta(days=7)

test_weeks = dates_list


def ensemble_by_LP(test_weeks,week, option):
    '''
    test_weeks: the list of week that we want to ensemble
    week: the week that we want to minize the mae (1,2,3,4), if 4, means we want to use
    '''
    df0 = pd.DataFrame()
    dfmae = pd.DataFrame()
    dfwis = pd.DataFrame()
    mae_results = []
    weight_records = []
    for i, test_week in enumerate(test_weeks):

        print(test_week)
        # Path setup based on selected option
        base_path = {
            'recent trend': '/content/drive/MyDrive/Flu Forecasting/LSTM/region result/recent trend (6 weeks)/quantile result/',
            'long trend': '/content/drive/MyDrive/Flu Forecasting/LSTM/region result/long trend/quantile result/',
            'HHS': '/content/drive/MyDrive/Flu Forecasting/LSTM/region result/HHS/quantile result/'
        }[option]

        save_path = {
            'recent trend': '/content/drive/MyDrive/Flu Forecasting/ensemble/optimization/recent trend/',
            'long trend': '/content/drive/MyDrive/Flu Forecasting/ensemble/optimization/long trend/',
            'HHS': '/content/drive/MyDrive/Flu Forecasting/ensemble/optimization/HHS/'
        }[option]

        # Load current week's data
        df1 = pd.read_csv(f'{base_path}{test_week}_quantile.csv')
        df2 = pd.read_csv(f'/content/drive/MyDrive/Flu Forecasting/LSTM/US result/quantile result/{test_week}_quantile.csv')
        df3 = pd.read_csv(f'/content/drive/MyDrive/Flu Forecasting/ARIMA/state result/{test_week}_quantileexp.csv')
        if i <= week - 1:
            combined_result = pd.concat([df1,df2,df3],ignore_index=True)
            result_df = combined_result.groupby(['reference_date', 'target', 'horizon', 'target_end_date', 'location', 'output_type', 'output_type_id'])['value'].mean().reset_index()
            result_df.reset_index(drop=True, inplace=True)
            weight_records.append({'test_week': test_week, 'regional': 0.333333, 'US_level': 0.333333, 'state_level':0.333333})
        else:
            pw = test_weeks[i-week]
            past_df1 = pd.read_csv(f'{base_path}{pw}_quantile.csv')
            past_df2 = pd.read_csv(f'/content/drive/MyDrive/Flu Forecasting/LSTM/US result/quantile result/{pw}_quantile.csv')
            past_df3 = pd.read_csv(f'/content/drive/MyDrive/Flu Forecasting/ARIMA/state result/{pw}_quantileexp.csv')
            desired_week = test_weeks[i-week:i]
            past_df1 = past_df1[(past_df1['target_end_date'].isin(desired_week)) & (past_df1['output_type_id'] == 0.5)]
            past_df2 = past_df2[(past_df2['target_end_date'].isin(desired_week)) & (past_df2['output_type_id'] == 0.5)]
            past_df3 = past_df3[(past_df3['target_end_date'].isin(desired_week)) & (past_df3['output_type_id'] == 0.5)]

            full_df1 = past_df1.merge(past_df2, on=['reference_date', 'target', 'horizon', 'target_end_date', 'location', 'output_type', 'output_type_id'], how='outer', suffixes=('_1', '_2'))

            full_df1 = full_df1.merge(past_df3, on=['reference_date', 'target', 'horizon', 'target_end_date', 'location', 'output_type', 'output_type_id'], how='outer', suffixes=('', '_3'))
            full_df1.rename(columns={'value': 'value_3'}, inplace=True)
            full_df1['location'] = full_df1['location'].astype(str).str.zfill(2)
            gd = pd.read_csv('/content/drive/MyDrive/Flu Forecasting/code/smoothedrate.csv')
            gd = gd[['Week_end', 'fips','rate']]

            gd = gd[['Week_end', 'fips','rate']].reset_index(drop=True)
            gd['fips'] =gd['fips'].astype(str).str.zfill(2)
            gd.rename(columns={'Week_end': 'target_end_date', 'fips': 'location', 'rate': 'rate'}, inplace=True)

            full_df1 = full_df1.merge(gd, on=['target_end_date', 'location'], how='left')
            na_df = full_df1[pd.isna(full_df1['value_1'])]
            not_na_df = full_df1[pd.notna(full_df1['value_1'])]

            X = np.column_stack((not_na_df['value_1'], not_na_df['value_2'], not_na_df['value_3']))
            y_true = not_na_df['rate']
            weights_all = LP(X, y_true)

            X2 = np.column_stack((full_df1['value_2'], full_df1['value_3']))
            y_true2 = full_df1['rate']
            weights_23 = LP(X2, y_true2)

            full_df = df1.merge(df2, on=['reference_date', 'target', 'horizon', 'target_end_date', 'location', 'output_type', 'output_type_id'], how='outer', suffixes=('_1', '_2'))

            full_df = full_df.merge(df3, on=['reference_date', 'target', 'horizon', 'target_end_date', 'location', 'output_type', 'output_type_id'], how='outer', suffixes=('', '_3'))
            full_df.rename(columns={'value': 'value_3'}, inplace=True)
            full_df['value'] = full_df.apply(
                lambda x: (x['value_1'] * weights_all[0] + x['value_2'] * weights_all[1] + x['value_3'] * weights_all[2]) if pd.notna(x['value_1'])
                else (x['value_2'] * weights_23[0] + x['value_3'] * weights_23[1]),
                axis=1
            )
            weight_records.append(
                {'test_week': test_week, 'regional': weights_all[0], 'US_level': weights_all[1], 'state_level': weights_all[2]})

            result_df = full_df[['reference_date', 'target', 'horizon', 'target_end_date', 'location', 'output_type', 'output_type_id', 'value']]
        result_df = adjust_quantile_ranges_inplace(result_df)
        #result_df.to_csv(f'{save_path}{test_week}_quantile{week}larger.csv', index=False)
        #result_df.to_csv(f'{save_path}{test_week}_quantile{week}.csv', index=False)
        df0 = pd.concat([df0, result_df], ignore_index=True)
        mergeddf,mae,maewk = mae_cal(result_df,1)
        dfmae = pd.concat([dfmae, mergeddf], ignore_index=True)
        me, mean_wis, mean_wis_by_horizon = cal_wis(result_df,1)
        dfwis = pd.concat([dfwis, me], ignore_index=True)
        mae_results.append({
            'test_week': test_week,
            'horizon': 'Overall',
            f'Ensemble MAE(LP,{option},{week})': mae,
            f'Ensemble WIS(LP,{option},{week})': mean_wis
        })

        '''
        for horizon, horizon_mae in maewk.items():
            mae_results.append({
                'test_week': test_week,
                'horizon': f'{horizon}_w',
                f'Ensemble (reci,{option})': horizon_mae
            })
        '''
        combined_horizon_results = {}


        for horizon, horizon_mae in maewk.items():
            combined_horizon_results[horizon] = {
                f'Ensemble MAE(LP,{option},{week})': horizon_mae
            }


        for horizon, horizon_wis in mean_wis_by_horizon.items():
            if horizon in combined_horizon_results:
                combined_horizon_results[horizon][f'Ensemble WIS(LP,{option},{week})'] = horizon_wis
            else:
                combined_horizon_results[horizon] = {f'Ensemble WIS(LP,{option},{week})': horizon_wis}


        for horizon, metrics in combined_horizon_results.items():
            metrics.update({
                'test_week': test_week,
                'horizon': f'{horizon}_w'
            })
            mae_results.append(metrics)
        mae_df = pd.DataFrame(mae_results)
        weight_df = pd.DataFrame(weight_records)
    #weight_df.to_csv(f'/content/drive/MyDrive/Flu Forecasting/result/weight/Ensemble_LP_{option}_weight{week}.csv', index=False)
    #mae_df.to_csv(f'/content/drive/MyDrive/Flu Forecasting/result/increase interval/Ensemble_LP_{option}_mae_rate{week}.csv', index=False)
    #mae_df.to_csv(f'/content/drive/MyDrive/Flu Forecasting/result/Ensemble_LP_{option}_mae_rate{week}.csv', index=False)
    #dfmae.to_csv(f'/content/drive/MyDrive/Flu Forecasting/result/result by state/Ensemble_LP_{option}_mae{week}larger.csv', index=False)
    #dfwis.to_csv(f'/content/drive/MyDrive/Flu Forecasting/result/result by state/Ensemble_LP_{option}_wis{week}larger.csv', index=False)
    return df0,result_df,mae_df

In [20]:
df1,result,mae_df = ensemble_by_LP(test_weeks,1,'recent trend')
a,mae,wk = mape_cal(df1,1)
print(wk)
me, mean_wis, mean_wis_by_horizon = cal_wis(df1,1)
print(mean_wis_by_horizon)

2023-10-07
Mean Absolute Error (MAE): 0.30432238601926287
Weighted Interval Score (WIS): 0.19478132856604424
2023-10-14
Mean Absolute Error (MAE): 0.2264666027445834
Weighted Interval Score (WIS): 0.15069205760892757
2023-10-21


KeyError: 19

In [None]:
df1,result,mae_df = ensemble_by_LP(test_weeks,2,'recent trend')
a,mae,wk = mae_cal(df1,1)
print(wk)
me, mean_wis, mean_wis_by_horizon = cal_wis(df1,1)
print(mean_wis_by_horizon)

2023-10-07
Mean Absolute Error (MAE): 0.30432238601926287
Weighted Interval Score (WIS): 0.19478132856604424
2023-10-14
Mean Absolute Error (MAE): 0.23888963749466918
Weighted Interval Score (WIS): 0.16110214761270059
2023-10-21
Mean Absolute Error (MAE): 0.23979312299417033
Weighted Interval Score (WIS): 0.1665988617665861
2023-10-28
Mean Absolute Error (MAE): 0.3104088818030897
Weighted Interval Score (WIS): 0.20777151066350952
2023-11-04
Mean Absolute Error (MAE): 0.32879668076777974
Weighted Interval Score (WIS): 0.23017906571159982
2023-11-11
Mean Absolute Error (MAE): 0.497046724272853
Weighted Interval Score (WIS): 0.3223089085165736
2023-11-18
Mean Absolute Error (MAE): 0.5414945536608923
Weighted Interval Score (WIS): 0.35506090114913136
2023-11-25
Mean Absolute Error (MAE): 0.6883492306766534
Weighted Interval Score (WIS): 0.4657531821462967
2023-12-02
Mean Absolute Error (MAE): 1.0450337044851186
Weighted Interval Score (WIS): 0.7410369767604178
2023-12-09
Mean Absolute Erro

In [None]:
df1,result,mae_df = ensemble_by_LP(test_weeks,3,'recent trend')
a,mae,wk = mae_cal(df1,1)
print(wk)
me, mean_wis, mean_wis_by_horizon = cal_wis(df1,1)
print(mean_wis_by_horizon)

2023-10-07
Mean Absolute Error (MAE): 0.30432238601926287
Weighted Interval Score (WIS): 0.19478132856604424
2023-10-14
Mean Absolute Error (MAE): 0.23888963749466918
Weighted Interval Score (WIS): 0.16110214761270059
2023-10-21
Mean Absolute Error (MAE): 0.2540904190270875
Weighted Interval Score (WIS): 0.1757979105726083
2023-10-28
Mean Absolute Error (MAE): 0.31597805278975116
Weighted Interval Score (WIS): 0.2079589069587999
2023-11-04
Mean Absolute Error (MAE): 0.3322241985701105
Weighted Interval Score (WIS): 0.2332999547742243
2023-11-11
Mean Absolute Error (MAE): 0.5058221224795554
Weighted Interval Score (WIS): 0.32783672751917
2023-11-18
Mean Absolute Error (MAE): 0.5390281177378543
Weighted Interval Score (WIS): 0.35476366547776084
2023-11-25
Mean Absolute Error (MAE): 0.6383067310438455
Weighted Interval Score (WIS): 0.4370524616406826
2023-12-02
Mean Absolute Error (MAE): 1.102039000073857
Weighted Interval Score (WIS): 0.7719855782789046
2023-12-09
Mean Absolute Error (MA

In [None]:
df1,result,mae_df = ensemble_by_LP(test_weeks,4,'recent trend')
a,mae,wk = mae_cal(df1,1)
print(wk)
me, mean_wis, mean_wis_by_horizon = cal_wis(df1,1)
print(mean_wis_by_horizon)

2023-10-07
Mean Absolute Error (MAE): 0.30432238601926287
Weighted Interval Score (WIS): 0.19478132856604424
2023-10-14
Mean Absolute Error (MAE): 0.23888963749466918
Weighted Interval Score (WIS): 0.16110214761270059
2023-10-21
Mean Absolute Error (MAE): 0.2540904190270875
Weighted Interval Score (WIS): 0.1757979105726083
2023-10-28
Mean Absolute Error (MAE): 0.35763159055995897
Weighted Interval Score (WIS): 0.2316365057030461
2023-11-04
Mean Absolute Error (MAE): 0.33108267264876345
Weighted Interval Score (WIS): 0.2323772766343258
2023-11-11
Mean Absolute Error (MAE): 0.4853298003048334
Weighted Interval Score (WIS): 0.31547107665586516
2023-11-18
Mean Absolute Error (MAE): 0.5328660162460036
Weighted Interval Score (WIS): 0.3514760126632025
2023-11-25
Mean Absolute Error (MAE): 0.6411592856921922
Weighted Interval Score (WIS): 0.440876098522009
2023-12-02
Mean Absolute Error (MAE): 1.005825390153845
Weighted Interval Score (WIS): 0.7291858813558534
2023-12-09
Mean Absolute Error (

In [None]:
df1,result,mae_df = ensemble_by_LP(test_weeks,1,'long trend')
a,mae,wk = mae_cal(df1,1)
print(wk)
me, mean_wis, mean_wis_by_horizon = cal_wis(df1,1)
print(mean_wis_by_horizon)

2023-10-07
Mean Absolute Error (MAE): 0.33502232430406975
Weighted Interval Score (WIS): 0.21227236358753543
2023-10-14
Mean Absolute Error (MAE): 0.2264666027445834
Weighted Interval Score (WIS): 0.15069205760892757
2023-10-21
Mean Absolute Error (MAE): 0.23671534072313294
Weighted Interval Score (WIS): 0.1654772910010135
2023-10-28
Mean Absolute Error (MAE): 0.30943019169662483
Weighted Interval Score (WIS): 0.20784641325278216
2023-11-04
Mean Absolute Error (MAE): 0.33188960137382006
Weighted Interval Score (WIS): 0.2303548397512585
2023-11-11
Mean Absolute Error (MAE): 0.48740370659897186
Weighted Interval Score (WIS): 0.31664164374694215
2023-11-18
Mean Absolute Error (MAE): 0.5851380044922755
Weighted Interval Score (WIS): 0.38644453862003886
2023-11-25
Mean Absolute Error (MAE): 0.7780548934247381
Weighted Interval Score (WIS): 0.5294042493490067
2023-12-02
Mean Absolute Error (MAE): 0.9850857790899223
Weighted Interval Score (WIS): 0.7221975500854738
2023-12-09
Mean Absolute Er

In [None]:
df1,result,mae_df = ensemble_by_LP(test_weeks,2,'long trend')
a,mae,wk = mae_cal(df1,1)
print(wk)
me, mean_wis, mean_wis_by_horizon = cal_wis(df1,1)
print(mean_wis_by_horizon)

2023-10-07
Mean Absolute Error (MAE): 0.33502232430406975
Weighted Interval Score (WIS): 0.21227236358753543
2023-10-14
Mean Absolute Error (MAE): 0.2822948811817897
Weighted Interval Score (WIS): 0.18488531654183946
2023-10-21
Mean Absolute Error (MAE): 0.23979312299417033
Weighted Interval Score (WIS): 0.1665988617665861
2023-10-28
Mean Absolute Error (MAE): 0.312207826844283
Weighted Interval Score (WIS): 0.20726394305298446
2023-11-04
Mean Absolute Error (MAE): 0.3285821684725967
Weighted Interval Score (WIS): 0.22973211594649
2023-11-11
Mean Absolute Error (MAE): 0.535864310563402
Weighted Interval Score (WIS): 0.349523105214508
2023-11-18
Mean Absolute Error (MAE): 0.5394795999232435
Weighted Interval Score (WIS): 0.3555180124057467
2023-11-25
Mean Absolute Error (MAE): 0.6883492306766534
Weighted Interval Score (WIS): 0.4657531821462967
2023-12-02
Mean Absolute Error (MAE): 0.9814152468883236
Weighted Interval Score (WIS): 0.7327631950272989
2023-12-09
Mean Absolute Error (MAE):

In [None]:
df1,result,mae_df = ensemble_by_LP(test_weeks,3,'long trend')
a,mae,wk = mae_cal(df1,1)
print(wk)
me, mean_wis, mean_wis_by_horizon = cal_wis(df1,1)
print(mean_wis_by_horizon)

2023-10-07
Mean Absolute Error (MAE): 0.33502232430406975
Weighted Interval Score (WIS): 0.21227236358753543
2023-10-14
Mean Absolute Error (MAE): 0.2822948811817897
Weighted Interval Score (WIS): 0.18488531654183946
2023-10-21
Mean Absolute Error (MAE): 0.2681148381405747
Weighted Interval Score (WIS): 0.18110063444361266
2023-10-28
Mean Absolute Error (MAE): 0.31597805278975116
Weighted Interval Score (WIS): 0.2079589069587999
2023-11-04
Mean Absolute Error (MAE): 0.32849276076297
Weighted Interval Score (WIS): 0.2288748326588413
2023-11-11
Mean Absolute Error (MAE): 0.5058221224795554
Weighted Interval Score (WIS): 0.32783672751917
2023-11-18
Mean Absolute Error (MAE): 0.582214936949475
Weighted Interval Score (WIS): 0.38428422412720764
2023-11-25
Mean Absolute Error (MAE): 0.6442896891343639
Weighted Interval Score (WIS): 0.44354699790230656
2023-12-02
Mean Absolute Error (MAE): 1.102039000073857
Weighted Interval Score (WIS): 0.7719855782789046
2023-12-09
Mean Absolute Error (MAE)

In [None]:
df1,result,mae_df = ensemble_by_LP(test_weeks,4,'long trend')
a,mae,wk = mae_cal(df1,1)
print(wk)
me, mean_wis, mean_wis_by_horizon = cal_wis(df1,1)
print(mean_wis_by_horizon)

2023-10-07
Mean Absolute Error (MAE): 0.33502232430406975
Weighted Interval Score (WIS): 0.21227236358753543
2023-10-14
Mean Absolute Error (MAE): 0.2822948811817897
Weighted Interval Score (WIS): 0.18488531654183946
2023-10-21
Mean Absolute Error (MAE): 0.2681148381405747
Weighted Interval Score (WIS): 0.18110063444361266
2023-10-28
Mean Absolute Error (MAE): 0.3107423870575185
Weighted Interval Score (WIS): 0.20811301402285728
2023-11-04
Mean Absolute Error (MAE): 0.33108267264876345
Weighted Interval Score (WIS): 0.2323772766343258
2023-11-11
Mean Absolute Error (MAE): 0.5094849075229524
Weighted Interval Score (WIS): 0.33027686477510415
2023-11-18
Mean Absolute Error (MAE): 0.5330036688567036
Weighted Interval Score (WIS): 0.3513572890777627
2023-11-25
Mean Absolute Error (MAE): 0.6962611626629592
Weighted Interval Score (WIS): 0.4644690643641613
2023-12-02
Mean Absolute Error (MAE): 1.005825390153845
Weighted Interval Score (WIS): 0.7291858813558534
2023-12-09
Mean Absolute Error 

In [None]:
df1,result,mae_df = ensemble_by_LP(test_weeks,1,'HHS')
a,mae,wk = mae_cal(df1,1)
print(wk)
me, mean_wis, mean_wis_by_horizon = cal_wis(df1,1)
print(mean_wis_by_horizon)

2023-10-07
Mean Absolute Error (MAE): 0.28231927958321684
Weighted Interval Score (WIS): 0.18169547365764438
2023-10-14
Mean Absolute Error (MAE): 0.2264666027445834
Weighted Interval Score (WIS): 0.15069205760892757
2023-10-21
Mean Absolute Error (MAE): 0.23677338171256157
Weighted Interval Score (WIS): 0.16533957134132496
2023-10-28
Mean Absolute Error (MAE): 0.30943019169662483
Weighted Interval Score (WIS): 0.20784641325278216
2023-11-04
Mean Absolute Error (MAE): 0.3352968517161737
Weighted Interval Score (WIS): 0.2358483602107632
2023-11-11
Mean Absolute Error (MAE): 0.48740370659897186
Weighted Interval Score (WIS): 0.31664164374694215
2023-11-18
Mean Absolute Error (MAE): 0.5698899794428562
Weighted Interval Score (WIS): 0.37710162049698626
2023-11-25
Mean Absolute Error (MAE): 0.6397640591623994
Weighted Interval Score (WIS): 0.44007325551721777
2023-12-02
Mean Absolute Error (MAE): 1.0311007714258258
Weighted Interval Score (WIS): 0.7360036062412888
2023-12-09
Mean Absolute E

In [None]:
df1,result,mae_df = ensemble_by_LP(test_weeks,2,'HHS')
a,mae,wk = mae_cal(df1,1)
print(wk)
me, mean_wis, mean_wis_by_horizon = cal_wis(df1,1)
print(mean_wis_by_horizon)

2023-10-07
Mean Absolute Error (MAE): 0.28231927958321684
Weighted Interval Score (WIS): 0.18169547365764438
2023-10-14
Mean Absolute Error (MAE): 0.2671975040666171
Weighted Interval Score (WIS): 0.1766995303491663
2023-10-21
Mean Absolute Error (MAE): 0.23979312299417033
Weighted Interval Score (WIS): 0.1665988617665861
2023-10-28
Mean Absolute Error (MAE): 0.312207826844283
Weighted Interval Score (WIS): 0.20726394305298446
2023-11-04
Mean Absolute Error (MAE): 0.3285821684725967
Weighted Interval Score (WIS): 0.22973211594649
2023-11-11
Mean Absolute Error (MAE): 0.49304958319581543
Weighted Interval Score (WIS): 0.31992130235177374
2023-11-18
Mean Absolute Error (MAE): 0.5397629430435538
Weighted Interval Score (WIS): 0.3542170491700509
2023-11-25
Mean Absolute Error (MAE): 0.6883492306766534
Weighted Interval Score (WIS): 0.4657531821462967
2023-12-02
Mean Absolute Error (MAE): 1.0450337044851186
Weighted Interval Score (WIS): 0.7410369767604178
2023-12-09
Mean Absolute Error (MA

In [None]:
df1,result,mae_df = ensemble_by_LP(test_weeks,3,'HHS')
a,mae,wk = mae_cal(df1,1)
print(wk)
me, mean_wis, mean_wis_by_horizon = cal_wis(df1,1)
print(mean_wis_by_horizon)

2023-10-07
Mean Absolute Error (MAE): 0.28231927958321684
Weighted Interval Score (WIS): 0.18169547365764438
2023-10-14
Mean Absolute Error (MAE): 0.2671975040666171
Weighted Interval Score (WIS): 0.1766995303491663
2023-10-21
Mean Absolute Error (MAE): 0.2845936089151851
Weighted Interval Score (WIS): 0.1893254422128461
2023-10-28
Mean Absolute Error (MAE): 0.31597805278975116
Weighted Interval Score (WIS): 0.2079589069587999
2023-11-04
Mean Absolute Error (MAE): 0.3284434923178179
Weighted Interval Score (WIS): 0.22945348994065407
2023-11-11
Mean Absolute Error (MAE): 0.5058221224795554
Weighted Interval Score (WIS): 0.32783672751917
2023-11-18
Mean Absolute Error (MAE): 0.5460665836413171
Weighted Interval Score (WIS): 0.3605850012655911
2023-11-25
Mean Absolute Error (MAE): 0.6388934192680611
Weighted Interval Score (WIS): 0.4382613494022095
2023-12-02
Mean Absolute Error (MAE): 1.102039000073857
Weighted Interval Score (WIS): 0.7719855782789043
2023-12-09
Mean Absolute Error (MAE)

In [None]:
df1,result,mae_df = ensemble_by_LP(test_weeks,4,'HHS')
a,mae,wk = mae_cal(df1,1)
print(wk)
me, mean_wis, mean_wis_by_horizon = cal_wis(df1,1)
print(mean_wis_by_horizon)

2023-10-07
Mean Absolute Error (MAE): 0.28231927958321684
Weighted Interval Score (WIS): 0.18169547365764438
2023-10-14
Mean Absolute Error (MAE): 0.2671975040666171
Weighted Interval Score (WIS): 0.1766995303491663
2023-10-21
Mean Absolute Error (MAE): 0.2845936089151851
Weighted Interval Score (WIS): 0.1893254422128461
2023-10-28
Mean Absolute Error (MAE): 0.3333935583160864
Weighted Interval Score (WIS): 0.21539011644768385
2023-11-04
Mean Absolute Error (MAE): 0.33108267264876345
Weighted Interval Score (WIS): 0.2323772766343258
2023-11-11
Mean Absolute Error (MAE): 0.5045685224872279
Weighted Interval Score (WIS): 0.327023967548638
2023-11-18
Mean Absolute Error (MAE): 0.5328660162460036
Weighted Interval Score (WIS): 0.3514760126632025
2023-11-25
Mean Absolute Error (MAE): 0.6443360483773106
Weighted Interval Score (WIS): 0.44302170427790855
2023-12-02
Mean Absolute Error (MAE): 1.003961244459182
Weighted Interval Score (WIS): 0.7283436223850848
2023-12-09
Mean Absolute Error (MA