# Tuning for Transformer hyperparameters

In [1]:
import pytorch_lightning as pl
from pytorch_forecasting.metrics import SMAPE, PoissonLoss, QuantileLoss, MAE
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_forecasting import TimeSeriesDataSet, TemporalFusionTransformer
import pickle
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import keras
import torch
from statsmodels.graphics.tsaplots import plot_acf
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import f_regression, SelectKBest
from sklearn.metrics import  max_error, mean_absolute_error, mean_squared_error

pd.set_option('display.max_columns', None)
np.set_printoptions(suppress=True)

  import pandas.util.testing as tm


In [2]:
def plot_series(time, series, format="-", start=0, end=None):
    plt.plot(time[start:end], series[start:end], format)
    plt.xlabel("Time")
    plt.ylabel("Value")
    plt.grid(True)
    
#We apply one hot encoding for the Holiday feature to help linear models 
def onehotholiday(select):
    X_2 = select[['Holiday']]
    enc = preprocessing.OneHotEncoder(sparse=False)
    enc.fit(X_2)
    onehotlabels = enc.transform(X_2)
    # creating a list of column names 
    column_values = []
    for i in range(np.shape(onehotlabels)[1]):
            column_values.append('A'+str(i))

    onehotholiday = pd.DataFrame(data = onehotlabels,columns = column_values)
    dataset = select.drop(columns=['Holiday'])
    dataset = select.join(onehotholiday)
    df1 = dataset.pop('2to5')
    dataset['2to5']=df1 # add b series as a 'new' column
    dataset2=dataset
    dataset2 = dataset2.drop(columns=['Holiday'])
    return dataset2

    
def onehotholidaydiff(select, col):
    X_2 = select[['Holiday']]
    # TODO: create a OneHotEncoder object, and fit it to all of X
    # 1. INSTANTIATE
    enc = preprocessing.OneHotEncoder(sparse=False)
    
    # 2. FIT
    enc.fit(X_2)
    
    # 3. Transform
    onehotlabels = enc.transform(X_2)
    # creating a list of column names 
    column_values = []
    for i in range(np.shape(onehotlabels)[1]):
            column_values.append('A'+str(i))

    # creating the dataframe 
    onehotholiday = pd.DataFrame(data = onehotlabels,columns = column_values)

    dataset = select.drop(columns=['Holiday'])
    dataset = select.join(onehotholiday)
    df1 = dataset.pop(col)
    dataset[col]=df1 # add b series as a 'new' column
    dataset2=dataset
    dataset2 = dataset2.drop(columns=['Holiday'])
    return dataset2
    
def diff_add_lookback(dataset, look_back, df, col):
    for i in range(len(dataset)-look_back):
        a = dataset[i:(i+look_back)][col]
        a = a.values
        for j in range(len(a)):
            df[j][i]= a[j]
    return df

def add_lookback(dataset, look_back, df):
    for i in range(len(dataset)-look_back):
        a = dataset[i:(i+look_back)]['2to5']
        a = a.values
        for j in range(len(a)):
            df[j][i]= a[j]
    return df


In [3]:
train_time=883
time = np.arange(1104, dtype="float32")
df = pd.read_csv('Data/RestaurantDataVets_All_2to5.csv')
df=df.drop([x for x in range(7)]) ##Removing data with holes in beginning
df["DailyAvg"] = df.DailyAvg.astype(float)
df["WeeklyAvg"] = df.WeeklyAvg.astype(float)
df["MinSales"] = df.MinSales.astype(float)
df["MaxSales"] = df.MaxSales.astype(float)
df["DailyBusyness"] = df.MaxSales.astype(float)
df["WeeklyBusyness"] = df.MaxSales.astype(float)

data=df

data['WeeklyAvg']=data['WeeklyAvg'].astype(float)

price_series = data['2to5'].squeeze().to_numpy()
week_avg_series = data['WeeklyAvg'].squeeze().to_numpy()

price_test=price_series[train_time:]

time = np.arange(1104, dtype="float32")
time_series = time[train_time:]

## One Day Window

### Actual

In [4]:
data = pd.read_csv('Data/RestaurantDataVets_All_2to5.csv')
data = data.drop(columns=['DMY','MissingPrevDays'])

remove=7
df = data.drop([x for x in range(remove)])
df = df.reset_index(drop=True)

#Objects need to be converted to float due to missing values at load time.
df["DailyAvg"] = df.DailyAvg.astype(float)
df["WeeklyAvg"] = df.WeeklyAvg.astype(float)
df["MinSales"] = df.MinSales.astype(float)
df["MaxSales"] = df.MaxSales.astype(float)
df["DailyBusyness"] = df.DailyBusyness.astype(float)
df["WeeklyBusyness"] = df.WeeklyBusyness.astype(float)

hotdata = onehotholiday(df)

numcols = len(hotdata.columns)
dataset = hotdata
dataset

Unnamed: 0,Index,Group,Year,Day,January,February,March,April,May,June,July,August,September,October,November,December,Sunday,Monday,Tuesday,Wednesday,Thursday,Friday,Saturday,Carnival,LentFasting,Ramadan,ChristmasSeason,DailyAvg,WeeklyAvg,MinSales,MaxSales,DailyBusyness,WeeklyBusyness,A0,A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,A11,A12,A13,A14,A15,A16,A17,A18,A19,A20,A21,A22,A23,A24,A25,A26,A27,2to5
0,8,0,2016,18,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,532.292857,666.450000,320.54,862.04,0.391049,0.638800,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,942.94
1,9,0,2016,19,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,583.623750,475.490000,320.54,942.94,0.422692,0.248956,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,427.55
2,10,0,2016,20,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,566.282222,320.540000,320.54,942.94,0.394830,0.000000,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,378.36
3,11,0,2016,21,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,547.490000,365.610000,320.54,942.94,0.364637,0.072413,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,420.98
4,12,0,2016,22,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,535.989091,494.990000,320.54,942.94,0.346159,0.280286,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,541.16
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1099,1107,0,2019,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1028.135054,1445.320629,225.30,2889.23,0.301372,0.457978,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1558.69
1100,1108,0,2019,2,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,1028.614327,809.810440,225.30,2889.23,0.301552,0.219417,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,656.41
1101,1109,0,2019,3,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1028.278403,769.949295,225.30,2889.23,0.301426,0.204453,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,875.43
1102,1110,0,2019,4,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,1028.140577,839.784810,225.30,2889.23,0.301374,0.230668,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1079.06


In [5]:
train_data = dataset[lambda x: x.Index < train_time]

val_data = train_data[-84:]
train_data=train_data[:-84]

test_data = dataset[lambda x: x.Index >= train_time]


train_index = np.arange(1,len(train_data)+1, dtype="int")
val_index = np.arange(1,len(val_data)+1, dtype="int")
test_index = np.arange(1,len(test_data)+1, dtype="int")

train_data.Index = train_index
test_data.Index = test_index
val_data.Index = val_index

# create the dataset from the pandas dataframe
training = TimeSeriesDataSet(
     dataset[lambda x: x.Index < train_time-84],
     time_idx="Index",
     target="2to5",
     group_ids=["Group"],
     min_encoder_length=14,  # allowing predictions without history
     max_encoder_length=14,
     min_prediction_length=1,
     max_prediction_length=1,
     static_categoricals=[],
     static_reals=[ ],
     time_varying_known_categoricals=[],
     
     time_varying_known_reals=["Index","Year", "Day","January","February","March","April","May","June","July","August",
                               "September","October","November","December", "Sunday","Monday","Tuesday","Wednesday",
                               "Thursday","Friday","Saturday","Carnival","LentFasting","Ramadan",
                               "ChristmasSeason","DailyAvg","WeeklyAvg","MinSales",
                               "MaxSales","DailyBusyness","WeeklyBusyness",
                               "A0","A1","A2", "A3","A4","A5","A6","A7","A8","A9",
                               "A10","A11","A12","A13","A14","A15","A16","A17",
                               "A18","A19","A20","A21","A22","A23","A24","A25","A26","A27"],
     time_varying_unknown_categoricals=[],
     time_varying_unknown_reals=["2to5"],
     
)

validating=TimeSeriesDataSet.from_dataset(training, val_data, stop_randomization=True)
testing=TimeSeriesDataSet.from_dataset(training, test_data, stop_randomization=True)

batch_size = 16

train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
val_dataloader = validating.to_dataloader(train=False, batch_size=batch_size, num_workers=0)
test_dataloader = testing.to_dataloader(train=False, batch_size=batch_size, num_workers=0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value


In [6]:
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters

# create study
study = optimize_hyperparameters(
    train_dataloader,
    val_dataloader,
    model_path="FinalStudy",
    n_trials=200,
    max_epochs=50,
    gradient_clip_val_range=(0.01, 1.0),
    hidden_size_range=(8, 128),
    hidden_continuous_size_range=(8, 128),
    attention_head_size_range=(1, 4),
    learning_rate_range=(0.001, 0.1),
    dropout_range=(0.1, 0.3),
    trainer_kwargs=dict(limit_train_batches=30),
    reduce_on_plateau_patience=4,
    use_learning_rate_finder=False,  # use Optuna to find ideal learning rate or use in-built learning rate finder
)

# save study results - also we can resume tuning at a later point in time
with open("OneDayForecast_Actual.pkl", "wb") as fout:
    pickle.dump(study, fout)

# show best hyperparameters
print(study.best_trial.params)

[I 2021-01-27 13:50:53,743] A new study created in memory with name: no-name-8079db5b-ec46-492f-9a46-7ca74ca19493
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-27 14:13:52,599] Trial 0 finished with value: 487.45947265625 and parameters: {'gradient_clip_val': 0.08701325084719147, 'hidden_size': 12, 'dropout': 0.2943265167168637, 'hidden_continuous_size': 9, 'attention_head_size': 1, 'learning_rate': 0.008739312159700585}. Best is trial 0 with value: 487.45947265625.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-27 14:35:25,672] Trial 1 finished with value: 101.35247802734375 and parameters: {'gradient_clip_val': 0.02886816271205812, 'hidden_size': 31, 'dropout': 0.17523002066821874, 'hidden_continuous_size': 9, 'attention_head_size': 1, 'learning_rate': 0.02296155766848181}. Best is trial 1 with value: 101.35247802734375.
GPU avai

[I 2021-01-27 17:02:56,423] Trial 33 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-27 17:03:50,783] Trial 34 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-27 17:04:44,956] Trial 35 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-27 17:06:59,229] Trial 36 pruned. Trial was pruned at epoch 4.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-27 17:07:53,618] Trial 37 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-27 17:08:47,944] Trial 38 pruned. Trial was prune

TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-27 18:14:00,754] Trial 77 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-27 18:16:14,974] Trial 78 pruned. Trial was pruned at epoch 4.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-27 18:38:05,224] Trial 79 finished with value: 87.15880584716797 and parameters: {'gradient_clip_val': 0.33893671913844314, 'hidden_size': 67, 'dropout': 0.15471531088642043, 'hidden_continuous_size': 12, 'attention_head_size': 1, 'learning_rate': 0.0977229267397012}. Best is trial 11 with value: 83.72383117675781.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-27 18:38:59,540] Trial 80 pruned. Trial was pruned at epoch 1.
GPU available: True, used:

{'gradient_clip_val': 0.5295054390944878, 'hidden_size': 93, 'dropout': 0.16187745369173084, 'hidden_continuous_size': 16, 'attention_head_size': 1, 'learning_rate': 0.09782840988888085}


### Actual - Less Features

In [7]:
data = pd.read_csv('Data/RestaurantDataVets_All_2to5.csv')
data = data.drop(columns=['DMY','MissingPrevDays'])

remove=7
df = data.drop([x for x in range(remove)])
df = df.reset_index(drop=True)

#Objects need to be converted to float due to missing values at load time.
df["DailyAvg"] = df.DailyAvg.astype(float)
df["WeeklyAvg"] = df.WeeklyAvg.astype(float)
df["MinSales"] = df.MinSales.astype(float)
df["MaxSales"] = df.MaxSales.astype(float)
df["DailyBusyness"] = df.DailyBusyness.astype(float)
df["WeeklyBusyness"] = df.WeeklyBusyness.astype(float)

hotdata = onehotholiday(df)

numcols = len(hotdata.columns)
dataset = hotdata
dataset

numcols = len(hotdata.columns)
X=hotdata.iloc[:, 0:numcols-1]
y=hotdata.iloc[:, numcols-1]

X_new = SelectKBest(f_regression, k=55).fit(X,y)

dfscores = pd.DataFrame(X_new.scores_)
dfcolumns = pd.DataFrame(X.columns)

#concat two dataframes for better visualization 
featureScores = pd.concat([dfcolumns,dfscores],axis=1)
featureScores.columns = ['Specs','Score']  #naming the dataframe columns
print(featureScores.nlargest(60,'Score'))  #print 10 best features


  corr /= X_norms


              Specs        Score
28        WeeklyAvg  1500.731120
30         MaxSales   311.795792
2              Year   285.344359
32   WeeklyBusyness   277.771456
29         MinSales   258.164774
27         DailyAvg   258.018434
0             Index   257.043505
22         Saturday   207.091595
16           Sunday   169.579736
31    DailyBusyness   166.830205
18          Tuesday    56.879305
17           Monday    41.929382
20         Thursday    34.821244
19        Wednesday    29.642404
56              A23    19.493078
44              A11    14.542047
59              A26    14.542047
37               A4     9.753883
34               A1     9.643405
39               A6     9.643405
13          October     7.010144
33               A0     4.990336
49              A16     4.542011
60              A27     4.101715
47              A14     3.448309
6             March     3.387271
57              A24     3.296638
43              A10     3.199677
14         November     3.158984
41        

In [8]:
train_data = dataset[lambda x: x.Index < train_time]

val_data = train_data[-84:]
train_data=train_data[:-84]

test_data = dataset[lambda x: x.Index >= train_time]


train_index = np.arange(1,len(train_data)+1, dtype="int")
val_index = np.arange(1,len(val_data)+1, dtype="int")
test_index = np.arange(1,len(test_data)+1, dtype="int")

train_data.Index = train_index
test_data.Index = test_index
val_data.Index = val_index

# create the dataset from the pandas dataframe
training = TimeSeriesDataSet(
     dataset[lambda x: x.Index < train_time-84],
     time_idx="Index",
     target="2to5",
     group_ids=["Group"],
     min_encoder_length=14,  # allowing predictions without history
     max_encoder_length=14,
     min_prediction_length=1,
     max_prediction_length=1,
     static_categoricals=[],
     static_reals=[ ],
     time_varying_known_categoricals=[],
     
     time_varying_known_reals=["Index","WeeklyAvg","DailyBusyness","WeeklyBusyness","MaxSales",
                              "Year","MinSales","DailyAvg","Index", "Monday", "Tuesday",
                              "Wednesday","Thursday","Saturday","Sunday","A4","A1","A6","A23","A11","A26"],
     time_varying_unknown_categoricals=[],
     time_varying_unknown_reals=["2to5"],
     
)

validating=TimeSeriesDataSet.from_dataset(training, val_data, stop_randomization=True)
testing=TimeSeriesDataSet.from_dataset(training, test_data, stop_randomization=True)

batch_size = 16

train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
val_dataloader = validating.to_dataloader(train=False, batch_size=batch_size, num_workers=0)
test_dataloader = testing.to_dataloader(train=False, batch_size=batch_size, num_workers=0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value


In [9]:
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters

# create study
study = optimize_hyperparameters(
    train_dataloader,
    val_dataloader,
    model_path="FinalStudy",
    n_trials=200,
    max_epochs=50,
    gradient_clip_val_range=(0.01, 1.0),
    hidden_size_range=(8, 128),
    hidden_continuous_size_range=(8, 128),
    attention_head_size_range=(1, 4),
    learning_rate_range=(0.001, 0.1),
    dropout_range=(0.1, 0.3),
    trainer_kwargs=dict(limit_train_batches=30),
    reduce_on_plateau_patience=4,
    use_learning_rate_finder=False,  # use Optuna to find ideal learning rate or use in-built learning rate finder
)

# save study results - also we can resume tuning at a later point in time
with open("OneDayForecast_ActualLessFeats.pkl", "wb") as fout:
    pickle.dump(study, fout)

# show best hyperparameters
print(study.best_trial.params)

[I 2021-01-27 22:10:16,887] A new study created in memory with name: no-name-b79c94b4-fede-4a4b-8920-58d47b617238
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-27 22:19:12,625] Trial 0 finished with value: 69.03643798828125 and parameters: {'gradient_clip_val': 0.012740128134121299, 'hidden_size': 67, 'dropout': 0.13472634299515251, 'hidden_continuous_size': 32, 'attention_head_size': 3, 'learning_rate': 0.045267001134704866}. Best is trial 0 with value: 69.03643798828125.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-27 22:28:02,405] Trial 1 finished with value: 77.79053497314453 and parameters: {'gradient_clip_val': 0.1314007769832013, 'hidden_size': 14, 'dropout': 0.1761410476385072, 'hidden_continuous_size': 11, 'attention_head_size': 2, 'learning_rate': 0.0812115842128795}. Best is trial 0 with value: 69.03643798828125.
GPU a

TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-28 00:22:14,150] Trial 78 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-28 00:22:42,889] Trial 79 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-28 00:23:09,668] Trial 80 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-28 00:33:35,610] Trial 81 finished with value: 70.3164291381836 and parameters: {'gradient_clip_val': 0.02277431484608801, 'hidden_size': 94, 'dropout': 0.1668956531438566, 'hidden_continuous_size': 22, 'attention_head_size': 2, 'learning_rate': 0.08962539203824153}. Best is trial 0 with value: 69.03643798828125.
GPU available: True, used: T

[I 2021-01-28 03:38:11,016] Trial 145 pruned. Trial was pruned at epoch 16.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-28 03:38:33,979] Trial 146 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-28 03:41:44,353] Trial 147 pruned. Trial was pruned at epoch 16.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-28 03:42:41,472] Trial 148 pruned. Trial was pruned at epoch 4.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-28 03:43:03,733] Trial 149 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-28 03:44:00,341] Trial 150 pruned. Trial w

[I 2021-01-28 04:33:15,453] Trial 187 pruned. Trial was pruned at epoch 4.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-28 04:33:37,769] Trial 188 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-28 04:33:59,764] Trial 189 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-28 04:34:21,931] Trial 190 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-28 04:34:44,236] Trial 191 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-28 04:35:06,937] Trial 192 pruned. Trial was

{'gradient_clip_val': 0.19370255729809424, 'hidden_size': 123, 'dropout': 0.10560565992889985, 'hidden_continuous_size': 122, 'attention_head_size': 3, 'learning_rate': 0.09972456878766851}


### Day Difference

In [10]:
dataframe = pd.read_csv('Data/RestaurantDataVets_All_2to5_Differenced.csv')
data = dataframe.drop(columns=['DMY','MissingPrevDays','WeeklyDifference','DiffDifference','2to5'])

remove=14
df = data.drop([x for x in range(remove)])
df = df.reset_index(drop=True)

#Objects need to be converted to float due to missing values at load time.
df["DailyAvg"] = df.DailyAvg.astype(float)
df["WeeklyAvg"] = df.WeeklyAvg.astype(float)
df["MinSales"] = df.MinSales.astype(float)
df["MaxSales"] = df.MaxSales.astype(float)
df["DailyBusyness"] = df.DailyBusyness.astype(float)
df["WeeklyBusyness"] = df.WeeklyBusyness.astype(float)

df["AvgDailyDiff"] = df.AvgDailyDiff.astype(float)
df["AvgWeeklyDiff"] = df.AvgWeeklyDiff.astype(float)
df["MaxDailyDiff"] = df.MaxDailyDiff.astype(float)
df["MinDailyDiff"] = df.MinDailyDiff.astype(float)
df["DailyDifference"] = df.DailyDifference.astype(float)
#df["WeeklyDifference"] = df.WeeklyDifference.astype(float)

hotdata = onehotholidaydiff(df,'DailyDifference')

numcols = len(hotdata.columns)
dataset = hotdata
dataset

Unnamed: 0,Index,Group,Year,Day,January,February,March,April,May,June,July,August,September,October,November,December,Sunday,Monday,Tuesday,Wednesday,Thursday,Friday,Saturday,Carnival,LentFasting,Ramadan,ChristmasSeason,DailyAvg,WeeklyAvg,MinSales,MaxSales,DailyBusyness,WeeklyBusyness,AvgDailyDiff,AvgWeeklyDiff,MinDailyDiff,MaxDailyDiff,A0,A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,A11,A12,A13,A14,A15,A16,A17,A18,A19,A20,A21,A22,A23,A24,A25,A26,A27,DailyDifference
0,15,0,2016,25,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,559.68143,804.69500,320.54,942.94,0.38422,0.77788,12.13538,276.490000,-515.39,321.11,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,60.29
1,16,0,2016,26,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,581.33600,451.52000,320.54,942.94,0.41902,0.21044,15.57500,-47.940000,-515.39,321.11,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-487.01
2,17,0,2016,27,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,569.84563,349.45000,320.54,942.94,0.40056,0.04645,-17.93067,57.820000,-515.39,321.11,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,102.08
3,18,0,2016,28,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,565.71176,393.29500,320.54,942.94,0.39391,0.11689,-10.43000,42.620000,-515.39,321.11,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-66.01
4,19,0,2016,29,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,558.37000,518.07500,320.54,942.94,0.38212,0.31738,-13.69941,46.170000,-515.39,321.11,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-91.19
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1092,1107,0,2019,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1028.13505,1445.32063,225.30,2889.23,0.30137,0.45798,1.03780,5.415190,-1827.44,1929.78,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-254.53
1093,1108,0,2019,2,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,1028.61433,809.81044,225.30,2889.23,0.30155,0.21942,0.80673,-4.407278,-1827.44,1929.78,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-902.28
1094,1109,0,2019,3,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1028.27840,769.94929,225.30,2889.23,0.30143,0.20445,-0.00907,12.210903,-1827.44,1929.78,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,219.02
1095,1110,0,2019,4,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,1028.14058,839.78481,225.30,2889.23,0.30137,0.23067,0.18861,79.801529,-1827.44,1929.78,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,203.63


In [12]:
train_data = dataset[lambda x: x.Index < train_time]

val_data = train_data[-80:]
train_data=train_data[:-80]

test_data = dataset[lambda x: x.Index >= train_time]


train_index = np.arange(1,len(train_data)+1, dtype="int")
val_index = np.arange(1,len(val_data)+1, dtype="int")
test_index = np.arange(1,len(test_data)+1, dtype="int")

train_data.Index = train_index
test_data.Index = test_index
val_data.Index = val_index


# create the dataset from the pandas dataframe
training = TimeSeriesDataSet(
     dataset[lambda x: x.Index < train_time-80],
     time_idx="Index",
     target="DailyDifference",
     group_ids=["Group"],
     min_encoder_length=14,  # allowing predictions without history
     max_encoder_length=14,
     min_prediction_length=1,
     max_prediction_length=1,
     static_categoricals=[],
     static_reals=[ ],
     time_varying_known_categoricals=[],
     
     time_varying_known_reals=["Index","Year", "Day","January","February","March","April","May","June","July","August",
                               "September","October","November","December", "Sunday","Monday","Tuesday","Wednesday",
                               "Thursday","Friday","Saturday","Carnival","LentFasting","Ramadan",
                               "ChristmasSeason","DailyAvg","WeeklyAvg","MinSales",
                               "MaxSales","DailyBusyness","WeeklyBusyness",
                               "AvgDailyDiff","AvgWeeklyDiff","MaxDailyDiff","MinDailyDiff",
                               "A0","A1","A2", "A3","A4","A5","A6","A7","A8","A9",
                               "A10","A11","A12","A13","A14","A15","A16","A17",
                               "A18","A19","A20","A21","A22","A23","A24","A25","A26","A27"],
     time_varying_unknown_categoricals=[],
     time_varying_unknown_reals=["DailyDifference"],
     
 )

validating=TimeSeriesDataSet.from_dataset(training, val_data, stop_randomization=True)
testing=TimeSeriesDataSet.from_dataset(training, test_data, stop_randomization=True)

batch_size = 16

train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
val_dataloader = validating.to_dataloader(train=False, batch_size=batch_size, num_workers=0)
test_dataloader = testing.to_dataloader(train=False, batch_size=batch_size, num_workers=0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value


In [13]:
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters

# create study
study = optimize_hyperparameters(
    train_dataloader,
    val_dataloader,
    model_path="FinalStudy",
    n_trials=200,
    max_epochs=50,
    gradient_clip_val_range=(0.01, 1.0),
    hidden_size_range=(8, 128),
    hidden_continuous_size_range=(8, 128),
    attention_head_size_range=(1, 4),
    learning_rate_range=(0.001, 0.1),
    dropout_range=(0.1, 0.3),
    trainer_kwargs=dict(limit_train_batches=30),
    reduce_on_plateau_patience=4,
    use_learning_rate_finder=False,  # use Optuna to find ideal learning rate or use in-built learning rate finder
)

# save study results - also we can resume tuning at a later point in time
with open("OneDayForecast_Daily.pkl", "wb") as fout:
    pickle.dump(study, fout)

# show best hyperparameters
print(study.best_trial.params)

[I 2021-01-28 09:19:06,781] A new study created in memory with name: no-name-3c01965f-9b58-4641-a155-488b1acbcfd0
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-28 09:41:58,963] Trial 0 finished with value: 231.6572265625 and parameters: {'gradient_clip_val': 0.15521422563204418, 'hidden_size': 12, 'dropout': 0.11677126013580341, 'hidden_continuous_size': 9, 'attention_head_size': 4, 'learning_rate': 0.0036544807143533585}. Best is trial 0 with value: 231.6572265625.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-28 10:00:33,454] Trial 1 finished with value: 136.2860107421875 and parameters: {'gradient_clip_val': 0.04129932434386791, 'hidden_size': 8, 'dropout': 0.11149830116580124, 'hidden_continuous_size': 8, 'attention_head_size': 3, 'learning_rate': 0.033920129193302215}. Best is trial 1 with value: 136.2860107421875.
GPU availa

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-28 12:01:17,414] Trial 36 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-28 12:02:14,881] Trial 37 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-28 12:03:12,591] Trial 38 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-28 12:04:08,248] Trial 39 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-28 12:06:32,580] Trial 40 pruned. Trial was pruned at epoch 4.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-28 12:0

[I 2021-01-28 13:55:36,909] Trial 78 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-28 13:56:22,752] Trial 79 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-28 13:57:20,967] Trial 80 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-28 13:59:49,468] Trial 81 pruned. Trial was pruned at epoch 4.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-28 14:09:07,638] Trial 82 pruned. Trial was pruned at epoch 16.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-28 14:11:16,272] Trial 83 pruned. Trial was prun

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-28 16:26:52,208] Trial 119 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-28 16:27:48,314] Trial 120 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-28 16:35:52,231] Trial 121 pruned. Trial was pruned at epoch 16.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-28 16:38:18,490] Trial 122 pruned. Trial was pruned at epoch 4.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-28 16:40:10,865] Trial 123 pruned. Trial was pruned at epoch 4.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-2

{'gradient_clip_val': 0.04477614616033392, 'hidden_size': 101, 'dropout': 0.14518293024270285, 'hidden_continuous_size': 46, 'attention_head_size': 1, 'learning_rate': 0.08422656775097852}


### Daily Difference - Less Features

In [14]:
data = pd.read_csv('Data/RestaurantDataVets_All_2to5_Differenced.csv')
data = data.drop(columns=['DMY','MissingPrevDays','2to5','WeeklyDifference','DiffDifference'])

remove=14
df = data.drop([x for x in range(remove)])
df = df.reset_index(drop=True)

#Objects need to be converted to float due to missing values at load time.
df["DailyAvg"] = df.DailyAvg.astype(float)
df["WeeklyAvg"] = df.WeeklyAvg.astype(float)
df["MinSales"] = df.MinSales.astype(float)
df["MaxSales"] = df.MaxSales.astype(float)
df["DailyBusyness"] = df.DailyBusyness.astype(float)
df["WeeklyBusyness"] = df.WeeklyBusyness.astype(float)


df["AvgDailyDiff"] = df.AvgDailyDiff.astype(float)
df["AvgWeeklyDiff"] = df.AvgWeeklyDiff.astype(float)
df["MaxDailyDiff"] = df.MaxDailyDiff.astype(float)
df["MinDailyDiff"] = df.MinDailyDiff.astype(float)
df["DailyDifference"] = df.DailyDifference.astype(float)
hotdata = onehotholidaydiff(df,'DailyDifference')

numcols = len(hotdata.columns)
dataset = hotdata
dataset

numcols = len(hotdata.columns)
X=hotdata.iloc[:, 0:numcols-1]
y=hotdata.iloc[:, numcols-1]

X_new = SelectKBest(f_regression, k=55).fit(X,y)

dfscores = pd.DataFrame(X_new.scores_)
dfcolumns = pd.DataFrame(X.columns)

#concat two dataframes for better visualization 
featureScores = pd.concat([dfcolumns,dfscores],axis=1)
featureScores.columns = ['Specs','Score']  #naming the dataframe columns
print(featureScores.nlargest(60,'Score'))  #print 10 best features

  corr /= X_norms


              Specs       Score
17           Monday  432.655965
22         Saturday  198.666326
32   WeeklyBusyness  104.515124
28        WeeklyAvg  102.531345
33     AvgDailyDiff   89.744144
21           Friday   26.227024
48              A11   22.434007
60              A23   13.126514
37               A0   12.797335
34    AvgWeeklyDiff    9.755608
43               A6    8.050688
44               A7    6.731783
61              A24    6.572209
64              A27    6.399512
19        Wednesday    4.874038
56              A19    4.384009
38               A1    3.688036
40               A3    3.527297
63              A26    2.820926
46               A9    2.335293
45               A8    1.684062
41               A4    1.645293
18          Tuesday    1.409225
54              A17    1.271445
47              A10    0.855853
62              A25    0.831714
16           Sunday    0.827064
26  ChristmasSeason    0.791582
24      LentFasting    0.513328
53              A16    0.379622
6       

In [15]:
train_data = dataset[lambda x: x.Index < train_time]

val_data = train_data[-80:]
train_data=train_data[:-80]

test_data = dataset[lambda x: x.Index >= train_time]


train_index = np.arange(1,len(train_data)+1, dtype="int")
val_index = np.arange(1,len(val_data)+1, dtype="int")
test_index = np.arange(1,len(test_data)+1, dtype="int")

train_data.Index = train_index
test_data.Index = test_index
val_data.Index = val_index


# create the dataset from the pandas dataframe
training = TimeSeriesDataSet(
     dataset[lambda x: x.Index < train_time-80],
     time_idx="Index",
     target="DailyDifference",
     group_ids=["Group"],
     min_encoder_length=14,  # allowing predictions without history
     max_encoder_length=14,
     min_prediction_length=1,
     max_prediction_length=1,
     static_categoricals=[],
     static_reals=[ ],
     time_varying_known_categoricals=[],
     
     time_varying_known_reals=["Monday","Saturday","WeeklyBusyness","WeeklyAvg","AvgDailyDiff",
                              "AvgWeeklyDiff","Friday","A11","A23","A0","A24","A7","A27"],
     time_varying_unknown_categoricals=[],
     time_varying_unknown_reals=["DailyDifference"],
     
 )

validating=TimeSeriesDataSet.from_dataset(training, val_data, stop_randomization=True)
testing=TimeSeriesDataSet.from_dataset(training, test_data, stop_randomization=True)

batch_size = 16

train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
val_dataloader = validating.to_dataloader(train=False, batch_size=batch_size, num_workers=0)
test_dataloader = testing.to_dataloader(train=False, batch_size=batch_size, num_workers=0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value


In [16]:
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters

# create study
study = optimize_hyperparameters(
    train_dataloader,
    val_dataloader,
    model_path="FinalStudy",
    n_trials=200,
    max_epochs=50,
    gradient_clip_val_range=(0.01, 1.0),
    hidden_size_range=(8, 128),
    hidden_continuous_size_range=(8, 128),
    attention_head_size_range=(1, 4),
    learning_rate_range=(0.001, 0.1),
    dropout_range=(0.1, 0.3),
    trainer_kwargs=dict(limit_train_batches=30),
    reduce_on_plateau_patience=4,
    use_learning_rate_finder=False,  # use Optuna to find ideal learning rate or use in-built learning rate finder
)

# save study results - also we can resume tuning at a later point in time
with open("OneDayForecast_DailyLessFeats.pkl", "wb") as fout:
    pickle.dump(study, fout)

# show best hyperparameters
print(study.best_trial.params)

[I 2021-01-28 17:35:53,267] A new study created in memory with name: no-name-2f1dafe1-e1e4-494c-837d-fa9d304294e9
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-28 17:42:45,442] Trial 0 finished with value: 150.02618408203125 and parameters: {'gradient_clip_val': 0.36388238140682944, 'hidden_size': 23, 'dropout': 0.1887847475029824, 'hidden_continuous_size': 16, 'attention_head_size': 4, 'learning_rate': 0.015897710202297525}. Best is trial 0 with value: 150.02618408203125.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-28 17:49:28,457] Trial 1 finished with value: 97.0550308227539 and parameters: {'gradient_clip_val': 0.05259889328847289, 'hidden_size': 32, 'dropout': 0.2723678050718562, 'hidden_continuous_size': 13, 'attention_head_size': 2, 'learning_rate': 0.07501094502713608}. Best is trial 1 with value: 97.0550308227539.
GPU a

[I 2021-01-28 18:53:49,940] Trial 78 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-28 18:54:32,424] Trial 79 pruned. Trial was pruned at epoch 4.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-28 18:54:48,951] Trial 80 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-28 18:56:49,113] Trial 81 pruned. Trial was pruned at epoch 16.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-28 18:57:30,607] Trial 82 pruned. Trial was pruned at epoch 4.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-28 19:04:14,071] Trial 83 finished with value: 8

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-28 19:51:47,511] Trial 115 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-28 19:52:02,922] Trial 116 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-28 19:52:19,701] Trial 117 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-28 19:54:39,492] Trial 118 pruned. Trial was pruned at epoch 16.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-28 19:54:55,762] Trial 119 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-2

TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-28 21:38:11,689] Trial 192 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-28 21:40:38,521] Trial 193 pruned. Trial was pruned at epoch 16.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-28 21:40:54,346] Trial 194 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-28 21:41:13,375] Trial 195 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-28 21:41:31,360] Trial 196 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 

{'gradient_clip_val': 0.792780041654202, 'hidden_size': 116, 'dropout': 0.28962023420866967, 'hidden_continuous_size': 109, 'attention_head_size': 2, 'learning_rate': 0.09802329455644555}


### Week Difference

In [17]:
dataframe = pd.read_csv('Data/RestaurantDataVets_All_2to5_Differenced.csv')
data = dataframe.drop(columns=['DMY','MissingPrevDays','DailyDifference','DiffDifference','2to5'])

remove=14
df = data.drop([x for x in range(remove)])
df = df.reset_index(drop=True)

#Objects need to be converted to float due to missing values at load time.
df["DailyAvg"] = df.DailyAvg.astype(float)
df["WeeklyAvg"] = df.WeeklyAvg.astype(float)
df["MinSales"] = df.MinSales.astype(float)
df["MaxSales"] = df.MaxSales.astype(float)
df["DailyBusyness"] = df.DailyBusyness.astype(float)
df["WeeklyBusyness"] = df.WeeklyBusyness.astype(float)

df["AvgDailyDiff"] = df.AvgDailyDiff.astype(float)
df["AvgWeeklyDiff"] = df.AvgWeeklyDiff.astype(float)
df["MaxDailyDiff"] = df.MaxDailyDiff.astype(float)
df["MinDailyDiff"] = df.MinDailyDiff.astype(float)
#df["DailyDifference"] = df.DailyDifference.astype(float)
df["WeeklyDifference"] = df.WeeklyDifference.astype(float)

hotdata = onehotholidaydiff(df,'WeeklyDifference')

numcols = len(hotdata.columns)
dataset = hotdata
dataset

Unnamed: 0,Index,Group,Year,Day,January,February,March,April,May,June,July,August,September,October,November,December,Sunday,Monday,Tuesday,Wednesday,Thursday,Friday,Saturday,Carnival,LentFasting,Ramadan,ChristmasSeason,DailyAvg,WeeklyAvg,MinSales,MaxSales,DailyBusyness,WeeklyBusyness,AvgDailyDiff,AvgWeeklyDiff,MinDailyDiff,MaxDailyDiff,A0,A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,A11,A12,A13,A14,A15,A16,A17,A18,A19,A20,A21,A22,A23,A24,A25,A26,A27,WeeklyDifference
0,15,0,2016,25,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,559.68143,804.69500,320.54,942.94,0.38422,0.77788,12.13538,276.490000,-515.39,321.11,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-58.44
1,16,0,2016,26,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,581.33600,451.52000,320.54,942.94,0.41902,0.21044,15.57500,-47.940000,-515.39,321.11,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-30.06
2,17,0,2016,27,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,569.84563,349.45000,320.54,942.94,0.40056,0.04645,-17.93067,57.820000,-515.39,321.11,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,121.21
3,18,0,2016,28,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,565.71176,393.29500,320.54,942.94,0.39391,0.11689,-10.43000,42.620000,-515.39,321.11,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.58
4,19,0,2016,29,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,558.37000,518.07500,320.54,942.94,0.38212,0.31738,-13.69941,46.170000,-515.39,321.11,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-198.79
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1092,1107,0,2019,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1028.13505,1445.32063,225.30,2889.23,0.30137,0.45798,1.03780,5.415190,-1827.44,1929.78,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,36.64
1093,1108,0,2019,2,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,1028.61433,809.81044,225.30,2889.23,0.30155,0.21942,0.80673,-4.407278,-1827.44,1929.78,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-222.95
1094,1109,0,2019,3,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,1028.27840,769.94929,225.30,2889.23,0.30143,0.20445,-0.00907,12.210903,-1827.44,1929.78,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-237.58
1095,1110,0,2019,4,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,1028.14058,839.78481,225.30,2889.23,0.30137,0.23067,0.18861,79.801529,-1827.44,1929.78,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-321.44


In [21]:
train_data = dataset[lambda x: x.Index < train_time]

val_data = train_data[-80:]
train_data=train_data[:-80]

test_data = dataset[lambda x: x.Index >= train_time]


train_index = np.arange(1,len(train_data)+1, dtype="int")
val_index = np.arange(1,len(val_data)+1, dtype="int")
test_index = np.arange(1,len(test_data)+1, dtype="int")

train_data.Index = train_index
test_data.Index = test_index
val_data.Index = val_index

# create the dataset from the pandas dataframe
training = TimeSeriesDataSet(
     dataset[lambda x: x.Index < train_time-80],
     time_idx="Index",
     target="WeeklyDifference",
     group_ids=["Group"],
     min_encoder_length=14,  # allowing predictions without history
     max_encoder_length=14,
     min_prediction_length=1,
     max_prediction_length=1,
     static_categoricals=[],
     static_reals=[ ],
     time_varying_known_categoricals=[],
     
     time_varying_known_reals=["Index","Year", "Day","January","February","March","April","May","June","July","August",
                               "September","October","November","December", "Sunday","Monday","Tuesday","Wednesday",
                               "Thursday","Friday","Saturday","Carnival","LentFasting","Ramadan",
                               "ChristmasSeason","DailyAvg","WeeklyAvg","MinSales",
                               "MaxSales","DailyBusyness","WeeklyBusyness",
                               "AvgDailyDiff","AvgWeeklyDiff","MaxDailyDiff","MinDailyDiff",
                               "A0","A1","A2", "A3","A4","A5","A6","A7","A8","A9",
                               "A10","A11","A12","A13","A14","A15","A16","A17",
                               "A18","A19","A20","A21","A22","A23","A24","A25","A26","A27"],
     time_varying_unknown_categoricals=[],
     time_varying_unknown_reals=["WeeklyDifference"],
     
 )

validating=TimeSeriesDataSet.from_dataset(training, val_data, stop_randomization=True)
testing=TimeSeriesDataSet.from_dataset(training, test_data, stop_randomization=True)

batch_size = 16

train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
val_dataloader = validating.to_dataloader(train=False, batch_size=batch_size, num_workers=0)
test_dataloader = testing.to_dataloader(train=False, batch_size=batch_size, num_workers=0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value


In [22]:
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters

# create study
study = optimize_hyperparameters(
    train_dataloader,
    val_dataloader,
    model_path="FinalStudy",
    n_trials=200,
    max_epochs=50,
    gradient_clip_val_range=(0.01, 1.0),
    hidden_size_range=(8, 128),
    hidden_continuous_size_range=(8, 128),
    attention_head_size_range=(1, 4),
    learning_rate_range=(0.001, 0.1),
    dropout_range=(0.1, 0.3),
    trainer_kwargs=dict(limit_train_batches=30),
    reduce_on_plateau_patience=4,
    use_learning_rate_finder=False,  # use Optuna to find ideal learning rate or use in-built learning rate finder
)

# save study results - also we can resume tuning at a later point in time
with open("OneDayForecast_Weekly.pkl", "wb") as fout:
    pickle.dump(study, fout)

# show best hyperparameters
print(study.best_trial.params)

[I 2021-01-28 21:51:11,123] A new study created in memory with name: no-name-d143641d-85f6-4e87-9443-530975d512bb
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-28 22:13:55,030] Trial 0 finished with value: 90.2642822265625 and parameters: {'gradient_clip_val': 0.02033196551934625, 'hidden_size': 22, 'dropout': 0.15730337668981154, 'hidden_continuous_size': 14, 'attention_head_size': 4, 'learning_rate': 0.06640304627449335}. Best is trial 0 with value: 90.2642822265625.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-28 22:31:15,742] Trial 1 finished with value: 158.26332092285156 and parameters: {'gradient_clip_val': 0.03436487205478171, 'hidden_size': 8, 'dropout': 0.18478345204940577, 'hidden_continuous_size': 8, 'attention_head_size': 1, 'learning_rate': 0.0016918742043625743}. Best is trial 0 with value: 90.2642822265625.
GPU av

TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-29 03:10:34,538] Trial 77 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-29 03:11:31,947] Trial 78 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-29 03:13:53,756] Trial 79 pruned. Trial was pruned at epoch 4.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-29 03:14:52,603] Trial 80 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-29 03:22:50,804] Trial 81 pruned. Trial was pruned at epoch 16.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUD

{'gradient_clip_val': 0.02033196551934625, 'hidden_size': 22, 'dropout': 0.15730337668981154, 'hidden_continuous_size': 14, 'attention_head_size': 4, 'learning_rate': 0.06640304627449335}


### Week Difference Less Features

In [23]:
dataframe = pd.read_csv('Data/RestaurantDataVets_All_2to5_Differenced.csv')
data = dataframe.drop(columns=['DMY','MissingPrevDays','DailyDifference','DiffDifference','2to5'])

remove=14
df = data.drop([x for x in range(remove)])
df = df.reset_index(drop=True)

#Objects need to be converted to float due to missing values at load time.
df["DailyAvg"] = df.DailyAvg.astype(float)
df["WeeklyAvg"] = df.WeeklyAvg.astype(float)
df["MinSales"] = df.MinSales.astype(float)
df["MaxSales"] = df.MaxSales.astype(float)
df["DailyBusyness"] = df.DailyBusyness.astype(float)
df["WeeklyBusyness"] = df.WeeklyBusyness.astype(float)

df["AvgDailyDiff"] = df.AvgDailyDiff.astype(float)
df["AvgWeeklyDiff"] = df.AvgWeeklyDiff.astype(float)
df["MaxDailyDiff"] = df.MaxDailyDiff.astype(float)
df["MinDailyDiff"] = df.MinDailyDiff.astype(float)
#df["DailyDifference"] = df.DailyDifference.astype(float)
df["WeeklyDifference"] = df.WeeklyDifference.astype(float)

hotdata = onehotholidaydiff(df,'WeeklyDifference')

numcols = len(hotdata.columns)
dataset = hotdata
dataset

numcols = len(hotdata.columns)
X=hotdata.iloc[:, 0:numcols-1]
y=hotdata.iloc[:, numcols-1]

X_new = SelectKBest(f_regression, k=55).fit(X,y)

dfscores = pd.DataFrame(X_new.scores_)
dfcolumns = pd.DataFrame(X.columns)

#concat two dataframes for better visualization 
featureScores = pd.concat([dfcolumns,dfscores],axis=1)
featureScores.columns = ['Specs','Score']  #naming the dataframe columns
print(featureScores.nlargest(60,'Score'))  #print 10 best features

  corr /= X_norms


              Specs      Score
48              A11  57.296458
63              A26  26.953598
34    AvgWeeklyDiff  23.967553
60              A23  16.344035
43               A6  14.945257
55              A18  10.932097
52              A15   6.864160
64              A27   5.434958
37               A0   5.434757
54              A17   3.212682
45               A8   2.669660
62              A25   2.482104
26  ChristmasSeason   2.027191
3               Day   1.761964
41               A4   1.751050
6             March   1.617099
50              A13   1.200954
44               A7   0.942852
39               A2   0.787587
51              A14   0.602171
24      LentFasting   0.513156
53              A16   0.490658
42               A5   0.423050
11           August   0.405338
5          February   0.384573
25          Ramadan   0.356426
28        WeeklyAvg   0.355412
35     MinDailyDiff   0.335999
27         DailyAvg   0.251915
10             July   0.211090
14         November   0.199972
32   Wee

In [24]:
train_data = dataset[lambda x: x.Index < train_time]

val_data = train_data[-80:]
train_data=train_data[:-80]

test_data = dataset[lambda x: x.Index >= train_time]


train_index = np.arange(1,len(train_data)+1, dtype="int")
val_index = np.arange(1,len(val_data)+1, dtype="int")
test_index = np.arange(1,len(test_data)+1, dtype="int")

train_data.Index = train_index
test_data.Index = test_index
val_data.Index = val_index

# create the dataset from the pandas dataframe
training = TimeSeriesDataSet(
     dataset[lambda x: x.Index < train_time-80],
     time_idx="Index",
     target="WeeklyDifference",
     group_ids=["Group"],
     min_encoder_length=14,  # allowing predictions without history
     max_encoder_length=14,
     min_prediction_length=1,
     max_prediction_length=1,
     static_categoricals=[],
     static_reals=[ ],
     time_varying_known_categoricals=[],
     
     time_varying_known_reals=["A11","A26","AvgWeeklyDiff","A23","A6","A18","A15","A27","A0"],
     time_varying_unknown_categoricals=[],
     time_varying_unknown_reals=["WeeklyDifference"],
     
 )

validating=TimeSeriesDataSet.from_dataset(training, val_data, stop_randomization=True)
testing=TimeSeriesDataSet.from_dataset(training, test_data, stop_randomization=True)

batch_size = 16

train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
val_dataloader = validating.to_dataloader(train=False, batch_size=batch_size, num_workers=0)
test_dataloader = testing.to_dataloader(train=False, batch_size=batch_size, num_workers=0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value


In [25]:
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters

# create study
study = optimize_hyperparameters(
    train_dataloader,
    val_dataloader,
    model_path="FinalStudy",
    n_trials=200,
    max_epochs=50,
    gradient_clip_val_range=(0.01, 1.0),
    hidden_size_range=(8, 128),
    hidden_continuous_size_range=(8, 128),
    attention_head_size_range=(1, 4),
    learning_rate_range=(0.001, 0.1),
    dropout_range=(0.1, 0.3),
    trainer_kwargs=dict(limit_train_batches=30),
    reduce_on_plateau_patience=4,
    use_learning_rate_finder=False,  # use Optuna to find ideal learning rate or use in-built learning rate finder
)

# save study results - also we can resume tuning at a later point in time
with open("OneDayForecast_WeeklyLessFeats.pkl", "wb") as fout:
    pickle.dump(study, fout)

# show best hyperparameters
print(study.best_trial.params)

[I 2021-01-29 05:52:54,887] A new study created in memory with name: no-name-ad5daaea-fd68-4f83-99d7-5257ef844abb
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-29 05:58:40,103] Trial 0 finished with value: 119.43523406982422 and parameters: {'gradient_clip_val': 0.34540120380652134, 'hidden_size': 22, 'dropout': 0.23706547486450372, 'hidden_continuous_size': 16, 'attention_head_size': 4, 'learning_rate': 0.004735036198404529}. Best is trial 0 with value: 119.43523406982422.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-29 06:04:23,353] Trial 1 finished with value: 107.23677825927734 and parameters: {'gradient_clip_val': 0.05056008256328774, 'hidden_size': 106, 'dropout': 0.2328952695616629, 'hidden_continuous_size': 8, 'attention_head_size': 3, 'learning_rate': 0.0028005678169296857}. Best is trial 1 with value: 107.23677825927734

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-29 06:34:23,500] Trial 36 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-29 06:34:37,319] Trial 37 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-29 06:35:08,777] Trial 38 pruned. Trial was pruned at epoch 4.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-29 06:35:22,890] Trial 39 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-29 06:35:36,828] Trial 40 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-29 06:3

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-29 07:21:15,000] Trial 73 pruned. Trial was pruned at epoch 4.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-29 07:21:28,738] Trial 74 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-29 07:23:22,676] Trial 75 pruned. Trial was pruned at epoch 16.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-29 07:23:36,313] Trial 76 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-29 07:23:50,139] Trial 77 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-29 07:

TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-29 07:51:30,838] Trial 114 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-29 07:51:44,368] Trial 115 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-29 07:51:58,163] Trial 116 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-29 07:52:12,404] Trial 117 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-29 07:52:26,061] Trial 118 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 -

[I 2021-01-29 09:17:59,939] Trial 191 finished with value: 95.95420837402344 and parameters: {'gradient_clip_val': 0.09195339743351297, 'hidden_size': 125, 'dropout': 0.20675686925357095, 'hidden_continuous_size': 87, 'attention_head_size': 1, 'learning_rate': 0.0991737540084213}. Best is trial 98 with value: 93.22315979003906.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-29 09:19:54,834] Trial 192 pruned. Trial was pruned at epoch 16.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-29 09:20:08,841] Trial 193 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-29 09:20:43,461] Trial 194 pruned. Trial was pruned at epoch 4.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_D

{'gradient_clip_val': 0.07570769472208098, 'hidden_size': 121, 'dropout': 0.2152768224773502, 'hidden_continuous_size': 32, 'attention_head_size': 1, 'learning_rate': 0.07921950280200976}


## One Week Window

### Actual

In [26]:
data = pd.read_csv('Data/RestaurantDataVets_All_2to5.csv')
data = data.drop(columns=['DMY','MissingPrevDays','DailyAvg','DailyBusyness'])

remove=7
df = data.drop([x for x in range(remove)])
df = df.reset_index(drop=True)

#Objects need to be converted to float due to missing values at load time.
#df["DailyAvg"] = df.DailyAvg.astype(float)
df["WeeklyAvg"] = df.WeeklyAvg.astype(float)
df["MinSales"] = df.MinSales.astype(float)
df["MaxSales"] = df.MaxSales.astype(float)
#df["DailyBusyness"] = df.MaxSales.astype(float)
df["WeeklyBusyness"] = df.MaxSales.astype(float)

hotdata = onehotholiday(df)

numcols = len(hotdata.columns)
dataset = hotdata
dataset

Unnamed: 0,Index,Group,Year,Day,January,February,March,April,May,June,July,August,September,October,November,December,Sunday,Monday,Tuesday,Wednesday,Thursday,Friday,Saturday,Carnival,LentFasting,Ramadan,ChristmasSeason,WeeklyAvg,MinSales,MaxSales,WeeklyBusyness,A0,A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,A11,A12,A13,A14,A15,A16,A17,A18,A19,A20,A21,A22,A23,A24,A25,A26,A27,2to5
0,8,0,2016,18,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,666.450000,320.54,862.04,862.04,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,942.94
1,9,0,2016,19,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,475.490000,320.54,942.94,942.94,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,427.55
2,10,0,2016,20,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,320.540000,320.54,942.94,942.94,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,378.36
3,11,0,2016,21,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,365.610000,320.54,942.94,942.94,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,420.98
4,12,0,2016,22,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,494.990000,320.54,942.94,942.94,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,541.16
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1099,1107,0,2019,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1445.320629,225.30,2889.23,2889.23,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1558.69
1100,1108,0,2019,2,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,809.810440,225.30,2889.23,2889.23,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,656.41
1101,1109,0,2019,3,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,769.949295,225.30,2889.23,2889.23,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,875.43
1102,1110,0,2019,4,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,839.784810,225.30,2889.23,2889.23,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1079.06


In [27]:
train_data = dataset[lambda x: x.Index < train_time]

val_data = train_data[-84:]
train_data=train_data[:-84]

test_data = dataset[lambda x: x.Index >= train_time]


train_index = np.arange(1,len(train_data)+1, dtype="int")
val_index = np.arange(1,len(val_data)+1, dtype="int")
test_index = np.arange(1,len(test_data)+1, dtype="int")

train_data.Index = train_index
test_data.Index = test_index
val_data.Index = val_index

# create the dataset from the pandas dataframe
training = TimeSeriesDataSet(
     dataset[lambda x: x.Index < train_time-84],
     time_idx="Index",
     target="2to5",
     group_ids=["Group"],
     min_encoder_length=14,  # allowing predictions without history
     max_encoder_length=14,
     min_prediction_length=7,
     max_prediction_length=7,
     static_categoricals=[],
     static_reals=[ ],
     time_varying_known_categoricals=[],
     
     time_varying_known_reals=["Index","Year", "Day","January","February","March","April","May","June","July","August",
                               "September","October","November","December", "Sunday","Monday","Tuesday","Wednesday",
                               "Thursday","Friday","Saturday","Carnival","LentFasting","Ramadan",
                               "ChristmasSeason","WeeklyAvg","MinSales",
                               "MaxSales","WeeklyBusyness",
                               "A0","A1","A2", "A3","A4","A5","A6","A7","A8","A9",
                               "A10","A11","A12","A13","A14","A15","A16","A17",
                               "A18","A19","A20","A21","A22","A23","A24","A25","A26","A27"],
     time_varying_unknown_categoricals=[],
     time_varying_unknown_reals=["2to5"],
     
)

validating=TimeSeriesDataSet.from_dataset(training, val_data, stop_randomization=True)
testing=TimeSeriesDataSet.from_dataset(training, test_data, stop_randomization=True)

batch_size = 16

train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
val_dataloader = validating.to_dataloader(train=False, batch_size=batch_size, num_workers=0)
test_dataloader = testing.to_dataloader(train=False, batch_size=batch_size, num_workers=0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value


In [28]:
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters

# create study
study = optimize_hyperparameters(
    train_dataloader,
    val_dataloader,
    model_path="FinalStudy",
    n_trials=200,
    max_epochs=50,
    gradient_clip_val_range=(0.01, 1.0),
    hidden_size_range=(8, 128),
    hidden_continuous_size_range=(8, 128),
    attention_head_size_range=(1, 4),
    learning_rate_range=(0.001, 0.1),
    dropout_range=(0.1, 0.3),
    trainer_kwargs=dict(limit_train_batches=30),
    reduce_on_plateau_patience=4,
    use_learning_rate_finder=False,  # use Optuna to find ideal learning rate or use in-built learning rate finder
)

# save study results - also we can resume tuning at a later point in time
with open("OneWeekForecast_Actual.pkl", "wb") as fout:
    pickle.dump(study, fout)

# show best hyperparameters
print(study.best_trial.params)

[I 2021-01-29 13:11:48,456] A new study created in memory with name: no-name-0dae88d2-d3c2-4530-a9fe-0d5e2a6089c9
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-29 13:33:13,538] Trial 0 finished with value: 366.42474365234375 and parameters: {'gradient_clip_val': 0.010185624688382201, 'hidden_size': 78, 'dropout': 0.26868864852657626, 'hidden_continuous_size': 55, 'attention_head_size': 2, 'learning_rate': 0.004214001377915376}. Best is trial 0 with value: 366.42474365234375.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-29 13:54:49,602] Trial 1 finished with value: 130.38534545898438 and parameters: {'gradient_clip_val': 0.122387745528728, 'hidden_size': 51, 'dropout': 0.18425718933252255, 'hidden_continuous_size': 13, 'attention_head_size': 3, 'learning_rate': 0.011058261862912838}. Best is trial 1 with value: 130.38534545898438.

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-29 15:33:04,914] Trial 36 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-29 15:35:16,955] Trial 37 pruned. Trial was pruned at epoch 4.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-29 15:36:10,056] Trial 38 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-29 15:37:03,271] Trial 39 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-29 15:37:56,970] Trial 40 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-29 15:3

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-29 17:34:30,984] Trial 77 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-29 17:35:24,962] Trial 78 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-29 17:36:19,474] Trial 79 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-29 17:37:14,854] Trial 80 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-29 17:44:48,765] Trial 81 pruned. Trial was pruned at epoch 16.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-29 18:

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-29 20:03:58,363] Trial 118 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-29 20:04:53,072] Trial 119 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-29 20:05:48,044] Trial 120 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-29 20:13:26,381] Trial 121 pruned. Trial was pruned at epoch 16.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-29 20:35:22,800] Trial 122 finished with value: 86.22872924804688 and parameters: {'gradient_clip_val': 0.7317959763314575, 'hidden_size': 122, 'dropout': 0.2631693076883295, 'hidden_continu

{'gradient_clip_val': 0.6515398899400968, 'hidden_size': 119, 'dropout': 0.1518543467219442, 'hidden_continuous_size': 9, 'attention_head_size': 2, 'learning_rate': 0.098833244460919}


### Actual - Less Features

In [4]:
data = pd.read_csv('Data/RestaurantDataVets_All_2to5.csv')
data = data.drop(columns=['DMY','MissingPrevDays','DailyAvg','DailyBusyness'])

remove=7
df = data.drop([x for x in range(remove)])
df = df.reset_index(drop=True)

#Objects need to be converted to float due to missing values at load time.
#df["DailyAvg"] = df.DailyAvg.astype(float)
df["WeeklyAvg"] = df.WeeklyAvg.astype(float)
df["MinSales"] = df.MinSales.astype(float)
df["MaxSales"] = df.MaxSales.astype(float)
#df["DailyBusyness"] = df.DailyBusyness.astype(float)
df["WeeklyBusyness"] = df.WeeklyBusyness.astype(float)

hotdata = onehotholiday(df)

numcols = len(hotdata.columns)
dataset = hotdata
dataset

numcols = len(hotdata.columns)
X=hotdata.iloc[:, 0:numcols-1]
y=hotdata.iloc[:, numcols-1]

X_new = SelectKBest(f_regression, k=55).fit(X,y)

dfscores = pd.DataFrame(X_new.scores_)
dfcolumns = pd.DataFrame(X.columns)

#concat two dataframes for better visualization 
featureScores = pd.concat([dfcolumns,dfscores],axis=1)
featureScores.columns = ['Specs','Score']  #naming the dataframe columns
print(featureScores.nlargest(60,'Score'))  #print 10 best features


  corr /= X_norms


              Specs        Score
27        WeeklyAvg  1500.731120
29         MaxSales   311.795792
2              Year   285.344359
30   WeeklyBusyness   277.771456
28         MinSales   258.164774
0             Index   257.043505
22         Saturday   207.091595
16           Sunday   169.579736
18          Tuesday    56.879305
17           Monday    41.929382
20         Thursday    34.821244
19        Wednesday    29.642404
54              A23    19.493078
42              A11    14.542047
57              A26    14.542047
35               A4     9.753883
37               A6     9.643405
32               A1     9.643405
13          October     7.010144
31               A0     4.990336
47              A16     4.542011
58              A27     4.101715
45              A14     3.448309
6             March     3.387271
55              A24     3.296638
41              A10     3.199677
14         November     3.158984
39               A8     2.123052
46              A15     2.051886
49        

In [5]:
train_data = dataset[lambda x: x.Index < train_time]

val_data = train_data[-84:]
train_data=train_data[:-84]

test_data = dataset[lambda x: x.Index >= train_time]


train_index = np.arange(1,len(train_data)+1, dtype="int")
val_index = np.arange(1,len(val_data)+1, dtype="int")
test_index = np.arange(1,len(test_data)+1, dtype="int")

train_data.Index = train_index
test_data.Index = test_index
val_data.Index = val_index

# create the dataset from the pandas dataframe
training = TimeSeriesDataSet(
     dataset[lambda x: x.Index < train_time-84],
     time_idx="Index",
     target="2to5",
     group_ids=["Group"],
     min_encoder_length=14,  # allowing predictions without history
     max_encoder_length=14,
     min_prediction_length=7,
     max_prediction_length=7,
     static_categoricals=[],
     static_reals=[ ],
     time_varying_known_categoricals=[],
     
     time_varying_known_reals=["Index","WeeklyAvg","WeeklyBusyness","MaxSales",
                              "Year","MinSales","Index", "Monday", "Tuesday",
                              "Wednesday","Thursday","Saturday","Sunday","A4","A1","A6","A23","A11","A26"],
     time_varying_unknown_categoricals=[],
     time_varying_unknown_reals=["2to5"],
     
)

validating=TimeSeriesDataSet.from_dataset(training, val_data, stop_randomization=True)
testing=TimeSeriesDataSet.from_dataset(training, test_data, stop_randomization=True)

batch_size = 16

train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
val_dataloader = validating.to_dataloader(train=False, batch_size=batch_size, num_workers=0)
test_dataloader = testing.to_dataloader(train=False, batch_size=batch_size, num_workers=0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value


In [6]:
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters

# create study
study = optimize_hyperparameters(
    train_dataloader,
    val_dataloader,
    model_path="FinalStudy",
    n_trials=200,
    max_epochs=50,
    gradient_clip_val_range=(0.01, 1.0),
    hidden_size_range=(8, 128),
    hidden_continuous_size_range=(8, 128),
    attention_head_size_range=(1, 4),
    learning_rate_range=(0.001, 0.1),
    dropout_range=(0.1, 0.3),
    trainer_kwargs=dict(limit_train_batches=30),
    reduce_on_plateau_patience=4,
    use_learning_rate_finder=False,  # use Optuna to find ideal learning rate or use in-built learning rate finder
)

# save study results - also we can resume tuning at a later point in time
with open("OneWeekForecast_ActualLessFeats.pkl", "wb") as fout:
    pickle.dump(study, fout)

# show best hyperparameters
print(study.best_trial.params)

[I 2021-02-01 00:05:03,607] A new study created in memory with name: no-name-ee2ace06-38dd-4a5e-b68b-643c37ee88e1
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-02-01 00:14:33,163] Trial 0 finished with value: 645.80908203125 and parameters: {'gradient_clip_val': 0.04963255617462838, 'hidden_size': 42, 'dropout': 0.16707943858905358, 'hidden_continuous_size': 27, 'attention_head_size': 4, 'learning_rate': 0.0011173505233274621}. Best is trial 0 with value: 645.80908203125.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-02-01 00:23:32,187] Trial 1 finished with value: 626.2996215820312 and parameters: {'gradient_clip_val': 0.019663162941684246, 'hidden_size': 26, 'dropout': 0.17696242391609637, 'hidden_continuous_size': 24, 'attention_head_size': 3, 'learning_rate': 0.002331074842497151}. Best is trial 1 with value: 626.2996215820312.
GPU 

[I 2021-02-01 00:56:39,606] Trial 37 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-02-01 00:56:59,895] Trial 38 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-02-01 00:57:20,437] Trial 39 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-02-01 00:58:10,061] Trial 40 pruned. Trial was pruned at epoch 4.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-02-01 01:06:23,494] Trial 41 finished with value: 87.94921875 and parameters: {'gradient_clip_val': 0.9792671438046848, 'hidden_size': 84, 'dropout': 0.1460516450753059, 'hidden_continuous_size': 75, 'attention_head_size': 2, 'learning_rate

TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-02-01 01:57:45,147] Trial 77 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-02-01 01:58:05,236] Trial 78 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-02-01 01:58:26,322] Trial 79 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-02-01 01:58:46,943] Trial 80 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-02-01 01:59:29,077] Trial 81 pruned. Trial was pruned at epoch 4.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-02-01 03:03:44,324] Trial 115 pruned. Trial was pruned at epoch 16.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-02-01 03:04:04,762] Trial 116 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-02-01 03:04:24,939] Trial 117 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-02-01 03:04:45,913] Trial 118 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-02-01 03:05:02,455] Trial 119 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-02-0

[I 2021-02-01 04:43:12,116] Trial 147 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-02-01 04:46:06,817] Trial 148 pruned. Trial was pruned at epoch 16.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-02-01 04:54:19,106] Trial 149 finished with value: 75.08891296386719 and parameters: {'gradient_clip_val': 0.8010351001130019, 'hidden_size': 112, 'dropout': 0.13402732809695758, 'hidden_continuous_size': 97, 'attention_head_size': 2, 'learning_rate': 0.08964207338087368}. Best is trial 143 with value: 67.90242004394531.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-02-01 04:55:01,296] Trial 150 pruned. Trial was pruned at epoch 4.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_

[I 2021-02-01 06:21:03,277] Trial 182 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-02-01 06:21:23,911] Trial 183 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-02-01 06:22:16,226] Trial 184 pruned. Trial was pruned at epoch 4.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-02-01 06:23:07,775] Trial 185 pruned. Trial was pruned at epoch 4.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-02-01 06:31:20,907] Trial 186 finished with value: 72.96429443359375 and parameters: {'gradient_clip_val': 0.06579549932913595, 'hidden_size': 113, 'dropout': 0.24498501567377526, 'hidden_continuous_size': 100, 'attention_head_size': 2,

{'gradient_clip_val': 0.911641958284224, 'hidden_size': 122, 'dropout': 0.2485422037534699, 'hidden_continuous_size': 121, 'attention_head_size': 2, 'learning_rate': 0.09841355116570298}


### Day Difference

In [29]:
dataframe = pd.read_csv('Data/RestaurantDataVets_All_2to5_Differenced.csv')
data = dataframe.drop(columns=['DMY','MissingPrevDays','WeeklyDifference','DiffDifference','2to5'
                                ,'DailyBusyness','DailyAvg','AvgDailyDiff'])

remove=14
df = data.drop([x for x in range(remove)])
df = df.reset_index(drop=True)

#Objects need to be converted to float due to missing values at load time.
#df["DailyAvg"] = df.DailyAvg.astype(float)
df["WeeklyAvg"] = df.WeeklyAvg.astype(float)
df["MinSales"] = df.MinSales.astype(float)
df["MaxSales"] = df.MaxSales.astype(float)
#df["DailyBusyness"] = df.DailyBusyness.astype(float)
df["WeeklyBusyness"] = df.WeeklyBusyness.astype(float)

#df["AvgDailyDiff"] = df.AvgDailyDiff.astype(float)
df["AvgWeeklyDiff"] = df.AvgWeeklyDiff.astype(float)
df["MaxDailyDiff"] = df.MaxDailyDiff.astype(float)
df["MinDailyDiff"] = df.MinDailyDiff.astype(float)
df["DailyDifference"] = df.DailyDifference.astype(float)
#df["WeeklyDifference"] = df.WeeklyDifference.astype(float)

hotdata = onehotholidaydiff(df,'DailyDifference')

numcols = len(hotdata.columns)
dataset = hotdata
dataset

Unnamed: 0,Index,Group,Year,Day,January,February,March,April,May,June,July,August,September,October,November,December,Sunday,Monday,Tuesday,Wednesday,Thursday,Friday,Saturday,Carnival,LentFasting,Ramadan,ChristmasSeason,WeeklyAvg,MinSales,MaxSales,WeeklyBusyness,AvgWeeklyDiff,MinDailyDiff,MaxDailyDiff,A0,A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,A11,A12,A13,A14,A15,A16,A17,A18,A19,A20,A21,A22,A23,A24,A25,A26,A27,DailyDifference
0,15,0,2016,25,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,804.69500,320.54,942.94,0.77788,276.490000,-515.39,321.11,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,60.29
1,16,0,2016,26,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,451.52000,320.54,942.94,0.21044,-47.940000,-515.39,321.11,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-487.01
2,17,0,2016,27,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,349.45000,320.54,942.94,0.04645,57.820000,-515.39,321.11,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,102.08
3,18,0,2016,28,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,393.29500,320.54,942.94,0.11689,42.620000,-515.39,321.11,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-66.01
4,19,0,2016,29,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,518.07500,320.54,942.94,0.31738,46.170000,-515.39,321.11,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-91.19
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1092,1107,0,2019,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1445.32063,225.30,2889.23,0.45798,5.415190,-1827.44,1929.78,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-254.53
1093,1108,0,2019,2,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,809.81044,225.30,2889.23,0.21942,-4.407278,-1827.44,1929.78,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-902.28
1094,1109,0,2019,3,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,769.94929,225.30,2889.23,0.20445,12.210903,-1827.44,1929.78,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,219.02
1095,1110,0,2019,4,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,839.78481,225.30,2889.23,0.23067,79.801529,-1827.44,1929.78,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,203.63


In [31]:
train_data = dataset[lambda x: x.Index < train_time]

val_data = train_data[-80:]
train_data=train_data[:-80]

test_data = dataset[lambda x: x.Index >= train_time]


train_index = np.arange(1,len(train_data)+1, dtype="int")
val_index = np.arange(1,len(val_data)+1, dtype="int")
test_index = np.arange(1,len(test_data)+1, dtype="int")

train_data.Index = train_index
test_data.Index = test_index
val_data.Index = val_index


# create the dataset from the pandas dataframe
training = TimeSeriesDataSet(
     dataset[lambda x: x.Index < train_time-80],
     time_idx="Index",
     target="DailyDifference",
     group_ids=["Group"],
     min_encoder_length=14,  # allowing predictions without history
     max_encoder_length=14,
     min_prediction_length=7,
     max_prediction_length=7,
     static_categoricals=[],
     static_reals=[ ],
     time_varying_known_categoricals=[],
     
     time_varying_known_reals=["Index","Year", "Day","January","February","March","April","May","June","July","August",
                               "September","October","November","December", "Sunday","Monday","Tuesday","Wednesday",
                               "Thursday","Friday","Saturday","Carnival","LentFasting","Ramadan",
                               "ChristmasSeason","WeeklyAvg","MinSales","MaxDailyDiff","MinDailyDiff",
                               "MaxSales","WeeklyBusyness", "AvgWeeklyDiff",
                               "A0","A1","A2", "A3","A4","A5","A6","A7","A8","A9",
                               "A10","A11","A12","A13","A14","A15","A16","A17",
                               "A18","A19","A20","A21","A22","A23","A24","A25","A26","A27"],
    
     time_varying_unknown_categoricals=[],
     time_varying_unknown_reals=["DailyDifference"],
     
 )

validating=TimeSeriesDataSet.from_dataset(training, val_data, stop_randomization=True)
testing=TimeSeriesDataSet.from_dataset(training, test_data, stop_randomization=True)

batch_size = 16

train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
val_dataloader = validating.to_dataloader(train=False, batch_size=batch_size, num_workers=0)
test_dataloader = testing.to_dataloader(train=False, batch_size=batch_size, num_workers=0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value


In [32]:
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters

# create study
study = optimize_hyperparameters(
    train_dataloader,
    val_dataloader,
    model_path="FinalStudy",
    n_trials=200,
    max_epochs=50,
    gradient_clip_val_range=(0.01, 1.0),
    hidden_size_range=(8, 128),
    hidden_continuous_size_range=(8, 128),
    attention_head_size_range=(1, 4),
    learning_rate_range=(0.001, 0.1),
    dropout_range=(0.1, 0.3),
    trainer_kwargs=dict(limit_train_batches=30),
    reduce_on_plateau_patience=4,
    use_learning_rate_finder=False,  # use Optuna to find ideal learning rate or use in-built learning rate finder
)

# save study results - also we can resume tuning at a later point in time
with open("OneWeekForecast_Daily.pkl", "wb") as fout:
    pickle.dump(study, fout)

# show best hyperparameters
print(study.best_trial.params)

[I 2021-01-29 23:50:06,372] A new study created in memory with name: no-name-3ba3446a-1cc8-4aba-a370-7b106b8f5426
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-30 00:14:57,097] Trial 0 finished with value: 131.1192169189453 and parameters: {'gradient_clip_val': 0.03479893586020266, 'hidden_size': 25, 'dropout': 0.1796805388803084, 'hidden_continuous_size': 14, 'attention_head_size': 1, 'learning_rate': 0.04639563894631775}. Best is trial 0 with value: 131.1192169189453.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-30 00:39:41,751] Trial 1 finished with value: 163.50119018554688 and parameters: {'gradient_clip_val': 0.6104157942532328, 'hidden_size': 60, 'dropout': 0.2613750357622869, 'hidden_continuous_size': 42, 'attention_head_size': 1, 'learning_rate': 0.021269883786505597}. Best is trial 0 with value: 131.1192169189453.
GPU a

[I 2021-01-30 02:10:35,258] Trial 37 pruned. Trial was pruned at epoch 4.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-30 02:11:31,078] Trial 38 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-30 02:12:13,464] Trial 39 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-30 02:13:08,849] Trial 40 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-30 02:21:14,870] Trial 41 pruned. Trial was pruned at epoch 16.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-30 02:23:39,351] Trial 42 pruned. Trial was prun

[I 2021-01-30 06:36:14,861] Trial 118 finished with value: 100.9364242553711 and parameters: {'gradient_clip_val': 0.060498028833929095, 'hidden_size': 128, 'dropout': 0.270337693847159, 'hidden_continuous_size': 53, 'attention_head_size': 3, 'learning_rate': 0.09965066342480189}. Best is trial 118 with value: 100.9364242553711.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-30 06:37:09,381] Trial 119 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-30 06:39:28,635] Trial 120 pruned. Trial was pruned at epoch 4.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-30 06:47:08,966] Trial 121 pruned. Trial was pruned at epoch 16.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_

{'gradient_clip_val': 0.060498028833929095, 'hidden_size': 128, 'dropout': 0.270337693847159, 'hidden_continuous_size': 53, 'attention_head_size': 3, 'learning_rate': 0.09965066342480189}


### Daily Difference - Less Features

In [7]:
data = pd.read_csv('Data/RestaurantDataVets_All_2to5_Differenced.csv')
data = data.drop(columns=['DMY','MissingPrevDays','2to5','WeeklyDifference','DiffDifference'])

remove=14
df = data.drop([x for x in range(remove)])
df = df.reset_index(drop=True)

#Objects need to be converted to float due to missing values at load time.
#df["DailyAvg"] = df.DailyAvg.astype(float)
df["WeeklyAvg"] = df.WeeklyAvg.astype(float)
df["MinSales"] = df.MinSales.astype(float)
df["MaxSales"] = df.MaxSales.astype(float)
#df["DailyBusyness"] = df.DailyBusyness.astype(float)
df["WeeklyBusyness"] = df.WeeklyBusyness.astype(float)


#df["AvgDailyDiff"] = df.AvgDailyDiff.astype(float)
df["AvgWeeklyDiff"] = df.AvgWeeklyDiff.astype(float)
df["MaxDailyDiff"] = df.MaxDailyDiff.astype(float)
df["MinDailyDiff"] = df.MinDailyDiff.astype(float)
df["DailyDifference"] = df.DailyDifference.astype(float)
hotdata = onehotholidaydiff(df,'DailyDifference')

numcols = len(hotdata.columns)
dataset = hotdata
dataset

numcols = len(hotdata.columns)
X=hotdata.iloc[:, 0:numcols-1]
y=hotdata.iloc[:, numcols-1]

X_new = SelectKBest(f_regression, k=55).fit(X,y)

dfscores = pd.DataFrame(X_new.scores_)
dfcolumns = pd.DataFrame(X.columns)

#concat two dataframes for better visualization 
featureScores = pd.concat([dfcolumns,dfscores],axis=1)
featureScores.columns = ['Specs','Score']  #naming the dataframe columns
print(featureScores.nlargest(60,'Score'))  #print 10 best features

  corr /= X_norms


              Specs       Score
17           Monday  432.655965
22         Saturday  198.666326
32   WeeklyBusyness  104.515124
28        WeeklyAvg  102.531345
33     AvgDailyDiff   89.744144
21           Friday   26.227024
48              A11   22.434007
60              A23   13.126514
37               A0   12.797335
34    AvgWeeklyDiff    9.755608
43               A6    8.050688
44               A7    6.731783
61              A24    6.572209
64              A27    6.399512
19        Wednesday    4.874038
56              A19    4.384009
38               A1    3.688036
40               A3    3.527297
63              A26    2.820926
46               A9    2.335293
45               A8    1.684062
41               A4    1.645293
18          Tuesday    1.409225
54              A17    1.271445
47              A10    0.855853
62              A25    0.831714
16           Sunday    0.827064
26  ChristmasSeason    0.791582
24      LentFasting    0.513328
53              A16    0.379622
6       

In [8]:
train_data = dataset[lambda x: x.Index < train_time]

val_data = train_data[-80:]
train_data=train_data[:-80]

test_data = dataset[lambda x: x.Index >= train_time]


train_index = np.arange(1,len(train_data)+1, dtype="int")
val_index = np.arange(1,len(val_data)+1, dtype="int")
test_index = np.arange(1,len(test_data)+1, dtype="int")

train_data.Index = train_index
test_data.Index = test_index
val_data.Index = val_index


# create the dataset from the pandas dataframe
training = TimeSeriesDataSet(
     dataset[lambda x: x.Index < train_time-80],
     time_idx="Index",
     target="DailyDifference",
     group_ids=["Group"],
     min_encoder_length=14,  # allowing predictions without history
     max_encoder_length=14,
     min_prediction_length=7,
     max_prediction_length=7,
     static_categoricals=[],
     static_reals=[ ],
     time_varying_known_categoricals=[],
     
     time_varying_known_reals=["Monday","Saturday","WeeklyBusyness","WeeklyAvg",
                              "AvgWeeklyDiff","Friday","A11","A23","A0","A6","A24","A7","A27"],
     time_varying_unknown_categoricals=[],
     time_varying_unknown_reals=["DailyDifference"],
     
 )

validating=TimeSeriesDataSet.from_dataset(training, val_data, stop_randomization=True)
testing=TimeSeriesDataSet.from_dataset(training, test_data, stop_randomization=True)

batch_size = 16

train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
val_dataloader = validating.to_dataloader(train=False, batch_size=batch_size, num_workers=0)
test_dataloader = testing.to_dataloader(train=False, batch_size=batch_size, num_workers=0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value


In [9]:
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters

# create study
study = optimize_hyperparameters(
    train_dataloader,
    val_dataloader,
    model_path="FinalStudy",
    n_trials=200,
    max_epochs=50,
    gradient_clip_val_range=(0.01, 1.0),
    hidden_size_range=(8, 128),
    hidden_continuous_size_range=(8, 128),
    attention_head_size_range=(1, 4),
    learning_rate_range=(0.001, 0.1),
    dropout_range=(0.1, 0.3),
    trainer_kwargs=dict(limit_train_batches=30),
    reduce_on_plateau_patience=4,
    use_learning_rate_finder=False,  # use Optuna to find ideal learning rate or use in-built learning rate finder
)

# save study results - also we can resume tuning at a later point in time
with open("OneWeekForecast_DailyLessFeats.pkl", "wb") as fout:
    pickle.dump(study, fout)

# show best hyperparameters
print(study.best_trial.params)

[I 2021-02-01 07:07:57,958] A new study created in memory with name: no-name-edf67b30-46f3-4015-8955-fc9a3caf1c42
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-02-01 07:14:36,586] Trial 0 finished with value: 123.77666473388672 and parameters: {'gradient_clip_val': 0.029617487147789768, 'hidden_size': 33, 'dropout': 0.15254180956912056, 'hidden_continuous_size': 10, 'attention_head_size': 1, 'learning_rate': 0.01895070721416732}. Best is trial 0 with value: 123.77666473388672.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-02-01 07:21:25,302] Trial 1 finished with value: 170.14964294433594 and parameters: {'gradient_clip_val': 0.2864058137566044, 'hidden_size': 59, 'dropout': 0.25946291557883006, 'hidden_continuous_size': 12, 'attention_head_size': 3, 'learning_rate': 0.0039048318489478917}. Best is trial 0 with value: 123.77666473388672

[I 2021-02-01 08:12:20,537] Trial 30 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-02-01 08:19:02,756] Trial 31 finished with value: 104.9613037109375 and parameters: {'gradient_clip_val': 0.3645025558110532, 'hidden_size': 57, 'dropout': 0.1704368912095091, 'hidden_continuous_size': 32, 'attention_head_size': 3, 'learning_rate': 0.0822139642137037}. Best is trial 12 with value: 93.91452026367188.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-02-01 08:19:44,018] Trial 32 pruned. Trial was pruned at epoch 4.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-02-01 08:20:00,575] Trial 33 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: 

[I 2021-02-01 09:24:48,812] Trial 63 finished with value: 102.50044250488281 and parameters: {'gradient_clip_val': 0.464487708770514, 'hidden_size': 90, 'dropout': 0.2839239880104994, 'hidden_continuous_size': 73, 'attention_head_size': 3, 'learning_rate': 0.07115130958977527}. Best is trial 50 with value: 93.78617858886719.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-02-01 09:27:26,800] Trial 64 pruned. Trial was pruned at epoch 16.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-02-01 09:27:45,791] Trial 65 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-02-01 09:28:03,108] Trial 66 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES

[I 2021-02-01 10:15:45,499] Trial 101 pruned. Trial was pruned at epoch 16.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-02-01 10:16:03,298] Trial 102 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-02-01 10:16:47,049] Trial 103 pruned. Trial was pruned at epoch 4.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-02-01 10:23:41,793] Trial 104 finished with value: 110.01700592041016 and parameters: {'gradient_clip_val': 0.4507722932448734, 'hidden_size': 97, 'dropout': 0.21798281831729527, 'hidden_continuous_size': 73, 'attention_head_size': 3, 'learning_rate': 0.08286250571190318}. Best is trial 67 with value: 92.6802749633789.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DE

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-02-01 11:40:38,248] Trial 134 finished with value: 99.1221694946289 and parameters: {'gradient_clip_val': 0.08439032076967774, 'hidden_size': 92, 'dropout': 0.21303753774671, 'hidden_continuous_size': 49, 'attention_head_size': 3, 'learning_rate': 0.099607766511045}. Best is trial 67 with value: 92.6802749633789.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-02-01 11:48:12,405] Trial 135 finished with value: 114.30701446533203 and parameters: {'gradient_clip_val': 0.013632377036496933, 'hidden_size': 79, 'dropout': 0.2172723350850857, 'hidden_continuous_size': 47, 'attention_head_size': 3, 'learning_rate': 0.09990816797357437}. Best is trial 67 with value: 92.6802749633789.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-02-01 11:50:33,170] Trial 136 pruned. Trial was pruned at epoch 16.
G

[I 2021-02-01 13:04:57,012] Trial 166 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-02-01 13:07:25,980] Trial 167 pruned. Trial was pruned at epoch 16.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-02-01 13:07:43,706] Trial 168 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-02-01 13:08:01,823] Trial 169 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-02-01 13:10:37,759] Trial 170 pruned. Trial was pruned at epoch 16.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-02-01 13:11:21,390] Trial 171 pruned. Trial w

{'gradient_clip_val': 0.5745488117660288, 'hidden_size': 123, 'dropout': 0.261710570138562, 'hidden_continuous_size': 68, 'attention_head_size': 4, 'learning_rate': 0.09994016410626705}


### Week Difference

In [34]:
dataframe = pd.read_csv('Data/RestaurantDataVets_All_2to5_Differenced.csv')
data = dataframe.drop(columns=['DMY','MissingPrevDays','DailyDifference','DiffDifference','2to5',
                              'DailyAvg','DailyBusyness','AvgDailyDiff'])

remove=14
df = data.drop([x for x in range(remove)])
df = df.reset_index(drop=True)

#Objects need to be converted to float due to missing values at load time.
#df["DailyAvg"] = df.DailyAvg.astype(float)
df["WeeklyAvg"] = df.WeeklyAvg.astype(float)
df["MinSales"] = df.MinSales.astype(float)
df["MaxSales"] = df.MaxSales.astype(float)
#df["DailyBusyness"] = df.DailyBusyness.astype(float)
df["WeeklyBusyness"] = df.WeeklyBusyness.astype(float)

#df["AvgDailyDiff"] = df.AvgDailyDiff.astype(float)
df["AvgWeeklyDiff"] = df.AvgWeeklyDiff.astype(float)
df["MaxDailyDiff"] = df.MaxDailyDiff.astype(float)
df["MinDailyDiff"] = df.MinDailyDiff.astype(float)
#df["DailyDifference"] = df.DailyDifference.astype(float)
df["WeeklyDifference"] = df.WeeklyDifference.astype(float)

hotdata = onehotholidaydiff(df,'WeeklyDifference')

numcols = len(hotdata.columns)
dataset = hotdata
dataset

Unnamed: 0,Index,Group,Year,Day,January,February,March,April,May,June,July,August,September,October,November,December,Sunday,Monday,Tuesday,Wednesday,Thursday,Friday,Saturday,Carnival,LentFasting,Ramadan,ChristmasSeason,WeeklyAvg,MinSales,MaxSales,WeeklyBusyness,AvgWeeklyDiff,MinDailyDiff,MaxDailyDiff,A0,A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,A11,A12,A13,A14,A15,A16,A17,A18,A19,A20,A21,A22,A23,A24,A25,A26,A27,WeeklyDifference
0,15,0,2016,25,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,804.69500,320.54,942.94,0.77788,276.490000,-515.39,321.11,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-58.44
1,16,0,2016,26,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,451.52000,320.54,942.94,0.21044,-47.940000,-515.39,321.11,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-30.06
2,17,0,2016,27,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,349.45000,320.54,942.94,0.04645,57.820000,-515.39,321.11,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,121.21
3,18,0,2016,28,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,393.29500,320.54,942.94,0.11689,42.620000,-515.39,321.11,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.58
4,19,0,2016,29,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,518.07500,320.54,942.94,0.31738,46.170000,-515.39,321.11,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-198.79
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1092,1107,0,2019,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1445.32063,225.30,2889.23,0.45798,5.415190,-1827.44,1929.78,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,36.64
1093,1108,0,2019,2,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,809.81044,225.30,2889.23,0.21942,-4.407278,-1827.44,1929.78,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-222.95
1094,1109,0,2019,3,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,769.94929,225.30,2889.23,0.20445,12.210903,-1827.44,1929.78,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-237.58
1095,1110,0,2019,4,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,839.78481,225.30,2889.23,0.23067,79.801529,-1827.44,1929.78,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-321.44


In [35]:
train_data = dataset[lambda x: x.Index < train_time]

val_data = train_data[-80:]
train_data=train_data[:-80]

test_data = dataset[lambda x: x.Index >= train_time]


train_index = np.arange(1,len(train_data)+1, dtype="int")
val_index = np.arange(1,len(val_data)+1, dtype="int")
test_index = np.arange(1,len(test_data)+1, dtype="int")

train_data.Index = train_index
test_data.Index = test_index
val_data.Index = val_index

# create the dataset from the pandas dataframe
training = TimeSeriesDataSet(
     dataset[lambda x: x.Index < train_time-80],
     time_idx="Index",
     target="WeeklyDifference",
     group_ids=["Group"],
     min_encoder_length=14,  # allowing predictions without history
     max_encoder_length=14,
     min_prediction_length=7,
     max_prediction_length=7,
     static_categoricals=[],
     static_reals=[ ],
     time_varying_known_categoricals=[],
     
     time_varying_known_reals=["Index","Year", "Day","January","February","March","April","May","June","July","August",
                               "September","October","November","December", "Sunday","Monday","Tuesday","Wednesday",
                               "Thursday","Friday","Saturday","Carnival","LentFasting","Ramadan",
                               "ChristmasSeason","WeeklyAvg","MinSales","MaxDailyDiff","MinDailyDiff",
                               "MaxSales","WeeklyBusyness", "AvgWeeklyDiff",
                               "A0","A1","A2", "A3","A4","A5","A6","A7","A8","A9",
                               "A10","A11","A12","A13","A14","A15","A16","A17",
                               "A18","A19","A20","A21","A22","A23","A24","A25","A26","A27"],
     time_varying_unknown_categoricals=[],
     time_varying_unknown_reals=["WeeklyDifference"],
     
 )

validating=TimeSeriesDataSet.from_dataset(training, val_data, stop_randomization=True)
testing=TimeSeriesDataSet.from_dataset(training, test_data, stop_randomization=True)

batch_size = 16

train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
val_dataloader = validating.to_dataloader(train=False, batch_size=batch_size, num_workers=0)
test_dataloader = testing.to_dataloader(train=False, batch_size=batch_size, num_workers=0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value


In [36]:
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters

# create study
study = optimize_hyperparameters(
    train_dataloader,
    val_dataloader,
    model_path="FinalStudy",
    n_trials=200,
    max_epochs=50,
    gradient_clip_val_range=(0.01, 1.0),
    hidden_size_range=(8, 128),
    hidden_continuous_size_range=(8, 128),
    attention_head_size_range=(1, 4),
    learning_rate_range=(0.001, 0.1),
    dropout_range=(0.1, 0.3),
    trainer_kwargs=dict(limit_train_batches=30),
    reduce_on_plateau_patience=4,
    use_learning_rate_finder=False,  # use Optuna to find ideal learning rate or use in-built learning rate finder
)

# save study results - also we can resume tuning at a later point in time
with open("OneWeekForecast_Weekly.pkl", "wb") as fout:
    pickle.dump(study, fout)

# show best hyperparameters
print(study.best_trial.params)

[I 2021-01-30 09:18:54,215] A new study created in memory with name: no-name-1c319d54-a1d8-4a52-845c-6faaaf64fe74
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-30 09:40:52,259] Trial 0 finished with value: 108.1421890258789 and parameters: {'gradient_clip_val': 0.025921756461341077, 'hidden_size': 20, 'dropout': 0.2369439911142748, 'hidden_continuous_size': 16, 'attention_head_size': 4, 'learning_rate': 0.022935568686758077}. Best is trial 0 with value: 108.1421890258789.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-30 10:02:45,731] Trial 1 finished with value: 100.25920867919922 and parameters: {'gradient_clip_val': 0.1650042508553855, 'hidden_size': 14, 'dropout': 0.2681790905028924, 'hidden_continuous_size': 10, 'attention_head_size': 2, 'learning_rate': 0.03913876972813039}. Best is trial 1 with value: 100.25920867919922.
GPU

[I 2021-01-30 13:23:39,523] Trial 79 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-30 13:24:34,015] Trial 80 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-30 13:41:31,026] Trial 81 finished with value: 97.63304138183594 and parameters: {'gradient_clip_val': 0.5963272425684054, 'hidden_size': 96, 'dropout': 0.2110631477445206, 'hidden_continuous_size': 96, 'attention_head_size': 3, 'learning_rate': 0.09968798250148984}. Best is trial 81 with value: 97.63304138183594.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-30 13:43:51,897] Trial 82 pruned. Trial was pruned at epoch 4.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES:

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-30 15:27:02,745] Trial 120 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-30 15:29:19,872] Trial 121 pruned. Trial was pruned at epoch 4.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-30 15:31:36,048] Trial 122 pruned. Trial was pruned at epoch 4.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-30 15:32:31,673] Trial 123 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-30 15:34:47,159] Trial 124 pruned. Trial was pruned at epoch 4.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-30

{'gradient_clip_val': 0.03542660204271917, 'hidden_size': 127, 'dropout': 0.23610003487870085, 'hidden_continuous_size': 106, 'attention_head_size': 1, 'learning_rate': 0.08725581446395367}


### Week Difference - Less Features

In [44]:
dataframe = pd.read_csv('Data/RestaurantDataVets_All_2to5_Differenced.csv')
data = dataframe.drop(columns=['DMY','MissingPrevDays','DailyDifference','DiffDifference','2to5'])

remove=14
df = data.drop([x for x in range(remove)])
df = df.reset_index(drop=True)

#Objects need to be converted to float due to missing values at load time.
df["DailyAvg"] = df.DailyAvg.astype(float)
df["WeeklyAvg"] = df.WeeklyAvg.astype(float)
df["MinSales"] = df.MinSales.astype(float)
df["MaxSales"] = df.MaxSales.astype(float)
df["DailyBusyness"] = df.DailyBusyness.astype(float)
df["WeeklyBusyness"] = df.WeeklyBusyness.astype(float)

df["AvgDailyDiff"] = df.AvgDailyDiff.astype(float)
df["AvgWeeklyDiff"] = df.AvgWeeklyDiff.astype(float)
df["MaxDailyDiff"] = df.MaxDailyDiff.astype(float)
df["MinDailyDiff"] = df.MinDailyDiff.astype(float)
#df["DailyDifference"] = df.DailyDifference.astype(float)
df["WeeklyDifference"] = df.WeeklyDifference.astype(float)

hotdata = onehotholidaydiff(df,'WeeklyDifference')

numcols = len(hotdata.columns)
dataset = hotdata
dataset

numcols = len(hotdata.columns)
X=hotdata.iloc[:, 0:numcols-1]
y=hotdata.iloc[:, numcols-1]

X_new = SelectKBest(f_regression, k=55).fit(X,y)

dfscores = pd.DataFrame(X_new.scores_)
dfcolumns = pd.DataFrame(X.columns)

#concat two dataframes for better visualization 
featureScores = pd.concat([dfcolumns,dfscores],axis=1)
featureScores.columns = ['Specs','Score']  #naming the dataframe columns
print(featureScores.nlargest(60,'Score'))  #print 10 best features

  corr /= X_norms


              Specs      Score
48              A11  57.296458
63              A26  26.953598
34    AvgWeeklyDiff  23.967553
60              A23  16.344035
43               A6  14.945257
55              A18  10.932097
52              A15   6.864160
64              A27   5.434958
37               A0   5.434757
54              A17   3.212682
45               A8   2.669660
62              A25   2.482104
26  ChristmasSeason   2.027191
3               Day   1.761964
41               A4   1.751050
6             March   1.617099
50              A13   1.200954
44               A7   0.942852
39               A2   0.787587
51              A14   0.602171
24      LentFasting   0.513156
53              A16   0.490658
42               A5   0.423050
11           August   0.405338
5          February   0.384573
25          Ramadan   0.356426
28        WeeklyAvg   0.355412
35     MinDailyDiff   0.335999
27         DailyAvg   0.251915
10             July   0.211090
14         November   0.199972
32   Wee

In [45]:
train_data = dataset[lambda x: x.Index < train_time]

val_data = train_data[-80:]
train_data=train_data[:-80]

test_data = dataset[lambda x: x.Index >= train_time]


train_index = np.arange(1,len(train_data)+1, dtype="int")
val_index = np.arange(1,len(val_data)+1, dtype="int")
test_index = np.arange(1,len(test_data)+1, dtype="int")

train_data.Index = train_index
test_data.Index = test_index
val_data.Index = val_index

# create the dataset from the pandas dataframe
training = TimeSeriesDataSet(
     dataset[lambda x: x.Index < train_time-80],
     time_idx="Index",
     target="WeeklyDifference",
     group_ids=["Group"],
     min_encoder_length=14,  # allowing predictions without history
     max_encoder_length=14,
     min_prediction_length=7,
     max_prediction_length=7,
     static_categoricals=[],
     static_reals=[ ],
     time_varying_known_categoricals=[],
     
     time_varying_known_reals=["A11","A26","AvgWeeklyDiff","A23","A6","A18","A15","A27","A0"],
     time_varying_unknown_categoricals=[],
     time_varying_unknown_reals=["WeeklyDifference"],
     
 )

validating=TimeSeriesDataSet.from_dataset(training, val_data, stop_randomization=True)
testing=TimeSeriesDataSet.from_dataset(training, test_data, stop_randomization=True)

batch_size = 16

train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
val_dataloader = validating.to_dataloader(train=False, batch_size=batch_size, num_workers=0)
test_dataloader = testing.to_dataloader(train=False, batch_size=batch_size, num_workers=0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value


In [46]:
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters

# create study
study = optimize_hyperparameters(
    train_dataloader,
    val_dataloader,
    model_path="FinalStudy",
    n_trials=200,
    max_epochs=50,
    gradient_clip_val_range=(0.01, 1.0),
    hidden_size_range=(8, 128),
    hidden_continuous_size_range=(8, 128),
    attention_head_size_range=(1, 4),
    learning_rate_range=(0.001, 0.1),
    dropout_range=(0.1, 0.3),
    trainer_kwargs=dict(limit_train_batches=30),
    reduce_on_plateau_patience=4,
    use_learning_rate_finder=False,  # use Optuna to find ideal learning rate or use in-built learning rate finder
)

# save study results - also we can resume tuning at a later point in time
with open("OneWeekForecast_WeeklyLessFeats.pkl", "wb") as fout:
    pickle.dump(study, fout)

# show best hyperparameters
print(study.best_trial.params)

[I 2021-01-31 09:54:38,067] A new study created in memory with name: no-name-8968d590-2178-49ce-b70e-68bae2dd2f42
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-31 10:01:03,115] Trial 0 finished with value: 108.90898132324219 and parameters: {'gradient_clip_val': 0.016620713579318637, 'hidden_size': 96, 'dropout': 0.14652585549865688, 'hidden_continuous_size': 10, 'attention_head_size': 1, 'learning_rate': 0.05393774012753767}. Best is trial 0 with value: 108.90898132324219.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-31 10:06:39,321] Trial 1 finished with value: 115.83901977539062 and parameters: {'gradient_clip_val': 0.06985931634698572, 'hidden_size': 8, 'dropout': 0.245970273601761, 'hidden_continuous_size': 8, 'attention_head_size': 2, 'learning_rate': 0.009849515523401183}. Best is trial 0 with value: 108.90898132324219.
GP

TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-31 11:46:31,799] Trial 70 pruned. Trial was pruned at epoch 4.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-31 11:53:01,828] Trial 71 finished with value: 105.25302124023438 and parameters: {'gradient_clip_val': 0.5809042893266501, 'hidden_size': 115, 'dropout': 0.16105884154769012, 'hidden_continuous_size': 24, 'attention_head_size': 4, 'learning_rate': 0.08903182715426637}. Best is trial 5 with value: 99.51667022705078.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-31 11:55:16,391] Trial 72 pruned. Trial was pruned at epoch 16.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-31 11:55:58,698] Trial 73 pruned. Trial was pruned at epoch 4.
GPU available: True, use

TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-31 14:09:15,319] Trial 139 pruned. Trial was pruned at epoch 1.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-31 14:11:42,795] Trial 140 pruned. Trial was pruned at epoch 16.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-31 14:12:27,882] Trial 141 pruned. Trial was pruned at epoch 4.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-31 14:13:09,701] Trial 142 pruned. Trial was pruned at epoch 4.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[I 2021-01-31 14:13:52,497] Trial 143 pruned. Trial was pruned at epoch 4.
GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 

{'gradient_clip_val': 0.17326072969911535, 'hidden_size': 123, 'dropout': 0.2179922212966772, 'hidden_continuous_size': 21, 'attention_head_size': 4, 'learning_rate': 0.09355049810260364}
