<a href="https://colab.research.google.com/github/Ben-Costa/Research_Fall_2022_AutoML/blob/main/Time_series_test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install sktime

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
pip install autoPyTorch[forecasting]

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting autoPyTorch[forecasting]
  Using cached autoPyTorch-0.2.1-py3-none-any.whl (711 kB)
Collecting scikit-learn<0.25.0,>=0.24.0
  Downloading scikit_learn-0.24.2-cp37-cp37m-manylinux2010_x86_64.whl (22.3 MB)
[K     |████████████████████████████████| 22.3 MB 32.1 MB/s 
Collecting smac>=1.2
  Using cached smac-1.4.0.tar.gz (202 kB)
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
    Preparing wheel metadata ... [?25l[?25hdone
Collecting pyrfr<0.9,>=0.7
  Using cached pyrfr-0.8.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.4 MB)
Collecting flaky
  Using cached flaky-3.7.0-py2.py3-none-any.whl (22 kB)
Collecting lockfile
  Using cached lockfile-0.12.2-py2.py3-none-any.whl (13 kB)
Collecting pynisher>=0.6.3
  Using cached pynisher-1.0.0.tar.gz (30 kB)
  Installing build dependencies ... [?25l[?25hdone


In [None]:
import os
import tempfile as tmp
import warnings
import copy

# Environment tweaks, applied before the heavier libraries below are imported:
# joblib's temp folder goes to the system temp dir and the OpenMP / OpenBLAS /
# MKL thread-count variables are all set to 1.
os.environ.update(
    JOBLIB_TEMP_FOLDER=tmp.gettempdir(),
    OMP_NUM_THREADS='1',
    OPENBLAS_NUM_THREADS='1',
    MKL_NUM_THREADS='1',
)

# Silence UserWarning and FutureWarning for the rest of the session.
warnings.simplefilter('ignore', UserWarning)
warnings.simplefilter('ignore', FutureWarning)

from sktime.datasets import load_longley

# Number of trailing time steps held out per series as the forecasting target.
forecasting_horizon = 3

targets, features = load_longley()

# APT-TS accepts either a list of np.ndarray / pd.DataFrame (one list element
# per series) or a single pd.DataFrame that also records which series each
# time step belongs to (as the index or as a separate column).
# Here there is one series, wrapped in a one-element list. Its last
# `forecasting_horizon` steps are the test targets and everything before them
# is the training targets; the values to forecast normally follow directly
# after the training data.
y_train = [targets.iloc[:-forecasting_horizon]]
y_test = [targets.iloc[-forecasting_horizon:]]

# Features are split the same way. For univariate models X_train / X_test can
# be omitted (set to None).
X_train = [features.iloc[:-forecasting_horizon]]

# X_test carries the "known future features": feature values that are already
# known for the forecast window. Features that are not known ahead of time
# could be replaced with NaN or zeros (the networks will not use them); if no
# feature is known beforehand, X_test can be omitted as well.
known_future_features = features.columns.tolist()
X_test = [features.iloc[-forecasting_horizon:]]

# One start timestamp per series, plus the sampling frequency of the data.
start_times = [targets.index.to_timestamp()[0]]
freq = '1Y'

from autoPyTorch.api.time_series_forecasting import TimeSeriesForecastingTask

In [None]:
# Show the feature column names that are treated as known for the forecast window.
print(known_future_features)

['GNPDEFL', 'GNP', 'UNEMP', 'ARMED', 'POP']


In [None]:
# Dump the loaded target series, its Python type, the feature table and the
# per-series start timestamps, one after another on separate lines.
print(targets, type(targets), features, start_times, sep='\n')

Period
1947    60323.0
1948    61122.0
1949    60171.0
1950    61187.0
1951    63221.0
1952    63639.0
1953    64989.0
1954    63761.0
1955    66019.0
1956    67857.0
1957    68169.0
1958    66513.0
1959    68655.0
1960    69564.0
1961    69331.0
1962    70551.0
Freq: A-DEC, Name: TOTEMP, dtype: float64
<class 'pandas.core.series.Series'>
        GNPDEFL       GNP   UNEMP   ARMED       POP
Period                                             
1947       83.0  234289.0  2356.0  1590.0  107608.0
1948       88.5  259426.0  2325.0  1456.0  108632.0
1949       88.2  258054.0  3682.0  1616.0  109773.0
1950       89.5  284599.0  3351.0  1650.0  110929.0
1951       96.2  328975.0  2099.0  3099.0  112075.0
1952       98.1  346999.0  1932.0  3594.0  113270.0
1953       99.0  365385.0  1870.0  3547.0  115094.0
1954      100.0  363112.0  3578.0  3350.0  116219.0
1955      101.2  397469.0  2904.0  3048.0  117388.0
1956      104.6  419180.0  2822.0  2857.0  118734.0
1957      108.4  442769.0  2936.0  

In [None]:
# Training targets of the only series: every observation except the final
# `forecasting_horizon` steps.
print(y_train[0])

In [None]:
# Held-out targets of the only series: the final `forecasting_horizon` steps.
print(y_test[0])

In [None]:
# Full target series as returned by load_longley() (training and held-out part together).
print(targets)

In [None]:
# Full feature table as returned by load_longley() (training and held-out rows together).
print(features)

In [None]:
# Create the forecasting task with its default constructor arguments; the data
# and search settings are supplied later through api.search().
api = TimeSeriesForecastingTask()

In [None]:
# Run the Auto-PyTorch search on the Longley series prepared above.
api.search(
    # --- data ---------------------------------------------------------------
    X_train=X_train,
    y_train=copy.deepcopy(y_train),  # hand the search its own copy of the targets
    X_test=X_test,
    known_future_features=known_future_features,
    # --- time axis ----------------------------------------------------------
    freq=freq,
    start_times=start_times,
    n_prediction_steps=forecasting_horizon,
    # --- objective ----------------------------------------------------------
    optimize_metric='mean_MASE_forecasting',
    # --- budget -------------------------------------------------------------
    memory_limit=16 * 1024,  # forecasting models currently need considerably more memory
    func_eval_time_limit_secs=50,
    total_walltime_limit=500,
    # Proxy validation sets; this only takes effect for tasks with more than
    # 1000 series.
    min_num_test_instances=1000,
)


from autoPyTorch.datasets.time_series_dataset import TimeSeriesSequence

# Build the test sequences by hand: one TimeSeriesSequence per series, made
# from its past features, past targets, known future features and start time.
# The remaining constructor arguments needed for a new sequence are taken
# from the dataset that the search created.
test_sets = [
    TimeSeriesSequence(
        X=past_features.values,
        Y=past_targets.values,
        X_test=future_features.values,
        start_time=series_start,
        is_test_set=True,
        **api.dataset.sequences_builder_kwargs
    )
    for past_features, future_features, past_targets, series_start
    in zip(X_train, X_test, y_train, start_times)
]
# If the goal is only to forecast the values that follow X_train, the data
# manager can generate the test set directly instead:
# test_sets2 = api.dataset.generate_test_seqs()

pred = api.predict(test_sets)

In [None]:
# Sanity-check the containers handed to the search: their Python types, their
# array-like shapes and the scalar settings that go with them.
#
# `shape` is imported from NumPy's public namespace. The original import path,
# numpy.core.fromnumeric, is a private module (deprecated in NumPy 2.0); the
# function itself is the same.
from numpy import shape

# Feature side: training features and known future features.
print("X")
print(type(X_train))
print(shape(X_train))
print(type(X_test))
print(X_test)
print(shape(X_test))

# Target side: training targets and held-out targets.
print("Y")
print(type(y_train))
print(y_train)
print(shape(y_train))
print(type(y_test))
print(y_test)
print(shape(y_test))

# Remaining settings passed to api.search().
print(known_future_features)
print(forecasting_horizon)
print(freq)
print(start_times)

X
<class 'list'>
(1, 13, 5)
<class 'list'>
[        GNPDEFL       GNP   UNEMP   ARMED       POP
Period                                             
1960      114.2  502601.0  3931.0  2514.0  125368.0
1961      115.7  518173.0  4806.0  2572.0  127852.0
1962      116.9  554894.0  4007.0  2827.0  130081.0]
(1, 3, 5)
Y
<class 'list'>
[Period
1947    60323.0
1948    61122.0
1949    60171.0
1950    61187.0
1951    63221.0
1952    63639.0
1953    64989.0
1954    63761.0
1955    66019.0
1956    67857.0
1957    68169.0
1958    66513.0
1959    68655.0
Freq: A-DEC, Name: TOTEMP, dtype: float64]
(1, 13)
<class 'list'>
[Period
1960    69564.0
1961    69331.0
1962    70551.0
Freq: A-DEC, Name: TOTEMP, dtype: float64]
(1, 3)
['GNPDEFL', 'GNP', 'UNEMP', 'ARMED', 'POP']
3
1Y
[Timestamp('1947-01-01 00:00:00', freq='AS-JAN')]
