## TOC:
* [Datasets](#datasets-bullet)
* [Logging Settings](#logging-bullet)
* [Request](#requests-bullet)
* [Forecast](#forecast-bullet)
* [Save Results](#results-bullet)

In addition to the features of the train API testing notebook, this notebook contains:
- a train/test split for evaluation,
- forecasting to see the results,
- metrics for the forecasted period.

In [1]:
import json
import yaml
import requests
import getpass
import os
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's default plot styling.
sns.set()

## Datasets <a class="anchor" id="datasets-bullet"></a>

In [2]:
# Datasets used for the test runs. Every entry describes one CSV source: where
# to read it from and which column positions hold the timestamps and the target.
testing_datasets = [
    {
        "file_name": source,
        "format": "csv",
        "time_col_index": 0,
        "target_col_index": -1,
        "exovar": False,
    }
    for source in (
        "https://github.com/ourownstory/neuralprophet-data/raw/main/datasets/air_passengers.csv",
        "../datasets/air_quality/BeijingPM25_0.csv",
    )
]

In [3]:
# Number of trailing rows of each dataset that are held out as the test set.
TEST_PERIOD = 7

In [4]:
def prepare_dataset_forecast(dataset_info, test_period=TEST_PERIOD):
    """Load a dataset and split its rows into a training and a test part.

    The time column is cast to ``str`` and the target column to ``float``;
    all other columns are passed through unchanged. The split is positional:
    the last ``test_period`` rows (in file order) become the test set.

    Parameters
    ----------
    dataset_info : dict
        Dataset description with the keys ``"format"``, ``"file_name"``,
        ``"time_col_index"`` and ``"target_col_index"``.
    test_period : int, optional
        Number of trailing rows to hold out. Must be at least 1 and smaller
        than the number of rows in the dataset.

    Returns
    -------
    tuple of (list, list)
        ``(train_rows, test_rows)``, each a list of rows, each row a list of
        column values.

    Raises
    ------
    ValueError
        If the dataset format is not ``"csv"`` or if ``test_period`` would
        leave the training or the test part empty.
    """
    file_format = dataset_info["format"]
    if file_format != "csv":
        # Previously this fell through to an unbound-variable error.
        raise ValueError(
            "Unsupported dataset format {!r}; only 'csv' is supported.".format(file_format)
        )

    dataset_df = pd.read_csv(dataset_info["file_name"])
    time_col = dataset_df.columns[dataset_info["time_col_index"]]
    target_col = dataset_df.columns[dataset_info["target_col_index"]]
    dataset_df[time_col] = dataset_df[time_col].astype(str)
    dataset_df[target_col] = dataset_df[target_col].astype(float)

    # One plain Python list per row, ready to be sent as JSON.
    rows = dataset_df.values.tolist()

    # A negative-index slice silently misbehaves for 0 (``rows[:-0]`` is empty),
    # so validate the size and split on an explicit position instead.
    if not 0 < test_period < len(rows):
        raise ValueError(
            "test_period must be between 1 and {} (number of rows - 1), got {!r}.".format(
                len(rows) - 1, test_period
            )
        )
    split_at = len(rows) - test_period
    return rows[:split_at], rows[split_at:]

## Logging Settings <a class="anchor" id="logging-bullet"></a>

In [20]:
# When True, the last cell of this notebook writes log_df to output_file_name.
LOG_RESULTS = True

In [6]:
# Login name of the current user; it is used in the log file name and stored
# in the "user" column of every log row.
user_name = getpass.getuser()

In [7]:
# Columns of the run log; one row is recorded per trained dataset.
column_names = [
    "file_name",
    "model_request",
    "user",
    "time",
    "response",
    "type",
    "metrics",
    "model",
    "test_data",
]
log_df = pd.DataFrame(columns=column_names)

# Per-user CSV file that the results are saved to at the end of the notebook.
output_file_name = "./test_logs/test_logs_forecast_{}.csv".format(user_name)
print(output_file_name)

./test_logs/test_logs_forecast_studio-lab-user.csv


## Request <a class="anchor" id="requests-bullet"></a>

Define Model Request (optional)

In [8]:
# Customized model request that is sent to the service together with the
# training data. Commented-out entries are alternatives that are currently
# switched off.
model_request = {
    "type": "meta_lr",  # 'meta_wa'
    "scorers": ["smape", "mape"],
    "params": {
        "preprocessors": [
            {"type": "dartsimputer", "params": {"strategy": "mean"}},
            # {'type': 'simpleimputer', 'params': {'strategy': 'mean'}},
            {"type": "minmaxscaler"},
        ],
        "base_models": [
            {"type": base_model}
            for base_model in (
                "darts_naive",
                "darts_seasonalnaive",
                "darts_autotheta",
                "darts_autoets",
                "darts_autoarima",
                # "darts_tbats",
                # "darts_linearregression",
                # "darts_lightgbm",
                # "darts_rnn",
            )
        ],
    },
}

In [9]:
# Connection settings are kept out of the notebook in a local url.yaml file.
with open("url.yaml", "r") as url_file:
    url_dict = yaml.safe_load(url_file)

# URL to our SYBIL AWS service
protocol, host, port = (url_dict[key] for key in ("protocol", "host", "port"))
endpoint = "train"

url = "%s://%s:%s/%s" % (protocol, host, port, endpoint)

In [10]:
# Train one model per dataset and log the outcome of every request.
for dataset in testing_datasets:
    train_data, test_data = prepare_dataset_forecast(dataset)
    api_json = {
        "data": train_data,
        "model": model_request,  # (optional) can be commented out
    }
    start_time = time.time()
    response = requests.post(url, json=api_json)
    exc_time = time.time() - start_time

    # Decode the body exactly once. A failed run may answer with a non-JSON
    # body or without the expected keys; such a run is logged with empty
    # fields instead of aborting the loop, so that it shows up in the
    # "Broken runs" filter further down.
    try:
        payload = response.json()
    except ValueError:
        payload = {}
    if not isinstance(payload, dict):
        payload = {}

    new_row = pd.DataFrame(
        [
            {
                "file_name": dataset["file_name"],
                "model_request": api_json.get("model", "default"),
                "user": user_name,
                "time": exc_time,
                "response": response.status_code,
                "type": payload.get("type"),
                "metrics": payload.get("metrics"),
                "model": payload.get("model"),
                "test_data": test_data,
            }
        ]
    )
    # Appended per iteration on purpose: each request can run for a long time,
    # and finished runs should stay in log_df if a later dataset fails.
    log_df = pd.concat([log_df, new_row], ignore_index=True)
    print(response)

<Response [200]>
<Response [200]>


## Forecast <a class="anchor" id="forecast-bullet"></a>

In [12]:
# Ask the service to forecast the held-out dates with every trained model.
# The forecast URL gets its own name so that the training `url` and `endpoint`
# stay intact; otherwise re-running the training cell after this one would
# post the training data to the forecast endpoint.
forecast_url = "%s://%s:%s/%s" % (protocol, host, str(port), "forecast")

responses = []
for _row_idx, row in log_df.iterrows():
    # First element of every held-out row is the timestamp.
    dates = [point[0] for point in row["test_data"]]
    api_json = {
        "model": row["model"],
        "data": dates,
    }
    response = requests.post(forecast_url, json=api_json)
    # Fail with the HTTP status instead of an opaque KeyError/JSON error.
    response.raise_for_status()
    responses.append(response.json()["data"])

In [14]:
# Store each forecast next to the log row of the model that produced it;
# `responses` was filled in the same order as the rows of log_df.
log_df['forecast_data'] = responses

In [18]:
# Comparison of the results: held-out values next to the forecasted values.
for _row_idx, row in log_df.iterrows():
    # Pick the value (second element) of every row directly. Converting the
    # mixed [timestamp, value] rows with np.array would turn the numbers into
    # strings, which makes the columns unusable for computing metrics.
    comparison_df = pd.DataFrame(
        {
            "test": [point[1] for point in row["test_data"]],
            "pred": [point[1] for point in row["forecast_data"]],
        },
        dtype=float,
    )
    display(comparison_df)

Unnamed: 0,test,pred
0,535.0,536.6447784284949
1,622.0,624.7126156353609
2,606.0,640.3087357562652
3,508.0,528.9142194897604
4,461.0,470.3129232763376
5,390.0,425.8023794460936
6,432.0,472.8051842789944


Unnamed: 0,test,pred
0,9.0,244.06093488531084
1,10.0,244.48628522951088
2,8.0,244.63454529401204
3,10.0,244.8708786939524
4,10.0,245.1138124924792
5,8.0,245.36014990794823
6,12.0,245.59873940475293


## Save Results <a class="anchor" id="results-bullet"></a>

In [19]:
# Logs: one row per training run, including the forecast added above.
log_df

Unnamed: 0,file_name,model_request,user,time,response,type,metrics,model,test_data,forecast_data
0,https://github.com/ourownstory/neuralprophet-d...,"{'type': 'meta_lr', 'scorers': ['smape', 'mape...",studio-lab-user,60.140282,200,meta_lr,"[{'type': 'smape', 'value': 4.667966174549589}...",AgEBCNHWAADQ1gAAUXcAABgAAABMdwAASg4AAD+AAGVsaZ...,"[[1960-06-01, 535.0], [1960-07-01, 622.0], [19...","[[1960-05-31T00:00:00, 536.6447784284949], [19..."
1,../datasets/air_quality/BeijingPM25_0.csv,"{'type': 'meta_lr', 'scorers': ['smape', 'mape...",studio-lab-user,1127.717623,200,meta_lr,"[{'type': 'smape', 'value': 200.0}, {'type': '...",AgEBCPQYcwAAABAAvUxHADAAAACatg4A9bkBAOctPABiih...,"[[2014-12-31 17:00:00, 9.0], [2014-12-31 18:00...","[[2014-12-31T17:00:00, 244.06093488531084], [2..."


In [None]:
# Model request that was sent for the first logged run.
log_df.iloc[0]["model_request"]

In [None]:
# Broken runs: every logged request whose HTTP status is not 200.
log_df.loc[log_df["response"].ne(200)]

In [None]:
# Persist this session's log, appending to an existing log file of the same user.
if LOG_RESULTS:
    # Create the target directory on first use; to_csv does not create it.
    log_dir = os.path.dirname(output_file_name)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)

    if os.path.exists(output_file_name):
        existing_log_df = pd.read_csv(output_file_name)
        updated_df = pd.concat([existing_log_df, log_df], ignore_index=True)
    else:
        updated_df = log_df
    updated_df.to_csv(output_file_name, index=False)