## TOC:
* [Datasets](#datasets-bullet)
* [Logging Settings](#logging-bullet)
* [Request](#requests-bullet)
* [Forecast](#forecast-bullet)
* [Save Results](#results-bullet)

In addition to the features of the train API testing notebook, this notebook contains:
- train test split for evaluation,
- forecasting to see results,
- metrics for forecasted period

In [1]:
import json
import yaml
import requests
import getpass
import os
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

sns.set()

## Datasets <a class="anchor" id="datasets-bullet"></a>

### Monash Datasets Extraction

In [2]:
from bs4 import BeautifulSoup
import zipfile
from datetime import datetime
from distutils.util import strtobool

# URL of the page containing the table
url = "https://forecastingdata.org/"

# Fetch the page. The timeout keeps the cell from hanging forever, and
# raise_for_status() turns an HTTP error into a clear exception instead of a
# confusing IndexError when no <table> is found in an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')

# Locate the table containing the datasets
table = soup.find_all('table')[0]  # Assuming the first table is the target

# Extract table headers; one extra column holds the hrefs found in each row
headers = [th.text.strip() for th in table.find_all('th')]
headers.append('Download Links')

# Extract table rows
rows = []
for row in table.find_all('tr')[1:]:  # Skip the header row
    cols = [td.text.strip() for td in row.find_all('td')]
    download_links = [a['href'] for a in row.find_all('a', href=True)]
    cols.append(download_links)
    rows.append(cols)

# Create a pandas DataFrame
df = pd.DataFrame(rows, columns=headers)
# The "Download" cell lists one label per line; split it so every label can
# be paired with its link below.
df['Download'] = df["Download"].apply(lambda x: x.split('\n'))
# Keep only the hrefs that contain 'zenodo'
df["Download Links"] = df["Download Links"].apply(
    lambda x: [link for link in x if 'zenodo' in link]
)
# One output row per (label, link) pair; explode() requires both lists in a
# row to have the same length.
df = df.explode(["Download", "Download Links"]).reset_index()

def download_link_extract(url):
    """Return the direct download URL advertised on a record page.

    The page at ``url`` is fetched and scanned for the first ``<a>`` tag whose
    ``href`` contains ``"download=1"``.

    Parameters
    ----------
    url : str
        Address of the record page to inspect.

    Returns
    -------
    str or None
        The download link resolved against ``https://zenodo.org``, or ``None``
        when the request fails or no matching link is present.
    """
    from urllib.parse import urljoin

    try:
        # Timeout so a stalled server cannot block the whole DataFrame.apply
        response = requests.get(url, timeout=30)
        response.raise_for_status()  # Ensure we notice bad responses
    except requests.exceptions.RequestException as e:
        print("An error occurred while making the request:", e)
        return None

    # Parse the HTML content using BeautifulSoup
    soup = BeautifulSoup(response.content, "html.parser")

    # First link carrying the "download=1" query parameter, if any
    download_link = next(
        (
            anchor['href']
            for anchor in soup.find_all('a', href=True)
            if "download=1" in anchor['href']
        ),
        None,
    )
    if download_link is None:
        return None

    # urljoin gives the same result as plain concatenation for root-relative
    # hrefs ("/records/...") and additionally copes with absolute hrefs and
    # hrefs lacking the leading slash.
    return urljoin('https://zenodo.org', download_link)

# Resolve every record page to its direct download URL (one HTTP request per row)
df["download_link"] = df['Download Links'].apply(download_link_extract)

def download_and_unzip(url, extract_to='.'):
    """Download a zip archive from ``url`` and extract it into ``extract_to``.

    The archive is streamed to a scratch directory that is removed again on
    exit, whether or not the download or the extraction succeeds, so no
    partial ``temp.zip`` is left behind in the working directory.

    Parameters
    ----------
    url : str
        Address of the zip archive.
    extract_to : str, optional
        Directory the archive members are extracted into (default: ``'.'``).

    Raises
    ------
    requests.exceptions.RequestException
        If the download fails or the server answers with an error status.
    zipfile.BadZipFile
        If the downloaded file is not a valid zip archive.
    """
    import tempfile

    with tempfile.TemporaryDirectory() as scratch_dir:
        local_zip_file = os.path.join(scratch_dir, 'temp.zip')

        # Stream the body in chunks so large archives are not held in memory
        with requests.get(url, stream=True, timeout=60) as r:
            r.raise_for_status()
            with open(local_zip_file, 'wb') as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)

        # Unzip the file
        with zipfile.ZipFile(local_zip_file, 'r') as zip_ref:
            zip_ref.extractall(extract_to)

    

In [47]:
df[df['Domain'] == 'Economic']

Unnamed: 0,index,Dataset,Domain,No: of Series,Min. Length,Max. Length,Competition,Multivariate,Download,Source,Download Links,download_link
23,9,Bitcoin,Economic,18,2659,4581,No,No,W Missing,Curated by us,https://zenodo.org/record/5121965,https://zenodo.org/records/5121965/files/bitco...
24,9,Bitcoin,Economic,18,2659,4581,No,No,W/O Missing,Curated by us,https://zenodo.org/record/5122101,https://zenodo.org/records/5122101/files/bitco...
43,19,FRED-MD,Economic,107,728,728,No,Yes,Monthly,"McCracken and Ng, 2016",https://zenodo.org/record/4654833,https://zenodo.org/records/4654833/files/fred_...


In [48]:
download_links = df[df['Domain'] == 'Economic']['download_link'].tolist()

In [49]:
# Fetch and unpack every selected archive. download_link_extract() yields None
# when a page could not be fetched or has no download link; such entries are
# reported and skipped instead of crashing inside requests.get().
for data_zip_path in download_links:
    if not isinstance(data_zip_path, str):
        print("Skipping dataset without a download link:", data_zip_path)
        continue
    download_and_unzip(data_zip_path, extract_to='./files')

In [50]:
def list_files(folder_path, file_format):
    """Return '<folder_path>/<name>' for each entry whose name ends with file_format.

    Entries are returned in the order produced by ``os.listdir``.
    """
    return [
        "/".join([folder_path, entry])
        for entry in os.listdir(folder_path)
        if entry.endswith(file_format)
    ]

In [51]:
files = list_files('./files', '.tsf')

In [52]:
files

['./files/bitcoin_dataset_with_missing_values.tsf',
 './files/bitcoin_dataset_without_missing_values.tsf',
 './files/fred_md_dataset.tsf']

In [53]:
# Code snippet taken from https://github.com/rakshitha123/TSForecasting/blob/master/utils/data_loader.py

# Converts the contents in a .tsf file into a dataframe and returns it along with other meta-data of the dataset: frequency, horizon, whether the dataset contains missing values and whether the series have equal lengths
#
# Parameters
# full_file_path_and_name - complete .tsf file path
# replace_missing_vals_with - a term to indicate the missing values in series in the returning dataframe
# value_column_name - Any name that is preferred to have as the name of the column containing series values in the returning dataframe
def convert_tsf_to_dataframe(
    full_file_path_and_name,
    replace_missing_vals_with="NaN",
    value_column_name="series_value",
):
    """Parse a .tsf file into a DataFrame plus its header meta-data.

    File layout handled here:

    * blank lines are ignored; lines starting with ``#`` are comments;
    * ``@attribute <name> <type>`` declares one attribute column, where the
      type is ``numeric``, ``string`` or ``date``;
    * ``@frequency``, ``@horizon``, ``@missing`` and ``@equallength`` each
      carry a single value; other ``@<tag> <value>`` lines are accepted and
      ignored;
    * ``@data`` marks the start of the series rows;
    * every series row is ``attr1:attr2:...:v1,v2,...`` with ``?`` marking a
      missing value.

    Parameters
    ----------
    full_file_path_and_name : str
        Complete path of the .tsf file (read with cp1252 encoding).
    replace_missing_vals_with : object, optional
        Placeholder stored for every ``?`` in a series.
    value_column_name : str, optional
        Name of the DataFrame column holding the series values.

    Returns
    -------
    tuple
        ``(loaded_data, frequency, forecast_horizon, contain_missing_values,
        contain_equal_length)``. The last four are ``None`` when the
        corresponding tag is absent from the file.

    Raises
    ------
    Exception
        On structural problems (empty file, missing attribute/data section,
        wrong number of fields, unsupported attribute type, empty or
        all-missing series).
    ValueError
        When a numeric/date/boolean token cannot be converted.
    """
    col_names = []
    col_types = []
    all_data = {}
    all_series = []
    line_count = 0
    frequency = None
    forecast_horizon = None
    contain_missing_values = None
    contain_equal_length = None
    found_data_tag = False
    found_data_section = False

    with open(full_file_path_and_name, "r", encoding="cp1252") as file:
        for line in file:
            # Strip white space from start/end of line
            line = line.strip()
            if not line:
                continue
            line_count += 1

            if line.startswith("@"):  # Read meta-data
                if line.startswith("@data"):
                    if not col_names:
                        raise Exception(
                            "Missing attribute section. Attribute section must come before data."
                        )
                    found_data_tag = True
                    continue

                line_content = line.split(" ")
                if line.startswith("@attribute"):
                    # Attributes have both name and type
                    if len(line_content) != 3:
                        raise Exception("Invalid meta-data specification.")
                    col_names.append(line_content[1])
                    col_types.append(line_content[2])
                    continue

                # Other meta-data have only values
                if len(line_content) != 2:
                    raise Exception("Invalid meta-data specification.")

                if line.startswith("@frequency"):
                    frequency = line_content[1]
                elif line.startswith("@horizon"):
                    forecast_horizon = int(line_content[1])
                elif line.startswith("@missing"):
                    contain_missing_values = _parse_tsf_bool(line_content[1])
                elif line.startswith("@equallength"):
                    contain_equal_length = _parse_tsf_bool(line_content[1])
                continue

            if line.startswith("#"):
                continue

            # Anything else is a series row
            if not col_names:
                raise Exception(
                    "Missing attribute section. Attribute section must come before data."
                )
            if not found_data_tag:
                raise Exception("Missing @data tag.")

            if not found_data_section:
                # First series row: create one value list per attribute
                found_data_section = True
                for col in col_names:
                    all_data[col] = []

            full_info = line.split(":")
            if len(full_info) != (len(col_names) + 1):
                raise Exception("Missing attributes/values in series.")

            # str.split never returns an empty list, so test the raw text to
            # detect a row without any values.
            raw_series = full_info[-1]
            if not raw_series.strip():
                raise Exception(
                    "A given series should contains a set of comma separated numeric values. At least one numeric value should be there in a series. Missing values should be indicated with ? symbol"
                )

            numeric_series = [
                replace_missing_vals_with if val == "?" else float(val)
                for val in raw_series.split(",")
            ]

            if numeric_series.count(replace_missing_vals_with) == len(numeric_series):
                raise Exception(
                    "All series values are missing. A given series should contains a set of comma separated numeric values. At least one numeric value should be there in a series."
                )

            all_series.append(pd.Series(numeric_series).array)

            # zip stops after the attribute fields; the trailing series field
            # of full_info is not consumed here.
            for col_name, col_type, raw_value in zip(col_names, col_types, full_info):
                if col_type == "numeric":
                    att_val = int(raw_value)
                elif col_type == "string":
                    att_val = str(raw_value)
                elif col_type == "date":
                    att_val = datetime.strptime(raw_value, "%Y-%m-%d %H-%M-%S")
                else:
                    # Currently, the code supports only numeric, string and
                    # date types. Extend this as required.
                    raise Exception("Invalid attribute type.")

                all_data[col_name].append(att_val)

    if line_count == 0:
        raise Exception("Empty file.")
    if not col_names:
        raise Exception("Missing attribute section.")
    if not found_data_section:
        raise Exception("Missing series information under data section.")

    all_data[value_column_name] = all_series
    loaded_data = pd.DataFrame(all_data)

    return (
        loaded_data,
        frequency,
        forecast_horizon,
        contain_missing_values,
        contain_equal_length,
    )


def _parse_tsf_bool(text):
    """Convert a truth-value token to bool.

    Accepts the same tokens as ``distutils.util.strtobool`` (which is no
    longer available from Python 3.12 on) and raises ValueError otherwise.
    """
    normalized = text.lower()
    if normalized in ("y", "yes", "t", "true", "on", "1"):
        return True
    if normalized in ("n", "no", "f", "false", "off", "0"):
        return False
    raise ValueError("invalid truth value %r" % (normalized,))


# Example of usage
# loaded_data, frequency, forecast_horizon, contain_missing_values, contain_equal_length = convert_tsf_to_dataframe("TSForecasting/tsf_data/sample.tsf")

# print(loaded_data)
# print(frequency)
# print(forecast_horizon)
# print(contain_missing_values)
# print(contain_equal_length)

In [54]:
def convert_to_pandas_frequency(frequency_str):
    """Translate a frequency label into a pandas frequency string.

    Accepted forms are ``"<unit>"`` (count defaults to 1), ``"<n>_<unit>"``
    and ``"half_<unit>"``, e.g. ``"daily"`` -> ``"1D"``, ``"10_minutes"`` ->
    ``"10T"``, ``"half_hourly"`` -> ``"30T"``.

    Parameters
    ----------
    frequency_str : str
        Frequency label to convert.

    Returns
    -------
    str
        ``"<count><pandas unit code>"``.

    Raises
    ------
    ValueError
        If the label has more than two ``_``-separated parts, the count is
        neither ``"half"`` nor an integer, the unit is unknown, or no half
        mapping exists for the unit.
    """
    # Dictionary to map units to pandas frequency codes
    unit_map = {
        'second': 'S',
        'seconds': 'S',
        'minute': 'T',
        'minutes': 'T',
        'minutely': 'T',
        'hour': 'H',
        'hours': 'H',
        'hourly': 'H',
        'day': 'D',
        'days': 'D',
        'daily': 'D',
        'week': 'W',
        'weeks': 'W',
        'weekly': 'W',
        'month': 'M',
        'months': 'M',
        'monthly': 'M',
        'year': 'A',
        'years': 'A',
        'yearly': 'A',
        'quarter': 'Q',
        'quarters': 'Q',
        'quarterly': 'Q',
    }

    # Dictionary to map each unit code to its half-unit equivalent
    half_unit_map = {
        'A': ('Q', 2),   # Half of a year is 2 quarters
        # NOTE(review): half of a quarter is really 1.5 months; the original
        # 2-month mapping is kept because there is no exact alias - confirm.
        'Q': ('M', 2),
        'M': ('D', 15),  # Approximate half of a month is 15 days
        'W': ('H', 84),  # Half of a week is 3.5 days, expressed exactly as 84 hours
        'D': ('H', 12),  # Half of a day is 12 hours
        'H': ('T', 30),  # Half of an hour is 30 minutes
        'T': ('S', 30),  # Half of a minute is 30 seconds
    }

    # Split the input string to get the number and unit
    parts = frequency_str.split('_')
    if len(parts) == 1:
        number, unit = 1, parts[0]
    elif len(parts) == 2:
        number, unit = parts
    else:
        raise ValueError("Input format should be '<number>_<unit>' Maybe update unit_map")

    is_half = number == 'half'
    if not is_half and not isinstance(number, int):
        try:
            number = int(number)
        except ValueError as err:
            raise ValueError("The number part of the input should be an integer") from err

    if unit not in unit_map:
        raise ValueError(f"Unit '{unit}' is not recognized")
    unit_code = unit_map[unit]

    if is_half:
        if unit_code not in half_unit_map:
            raise ValueError(f"Half unit mapping not available for unit '{unit}'")
        half_unit, half_number = half_unit_map[unit_code]
        return f"{half_number}{half_unit}"

    # Convert to pandas frequency string
    return f"{number}{unit_code}"

In [55]:
def generate_timestamps(start_timestamp, frequency, values):
    """Pair ``values`` with evenly spaced timestamps.

    The timestamps start at ``start_timestamp``; their spacing is the pandas
    frequency obtained from ``convert_to_pandas_frequency(frequency)``.
    Returns a DataFrame with the columns 'Timestamp' and 'Value'.
    """
    pandas_freq = convert_to_pandas_frequency(frequency)
    time_index = pd.date_range(
        start=start_timestamp,
        periods=len(values),
        freq=pandas_freq,
    )
    return pd.DataFrame({'Timestamp': time_index, 'Value': values})

In [56]:
# Split every .tsf file into one CSV per series (written to ./files), keep the
# resulting frames in df_list, then delete the .tsf file that was converted.
df_list = []
for file in files:
    # File name without directory and without the '.tsf' suffix
    name = file.split('/')[-1].split('.tsf')[0]
    (
        loaded_data,
        frequency,
        forecast_horizon,
        contain_missing_values,
        contain_equal_length,
    ) = convert_tsf_to_dataframe(file)
    for index, row in loaded_data.iterrows():
        serie_name = "_".join([name, row['series_name']])
        serie = generate_timestamps(
            row['start_timestamp'], frequency, row['series_value']
        )
        df_list.append({'name': serie_name, 'serie': serie})
        serie.to_csv(f"./files/{serie_name}.csv", index=False)
    os.remove(file)

In [57]:
df_list

[{'name': 'bitcoin_dataset_with_missing_values_difficulty',
  'serie':       Timestamp             Value
  0    2009-01-03               1.0
  1    2009-01-04               NaN
  2    2009-01-05               NaN
  3    2009-01-06               NaN
  4    2009-01-07               NaN
  ...         ...               ...
  4576 2021-07-15  14363025673660.0
  4577 2021-07-16  14363025673660.0
  4578 2021-07-17  14348541098817.0
  4579 2021-07-18  13672594272814.0
  4580 2021-07-19               NaN
  
  [4581 rows x 2 columns]},
 {'name': 'bitcoin_dataset_with_missing_values_sent_addresses',
  'serie':       Timestamp     Value
  0    2009-01-03       NaN
  1    2009-01-04       NaN
  2    2009-01-05       NaN
  3    2009-01-06       NaN
  4    2009-01-07       NaN
  ...         ...       ...
  4576 2021-07-15  386824.0
  4577 2021-07-16  350835.0
  4578 2021-07-17  269883.0
  4579 2021-07-18  187721.0
  4580 2021-07-19       NaN
  
  [4581 rows x 2 columns]},
 {'name': 'bitcoin_dataset_w

In [59]:
files = list_files('./files', '.csv')

In [60]:
files

['./files/fred_md_dataset_T10.csv',
 './files/fred_md_dataset_T12.csv',
 './files/fred_md_dataset_T13.csv',
 './files/fred_md_dataset_T15.csv',
 './files/fred_md_dataset_T17.csv',
 './files/fred_md_dataset_T19.csv',
 './files/fred_md_dataset_T21.csv',
 './files/fred_md_dataset_T18.csv',
 './files/fred_md_dataset_T20.csv',
 './files/bitcoin_dataset_with_missing_values_difficulty.csv',
 './files/bitcoin_dataset_with_missing_values_sent_addresses.csv',
 './files/bitcoin_dataset_with_missing_values_send_usd.csv',
 './files/bitcoin_dataset_with_missing_values_market_cap.csv',
 './files/bitcoin_dataset_with_missing_values_confirmation_time.csv',
 './files/bitcoin_dataset_with_missing_values_transactions.csv',
 './files/bitcoin_dataset_with_missing_values_median_transaction_size.csv',
 './files/bitcoin_dataset_with_missing_values_mining_profitability.csv',
 './files/bitcoin_dataset_with_missing_values_fee_reward.csv',
 './files/bitcoin_dataset_with_missing_values_top_100_percent.csv',
 './fil

In [61]:
# Describe the first four CSV files: time in the first column, target in the
# last column, no exogenous variables.
dataset_defaults = {
    "format": "csv",
    "time_col_index": 0,
    "target_col_index": -1,
    "exovar": False,
}
testing_datasets = []
for file in files[:4]:
    testing_datasets.append({"file_name": file, **dataset_defaults})

In [62]:
testing_datasets

[{'file_name': './files/fred_md_dataset_T10.csv',
  'format': 'csv',
  'time_col_index': 0,
  'target_col_index': -1,
  'exovar': False},
 {'file_name': './files/fred_md_dataset_T12.csv',
  'format': 'csv',
  'time_col_index': 0,
  'target_col_index': -1,
  'exovar': False},
 {'file_name': './files/fred_md_dataset_T13.csv',
  'format': 'csv',
  'time_col_index': 0,
  'target_col_index': -1,
  'exovar': False},
 {'file_name': './files/fred_md_dataset_T15.csv',
  'format': 'csv',
  'time_col_index': 0,
  'target_col_index': -1,
  'exovar': False}]

In [63]:
TEST_PERIOD=7

In [64]:
def prepare_dataset_forecast(dataset_info, test_period=TEST_PERIOD):
    """Load a dataset and split it into train and test ``[time, target]`` pairs.

    Parameters
    ----------
    dataset_info : dict
        Must provide ``"format"`` (only ``"csv"`` is supported),
        ``"file_name"``, ``"time_col_index"`` and ``"target_col_index"``
        (positional column indices).
    test_period : int, optional
        Number of trailing rows held out as the test set.

    Returns
    -------
    tuple of (list, list)
        ``(train_rows, test_rows)`` where every row is ``[time, target]``,
        with the time cast to ``str`` and the target to ``float``.

    Raises
    ------
    ValueError
        If the format is not ``"csv"`` or ``test_period`` is negative.
    """
    if dataset_info["format"] != "csv":
        # Previously this fell through to an UnboundLocalError on return
        raise ValueError(
            f"Unsupported dataset format: {dataset_info['format']!r}"
        )
    if test_period < 0:
        raise ValueError("test_period must be non-negative")

    train_df = pd.read_csv(dataset_info["file_name"])
    column_positions = [
        dataset_info["time_col_index"],
        dataset_info["target_col_index"],
    ]
    time_col = train_df.columns[column_positions[0]]
    target_col = train_df.columns[column_positions[1]]
    train_df[time_col] = train_df[time_col].astype(str)
    train_df[target_col] = train_df[target_col].astype(float)
    train_data = [list(pair) for pair in train_df.iloc[:, column_positions].values]

    # An explicit split index keeps test_period == 0 meaningful: slicing with
    # [:-0] / [-0:] would return no training rows and every row as test data.
    split_at = max(len(train_data) - test_period, 0)
    return train_data[:split_at], train_data[split_at:]

## Logging Settings <a class="anchor" id="logging-bullet"></a>

In [66]:
LOG_RESULTS = True

In [67]:
user_name = getpass.getuser()

In [68]:
# Per-user log file and the empty frame that collects one row per request
output_file_name = f"./test_logs/test_logs_forecast_{user_name}.csv"
print(output_file_name)

column_names = [
    "file_name",
    "model_request",
    "user",
    "time",
    "response",
    "type",
    "metrics",
    "model",
    "test_data",
]
log_df = pd.DataFrame(columns=column_names)

./test_logs/test_logs_forecast_studio-lab-user.csv


## Request <a class="anchor" id="requests-bullet"></a>

Define Model Request (optional)

In [69]:
# # Customized model request
# model_request = {
#     "type": "meta_lr",  # 'meta_wa'
#     "scorers": ["smape", "mape"],
#     "params": {
#         "preprocessors": [
#             {"type": "dartsimputer", "params": {"strategy": "mean"}},
#             # {'type': 'simpleimputer', 'params': {'strategy': 'mean'}},
#             {"type": "minmaxscaler"},
#         ],
#         "base_models": [
#             {"type": "darts_naive"},
#             {"type": "darts_seasonalnaive"},
#             {"type": "darts_autotheta"},
#             {"type": "darts_autoets"},
#             {"type": "darts_autoarima"},
#             #{"type": "darts_tbats"},
#             #{"type": "darts_linearregression"},
#             # {'type': 'darts_lightgbm'},
#             # {'type': 'darts_rnn'},
#         ],
#     },
# }

Load using JSON model request 

In [70]:
file_path = 'model_request.json'
# Read the model request from a JSON file
with open(file_path, 'r') as file:
    model_request = json.load(file)

Load using YAML model request

In [71]:
file_path = 'model_request.yaml'
# Read the model request from a YAML file.
# NOTE: this rebinds model_request, replacing any value assigned earlier.
with open(file_path, 'r') as file:
    model_request = yaml.safe_load(file)

In [72]:
# Connection settings are kept in url.yaml (keys: protocol, host, port)
with open("url.yaml", "r") as file:
    url_dict = yaml.safe_load(file)

# URL to our SYBIL AWS service
protocol, host, port = (url_dict[key] for key in ("protocol", "host", "port"))
endpoint = "train"

url = f"{protocol}://{host}:{port}/{endpoint}"

In [74]:
# Send one training request per dataset and append the outcome to log_df.
for dataset in testing_datasets:
    train_data, test_data = prepare_dataset_forecast(dataset)
    api_json = {
        "data": train_data,
        "model": model_request,  # (optional) can be commented out
    }
    start_time = time.time()
    response = requests.post(url, json=api_json)
    exc_time = time.time() - start_time
    model_req = api_json.get("model", "default")
    # Decode the body once instead of re-parsing it for every field
    response_body = response.json()
    new_row = pd.DataFrame(
        [
            {
                "file_name": dataset["file_name"],
                "model_request": model_req,
                "user": user_name,
                "time": exc_time,
                "response": response.status_code,
                "type": response_body['type'],
                "metrics": response_body['metrics'],
                "model": response_body['model'],
                "test_data": test_data,
            }
        ]
    )
    log_df = pd.concat([log_df, new_row], ignore_index=True)
    print(response)

<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>


In [75]:
log_df

Unnamed: 0,file_name,model_request,user,time,response,type,metrics,model,test_data
0,./files/fred_md_dataset_T10.csv,"{'type': 'meta_wa', 'scorers': ['mase', 'smape...",studio-lab-user,425.591252,200,meta_wa,"[{'type': 'mase', 'value': 3.2043658395515218}...",AgEBCOPFGwAAABAAMgwXABgAAACS4QwAjKABAD+AAGVsaZ...,"[[2019-02-28, 101.9729], [2019-03-31, 103.2309..."
1,./files/fred_md_dataset_T12.csv,"{'type': 'meta_wa', 'scorers': ['mase', 'smape...",studio-lab-user,351.685782,200,meta_wa,"[{'type': 'mase', 'value': 15.75924537769056},...",AgEBCJndGwAAABAAsQQXABgAAAAc1AwAZJwBAD+AAGVsaZ...,"[[2019-02-28, 114.7969], [2019-03-31, 114.46],..."
2,./files/fred_md_dataset_T13.csv,"{'type': 'meta_wa', 'scorers': ['mase', 'smape...",studio-lab-user,337.166843,200,meta_wa,"[{'type': 'mase', 'value': 11.588277224010945}...",AgEBCGqoGwAAABAAFgsXABgAAABj9AwANKABAD+AAGVsaZ...,"[[2019-02-28, 108.4955], [2019-03-31, 107.9445..."
3,./files/fred_md_dataset_T15.csv,"{'type': 'meta_wa', 'scorers': ['mase', 'smape...",studio-lab-user,358.119383,200,meta_wa,"[{'type': 'mase', 'value': 4.928702326157349},...",AgEBCO8OHAAAABAAr0YXABgAAAAO3wwA8Z0BAD+AAGVsaZ...,"[[2019-02-28, 105.2961], [2019-03-31, 105.2392..."


## Forecast <a class="anchor" id="forecast-bullet"></a>

In [76]:
# Ask each trained model for forecasts on the dates of its held-out rows.
responses = []
# The endpoint URL does not depend on the row, so build it once
endpoint = 'forecast'
url = '%s://%s:%s/%s' % (protocol, host, str(port), endpoint)
for idx, row in log_df.iterrows():
    # test_data rows are [time, target]; only the time part is sent
    dates = [point[0] for point in row['test_data']]
    model = row['model']
    api_json = {
        'model': model,
        'data': dates,
    }
    response = requests.post(url, json=api_json)
    responses.append(response.json()['data'])

In [77]:
log_df['forecast_data'] = responses

In [78]:
# Comparison of the results
for idx, row in log_df.iterrows():
    comparison_df = pd.DataFrame({'test':list(np.array(row['test_data'])[:,1]),
                'pred':list(np.array(row['forecast_data'])[:,1])})
    display(comparison_df)

Unnamed: 0,test,pred
0,101.9729,100.22488677617764
1,103.2309,100.97632436017685
2,100.9801,101.04775115235508
3,101.1794,101.07354799866388
4,101.4176,100.94843797345348
5,101.5514,101.01802786888923
6,101.8472,101.09479615135082


Unnamed: 0,test,pred
0,114.7969,113.58359305210246
1,114.46,113.96570873841269
2,114.7893,114.03209610781325
3,114.6911,114.08126879199833
4,114.4141,114.11668889515325
5,114.2963,114.12719139888448
6,115.2718,114.17139902205956


Unnamed: 0,test,pred
0,108.4955,106.80137312298976
1,107.9445,107.16871747951429
2,107.4001,107.15616449296186
3,107.2815,107.12695235107472
4,107.7241,107.07659542512644
5,107.2339,107.00914574279945
6,107.9056,107.02468748241502


Unnamed: 0,test,pred
0,105.2961,104.23735029667486
1,105.2392,104.74988780770006
2,104.3399,104.89906941204042
3,104.462,104.8024484340046
4,105.0892,104.94673295041696
5,104.678,105.12352027742897
6,105.2156,105.04018159113797


In [79]:
comparison_df['test']

0    105.2961
1    105.2392
2    104.3399
3     104.462
4    105.0892
5     104.678
6    105.2156
Name: test, dtype: object

## Save Results <a class="anchor" id="results-bullet"></a>

In [80]:
# Logs
log_df

Unnamed: 0,file_name,model_request,user,time,response,type,metrics,model,test_data,forecast_data
0,./files/fred_md_dataset_T10.csv,"{'type': 'meta_wa', 'scorers': ['mase', 'smape...",studio-lab-user,425.591252,200,meta_wa,"[{'type': 'mase', 'value': 3.2043658395515218}...",AgEBCOPFGwAAABAAMgwXABgAAACS4QwAjKABAD+AAGVsaZ...,"[[2019-02-28, 101.9729], [2019-03-31, 103.2309...","[[2019-02-28, 100.22488677617763], [2019-03-31..."
1,./files/fred_md_dataset_T12.csv,"{'type': 'meta_wa', 'scorers': ['mase', 'smape...",studio-lab-user,351.685782,200,meta_wa,"[{'type': 'mase', 'value': 15.75924537769056},...",AgEBCJndGwAAABAAsQQXABgAAAAc1AwAZJwBAD+AAGVsaZ...,"[[2019-02-28, 114.7969], [2019-03-31, 114.46],...","[[2019-02-28, 113.58359305210246], [2019-03-31..."
2,./files/fred_md_dataset_T13.csv,"{'type': 'meta_wa', 'scorers': ['mase', 'smape...",studio-lab-user,337.166843,200,meta_wa,"[{'type': 'mase', 'value': 11.588277224010945}...",AgEBCGqoGwAAABAAFgsXABgAAABj9AwANKABAD+AAGVsaZ...,"[[2019-02-28, 108.4955], [2019-03-31, 107.9445...","[[2019-02-28, 106.80137312298976], [2019-03-31..."
3,./files/fred_md_dataset_T15.csv,"{'type': 'meta_wa', 'scorers': ['mase', 'smape...",studio-lab-user,358.119383,200,meta_wa,"[{'type': 'mase', 'value': 4.928702326157349},...",AgEBCO8OHAAAABAAr0YXABgAAAAO3wwA8Z0BAD+AAGVsaZ...,"[[2019-02-28, 105.2961], [2019-03-31, 105.2392...","[[2019-02-28, 104.23735029667486], [2019-03-31..."


In [81]:
# Append this run's rows to the per-user log file (created on first use).
if LOG_RESULTS:
    # to_csv does not create missing directories, so make sure the target
    # folder exists before writing.
    log_dir = os.path.dirname(output_file_name)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)

    if os.path.exists(output_file_name):
        existing_log_df = pd.read_csv(output_file_name)
        updated_df = pd.concat([existing_log_df, log_df], ignore_index=True)
    else:
        updated_df = log_df
    updated_df.to_csv(output_file_name, index=False)

In [82]:
log_df = pd.read_csv(output_file_name)

In [83]:
log_df

Unnamed: 0,file_name,model_request,user,time,response,type,metrics,model,test_data,forecast_data
0,https://github.com/ourownstory/neuralprophet-d...,"{'type': 'meta_lr', 'scorers': ['smape', 'mape...",studio-lab-user,65.887619,200,meta_lr,"[{'type': 'smape', 'value': 4.667966174549589}...",AgEBCNHWAADQ1gAAUXcAABgAAABMdwAASg4AAD+AAGVsaZ...,"[['1960-06-01', 535.0], ['1960-07-01', 622.0],...","[['1960-05-31T00:00:00', 536.6447784284949], [..."
1,../datasets/air_quality/BeijingPM25_0.csv,"{'type': 'meta_lr', 'scorers': ['smape', 'mape...",studio-lab-user,1156.983846,200,meta_lr,"[{'type': 'smape', 'value': 200.0}, {'type': '...",AgEBCPQYcwAAABAAvUxHADAAAACatg4A9bkBALXbHQBiih...,"[['2014-12-31 17:00:00', 9.0], ['2014-12-31 18...","[['2014-12-31T17:00:00', 244.06093488531084], ..."
2,../datasets/energy/elecdemand_dataset.csv,"{'type': 'meta_lr', 'scorers': ['smape', 'mape...",studio-lab-user,275.278771,200,meta_lr,"[{'type': 'smape', 'value': 200.0}, {'type': '...",AgEBCJJFMAAAABAAGWQlAOBjHgCjng8AtkUAACAAAACSRQ...,"[['2014-12-31 20:30:00', 3.8734], ['2014-12-31...","[['2014-12-31T20:30:00', -502.8264662468482], ..."
3,../datasets/climate/temp_anom_monthly.csv,"{'type': 'meta_lr', 'scorers': ['smape', 'mape...",studio-lab-user,105.654048,200,meta_lr,"[{'type': 'smape', 'value': 200.0}, {'type': '...",AgEBCC2dBQAonQUAXqcDABgAAABVpwMAkmgAAD+AAGVsaZ...,"[['2022-12-01', 0.84], ['2023-01-01', 0.87], [...","[['2022-12-02T00:00:00', -236716.56937655865],..."
4,../datasets/climate/temp_anom_w_forcing.csv,"{'type': 'meta_lr', 'scorers': ['smape', 'mape...",studio-lab-user,12.695486,200,meta_lr,"[{'type': 'smape', 'value': 178.41811091755022...",AgEBCJOtAACQrQAAU4UAABgAAABMhQAAuxAAAD+AAGVsaZ...,"[['2006', 0.66], ['2007', 0.65], ['2008', 0.55...","[['2006-01-02T00:00:00', 89717453190.78818], [..."
5,../datasets/climate/yosemite_temps.csv,"{'type': 'meta_lr', 'scorers': ['smape', 'mape...",studio-lab-user,228.408271,200,meta_lr,"[{'type': 'smape', 'value': 199.99982946699566...",AgEBCI6TMwAAABAA7uofAAu8DwBdiBUAspMDACAAAACOkw...,"[['2017-07-04 23:30:00', 43.6], ['2017-07-04 2...","[['2017-07-04T23:30:00', -47712.19258963689], ..."
6,../datasets/retail/retail_sales.csv,"{'type': 'meta_lr', 'scorers': ['smape', 'mape...",studio-lab-user,4.940552,200,meta_lr,"[{'type': 'smape', 'value': 38.12746439829832}...",AgEBCEcMAQBADAEAW6UAABgAAABQpQAAFRcAAD+AAGVsaZ...,"[['2015-11-01', 444507.0], ['2015-12-01', 5182...","[['2015-10-31T00:00:00', 538812.0810625734], [..."
7,../datasets/finance/Returns_short_interest_dat...,"{'type': 'meta_lr', 'scorers': ['smape', 'mape...",studio-lab-user,13.911756,200,meta_lr,"[{'type': 'smape', 'value': 199.99999677569406...",AgEBCFM2AgBQNgIA3m8BABgAAADXbwEA3C0AAD+AAGVsaZ...,"[['2002-06-01', -0.0727656666666666], ['2002-0...","[['2002-05-31T00:00:00', 2855912.2869539745], ..."
8,../datasets/retail/retail_sales.csv,"{'type': 'meta_wa', 'scorers': ['mase', 'smape...",studio-lab-user,124.59538,200,meta_wa,"[{'type': 'mase', 'value': 1.3431122299893605}...",AgEBCExYDABIWAwAp9QJABgAAACf1AkAuDwBAD+AAGVsaZ...,"[['2015-11-01', 444507.0], ['2015-12-01', 5182...","[['2015-11-01', 440535.40927294065], ['2015-12..."
9,../datasets/finance/Returns_short_interest_dat...,"{'type': 'meta_wa', 'scorers': ['mase', 'smape...",studio-lab-user,236.713296,200,meta_wa,"[{'type': 'mase', 'value': 0.8107361150541684}...",AgEBCP9oHwAAABAALAMbABgAAADvXw0As7ABAD+AAGVsaZ...,"[['2002-06-01', -0.0727656666666666], ['2002-0...","[['2002-06-01', -0.0051766621132544955], ['200..."


In [84]:
from ast import literal_eval

In [85]:
# Comparison of the results
for idx, row in log_df.iterrows():
    comparison_df = pd.DataFrame({'test':list(np.array(literal_eval(row['test_data']))[:,1]),
                'pred':list(np.array(literal_eval(row['forecast_data']))[:,1])})
    display(comparison_df)

Unnamed: 0,test,pred
0,535.0,536.6447784284949
1,622.0,624.7126156353609
2,606.0,640.3087357562652
3,508.0,528.9142194897604
4,461.0,470.3129232763376
5,390.0,425.8023794460936
6,432.0,472.8051842789944


Unnamed: 0,test,pred
0,9.0,244.06093488531084
1,10.0,244.48628522951088
2,8.0,244.63454529401204
3,10.0,244.8708786939524
4,10.0,245.1138124924792
5,8.0,245.36014990794823
6,12.0,245.59873940475293


Unnamed: 0,test,pred
0,3.8734,-502.8264662468482
1,3.7916,-502.3340654989552
2,3.7248,-502.07583139561393
3,3.7619,-501.8132927970073
4,3.8094,-501.6797112133754
5,4.1359,-501.5344886645544
6,4.217,-501.457731425804


Unnamed: 0,test,pred
0,0.84,-236716.56937655865
1,0.87,-236716.75797421116
2,1.0,-236717.21293754716
3,1.23,-236717.3857011199
4,0.98,-236717.53365027648
5,0.96,-236717.61220792425
6,1.05,-236717.4635986198


Unnamed: 0,test,pred
0,0.66,89717453190.78818
1,0.65,89717453191.64856
2,0.55,89717453192.40302
3,0.66,89717453192.7938
4,0.73,89717453191.89508
5,0.62,89717453192.43274
6,0.66,89717453192.7128


Unnamed: 0,test,pred
0,43.6,-47712.19258963689
1,43.3,-47582.104079052806
2,42.8,-47554.03632862121
3,43.0,-47452.84592115879
4,42.1,-47441.376500979066
5,42.1,-47433.12783724442
6,41.4,-47400.16464453563


Unnamed: 0,test,pred
0,444507.0,538812.0810625734
1,518253.0,554122.3903617455
2,400928.0,535718.6125380868
3,413554.0,543128.8328519018
4,460093.0,558538.4791210323
5,450935.0,539992.9729526193
6,471421.0,547443.67965084


Unnamed: 0,test,pred
0,-0.0727656666666666,2855912.2869539745
1,-0.075387,2855912.291698833
2,0.0057109999999999,2855912.2750650398
3,-0.1103773333333333,2855912.3082263693
4,0.0872823333333333,2855912.297117535
5,0.057994,2855912.28504902
6,-0.0598256666666666,2855912.2891600262


Unnamed: 0,test,pred
0,444507.0,440535.4092729407
1,518253.0,444225.8629786348
2,400928.0,436376.0214338422
3,413554.0,438063.5139463412
4,460093.0,444285.99912602775
5,450935.0,439743.63831874175
6,471421.0,443734.4230123824


Unnamed: 0,test,pred
0,-0.0727656666666666,-0.0051766621132544
1,-0.075387,-0.0031216183211076
2,0.0057109999999999,0.008952509657922
3,-0.1103773333333333,-0.0158871523247581
4,0.0872823333333333,0.0017865356998722
5,0.057994,-0.0028449161600057
6,-0.0598256666666666,-0.0009535623808827


Unnamed: 0,test,pred
0,101.9729,100.22488677617764
1,103.2309,100.97632436017685
2,100.9801,101.04775115235508
3,101.1794,101.07354799866388
4,101.4176,100.94843797345348
5,101.5514,101.01802786888923
6,101.8472,101.09479615135082


Unnamed: 0,test,pred
0,114.7969,113.58359305210246
1,114.46,113.96570873841269
2,114.7893,114.03209610781325
3,114.6911,114.08126879199833
4,114.4141,114.11668889515325
5,114.2963,114.12719139888448
6,115.2718,114.17139902205956


Unnamed: 0,test,pred
0,108.4955,106.80137312298976
1,107.9445,107.16871747951429
2,107.4001,107.15616449296186
3,107.2815,107.12695235107472
4,107.7241,107.07659542512644
5,107.2339,107.00914574279945
6,107.9056,107.02468748241502


Unnamed: 0,test,pred
0,105.2961,104.23735029667486
1,105.2392,104.74988780770006
2,104.3399,104.89906941204042
3,104.462,104.8024484340046
4,105.0892,104.94673295041696
5,104.678,105.12352027742897
6,105.2156,105.04018159113797
