In [2]:
import warnings

import numpy as np
import pandas as pd
import plotly.colors as pcolors
import plotly.express as px
from plotly.subplots import make_subplots
from statsmodels.tsa.stattools import adfuller, kpss


# Long-Term Forecasting

In [6]:
# Root of the long-term forecasting benchmark files.
# NOTE: absolute path on the author's machine; adjust it to wherever the data lives locally.
data_path = "C:/Users/cleme/ETH/Master/Thesis/data/longterm/long_term_forecast"

# Folder of each benchmark underneath the root.
ett_path = data_path + "/ETT-small/ETT-small"
electricity_path = data_path + "/electricity/electricity/"  # ends with "/", so the file name below has no leading slash
weather_path = data_path + "/weather/weather"
illness_path = data_path + "/illness/illness"
traffic_path = data_path + "/traffic/traffic"
exchange_path = data_path + "/exchange_rate/exchange_rate"

# The four ETT files live in the same folder.
etth1, etth2, ettm1, ettm2 = (
    pd.read_csv(ett_path + csv_name)
    for csv_name in ("/ETTh1.csv", "/ETTh2.csv", "/ETTm1.csv", "/ETTm2.csv")
)
elec_df = pd.read_csv(electricity_path + "electricity.csv")
weather_df = pd.read_csv(weather_path + "/weather.csv")
illness_df = pd.read_csv(illness_path + "/national_illness.csv")
traffic_df = pd.read_csv(traffic_path + "/traffic.csv")

# Parse the exchange-rate dates, then store them back as "YYYY-MM-DD HH:MM:SS" strings.
exchange_df = pd.read_csv(exchange_path + "/exchange_rate.csv")
exchange_df["date"] = pd.to_datetime(exchange_df["date"]).dt.strftime("%Y-%m-%d %H:%M:%S")

# Display name -> frame; insertion order keeps `names` and `data` index-aligned.
benchmarks = {
    "ETTh1": etth1,
    "ETTh2": etth2,
    "ETTm1": ettm1,
    "ETTm2": ettm2,
    "Electricity": elec_df,
    "Weather": weather_df,
    "Illness": illness_df,
    "Traffic": traffic_df,
    "Exchange Rate": exchange_df,
}
names = list(benchmarks)
data = list(benchmarks.values())

# Colour pool: Plotly's default qualitative palette followed by Dark24.
colors = pcolors.qualitative.Plotly + pcolors.qualitative.Dark24



# Per benchmark: number of columns after the first one, and number of rows.
for label, frame in zip(names, data):
    series_count = frame.shape[1] - 1
    row_count = frame.shape[0]
    print(f"{label} has {series_count} series | total size {row_count}")

# Overview figure: one subplot per benchmark showing the first N_PREVIEW_ROWS rows of
# the second column (the first column after "date") of each frame.
N_GRID_COLS = 3
N_PREVIEW_ROWS = 1000
# Ceiling division so the grid always has room for every frame in `data`
# (3 rows for the nine benchmarks); at least one row so make_subplots gets a valid grid.
n_grid_rows = max(1, -(-len(data) // N_GRID_COLS))

fig = make_subplots(rows=n_grid_rows, cols=N_GRID_COLS, subplot_titles=names)
for i, df in enumerate(data):
    preview = df.iloc[:N_PREVIEW_ROWS, :]
    # px.line is used only to build the trace(s); its own layout is discarded,
    # so no title is set on the temporary figure.
    temp_fig = px.line(preview, x="date", y=preview.columns[1])
    grid_row, grid_col = divmod(i, N_GRID_COLS)
    for trace in temp_fig.data:
        # Wrap around the colour pool so a long dataset list cannot index past its end.
        trace.line.color = colors[i % len(colors)]
        # Plotly subplot coordinates are 1-based.
        fig.add_trace(trace, row=grid_row + 1, col=grid_col + 1)

fig.update_layout(
    height=150 * len(data),
    title={
        "text": "Long-term Forecasting SOTA Benchmarks Visualization",
        "x": 0.5,  # centre the title horizontally (0 = left, 1 = right)
        "xanchor": "center",
        "yanchor": "top",
    },
    title_font=dict(
        size=20,
        family="Arial",
        color="black",
    ),
)
fig.show()

# Static export, written relative to the current working directory.
# NOTE(review): plotly's image export needs an export backend (e.g. kaleido) installed; confirm for this environment.
fig.write_image("sota_benchmarks_viz.pdf")


ETTh1 has 7 series | total size 17420
ETTh2 has 7 series | total size 17420
ETTm1 has 7 series | total size 69680
ETTm2 has 7 series | total size 69680
Electricity has 321 series | total size 26304
Weather has 21 series | total size 52696
Illness has 7 series | total size 966
Traffic has 862 series | total size 17544
Exchange Rate has 8 series | total size 7588


# Stationarity Tests (ADF and KPSS)

In [18]:
# Stationarity diagnostics. For every column after the first of each benchmark frame,
# run the augmented Dickey-Fuller test and the KPSS test twice (regression="c" and
# regression="ct"), then print the mean and standard deviation of the test statistics
# and p-values across columns, followed by the per-column values.
# `adfuller`, `kpss`, `warnings` and `np` come from the notebook's import cell.
SKIPPED_DATASETS = ("Electricity", "Traffic")  # presumably skipped because of their many channels (TODO confirm)

for df, name in zip(data, names):
    if name in SKIPPED_DATASETS:
        continue
    print(f"Processing {name}")
    array = df.iloc[:, 1:].to_numpy()  # every column except the first one
    n_channels = array.shape[1]
    fullers = []
    kpss_cs = []
    kpss_cts = []
    # Silence warnings only while the tests run, instead of installing a global
    # "ignore" filter that would hide every warning for the rest of the session.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        for j in range(n_channels):
            channel = array[:, j]
            fuller = adfuller(channel)
            kpss_c = kpss(channel, regression="c")
            kpss_ct = kpss(channel, regression="ct")
            # Keep the first two entries of each result: (test statistic, p-value).
            fullers.append([fuller[0], fuller[1]])
            kpss_cs.append([kpss_c[0], kpss_c[1]])
            kpss_cts.append([kpss_ct[0], kpss_ct[1]])
    # Shape (n_channels, 2): column 0 = test statistic, column 1 = p-value.
    fullers = np.stack(fullers, axis=0)
    kpss_cs = np.stack(kpss_cs, axis=0)
    kpss_cts = np.stack(kpss_cts, axis=0)

    # NOTE(review): statsmodels looks KPSS p-values up in a bounded table (the captured
    # output shows them pinned at 0.01), so their mean/std carries little information; confirm.
    for test_name, stats_array in zip(["ADF", "KPSS-C", "KPSS-CT"], [fullers, kpss_cs, kpss_cts]):
        mean_stats = np.mean(stats_array, axis=0)
        std_stats = np.std(stats_array, axis=0)

        print(f"{test_name} Test Statistic Mean: {mean_stats[0]:.3f} | Std Dev: {std_stats[0]:.3f}")
        print(stats_array[:, 0])
        print(f"{test_name} P Value Mean: {mean_stats[1]:.3f} | Std Dev: {std_stats[1]:.3f}")
        print(stats_array[:, 1])

    print("-" * 50)  # separator between datasets

Processing ETTh1
ADF Test Statistic Mean: -5.909 | Std Dev: 1.810
[-8.55051715 -5.16905527 -8.62120886 -4.9640706  -5.7968952  -4.77268565
 -3.48796358]
ADF P Value Mean: 0.001 | Std Dev: 0.003
[9.25027595e-14 1.01653353e-05 6.09807931e-14 2.61287542e-05
 4.72896365e-07 6.13212315e-05 8.30164949e-03]
KPSS-C Test Statistic Mean: 4.589 | Std Dev: 3.371
[6.47813213 2.22655054 9.06332222 1.7189936  2.01470753 1.1559192
 9.46205851]
KPSS-C P Value Mean: 0.010 | Std Dev: 0.000
[0.01 0.01 0.01 0.01 0.01 0.01 0.01]
KPSS-CT Test Statistic Mean: 1.603 | Std Dev: 0.756
[2.45610304 2.11413332 2.41099925 1.72063672 0.45108732 0.60690209
 1.45875313]
KPSS-CT P Value Mean: 0.010 | Std Dev: 0.000
[0.01 0.01 0.01 0.01 0.01 0.01 0.01]
--------------------------------------------------
Processing ETTh2
ADF Test Statistic Mean: -4.136 | Std Dev: 1.184
[-6.52635605 -4.5542415  -4.04464051 -4.4529977  -2.4355311  -3.34076494
 -3.59713187]
ADF P Value Mean: 0.022 | Std Dev: 0.045
[1.01233217e-08 1.56720089e-

## ETT

In [None]:
# Folder holding the four ETT CSV files; built from `data_path`, which must already be defined.
ett_path = data_path + "/ETT-small/ETT-small"

### ETTh1

In [None]:
# Load ETTh1 and plot only the first column after "date".
etth1 = pd.read_csv(ett_path + "/ETTh1.csv")
columns = etth1.columns[1:]
# NOTE(review): .plot() gets no x=, so the x-axis is presumably the row index rather than "date"; confirm.
for column in columns[:1]:
    etth1.loc[:, ["date", column]].plot()



### ETTh2

In [None]:
# Load ETTh2 and draw one figure for each column after the first.
etth2 = pd.read_csv(ett_path + "/ETTh2.csv")
columns = etth2.columns[1:]
for column in columns:
    etth2.loc[:, ["date", column]].plot()


### ETTm1

In [None]:
# Load ETTm1 and draw one figure for each column after the first.
ettm1 = pd.read_csv(ett_path + "/ETTm1.csv")
columns = ettm1.columns[1:]
for column in columns:
    # Plot the frame loaded in this cell (ettm1), not the ETTh1 frame.
    ettm1[["date", column]].plot()


### ETTm2

In [None]:
# Load ETTm2 and draw one figure for each column after the first.
ettm2 = pd.read_csv(ett_path + "/ETTm2.csv")
columns = ettm2.columns[1:]
for column in columns:
    ettm2.loc[:, ["date", column]].plot()


## Electricity

In [None]:
# Load the electricity data and draw one figure for each of the columns "0" .. "9".
electricity_path = data_path + "/electricity/electricity/"
elec_df = pd.read_csv(electricity_path + "electricity.csv")
n_series = 10  # number of leading channels to preview
for channel in map(str, range(n_series)):
    # Column labels are strings, hence the str() conversion of the counter.
    elec_df.loc[:, ["date", channel]].plot()

In [None]:
# Interactive line chart of electricity column "0" over "date".
fig = px.line(
    data_frame=elec_df,
    x="date",
    y="0",
    title="Electricity Channel 0",
)
fig.show()

## Weather

In [None]:
# Load the weather data, list its columns, and chart columns 1..10 against "date".
weather_path = data_path + "/weather/weather"
weather_df = pd.read_csv(weather_path + "/weather.csv")
print(weather_df.columns)
preview_columns = weather_df.columns[1:11]  # first ten columns after the first one
for column in preview_columns:
    fig = px.line(
        data_frame=weather_df,
        x="date",
        y=column,
        title=f"Weather Column {column}",
    )
    fig.show()

## Illness

In [None]:
# Load the illness data, list its columns, and chart "%UNWEIGHTED ILI" against "date".
illness_path = data_path + "/illness/illness"
illness_df = pd.read_csv(illness_path + "/national_illness.csv")
print(illness_df.columns)
fig = px.line(
    data_frame=illness_df,
    x="date",
    y="%UNWEIGHTED ILI",
    title="ILI Dataset",
)
fig.show()

## Traffic

In [None]:
# Load the traffic data and chart column "0" against "date".
traffic_path = data_path + "/traffic/traffic"
traffic_df = pd.read_csv(traffic_path + "/traffic.csv")

fig = px.line(
    data_frame=traffic_df,
    x="date",
    y="0",
    title="Traffic Channel 0",
)
fig.show()

## Exchange Rate

In [None]:
# Load the exchange-rate data (dates left exactly as stored in the CSV) and chart column "0".
exchange_path = data_path + "/exchange_rate/exchange_rate"
exchange_df = pd.read_csv(exchange_path + "/exchange_rate.csv")
fig = px.line(
    data_frame=exchange_df,
    x="date",
    y="0",
    title="Exchange Channel 0",
)
fig.show()

# Short-Term Forecasting

In [1]:
# Folder holding the M4 short-term forecasting CSV files.
# NOTE: absolute path on the author's machine; adjust it to wherever the data lives locally.
m4_path = "C:/Users/cleme/ETH/Master/Thesis/data/shortterm/short_term_forecast/m4/m4"

### Hourly

In [3]:
# M4 hourly test file: chart column "V2" against column "V1".
hourly_csv = m4_path + "/Hourly-test.csv"
hourly_df = pd.read_csv(hourly_csv)
fig = px.line(data_frame=hourly_df, x="V1", y="V2", title="M4 Hourly Test V2")
fig.show()

### Daily

In [4]:
# M4 daily test file: chart column "V2" against column "V1".
daily_csv = m4_path + "/Daily-test.csv"
daily_df = pd.read_csv(daily_csv)
fig = px.line(data_frame=daily_df, x="V1", y="V2", title="M4 Daily Test V2")
fig.show()

### Monthly

In [5]:
# M4 monthly test file: chart column "V2" against column "V1".
monthly_csv = m4_path + "/Monthly-test.csv"
monthly_df = pd.read_csv(monthly_csv)
fig = px.line(data_frame=monthly_df, x="V1", y="V2", title="M4 Monthly Test V2")
fig.show()

### Quarterly

In [None]:
# M4 quarterly test file: chart column "V2" against column "V1".
quarterly_csv = m4_path + "/Quarterly-test.csv"
quarterly_df = pd.read_csv(quarterly_csv)
fig = px.line(data_frame=quarterly_df, x="V1", y="V2", title="M4 Quarterly Test V2")
fig.show()

### Yearly

In [None]:
# M4 yearly test file: chart column "V2" against column "V1".
yearly_csv = m4_path + "/Yearly-test.csv"
yearly_df = pd.read_csv(yearly_csv)
fig = px.line(data_frame=yearly_df, x="V1", y="V2", title="M4 Yearly Test V2")
fig.show()