In [None]:
from timeseries_split import load_time_series

mat_path = 'Xtrain.mat'

series = load_time_series(mat_path)

print(series[:10])

[ 86 141  95  41  22  21  32  72 138 111]


In [None]:
from timeseries_split import create_lagged_features

n_lags = 10
df = create_lagged_features(series, n_lags, dropna=True)
print(df)


       y  lag_1  lag_2  lag_3  lag_4  lag_5  lag_6  lag_7  lag_8  lag_9  \
0     86    NaN    NaN    NaN    NaN    NaN    NaN    NaN    NaN    NaN   
1    141   86.0    NaN    NaN    NaN    NaN    NaN    NaN    NaN    NaN   
2     95  141.0   86.0    NaN    NaN    NaN    NaN    NaN    NaN    NaN   
3     41   95.0  141.0   86.0    NaN    NaN    NaN    NaN    NaN    NaN   
4     22   41.0   95.0  141.0   86.0    NaN    NaN    NaN    NaN    NaN   
..   ...    ...    ...    ...    ...    ...    ...    ...    ...    ...   
995   61  166.0  136.0   45.0   18.0   12.0   15.0   33.0  103.0  179.0   
996   20   61.0  166.0  136.0   45.0   18.0   12.0   15.0   33.0  103.0   
997   12   20.0   61.0  166.0  136.0   45.0   18.0   12.0   15.0   33.0   
998   13   12.0   20.0   61.0  166.0  136.0   45.0   18.0   12.0   15.0   
999   23   13.0   12.0   20.0   61.0  166.0  136.0   45.0   18.0   12.0   

     lag_10  
0       NaN  
1       NaN  
2       NaN  
3       NaN  
4       NaN  
..      ...  
9

In [45]:
from timeseries_split import train_test_split_lagged
from timeseries_split import plot_lagged_chunks

train_size = 0.8
total_size = 200
stride = 200

splits = train_test_split_lagged(df, train_size, total_size=total_size, stride=stride)
train_df, val_df = splits[0]

plot_lagged_chunks(series, splits, n_lags=n_lags, stride = stride)


In [46]:
X_train = train_df.drop(columns='y')
y_train = train_df['y']
X_val = val_df.drop(columns='y')
y_val = val_df['y']

X_train = X_train.dropna()
y_train = y_train.loc[X_train.index]

X_val = X_val.dropna()
y_val = y_val.loc[X_val.index]

from sklearn.linear_model import LinearRegression
model = LinearRegression()
model.fit(X_train, y_train)

y_pred = model.predict(X_val)


In [47]:
from sklearn.metrics import mean_squared_error, mean_absolute_error

mse = mean_squared_error(y_val, y_pred)
mae = mean_absolute_error(y_val, y_pred)

print(f"MSE: {mse:.5f}")
print(f"MAE: {mae:.5f}")



MSE: 10598.83191
MAE: 68.16285


In [48]:
import plotly.graph_objects as go

fig = go.Figure()

# Echte waardes
fig.add_trace(go.Scatter(y=y_val.values, mode='lines', name='True', line=dict(color='blue')))

# Voorspellingen
fig.add_trace(go.Scatter(y=y_pred, mode='lines', name='Predicted', line=dict(color='red')))

fig.update_layout(
    title='True vs Predicted Values',
    xaxis_title='Index',
    yaxis_title='Value',
    height=400
)

fig.show()


In [None]:
#trainen en predicten op alle cross validation test data (telkens laatste 40 van 200 datapunten)

all_y_true = []
all_y_pred = []

for train_df, val_df in splits:
    X_train = train_df.drop(columns='y')
    y_train = train_df['y']
    X_val = val_df.drop(columns='y')
    y_val = val_df['y']

    X_train = X_train.dropna()
    y_train = y_train.loc[X_train.index]

    X_val = X_val.dropna()
    y_val = y_val.loc[X_val.index]

    model = LinearRegression()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_val)

    all_y_true.extend(y_val.values)
    all_y_pred.extend(y_pred)


In [50]:
import plotly.graph_objects as go

fig = go.Figure()
fig.add_trace(go.Scatter(y=all_y_true, mode='lines', name='True', line=dict(color='blue')))
fig.add_trace(go.Scatter(y=all_y_pred, mode='lines', name='Predicted', line=dict(color='red')))
fig.update_layout(title='True vs Predicted (All Splits)', xaxis_title='Index', yaxis_title='Value')
fig.show()


In [51]:
from sklearn.metrics import mean_squared_error, mean_absolute_error

mse = mean_squared_error(y_val, y_pred)
mae = mean_absolute_error(y_val, y_pred)

print(f"MSE: {mse:.5f}")
print(f"MAE: {mae:.5f}")

MSE: 129.98632
MAE: 8.17674


In [None]:
#N parameter (aantal lags) tunen

for n_lags in [5, 8, 10, 20, 50]:
    df = create_lagged_features(series, n_lags, dropna=True)
    splits = train_test_split_lagged(df, train_size=0.8, total_size=200, stride=200)

    maes = []
    for train_df, val_df in splits:
        X_train = train_df.drop(columns='y')
        y_train = train_df['y']
        X_val = val_df.drop(columns='y')
        y_val = val_df['y']

        X_train = X_train.dropna()
        y_train = y_train.loc[X_train.index]
        X_val = X_val.dropna()
        y_val = y_val.loc[X_val.index]

        model = LinearRegression()
        model.fit(X_train, y_train)
        y_pred = model.predict(X_val)

        mae = mean_absolute_error(y_val, y_pred)
        maes.append(mae)

    avg_mae = sum(maes) / len(maes)
    print(f"n_lags = {n_lags}: Gemiddelde MAE = {avg_mae:.5f}")


n_lags = 5: Gemiddelde MAE = 19.12143
n_lags = 8: Gemiddelde MAE = 16.89769
n_lags = 10: Gemiddelde MAE = 20.73532
n_lags = 20: Gemiddelde MAE = 20.82275
n_lags = 50: Gemiddelde MAE = 28.07108


In [None]:
#trainen en predicten op alle data

df = create_lagged_features(series, 8, dropna=True)
df = df.dropna().reset_index(drop=True)

X = df.drop(columns='y')
y = df['y']

from sklearn.linear_model import LinearRegression
model = LinearRegression()
model.fit(X, y)

y_pred_all = model.predict(X)


In [93]:
import plotly.graph_objects as go

fig = go.Figure()
fig.add_trace(go.Scatter(y=y.values, mode='lines', name='True', line=dict(color='blue')))
fig.add_trace(go.Scatter(y=y_pred_all, mode='lines', name='Predicted', line=dict(color='red')))
fig.update_layout(title='True vs Predicted on All Train Data', xaxis_title='Index', yaxis_title='Value')
fig.show()


In [94]:
from sklearn.metrics import mean_squared_error, mean_absolute_error

mse = mean_squared_error(y, y_pred_all)
mae = mean_absolute_error(y, y_pred_all)

print(f"MSE: {mse:.5f}")
print(f"MAE: {mae:.5f}")


MSE: 412.15244
MAE: 12.13247
