# ARIMAX

## Pre-Processing

In [80]:
# Libraries

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
import itertools

import statsmodels.api as sm
import statsmodels.tsa.api as smts
from statsmodels.tsa.seasonal import seasonal_decompose, STL
from statsmodels.tsa.stattools import grangercausalitytests
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.stats.diagnostic import acorr_ljungbox

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error

from scipy import stats
from scipy.stats import pearsonr
from typing import Union
from itertools import product
from joblib import Parallel, delayed
from tqdm.notebook import tqdm
from tabulate import tabulate

import warnings
warnings.filterwarnings('ignore')

In [81]:
# Functions

def plot_stl_decomposition(data, period=7, color='#636EFA', title=''):
    """Decompose a series with STL, print a Ljung-Box p-value and plot the parts.

    Parameters
    ----------
    data : series passed straight to ``STL``.
    period : seasonal period handed to ``STL``.
    color : line colour of the observed series.
    title : figure super-title.

    Prints the Ljung-Box p-value (first tested lag, ``lags=3``) computed on
    the seasonal component and shows a 4-row figure (observed, trend,
    seasonal, residuals). Returns ``None``.
    """
    # STL Decomposition
    decomposition = STL(data, period=period).fit()
    res_season = decomposition.seasonal.dropna()

    # Recent statsmodels releases return a DataFrame (columns 'lb_stat' and
    # 'lb_pvalue'); older ones return a (statistic, p-value) tuple. Unpacking
    # a DataFrame would yield its column *names*, so handle both shapes.
    ljung_box = acorr_ljungbox(res_season, lags=3)
    if isinstance(ljung_box, pd.DataFrame):
        pvalue = ljung_box['lb_pvalue'].to_numpy()
    else:
        pvalue = np.asarray(ljung_box[1])
    print(f'Ljung-Box Test p-value: {pvalue[0]}')

    # Plotting
    fig, (ax1, ax2, ax3, ax4) = plt.subplots(nrows=4, ncols=1, sharex=True, dpi=300, figsize=(12,4))
    fig.suptitle(title, y=1.02)
    ax1.plot(decomposition.observed, color=color)
    ax1.set_ylabel('Observed')
    ax2.plot(decomposition.trend, color='#00CC96')
    ax2.set_ylabel('Trend')
    ax3.plot(decomposition.seasonal, color='#FECB52')
    ax3.set_ylabel('Seasonal')
    ax4.plot(decomposition.resid, color='#FFA15A')
    ax4.set_ylabel('Residuals')
    plt.show()


def calculate_aic(y_true, y_pred, num_features):
    """Return the least-squares AIC: ``n * log(SSE / n) + 2 * num_features``.

    ``y_true`` and ``y_pred`` must support element-wise subtraction;
    ``num_features`` is the number of fitted parameters to penalise.
    """
    n_obs = len(y_true)
    squared_error_sum = np.sum((y_true - y_pred) ** 2)
    penalty = 2 * num_features
    return n_obs * np.log(squared_error_sum / n_obs) + penalty


def calculate_bic(y_true, y_pred, num_features):
    """Return the least-squares BIC: ``n * log(SSE / n) + num_features * log(n)``.

    ``y_true`` and ``y_pred`` must support element-wise subtraction;
    ``num_features`` is the number of fitted parameters to penalise.
    """
    n_obs = len(y_true)
    squared_error_sum = np.sum((y_true - y_pred) ** 2)
    goodness_of_fit = n_obs * np.log(squared_error_sum / n_obs)
    return goodness_of_fit + num_features * np.log(n_obs)


def tsplot(y, lags=None, figsize=(20, 10), title=None):
    """Plot a series with its ACF and PACF and report the ADF p-value.

    The series is drawn across the top row (its title carries the augmented
    Dickey-Fuller p-value); the ACF and PACF, computed with ``lags``, fill
    the bottom row. ``title`` defaults to 'Time Series Analysis Plots'.
    """
    plt.figure(figsize=figsize)

    grid = (2, 2)
    series_ax = plt.subplot2grid(grid, (0, 0), colspan=2)
    autocorr_ax = plt.subplot2grid(grid, (1, 0))
    partial_ax = plt.subplot2grid(grid, (1, 1))

    y.plot(ax=series_ax)

    adf_p_value = smts.adfuller(y)[1]
    heading = 'Time Series Analysis Plots' if title is None else title
    series_ax.set_title(f'{heading}\n Dickey-Fuller: p={adf_p_value:.5f}')

    smts.graphics.plot_acf(y, lags=lags, ax=autocorr_ax)
    smts.graphics.plot_pacf(y, lags=lags, ax=partial_ax)
    plt.tight_layout()


def forward_step(X_train, y_train, selected_features, remaining_features, criterion_func):
    """Find the candidate whose addition gives the lowest criterion value.

    For each feature in ``remaining_features`` an OLS model (with constant)
    is fitted on ``selected_features`` plus that feature and scored with
    ``criterion_func(y_true, y_pred, n_params)``.

    Returns ``(best_criterion, best_feature)``; ``(np.inf, None)`` when
    there are no candidates.
    """
    winner = None
    lowest_score = np.inf

    for candidate in remaining_features:
        columns = selected_features + [candidate]
        design = sm.add_constant(X_train[columns])
        fitted = sm.OLS(y_train, design).fit()
        score = criterion_func(y_train, fitted.predict(design), len(fitted.params))

        if score < lowest_score:
            lowest_score, winner = score, candidate

    return lowest_score, winner

def backward_step(X_train, y_train, selected_features, criterion_func):
    """Find the selected feature whose removal gives the lowest criterion value.

    For each feature in ``selected_features`` an OLS model (with constant)
    is fitted on the remaining selected features and scored with
    ``criterion_func(y_true, y_pred, n_params)``.

    Returns ``(best_criterion, best_feature)``; ``(np.inf, None)`` when
    nothing is selected yet.
    """
    winner = None
    lowest_score = np.inf

    for candidate in selected_features:
        # Drop only the first occurrence, mirroring list.remove semantics.
        kept = list(selected_features)
        kept.remove(candidate)

        design = sm.add_constant(X_train[kept])
        fitted = sm.OLS(y_train, design).fit()
        score = criterion_func(y_train, fitted.predict(design), len(fitted.params))

        if score < lowest_score:
            lowest_score, winner = score, candidate

    return lowest_score, winner

def stepwise_bidirectional_selection(X_train, y_train, method='aic'):
    """Select regressors by bidirectional stepwise search on AIC or BIC.

    Each round evaluates the best single addition (``forward_step``) and the
    best single removal (``backward_step``) and applies whichever strictly
    improves on the best criterion seen so far; the search stops when
    neither does.

    Parameters
    ----------
    X_train : DataFrame of candidate regressors.
    y_train : target aligned with ``X_train``.
    method : ``'aic'`` or ``'bic'``.

    Returns
    -------
    list of selected column names, in the order they were added.

    Raises
    ------
    ValueError if ``method`` is neither ``'aic'`` nor ``'bic'``.
    """
    criteria = {'aic': calculate_aic, 'bic': calculate_bic}
    if method not in criteria:
        # Validate up front so a bad value is reported even for an empty X_train.
        raise ValueError("Invalid method. Use 'aic' or 'bic'.")
    criterion_func = criteria[method]

    features = list(X_train.columns)
    selected_features = []
    best_criterion = np.inf

    while True:
        # Only offer features that are not in the model yet; re-offering a
        # selected one would fit a design matrix with duplicated columns.
        remaining = [name for name in features if name not in selected_features]

        forward_criterion, forward_feature = forward_step(
            X_train, y_train, selected_features, remaining, criterion_func)
        backward_criterion, backward_feature = backward_step(
            X_train, y_train, selected_features, criterion_func)

        if forward_criterion < backward_criterion and forward_criterion < best_criterion:
            selected_features.append(forward_feature)
            best_criterion = forward_criterion
        elif backward_criterion < forward_criterion and backward_criterion < best_criterion:
            selected_features.remove(backward_feature)
            best_criterion = backward_criterion
        else:
            # No strict improvement (or nothing left to try): stop.
            break

    return selected_features


def fit_arima_model(order, endog, d):
    """Fit one ARIMA(p, d, q) candidate and report its information criteria.

    ``order`` is a ``(p, q)`` pair. Returns ``[(p, d, q), AIC, BIC]`` with
    both criteria rounded to 2 decimals, or ``None`` if anything raises.
    """
    try:
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            spec = (order[0], d, order[1])
            fitted = SARIMAX(endog, order=spec, simple_differencing=False).fit(disp=False)
            return [(order[0], d, order[1]), round(fitted.aic, 2), round(fitted.bic, 2)]
    except Exception:
        # Best-effort grid search: a failing candidate is simply skipped.
        return None

def optimize_ARIMA(endog: Union[pd.Series, list], order_list: list, d: int) -> tuple:
    """Grid-search ARIMA(p, d, q) orders in parallel and rank them by AIC.

    Parameters
    ----------
    endog : series to model.
    order_list : iterable of ``(p, q)`` pairs to try.
    d : differencing order shared by all candidates.

    Returns
    -------
    ``(result_df, order_arima)``: a DataFrame with columns
    ``'(p, d, q)'``, ``'AIC'``, ``'BIC'`` sorted by ascending AIC, and the
    ``(p, d, q)`` tuple of its first row.

    Raises
    ------
    ValueError if no candidate could be fitted.
    """
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        results = Parallel(n_jobs=-1)(delayed(fit_arima_model)(order, endog, d) for order in tqdm(order_list))

    # Candidates that failed to fit come back as None.
    results = [result for result in results if result is not None]
    if not results:
        raise ValueError('No ARIMA model could be fitted for any of the candidate orders.')

    result_df = pd.DataFrame(results, columns=['(p, d, q)', 'AIC', 'BIC'])
    result_df = result_df.sort_values(by='AIC', ascending=True).reset_index(drop=True)
    order_arima = result_df['(p, d, q)'].iloc[0]

    return result_df, order_arima


def fit_arimax_model(order, endog, exog, d):
    """Fit one ARIMAX(p, d, q) candidate and report its information criteria.

    ``order`` is a ``(p, q)`` pair and ``exog`` the exogenous regressors.
    Returns ``[(p, d, q), AIC, BIC]`` with both criteria rounded to
    2 decimals, or ``None`` if anything raises.
    """
    try:
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            spec = (order[0], d, order[1])
            fitted = SARIMAX(endog, exog=exog, order=spec, simple_differencing=False).fit(disp=False)
            return [(order[0], d, order[1]), round(fitted.aic, 2), round(fitted.bic, 2)]
    except Exception:
        # Best-effort grid search: a failing candidate is simply skipped.
        return None

def optimize_ARIMAX(endog: Union[pd.Series, list], exog: Union[pd.Series, pd.DataFrame, list], order_list: list, d: int) -> tuple:
    """Grid-search ARIMAX(p, d, q) orders in parallel and rank them by AIC.

    Parameters
    ----------
    endog : series to model.
    exog : exogenous regressors aligned with ``endog``.
    order_list : iterable of ``(p, q)`` pairs to try.
    d : differencing order shared by all candidates.

    Returns
    -------
    ``(result_df, order_arimax)``: a DataFrame with columns
    ``'(p, d, q)'``, ``'AIC'``, ``'BIC'`` sorted by ascending AIC, and the
    ``(p, d, q)`` tuple of its first row.

    Raises
    ------
    ValueError if no candidate could be fitted.
    """
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        results = Parallel(n_jobs=-1)(delayed(fit_arimax_model)(order, endog, exog, d) for order in tqdm(order_list))

    # Candidates that failed to fit come back as None.
    results = [result for result in results if result is not None]
    if not results:
        raise ValueError('No ARIMAX model could be fitted for any of the candidate orders.')

    result_df = pd.DataFrame(results, columns=['(p, d, q)', 'AIC', 'BIC'])
    result_df = result_df.sort_values(by='AIC', ascending=True).reset_index(drop=True)
    order_arimax = result_df['(p, d, q)'].iloc[0]

    return result_df, order_arimax


def mape(y_true, y_pred):
    """Return the mean absolute percentage error, in percent.

    Observations whose true value is zero are excluded, since their
    percentage error is undefined. Inputs may be lists, arrays or Series;
    they are compared positionally.

    Returns ``nan`` when every true value is zero (or the input is empty).
    """
    # Convert first: comparing a plain list with 0 yields a single bool,
    # not an element-wise mask.
    actual = np.asarray(y_true, dtype=float).ravel()
    predicted = np.asarray(y_pred, dtype=float).ravel()

    non_zero = actual != 0
    if not non_zero.any():
        return np.nan

    absolute_percentage_errors = np.abs((actual[non_zero] - predicted[non_zero]) / actual[non_zero])
    return np.mean(absolute_percentage_errors) * 100

def mae(y_true, y_pred):
    """Return the mean absolute error between ``y_true`` and ``y_pred``."""
    absolute_error = mean_absolute_error(y_true, y_pred)
    return absolute_error

def rmse(y_true, y_pred):
    """Return the root mean squared error between ``y_true`` and ``y_pred``."""
    squared_error = mean_squared_error(y_true, y_pred)
    return np.sqrt(squared_error)

In [82]:
# Load the raw sensor readings and normalise column names and types.
df = pd.read_csv('data_sensors_rovere.csv')
df = df.rename(columns={'group': 'group_id'})

# .copy() makes df_rovere an independent frame, so the column assignments
# below are not writes into a slice of df (SettingWithCopy).
df_rovere = df[['reading_id', 'timestamp', 'sensor_id', 'value', 'description', 'group_id']].copy()

df_rovere['reading_id'] = df_rovere['reading_id'].astype(str)
# Keep only the calendar day of each reading.
df_rovere['timestamp'] = pd.to_datetime(df_rovere['timestamp']).dt.floor('D').dt.date
df_rovere['sensor_id'] = df_rovere['sensor_id'].astype(str)
df_rovere['value'] = df_rovere['value'].astype(float)
df_rovere['description'] = df_rovere['description'].astype(str)
df_rovere['group_id'] = df_rovere['group_id'].astype(str)

# Relabel readings by sensor id (tensiometers) or by existing description (irrigation).
condition_30 = df_rovere['sensor_id'].isin(['72', '76', '73', '74', '61', '63', '67', '65'])
condition_60 = df_rovere['sensor_id'].isin(['71', '69', '75', '70', '62', '64', '68', '66'])
condition_irrigation = df_rovere['description'] == 'irrigation'

df_rovere.loc[condition_30, 'description'] = 'Tensiometer 30'
df_rovere.loc[condition_60, 'description'] = 'Tensiometer 60'
df_rovere.loc[condition_irrigation, 'description'] = 'Irrigation'

print('Shape:', df_rovere.shape)
print('Types:\n', df_rovere.dtypes)
df_rovere.head(10)

In [83]:
# One row per distinct (sensor, description, group) combination, ordered by group.
sensor_group = (
    df_rovere[['sensor_id', 'description', 'group_id']]
    .drop_duplicates()
    .sort_values(by='group_id')
    .reset_index(drop=True)
)
sensor_group

In [84]:
# Daily statistics of every measurement type for group 1.
df_group_1 = df_rovere[df_rovere['group_id'] == '1'].reset_index(drop=True)
df_group_1 = (
    df_group_1
    .groupby(['timestamp', 'description'])
    .agg(
        val_min=('value', 'min'),
        val_max=('value', 'max'),
        val_avg=('value', 'mean'),
        val_med=('value', 'median'),
        val_sum=('value', 'sum'),
    )
    .reset_index()
)

# Wide layout: one row per day, one column per (statistic, description) pair.
df_pivot_1 = df_group_1.pivot(
    index='timestamp',
    columns='description',
    values=['val_min', 'val_max', 'val_avg', 'val_med', 'val_sum'],
).reset_index()
df_pivot_1.columns.name = None

# Flat names, statistic-major; the measurement order below is hard-coded and
# assumed to match the pivoted description order — TODO confirm against the data.
stat_prefixes = ['min', 'max', 'avg', 'med', 'sum']
measure_suffixes = ['hum', 'temp', 'solar', 'irr', 'rain', 'tens30', 'tens60']
df_pivot_1.columns = ['date'] + [
    f'{prefix}_{suffix}' for prefix in stat_prefixes for suffix in measure_suffixes
]

df_pivot_1 = df_pivot_1.dropna().reset_index(drop=True)
df = df_pivot_1

# Discard statistics that are not used in the analysis.
columns_to_drop = ['min_irr', 'max_irr', 'avg_irr', 'med_irr', 'min_rain', 'sum_hum', 'sum_temp', 'sum_solar', 'avg_rain']
df = df.drop(columns=columns_to_drop).dropna().reset_index(drop=True)

df

## Exploratory Data Analysis

In [85]:
# Summary statistics, rounded to two decimals.
df.describe().round(2)

### Univariate Analysis

In [86]:
# Distributions of Tensiometers

fig, axes = plt.subplots(2, 3, figsize=(15, 10))

# One row of panels per depth: (column, axis label, bar colour).
tensiometer_panels = [
    ('avg_tens30', 'Tensiometer (30)', '#636EFA'),
    ('avg_tens60', 'Tensiometer (60)', '#EF553B'),
]

for row, (column, label, bar_color) in enumerate(tensiometer_panels):
    box_ax, hist_ax, qq_ax = axes[row]
    series = df[column]

    # Boxplot
    sns.boxplot(y=series, ax=box_ax, color=bar_color, width=0.5)
    box_ax.set_title('Boxplot')
    box_ax.set_ylabel(label)

    # Histogram with KDE, plus the normal pdf with the sample mean and std
    hist_ax.hist(series, bins=15, color=bar_color, edgecolor='black', density=True)
    sns.kdeplot(series, color='#FFA15A', ax=hist_ax)
    mu, sigma = series.mean(), series.std()
    xmin, xmax = hist_ax.set_xlim()
    x = np.linspace(xmin, xmax, 100)
    p = stats.norm.pdf(x, mu, sigma)
    hist_ax.plot(x, p, '#00CC96', linewidth=2)

    hist_ax.set_title('Histogram with KDE')
    hist_ax.set_xlabel(label)
    hist_ax.set_ylabel('Density')

    # Q-Q Plot of the series shown in this row (each depth uses its own data)
    stats.probplot(series, dist='norm', plot=qq_ax)
    qq_ax.set_title('Gaussian Q-Q Plot')
    qq_ax.set_xlabel('Theoretical Quantile')
    qq_ax.set_ylabel('Observed Quantile')

plt.suptitle('Tensiometer Distribution at Depths 30 and 60', fontsize=18)
plt.tight_layout(rect=[0, 0, 1, 0.97])
plt.show()

In [87]:
# Distributions of the main Covariates

fig, axes = plt.subplots(2, 5, figsize=(20, 8))

variables = ['avg_hum', 'avg_temp', 'avg_solar', 'sum_irr', 'sum_rain']

for i, var in enumerate(variables):
    hist_ax, box_ax = axes[0, i], axes[1, i]

    # stat='density' makes the bars match the 'Density' axis label
    # (histplot draws raw counts by default).
    sns.histplot(df[var], bins=15, color='#00CC96', edgecolor='black', kde=True, stat='density', ax=hist_ax)
    sns.boxplot(y=df[var], ax=box_ax, color='#00CC96', width=0.5)

    hist_ax.set_title(f'{var}')
    hist_ax.set_xlabel(var)
    hist_ax.set_ylabel('Density')
    box_ax.set_ylabel(var)

plt.suptitle('Distributions of Covariates', fontsize=18)
plt.tight_layout(rect=[0, 0, 1, 0.96])
plt.show()

### Bivariate Analysis

In [88]:
# Pairwise scatterplots (with regression lines) of the targets and the main covariates

pairplot_columns = ['avg_tens30', 'avg_tens60', 'avg_hum', 'avg_temp', 'avg_solar', 'sum_irr', 'sum_rain']
selected_columns = df[pairplot_columns]

regression_style = {'line_kws': {'color': 'red'}}
sns.pairplot(selected_columns, kind='reg', height=1.5, aspect=2, plot_kws=regression_style)
plt.suptitle('Scatterplots', y=1.02, fontsize=18)

plt.show()

In [89]:
# Correlation heatmap of the targets and the main covariates,
# rows and columns ordered by correlation with avg_tens30

corr_mat = (
    selected_columns.corr()
    .sort_values(by='avg_tens30', axis=0, ascending=False)
    .sort_values(by='avg_tens30', axis=1, ascending=False)
)

plt.figure(figsize=(8, 6))
sns.heatmap(corr_mat, annot=True, cmap='coolwarm', vmin=-1, vmax=1)

In [90]:
# Correlation heatmap of the targets and all the possible covariates
# (every column after the first), ordered by correlation with avg_tens30

corr_mat_all = (
    df.iloc[:, 1:].corr().round(2)
    .sort_values(by='avg_tens30', axis=0, ascending=False)
    .sort_values(by='avg_tens30', axis=1, ascending=False)
)

plt.figure(figsize=(20, 10))
sns.heatmap(corr_mat_all, annot=True, cmap='coolwarm', vmin=-1, vmax=1)

### Time Series EDA

In [91]:
# Time Series of the Tensiometers

plt.figure(figsize=(15, 5))

tensiometer_lines = [
    ('avg_tens30', 'Avg Tensiometer (30)', '#636EFA'),
    ('avg_tens60', 'Avg Tensiometer (60)', '#EF553B'),
]
for column, line_label, line_color in tensiometer_lines:
    plt.plot(df['date'], df[column], linestyle='-', label=line_label, color=line_color)

plt.title('Tensiometer Comparison (30 vs 60)')
plt.ylabel('Value')
plt.legend()

# One tick on the first day of every month, labelled with the short month name.
locator = mdates.MonthLocator(bymonthday=1)
formatter = mdates.DateFormatter('%b')

date_axis = plt.gca().xaxis
date_axis.set_major_locator(locator)
date_axis.set_major_formatter(formatter)

plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

In [92]:
# Seasonal and Trend Decomposition using Loess (STL)

## Weekly Decomposition

stl_targets = [
    ('avg_tens30', '#636EFA', 'STL Decomposition - Tensiometer (30)'),
    ('avg_tens60', '#EF553B', 'STL Decomposition - Tensiometer (60)'),
]
for column, line_color, plot_title in stl_targets:
    plot_stl_decomposition(df[column], period=7, color=line_color, title=plot_title)

In [93]:
## Monthly Decomposition (period of 30 observations)

stl_targets = [
    ('avg_tens30', '#636EFA', 'STL Decomposition - Tensiometer (30)'),
    ('avg_tens60', '#EF553B', 'STL Decomposition - Tensiometer (60)'),
]
for column, line_color, plot_title in stl_targets:
    plot_stl_decomposition(df[column], period=30, color=line_color, title=plot_title)

In [94]:
# Time Series of the Main Features

fig, axes = plt.subplots(nrows=4, ncols=2, dpi=300, figsize=(12, 6))

# Targets get their own colours; every covariate shares the same green.
colors = ['#636EFA', '#EF553B'] + ['#00CC96'] * 5
variables = ['avg_tens30', 'avg_tens60', 'sum_irr', 'avg_hum', 'sum_rain', 'avg_temp', 'avg_solar']

panels = axes.flatten()

for variable, line_color, ax in zip(variables, colors, panels):
    ax.plot(df[variable], color=line_color, linewidth=1)
    ax.set_title(variable)
    ax.xaxis.set_ticks_position('none')
    ax.yaxis.set_ticks_position('none')
    ax.spines['top'].set_alpha(0)
    ax.tick_params(labelsize=6)

# Hide the grid cells that have no variable to show.
for spare_ax in panels[len(variables):]:
    spare_ax.axis('off')

plt.tight_layout()
plt.show()

## Regression

### Group 1

In [95]:
# One modelling frame per depth: each drops every statistic of the other tensiometer.
tens_stats = ('min', 'max', 'avg', 'med', 'sum')

col_drop_30 = [f'{stat}_tens60' for stat in tens_stats]
df_30 = df.drop(columns=col_drop_30).dropna().reset_index(drop=True)

col_drop_60 = [f'{stat}_tens30' for stat in tens_stats]
df_60 = df.drop(columns=col_drop_60).dropna().reset_index(drop=True)

In [96]:
# Analysis of Causality with Granger Causality Test

## Candidate causes of the Tensiometer (30), tested at a lag of 1 day

# (printed heading, candidate cause column)
granger_pairs = [
    ('Tensiometer (60)', 'avg_tens60'),
    ('\nHumidity', 'avg_hum'),
    ('\nTemperature', 'avg_temp'),
    ('\nSolar Radiation', 'avg_solar'),
    ('\nIrrigation', 'sum_irr'),
    ('\nRain', 'sum_rain'),
]

granger_results = []
for heading, cause in granger_pairs:
    print(heading)
    granger_results.append(grangercausalitytests(df[['avg_tens30', cause]], [1]))

granger_1, granger_2, granger_3, granger_4, granger_5, granger_6 = granger_results

In [97]:
## Candidate causes of the Tensiometer (30), tested at a lag of 3 days

# (printed heading, candidate cause column)
granger_pairs = [
    ('Tensiometer (60)', 'avg_tens60'),
    ('\nHumidity', 'avg_hum'),
    ('\nTemperature', 'avg_temp'),
    ('\nSolar Radiation', 'avg_solar'),
    ('\nIrrigation', 'sum_irr'),
    ('\nRain', 'sum_rain'),
]

granger_results = []
for heading, cause in granger_pairs:
    print(heading)
    granger_results.append(grangercausalitytests(df[['avg_tens30', cause]], [3]))

granger_1, granger_2, granger_3, granger_4, granger_5, granger_6 = granger_results

In [98]:
## Candidate causes of the Tensiometer (30), tested at a lag of 5 days

# (printed heading, candidate cause column)
granger_pairs = [
    ('Tensiometer (60)', 'avg_tens60'),
    ('\nHumidity', 'avg_hum'),
    ('\nTemperature', 'avg_temp'),
    ('\nSolar Radiation', 'avg_solar'),
    ('\nIrrigation', 'sum_irr'),
    ('\nRain', 'sum_rain'),
]

granger_results = []
for heading, cause in granger_pairs:
    print(heading)
    granger_results.append(grangercausalitytests(df[['avg_tens30', cause]], [5]))

granger_1, granger_2, granger_3, granger_4, granger_5, granger_6 = granger_results

In [99]:
## Candidate causes of the Tensiometer (60), tested at a lag of 1 day

# (printed heading, candidate cause column)
granger_pairs = [
    ('Tensiometer (30)', 'avg_tens30'),
    ('\nHumidity', 'avg_hum'),
    ('\nTemperature', 'avg_temp'),
    ('\nSolar Radiation', 'avg_solar'),
    ('\nIrrigation', 'sum_irr'),
    ('\nRain', 'sum_rain'),
]

granger_results = []
for heading, cause in granger_pairs:
    print(heading)
    granger_results.append(grangercausalitytests(df[['avg_tens60', cause]], [1]))

granger_1, granger_2, granger_3, granger_4, granger_5, granger_6 = granger_results

In [100]:
## Candidate causes of the Tensiometer (60), tested at a lag of 3 days

# (printed heading, candidate cause column)
granger_pairs = [
    ('Tensiometer (30)', 'avg_tens30'),
    ('\nHumidity', 'avg_hum'),
    ('\nTemperature', 'avg_temp'),
    ('\nSolar Radiation', 'avg_solar'),
    ('\nIrrigation', 'sum_irr'),
    ('\nRain', 'sum_rain'),
]

granger_results = []
for heading, cause in granger_pairs:
    print(heading)
    granger_results.append(grangercausalitytests(df[['avg_tens60', cause]], [3]))

granger_1, granger_2, granger_3, granger_4, granger_5, granger_6 = granger_results

In [101]:
## Candidate causes of the Tensiometer (60), tested at a lag of 5 days

# (printed heading, candidate cause column)
granger_pairs = [
    ('Tensiometer (30)', 'avg_tens30'),
    ('\nHumidity', 'avg_hum'),
    ('\nTemperature', 'avg_temp'),
    ('\nSolar Radiation', 'avg_solar'),
    ('\nIrrigation', 'sum_irr'),
    ('\nRain', 'sum_rain'),
]

granger_results = []
for heading, cause in granger_pairs:
    print(heading)
    granger_results.append(grangercausalitytests(df[['avg_tens60', cause]], [5]))

granger_1, granger_2, granger_3, granger_4, granger_5, granger_6 = granger_results

#### Sensor 72

In [102]:
# Linear Regression and Feature Selection

y = df_30['avg_tens30']
X = df_30.drop(['date'], axis=1)

# Predictors are the 1-, 2- and 3-day lags of every column.
X = X.shift(1).add_suffix('_lag1').join(X.shift(2).add_suffix('_lag2')).join(X.shift(3).add_suffix('_lag3'))

# The lagged target itself is excluded from the regressors.
columns_to_drop = ['avg_tens30_lag1', 'avg_tens30_lag2', 'avg_tens30_lag3']
X = X.drop(columns=columns_to_drop, errors='ignore')

# The first three rows have incomplete lags.
y = y.iloc[3:].reset_index(drop=True)
X = X.iloc[3:].reset_index(drop=True)

# Chronological split (no shuffling).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.29878, shuffle=False)


# Predictions with Naive Method on the entire dataset
# (previous observation; the first test point uses the last training value)

naive_pred = pd.concat([y_train.shift(1).fillna(y_train.iloc[-1])[1:], y_test.shift(1).fillna(y_train.iloc[-1])])

# has_constant='add' always inserts the intercept column. The default
# ('skip') omits it whenever the matrix already holds a constant column,
# which would leave the test design misaligned with the fitted parameters.
all_features_model = sm.OLS(y_train, sm.add_constant(X_train, has_constant='add')).fit()
all_features_predictions = all_features_model.predict(sm.add_constant(X_train, has_constant='add'))
all_features_predictions_test = all_features_model.predict(sm.add_constant(X_test, has_constant='add'))

selected_features = stepwise_bidirectional_selection(X_train, y_train, method='aic')
final_model = sm.OLS(y_train, sm.add_constant(X_train[selected_features], has_constant='add')).fit()

train_linear_pred = final_model.predict(sm.add_constant(X_train[selected_features], has_constant='add'))
linear_pred = final_model.predict(sm.add_constant(X_test[selected_features], has_constant='add'))


print(f'Number of Selected Features: {len(selected_features)}')
print("Selected Features:")
for feature in selected_features:
    print(f" - {feature}")

In [103]:
# Train-Test Split (regressors restricted to the selected features)

lag_source = df_30.drop(columns=['date'])
lagged_frames = [lag_source.shift(lag).add_suffix(f'_lag{lag}') for lag in (1, 2, 3)]

# Drop the first three rows, whose lags are incomplete.
y = df_30['avg_tens30'].iloc[3:].reset_index(drop=True)
X = pd.concat(lagged_frames, axis=1)[selected_features].iloc[3:].reset_index(drop=True)

X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False, test_size=0.29878)

In [104]:
# Hyperparameters Tuning

## Stationarity check on the training target (guides the choice of the integration order d)

tsplot(y=y_train)

In [105]:
# Same diagnostics after first-order differencing.
y_train_diff = y_train.diff(periods=1).dropna()
tsplot(y=y_train_diff)

In [106]:
## AIC search over the autoregressive order p and the moving-average order q for the ARIMA model

d = 1
ps = range(11)
qs = range(11)

# Every (p, q) combination with p, q in 0..10
order_list = list(product(ps, qs))

result_df, order_arima = optimize_ARIMA(endog=y_train, order_list=order_list, d=d)
result_df

In [107]:
## AIC search over the autoregressive order p and the moving-average order q for the ARIMAX model

result_df, order_arimax = optimize_ARIMAX(
    endog=y_train,
    exog=X_train[selected_features],
    order_list=order_list,
    d=d,
)
result_df

In [108]:
# Model Training and Coefficients Analysis

# ARIMA: target only
model_arima = SARIMAX(y_train, order=order_arima)
results_arima = model_arima.fit(disp=False)

# In-sample predictions are recovered as observed minus residual.
res_train_arima = results_arima.resid
train_arima_pred = y_train - res_train_arima


# ARIMAX: target plus the selected lagged regressors
model = SARIMAX(y_train, exog=X_train[selected_features], order=order_arimax)
results = model.fit(disp=False)

res_train_arimax = results.resid
train_arimax_pred = y_train - res_train_arimax

print(results.summary())

In [109]:
# Model Diagnostics: residual analysis of the fitted ARIMAX model

diagnostics_size = (20, 10)
results.plot_diagnostics(figsize=diagnostics_size)
plt.show()

In [110]:
# Forecasting on Test Set

## ARIMA Rolling Forecasting

arima_pred = []
y_new_train = y_train.copy()

for i, observed in enumerate(y_test):

    # One-step-ahead prediction for the first position past the current history.
    next_position = len(y_new_train)
    forecast = results_arima.get_prediction(start=next_position, end=next_position)
    forecast_value = forecast.predicted_mean.values[0]
    arima_pred.append(forecast_value)

    # Append the true value, then refit on the extended history.
    next_label = y_new_train.index[-1] + 1
    y_new_train = pd.concat([y_new_train, pd.Series([observed], index=[next_label])])

    model = SARIMAX(endog=y_new_train, order=order_arima)
    results_arima = model.fit(disp=False)


## ARIMAX Rolling Forecasting

arimax_pred = []
y_new_train = y_train.copy()
X_new_train = X_train.copy()

for i, observed in enumerate(y_test):

    # Regressors of the day being forecast.
    next_exog = X_test.iloc[i]

    forecast = results.get_forecast(steps=1, exog=next_exog)
    forecast_value = forecast.predicted_mean.values[0]
    arimax_pred.append(forecast_value)

    # Append the true value and its regressors, then refit on the extended history.
    next_label = y_new_train.index[-1] + 1
    y_new_train = pd.concat([y_new_train, pd.Series([observed], index=[next_label])])
    X_new_train = pd.concat([X_new_train, next_exog.to_frame().transpose()], ignore_index=True)

    model = SARIMAX(endog=y_new_train, exog=X_new_train, order=order_arimax)
    results = model.fit(disp=False)


# Test-set predictions of every method, side by side with the actual values.
df_predictions = pd.DataFrame({
    'Real': y_test,
    'Naive Prediction': naive_pred[-len(y_test):],
    'Linear Prediction': linear_pred,
    'ARIMA Prediction': arima_pred,
    'ARIMAX Prediction': arimax_pred,
})
df_predictions.head(15)

In [111]:
# Results

def _metric_triplet(actual, predicted):
    """Return (MAPE, MAE, RMSE) for one set of predictions, rounded to 3 decimals."""
    return tuple(round(metric(actual, predicted), 3) for metric in (mape, mae, rmse))


# The naive method is scored on train (minus its first point) and test together.
naive_actual = pd.concat([y_train[1:], y_test])
mape_naive, mae_naive, rmse_naive = _metric_triplet(naive_actual, naive_pred)

# Training-set scores
mape_train_linear_all, mae_train_linear_all, rmse_train_linear_all = _metric_triplet(y_train, all_features_predictions)
mape_train_linear, mae_train_linear, rmse_train_linear = _metric_triplet(y_train, train_linear_pred)
mape_train_arima, mae_train_arima, rmse_train_arima = _metric_triplet(y_train, train_arima_pred)
mape_train_arimax, mae_train_arimax, rmse_train_arimax = _metric_triplet(y_train, train_arimax_pred)

# Test-set scores
mape_linear_all, mae_linear_all, rmse_linear_all = _metric_triplet(y_test, all_features_predictions_test)
mape_linear, mae_linear, rmse_linear = _metric_triplet(y_test, linear_pred)
mape_arima, mae_arima, rmse_arima = _metric_triplet(y_test, arima_pred)
mape_ARIMAX, mae_ARIMAX, rmse_ARIMAX = _metric_triplet(y_test, arimax_pred)


# One column per method, one row per metric.
score_columns = [
    ('Naive Method', (mape_naive, mae_naive, rmse_naive)),
    ('LM Train ALL', (mape_train_linear_all, mae_train_linear_all, rmse_train_linear_all)),
    ('LM Train', (mape_train_linear, mae_train_linear, rmse_train_linear)),
    ('ARIMA Train', (mape_train_arima, mae_train_arima, rmse_train_arima)),
    ('ARIMAX Train', (mape_train_arimax, mae_train_arimax, rmse_train_arimax)),
    ('LM Test ALL', (mape_linear_all, mae_linear_all, rmse_linear_all)),
    ('LM Test', (mape_linear, mae_linear, rmse_linear)),
    ('ARIMA Test', (mape_arima, mae_arima, rmse_arima)),
    ('ARIMAX Test', (mape_ARIMAX, mae_ARIMAX, rmse_ARIMAX)),
]

table = [['Metric'] + [method for method, _ in score_columns]]
for row_position, metric_name in enumerate(['MAPE', 'MAE', 'RMSE']):
    table.append([metric_name] + [scores[row_position] for _, scores in score_columns])

print(tabulate(table, headers='firstrow', tablefmt='fancy_grid'))

In [112]:
# Predicted Time Series Plot

# Test-set residuals of the naive and ARIMAX forecasts.
residuals_naive = y_test - naive_pred[-len(y_test):]
residuals_ARIMAX = y_test - arimax_pred

fig, ax = plt.subplots(figsize=(15, 5))

combined_index = np.concatenate([y_train.index, y_test.index])
combined_index_1 = np.concatenate([y_train.index, y_test[:-1].index])  # not used in this cell

# Give the ARIMAX forecasts the positions that follow the training set.
new_indices = np.arange(len(y_train), len(y_train) + len(y_test))
arimax_pred_df = pd.DataFrame(arimax_pred, index=new_indices, columns=['ARIMAX Prediction'])

ax.plot(combined_index, np.concatenate([y_train, y_test]), label='Original Series', color='blue')

# Line style is passed through linestyle= instead of a fmt string: a fmt such
# as 'b-' also carries a colour that conflicts with the color= keyword.
ax.plot(y_test, linestyle='-', label='Actual', color='black')

ax.plot(naive_pred[-len(y_test):], linestyle=':', label='Naive Method', color='red')
ax.scatter(combined_index[-len(y_test):], residuals_naive, color='orange', label='Residuals Naive', marker='o')

# The last ARIMAX point (and its residual) is left out of the plot.
ax.plot(arimax_pred_df.iloc[:-1], linestyle='--', label='ARIMAX', color='green')
ax.scatter(combined_index[-len(y_test):-1], residuals_ARIMAX[:-1], color='purple', label='Residuals ARIMAX', marker='x')

# Zero reference line for the residuals.
ax.axhline(y=0, color='gray', linestyle='--', linewidth=1)

ax.set_xlabel('Time')
ax.set_ylabel('Avg Tens 30')
ax.legend()

plt.show()

#### Sensor 71

In [113]:
# Linear Regression and Feature Selection for the Tensiometer (60)

y = df_60['avg_tens60']
X = df_60.drop(['date'], axis=1)

# Predictors are the 1-, 2- and 3-day lags of every column.
X = X.shift(1).add_suffix('_lag1').join(X.shift(2).add_suffix('_lag2')).join(X.shift(3).add_suffix('_lag3'))

# The lagged target itself is excluded from the regressors.
columns_to_drop = ['avg_tens60_lag1', 'avg_tens60_lag2', 'avg_tens60_lag3']
X = X.drop(columns=columns_to_drop, errors='ignore')

# The first three rows have incomplete lags.
y = y.iloc[3:].reset_index(drop=True)
X = X.iloc[3:].reset_index(drop=True)

# Chronological split (no shuffling).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.29878, shuffle=False)


# Naive method: previous observation; the first test point uses the last training value.
naive_pred = pd.concat([y_train.shift(1).fillna(y_train.iloc[-1])[1:], y_test.shift(1).fillna(y_train.iloc[-1])])

# has_constant='add' always inserts the intercept column. The default
# ('skip') omits it whenever the matrix already holds a constant column,
# which would leave the test design misaligned with the fitted parameters.
all_features_model = sm.OLS(y_train, sm.add_constant(X_train, has_constant='add')).fit()
all_features_predictions = all_features_model.predict(sm.add_constant(X_train, has_constant='add'))
all_features_predictions_test = all_features_model.predict(sm.add_constant(X_test, has_constant='add'))

selected_features = stepwise_bidirectional_selection(X_train, y_train, method='aic')
final_model = sm.OLS(y_train, sm.add_constant(X_train[selected_features], has_constant='add')).fit()

train_linear_pred = final_model.predict(sm.add_constant(X_train[selected_features], has_constant='add'))
linear_pred = final_model.predict(sm.add_constant(X_test[selected_features], has_constant='add'))

print(f'Number of Selected Features: {len(selected_features)}')
print("Selected Features:")
for feature in selected_features:
    print(f" - {feature}")

In [114]:
# Train-Test Split (regressors restricted to the selected features)

lag_source = df_60.drop(columns=['date'])
lagged_frames = [lag_source.shift(lag).add_suffix(f'_lag{lag}') for lag in (1, 2, 3)]

# Drop the first three rows, whose lags are incomplete.
y = df_60['avg_tens60'].iloc[3:].reset_index(drop=True)
X = pd.concat(lagged_frames, axis=1)[selected_features].iloc[3:].reset_index(drop=True)

X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False, test_size=0.29878)

In [115]:
# Stationarity check on the training target (guides the choice of d).
tsplot(y=y_train)

In [116]:
# Same diagnostics after first-order differencing.
y_train_diff = y_train.diff(periods=1).dropna()
tsplot(y=y_train_diff)

In [117]:
# AIC search over (p, q) for the ARIMA model, with p, q in 0..10
d = 1
ps = range(11)
qs = range(11)

order_list = list(product(ps, qs))

result_df, order_arima = optimize_ARIMA(endog=y_train, order_list=order_list, d=d)
result_df

In [118]:
# AIC search over (p, q) for the ARIMAX model, using the same candidate orders
result_df, order_arimax = optimize_ARIMAX(
    endog=y_train,
    exog=X_train[selected_features],
    order_list=order_list,
    d=d,
)
result_df

In [119]:
# ARIMA: target only
model_arima = SARIMAX(y_train, order=order_arima)
results_arima = model_arima.fit(disp=False)

# In-sample predictions are recovered as observed minus residual.
res_train_arima = results_arima.resid
train_arima_pred = y_train - res_train_arima


# ARIMAX: target plus the selected lagged regressors
model = SARIMAX(y_train, exog=X_train[selected_features], order=order_arimax)
results = model.fit(disp=False)

res_train_arimax = results.resid
train_arimax_pred = y_train - res_train_arimax

print(results.summary())

In [120]:
# Residual diagnostics of the fitted ARIMAX model
diagnostics_size = (20, 10)
results.plot_diagnostics(figsize=diagnostics_size)
plt.show()

In [121]:
# ARIMA rolling one-step-ahead forecasting on the test set
arima_pred = []
y_new_train = y_train.copy()

for i, observed in enumerate(y_test):

    # One-step-ahead prediction for the first position past the current history.
    next_position = len(y_new_train)
    forecast = results_arima.get_prediction(start=next_position, end=next_position)
    forecast_value = forecast.predicted_mean.values[0]
    arima_pred.append(forecast_value)

    # Append the true value, then refit on the extended history.
    next_label = y_new_train.index[-1] + 1
    y_new_train = pd.concat([y_new_train, pd.Series([observed], index=[next_label])])

    model = SARIMAX(endog=y_new_train, order=order_arima)
    results_arima = model.fit(disp=False)


# ARIMAX rolling one-step-ahead forecasting on the test set
arimax_pred = []
y_new_train = y_train.copy()
X_new_train = X_train.copy()

for i, observed in enumerate(y_test):

    # Regressors of the day being forecast.
    next_exog = X_test.iloc[i]

    forecast = results.get_forecast(steps=1, exog=next_exog)
    forecast_value = forecast.predicted_mean.values[0]
    arimax_pred.append(forecast_value)

    # Append the true value and its regressors, then refit on the extended history.
    next_label = y_new_train.index[-1] + 1
    y_new_train = pd.concat([y_new_train, pd.Series([observed], index=[next_label])])
    X_new_train = pd.concat([X_new_train, next_exog.to_frame().transpose()], ignore_index=True)

    model = SARIMAX(endog=y_new_train, exog=X_new_train, order=order_arimax)
    results = model.fit(disp=False)


# Test-set predictions of every method, side by side with the actual values.
df_predictions = pd.DataFrame({
    'Real': y_test,
    'Naive Prediction': naive_pred[-len(y_test):],
    'Linear Prediction': linear_pred,
    'ARIMA Prediction': arima_pred,
    'ARIMAX Prediction': arimax_pred,
})
df_predictions.head(15)

In [122]:
def _scores(y_true, y_hat):
    """Return (MAPE, MAE, RMSE) for one truth/prediction pair, each rounded to 3 decimals.

    mape, mae and rmse are helper functions defined outside this chunk.
    """
    return tuple(round(metric(y_true, y_hat), 3) for metric in (mape, mae, rmse))


# Naive baseline: scored over the training data (from its second row) plus the
# test data, unlike the other columns, which are scored per split.
mape_naive, mae_naive, rmse_naive = _scores(pd.concat([y_train[1:], y_test]), naive_pred)

# Scores on the training data.
mape_train_linear_all, mae_train_linear_all, rmse_train_linear_all = _scores(y_train, all_features_predictions)
mape_train_linear, mae_train_linear, rmse_train_linear = _scores(y_train, train_linear_pred)
mape_train_arima, mae_train_arima, rmse_train_arima = _scores(y_train, train_arima_pred)
mape_train_arimax, mae_train_arimax, rmse_train_arimax = _scores(y_train, train_arimax_pred)

# Scores on the test data.
mape_linear_all, mae_linear_all, rmse_linear_all = _scores(y_test, all_features_predictions_test)
mape_linear, mae_linear, rmse_linear = _scores(y_test, linear_pred)
mape_arima, mae_arima, rmse_arima = _scores(y_test, arima_pred)
mape_ARIMAX, mae_ARIMAX, rmse_ARIMAX = _scores(y_test, arimax_pred)


# One row per metric, one column per model/split.
table = [
    ['Metric', 'Naive Method', 'LM Train ALL', 'LM Train', 'ARIMA Train', 'ARIMAX Train',
     'LM Test ALL', 'LM Test', 'ARIMA Test', 'ARIMAX Test'],
    ['MAPE', mape_naive, mape_train_linear_all, mape_train_linear, mape_train_arima, mape_train_arimax,
     mape_linear_all, mape_linear, mape_arima, mape_ARIMAX],
    ['MAE', mae_naive, mae_train_linear_all, mae_train_linear, mae_train_arima, mae_train_arimax,
     mae_linear_all, mae_linear, mae_arima, mae_ARIMAX],
    ['RMSE', rmse_naive, rmse_train_linear_all, rmse_train_linear, rmse_train_arima, rmse_train_arimax,
     rmse_linear_all, rmse_linear, rmse_arima, rmse_ARIMAX],
]

print(tabulate(table, headers='firstrow', tablefmt='fancy_grid'))

In [124]:
# Test-set residuals (actual minus prediction) for the two models shown.
residuals_naive = y_test - naive_pred[-len(y_test):]
residuals_ARIMAX = y_test - arimax_pred

fig, ax = plt.subplots(figsize=(15, 5))

combined_index = np.concatenate([y_train.index, y_test.index])
# Not used anywhere in this cell; kept in case another cell reads it.
combined_index_1 = np.concatenate([y_train.index, y_test[:-1].index])

# Index the ARIMAX forecasts by the positions that follow the training data.
new_indices = np.arange(len(y_train), len(y_train) + len(y_test))
arimax_pred_df = pd.DataFrame(arimax_pred, index=new_indices, columns=['ARIMAX Prediction'])

# Line styles are passed via `linestyle=` rather than a format string: the old
# format strings ('b-', 'r:', 'k--') also carried a colour that contradicted
# the `color=` keyword. The keyword already took precedence, so the rendered
# colours are unchanged and matplotlib's redundancy warning goes away.

# The full observed series is drawn in blue (labelled 'Train') and the test
# segment is drawn over it in black.
ax.plot(combined_index, np.concatenate([y_train, y_test]), label='Train', color='blue')
ax.plot(y_test, linestyle='-', label='Test', color='black')

ax.plot(naive_pred[-len(y_test):], linestyle=':', label='Naive Method', color='red')
ax.scatter(combined_index[-len(y_test):], residuals_naive, color='orange', label='Naive Residuals', marker='o')

# NOTE(review): the last ARIMAX forecast and residual are left out ([:-1])
# while the naive series is plotted in full. Confirm this is intended.
ax.plot(arimax_pred_df.iloc[:-1], linestyle='--', label='ARIMAX', color='green')
ax.scatter(combined_index[-len(y_test):-1], residuals_ARIMAX[:-1], color='purple', label='ARIMAX Residuals', marker='x')

# Zero reference line for the residual markers.
ax.axhline(y=0, color='gray', linestyle='--', linewidth=1)

ax.set_xlabel('Time')
ax.set_ylabel('Avg Tens 60')
ax.legend()

plt.show()

### Group 2

In [45]:
# Statistic prefixes and variable suffixes used to build the flat column names.
stat_prefixes = ['min', 'max', 'avg', 'med', 'sum']
variable_names = ['hum', 'temp', 'solar', 'irr', 'rain', 'tens30', 'tens60']

# Rows of group '2', summarised per (timestamp, description) with
# min / max / mean / median / sum of `value`.
df_group_2 = df_rovere.loc[df_rovere['group_id'].eq('2')].reset_index(drop=True)
df_group_2 = (
    df_group_2
    .groupby(['timestamp', 'description'])['value']
    .agg(val_min='min', val_max='max', val_avg='mean', val_med='median', val_sum='sum')
    .reset_index()
)

# Wide layout: one row per timestamp, one column per (statistic, description).
df_pivot_2 = df_group_2.pivot(
    index='timestamp',
    columns='description',
    values=[f'val_{stat}' for stat in stat_prefixes],
).reset_index()
df_pivot_2.columns.name = None
# NOTE(review): the hard-coded names assume the pivoted description columns
# come out in the order hum, temp, solar, irr, rain, tens30, tens60. Confirm
# against the actual `description` values.
df_pivot_2.columns = ['date'] + [f'{stat}_{var}' for stat in stat_prefixes for var in variable_names]
df_pivot_2 = df_pivot_2.dropna().reset_index(drop=True)

# Working frame with the listed columns removed.
columns_to_drop = ['min_irr', 'max_irr', 'avg_irr', 'med_irr', 'min_rain', 'sum_hum', 'sum_temp', 'sum_solar', 'avg_rain']
df = df_pivot_2.drop(columns=columns_to_drop).dropna().reset_index(drop=True)

# One frame per target: df_30 has no tens60 columns and df_60 has no tens30 columns.
col_drop_30 = [f'{stat}_tens60' for stat in stat_prefixes]
df_30 = df.drop(columns=col_drop_30).dropna().reset_index(drop=True)

col_drop_60 = [f'{stat}_tens30' for stat in stat_prefixes]
df_60 = df.drop(columns=col_drop_60).dropna().reset_index(drop=True)

#### Sensor 76

In [46]:
# Target is the avg_tens30 column; candidate predictors are every non-date
# column of df_30 lagged by 1, 2 and 3 rows.
y = df_30['avg_tens30']
X = df_30.drop(['date'], axis=1)

X = X.shift(1).add_suffix('_lag1').join(X.shift(2).add_suffix('_lag2')).join(X.shift(3).add_suffix('_lag3'))

# The target's own lags are excluded from the regressors.
columns_to_drop = ['avg_tens30_lag1', 'avg_tens30_lag2', 'avg_tens30_lag3']
X = X.drop(columns=columns_to_drop, errors='ignore')

# Discard the first three rows, whose lagged values are NaN.
y = y.iloc[3:].reset_index(drop=True)
X = X.iloc[3:].reset_index(drop=True)

# shuffle=False keeps the row order, so the final ~29.9 % of rows form the test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.29878, shuffle=False)


# Naive baseline: every value is predicted by the previous observation, and
# the first test value by the last training value. The training part starts
# at its second row, so the shifted series needs no fill there.
naive_pred = pd.concat([y_train.shift(1).iloc[1:], y_test.shift(1).fillna(y_train.iloc[-1])])

# has_constant='add' always inserts the intercept column. With the default
# ('skip'), add_constant leaves it out whenever some column is a non-zero
# constant within the frame it is given, so the train and test design
# matrices could end up with different columns and predict() would break.
all_features_model = sm.OLS(y_train, sm.add_constant(X_train, has_constant='add')).fit()
all_features_predictions = all_features_model.predict(sm.add_constant(X_train, has_constant='add'))
all_features_predictions_test = all_features_model.predict(sm.add_constant(X_test, has_constant='add'))

# Reduced model on the features returned by the stepwise selection helper
# (defined outside this chunk).
selected_features = stepwise_bidirectional_selection(X_train, y_train, method='aic')
final_model = sm.OLS(y_train, sm.add_constant(X_train[selected_features], has_constant='add')).fit()

train_linear_pred = final_model.predict(sm.add_constant(X_train[selected_features], has_constant='add'))
linear_pred = final_model.predict(sm.add_constant(X_test[selected_features], has_constant='add'))


print(f'Number of Selected Features: {len(selected_features)}')
print("Selected Features:")
for feature in selected_features:
    print(f" - {feature}")

In [47]:
# Rebuild the lag-1..3 feature table from df_30 and keep only the columns
# listed in selected_features.
base_features = df_30.drop(columns=['date'])
lagged_features = pd.concat(
    [base_features.shift(lag).add_suffix(f'_lag{lag}') for lag in (1, 2, 3)],
    axis=1,
)

# The first three rows have incomplete lags, so drop them from both y and X.
y = df_30['avg_tens30'].iloc[3:].reset_index(drop=True)
X = lagged_features[selected_features].iloc[3:].reset_index(drop=True)

# shuffle=False keeps the row order, so the final ~29.9 % of rows form the test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.29878, shuffle=False)


# Candidate orders: every (p, q) pair with p and q in 0..10, d fixed at 1.
d = 1
ps = range(11)
qs = range(11)
order_list = [(p, q) for p in ps for q in qs]

exog_train = X_train[selected_features]
result_df, order_arima = optimize_ARIMA(y_train, order_list, d)
result_df, order_arimax = optimize_ARIMAX(y_train, exog_train, order_list, d)


# Model without exogenous regressors; in-sample predictions are recovered as
# observed value minus residual.
model_arima = SARIMAX(y_train, order=order_arima)
results_arima = model_arima.fit(disp=False)
res_train_arima = results_arima.resid
train_arima_pred = y_train.sub(res_train_arima)


# Model with the selected features as exogenous regressors, handled the same way.
model = SARIMAX(y_train, exog=exog_train, order=order_arimax)
results = model.fit(disp=False)
res_train_arimax = results.resid
train_arimax_pred = y_train.sub(res_train_arimax)


# Rolling one-step-ahead forecast without regressors: forecast the next point,
# then append the true value to the history and refit.
arima_pred = []
y_new_train = y_train.copy()

for i, actual in enumerate(y_test.to_numpy()):
    next_pos = len(y_new_train)
    forecast = results_arima.get_prediction(start=next_pos, end=next_pos)
    forecast_value = forecast.predicted_mean.values[0]
    arima_pred.append(forecast_value)

    new_obs = pd.Series([actual], index=[y_new_train.index[-1] + 1])
    y_new_train = pd.concat([y_new_train, new_obs])

    model = SARIMAX(endog=y_new_train, order=order_arima)
    results_arima = model.fit(disp=False)


# Same scheme with regressors: the matching X_test row feeds each forecast and
# is then appended to the training regressors.
arimax_pred = []
y_new_train = y_train.copy()
X_new_train = X_train.copy()

for i, actual in enumerate(y_test.to_numpy()):
    next_exog = X_test.iloc[i]

    forecast = results.get_forecast(steps=1, exog=next_exog)
    forecast_value = forecast.predicted_mean.values[0]
    arimax_pred.append(forecast_value)

    new_obs = pd.Series([actual], index=[y_new_train.index[-1] + 1])
    y_new_train = pd.concat([y_new_train, new_obs])
    X_new_train = pd.concat([X_new_train, next_exog.to_frame().T], ignore_index=True)

    model = SARIMAX(endog=y_new_train, exog=X_new_train, order=order_arimax)
    results = model.fit(disp=False)


def _scores(y_true, y_hat):
    """Return (MAPE, MAE, RMSE) for one truth/prediction pair, each rounded to 3 decimals.

    mape, mae and rmse are helper functions defined outside this chunk.
    """
    return tuple(round(metric(y_true, y_hat), 3) for metric in (mape, mae, rmse))


# Naive baseline: scored over the training data (from its second row) plus the
# test data, unlike the other columns, which are scored per split.
mape_naive, mae_naive, rmse_naive = _scores(pd.concat([y_train[1:], y_test]), naive_pred)

# Scores on the training data.
mape_train_linear_all, mae_train_linear_all, rmse_train_linear_all = _scores(y_train, all_features_predictions)
mape_train_linear, mae_train_linear, rmse_train_linear = _scores(y_train, train_linear_pred)
mape_train_arima, mae_train_arima, rmse_train_arima = _scores(y_train, train_arima_pred)
mape_train_arimax, mae_train_arimax, rmse_train_arimax = _scores(y_train, train_arimax_pred)

# Scores on the test data.
mape_linear_all, mae_linear_all, rmse_linear_all = _scores(y_test, all_features_predictions_test)
mape_linear, mae_linear, rmse_linear = _scores(y_test, linear_pred)
mape_arima, mae_arima, rmse_arima = _scores(y_test, arima_pred)
mape_ARIMAX, mae_ARIMAX, rmse_ARIMAX = _scores(y_test, arimax_pred)


# One row per metric, one column per model/split.
table = [
    ['Metric', 'Naive Method', 'LM Train ALL', 'LM Train', 'ARIMA Train', 'ARIMAX Train',
     'LM Test ALL', 'LM Test', 'ARIMA Test', 'ARIMAX Test'],
    ['MAPE', mape_naive, mape_train_linear_all, mape_train_linear, mape_train_arima, mape_train_arimax,
     mape_linear_all, mape_linear, mape_arima, mape_ARIMAX],
    ['MAE', mae_naive, mae_train_linear_all, mae_train_linear, mae_train_arima, mae_train_arimax,
     mae_linear_all, mae_linear, mae_arima, mae_ARIMAX],
    ['RMSE', rmse_naive, rmse_train_linear_all, rmse_train_linear, rmse_train_arima, rmse_train_arimax,
     rmse_linear_all, rmse_linear, rmse_arima, rmse_ARIMAX],
]

print(tabulate(table, headers='firstrow', tablefmt='fancy_grid'))

#### Sensor 69

In [48]:
# Target is the avg_tens60 column; candidate predictors are every non-date
# column of df_60 lagged by 1, 2 and 3 rows.
y = df_60['avg_tens60']
X = df_60.drop(['date'], axis=1)

X = X.shift(1).add_suffix('_lag1').join(X.shift(2).add_suffix('_lag2')).join(X.shift(3).add_suffix('_lag3'))

# The target's own lags are excluded from the regressors.
columns_to_drop = ['avg_tens60_lag1', 'avg_tens60_lag2', 'avg_tens60_lag3']
X = X.drop(columns=columns_to_drop, errors='ignore')

# Discard the first three rows, whose lagged values are NaN.
y = y.iloc[3:].reset_index(drop=True)
X = X.iloc[3:].reset_index(drop=True)

# shuffle=False keeps the row order, so the final ~29.9 % of rows form the test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.29878, shuffle=False)


# Naive baseline: every value is predicted by the previous observation, and
# the first test value by the last training value. The training part starts
# at its second row, so the shifted series needs no fill there.
naive_pred = pd.concat([y_train.shift(1).iloc[1:], y_test.shift(1).fillna(y_train.iloc[-1])])

# has_constant='add' always inserts the intercept column. With the default
# ('skip'), add_constant leaves it out whenever some column is a non-zero
# constant within the frame it is given, so the train and test design
# matrices could end up with different columns and predict() would break.
all_features_model = sm.OLS(y_train, sm.add_constant(X_train, has_constant='add')).fit()
all_features_predictions = all_features_model.predict(sm.add_constant(X_train, has_constant='add'))
all_features_predictions_test = all_features_model.predict(sm.add_constant(X_test, has_constant='add'))

# Reduced model on the features returned by the stepwise selection helper
# (defined outside this chunk).
selected_features = stepwise_bidirectional_selection(X_train, y_train, method='aic')
final_model = sm.OLS(y_train, sm.add_constant(X_train[selected_features], has_constant='add')).fit()

train_linear_pred = final_model.predict(sm.add_constant(X_train[selected_features], has_constant='add'))
linear_pred = final_model.predict(sm.add_constant(X_test[selected_features], has_constant='add'))


print(f'Number of Selected Features: {len(selected_features)}')
print("Selected Features:")
for feature in selected_features:
    print(f" - {feature}")

In [49]:
# Rebuild the lag-1..3 feature table from df_60 and keep only the columns
# listed in selected_features.
base_features = df_60.drop(columns=['date'])
lagged_features = pd.concat(
    [base_features.shift(lag).add_suffix(f'_lag{lag}') for lag in (1, 2, 3)],
    axis=1,
)

# The first three rows have incomplete lags, so drop them from both y and X.
y = df_60['avg_tens60'].iloc[3:].reset_index(drop=True)
X = lagged_features[selected_features].iloc[3:].reset_index(drop=True)

# shuffle=False keeps the row order, so the final ~29.9 % of rows form the test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.29878, shuffle=False)


# Candidate orders: every (p, q) pair with p and q in 0..10, d fixed at 1.
d = 1
ps = range(11)
qs = range(11)
order_list = [(p, q) for p in ps for q in qs]

exog_train = X_train[selected_features]
result_df, order_arima = optimize_ARIMA(y_train, order_list, d)
result_df, order_arimax = optimize_ARIMAX(y_train, exog_train, order_list, d)


# Model without exogenous regressors; in-sample predictions are recovered as
# observed value minus residual.
model_arima = SARIMAX(y_train, order=order_arima)
results_arima = model_arima.fit(disp=False)
res_train_arima = results_arima.resid
train_arima_pred = y_train.sub(res_train_arima)


# Model with the selected features as exogenous regressors, handled the same way.
model = SARIMAX(y_train, exog=exog_train, order=order_arimax)
results = model.fit(disp=False)
res_train_arimax = results.resid
train_arimax_pred = y_train.sub(res_train_arimax)


# Rolling one-step-ahead forecast without regressors: forecast the next point,
# then append the true value to the history and refit.
arima_pred = []
y_new_train = y_train.copy()

for i, actual in enumerate(y_test.to_numpy()):
    next_pos = len(y_new_train)
    forecast = results_arima.get_prediction(start=next_pos, end=next_pos)
    forecast_value = forecast.predicted_mean.values[0]
    arima_pred.append(forecast_value)

    new_obs = pd.Series([actual], index=[y_new_train.index[-1] + 1])
    y_new_train = pd.concat([y_new_train, new_obs])

    model = SARIMAX(endog=y_new_train, order=order_arima)
    results_arima = model.fit(disp=False)


# Same scheme with regressors: the matching X_test row feeds each forecast and
# is then appended to the training regressors.
arimax_pred = []
y_new_train = y_train.copy()
X_new_train = X_train.copy()

for i, actual in enumerate(y_test.to_numpy()):
    next_exog = X_test.iloc[i]

    forecast = results.get_forecast(steps=1, exog=next_exog)
    forecast_value = forecast.predicted_mean.values[0]
    arimax_pred.append(forecast_value)

    new_obs = pd.Series([actual], index=[y_new_train.index[-1] + 1])
    y_new_train = pd.concat([y_new_train, new_obs])
    X_new_train = pd.concat([X_new_train, next_exog.to_frame().T], ignore_index=True)

    model = SARIMAX(endog=y_new_train, exog=X_new_train, order=order_arimax)
    results = model.fit(disp=False)


def _scores(y_true, y_hat):
    """Return (MAPE, MAE, RMSE) for one truth/prediction pair, each rounded to 3 decimals.

    mape, mae and rmse are helper functions defined outside this chunk.
    """
    return tuple(round(metric(y_true, y_hat), 3) for metric in (mape, mae, rmse))


# Naive baseline: scored over the training data (from its second row) plus the
# test data, unlike the other columns, which are scored per split.
mape_naive, mae_naive, rmse_naive = _scores(pd.concat([y_train[1:], y_test]), naive_pred)

# Scores on the training data.
mape_train_linear_all, mae_train_linear_all, rmse_train_linear_all = _scores(y_train, all_features_predictions)
mape_train_linear, mae_train_linear, rmse_train_linear = _scores(y_train, train_linear_pred)
mape_train_arima, mae_train_arima, rmse_train_arima = _scores(y_train, train_arima_pred)
mape_train_arimax, mae_train_arimax, rmse_train_arimax = _scores(y_train, train_arimax_pred)

# Scores on the test data.
mape_linear_all, mae_linear_all, rmse_linear_all = _scores(y_test, all_features_predictions_test)
mape_linear, mae_linear, rmse_linear = _scores(y_test, linear_pred)
mape_arima, mae_arima, rmse_arima = _scores(y_test, arima_pred)
mape_ARIMAX, mae_ARIMAX, rmse_ARIMAX = _scores(y_test, arimax_pred)


# One row per metric, one column per model/split.
table = [
    ['Metric', 'Naive Method', 'LM Train ALL', 'LM Train', 'ARIMA Train', 'ARIMAX Train',
     'LM Test ALL', 'LM Test', 'ARIMA Test', 'ARIMAX Test'],
    ['MAPE', mape_naive, mape_train_linear_all, mape_train_linear, mape_train_arima, mape_train_arimax,
     mape_linear_all, mape_linear, mape_arima, mape_ARIMAX],
    ['MAE', mae_naive, mae_train_linear_all, mae_train_linear, mae_train_arima, mae_train_arimax,
     mae_linear_all, mae_linear, mae_arima, mae_ARIMAX],
    ['RMSE', rmse_naive, rmse_train_linear_all, rmse_train_linear, rmse_train_arima, rmse_train_arimax,
     rmse_linear_all, rmse_linear, rmse_arima, rmse_ARIMAX],
]

print(tabulate(table, headers='firstrow', tablefmt='fancy_grid'))

### Group 3

In [50]:
# Statistic prefixes and variable suffixes used to build the flat column names.
stat_prefixes = ['min', 'max', 'avg', 'med', 'sum']
variable_names = ['hum', 'temp', 'solar', 'irr', 'rain', 'tens30', 'tens60']

# Rows of group '3', summarised per (timestamp, description) with
# min / max / mean / median / sum of `value`.
df_group_3 = df_rovere.loc[df_rovere['group_id'].eq('3')].reset_index(drop=True)
df_group_3 = (
    df_group_3
    .groupby(['timestamp', 'description'])['value']
    .agg(val_min='min', val_max='max', val_avg='mean', val_med='median', val_sum='sum')
    .reset_index()
)

# Wide layout: one row per timestamp, one column per (statistic, description).
df_pivot_3 = df_group_3.pivot(
    index='timestamp',
    columns='description',
    values=[f'val_{stat}' for stat in stat_prefixes],
).reset_index()
df_pivot_3.columns.name = None
# NOTE(review): the hard-coded names assume the pivoted description columns
# come out in the order hum, temp, solar, irr, rain, tens30, tens60. Confirm
# against the actual `description` values.
df_pivot_3.columns = ['date'] + [f'{stat}_{var}' for stat in stat_prefixes for var in variable_names]
df_pivot_3 = df_pivot_3.dropna().reset_index(drop=True)

# Working frame with the listed columns removed.
columns_to_drop = ['min_irr', 'max_irr', 'avg_irr', 'med_irr', 'min_rain', 'sum_hum', 'sum_temp', 'sum_solar', 'avg_rain']
df = df_pivot_3.drop(columns=columns_to_drop).dropna().reset_index(drop=True)


# One frame per target: df_30 has no tens60 columns and df_60 has no tens30 columns.
col_drop_30 = [f'{stat}_tens60' for stat in stat_prefixes]
df_30 = df.drop(columns=col_drop_30).dropna().reset_index(drop=True)

col_drop_60 = [f'{stat}_tens30' for stat in stat_prefixes]
df_60 = df.drop(columns=col_drop_60).dropna().reset_index(drop=True)

#### Sensor 73

In [51]:
# Target is the avg_tens30 column; candidate predictors are every non-date
# column of df_30 lagged by 1, 2 and 3 rows.
y = df_30['avg_tens30']
X = df_30.drop(['date'], axis=1)

X = X.shift(1).add_suffix('_lag1').join(X.shift(2).add_suffix('_lag2')).join(X.shift(3).add_suffix('_lag3'))

# The target's own lags are excluded from the regressors.
columns_to_drop = ['avg_tens30_lag1', 'avg_tens30_lag2', 'avg_tens30_lag3']
X = X.drop(columns=columns_to_drop, errors='ignore')

# Discard the first three rows, whose lagged values are NaN.
y = y.iloc[3:].reset_index(drop=True)
X = X.iloc[3:].reset_index(drop=True)

# shuffle=False keeps the row order, so the final ~29.9 % of rows form the test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.29878, shuffle=False)


# Naive baseline: every value is predicted by the previous observation, and
# the first test value by the last training value. The training part starts
# at its second row, so the shifted series needs no fill there.
naive_pred = pd.concat([y_train.shift(1).iloc[1:], y_test.shift(1).fillna(y_train.iloc[-1])])

# has_constant='add' always inserts the intercept column. With the default
# ('skip'), add_constant leaves it out whenever some column is a non-zero
# constant within the frame it is given, so the train and test design
# matrices could end up with different columns and predict() would break.
all_features_model = sm.OLS(y_train, sm.add_constant(X_train, has_constant='add')).fit()
all_features_predictions = all_features_model.predict(sm.add_constant(X_train, has_constant='add'))
all_features_predictions_test = all_features_model.predict(sm.add_constant(X_test, has_constant='add'))

# Reduced model on the features returned by the stepwise selection helper
# (defined outside this chunk).
selected_features = stepwise_bidirectional_selection(X_train, y_train, method='aic')
final_model = sm.OLS(y_train, sm.add_constant(X_train[selected_features], has_constant='add')).fit()

train_linear_pred = final_model.predict(sm.add_constant(X_train[selected_features], has_constant='add'))
linear_pred = final_model.predict(sm.add_constant(X_test[selected_features], has_constant='add'))


print(f'Number of Selected Features: {len(selected_features)}')
print("Selected Features:")
for feature in selected_features:
    print(f" - {feature}")

In [52]:
# Rebuild the lag-1..3 feature table from df_30 and keep only the columns
# listed in selected_features.
base_features = df_30.drop(columns=['date'])
lagged_features = pd.concat(
    [base_features.shift(lag).add_suffix(f'_lag{lag}') for lag in (1, 2, 3)],
    axis=1,
)

# The first three rows have incomplete lags, so drop them from both y and X.
y = df_30['avg_tens30'].iloc[3:].reset_index(drop=True)
X = lagged_features[selected_features].iloc[3:].reset_index(drop=True)

# shuffle=False keeps the row order, so the final ~29.9 % of rows form the test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.29878, shuffle=False)


# Candidate orders: every (p, q) pair with p and q in 0..10, d fixed at 1.
d = 1
ps = range(11)
qs = range(11)
order_list = [(p, q) for p in ps for q in qs]

exog_train = X_train[selected_features]
result_df, order_arima = optimize_ARIMA(y_train, order_list, d)
result_df, order_arimax = optimize_ARIMAX(y_train, exog_train, order_list, d)


# Model without exogenous regressors; in-sample predictions are recovered as
# observed value minus residual.
model_arima = SARIMAX(y_train, order=order_arima)
results_arima = model_arima.fit(disp=False)
res_train_arima = results_arima.resid
train_arima_pred = y_train.sub(res_train_arima)


# Model with the selected features as exogenous regressors, handled the same way.
model = SARIMAX(y_train, exog=exog_train, order=order_arimax)
results = model.fit(disp=False)
res_train_arimax = results.resid
train_arimax_pred = y_train.sub(res_train_arimax)


# Rolling one-step-ahead forecast without regressors: forecast the next point,
# then append the true value to the history and refit.
arima_pred = []
y_new_train = y_train.copy()

for i, actual in enumerate(y_test.to_numpy()):
    next_pos = len(y_new_train)
    forecast = results_arima.get_prediction(start=next_pos, end=next_pos)
    forecast_value = forecast.predicted_mean.values[0]
    arima_pred.append(forecast_value)

    new_obs = pd.Series([actual], index=[y_new_train.index[-1] + 1])
    y_new_train = pd.concat([y_new_train, new_obs])

    model = SARIMAX(endog=y_new_train, order=order_arima)
    results_arima = model.fit(disp=False)


# Same scheme with regressors: the matching X_test row feeds each forecast and
# is then appended to the training regressors.
arimax_pred = []
y_new_train = y_train.copy()
X_new_train = X_train.copy()

for i, actual in enumerate(y_test.to_numpy()):
    next_exog = X_test.iloc[i]

    forecast = results.get_forecast(steps=1, exog=next_exog)
    forecast_value = forecast.predicted_mean.values[0]
    arimax_pred.append(forecast_value)

    new_obs = pd.Series([actual], index=[y_new_train.index[-1] + 1])
    y_new_train = pd.concat([y_new_train, new_obs])
    X_new_train = pd.concat([X_new_train, next_exog.to_frame().T], ignore_index=True)

    model = SARIMAX(endog=y_new_train, exog=X_new_train, order=order_arimax)
    results = model.fit(disp=False)


def _scores(y_true, y_hat):
    """Return (MAPE, MAE, RMSE) for one truth/prediction pair, each rounded to 3 decimals.

    mape, mae and rmse are helper functions defined outside this chunk.
    """
    return tuple(round(metric(y_true, y_hat), 3) for metric in (mape, mae, rmse))


# Naive baseline: scored over the training data (from its second row) plus the
# test data, unlike the other columns, which are scored per split.
mape_naive, mae_naive, rmse_naive = _scores(pd.concat([y_train[1:], y_test]), naive_pred)

# Scores on the training data.
mape_train_linear_all, mae_train_linear_all, rmse_train_linear_all = _scores(y_train, all_features_predictions)
mape_train_linear, mae_train_linear, rmse_train_linear = _scores(y_train, train_linear_pred)
mape_train_arima, mae_train_arima, rmse_train_arima = _scores(y_train, train_arima_pred)
mape_train_arimax, mae_train_arimax, rmse_train_arimax = _scores(y_train, train_arimax_pred)

# Scores on the test data.
mape_linear_all, mae_linear_all, rmse_linear_all = _scores(y_test, all_features_predictions_test)
mape_linear, mae_linear, rmse_linear = _scores(y_test, linear_pred)
mape_arima, mae_arima, rmse_arima = _scores(y_test, arima_pred)
mape_ARIMAX, mae_ARIMAX, rmse_ARIMAX = _scores(y_test, arimax_pred)


# One row per metric, one column per model/split.
table = [
    ['Metric', 'Naive Method', 'LM Train ALL', 'LM Train', 'ARIMA Train', 'ARIMAX Train',
     'LM Test ALL', 'LM Test', 'ARIMA Test', 'ARIMAX Test'],
    ['MAPE', mape_naive, mape_train_linear_all, mape_train_linear, mape_train_arima, mape_train_arimax,
     mape_linear_all, mape_linear, mape_arima, mape_ARIMAX],
    ['MAE', mae_naive, mae_train_linear_all, mae_train_linear, mae_train_arima, mae_train_arimax,
     mae_linear_all, mae_linear, mae_arima, mae_ARIMAX],
    ['RMSE', rmse_naive, rmse_train_linear_all, rmse_train_linear, rmse_train_arima, rmse_train_arimax,
     rmse_linear_all, rmse_linear, rmse_arima, rmse_ARIMAX],
]

print(tabulate(table, headers='firstrow', tablefmt='fancy_grid'))

#### Sensor 75

In [53]:
# Target is the avg_tens60 column; candidate predictors are every non-date
# column of df_60 lagged by 1, 2 and 3 rows.
y = df_60['avg_tens60']
X = df_60.drop(['date'], axis=1)

X = X.shift(1).add_suffix('_lag1').join(X.shift(2).add_suffix('_lag2')).join(X.shift(3).add_suffix('_lag3'))

# The target's own lags are excluded from the regressors.
columns_to_drop = ['avg_tens60_lag1', 'avg_tens60_lag2', 'avg_tens60_lag3']
X = X.drop(columns=columns_to_drop, errors='ignore')

# Discard the first three rows, whose lagged values are NaN.
y = y.iloc[3:].reset_index(drop=True)
X = X.iloc[3:].reset_index(drop=True)

# shuffle=False keeps the row order, so the final ~29.9 % of rows form the test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.29878, shuffle=False)


# Naive baseline: every value is predicted by the previous observation, and
# the first test value by the last training value. The training part starts
# at its second row, so the shifted series needs no fill there.
naive_pred = pd.concat([y_train.shift(1).iloc[1:], y_test.shift(1).fillna(y_train.iloc[-1])])

# has_constant='add' always inserts the intercept column. With the default
# ('skip'), add_constant leaves it out whenever some column is a non-zero
# constant within the frame it is given, so the train and test design
# matrices could end up with different columns and predict() would break.
all_features_model = sm.OLS(y_train, sm.add_constant(X_train, has_constant='add')).fit()
all_features_predictions = all_features_model.predict(sm.add_constant(X_train, has_constant='add'))
all_features_predictions_test = all_features_model.predict(sm.add_constant(X_test, has_constant='add'))

# Reduced model on the features returned by the stepwise selection helper
# (defined outside this chunk).
selected_features = stepwise_bidirectional_selection(X_train, y_train, method='aic')
final_model = sm.OLS(y_train, sm.add_constant(X_train[selected_features], has_constant='add')).fit()

train_linear_pred = final_model.predict(sm.add_constant(X_train[selected_features], has_constant='add'))
linear_pred = final_model.predict(sm.add_constant(X_test[selected_features], has_constant='add'))


print(f'Number of Selected Features: {len(selected_features)}')
print("Selected Features:")
for feature in selected_features:
    print(f" - {feature}")

In [54]:
# Rebuild the lag-1..3 feature table from df_60 and keep only the columns
# listed in selected_features.
base_features = df_60.drop(columns=['date'])
lagged_features = pd.concat(
    [base_features.shift(lag).add_suffix(f'_lag{lag}') for lag in (1, 2, 3)],
    axis=1,
)

# The first three rows have incomplete lags, so drop them from both y and X.
y = df_60['avg_tens60'].iloc[3:].reset_index(drop=True)
X = lagged_features[selected_features].iloc[3:].reset_index(drop=True)

# shuffle=False keeps the row order, so the final ~29.9 % of rows form the test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.29878, shuffle=False)


# Candidate orders: every (p, q) pair with p and q in 0..10, d fixed at 1.
d = 1
ps = range(11)
qs = range(11)
order_list = [(p, q) for p in ps for q in qs]

exog_train = X_train[selected_features]
result_df, order_arima = optimize_ARIMA(y_train, order_list, d)
result_df, order_arimax = optimize_ARIMAX(y_train, exog_train, order_list, d)


# Model without exogenous regressors; in-sample predictions are recovered as
# observed value minus residual.
model_arima = SARIMAX(y_train, order=order_arima)
results_arima = model_arima.fit(disp=False)
res_train_arima = results_arima.resid
train_arima_pred = y_train.sub(res_train_arima)


# Model with the selected features as exogenous regressors, handled the same way.
model = SARIMAX(y_train, exog=exog_train, order=order_arimax)
results = model.fit(disp=False)
res_train_arimax = results.resid
train_arimax_pred = y_train.sub(res_train_arimax)


# Rolling one-step-ahead forecast without regressors: forecast the next point,
# then append the true value to the history and refit.
arima_pred = []
y_new_train = y_train.copy()

for i, actual in enumerate(y_test.to_numpy()):
    next_pos = len(y_new_train)
    forecast = results_arima.get_prediction(start=next_pos, end=next_pos)
    forecast_value = forecast.predicted_mean.values[0]
    arima_pred.append(forecast_value)

    new_obs = pd.Series([actual], index=[y_new_train.index[-1] + 1])
    y_new_train = pd.concat([y_new_train, new_obs])

    model = SARIMAX(endog=y_new_train, order=order_arima)
    results_arima = model.fit(disp=False)


# Same scheme with regressors: the matching X_test row feeds each forecast and
# is then appended to the training regressors.
arimax_pred = []
y_new_train = y_train.copy()
X_new_train = X_train.copy()

for i, actual in enumerate(y_test.to_numpy()):
    next_exog = X_test.iloc[i]

    forecast = results.get_forecast(steps=1, exog=next_exog)
    forecast_value = forecast.predicted_mean.values[0]
    arimax_pred.append(forecast_value)

    new_obs = pd.Series([actual], index=[y_new_train.index[-1] + 1])
    y_new_train = pd.concat([y_new_train, new_obs])
    X_new_train = pd.concat([X_new_train, next_exog.to_frame().T], ignore_index=True)

    model = SARIMAX(endog=y_new_train, exog=X_new_train, order=order_arimax)
    results = model.fit(disp=False)


def _scores(y_true, y_hat):
    """Return (MAPE, MAE, RMSE) for one truth/prediction pair, each rounded to 3 decimals.

    mape, mae and rmse are helper functions defined outside this chunk.
    """
    return tuple(round(metric(y_true, y_hat), 3) for metric in (mape, mae, rmse))


# Naive baseline: scored over the training data (from its second row) plus the
# test data, unlike the other columns, which are scored per split.
mape_naive, mae_naive, rmse_naive = _scores(pd.concat([y_train[1:], y_test]), naive_pred)

# Scores on the training data.
mape_train_linear_all, mae_train_linear_all, rmse_train_linear_all = _scores(y_train, all_features_predictions)
mape_train_linear, mae_train_linear, rmse_train_linear = _scores(y_train, train_linear_pred)
mape_train_arima, mae_train_arima, rmse_train_arima = _scores(y_train, train_arima_pred)
mape_train_arimax, mae_train_arimax, rmse_train_arimax = _scores(y_train, train_arimax_pred)

# Scores on the test data.
mape_linear_all, mae_linear_all, rmse_linear_all = _scores(y_test, all_features_predictions_test)
mape_linear, mae_linear, rmse_linear = _scores(y_test, linear_pred)
mape_arima, mae_arima, rmse_arima = _scores(y_test, arima_pred)
mape_ARIMAX, mae_ARIMAX, rmse_ARIMAX = _scores(y_test, arimax_pred)


# One row per metric, one column per model/split.
table = [
    ['Metric', 'Naive Method', 'LM Train ALL', 'LM Train', 'ARIMA Train', 'ARIMAX Train',
     'LM Test ALL', 'LM Test', 'ARIMA Test', 'ARIMAX Test'],
    ['MAPE', mape_naive, mape_train_linear_all, mape_train_linear, mape_train_arima, mape_train_arimax,
     mape_linear_all, mape_linear, mape_arima, mape_ARIMAX],
    ['MAE', mae_naive, mae_train_linear_all, mae_train_linear, mae_train_arima, mae_train_arimax,
     mae_linear_all, mae_linear, mae_arima, mae_ARIMAX],
    ['RMSE', rmse_naive, rmse_train_linear_all, rmse_train_linear, rmse_train_arima, rmse_train_arimax,
     rmse_linear_all, rmse_linear, rmse_arima, rmse_ARIMAX],
]

print(tabulate(table, headers='firstrow', tablefmt='fancy_grid'))

### Group 4

In [55]:
# Statistic prefixes and variable suffixes used to build the flat column names.
stat_prefixes = ['min', 'max', 'avg', 'med', 'sum']
variable_names = ['hum', 'temp', 'solar', 'irr', 'rain', 'tens30', 'tens60']

# Rows of group '4', summarised per (timestamp, description) with
# min / max / mean / median / sum of `value`.
df_group_4 = df_rovere.loc[df_rovere['group_id'].eq('4')].reset_index(drop=True)
df_group_4 = (
    df_group_4
    .groupby(['timestamp', 'description'])['value']
    .agg(val_min='min', val_max='max', val_avg='mean', val_med='median', val_sum='sum')
    .reset_index()
)

# Wide layout: one row per timestamp, one column per (statistic, description).
df_pivot_4 = df_group_4.pivot(
    index='timestamp',
    columns='description',
    values=[f'val_{stat}' for stat in stat_prefixes],
).reset_index()
df_pivot_4.columns.name = None
# NOTE(review): the hard-coded names assume the pivoted description columns
# come out in the order hum, temp, solar, irr, rain, tens30, tens60. Confirm
# against the actual `description` values.
df_pivot_4.columns = ['date'] + [f'{stat}_{var}' for stat in stat_prefixes for var in variable_names]
df_pivot_4 = df_pivot_4.dropna().reset_index(drop=True)

# Working frame with the listed columns removed.
columns_to_drop = ['min_irr', 'max_irr', 'avg_irr', 'med_irr', 'min_rain', 'sum_hum', 'sum_temp', 'sum_solar', 'avg_rain']
df = df_pivot_4.drop(columns=columns_to_drop).dropna().reset_index(drop=True)

# One frame per target: df_30 has no tens60 columns and df_60 has no tens30 columns.
col_drop_30 = [f'{stat}_tens60' for stat in stat_prefixes]
df_30 = df.drop(columns=col_drop_30).dropna().reset_index(drop=True)

col_drop_60 = [f'{stat}_tens30' for stat in stat_prefixes]
df_60 = df.drop(columns=col_drop_60).dropna().reset_index(drop=True)

#### Sensor 74

In [56]:
# --- Linear baselines for `avg_tens30` ---
# Response series and raw predictor table (everything in df_30 except the date).
y = df_30['avg_tens30']
X = df_30.drop(columns=['date'])

# Lagged design matrix: every column shifted by 1, 2 and 3 rows, tagged `_lag<k>`.
X = pd.concat([X.shift(lag).add_suffix(f'_lag{lag}') for lag in (1, 2, 3)], axis=1)

# The response's own lags are removed from the regressors.
columns_to_drop = [f'avg_tens30_lag{lag}' for lag in (1, 2, 3)]
X = X.drop(columns=columns_to_drop, errors='ignore')

# Shifting leaves NaN in the first three rows; discard them and renumber.
y, X = (part.iloc[3:].reset_index(drop=True) for part in (y, X))

# Ordered (unshuffled) split: the trailing rows form the test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.29878, shuffle=False)

# Naive baseline: previous observation as the forecast, over train (minus its first row) and test.
last_train_value = y_train.iloc[-1]
naive_pred = pd.concat([
    y_train.shift(1).fillna(last_train_value)[1:],
    y_test.shift(1).fillna(last_train_value),
])

# OLS on all lagged predictors.
all_features_model = sm.OLS(y_train, sm.add_constant(X_train)).fit()
all_features_predictions, all_features_predictions_test = (
    all_features_model.predict(sm.add_constant(split)) for split in (X_train, X_test)
)

# OLS restricted to the features returned by the stepwise selection helper.
selected_features = stepwise_bidirectional_selection(X_train, y_train, method='aic')
final_model = sm.OLS(y_train, sm.add_constant(X_train[selected_features])).fit()
train_linear_pred, linear_pred = (
    final_model.predict(sm.add_constant(split[selected_features])) for split in (X_train, X_test)
)

# Report the retained features.
print(f'Number of Selected Features: {len(selected_features)}')
print("Selected Features:")
for feature in selected_features:
    print(f" - {feature}")

In [57]:
# ARIMA / ARIMAX modelling of `avg_tens30`, followed by the metric table for all models.
# Relies on `selected_features`, `naive_pred`, `all_features_predictions`,
# `all_features_predictions_test`, `train_linear_pred` and `linear_pred` from the previous cell.
y = df_30['avg_tens30']
X = df_30.drop(['date'], axis=1)

# Lags 1-3 of every column, suffixed `_lag1` / `_lag2` / `_lag3`; keep only the selected ones.
X = X.shift(1).add_suffix('_lag1').join(X.shift(2).add_suffix('_lag2')).join(X.shift(3).add_suffix('_lag3'))
X = X[selected_features]

# Drop the first three rows, which contain NaN introduced by the shifts.
y = y.iloc[3:].reset_index(drop=True)
X = X.iloc[3:].reset_index(drop=True)

# Unshuffled split with the same test fraction as the linear-model cell.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.29878, shuffle=False)


# Candidate (p, q) pairs: every combination of 0..10; `d` is fixed at 1.
ps = range(0, 11, 1)
qs = range(0, 11, 1)
d = 1

order_list = list(product(ps, qs))

# NOTE(review): `optimize_ARIMA` / `optimize_ARIMAX` are defined elsewhere in the notebook;
# presumably each returns a results table and the chosen order - confirm.
# `result_df` is overwritten by the second call and not read again in this cell.
result_df, order_arima = optimize_ARIMA(y_train, order_list, d)
result_df, order_arimax = optimize_ARIMAX(y_train, X_train[selected_features], order_list, d)


# Model without exogenous regressors, fitted on the training series.
model_arima = SARIMAX(endog=y_train, order=order_arima)
results_arima = model_arima.fit(disp=False)

# In-sample predictions recovered as observed minus residual.
res_train_arima = results_arima.resid
train_arima_pred = y_train - res_train_arima


# Model with the selected lagged predictors as exogenous regressors.
model = SARIMAX(endog=y_train, exog=X_train[selected_features], order=order_arimax)
results = model.fit(disp=False)

res_train_arimax = results.resid
train_arimax_pred = y_train - res_train_arimax


# Rolling one-step-ahead ARIMA forecast over the test set: predict the next point,
# append the actual observation, then refit on the extended series.
arima_pred = []
y_new_train = y_train.copy()

for i in range(len(y_test)):
    
    # Position len(y_new_train) is the first point after the current sample.
    forecast = results_arima.get_prediction(start=len(y_new_train), end=len(y_new_train))
    forecast_value = forecast.predicted_mean.values[0]
    arima_pred.append(forecast_value)

    # Extend the series with the observed test value, continuing the integer index.
    y_new_train = pd.concat([y_new_train, pd.Series([y_test.iloc[i]], index=[y_new_train.index[-1] + 1])])
    
    # Rebinds `model` only; the ARIMAX fit stays available through `results`.
    model = SARIMAX(endog=y_new_train, order=order_arima)
    results_arima = model.fit(disp=False)


# Same rolling scheme for ARIMAX, additionally growing the exogenous table row by row.
arimax_pred = []
y_new_train = y_train.copy()
X_new_train = X_train.copy()

for i in range(len(X_test[selected_features])):
    
    # Exogenous values for the step being forecast (one row of X_test as a Series).
    next_exog = X_test.iloc[i, :]
    
    forecast = results.get_forecast(steps=1, exog=next_exog)
    forecast_value = forecast.predicted_mean.values[0]
    arimax_pred.append(forecast_value)

    y_new_train = pd.concat([y_new_train, pd.Series([y_test.iloc[i]], index=[y_new_train.index[-1] + 1])])
    X_new_train = pd.concat([X_new_train, next_exog.to_frame().transpose()], ignore_index=True)
    
    model = SARIMAX(endog=y_new_train, exog=X_new_train, order=order_arimax)
    results = model.fit(disp=False)


# Error metrics, rounded to 3 decimals. `mape`, `mae` and `rmse` are helpers defined elsewhere.
# The naive baseline is scored on train (without its first row) and test concatenated.
mape_naive = round(mape(pd.concat([y_train[1:], y_test]), naive_pred), 3)
mae_naive = round(mae(pd.concat([y_train[1:], y_test]), naive_pred), 3)
rmse_naive = round(rmse(pd.concat([y_train[1:], y_test]), naive_pred), 3)

# Training-set errors.
mape_train_linear_all = round(mape(y_train, all_features_predictions), 3)
mae_train_linear_all = round(mae(y_train, all_features_predictions), 3)
rmse_train_linear_all = round(rmse(y_train, all_features_predictions), 3)

mape_train_linear = round(mape(y_train, train_linear_pred), 3)
mae_train_linear = round(mae(y_train, train_linear_pred), 3)
rmse_train_linear = round(rmse(y_train, train_linear_pred), 3)

mape_train_arima = round(mape(y_train, train_arima_pred), 3)
mae_train_arima = round(mae(y_train, train_arima_pred), 3)
rmse_train_arima = round(rmse(y_train, train_arima_pred), 3)

mape_train_arimax = round(mape(y_train, train_arimax_pred), 3)
mae_train_arimax = round(mae(y_train, train_arimax_pred), 3)
rmse_train_arimax = round(rmse(y_train, train_arimax_pred), 3)


# Test-set errors.
mape_linear_all = round(mape(y_test, all_features_predictions_test), 3)
mae_linear_all = round(mae(y_test, all_features_predictions_test), 3)
rmse_linear_all = round(rmse(y_test, all_features_predictions_test), 3)

mape_linear = round(mape(y_test, linear_pred), 3)
mae_linear = round(mae(y_test, linear_pred), 3)
rmse_linear = round(rmse(y_test, linear_pred), 3)

mape_arima = round(mape(y_test, arima_pred), 3)
mae_arima = round(mae(y_test, arima_pred), 3)
rmse_arima = round(rmse(y_test, arima_pred), 3)

mape_ARIMAX = round(mape(y_test, arimax_pred), 3)
mae_ARIMAX = round(mae(y_test, arimax_pred), 3)
rmse_ARIMAX = round(rmse(y_test, arimax_pred), 3)


# Summary table: header row first, then one row per metric.
table = [
    ['Metric', 'Naive Method', 'LM Train ALL', 'LM Train', 'ARIMA Train', 'ARIMAX Train', 'LM Test ALL', 'LM Test', 'ARIMA Test', 'ARIMAX Test'],
    ['MAPE', mape_naive, mape_train_linear_all, mape_train_linear, mape_train_arima, mape_train_arimax, mape_linear_all, mape_linear, mape_arima, mape_ARIMAX],
    ['MAE', mae_naive, mae_train_linear_all, mae_train_linear, mae_train_arima, mae_train_arimax, mae_linear_all, mae_linear, mae_arima, mae_ARIMAX],
    ['RMSE', rmse_naive, rmse_train_linear_all, rmse_train_linear, rmse_train_arima, rmse_train_arimax, rmse_linear_all, rmse_linear, rmse_arima, rmse_ARIMAX]
]

print(tabulate(table, headers='firstrow', tablefmt='fancy_grid'))

#### Sensor 70

In [58]:
# --- Linear baselines for `avg_tens60` ---
# Response series and raw predictor table (everything in df_60 except the date).
y = df_60['avg_tens60']
X = df_60.drop(columns=['date'])

# Lagged design matrix: every column shifted by 1, 2 and 3 rows, tagged `_lag<k>`.
X = pd.concat([X.shift(lag).add_suffix(f'_lag{lag}') for lag in (1, 2, 3)], axis=1)

# The response's own lags are removed from the regressors.
columns_to_drop = [f'avg_tens60_lag{lag}' for lag in (1, 2, 3)]
X = X.drop(columns=columns_to_drop, errors='ignore')

# Shifting leaves NaN in the first three rows; discard them and renumber.
y, X = (part.iloc[3:].reset_index(drop=True) for part in (y, X))

# Ordered (unshuffled) split: the trailing rows form the test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.29878, shuffle=False)

# Naive baseline: previous observation as the forecast, over train (minus its first row) and test.
last_train_value = y_train.iloc[-1]
naive_pred = pd.concat([
    y_train.shift(1).fillna(last_train_value)[1:],
    y_test.shift(1).fillna(last_train_value),
])

# OLS on all lagged predictors.
all_features_model = sm.OLS(y_train, sm.add_constant(X_train)).fit()
all_features_predictions, all_features_predictions_test = (
    all_features_model.predict(sm.add_constant(split)) for split in (X_train, X_test)
)

# OLS restricted to the features returned by the stepwise selection helper.
selected_features = stepwise_bidirectional_selection(X_train, y_train, method='aic')
final_model = sm.OLS(y_train, sm.add_constant(X_train[selected_features])).fit()
train_linear_pred, linear_pred = (
    final_model.predict(sm.add_constant(split[selected_features])) for split in (X_train, X_test)
)

# Report the retained features.
print(f'Number of Selected Features: {len(selected_features)}')
print("Selected Features:")
for feature in selected_features:
    print(f" - {feature}")

In [59]:
# ARIMA / ARIMAX modelling of `avg_tens60`, followed by the metric table for all models.
# Relies on `selected_features`, `naive_pred`, `all_features_predictions`,
# `all_features_predictions_test`, `train_linear_pred` and `linear_pred` from the previous cell.
y = df_60['avg_tens60']
X = df_60.drop(['date'], axis=1)

# Lags 1-3 of every column, suffixed `_lag1` / `_lag2` / `_lag3`; keep only the selected ones.
X = X.shift(1).add_suffix('_lag1').join(X.shift(2).add_suffix('_lag2')).join(X.shift(3).add_suffix('_lag3'))
X = X[selected_features]

# Drop the first three rows, which contain NaN introduced by the shifts.
y = y.iloc[3:].reset_index(drop=True)
X = X.iloc[3:].reset_index(drop=True)

# Unshuffled split with the same test fraction as the linear-model cell.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.29878, shuffle=False)


# Candidate (p, q) pairs: every combination of 0..10; `d` is fixed at 1.
ps = range(0, 11, 1)
qs = range(0, 11, 1)
d = 1

order_list = list(product(ps, qs))

# NOTE(review): `optimize_ARIMA` / `optimize_ARIMAX` are defined elsewhere in the notebook;
# presumably each returns a results table and the chosen order - confirm.
# `result_df` is overwritten by the second call and not read again in this cell.
result_df, order_arima = optimize_ARIMA(y_train, order_list, d)
result_df, order_arimax = optimize_ARIMAX(y_train, X_train[selected_features], order_list, d)


# Model without exogenous regressors, fitted on the training series.
model_arima = SARIMAX(endog=y_train, order=order_arima)
results_arima = model_arima.fit(disp=False)

# In-sample predictions recovered as observed minus residual.
res_train_arima = results_arima.resid
train_arima_pred = y_train - res_train_arima


# Model with the selected lagged predictors as exogenous regressors.
model = SARIMAX(endog=y_train, exog=X_train[selected_features], order=order_arimax)
results = model.fit(disp=False)

res_train_arimax = results.resid
train_arimax_pred = y_train - res_train_arimax


# Rolling one-step-ahead ARIMA forecast over the test set: predict the next point,
# append the actual observation, then refit on the extended series.
arima_pred = []
y_new_train = y_train.copy()

for i in range(len(y_test)):
    
    # Position len(y_new_train) is the first point after the current sample.
    forecast = results_arima.get_prediction(start=len(y_new_train), end=len(y_new_train))
    forecast_value = forecast.predicted_mean.values[0]
    arima_pred.append(forecast_value)

    # Extend the series with the observed test value, continuing the integer index.
    y_new_train = pd.concat([y_new_train, pd.Series([y_test.iloc[i]], index=[y_new_train.index[-1] + 1])])
    
    # Rebinds `model` only; the ARIMAX fit stays available through `results`.
    model = SARIMAX(endog=y_new_train, order=order_arima)
    results_arima = model.fit(disp=False)


# Same rolling scheme for ARIMAX, additionally growing the exogenous table row by row.
arimax_pred = []
y_new_train = y_train.copy()
X_new_train = X_train.copy()

for i in range(len(X_test[selected_features])):
    
    # Exogenous values for the step being forecast (one row of X_test as a Series).
    next_exog = X_test.iloc[i, :]
    
    forecast = results.get_forecast(steps=1, exog=next_exog)
    forecast_value = forecast.predicted_mean.values[0]
    arimax_pred.append(forecast_value)

    y_new_train = pd.concat([y_new_train, pd.Series([y_test.iloc[i]], index=[y_new_train.index[-1] + 1])])
    X_new_train = pd.concat([X_new_train, next_exog.to_frame().transpose()], ignore_index=True)
    
    model = SARIMAX(endog=y_new_train, exog=X_new_train, order=order_arimax)
    results = model.fit(disp=False)


# Error metrics, rounded to 3 decimals. `mape`, `mae` and `rmse` are helpers defined elsewhere.
# The naive baseline is scored on train (without its first row) and test concatenated.
mape_naive = round(mape(pd.concat([y_train[1:], y_test]), naive_pred), 3)
mae_naive = round(mae(pd.concat([y_train[1:], y_test]), naive_pred), 3)
rmse_naive = round(rmse(pd.concat([y_train[1:], y_test]), naive_pred), 3)

# Training-set errors.
mape_train_linear_all = round(mape(y_train, all_features_predictions), 3)
mae_train_linear_all = round(mae(y_train, all_features_predictions), 3)
rmse_train_linear_all = round(rmse(y_train, all_features_predictions), 3)

mape_train_linear = round(mape(y_train, train_linear_pred), 3)
mae_train_linear = round(mae(y_train, train_linear_pred), 3)
rmse_train_linear = round(rmse(y_train, train_linear_pred), 3)

mape_train_arima = round(mape(y_train, train_arima_pred), 3)
mae_train_arima = round(mae(y_train, train_arima_pred), 3)
rmse_train_arima = round(rmse(y_train, train_arima_pred), 3)

mape_train_arimax = round(mape(y_train, train_arimax_pred), 3)
mae_train_arimax = round(mae(y_train, train_arimax_pred), 3)
rmse_train_arimax = round(rmse(y_train, train_arimax_pred), 3)


# Test-set errors.
mape_linear_all = round(mape(y_test, all_features_predictions_test), 3)
mae_linear_all = round(mae(y_test, all_features_predictions_test), 3)
rmse_linear_all = round(rmse(y_test, all_features_predictions_test), 3)

mape_linear = round(mape(y_test, linear_pred), 3)
mae_linear = round(mae(y_test, linear_pred), 3)
rmse_linear = round(rmse(y_test, linear_pred), 3)

mape_arima = round(mape(y_test, arima_pred), 3)
mae_arima = round(mae(y_test, arima_pred), 3)
rmse_arima = round(rmse(y_test, arima_pred), 3)

mape_ARIMAX = round(mape(y_test, arimax_pred), 3)
mae_ARIMAX = round(mae(y_test, arimax_pred), 3)
rmse_ARIMAX = round(rmse(y_test, arimax_pred), 3)


# Summary table: header row first, then one row per metric.
table = [
    ['Metric', 'Naive Method', 'LM Train ALL', 'LM Train', 'ARIMA Train', 'ARIMAX Train', 'LM Test ALL', 'LM Test', 'ARIMA Test', 'ARIMAX Test'],
    ['MAPE', mape_naive, mape_train_linear_all, mape_train_linear, mape_train_arima, mape_train_arimax, mape_linear_all, mape_linear, mape_arima, mape_ARIMAX],
    ['MAE', mae_naive, mae_train_linear_all, mae_train_linear, mae_train_arima, mae_train_arimax, mae_linear_all, mae_linear, mae_arima, mae_ARIMAX],
    ['RMSE', rmse_naive, rmse_train_linear_all, rmse_train_linear, rmse_train_arima, rmse_train_arimax, rmse_linear_all, rmse_linear, rmse_arima, rmse_ARIMAX]
]

print(tabulate(table, headers='firstrow', tablefmt='fancy_grid'))

### Group 5

In [60]:
# Build the modelling tables for group 5.
# Step 1: keep the readings of group '5' and aggregate `value` per (timestamp, description).
df_group_5 = df_rovere[df_rovere['group_id'] == '5'].reset_index(drop=True)
df_group_5 = df_group_5.groupby(['timestamp', 'description']).agg({'value': ['min', 'max', 'mean', 'median', 'sum']}).reset_index()
df_group_5.columns = ['timestamp', 'description', 'val_min', 'val_max', 'val_avg', 'val_med', 'val_sum']

# Step 2: pivot to one column per (statistic, description) pair and rename positionally.
# NOTE(review): the names below rely on the pivot emitting the descriptions in the order
# hum, temp, solar, irr, rain, tens30, tens60 within each statistic - confirm against the raw labels.
df_pivot_5 = df_group_5.pivot(index='timestamp', columns='description', values=['val_min', 'val_max', 'val_avg', 'val_med', 'val_sum']).reset_index()
df_pivot_5.columns.name = None
df_pivot_5.columns = ['date', 'min_hum', 'min_temp', 'min_solar', 'min_irr', 'min_rain', 'min_tens30', 'min_tens60',
                      'max_hum', 'max_temp', 'max_solar', 'max_irr', 'max_rain', 'max_tens30', 'max_tens60',
                      'avg_hum', 'avg_temp', 'avg_solar', 'avg_irr', 'avg_rain', 'avg_tens30', 'avg_tens60',
                      'med_hum', 'med_temp', 'med_solar', 'med_irr', 'med_rain', 'med_tens30', 'med_tens60',
                      'sum_hum', 'sum_temp', 'sum_solar', 'sum_irr', 'sum_rain', 'sum_tens30', 'sum_tens60']
df_pivot_5 = df_pivot_5.dropna().reset_index(drop=True)

# Step 3: drop the aggregates that are not used as predictors.
df = df_pivot_5
columns_to_drop = ['min_irr', 'max_irr', 'avg_irr', 'med_irr', 'min_rain', 'sum_hum', 'sum_temp', 'sum_solar', 'avg_rain']
df = df.drop(columns=columns_to_drop).dropna().reset_index(drop=True)


# Step 4: one table per tension series. df_30 drops every tens60 aggregate...
col_drop_30 = ['min_tens60', 'max_tens60', 'avg_tens60', 'med_tens60', 'sum_tens60']
df_30 = df.drop(columns=col_drop_30).dropna().reset_index(drop=True)

# ...and df_60 drops every tens30 aggregate. The last entry must be 'sum_tens30':
# listing 'sum_tens60' here would leave `sum_tens30` in df_60 and remove `sum_tens60` from it.
col_drop_60 = ['min_tens30', 'max_tens30', 'avg_tens30', 'med_tens30', 'sum_tens30']
df_60 = df.drop(columns=col_drop_60).dropna().reset_index(drop=True)

#### Sensor 61

In [61]:
# --- Linear baselines for `avg_tens30` ---
# Response series and raw predictor table (everything in df_30 except the date).
y = df_30['avg_tens30']
X = df_30.drop(columns=['date'])

# Lagged design matrix: every column shifted by 1, 2 and 3 rows, tagged `_lag<k>`.
X = pd.concat([X.shift(lag).add_suffix(f'_lag{lag}') for lag in (1, 2, 3)], axis=1)

# The response's own lags are removed from the regressors.
columns_to_drop = [f'avg_tens30_lag{lag}' for lag in (1, 2, 3)]
X = X.drop(columns=columns_to_drop, errors='ignore')

# Shifting leaves NaN in the first three rows; discard them and renumber.
y, X = (part.iloc[3:].reset_index(drop=True) for part in (y, X))

# Ordered (unshuffled) split: the trailing rows form the test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.29878, shuffle=False)

# Naive baseline: previous observation as the forecast, over train (minus its first row) and test.
last_train_value = y_train.iloc[-1]
naive_pred = pd.concat([
    y_train.shift(1).fillna(last_train_value)[1:],
    y_test.shift(1).fillna(last_train_value),
])

# OLS on all lagged predictors.
all_features_model = sm.OLS(y_train, sm.add_constant(X_train)).fit()
all_features_predictions, all_features_predictions_test = (
    all_features_model.predict(sm.add_constant(split)) for split in (X_train, X_test)
)

# OLS restricted to the features returned by the stepwise selection helper.
selected_features = stepwise_bidirectional_selection(X_train, y_train, method='aic')
final_model = sm.OLS(y_train, sm.add_constant(X_train[selected_features])).fit()
train_linear_pred, linear_pred = (
    final_model.predict(sm.add_constant(split[selected_features])) for split in (X_train, X_test)
)

# Report the retained features.
print(f'Number of Selected Features: {len(selected_features)}')
print("Selected Features:")
for feature in selected_features:
    print(f" - {feature}")

In [62]:
# ARIMA / ARIMAX modelling of `avg_tens30`, followed by the metric table for all models.
# Relies on `selected_features`, `naive_pred`, `all_features_predictions`,
# `all_features_predictions_test`, `train_linear_pred` and `linear_pred` from the previous cell.
y = df_30['avg_tens30']
X = df_30.drop(['date'], axis=1)

# Lags 1-3 of every column, suffixed `_lag1` / `_lag2` / `_lag3`; keep only the selected ones.
X = X.shift(1).add_suffix('_lag1').join(X.shift(2).add_suffix('_lag2')).join(X.shift(3).add_suffix('_lag3'))
X = X[selected_features]

# Drop the first three rows, which contain NaN introduced by the shifts.
y = y.iloc[3:].reset_index(drop=True)
X = X.iloc[3:].reset_index(drop=True)

# Unshuffled split with the same test fraction as the linear-model cell.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.29878, shuffle=False)


# Candidate (p, q) pairs: every combination of 0..10; `d` is fixed at 1.
ps = range(0, 11, 1)
qs = range(0, 11, 1)
d = 1

order_list = list(product(ps, qs))

# NOTE(review): `optimize_ARIMA` / `optimize_ARIMAX` are defined elsewhere in the notebook;
# presumably each returns a results table and the chosen order - confirm.
# `result_df` is overwritten by the second call and not read again in this cell.
result_df, order_arima = optimize_ARIMA(y_train, order_list, d)
result_df, order_arimax = optimize_ARIMAX(y_train, X_train[selected_features], order_list, d)


# Model without exogenous regressors, fitted on the training series.
model_arima = SARIMAX(endog=y_train, order=order_arima)
results_arima = model_arima.fit(disp=False)

# In-sample predictions recovered as observed minus residual.
res_train_arima = results_arima.resid
train_arima_pred = y_train - res_train_arima


# Model with the selected lagged predictors as exogenous regressors.
model = SARIMAX(endog=y_train, exog=X_train[selected_features], order=order_arimax)
results = model.fit(disp=False)

res_train_arimax = results.resid
train_arimax_pred = y_train - res_train_arimax


# Rolling one-step-ahead ARIMA forecast over the test set: predict the next point,
# append the actual observation, then refit on the extended series.
arima_pred = []
y_new_train = y_train.copy()

for i in range(len(y_test)):
    
    # Position len(y_new_train) is the first point after the current sample.
    forecast = results_arima.get_prediction(start=len(y_new_train), end=len(y_new_train))
    forecast_value = forecast.predicted_mean.values[0]
    arima_pred.append(forecast_value)

    # Extend the series with the observed test value, continuing the integer index.
    y_new_train = pd.concat([y_new_train, pd.Series([y_test.iloc[i]], index=[y_new_train.index[-1] + 1])])
    
    # Rebinds `model` only; the ARIMAX fit stays available through `results`.
    model = SARIMAX(endog=y_new_train, order=order_arima)
    results_arima = model.fit(disp=False)


# Same rolling scheme for ARIMAX, additionally growing the exogenous table row by row.
arimax_pred = []
y_new_train = y_train.copy()
X_new_train = X_train.copy()

for i in range(len(X_test[selected_features])):
    
    # Exogenous values for the step being forecast (one row of X_test as a Series).
    next_exog = X_test.iloc[i, :]
    
    forecast = results.get_forecast(steps=1, exog=next_exog)
    forecast_value = forecast.predicted_mean.values[0]
    arimax_pred.append(forecast_value)

    y_new_train = pd.concat([y_new_train, pd.Series([y_test.iloc[i]], index=[y_new_train.index[-1] + 1])])
    X_new_train = pd.concat([X_new_train, next_exog.to_frame().transpose()], ignore_index=True)
    
    model = SARIMAX(endog=y_new_train, exog=X_new_train, order=order_arimax)
    results = model.fit(disp=False)


# Error metrics, rounded to 3 decimals. `mape`, `mae` and `rmse` are helpers defined elsewhere.
# The naive baseline is scored on train (without its first row) and test concatenated.
mape_naive = round(mape(pd.concat([y_train[1:], y_test]), naive_pred), 3)
mae_naive = round(mae(pd.concat([y_train[1:], y_test]), naive_pred), 3)
rmse_naive = round(rmse(pd.concat([y_train[1:], y_test]), naive_pred), 3)

# Training-set errors.
mape_train_linear_all = round(mape(y_train, all_features_predictions), 3)
mae_train_linear_all = round(mae(y_train, all_features_predictions), 3)
rmse_train_linear_all = round(rmse(y_train, all_features_predictions), 3)

mape_train_linear = round(mape(y_train, train_linear_pred), 3)
mae_train_linear = round(mae(y_train, train_linear_pred), 3)
rmse_train_linear = round(rmse(y_train, train_linear_pred), 3)

mape_train_arima = round(mape(y_train, train_arima_pred), 3)
mae_train_arima = round(mae(y_train, train_arima_pred), 3)
rmse_train_arima = round(rmse(y_train, train_arima_pred), 3)

mape_train_arimax = round(mape(y_train, train_arimax_pred), 3)
mae_train_arimax = round(mae(y_train, train_arimax_pred), 3)
rmse_train_arimax = round(rmse(y_train, train_arimax_pred), 3)


# Test-set errors.
mape_linear_all = round(mape(y_test, all_features_predictions_test), 3)
mae_linear_all = round(mae(y_test, all_features_predictions_test), 3)
rmse_linear_all = round(rmse(y_test, all_features_predictions_test), 3)

mape_linear = round(mape(y_test, linear_pred), 3)
mae_linear = round(mae(y_test, linear_pred), 3)
rmse_linear = round(rmse(y_test, linear_pred), 3)

mape_arima = round(mape(y_test, arima_pred), 3)
mae_arima = round(mae(y_test, arima_pred), 3)
rmse_arima = round(rmse(y_test, arima_pred), 3)

mape_ARIMAX = round(mape(y_test, arimax_pred), 3)
mae_ARIMAX = round(mae(y_test, arimax_pred), 3)
rmse_ARIMAX = round(rmse(y_test, arimax_pred), 3)


# Summary table: header row first, then one row per metric.
table = [
    ['Metric', 'Naive Method', 'LM Train ALL', 'LM Train', 'ARIMA Train', 'ARIMAX Train', 'LM Test ALL', 'LM Test', 'ARIMA Test', 'ARIMAX Test'],
    ['MAPE', mape_naive, mape_train_linear_all, mape_train_linear, mape_train_arima, mape_train_arimax, mape_linear_all, mape_linear, mape_arima, mape_ARIMAX],
    ['MAE', mae_naive, mae_train_linear_all, mae_train_linear, mae_train_arima, mae_train_arimax, mae_linear_all, mae_linear, mae_arima, mae_ARIMAX],
    ['RMSE', rmse_naive, rmse_train_linear_all, rmse_train_linear, rmse_train_arima, rmse_train_arimax, rmse_linear_all, rmse_linear, rmse_arima, rmse_ARIMAX]
]

print(tabulate(table, headers='firstrow', tablefmt='fancy_grid'))

#### Sensor 62

In [63]:
# --- Linear baselines for `avg_tens60` ---
# Response series and raw predictor table (everything in df_60 except the date).
y = df_60['avg_tens60']
X = df_60.drop(columns=['date'])

# Lagged design matrix: every column shifted by 1, 2 and 3 rows, tagged `_lag<k>`.
X = pd.concat([X.shift(lag).add_suffix(f'_lag{lag}') for lag in (1, 2, 3)], axis=1)

# The response's own lags are removed from the regressors.
columns_to_drop = [f'avg_tens60_lag{lag}' for lag in (1, 2, 3)]
X = X.drop(columns=columns_to_drop, errors='ignore')

# Shifting leaves NaN in the first three rows; discard them and renumber.
y, X = (part.iloc[3:].reset_index(drop=True) for part in (y, X))

# Ordered (unshuffled) split: the trailing rows form the test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.29878, shuffle=False)

# Naive baseline: previous observation as the forecast, over train (minus its first row) and test.
last_train_value = y_train.iloc[-1]
naive_pred = pd.concat([
    y_train.shift(1).fillna(last_train_value)[1:],
    y_test.shift(1).fillna(last_train_value),
])

# OLS on all lagged predictors.
all_features_model = sm.OLS(y_train, sm.add_constant(X_train)).fit()
all_features_predictions, all_features_predictions_test = (
    all_features_model.predict(sm.add_constant(split)) for split in (X_train, X_test)
)

# OLS restricted to the features returned by the stepwise selection helper.
selected_features = stepwise_bidirectional_selection(X_train, y_train, method='aic')
final_model = sm.OLS(y_train, sm.add_constant(X_train[selected_features])).fit()
train_linear_pred, linear_pred = (
    final_model.predict(sm.add_constant(split[selected_features])) for split in (X_train, X_test)
)

# Report the retained features.
print(f'Number of Selected Features: {len(selected_features)}')
print("Selected Features:")
for feature in selected_features:
    print(f" - {feature}")

In [64]:
# ARIMA / ARIMAX modelling of `avg_tens60`, followed by the metric table for all models.
# Relies on `selected_features`, `naive_pred`, `all_features_predictions`,
# `all_features_predictions_test`, `train_linear_pred` and `linear_pred` from the previous cell.
y = df_60['avg_tens60']
X = df_60.drop(['date'], axis=1)

# Lags 1-3 of every column, suffixed `_lag1` / `_lag2` / `_lag3`; keep only the selected ones.
X = X.shift(1).add_suffix('_lag1').join(X.shift(2).add_suffix('_lag2')).join(X.shift(3).add_suffix('_lag3'))
X = X[selected_features]

# Drop the first three rows, which contain NaN introduced by the shifts.
y = y.iloc[3:].reset_index(drop=True)
X = X.iloc[3:].reset_index(drop=True)

# Unshuffled split with the same test fraction as the linear-model cell.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.29878, shuffle=False)


# Candidate (p, q) pairs: every combination of 0..10; `d` is fixed at 1.
ps = range(0, 11, 1)
qs = range(0, 11, 1)
d = 1

order_list = list(product(ps, qs))

# NOTE(review): `optimize_ARIMA` / `optimize_ARIMAX` are defined elsewhere in the notebook;
# presumably each returns a results table and the chosen order - confirm.
# `result_df` is overwritten by the second call and not read again in this cell.
result_df, order_arima = optimize_ARIMA(y_train, order_list, d)
result_df, order_arimax = optimize_ARIMAX(y_train, X_train[selected_features], order_list, d)


# Model without exogenous regressors, fitted on the training series.
model_arima = SARIMAX(endog=y_train, order=order_arima)
results_arima = model_arima.fit(disp=False)

# In-sample predictions recovered as observed minus residual.
res_train_arima = results_arima.resid
train_arima_pred = y_train - res_train_arima


# Model with the selected lagged predictors as exogenous regressors.
model = SARIMAX(endog=y_train, exog=X_train[selected_features], order=order_arimax)
results = model.fit(disp=False)

res_train_arimax = results.resid
train_arimax_pred = y_train - res_train_arimax


# Rolling one-step-ahead ARIMA forecast over the test set: predict the next point,
# append the actual observation, then refit on the extended series.
arima_pred = []
y_new_train = y_train.copy()

for i in range(len(y_test)):
    
    # Position len(y_new_train) is the first point after the current sample.
    forecast = results_arima.get_prediction(start=len(y_new_train), end=len(y_new_train))
    forecast_value = forecast.predicted_mean.values[0]
    arima_pred.append(forecast_value)

    # Extend the series with the observed test value, continuing the integer index.
    y_new_train = pd.concat([y_new_train, pd.Series([y_test.iloc[i]], index=[y_new_train.index[-1] + 1])])
    
    # Rebinds `model` only; the ARIMAX fit stays available through `results`.
    model = SARIMAX(endog=y_new_train, order=order_arima)
    results_arima = model.fit(disp=False)


# Same rolling scheme for ARIMAX, additionally growing the exogenous table row by row.
arimax_pred = []
y_new_train = y_train.copy()
X_new_train = X_train.copy()

for i in range(len(X_test[selected_features])):
    
    # Exogenous values for the step being forecast (one row of X_test as a Series).
    next_exog = X_test.iloc[i, :]
    
    forecast = results.get_forecast(steps=1, exog=next_exog)
    forecast_value = forecast.predicted_mean.values[0]
    arimax_pred.append(forecast_value)

    y_new_train = pd.concat([y_new_train, pd.Series([y_test.iloc[i]], index=[y_new_train.index[-1] + 1])])
    X_new_train = pd.concat([X_new_train, next_exog.to_frame().transpose()], ignore_index=True)
    
    model = SARIMAX(endog=y_new_train, exog=X_new_train, order=order_arimax)
    results = model.fit(disp=False)


# Error metrics, rounded to 3 decimals. `mape`, `mae` and `rmse` are helpers defined elsewhere.
# The naive baseline is scored on train (without its first row) and test concatenated.
mape_naive = round(mape(pd.concat([y_train[1:], y_test]), naive_pred), 3)
mae_naive = round(mae(pd.concat([y_train[1:], y_test]), naive_pred), 3)
rmse_naive = round(rmse(pd.concat([y_train[1:], y_test]), naive_pred), 3)

# Training-set errors.
mape_train_linear_all = round(mape(y_train, all_features_predictions), 3)
mae_train_linear_all = round(mae(y_train, all_features_predictions), 3)
rmse_train_linear_all = round(rmse(y_train, all_features_predictions), 3)

mape_train_linear = round(mape(y_train, train_linear_pred), 3)
mae_train_linear = round(mae(y_train, train_linear_pred), 3)
rmse_train_linear = round(rmse(y_train, train_linear_pred), 3)

mape_train_arima = round(mape(y_train, train_arima_pred), 3)
mae_train_arima = round(mae(y_train, train_arima_pred), 3)
rmse_train_arima = round(rmse(y_train, train_arima_pred), 3)

mape_train_arimax = round(mape(y_train, train_arimax_pred), 3)
mae_train_arimax = round(mae(y_train, train_arimax_pred), 3)
rmse_train_arimax = round(rmse(y_train, train_arimax_pred), 3)


# Test-set errors.
mape_linear_all = round(mape(y_test, all_features_predictions_test), 3)
mae_linear_all = round(mae(y_test, all_features_predictions_test), 3)
rmse_linear_all = round(rmse(y_test, all_features_predictions_test), 3)

mape_linear = round(mape(y_test, linear_pred), 3)
mae_linear = round(mae(y_test, linear_pred), 3)
rmse_linear = round(rmse(y_test, linear_pred), 3)

mape_arima = round(mape(y_test, arima_pred), 3)
mae_arima = round(mae(y_test, arima_pred), 3)
rmse_arima = round(rmse(y_test, arima_pred), 3)

mape_ARIMAX = round(mape(y_test, arimax_pred), 3)
mae_ARIMAX = round(mae(y_test, arimax_pred), 3)
rmse_ARIMAX = round(rmse(y_test, arimax_pred), 3)


# Summary table: header row first, then one row per metric.
table = [
    ['Metric', 'Naive Method', 'LM Train ALL', 'LM Train', 'ARIMA Train', 'ARIMAX Train', 'LM Test ALL', 'LM Test', 'ARIMA Test', 'ARIMAX Test'],
    ['MAPE', mape_naive, mape_train_linear_all, mape_train_linear, mape_train_arima, mape_train_arimax, mape_linear_all, mape_linear, mape_arima, mape_ARIMAX],
    ['MAE', mae_naive, mae_train_linear_all, mae_train_linear, mae_train_arima, mae_train_arimax, mae_linear_all, mae_linear, mae_arima, mae_ARIMAX],
    ['RMSE', rmse_naive, rmse_train_linear_all, rmse_train_linear, rmse_train_arima, rmse_train_arimax, rmse_linear_all, rmse_linear, rmse_arima, rmse_ARIMAX]
]

print(tabulate(table, headers='firstrow', tablefmt='fancy_grid'))

### Group 6

In [65]:
# Build the modelling tables for group 6.
# Step 1: keep the readings of group '6' and aggregate `value` per (timestamp, description).
df_group_6 = df_rovere[df_rovere['group_id'] == '6'].reset_index(drop=True)
df_group_6 = df_group_6.groupby(['timestamp', 'description']).agg({'value': ['min', 'max', 'mean', 'median', 'sum']}).reset_index()
df_group_6.columns = ['timestamp', 'description', 'val_min', 'val_max', 'val_avg', 'val_med', 'val_sum']

# Step 2: pivot to one column per (statistic, description) pair and rename positionally.
# NOTE(review): the names below rely on the pivot emitting the descriptions in the order
# hum, temp, solar, irr, rain, tens30, tens60 within each statistic - confirm against the raw labels.
df_pivot_6 = df_group_6.pivot(index='timestamp', columns='description', values=['val_min', 'val_max', 'val_avg', 'val_med', 'val_sum']).reset_index()
df_pivot_6.columns.name = None
df_pivot_6.columns = ['date', 'min_hum', 'min_temp', 'min_solar', 'min_irr', 'min_rain', 'min_tens30', 'min_tens60',
                      'max_hum', 'max_temp', 'max_solar', 'max_irr', 'max_rain', 'max_tens30', 'max_tens60',
                      'avg_hum', 'avg_temp', 'avg_solar', 'avg_irr', 'avg_rain', 'avg_tens30', 'avg_tens60',
                      'med_hum', 'med_temp', 'med_solar', 'med_irr', 'med_rain', 'med_tens30', 'med_tens60',
                      'sum_hum', 'sum_temp', 'sum_solar', 'sum_irr', 'sum_rain', 'sum_tens30', 'sum_tens60']
df_pivot_6 = df_pivot_6.dropna().reset_index(drop=True)

# Step 3: drop the aggregates that are not used as predictors.
df = df_pivot_6
columns_to_drop = ['min_irr', 'max_irr', 'avg_irr', 'med_irr', 'min_rain', 'sum_hum', 'sum_temp', 'sum_solar', 'avg_rain']
df = df.drop(columns=columns_to_drop).dropna().reset_index(drop=True)


# Step 4: one table per tension series. df_30 drops every tens60 aggregate...
col_drop_30 = ['min_tens60', 'max_tens60', 'avg_tens60', 'med_tens60', 'sum_tens60']
df_30 = df.drop(columns=col_drop_30).dropna().reset_index(drop=True)

# ...and df_60 drops every tens30 aggregate. The last entry must be 'sum_tens30':
# listing 'sum_tens60' here would leave `sum_tens30` in df_60 and remove `sum_tens60` from it.
col_drop_60 = ['min_tens30', 'max_tens30', 'avg_tens30', 'med_tens30', 'sum_tens30']
df_60 = df.drop(columns=col_drop_60).dropna().reset_index(drop=True)

#### Sensor 63

In [66]:
# --- Linear baselines for `avg_tens30` ---
# Response series and raw predictor table (everything in df_30 except the date).
y = df_30['avg_tens30']
X = df_30.drop(columns=['date'])

# Lagged design matrix: every column shifted by 1, 2 and 3 rows, tagged `_lag<k>`.
X = pd.concat([X.shift(lag).add_suffix(f'_lag{lag}') for lag in (1, 2, 3)], axis=1)

# The response's own lags are removed from the regressors.
columns_to_drop = [f'avg_tens30_lag{lag}' for lag in (1, 2, 3)]
X = X.drop(columns=columns_to_drop, errors='ignore')

# Shifting leaves NaN in the first three rows; discard them and renumber.
y, X = (part.iloc[3:].reset_index(drop=True) for part in (y, X))

# Ordered (unshuffled) split: the trailing rows form the test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.29878, shuffle=False)

# Naive baseline: previous observation as the forecast, over train (minus its first row) and test.
last_train_value = y_train.iloc[-1]
naive_pred = pd.concat([
    y_train.shift(1).fillna(last_train_value)[1:],
    y_test.shift(1).fillna(last_train_value),
])

# OLS on all lagged predictors.
all_features_model = sm.OLS(y_train, sm.add_constant(X_train)).fit()
all_features_predictions, all_features_predictions_test = (
    all_features_model.predict(sm.add_constant(split)) for split in (X_train, X_test)
)

# OLS restricted to the features returned by the stepwise selection helper.
selected_features = stepwise_bidirectional_selection(X_train, y_train, method='aic')
final_model = sm.OLS(y_train, sm.add_constant(X_train[selected_features])).fit()
train_linear_pred, linear_pred = (
    final_model.predict(sm.add_constant(split[selected_features])) for split in (X_train, X_test)
)

# Report the retained features.
print(f'Number of Selected Features: {len(selected_features)}')
print("Selected Features:")
for feature in selected_features:
    print(f" - {feature}")

In [67]:
# ARIMA / ARIMAX modelling of `avg_tens30`, followed by the metric table for all models.
# Relies on `selected_features`, `naive_pred`, `all_features_predictions`,
# `all_features_predictions_test`, `train_linear_pred` and `linear_pred` from the previous cell.
y = df_30['avg_tens30']
X = df_30.drop(['date'], axis=1)

# Lags 1-3 of every column, suffixed `_lag1` / `_lag2` / `_lag3`; keep only the selected ones.
X = X.shift(1).add_suffix('_lag1').join(X.shift(2).add_suffix('_lag2')).join(X.shift(3).add_suffix('_lag3'))
X = X[selected_features]

# Drop the first three rows, which contain NaN introduced by the shifts.
y = y.iloc[3:].reset_index(drop=True)
X = X.iloc[3:].reset_index(drop=True)

# Unshuffled split with the same test fraction as the linear-model cell.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.29878, shuffle=False)


# Candidate (p, q) pairs: every combination of 0..10; `d` is fixed at 1.
ps = range(0, 11, 1)
qs = range(0, 11, 1)
d = 1

order_list = list(product(ps, qs))

# NOTE(review): `optimize_ARIMA` / `optimize_ARIMAX` are defined elsewhere in the notebook;
# presumably each returns a results table and the chosen order - confirm.
# `result_df` is overwritten by the second call and not read again in this cell.
result_df, order_arima = optimize_ARIMA(y_train, order_list, d)
result_df, order_arimax = optimize_ARIMAX(y_train, X_train[selected_features], order_list, d)


# Model without exogenous regressors, fitted on the training series.
model_arima = SARIMAX(endog=y_train, order=order_arima)
results_arima = model_arima.fit(disp=False)

# In-sample predictions recovered as observed minus residual.
res_train_arima = results_arima.resid
train_arima_pred = y_train - res_train_arima


# Model with the selected lagged predictors as exogenous regressors.
model = SARIMAX(endog=y_train, exog=X_train[selected_features], order=order_arimax)
results = model.fit(disp=False)

res_train_arimax = results.resid
train_arimax_pred = y_train - res_train_arimax


# Rolling one-step-ahead ARIMA forecast over the test set: predict the next point,
# append the actual observation, then refit on the extended series.
arima_pred = []
y_new_train = y_train.copy()

for i in range(len(y_test)):
    
    # Position len(y_new_train) is the first point after the current sample.
    forecast = results_arima.get_prediction(start=len(y_new_train), end=len(y_new_train))
    forecast_value = forecast.predicted_mean.values[0]
    arima_pred.append(forecast_value)

    # Extend the series with the observed test value, continuing the integer index.
    y_new_train = pd.concat([y_new_train, pd.Series([y_test.iloc[i]], index=[y_new_train.index[-1] + 1])])
    
    # Rebinds `model` only; the ARIMAX fit stays available through `results`.
    model = SARIMAX(endog=y_new_train, order=order_arima)
    results_arima = model.fit(disp=False)


# Same rolling scheme for ARIMAX, additionally growing the exogenous table row by row.
arimax_pred = []
y_new_train = y_train.copy()
X_new_train = X_train.copy()

for i in range(len(X_test[selected_features])):
    
    # Exogenous values for the step being forecast (one row of X_test as a Series).
    next_exog = X_test.iloc[i, :]
    
    forecast = results.get_forecast(steps=1, exog=next_exog)
    forecast_value = forecast.predicted_mean.values[0]
    arimax_pred.append(forecast_value)

    y_new_train = pd.concat([y_new_train, pd.Series([y_test.iloc[i]], index=[y_new_train.index[-1] + 1])])
    X_new_train = pd.concat([X_new_train, next_exog.to_frame().transpose()], ignore_index=True)
    
    model = SARIMAX(endog=y_new_train, exog=X_new_train, order=order_arimax)
    results = model.fit(disp=False)


# Error metrics, rounded to 3 decimals. `mape`, `mae` and `rmse` are helpers defined elsewhere.
# The naive baseline is scored on train (without its first row) and test concatenated.
mape_naive = round(mape(pd.concat([y_train[1:], y_test]), naive_pred), 3)
mae_naive = round(mae(pd.concat([y_train[1:], y_test]), naive_pred), 3)
rmse_naive = round(rmse(pd.concat([y_train[1:], y_test]), naive_pred), 3)

# Training-set errors.
mape_train_linear_all = round(mape(y_train, all_features_predictions), 3)
mae_train_linear_all = round(mae(y_train, all_features_predictions), 3)
rmse_train_linear_all = round(rmse(y_train, all_features_predictions), 3)

mape_train_linear = round(mape(y_train, train_linear_pred), 3)
mae_train_linear = round(mae(y_train, train_linear_pred), 3)
rmse_train_linear = round(rmse(y_train, train_linear_pred), 3)

mape_train_arima = round(mape(y_train, train_arima_pred), 3)
mae_train_arima = round(mae(y_train, train_arima_pred), 3)
rmse_train_arima = round(rmse(y_train, train_arima_pred), 3)

mape_train_arimax = round(mape(y_train, train_arimax_pred), 3)
mae_train_arimax = round(mae(y_train, train_arimax_pred), 3)
rmse_train_arimax = round(rmse(y_train, train_arimax_pred), 3)


# Test-set errors.
mape_linear_all = round(mape(y_test, all_features_predictions_test), 3)
mae_linear_all = round(mae(y_test, all_features_predictions_test), 3)
rmse_linear_all = round(rmse(y_test, all_features_predictions_test), 3)

mape_linear = round(mape(y_test, linear_pred), 3)
mae_linear = round(mae(y_test, linear_pred), 3)
rmse_linear = round(rmse(y_test, linear_pred), 3)

mape_arima = round(mape(y_test, arima_pred), 3)
mae_arima = round(mae(y_test, arima_pred), 3)
rmse_arima = round(rmse(y_test, arima_pred), 3)

mape_ARIMAX = round(mape(y_test, arimax_pred), 3)
mae_ARIMAX = round(mae(y_test, arimax_pred), 3)
rmse_ARIMAX = round(rmse(y_test, arimax_pred), 3)


# Summary table: header row first, then one row per metric.
table = [
    ['Metric', 'Naive Method', 'LM Train ALL', 'LM Train', 'ARIMA Train', 'ARIMAX Train', 'LM Test ALL', 'LM Test', 'ARIMA Test', 'ARIMAX Test'],
    ['MAPE', mape_naive, mape_train_linear_all, mape_train_linear, mape_train_arima, mape_train_arimax, mape_linear_all, mape_linear, mape_arima, mape_ARIMAX],
    ['MAE', mae_naive, mae_train_linear_all, mae_train_linear, mae_train_arima, mae_train_arimax, mae_linear_all, mae_linear, mae_arima, mae_ARIMAX],
    ['RMSE', rmse_naive, rmse_train_linear_all, rmse_train_linear, rmse_train_arima, rmse_train_arimax, rmse_linear_all, rmse_linear, rmse_arima, rmse_ARIMAX]
]

print(tabulate(table, headers='firstrow', tablefmt='fancy_grid'))

#### Sensor 64

In [68]:
# Target and raw predictors: every column of df_60 except the date.
y = df_60['avg_tens60']
X = df_60.drop(['date'], axis=1)

# Lagged design matrix: each predictor shifted by 1, 2 and 3 rows.
X = X.shift(1).add_suffix('_lag1').join(X.shift(2).add_suffix('_lag2')).join(X.shift(3).add_suffix('_lag3'))

# Exclude the lags of the target column itself from the regressors.
columns_to_drop = ['avg_tens60_lag1', 'avg_tens60_lag2', 'avg_tens60_lag3']
X = X.drop(columns=columns_to_drop, errors='ignore')

# The first three rows contain NaNs introduced by the shifts.
y = y.iloc[3:].reset_index(drop=True)
X = X.iloc[3:].reset_index(drop=True)

# shuffle=False keeps the row order: leading rows train, trailing rows test.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.29878, shuffle=False)


# Naive baseline: every value is predicted by the value one row earlier.
# The first training row has no predecessor and is dropped; the first test
# row is predicted by the last training row.
naive_pred = pd.concat([y_train, y_test]).shift(1).iloc[1:]

# has_constant='add' always adds the intercept column. With the default
# ('skip') a frame that already holds a constant column gets no intercept,
# so train and test design matrices could differ in width and predict()
# would fail.
X_train_all = sm.add_constant(X_train, has_constant='add')
X_test_all = sm.add_constant(X_test, has_constant='add')

# Reference OLS model using every lagged feature.
all_features_model = sm.OLS(y_train, X_train_all).fit()
all_features_predictions = all_features_model.predict(X_train_all)
all_features_predictions_test = all_features_model.predict(X_test_all)

# OLS restricted to the features returned by the stepwise search.
selected_features = stepwise_bidirectional_selection(X_train, y_train, method='aic')
X_train_selected = sm.add_constant(X_train[selected_features], has_constant='add')
X_test_selected = sm.add_constant(X_test[selected_features], has_constant='add')
final_model = sm.OLS(y_train, X_train_selected).fit()

train_linear_pred = final_model.predict(X_train_selected)
linear_pred = final_model.predict(X_test_selected)


print(f'Number of Selected Features: {len(selected_features)}')
print("Selected Features:")
for feature in selected_features:
    print(f" - {feature}")

In [69]:
# Rebuild the lagged predictors, keeping only the columns in selected_features.
y = df_60['avg_tens60']
predictors = df_60.drop(['date'], axis=1)
lagged = [predictors.shift(k).add_suffix(f'_lag{k}') for k in (1, 2, 3)]
X = lagged[0].join(lagged[1]).join(lagged[2])
X = X[selected_features]

# The first three rows contain NaNs introduced by the shifts.
y = y.iloc[3:].reset_index(drop=True)
X = X.iloc[3:].reset_index(drop=True)

# shuffle=False keeps the row order: leading rows train, trailing rows test.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.29878, shuffle=False)


# Candidate (p, q) pairs with p and q in 0..10; d is fixed at 1.
ps = range(0, 11)
qs = range(0, 11)
d = 1
order_list = list(product(ps, qs))

# Order search for both model families (result_df ends up holding the ARIMAX table).
result_df, order_arima = optimize_ARIMA(y_train, order_list, d)
result_df, order_arimax = optimize_ARIMAX(y_train, X_train[selected_features], order_list, d)


# ARIMA on the training series; fitted values are recovered as actual minus residual.
# NOTE(review): with d >= 1 the first residual may reflect the filter's
# initialisation rather than model fit - confirm whether it should be
# excluded from the training metrics.
model_arima = SARIMAX(endog=y_train, order=order_arima)
results_arima = model_arima.fit(disp=False)
res_train_arima = results_arima.resid
train_arima_pred = y_train - res_train_arima

# ARIMAX: same procedure with the selected features as exogenous regressors.
model = SARIMAX(endog=y_train, exog=X_train[selected_features], order=order_arimax)
results = model.fit(disp=False)
res_train_arimax = results.resid
train_arimax_pred = y_train - res_train_arimax


# Walk-forward ARIMA: forecast one step, append the observed value, refit.
arima_pred = []
y_new_train = y_train.copy()

for observed in y_test:
    horizon = len(y_new_train)
    one_step = results_arima.get_prediction(start=horizon, end=horizon)
    arima_pred.append(one_step.predicted_mean.values[0])

    next_label = y_new_train.index[-1] + 1
    y_new_train = pd.concat([y_new_train, pd.Series([observed], index=[next_label])])

    results_arima = SARIMAX(endog=y_new_train, order=order_arima).fit(disp=False)


# Walk-forward ARIMAX: as above, also extending the exogenous matrix by one row per step.
arimax_pred = []
y_new_train = y_train.copy()
X_new_train = X_train.copy()

for step, observed in enumerate(y_test):
    next_exog = X_test.iloc[step, :]

    one_step = results.get_forecast(steps=1, exog=next_exog)
    arimax_pred.append(one_step.predicted_mean.values[0])

    next_label = y_new_train.index[-1] + 1
    y_new_train = pd.concat([y_new_train, pd.Series([observed], index=[next_label])])
    X_new_train = pd.concat([X_new_train, next_exog.to_frame().transpose()], ignore_index=True)

    model = SARIMAX(endog=y_new_train, exog=X_new_train, order=order_arimax)
    results = model.fit(disp=False)


# Error metrics for every model, each rounded to 3 decimals.

# Actuals paired with naive_pred: y_train without its first element,
# followed by y_test.
naive_actual = pd.concat([y_train[1:], y_test])
mape_naive, mae_naive, rmse_naive = (
    round(metric(naive_actual, naive_pred), 3) for metric in (mape, mae, rmse)
)

# Scores on the training data.
mape_train_linear_all, mae_train_linear_all, rmse_train_linear_all = (
    round(metric(y_train, all_features_predictions), 3) for metric in (mape, mae, rmse)
)
mape_train_linear, mae_train_linear, rmse_train_linear = (
    round(metric(y_train, train_linear_pred), 3) for metric in (mape, mae, rmse)
)
mape_train_arima, mae_train_arima, rmse_train_arima = (
    round(metric(y_train, train_arima_pred), 3) for metric in (mape, mae, rmse)
)
mape_train_arimax, mae_train_arimax, rmse_train_arimax = (
    round(metric(y_train, train_arimax_pred), 3) for metric in (mape, mae, rmse)
)

# Scores on the test data.
mape_linear_all, mae_linear_all, rmse_linear_all = (
    round(metric(y_test, all_features_predictions_test), 3) for metric in (mape, mae, rmse)
)
mape_linear, mae_linear, rmse_linear = (
    round(metric(y_test, linear_pred), 3) for metric in (mape, mae, rmse)
)
mape_arima, mae_arima, rmse_arima = (
    round(metric(y_test, arima_pred), 3) for metric in (mape, mae, rmse)
)
mape_ARIMAX, mae_ARIMAX, rmse_ARIMAX = (
    round(metric(y_test, arimax_pred), 3) for metric in (mape, mae, rmse)
)

# One row per metric, one column per model/split; the first row is the header.
table = [
    [
        'Metric', 'Naive Method',
        'LM Train ALL', 'LM Train', 'ARIMA Train', 'ARIMAX Train',
        'LM Test ALL', 'LM Test', 'ARIMA Test', 'ARIMAX Test',
    ],
    [
        'MAPE', mape_naive,
        mape_train_linear_all, mape_train_linear, mape_train_arima, mape_train_arimax,
        mape_linear_all, mape_linear, mape_arima, mape_ARIMAX,
    ],
    [
        'MAE', mae_naive,
        mae_train_linear_all, mae_train_linear, mae_train_arima, mae_train_arimax,
        mae_linear_all, mae_linear, mae_arima, mae_ARIMAX,
    ],
    [
        'RMSE', rmse_naive,
        rmse_train_linear_all, rmse_train_linear, rmse_train_arima, rmse_train_arimax,
        rmse_linear_all, rmse_linear, rmse_arima, rmse_ARIMAX,
    ],
]

print(tabulate(table, headers='firstrow', tablefmt='fancy_grid'))

### Group 7

In [70]:
# Summary statistics of 'value' per (timestamp, description) for group '7'.
df_group_7 = (
    df_rovere.loc[df_rovere['group_id'] == '7']
    .reset_index(drop=True)
    .groupby(['timestamp', 'description'])
    .agg({'value': ['min', 'max', 'mean', 'median', 'sum']})
    .reset_index()
)
df_group_7.columns = ['timestamp', 'description', 'val_min', 'val_max', 'val_avg', 'val_med', 'val_sum']

# Wide layout: one row per timestamp, one column per (statistic, description) pair.
df_pivot_7 = df_group_7.pivot(
    index='timestamp',
    columns='description',
    values=['val_min', 'val_max', 'val_avg', 'val_med', 'val_sum'],
).reset_index()
df_pivot_7.columns.name = None
# Flat names: 'date' followed by <statistic>_<variable>, statistic-major.
# NOTE(review): this assumes the pivot emits the descriptions in exactly
# this variable order - confirm against the data.
df_pivot_7.columns = ['date'] + [
    f'{stat}_{var}'
    for stat in ('min', 'max', 'avg', 'med', 'sum')
    for var in ('hum', 'temp', 'solar', 'irr', 'rain', 'tens30', 'tens60')
]
df_pivot_7 = df_pivot_7.dropna().reset_index(drop=True)

# Drop the columns that are not used as features.
df = df_pivot_7
columns_to_drop = ['min_irr', 'max_irr', 'avg_irr', 'med_irr', 'min_rain', 'sum_hum', 'sum_temp', 'sum_solar', 'avg_rain']
df = df.drop(columns=columns_to_drop).dropna().reset_index(drop=True)


# df_30 keeps only the tens30 statistics; df_60 keeps only the tens60 statistics.
col_drop_30 = [f'{stat}_tens60' for stat in ('min', 'max', 'avg', 'med', 'sum')]
df_30 = df.drop(columns=col_drop_30).dropna().reset_index(drop=True)

col_drop_60 = [f'{stat}_tens30' for stat in ('min', 'max', 'avg', 'med', 'sum')]
df_60 = df.drop(columns=col_drop_60).dropna().reset_index(drop=True)

#### Sensor 67

In [71]:
# Target and raw predictors: every column of df_30 except the date.
y = df_30['avg_tens30']
X = df_30.drop(['date'], axis=1)

# Lagged design matrix: each predictor shifted by 1, 2 and 3 rows.
X = X.shift(1).add_suffix('_lag1').join(X.shift(2).add_suffix('_lag2')).join(X.shift(3).add_suffix('_lag3'))

# Exclude the lags of the target column itself from the regressors.
columns_to_drop = ['avg_tens30_lag1', 'avg_tens30_lag2', 'avg_tens30_lag3']
X = X.drop(columns=columns_to_drop, errors='ignore')

# The first three rows contain NaNs introduced by the shifts.
y = y.iloc[3:].reset_index(drop=True)
X = X.iloc[3:].reset_index(drop=True)

# shuffle=False keeps the row order: leading rows train, trailing rows test.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.29878, shuffle=False)


# Naive baseline: every value is predicted by the value one row earlier.
# The first training row has no predecessor and is dropped; the first test
# row is predicted by the last training row.
naive_pred = pd.concat([y_train, y_test]).shift(1).iloc[1:]

# has_constant='add' always adds the intercept column. With the default
# ('skip') a frame that already holds a constant column gets no intercept,
# so train and test design matrices could differ in width and predict()
# would fail.
X_train_all = sm.add_constant(X_train, has_constant='add')
X_test_all = sm.add_constant(X_test, has_constant='add')

# Reference OLS model using every lagged feature.
all_features_model = sm.OLS(y_train, X_train_all).fit()
all_features_predictions = all_features_model.predict(X_train_all)
all_features_predictions_test = all_features_model.predict(X_test_all)

# OLS restricted to the features returned by the stepwise search.
selected_features = stepwise_bidirectional_selection(X_train, y_train, method='aic')
X_train_selected = sm.add_constant(X_train[selected_features], has_constant='add')
X_test_selected = sm.add_constant(X_test[selected_features], has_constant='add')
final_model = sm.OLS(y_train, X_train_selected).fit()

train_linear_pred = final_model.predict(X_train_selected)
linear_pred = final_model.predict(X_test_selected)


print(f'Number of Selected Features: {len(selected_features)}')
print("Selected Features:")
for feature in selected_features:
    print(f" - {feature}")

In [72]:
# Rebuild the lagged predictors, keeping only the columns in selected_features.
y = df_30['avg_tens30']
predictors = df_30.drop(['date'], axis=1)
lagged = [predictors.shift(k).add_suffix(f'_lag{k}') for k in (1, 2, 3)]
X = lagged[0].join(lagged[1]).join(lagged[2])
X = X[selected_features]

# The first three rows contain NaNs introduced by the shifts.
y = y.iloc[3:].reset_index(drop=True)
X = X.iloc[3:].reset_index(drop=True)

# shuffle=False keeps the row order: leading rows train, trailing rows test.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.29878, shuffle=False)


# Candidate (p, q) pairs with p and q in 0..10; d is fixed at 1.
ps = range(0, 11)
qs = range(0, 11)
d = 1
order_list = list(product(ps, qs))

# Order search for both model families (result_df ends up holding the ARIMAX table).
result_df, order_arima = optimize_ARIMA(y_train, order_list, d)
result_df, order_arimax = optimize_ARIMAX(y_train, X_train[selected_features], order_list, d)


# ARIMA on the training series; fitted values are recovered as actual minus residual.
# NOTE(review): with d >= 1 the first residual may reflect the filter's
# initialisation rather than model fit - confirm whether it should be
# excluded from the training metrics.
model_arima = SARIMAX(endog=y_train, order=order_arima)
results_arima = model_arima.fit(disp=False)
res_train_arima = results_arima.resid
train_arima_pred = y_train - res_train_arima

# ARIMAX: same procedure with the selected features as exogenous regressors.
model = SARIMAX(endog=y_train, exog=X_train[selected_features], order=order_arimax)
results = model.fit(disp=False)
res_train_arimax = results.resid
train_arimax_pred = y_train - res_train_arimax


# Walk-forward ARIMA: forecast one step, append the observed value, refit.
arima_pred = []
y_new_train = y_train.copy()

for observed in y_test:
    horizon = len(y_new_train)
    one_step = results_arima.get_prediction(start=horizon, end=horizon)
    arima_pred.append(one_step.predicted_mean.values[0])

    next_label = y_new_train.index[-1] + 1
    y_new_train = pd.concat([y_new_train, pd.Series([observed], index=[next_label])])

    results_arima = SARIMAX(endog=y_new_train, order=order_arima).fit(disp=False)


# Walk-forward ARIMAX: as above, also extending the exogenous matrix by one row per step.
arimax_pred = []
y_new_train = y_train.copy()
X_new_train = X_train.copy()

for step, observed in enumerate(y_test):
    next_exog = X_test.iloc[step, :]

    one_step = results.get_forecast(steps=1, exog=next_exog)
    arimax_pred.append(one_step.predicted_mean.values[0])

    next_label = y_new_train.index[-1] + 1
    y_new_train = pd.concat([y_new_train, pd.Series([observed], index=[next_label])])
    X_new_train = pd.concat([X_new_train, next_exog.to_frame().transpose()], ignore_index=True)

    model = SARIMAX(endog=y_new_train, exog=X_new_train, order=order_arimax)
    results = model.fit(disp=False)


# Error metrics for every model, each rounded to 3 decimals.

# Actuals paired with naive_pred: y_train without its first element,
# followed by y_test.
naive_actual = pd.concat([y_train[1:], y_test])
mape_naive, mae_naive, rmse_naive = (
    round(metric(naive_actual, naive_pred), 3) for metric in (mape, mae, rmse)
)

# Scores on the training data.
mape_train_linear_all, mae_train_linear_all, rmse_train_linear_all = (
    round(metric(y_train, all_features_predictions), 3) for metric in (mape, mae, rmse)
)
mape_train_linear, mae_train_linear, rmse_train_linear = (
    round(metric(y_train, train_linear_pred), 3) for metric in (mape, mae, rmse)
)
mape_train_arima, mae_train_arima, rmse_train_arima = (
    round(metric(y_train, train_arima_pred), 3) for metric in (mape, mae, rmse)
)
mape_train_arimax, mae_train_arimax, rmse_train_arimax = (
    round(metric(y_train, train_arimax_pred), 3) for metric in (mape, mae, rmse)
)

# Scores on the test data.
mape_linear_all, mae_linear_all, rmse_linear_all = (
    round(metric(y_test, all_features_predictions_test), 3) for metric in (mape, mae, rmse)
)
mape_linear, mae_linear, rmse_linear = (
    round(metric(y_test, linear_pred), 3) for metric in (mape, mae, rmse)
)
mape_arima, mae_arima, rmse_arima = (
    round(metric(y_test, arima_pred), 3) for metric in (mape, mae, rmse)
)
mape_ARIMAX, mae_ARIMAX, rmse_ARIMAX = (
    round(metric(y_test, arimax_pred), 3) for metric in (mape, mae, rmse)
)

# One row per metric, one column per model/split; the first row is the header.
table = [
    [
        'Metric', 'Naive Method',
        'LM Train ALL', 'LM Train', 'ARIMA Train', 'ARIMAX Train',
        'LM Test ALL', 'LM Test', 'ARIMA Test', 'ARIMAX Test',
    ],
    [
        'MAPE', mape_naive,
        mape_train_linear_all, mape_train_linear, mape_train_arima, mape_train_arimax,
        mape_linear_all, mape_linear, mape_arima, mape_ARIMAX,
    ],
    [
        'MAE', mae_naive,
        mae_train_linear_all, mae_train_linear, mae_train_arima, mae_train_arimax,
        mae_linear_all, mae_linear, mae_arima, mae_ARIMAX,
    ],
    [
        'RMSE', rmse_naive,
        rmse_train_linear_all, rmse_train_linear, rmse_train_arima, rmse_train_arimax,
        rmse_linear_all, rmse_linear, rmse_arima, rmse_ARIMAX,
    ],
]

print(tabulate(table, headers='firstrow', tablefmt='fancy_grid'))

#### Sensor 68

In [73]:
# Target and raw predictors: every column of df_60 except the date.
y = df_60['avg_tens60']
X = df_60.drop(['date'], axis=1)

# Lagged design matrix: each predictor shifted by 1, 2 and 3 rows.
X = X.shift(1).add_suffix('_lag1').join(X.shift(2).add_suffix('_lag2')).join(X.shift(3).add_suffix('_lag3'))

# Exclude the lags of the target column itself from the regressors.
columns_to_drop = ['avg_tens60_lag1', 'avg_tens60_lag2', 'avg_tens60_lag3']
X = X.drop(columns=columns_to_drop, errors='ignore')

# The first three rows contain NaNs introduced by the shifts.
y = y.iloc[3:].reset_index(drop=True)
X = X.iloc[3:].reset_index(drop=True)

# shuffle=False keeps the row order: leading rows train, trailing rows test.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.29878, shuffle=False)


# Naive baseline: every value is predicted by the value one row earlier.
# The first training row has no predecessor and is dropped; the first test
# row is predicted by the last training row.
naive_pred = pd.concat([y_train, y_test]).shift(1).iloc[1:]

# has_constant='add' always adds the intercept column. With the default
# ('skip') a frame that already holds a constant column gets no intercept,
# so train and test design matrices could differ in width and predict()
# would fail.
X_train_all = sm.add_constant(X_train, has_constant='add')
X_test_all = sm.add_constant(X_test, has_constant='add')

# Reference OLS model using every lagged feature.
all_features_model = sm.OLS(y_train, X_train_all).fit()
all_features_predictions = all_features_model.predict(X_train_all)
all_features_predictions_test = all_features_model.predict(X_test_all)

# OLS restricted to the features returned by the stepwise search.
selected_features = stepwise_bidirectional_selection(X_train, y_train, method='aic')
X_train_selected = sm.add_constant(X_train[selected_features], has_constant='add')
X_test_selected = sm.add_constant(X_test[selected_features], has_constant='add')
final_model = sm.OLS(y_train, X_train_selected).fit()

train_linear_pred = final_model.predict(X_train_selected)
linear_pred = final_model.predict(X_test_selected)


print(f'Number of Selected Features: {len(selected_features)}')
print("Selected Features:")
for feature in selected_features:
    print(f" - {feature}")

In [74]:
# Rebuild the lagged predictors, keeping only the columns in selected_features.
y = df_60['avg_tens60']
predictors = df_60.drop(['date'], axis=1)
lagged = [predictors.shift(k).add_suffix(f'_lag{k}') for k in (1, 2, 3)]
X = lagged[0].join(lagged[1]).join(lagged[2])
X = X[selected_features]

# The first three rows contain NaNs introduced by the shifts.
y = y.iloc[3:].reset_index(drop=True)
X = X.iloc[3:].reset_index(drop=True)

# shuffle=False keeps the row order: leading rows train, trailing rows test.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.29878, shuffle=False)


# Candidate (p, q) pairs with p and q in 0..10; d is fixed at 1.
ps = range(0, 11)
qs = range(0, 11)
d = 1
order_list = list(product(ps, qs))

# Order search for both model families (result_df ends up holding the ARIMAX table).
result_df, order_arima = optimize_ARIMA(y_train, order_list, d)
result_df, order_arimax = optimize_ARIMAX(y_train, X_train[selected_features], order_list, d)


# ARIMA on the training series; fitted values are recovered as actual minus residual.
# NOTE(review): with d >= 1 the first residual may reflect the filter's
# initialisation rather than model fit - confirm whether it should be
# excluded from the training metrics.
model_arima = SARIMAX(endog=y_train, order=order_arima)
results_arima = model_arima.fit(disp=False)
res_train_arima = results_arima.resid
train_arima_pred = y_train - res_train_arima

# ARIMAX: same procedure with the selected features as exogenous regressors.
model = SARIMAX(endog=y_train, exog=X_train[selected_features], order=order_arimax)
results = model.fit(disp=False)
res_train_arimax = results.resid
train_arimax_pred = y_train - res_train_arimax


# Walk-forward ARIMA: forecast one step, append the observed value, refit.
arima_pred = []
y_new_train = y_train.copy()

for observed in y_test:
    horizon = len(y_new_train)
    one_step = results_arima.get_prediction(start=horizon, end=horizon)
    arima_pred.append(one_step.predicted_mean.values[0])

    next_label = y_new_train.index[-1] + 1
    y_new_train = pd.concat([y_new_train, pd.Series([observed], index=[next_label])])

    results_arima = SARIMAX(endog=y_new_train, order=order_arima).fit(disp=False)


# Walk-forward ARIMAX: as above, also extending the exogenous matrix by one row per step.
arimax_pred = []
y_new_train = y_train.copy()
X_new_train = X_train.copy()

for step, observed in enumerate(y_test):
    next_exog = X_test.iloc[step, :]

    one_step = results.get_forecast(steps=1, exog=next_exog)
    arimax_pred.append(one_step.predicted_mean.values[0])

    next_label = y_new_train.index[-1] + 1
    y_new_train = pd.concat([y_new_train, pd.Series([observed], index=[next_label])])
    X_new_train = pd.concat([X_new_train, next_exog.to_frame().transpose()], ignore_index=True)

    model = SARIMAX(endog=y_new_train, exog=X_new_train, order=order_arimax)
    results = model.fit(disp=False)


# Error metrics for every model, each rounded to 3 decimals.

# Actuals paired with naive_pred: y_train without its first element,
# followed by y_test.
naive_actual = pd.concat([y_train[1:], y_test])
mape_naive, mae_naive, rmse_naive = (
    round(metric(naive_actual, naive_pred), 3) for metric in (mape, mae, rmse)
)

# Scores on the training data.
mape_train_linear_all, mae_train_linear_all, rmse_train_linear_all = (
    round(metric(y_train, all_features_predictions), 3) for metric in (mape, mae, rmse)
)
mape_train_linear, mae_train_linear, rmse_train_linear = (
    round(metric(y_train, train_linear_pred), 3) for metric in (mape, mae, rmse)
)
mape_train_arima, mae_train_arima, rmse_train_arima = (
    round(metric(y_train, train_arima_pred), 3) for metric in (mape, mae, rmse)
)
mape_train_arimax, mae_train_arimax, rmse_train_arimax = (
    round(metric(y_train, train_arimax_pred), 3) for metric in (mape, mae, rmse)
)

# Scores on the test data.
mape_linear_all, mae_linear_all, rmse_linear_all = (
    round(metric(y_test, all_features_predictions_test), 3) for metric in (mape, mae, rmse)
)
mape_linear, mae_linear, rmse_linear = (
    round(metric(y_test, linear_pred), 3) for metric in (mape, mae, rmse)
)
mape_arima, mae_arima, rmse_arima = (
    round(metric(y_test, arima_pred), 3) for metric in (mape, mae, rmse)
)
mape_ARIMAX, mae_ARIMAX, rmse_ARIMAX = (
    round(metric(y_test, arimax_pred), 3) for metric in (mape, mae, rmse)
)

# One row per metric, one column per model/split; the first row is the header.
table = [
    [
        'Metric', 'Naive Method',
        'LM Train ALL', 'LM Train', 'ARIMA Train', 'ARIMAX Train',
        'LM Test ALL', 'LM Test', 'ARIMA Test', 'ARIMAX Test',
    ],
    [
        'MAPE', mape_naive,
        mape_train_linear_all, mape_train_linear, mape_train_arima, mape_train_arimax,
        mape_linear_all, mape_linear, mape_arima, mape_ARIMAX,
    ],
    [
        'MAE', mae_naive,
        mae_train_linear_all, mae_train_linear, mae_train_arima, mae_train_arimax,
        mae_linear_all, mae_linear, mae_arima, mae_ARIMAX,
    ],
    [
        'RMSE', rmse_naive,
        rmse_train_linear_all, rmse_train_linear, rmse_train_arima, rmse_train_arimax,
        rmse_linear_all, rmse_linear, rmse_arima, rmse_ARIMAX,
    ],
]

print(tabulate(table, headers='firstrow', tablefmt='fancy_grid'))

### Group 8

In [75]:
# Summary statistics of 'value' per (timestamp, description) for group '8'.
df_group_8 = (
    df_rovere.loc[df_rovere['group_id'] == '8']
    .reset_index(drop=True)
    .groupby(['timestamp', 'description'])
    .agg({'value': ['min', 'max', 'mean', 'median', 'sum']})
    .reset_index()
)
df_group_8.columns = ['timestamp', 'description', 'val_min', 'val_max', 'val_avg', 'val_med', 'val_sum']

# Wide layout: one row per timestamp, one column per (statistic, description) pair.
df_pivot_8 = df_group_8.pivot(
    index='timestamp',
    columns='description',
    values=['val_min', 'val_max', 'val_avg', 'val_med', 'val_sum'],
).reset_index()
df_pivot_8.columns.name = None
# Flat names: 'date' followed by <statistic>_<variable>, statistic-major.
# NOTE(review): this assumes the pivot emits the descriptions in exactly
# this variable order - confirm against the data.
df_pivot_8.columns = ['date'] + [
    f'{stat}_{var}'
    for stat in ('min', 'max', 'avg', 'med', 'sum')
    for var in ('hum', 'temp', 'solar', 'irr', 'rain', 'tens30', 'tens60')
]
df_pivot_8 = df_pivot_8.dropna().reset_index(drop=True)

# Drop the columns that are not used as features.
df = df_pivot_8
columns_to_drop = ['min_irr', 'max_irr', 'avg_irr', 'med_irr', 'min_rain', 'sum_hum', 'sum_temp', 'sum_solar', 'avg_rain']
df = df.drop(columns=columns_to_drop).dropna().reset_index(drop=True)


# df_30 keeps only the tens30 statistics; df_60 keeps only the tens60 statistics.
col_drop_30 = [f'{stat}_tens60' for stat in ('min', 'max', 'avg', 'med', 'sum')]
df_30 = df.drop(columns=col_drop_30).dropna().reset_index(drop=True)

col_drop_60 = [f'{stat}_tens30' for stat in ('min', 'max', 'avg', 'med', 'sum')]
df_60 = df.drop(columns=col_drop_60).dropna().reset_index(drop=True)

#### Sensor 65

In [76]:
# Target and raw predictors: every column of df_30 except the date.
y = df_30['avg_tens30']
X = df_30.drop(['date'], axis=1)

# Lagged design matrix: each predictor shifted by 1, 2 and 3 rows.
X = X.shift(1).add_suffix('_lag1').join(X.shift(2).add_suffix('_lag2')).join(X.shift(3).add_suffix('_lag3'))

# Exclude the lags of the target column itself from the regressors.
columns_to_drop = ['avg_tens30_lag1', 'avg_tens30_lag2', 'avg_tens30_lag3']
X = X.drop(columns=columns_to_drop, errors='ignore')

# The first three rows contain NaNs introduced by the shifts.
y = y.iloc[3:].reset_index(drop=True)
X = X.iloc[3:].reset_index(drop=True)

# shuffle=False keeps the row order: leading rows train, trailing rows test.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.29878, shuffle=False)


# Naive baseline: every value is predicted by the value one row earlier.
# The first training row has no predecessor and is dropped; the first test
# row is predicted by the last training row.
naive_pred = pd.concat([y_train, y_test]).shift(1).iloc[1:]

# has_constant='add' always adds the intercept column. With the default
# ('skip') a frame that already holds a constant column gets no intercept,
# so train and test design matrices could differ in width and predict()
# would fail.
X_train_all = sm.add_constant(X_train, has_constant='add')
X_test_all = sm.add_constant(X_test, has_constant='add')

# Reference OLS model using every lagged feature.
all_features_model = sm.OLS(y_train, X_train_all).fit()
all_features_predictions = all_features_model.predict(X_train_all)
all_features_predictions_test = all_features_model.predict(X_test_all)

# OLS restricted to the features returned by the stepwise search.
selected_features = stepwise_bidirectional_selection(X_train, y_train, method='aic')
X_train_selected = sm.add_constant(X_train[selected_features], has_constant='add')
X_test_selected = sm.add_constant(X_test[selected_features], has_constant='add')
final_model = sm.OLS(y_train, X_train_selected).fit()

train_linear_pred = final_model.predict(X_train_selected)
linear_pred = final_model.predict(X_test_selected)


print(f'Number of Selected Features: {len(selected_features)}')
print("Selected Features:")
for feature in selected_features:
    print(f" - {feature}")

In [77]:
# Rebuild the lagged predictors, keeping only the columns in selected_features.
y = df_30['avg_tens30']
predictors = df_30.drop(['date'], axis=1)
lagged = [predictors.shift(k).add_suffix(f'_lag{k}') for k in (1, 2, 3)]
X = lagged[0].join(lagged[1]).join(lagged[2])
X = X[selected_features]

# The first three rows contain NaNs introduced by the shifts.
y = y.iloc[3:].reset_index(drop=True)
X = X.iloc[3:].reset_index(drop=True)

# shuffle=False keeps the row order: leading rows train, trailing rows test.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.29878, shuffle=False)


# Candidate (p, q) pairs with p and q in 0..10; d is fixed at 1.
ps = range(0, 11)
qs = range(0, 11)
d = 1
order_list = list(product(ps, qs))

# Order search for both model families (result_df ends up holding the ARIMAX table).
result_df, order_arima = optimize_ARIMA(y_train, order_list, d)
result_df, order_arimax = optimize_ARIMAX(y_train, X_train[selected_features], order_list, d)


# ARIMA on the training series; fitted values are recovered as actual minus residual.
# NOTE(review): with d >= 1 the first residual may reflect the filter's
# initialisation rather than model fit - confirm whether it should be
# excluded from the training metrics.
model_arima = SARIMAX(endog=y_train, order=order_arima)
results_arima = model_arima.fit(disp=False)
res_train_arima = results_arima.resid
train_arima_pred = y_train - res_train_arima

# ARIMAX: same procedure with the selected features as exogenous regressors.
model = SARIMAX(endog=y_train, exog=X_train[selected_features], order=order_arimax)
results = model.fit(disp=False)
res_train_arimax = results.resid
train_arimax_pred = y_train - res_train_arimax


# Walk-forward ARIMA: forecast one step, append the observed value, refit.
arima_pred = []
y_new_train = y_train.copy()

for observed in y_test:
    horizon = len(y_new_train)
    one_step = results_arima.get_prediction(start=horizon, end=horizon)
    arima_pred.append(one_step.predicted_mean.values[0])

    next_label = y_new_train.index[-1] + 1
    y_new_train = pd.concat([y_new_train, pd.Series([observed], index=[next_label])])

    results_arima = SARIMAX(endog=y_new_train, order=order_arima).fit(disp=False)


# Walk-forward ARIMAX: as above, also extending the exogenous matrix by one row per step.
arimax_pred = []
y_new_train = y_train.copy()
X_new_train = X_train.copy()

for step, observed in enumerate(y_test):
    next_exog = X_test.iloc[step, :]

    one_step = results.get_forecast(steps=1, exog=next_exog)
    arimax_pred.append(one_step.predicted_mean.values[0])

    next_label = y_new_train.index[-1] + 1
    y_new_train = pd.concat([y_new_train, pd.Series([observed], index=[next_label])])
    X_new_train = pd.concat([X_new_train, next_exog.to_frame().transpose()], ignore_index=True)

    model = SARIMAX(endog=y_new_train, exog=X_new_train, order=order_arimax)
    results = model.fit(disp=False)


# Error metrics for every model, each rounded to 3 decimals.

# Actuals paired with naive_pred: y_train without its first element,
# followed by y_test.
naive_actual = pd.concat([y_train[1:], y_test])
mape_naive, mae_naive, rmse_naive = (
    round(metric(naive_actual, naive_pred), 3) for metric in (mape, mae, rmse)
)

# Scores on the training data.
mape_train_linear_all, mae_train_linear_all, rmse_train_linear_all = (
    round(metric(y_train, all_features_predictions), 3) for metric in (mape, mae, rmse)
)
mape_train_linear, mae_train_linear, rmse_train_linear = (
    round(metric(y_train, train_linear_pred), 3) for metric in (mape, mae, rmse)
)
mape_train_arima, mae_train_arima, rmse_train_arima = (
    round(metric(y_train, train_arima_pred), 3) for metric in (mape, mae, rmse)
)
mape_train_arimax, mae_train_arimax, rmse_train_arimax = (
    round(metric(y_train, train_arimax_pred), 3) for metric in (mape, mae, rmse)
)

# Scores on the test data.
mape_linear_all, mae_linear_all, rmse_linear_all = (
    round(metric(y_test, all_features_predictions_test), 3) for metric in (mape, mae, rmse)
)
mape_linear, mae_linear, rmse_linear = (
    round(metric(y_test, linear_pred), 3) for metric in (mape, mae, rmse)
)
mape_arima, mae_arima, rmse_arima = (
    round(metric(y_test, arima_pred), 3) for metric in (mape, mae, rmse)
)
mape_ARIMAX, mae_ARIMAX, rmse_ARIMAX = (
    round(metric(y_test, arimax_pred), 3) for metric in (mape, mae, rmse)
)

# One row per metric, one column per model/split; the first row is the header.
table = [
    [
        'Metric', 'Naive Method',
        'LM Train ALL', 'LM Train', 'ARIMA Train', 'ARIMAX Train',
        'LM Test ALL', 'LM Test', 'ARIMA Test', 'ARIMAX Test',
    ],
    [
        'MAPE', mape_naive,
        mape_train_linear_all, mape_train_linear, mape_train_arima, mape_train_arimax,
        mape_linear_all, mape_linear, mape_arima, mape_ARIMAX,
    ],
    [
        'MAE', mae_naive,
        mae_train_linear_all, mae_train_linear, mae_train_arima, mae_train_arimax,
        mae_linear_all, mae_linear, mae_arima, mae_ARIMAX,
    ],
    [
        'RMSE', rmse_naive,
        rmse_train_linear_all, rmse_train_linear, rmse_train_arima, rmse_train_arimax,
        rmse_linear_all, rmse_linear, rmse_arima, rmse_ARIMAX,
    ],
]

print(tabulate(table, headers='firstrow', tablefmt='fancy_grid'))

#### Sensor 66

In [78]:
# Target and raw predictors: every column of df_60 except the date.
y = df_60['avg_tens60']
X = df_60.drop(['date'], axis=1)

# Lagged design matrix: each predictor shifted by 1, 2 and 3 rows.
X = X.shift(1).add_suffix('_lag1').join(X.shift(2).add_suffix('_lag2')).join(X.shift(3).add_suffix('_lag3'))

# Exclude the lags of the target column itself from the regressors.
columns_to_drop = ['avg_tens60_lag1', 'avg_tens60_lag2', 'avg_tens60_lag3']
X = X.drop(columns=columns_to_drop, errors='ignore')

# The first three rows contain NaNs introduced by the shifts.
y = y.iloc[3:].reset_index(drop=True)
X = X.iloc[3:].reset_index(drop=True)

# shuffle=False keeps the row order: leading rows train, trailing rows test.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.29878, shuffle=False)


# Naive baseline: every value is predicted by the value one row earlier.
# The first training row has no predecessor and is dropped; the first test
# row is predicted by the last training row.
naive_pred = pd.concat([y_train, y_test]).shift(1).iloc[1:]

# has_constant='add' always adds the intercept column. With the default
# ('skip') a frame that already holds a constant column gets no intercept,
# so train and test design matrices could differ in width and predict()
# would fail.
X_train_all = sm.add_constant(X_train, has_constant='add')
X_test_all = sm.add_constant(X_test, has_constant='add')

# Reference OLS model using every lagged feature.
all_features_model = sm.OLS(y_train, X_train_all).fit()
all_features_predictions = all_features_model.predict(X_train_all)
all_features_predictions_test = all_features_model.predict(X_test_all)

# OLS restricted to the features returned by the stepwise search.
selected_features = stepwise_bidirectional_selection(X_train, y_train, method='aic')
X_train_selected = sm.add_constant(X_train[selected_features], has_constant='add')
X_test_selected = sm.add_constant(X_test[selected_features], has_constant='add')
final_model = sm.OLS(y_train, X_train_selected).fit()

train_linear_pred = final_model.predict(X_train_selected)
linear_pred = final_model.predict(X_test_selected)


print(f'Number of Selected Features: {len(selected_features)}')
print("Selected Features:")
for feature in selected_features:
    print(f" - {feature}")

In [79]:
# Rebuild the lagged predictors, keeping only the columns in selected_features.
y = df_60['avg_tens60']
predictors = df_60.drop(['date'], axis=1)
lagged = [predictors.shift(k).add_suffix(f'_lag{k}') for k in (1, 2, 3)]
X = lagged[0].join(lagged[1]).join(lagged[2])
X = X[selected_features]

# The first three rows contain NaNs introduced by the shifts.
y = y.iloc[3:].reset_index(drop=True)
X = X.iloc[3:].reset_index(drop=True)

# shuffle=False keeps the row order: leading rows train, trailing rows test.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.29878, shuffle=False)


# Candidate (p, q) pairs with p and q in 0..10; d is fixed at 1.
ps = range(0, 11)
qs = range(0, 11)
d = 1
order_list = list(product(ps, qs))

# Order search for both model families (result_df ends up holding the ARIMAX table).
result_df, order_arima = optimize_ARIMA(y_train, order_list, d)
result_df, order_arimax = optimize_ARIMAX(y_train, X_train[selected_features], order_list, d)


# ARIMA on the training series; fitted values are recovered as actual minus residual.
# NOTE(review): with d >= 1 the first residual may reflect the filter's
# initialisation rather than model fit - confirm whether it should be
# excluded from the training metrics.
model_arima = SARIMAX(endog=y_train, order=order_arima)
results_arima = model_arima.fit(disp=False)
res_train_arima = results_arima.resid
train_arima_pred = y_train - res_train_arima

# ARIMAX: same procedure with the selected features as exogenous regressors.
model = SARIMAX(endog=y_train, exog=X_train[selected_features], order=order_arimax)
results = model.fit(disp=False)
res_train_arimax = results.resid
train_arimax_pred = y_train - res_train_arimax


# Walk-forward ARIMA: forecast one step, append the observed value, refit.
arima_pred = []
y_new_train = y_train.copy()

for observed in y_test:
    horizon = len(y_new_train)
    one_step = results_arima.get_prediction(start=horizon, end=horizon)
    arima_pred.append(one_step.predicted_mean.values[0])

    next_label = y_new_train.index[-1] + 1
    y_new_train = pd.concat([y_new_train, pd.Series([observed], index=[next_label])])

    results_arima = SARIMAX(endog=y_new_train, order=order_arima).fit(disp=False)


# Walk-forward ARIMAX: as above, also extending the exogenous matrix by one row per step.
arimax_pred = []
y_new_train = y_train.copy()
X_new_train = X_train.copy()

for step, observed in enumerate(y_test):
    next_exog = X_test.iloc[step, :]

    one_step = results.get_forecast(steps=1, exog=next_exog)
    arimax_pred.append(one_step.predicted_mean.values[0])

    next_label = y_new_train.index[-1] + 1
    y_new_train = pd.concat([y_new_train, pd.Series([observed], index=[next_label])])
    X_new_train = pd.concat([X_new_train, next_exog.to_frame().transpose()], ignore_index=True)

    model = SARIMAX(endog=y_new_train, exog=X_new_train, order=order_arimax)
    results = model.fit(disp=False)


# Error metrics for every model, each rounded to 3 decimals.

# Actuals paired with naive_pred: y_train without its first element,
# followed by y_test.
naive_actual = pd.concat([y_train[1:], y_test])
mape_naive, mae_naive, rmse_naive = (
    round(metric(naive_actual, naive_pred), 3) for metric in (mape, mae, rmse)
)

# Scores on the training data.
mape_train_linear_all, mae_train_linear_all, rmse_train_linear_all = (
    round(metric(y_train, all_features_predictions), 3) for metric in (mape, mae, rmse)
)
mape_train_linear, mae_train_linear, rmse_train_linear = (
    round(metric(y_train, train_linear_pred), 3) for metric in (mape, mae, rmse)
)
mape_train_arima, mae_train_arima, rmse_train_arima = (
    round(metric(y_train, train_arima_pred), 3) for metric in (mape, mae, rmse)
)
mape_train_arimax, mae_train_arimax, rmse_train_arimax = (
    round(metric(y_train, train_arimax_pred), 3) for metric in (mape, mae, rmse)
)

# Scores on the test data.
mape_linear_all, mae_linear_all, rmse_linear_all = (
    round(metric(y_test, all_features_predictions_test), 3) for metric in (mape, mae, rmse)
)
mape_linear, mae_linear, rmse_linear = (
    round(metric(y_test, linear_pred), 3) for metric in (mape, mae, rmse)
)
mape_arima, mae_arima, rmse_arima = (
    round(metric(y_test, arima_pred), 3) for metric in (mape, mae, rmse)
)
mape_ARIMAX, mae_ARIMAX, rmse_ARIMAX = (
    round(metric(y_test, arimax_pred), 3) for metric in (mape, mae, rmse)
)

# One row per metric, one column per model/split; the first row is the header.
table = [
    [
        'Metric', 'Naive Method',
        'LM Train ALL', 'LM Train', 'ARIMA Train', 'ARIMAX Train',
        'LM Test ALL', 'LM Test', 'ARIMA Test', 'ARIMAX Test',
    ],
    [
        'MAPE', mape_naive,
        mape_train_linear_all, mape_train_linear, mape_train_arima, mape_train_arimax,
        mape_linear_all, mape_linear, mape_arima, mape_ARIMAX,
    ],
    [
        'MAE', mae_naive,
        mae_train_linear_all, mae_train_linear, mae_train_arima, mae_train_arimax,
        mae_linear_all, mae_linear, mae_arima, mae_ARIMAX,
    ],
    [
        'RMSE', rmse_naive,
        rmse_train_linear_all, rmse_train_linear, rmse_train_arima, rmse_train_arimax,
        rmse_linear_all, rmse_linear, rmse_arima, rmse_ARIMAX,
    ],
]

print(tabulate(table, headers='firstrow', tablefmt='fancy_grid'))