In [1]:
# Libraries

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import statsmodels.api as sm
import statsmodels.tsa.api as smts
from statsmodels.tsa.statespace.sarimax import SARIMAX

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split, cross_val_predict
from sklearn.preprocessing import StandardScaler
from sklearn.cross_decomposition import PLSRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error

from typing import Union
from itertools import product
from joblib import Parallel, delayed
from tqdm.notebook import tqdm
from tabulate import tabulate

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Functions

def fit_arimax_model(order, endog, exog, d):
    """Fit a single ARIMAX candidate and report its information criteria.

    Parameters
    ----------
    order : sequence
        ``order[0]`` is used as the AR order p and ``order[1]`` as the MA order q.
    endog, exog
        Passed straight through to ``SARIMAX`` as the endogenous series and
        the exogenous regressors.
    d : int
        Differencing order placed between p and q.

    Returns
    -------
    list or None
        ``[(p, d, q), AIC, BIC]`` with both criteria rounded to two decimals,
        or ``None`` when anything raised while building or fitting the model.
    """
    try:
        with warnings.catch_warnings():
            # Silence warnings emitted while fitting this candidate.
            warnings.simplefilter('ignore')
            pdq = (order[0], d, order[1])
            fitted = SARIMAX(endog, exog=exog, order=pdq, simple_differencing=False).fit(disp=False)
            return [pdq, round(fitted.aic, 2), round(fitted.bic, 2)]
    except Exception:
        # Best effort: a failing candidate is reported as None and skipped by the caller.
        return None

def optimize_ARIMAX(endog: Union[pd.Series, list], exog: Union[pd.Series, pd.DataFrame, list], order_list: list, d: int) -> "tuple[pd.DataFrame, tuple]":
    """Grid-search ARIMAX orders in parallel and rank them by AIC.

    Parameters
    ----------
    endog : pd.Series or list
        Endogenous series handed to every candidate model.
    exog : pd.Series, pd.DataFrame or list
        Exogenous regressors handed to every candidate model.
    order_list : list
        Candidate ``(p, q)`` pairs.
    d : int
        Differencing order shared by all candidates.

    Returns
    -------
    (result_df, order_arimax)
        ``result_df`` has columns ``'(p, d, q)'``, ``'AIC'`` and ``'BIC'``
        sorted by ascending AIC; ``order_arimax`` is the ``(p, d, q)`` tuple
        in its first row.

    Raises
    ------
    ValueError
        If none of the candidates could be fitted.
    """
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        results = Parallel(n_jobs=-1)(delayed(fit_arimax_model)(order, endog, exog, d) for order in tqdm(order_list))

    # fit_arimax_model returns None for candidates that failed; drop those.
    results = [result for result in results if result is not None]
    if not results:
        # Without this guard the .iloc[0] below fails with an opaque IndexError.
        raise ValueError('optimize_ARIMAX: none of the candidate orders could be fitted')

    result_df = pd.DataFrame(results, columns=['(p, d, q)', 'AIC', 'BIC'])
    # A stable sort keeps candidates with the same (rounded) AIC in the order
    # they appear in order_list, so the selected order is reproducible.
    result_df = result_df.sort_values(by='AIC', ascending=True, kind='mergesort').reset_index(drop=True)
    order_arimax = result_df['(p, d, q)'].iloc[0]

    return result_df, order_arimax


def mape(y_true, y_pred):
    """Mean absolute percentage error, in percent, ignoring zero actuals.

    Parameters
    ----------
    y_true, y_pred : array-like
        Actual and predicted values of the same shape. Lists, NumPy arrays
        and pandas Series are accepted; values are compared by position.

    Returns
    -------
    float
        ``100 * mean(|y_true - y_pred| / |y_true|)`` over the positions where
        ``y_true`` is non-zero, or ``nan`` when every actual value is zero.

    Raises
    ------
    ValueError
        If the two inputs do not have the same shape.
    """
    # Convert first: comparing a plain list with 0 yields a single bool, not
    # an element-wise mask, which would select only the first element.
    actual = np.asarray(y_true, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)
    if actual.shape != predicted.shape:
        raise ValueError('mape: y_true and y_pred must have the same shape')

    non_zero = actual != 0
    if not non_zero.any():
        return np.nan

    abs_perc_err = np.abs((actual[non_zero] - predicted[non_zero]) / actual[non_zero])
    return np.mean(abs_perc_err) * 100

def mae(y_true, y_pred):
    """Mean absolute error between actual and predicted values (scikit-learn)."""
    error = mean_absolute_error(y_true, y_pred)
    return error

def rmse(y_true, y_pred):
    """Root mean squared error: square root of scikit-learn's mean squared error."""
    squared_error = mean_squared_error(y_true, y_pred)
    return np.sqrt(squared_error)


def optimize_ncomp_pls(X, y, n_comp_range, ylabel, objective):
    """Plot the cross-validated RMSE of PLS regression against the number of components.

    Parameters
    ----------
    X, y
        Predictors and target passed to ``cross_val_predict``.
    n_comp_range : int
        Largest number of PLS components to try; 1..n_comp_range are evaluated.
    ylabel : str
        Label of the y axis.
    objective : str
        ``'min'`` marks the smallest error on the plot; any other value marks
        the largest.

    Returns
    -------
    None
        The function only draws and shows the figure.
    """
    xticks = np.arange(1, n_comp_range + 1)

    errors = []
    for n_comp in xticks:
        pls = PLSRegression(n_components=n_comp)
        # Out-of-fold predictions from 10-fold cross-validation.
        y_cv = cross_val_predict(pls, X, y, cv=10)
        errors.append(rmse(y, y_cv))
    errors = np.array(errors)

    with plt.style.context('ggplot'):
        plt.plot(xticks, errors, 'o-', color='blue', mfc='blue')
        idx = np.argmin(errors) if objective == 'min' else np.argmax(errors)
        # Highlight the selected number of components.
        plt.plot(xticks[idx], errors[idx], 'P', ms=10, mfc='red')

        plt.xlabel('Number of PLS components')
        # Call xticks() to place one tick per component count; assigning to
        # plt.xticks would overwrite the pyplot function and set no ticks.
        plt.xticks(xticks)
        plt.ylabel(ylabel)
        plt.title('PLS')
        plt.grid(True, linestyle='dashed', color='gray')
        plt.gca().set_facecolor('white')

    plt.show()

In [3]:
# Load the raw sensor readings; the path is relative to the working directory.
df = pd.read_csv('data_sensors_rovere.csv')
df = df.rename(columns={'group': 'group_id'})

# Work on an explicit copy of the columns of interest so the assignments
# below modify an independent frame instead of a selection of `df`.
df_rovere = df[['reading_id', 'timestamp', 'sensor_id', 'value', 'description', 'group_id']].copy()

# Identifiers and labels as strings, readings as floats.
df_rovere = df_rovere.astype({
    'reading_id': str,
    'sensor_id': str,
    'value': float,
    'description': str,
    'group_id': str,
})
# Keep only the calendar day of each reading.
df_rovere['timestamp'] = pd.to_datetime(df_rovere['timestamp']).dt.floor('D').dt.date

# NOTE(review): the sensor ids are matched as strings; this presumably relies
# on `sensor_id` being read as integers (e.g. '72', not '72.0') - verify.
condition_30 = df_rovere['sensor_id'].isin(['72', '76', '73', '74', '61', '63', '67', '65'])
condition_60 = df_rovere['sensor_id'].isin(['71', '69', '75', '70', '62', '64', '68', '66'])
condition_irrigation = df_rovere['description'] == 'irrigation'

# Normalise the description labels of the tensiometer and irrigation readings.
df_rovere.loc[condition_30, 'description'] = 'Tensiometer 30'
df_rovere.loc[condition_60, 'description'] = 'Tensiometer 60'
df_rovere.loc[condition_irrigation, 'description'] = 'Irrigation'

### Group 1

In [4]:
# Daily min / max / mean / median / sum of every description recorded for group_id '1'.
df_group_1 = df_rovere.loc[df_rovere['group_id'] == '1'].reset_index(drop=True)
df_group_1 = (
    df_group_1
    .groupby(['timestamp', 'description'])
    .agg({'value': ['min', 'max', 'mean', 'median', 'sum']})
    .reset_index()
)
df_group_1.columns = ['timestamp', 'description'] + [
    f'val_{stat}' for stat in ('min', 'max', 'avg', 'med', 'sum')
]

# One row per day, one column per (statistic, description) pair.
df_pivot_1 = df_group_1.pivot(
    index='timestamp',
    columns='description',
    values=['val_min', 'val_max', 'val_avg', 'val_med', 'val_sum'],
).reset_index()
df_pivot_1.columns.name = None
# Flat names are assigned by position: statistic-major, then measurement.
# NOTE(review): the measurement order presumably mirrors the order of the
# `description` labels produced by the pivot - verify against the data.
df_pivot_1.columns = ['date'] + [
    f'{stat}_{measure}'
    for stat in ('min', 'max', 'avg', 'med', 'sum')
    for measure in ('hum', 'temp', 'solar', 'irr', 'rain', 'tens30', 'tens60')
]

df_pivot_1 = df_pivot_1.dropna().reset_index(drop=True)


# Columns removed before the per-tensiometer frames are built.
columns_to_drop = ['min_irr', 'max_irr', 'avg_irr', 'med_irr', 'min_rain', 'sum_hum', 'sum_temp', 'sum_solar', 'avg_rain']
df = df_pivot_1.drop(columns=columns_to_drop).dropna().reset_index(drop=True)


# df_30 keeps the Tensiometer 30 statistics, df_60 the Tensiometer 60 ones.
col_drop_30 = [f'{stat}_tens60' for stat in ('min', 'max', 'avg', 'med', 'sum')]
df_30 = df.drop(columns=col_drop_30).dropna().reset_index(drop=True)

col_drop_60 = [f'{stat}_tens30' for stat in ('min', 'max', 'avg', 'med', 'sum')]
df_60 = df.drop(columns=col_drop_60).dropna().reset_index(drop=True)

#### Sensor 72

In [5]:
# Target: daily mean of Tensiometer 30. Predictors start from every column but the date.
y = df_30['avg_tens30']
X = df_30.drop(columns=['date'])

# Replace each predictor by its values one, two and three rows earlier.
X = (
    X.shift(1).add_suffix('_lag1')
    .join(X.shift(2).add_suffix('_lag2'))
    .join(X.shift(3).add_suffix('_lag3'))
)

# The lags of the target itself are not used as predictors.
columns_to_drop = [f'avg_tens30_lag{lag}' for lag in (1, 2, 3)]
X = X.drop(columns=columns_to_drop, errors='ignore')

# The first three rows have incomplete lags; drop them from both objects.
y, X = (obj.iloc[3:].reset_index(drop=True) for obj in (y, X))

# Chronological split: the last ~30% of the rows form the test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False, test_size=0.29878)

In [6]:
# Cross-validated RMSE for 1..30 PLS components on the training split.
optimize_ncomp_pls(X=X_train, y=y_train, n_comp_range=30, ylabel='RMSE', objective='min')

In [7]:
# Naive baseline: the previous observation is the forecast. The first test
# forecast is the last training value; the first training row has no forecast.
naive_pred = pd.concat([
    y_train.shift(1).fillna(y_train.iloc[-1])[1:],
    y_test.shift(1).fillna(y_train.iloc[-1]),
])

# PLS with two components, fitted on the training split only.
pls_model = PLSRegression(n_components=2)
pls_model.fit(X_train, y_train)

X_train_pls = pd.DataFrame(pls_model.transform(X_train), columns=['PC1', 'PC2'])
X_test_pls = pd.DataFrame(pls_model.transform(X_test), columns=['PC1', 'PC2'])

# OLS of the target on the PLS scores plus an intercept.
linear_reg_model = sm.OLS(y_train, sm.add_constant(X_train_pls)).fit()

linear_pred = linear_reg_model.predict(sm.add_constant(X_test_pls))
train_linear_pred = linear_reg_model.predict(sm.add_constant(X_train_pls))

In [8]:
# Candidate ARIMAX orders: every (p, q) with p, q in 0..10 and d fixed at 1.
ps = range(0, 11, 1)
qs = range(0, 11, 1)
d = 1

order_list = list(product(ps, qs))


# Rank the candidates on the training split, then refit the first-ranked order.
result_df, order_arimax = optimize_ARIMAX(y_train, X_train_pls, order_list, d)
model = SARIMAX(endog=y_train, exog=X_train_pls, order=order_arimax)
results = model.fit(disp=False)

# In-sample fitted values recovered as actual minus residual.
# NOTE(review): with d = 1 the first residual is presumably dominated by the
# model initialisation, which would inflate the training metrics - confirm.
res_train_arimax = results.resid
train_arimax_pred = y_train - res_train_arimax


# Rolling one-step-ahead forecast over the test split.
arimax_pred = []
y_new_train = y_train.copy()
X_new_train = X_train_pls.copy()

for i in range(len(y_test)):
    
    next_exog = X_test_pls.iloc[i, :]
    
    # Forecast test observation i with the model fitted on everything before it.
    forecast = results.get_forecast(steps=1, exog=next_exog)
    forecast_value = forecast.predicted_mean.values[0]
    arimax_pred.append(forecast_value)

    # Append the observed value and its exogenous row, then refit on the extended sample.
    y_new_train = pd.concat([y_new_train, pd.Series([y_test.iloc[i]], index=[y_new_train.index[-1] + 1])])
    X_new_train = pd.concat([X_new_train, next_exog.to_frame().transpose()], ignore_index=True)
    
    model = SARIMAX(endog=y_new_train, exog=X_new_train, order=order_arimax)
    results = model.fit(disp=False)


# Naive baseline scored on train (minus its first row) and test together.
# NOTE(review): the other columns are scored per split, so the naive column is
# not directly comparable to them - confirm this is intended.
mape_naive = round(mape(pd.concat([y_train[1:], y_test]), naive_pred), 3)
mae_naive = round(mae(pd.concat([y_train[1:], y_test]), naive_pred), 3)
rmse_naive = round(rmse(pd.concat([y_train[1:], y_test]), naive_pred), 3)

# Training-split scores.
mape_train_linear = round(mape(y_train, train_linear_pred), 3)
mae_train_linear = round(mae(y_train, train_linear_pred), 3)
rmse_train_linear = round(rmse(y_train, train_linear_pred), 3)

mape_train_arimax = round(mape(y_train, train_arimax_pred), 3)
mae_train_arimax = round(mae(y_train, train_arimax_pred), 3)
rmse_train_arimax = round(rmse(y_train, train_arimax_pred), 3)


# Test-split scores.
mape_linear = round(mape(y_test, linear_pred), 3)
mae_linear = round(mae(y_test, linear_pred), 3)
rmse_linear = round(rmse(y_test, linear_pred), 3)

mape_ARIMAX = round(mape(y_test, arimax_pred), 3)
mae_ARIMAX = round(mae(y_test, arimax_pred), 3)
rmse_ARIMAX = round(rmse(y_test, arimax_pred), 3)


# Summary table: one row per metric, one column per model / split.
table = [
    ['Metric', 'Naive Method', 'LM Train', 'ARIMAX Train', 'LM Test', 'ARIMAX Test'],
    ['MAPE', mape_naive, mape_train_linear, mape_train_arimax, mape_linear, mape_ARIMAX],
    ['MAE', mae_naive, mae_train_linear, mae_train_arimax, mae_linear, mae_ARIMAX],
    ['RMSE', rmse_naive, rmse_train_linear, rmse_train_arimax, rmse_linear, rmse_ARIMAX]
]

print(tabulate(table, headers='firstrow', tablefmt='fancy_grid'))

In [9]:
# Out-of-sample residuals (actual minus forecast) of both forecasters.
residuals_naive = y_test - naive_pred[-len(y_test):]
residuals_ARIMAX = y_test - arimax_pred

fig, ax = plt.subplots(figsize=(15, 5))

combined_index = np.concatenate([y_train.index, y_test.index])
# Not used in this cell; kept in case another cell refers to it.
combined_index_1 = np.concatenate([y_train.index, y_test[:-1].index])

# Index the list of rolling forecasts by the positions of the test period.
new_indices = np.arange(len(y_train), len(y_train) + len(y_test))
arimax_pred_df = pd.DataFrame(arimax_pred, index=new_indices, columns=['ARIMAX Prediction'])

ax.plot(combined_index, np.concatenate([y_train, y_test]), label='Original Series', color='blue')
# The format string carries only the line style; the colour comes from the
# keyword, so matplotlib does not warn about a redundantly defined colour.
ax.plot(y_test, '-', label='Actual', color='black')

ax.plot(naive_pred[-len(y_test):], ':', label='Naive Method', color='red')
ax.scatter(combined_index[-len(y_test):], residuals_naive, color='orange', label='Residuals Naive', marker='o')

# arimax_pred holds one forecast per test observation, so every forecast and
# residual is drawn (the last one used to be dropped).
ax.plot(arimax_pred_df, '--', label='ARIMAX', color='green')
ax.scatter(combined_index[-len(y_test):], residuals_ARIMAX, color='purple', label='Residuals ARIMAX', marker='x')

# Zero reference line for the residuals.
ax.axhline(y=0, color='gray', linestyle='--', linewidth=1)

ax.set_xlabel('Time')
ax.set_ylabel('Avg Tens 30')
ax.legend()

plt.show()

#### Sensor 71

In [10]:
# Target: daily mean of Tensiometer 60. Predictors start from every column but the date.
y = df_60['avg_tens60']
X = df_60.drop(columns=['date'])

# Replace each predictor by its values one, two and three rows earlier.
X = (
    X.shift(1).add_suffix('_lag1')
    .join(X.shift(2).add_suffix('_lag2'))
    .join(X.shift(3).add_suffix('_lag3'))
)

# The lags of the target itself are not used as predictors.
columns_to_drop = [f'avg_tens60_lag{lag}' for lag in (1, 2, 3)]
X = X.drop(columns=columns_to_drop, errors='ignore')

# The first three rows have incomplete lags; drop them from both objects.
y, X = (obj.iloc[3:].reset_index(drop=True) for obj in (y, X))

# Chronological split: the last ~30% of the rows form the test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False, test_size=0.29878)

In [11]:
# Cross-validated RMSE for 1..30 PLS components on the training split.
optimize_ncomp_pls(X=X_train, y=y_train, n_comp_range=30, ylabel='RMSE', objective='min')

In [12]:
# Naive baseline: the previous observation is the forecast. The first test
# forecast is the last training value; the first training row has no forecast.
naive_pred = pd.concat([
    y_train.shift(1).fillna(y_train.iloc[-1])[1:],
    y_test.shift(1).fillna(y_train.iloc[-1]),
])

# PLS with six components, fitted on the training split only.
pls_model = PLSRegression(n_components=6)
pls_model.fit(X_train, y_train)

X_train_pls = pd.DataFrame(pls_model.transform(X_train), columns=[f'PC{k}' for k in range(1, 7)])
X_test_pls = pd.DataFrame(pls_model.transform(X_test), columns=[f'PC{k}' for k in range(1, 7)])

# OLS of the target on the PLS scores plus an intercept.
linear_reg_model = sm.OLS(y_train, sm.add_constant(X_train_pls)).fit()

linear_pred = linear_reg_model.predict(sm.add_constant(X_test_pls))
train_linear_pred = linear_reg_model.predict(sm.add_constant(X_train_pls))

In [13]:
# Candidate ARIMAX orders: every (p, q) with p, q in 0..10 and d fixed at 1.
ps = range(0, 11, 1)
qs = range(0, 11, 1)
d = 1

order_list = list(product(ps, qs))


# Rank the candidates on the training split, then refit the first-ranked order.
result_df, order_arimax = optimize_ARIMAX(y_train, X_train_pls, order_list, d)
model = SARIMAX(endog=y_train, exog=X_train_pls, order=order_arimax)
results = model.fit(disp=False)

# In-sample fitted values recovered as actual minus residual.
# NOTE(review): with d = 1 the first residual is presumably dominated by the
# model initialisation, which would inflate the training metrics - confirm.
res_train_arimax = results.resid
train_arimax_pred = y_train - res_train_arimax


# Rolling one-step-ahead forecast over the test split.
arimax_pred = []
y_new_train = y_train.copy()
X_new_train = X_train_pls.copy()

for i in range(len(y_test)):
    
    next_exog = X_test_pls.iloc[i, :]
    
    # Forecast test observation i with the model fitted on everything before it.
    forecast = results.get_forecast(steps=1, exog=next_exog)
    forecast_value = forecast.predicted_mean.values[0]
    arimax_pred.append(forecast_value)

    # Append the observed value and its exogenous row, then refit on the extended sample.
    y_new_train = pd.concat([y_new_train, pd.Series([y_test.iloc[i]], index=[y_new_train.index[-1] + 1])])
    X_new_train = pd.concat([X_new_train, next_exog.to_frame().transpose()], ignore_index=True)
    
    model = SARIMAX(endog=y_new_train, exog=X_new_train, order=order_arimax)
    results = model.fit(disp=False)


# Naive baseline scored on train (minus its first row) and test together.
# NOTE(review): the other columns are scored per split, so the naive column is
# not directly comparable to them - confirm this is intended.
mape_naive = round(mape(pd.concat([y_train[1:], y_test]), naive_pred), 3)
mae_naive = round(mae(pd.concat([y_train[1:], y_test]), naive_pred), 3)
rmse_naive = round(rmse(pd.concat([y_train[1:], y_test]), naive_pred), 3)

# Training-split scores.
mape_train_linear = round(mape(y_train, train_linear_pred), 3)
mae_train_linear = round(mae(y_train, train_linear_pred), 3)
rmse_train_linear = round(rmse(y_train, train_linear_pred), 3)

mape_train_arimax = round(mape(y_train, train_arimax_pred), 3)
mae_train_arimax = round(mae(y_train, train_arimax_pred), 3)
rmse_train_arimax = round(rmse(y_train, train_arimax_pred), 3)


# Test-split scores.
mape_linear = round(mape(y_test, linear_pred), 3)
mae_linear = round(mae(y_test, linear_pred), 3)
rmse_linear = round(rmse(y_test, linear_pred), 3)

mape_ARIMAX = round(mape(y_test, arimax_pred), 3)
mae_ARIMAX = round(mae(y_test, arimax_pred), 3)
rmse_ARIMAX = round(rmse(y_test, arimax_pred), 3)


# Summary table: one row per metric, one column per model / split.
table = [
    ['Metric', 'Naive Method', 'LM Train', 'ARIMAX Train', 'LM Test', 'ARIMAX Test'],
    ['MAPE', mape_naive, mape_train_linear, mape_train_arimax, mape_linear, mape_ARIMAX],
    ['MAE', mae_naive, mae_train_linear, mae_train_arimax, mae_linear, mae_ARIMAX],
    ['RMSE', rmse_naive, rmse_train_linear, rmse_train_arimax, rmse_linear, rmse_ARIMAX]
]

print(tabulate(table, headers='firstrow', tablefmt='fancy_grid'))

In [14]:
# Out-of-sample residuals (actual minus forecast) of both forecasters.
residuals_naive = y_test - naive_pred[-len(y_test):]
residuals_ARIMAX = y_test - arimax_pred

fig, ax = plt.subplots(figsize=(15, 5))

combined_index = np.concatenate([y_train.index, y_test.index])
# Not used in this cell; kept in case another cell refers to it.
combined_index_1 = np.concatenate([y_train.index, y_test[:-1].index])

# Index the list of rolling forecasts by the positions of the test period.
new_indices = np.arange(len(y_train), len(y_train) + len(y_test))
arimax_pred_df = pd.DataFrame(arimax_pred, index=new_indices, columns=['ARIMAX Prediction'])

ax.plot(combined_index, np.concatenate([y_train, y_test]), label='Original Series', color='blue')
# The format string carries only the line style; the colour comes from the
# keyword, so matplotlib does not warn about a redundantly defined colour.
ax.plot(y_test, '-', label='Actual', color='black')

# Same legend label as the Tensiometer 30 figure and the metrics table.
ax.plot(naive_pred[-len(y_test):], ':', label='Naive Method', color='red')
ax.scatter(combined_index[-len(y_test):], residuals_naive, color='orange', label='Residuals Naive', marker='o')

# arimax_pred holds one forecast per test observation, so every forecast and
# residual is drawn (the last one used to be dropped).
ax.plot(arimax_pred_df, '--', label='ARIMAX', color='green')
ax.scatter(combined_index[-len(y_test):], residuals_ARIMAX, color='purple', label='Residuals ARIMAX', marker='x')

# Zero reference line for the residuals.
ax.axhline(y=0, color='gray', linestyle='--', linewidth=1)

ax.set_xlabel('Time')
ax.set_ylabel('Avg Tens 60')
ax.legend()

plt.show()

### Group 2

In [15]:
# Daily min / max / mean / median / sum of every description recorded for group_id '2'.
df_group_2 = df_rovere.loc[df_rovere['group_id'] == '2'].reset_index(drop=True)
df_group_2 = (
    df_group_2
    .groupby(['timestamp', 'description'])
    .agg({'value': ['min', 'max', 'mean', 'median', 'sum']})
    .reset_index()
)
df_group_2.columns = ['timestamp', 'description'] + [
    f'val_{stat}' for stat in ('min', 'max', 'avg', 'med', 'sum')
]

# One row per day, one column per (statistic, description) pair.
df_pivot_2 = df_group_2.pivot(
    index='timestamp',
    columns='description',
    values=['val_min', 'val_max', 'val_avg', 'val_med', 'val_sum'],
).reset_index()
df_pivot_2.columns.name = None
# Flat names are assigned by position: statistic-major, then measurement.
# NOTE(review): the measurement order presumably mirrors the order of the
# `description` labels produced by the pivot - verify against the data.
df_pivot_2.columns = ['date'] + [
    f'{stat}_{measure}'
    for stat in ('min', 'max', 'avg', 'med', 'sum')
    for measure in ('hum', 'temp', 'solar', 'irr', 'rain', 'tens30', 'tens60')
]

df_pivot_2 = df_pivot_2.dropna().reset_index(drop=True)


# Columns removed before the per-tensiometer frames are built.
columns_to_drop = ['min_irr', 'max_irr', 'avg_irr', 'med_irr', 'min_rain', 'sum_hum', 'sum_temp', 'sum_solar', 'avg_rain']
df = df_pivot_2.drop(columns=columns_to_drop).dropna().reset_index(drop=True)


# df_30 keeps the Tensiometer 30 statistics, df_60 the Tensiometer 60 ones.
col_drop_30 = [f'{stat}_tens60' for stat in ('min', 'max', 'avg', 'med', 'sum')]
df_30 = df.drop(columns=col_drop_30).dropna().reset_index(drop=True)

col_drop_60 = [f'{stat}_tens30' for stat in ('min', 'max', 'avg', 'med', 'sum')]
df_60 = df.drop(columns=col_drop_60).dropna().reset_index(drop=True)

#### Sensor 76

In [16]:
# Target: daily mean of Tensiometer 30. Predictors start from every column but the date.
y = df_30['avg_tens30']
X = df_30.drop(columns=['date'])

# Replace each predictor by its values one, two and three rows earlier.
X = (
    X.shift(1).add_suffix('_lag1')
    .join(X.shift(2).add_suffix('_lag2'))
    .join(X.shift(3).add_suffix('_lag3'))
)

# The lags of the target itself are not used as predictors.
columns_to_drop = [f'avg_tens30_lag{lag}' for lag in (1, 2, 3)]
X = X.drop(columns=columns_to_drop, errors='ignore')

# The first three rows have incomplete lags; drop them from both objects.
y, X = (obj.iloc[3:].reset_index(drop=True) for obj in (y, X))

# Chronological split: the last ~30% of the rows form the test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False, test_size=0.29878)

# Cross-validated RMSE for 1..30 PLS components on the training split.
optimize_ncomp_pls(X=X_train, y=y_train, n_comp_range=30, ylabel='RMSE', objective='min')

In [17]:
# Naive baseline: the previous observation is the forecast; the first test
# forecast is the last training value.
naive_pred = pd.concat([y_train.shift(1).fillna(y_train.iloc[-1])[1:], y_test.shift(1).fillna(y_train.iloc[-1])])

# PLS with a single component, fitted on the training split only.
pls_model = PLSRegression(n_components=1)
pls_model.fit(X_train, y_train)

X_train_pls = pls_model.transform(X_train)
X_test_pls = pls_model.transform(X_test)
X_train_pls = pd.DataFrame(X_train_pls, columns=['PC1'])
X_test_pls = pd.DataFrame(X_test_pls, columns=['PC1'])

# OLS of the target on the PLS score plus an intercept.
linear_reg_model = sm.OLS(y_train, sm.add_constant(X_train_pls)).fit()

train_linear_pred = linear_reg_model.predict(sm.add_constant(X_train_pls))
linear_pred = linear_reg_model.predict(sm.add_constant(X_test_pls))


# Candidate ARIMAX orders: every (p, q) with p, q in 0..10 and d fixed at 1.
ps = range(0, 11, 1)
qs = range(0, 11, 1)
d = 1

order_list = list(product(ps, qs))


# Rank the candidates on the training split, then refit the first-ranked order.
result_df, order_arimax = optimize_ARIMAX(y_train, X_train_pls, order_list, d)
model = SARIMAX(endog=y_train, exog=X_train_pls, order=order_arimax)
results = model.fit(disp=False)

# In-sample fitted values recovered as actual minus residual.
# NOTE(review): with d = 1 the first residual is presumably dominated by the
# model initialisation, which would inflate the training metrics - confirm.
res_train_arimax = results.resid
train_arimax_pred = y_train - res_train_arimax


# Rolling one-step-ahead forecast over the test split.
arimax_pred = []
y_new_train = y_train.copy()
X_new_train = X_train_pls.copy()

for i in range(len(y_test)):
    
    next_exog = X_test_pls.iloc[i, :]
    
    # Forecast test observation i with the model fitted on everything before it.
    forecast = results.get_forecast(steps=1, exog=next_exog)
    forecast_value = forecast.predicted_mean.values[0]
    arimax_pred.append(forecast_value)

    # Append the observed value and its exogenous row, then refit on the extended sample.
    y_new_train = pd.concat([y_new_train, pd.Series([y_test.iloc[i]], index=[y_new_train.index[-1] + 1])])
    X_new_train = pd.concat([X_new_train, next_exog.to_frame().transpose()], ignore_index=True)
    
    model = SARIMAX(endog=y_new_train, exog=X_new_train, order=order_arimax)
    results = model.fit(disp=False)


# Naive baseline scored on train (minus its first row) and test together.
# NOTE(review): the other columns are scored per split, so the naive column is
# not directly comparable to them - confirm this is intended.
mape_naive = round(mape(pd.concat([y_train[1:], y_test]), naive_pred), 3)
mae_naive = round(mae(pd.concat([y_train[1:], y_test]), naive_pred), 3)
rmse_naive = round(rmse(pd.concat([y_train[1:], y_test]), naive_pred), 3)

# Training-split scores.
mape_train_linear = round(mape(y_train, train_linear_pred), 3)
mae_train_linear = round(mae(y_train, train_linear_pred), 3)
rmse_train_linear = round(rmse(y_train, train_linear_pred), 3)

mape_train_arimax = round(mape(y_train, train_arimax_pred), 3)
mae_train_arimax = round(mae(y_train, train_arimax_pred), 3)
rmse_train_arimax = round(rmse(y_train, train_arimax_pred), 3)


# Test-split scores.
mape_linear = round(mape(y_test, linear_pred), 3)
mae_linear = round(mae(y_test, linear_pred), 3)
rmse_linear = round(rmse(y_test, linear_pred), 3)

mape_ARIMAX = round(mape(y_test, arimax_pred), 3)
mae_ARIMAX = round(mae(y_test, arimax_pred), 3)
rmse_ARIMAX = round(rmse(y_test, arimax_pred), 3)


# Summary table: one row per metric, one column per model / split.
table = [
    ['Metric', 'Naive Method', 'LM Train', 'ARIMAX Train', 'LM Test', 'ARIMAX Test'],
    ['MAPE', mape_naive, mape_train_linear, mape_train_arimax, mape_linear, mape_ARIMAX],
    ['MAE', mae_naive, mae_train_linear, mae_train_arimax, mae_linear, mae_ARIMAX],
    ['RMSE', rmse_naive, rmse_train_linear, rmse_train_arimax, rmse_linear, rmse_ARIMAX]
]

print(tabulate(table, headers='firstrow', tablefmt='fancy_grid'))

#### Sensor 69

In [18]:
# Target: daily mean of Tensiometer 60. Predictors start from every column but the date.
y = df_60['avg_tens60']
X = df_60.drop(columns=['date'])

# Replace each predictor by its values one, two and three rows earlier.
X = (
    X.shift(1).add_suffix('_lag1')
    .join(X.shift(2).add_suffix('_lag2'))
    .join(X.shift(3).add_suffix('_lag3'))
)

# The lags of the target itself are not used as predictors.
columns_to_drop = [f'avg_tens60_lag{lag}' for lag in (1, 2, 3)]
X = X.drop(columns=columns_to_drop, errors='ignore')

# The first three rows have incomplete lags; drop them from both objects.
y, X = (obj.iloc[3:].reset_index(drop=True) for obj in (y, X))

# Chronological split: the last ~30% of the rows form the test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False, test_size=0.29878)

# Cross-validated RMSE for 1..30 PLS components on the training split.
optimize_ncomp_pls(X=X_train, y=y_train, n_comp_range=30, ylabel='RMSE', objective='min')

In [19]:
# Naive baseline: the previous observation is the forecast; the first test
# forecast is the last training value.
naive_pred = pd.concat([y_train.shift(1).fillna(y_train.iloc[-1])[1:], y_test.shift(1).fillna(y_train.iloc[-1])])

# PLS with a single component, fitted on the training split only.
pls_model = PLSRegression(n_components=1)
pls_model.fit(X_train, y_train)

X_train_pls = pls_model.transform(X_train)
X_test_pls = pls_model.transform(X_test)
X_train_pls = pd.DataFrame(X_train_pls, columns=['PC1'])
X_test_pls = pd.DataFrame(X_test_pls, columns=['PC1'])

# OLS of the target on the PLS score plus an intercept.
linear_reg_model = sm.OLS(y_train, sm.add_constant(X_train_pls)).fit()

train_linear_pred = linear_reg_model.predict(sm.add_constant(X_train_pls))
linear_pred = linear_reg_model.predict(sm.add_constant(X_test_pls))


# Candidate ARIMAX orders: every (p, q) with p, q in 0..10 and d fixed at 1.
ps = range(0, 11, 1)
qs = range(0, 11, 1)
d = 1

order_list = list(product(ps, qs))


# Rank the candidates on the training split, then refit the first-ranked order.
result_df, order_arimax = optimize_ARIMAX(y_train, X_train_pls, order_list, d)
model = SARIMAX(endog=y_train, exog=X_train_pls, order=order_arimax)
results = model.fit(disp=False)

# In-sample fitted values recovered as actual minus residual.
# NOTE(review): with d = 1 the first residual is presumably dominated by the
# model initialisation, which would inflate the training metrics - confirm.
res_train_arimax = results.resid
train_arimax_pred = y_train - res_train_arimax


# Rolling one-step-ahead forecast over the test split.
arimax_pred = []
y_new_train = y_train.copy()
X_new_train = X_train_pls.copy()

for i in range(len(y_test)):
    
    next_exog = X_test_pls.iloc[i, :]
    
    # Forecast test observation i with the model fitted on everything before it.
    forecast = results.get_forecast(steps=1, exog=next_exog)
    forecast_value = forecast.predicted_mean.values[0]
    arimax_pred.append(forecast_value)

    # Append the observed value and its exogenous row, then refit on the extended sample.
    y_new_train = pd.concat([y_new_train, pd.Series([y_test.iloc[i]], index=[y_new_train.index[-1] + 1])])
    X_new_train = pd.concat([X_new_train, next_exog.to_frame().transpose()], ignore_index=True)
    
    model = SARIMAX(endog=y_new_train, exog=X_new_train, order=order_arimax)
    results = model.fit(disp=False)


# Naive baseline scored on train (minus its first row) and test together.
# NOTE(review): the other columns are scored per split, so the naive column is
# not directly comparable to them - confirm this is intended.
mape_naive = round(mape(pd.concat([y_train[1:], y_test]), naive_pred), 3)
mae_naive = round(mae(pd.concat([y_train[1:], y_test]), naive_pred), 3)
rmse_naive = round(rmse(pd.concat([y_train[1:], y_test]), naive_pred), 3)

# Training-split scores.
mape_train_linear = round(mape(y_train, train_linear_pred), 3)
mae_train_linear = round(mae(y_train, train_linear_pred), 3)
rmse_train_linear = round(rmse(y_train, train_linear_pred), 3)

mape_train_arimax = round(mape(y_train, train_arimax_pred), 3)
mae_train_arimax = round(mae(y_train, train_arimax_pred), 3)
rmse_train_arimax = round(rmse(y_train, train_arimax_pred), 3)


# Test-split scores.
mape_linear = round(mape(y_test, linear_pred), 3)
mae_linear = round(mae(y_test, linear_pred), 3)
rmse_linear = round(rmse(y_test, linear_pred), 3)

mape_ARIMAX = round(mape(y_test, arimax_pred), 3)
mae_ARIMAX = round(mae(y_test, arimax_pred), 3)
rmse_ARIMAX = round(rmse(y_test, arimax_pred), 3)


# Summary table: one row per metric, one column per model / split.
table = [
    ['Metric', 'Naive Method', 'LM Train', 'ARIMAX Train', 'LM Test', 'ARIMAX Test'],
    ['MAPE', mape_naive, mape_train_linear, mape_train_arimax, mape_linear, mape_ARIMAX],
    ['MAE', mae_naive, mae_train_linear, mae_train_arimax, mae_linear, mae_ARIMAX],
    ['RMSE', rmse_naive, rmse_train_linear, rmse_train_arimax, rmse_linear, rmse_ARIMAX]
]

print(tabulate(table, headers='firstrow', tablefmt='fancy_grid'))

### Group 3

In [20]:
# Daily min / max / mean / median / sum of every description recorded for group_id '3'.
df_group_3 = df_rovere.loc[df_rovere['group_id'] == '3'].reset_index(drop=True)
df_group_3 = (
    df_group_3
    .groupby(['timestamp', 'description'])
    .agg({'value': ['min', 'max', 'mean', 'median', 'sum']})
    .reset_index()
)
df_group_3.columns = ['timestamp', 'description'] + [
    f'val_{stat}' for stat in ('min', 'max', 'avg', 'med', 'sum')
]

# One row per day, one column per (statistic, description) pair.
df_pivot_3 = df_group_3.pivot(
    index='timestamp',
    columns='description',
    values=['val_min', 'val_max', 'val_avg', 'val_med', 'val_sum'],
).reset_index()
df_pivot_3.columns.name = None
# Flat names are assigned by position: statistic-major, then measurement.
# NOTE(review): the measurement order presumably mirrors the order of the
# `description` labels produced by the pivot - verify against the data.
df_pivot_3.columns = ['date'] + [
    f'{stat}_{measure}'
    for stat in ('min', 'max', 'avg', 'med', 'sum')
    for measure in ('hum', 'temp', 'solar', 'irr', 'rain', 'tens30', 'tens60')
]

df_pivot_3 = df_pivot_3.dropna().reset_index(drop=True)


# Columns removed before the per-tensiometer frames are built.
columns_to_drop = ['min_irr', 'max_irr', 'avg_irr', 'med_irr', 'min_rain', 'sum_hum', 'sum_temp', 'sum_solar', 'avg_rain']
df = df_pivot_3.drop(columns=columns_to_drop).dropna().reset_index(drop=True)


# df_30 keeps the Tensiometer 30 statistics, df_60 the Tensiometer 60 ones.
col_drop_30 = [f'{stat}_tens60' for stat in ('min', 'max', 'avg', 'med', 'sum')]
df_30 = df.drop(columns=col_drop_30).dropna().reset_index(drop=True)

col_drop_60 = [f'{stat}_tens30' for stat in ('min', 'max', 'avg', 'med', 'sum')]
df_60 = df.drop(columns=col_drop_60).dropna().reset_index(drop=True)

#### Sensor 73

In [21]:
# Target: daily mean of Tensiometer 30. Predictors start from every column but the date.
y = df_30['avg_tens30']
X = df_30.drop(columns=['date'])

# Replace each predictor by its values one, two and three rows earlier.
X = (
    X.shift(1).add_suffix('_lag1')
    .join(X.shift(2).add_suffix('_lag2'))
    .join(X.shift(3).add_suffix('_lag3'))
)

# The lags of the target itself are not used as predictors.
columns_to_drop = [f'avg_tens30_lag{lag}' for lag in (1, 2, 3)]
X = X.drop(columns=columns_to_drop, errors='ignore')

# The first three rows have incomplete lags; drop them from both objects.
y, X = (obj.iloc[3:].reset_index(drop=True) for obj in (y, X))

# Chronological split: the last ~30% of the rows form the test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False, test_size=0.29878)

# Cross-validated RMSE for 1..30 PLS components on the training split.
optimize_ncomp_pls(X=X_train, y=y_train, n_comp_range=30, ylabel='RMSE', objective='min')

In [22]:
# Naive baseline: the previous observation is the forecast; the first test
# forecast is the last training value.
naive_pred = pd.concat([y_train.shift(1).fillna(y_train.iloc[-1])[1:], y_test.shift(1).fillna(y_train.iloc[-1])])

# PLS with two components, fitted on the training split only.
pls_model = PLSRegression(n_components=2)
pls_model.fit(X_train, y_train)

X_train_pls = pls_model.transform(X_train)
X_test_pls = pls_model.transform(X_test)
X_train_pls = pd.DataFrame(X_train_pls, columns=['PC1', 'PC2'])
X_test_pls = pd.DataFrame(X_test_pls, columns=['PC1', 'PC2'])

# OLS of the target on the PLS scores plus an intercept.
linear_reg_model = sm.OLS(y_train, sm.add_constant(X_train_pls)).fit()

train_linear_pred = linear_reg_model.predict(sm.add_constant(X_train_pls))
linear_pred = linear_reg_model.predict(sm.add_constant(X_test_pls))


# Candidate ARIMAX orders: every (p, q) with p, q in 0..10 and d fixed at 1.
ps = range(0, 11, 1)
qs = range(0, 11, 1)
d = 1

order_list = list(product(ps, qs))


# Rank the candidates on the training split, then refit the first-ranked order.
result_df, order_arimax = optimize_ARIMAX(y_train, X_train_pls, order_list, d)
model = SARIMAX(endog=y_train, exog=X_train_pls, order=order_arimax)
results = model.fit(disp=False)

# In-sample fitted values recovered as actual minus residual.
# NOTE(review): with d = 1 the first residual is presumably dominated by the
# model initialisation, which would inflate the training metrics - confirm.
res_train_arimax = results.resid
train_arimax_pred = y_train - res_train_arimax


# Rolling one-step-ahead forecast over the test split.
arimax_pred = []
y_new_train = y_train.copy()
X_new_train = X_train_pls.copy()

for i in range(len(y_test)):
    
    next_exog = X_test_pls.iloc[i, :]
    
    # Forecast test observation i with the model fitted on everything before it.
    forecast = results.get_forecast(steps=1, exog=next_exog)
    forecast_value = forecast.predicted_mean.values[0]
    arimax_pred.append(forecast_value)

    # Append the observed value and its exogenous row, then refit on the extended sample.
    y_new_train = pd.concat([y_new_train, pd.Series([y_test.iloc[i]], index=[y_new_train.index[-1] + 1])])
    X_new_train = pd.concat([X_new_train, next_exog.to_frame().transpose()], ignore_index=True)
    
    model = SARIMAX(endog=y_new_train, exog=X_new_train, order=order_arimax)
    results = model.fit(disp=False)


# Naive baseline scored on train (minus its first row) and test together.
# NOTE(review): the other columns are scored per split, so the naive column is
# not directly comparable to them - confirm this is intended.
mape_naive = round(mape(pd.concat([y_train[1:], y_test]), naive_pred), 3)
mae_naive = round(mae(pd.concat([y_train[1:], y_test]), naive_pred), 3)
rmse_naive = round(rmse(pd.concat([y_train[1:], y_test]), naive_pred), 3)

# Training-split scores.
mape_train_linear = round(mape(y_train, train_linear_pred), 3)
mae_train_linear = round(mae(y_train, train_linear_pred), 3)
rmse_train_linear = round(rmse(y_train, train_linear_pred), 3)

mape_train_arimax = round(mape(y_train, train_arimax_pred), 3)
mae_train_arimax = round(mae(y_train, train_arimax_pred), 3)
rmse_train_arimax = round(rmse(y_train, train_arimax_pred), 3)


# Test-split scores.
mape_linear = round(mape(y_test, linear_pred), 3)
mae_linear = round(mae(y_test, linear_pred), 3)
rmse_linear = round(rmse(y_test, linear_pred), 3)

mape_ARIMAX = round(mape(y_test, arimax_pred), 3)
mae_ARIMAX = round(mae(y_test, arimax_pred), 3)
rmse_ARIMAX = round(rmse(y_test, arimax_pred), 3)


# Summary table: one row per metric, one column per model / split.
table = [
    ['Metric', 'Naive Method', 'LM Train', 'ARIMAX Train', 'LM Test', 'ARIMAX Test'],
    ['MAPE', mape_naive, mape_train_linear, mape_train_arimax, mape_linear, mape_ARIMAX],
    ['MAE', mae_naive, mae_train_linear, mae_train_arimax, mae_linear, mae_ARIMAX],
    ['RMSE', rmse_naive, rmse_train_linear, rmse_train_arimax, rmse_linear, rmse_ARIMAX]
]

print(tabulate(table, headers='firstrow', tablefmt='fancy_grid'))

#### Sensor 75

In [23]:
# Target: daily mean of Tensiometer 60. Predictors start from every column but the date.
y = df_60['avg_tens60']
X = df_60.drop(columns=['date'])

# Replace each predictor by its values one, two and three rows earlier.
X = (
    X.shift(1).add_suffix('_lag1')
    .join(X.shift(2).add_suffix('_lag2'))
    .join(X.shift(3).add_suffix('_lag3'))
)

# The lags of the target itself are not used as predictors.
columns_to_drop = [f'avg_tens60_lag{lag}' for lag in (1, 2, 3)]
X = X.drop(columns=columns_to_drop, errors='ignore')

# The first three rows have incomplete lags; drop them from both objects.
y, X = (obj.iloc[3:].reset_index(drop=True) for obj in (y, X))

# Chronological split: the last ~30% of the rows form the test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False, test_size=0.29878)

# Cross-validated RMSE for 1..30 PLS components on the training split.
optimize_ncomp_pls(X=X_train, y=y_train, n_comp_range=30, ylabel='RMSE', objective='min')

In [24]:
# Naive baseline: the previous observation is the forecast; the first test
# forecast is the last training value.
naive_pred = pd.concat([y_train.shift(1).fillna(y_train.iloc[-1])[1:], y_test.shift(1).fillna(y_train.iloc[-1])])

# PLS with two components, fitted on the training split only.
pls_model = PLSRegression(n_components=2)
pls_model.fit(X_train, y_train)

X_train_pls = pls_model.transform(X_train)
X_test_pls = pls_model.transform(X_test)
X_train_pls = pd.DataFrame(X_train_pls, columns=['PC1', 'PC2'])
X_test_pls = pd.DataFrame(X_test_pls, columns=['PC1', 'PC2'])

# OLS of the target on the PLS scores plus an intercept.
linear_reg_model = sm.OLS(y_train, sm.add_constant(X_train_pls)).fit()

train_linear_pred = linear_reg_model.predict(sm.add_constant(X_train_pls))
linear_pred = linear_reg_model.predict(sm.add_constant(X_test_pls))


# Candidate ARIMAX orders: every (p, q) with p, q in 0..10 and d fixed at 1.
ps = range(0, 11, 1)
qs = range(0, 11, 1)
d = 1

order_list = list(product(ps, qs))


# Rank the candidates on the training split, then refit the first-ranked order.
result_df, order_arimax = optimize_ARIMAX(y_train, X_train_pls, order_list, d)
model = SARIMAX(endog=y_train, exog=X_train_pls, order=order_arimax)
results = model.fit(disp=False)

# In-sample fitted values recovered as actual minus residual.
# NOTE(review): with d = 1 the first residual is presumably dominated by the
# model initialisation, which would inflate the training metrics - confirm.
res_train_arimax = results.resid
train_arimax_pred = y_train - res_train_arimax


# Rolling one-step-ahead forecast over the test split.
arimax_pred = []
y_new_train = y_train.copy()
X_new_train = X_train_pls.copy()

for i in range(len(y_test)):
    
    next_exog = X_test_pls.iloc[i, :]
    
    # Forecast test observation i with the model fitted on everything before it.
    forecast = results.get_forecast(steps=1, exog=next_exog)
    forecast_value = forecast.predicted_mean.values[0]
    arimax_pred.append(forecast_value)

    # Append the observed value and its exogenous row, then refit on the extended sample.
    y_new_train = pd.concat([y_new_train, pd.Series([y_test.iloc[i]], index=[y_new_train.index[-1] + 1])])
    X_new_train = pd.concat([X_new_train, next_exog.to_frame().transpose()], ignore_index=True)
    
    model = SARIMAX(endog=y_new_train, exog=X_new_train, order=order_arimax)
    results = model.fit(disp=False)


# Naive baseline scored on train (minus its first row) and test together.
# NOTE(review): the other columns are scored per split, so the naive column is
# not directly comparable to them - confirm this is intended.
mape_naive = round(mape(pd.concat([y_train[1:], y_test]), naive_pred), 3)
mae_naive = round(mae(pd.concat([y_train[1:], y_test]), naive_pred), 3)
rmse_naive = round(rmse(pd.concat([y_train[1:], y_test]), naive_pred), 3)

# Training-split scores.
mape_train_linear = round(mape(y_train, train_linear_pred), 3)
mae_train_linear = round(mae(y_train, train_linear_pred), 3)
rmse_train_linear = round(rmse(y_train, train_linear_pred), 3)

mape_train_arimax = round(mape(y_train, train_arimax_pred), 3)
mae_train_arimax = round(mae(y_train, train_arimax_pred), 3)
rmse_train_arimax = round(rmse(y_train, train_arimax_pred), 3)


# Test-split scores.
mape_linear = round(mape(y_test, linear_pred), 3)
mae_linear = round(mae(y_test, linear_pred), 3)
rmse_linear = round(rmse(y_test, linear_pred), 3)

mape_ARIMAX = round(mape(y_test, arimax_pred), 3)
mae_ARIMAX = round(mae(y_test, arimax_pred), 3)
rmse_ARIMAX = round(rmse(y_test, arimax_pred), 3)


# Summary table: one row per metric, one column per model / split.
table = [
    ['Metric', 'Naive Method', 'LM Train', 'ARIMAX Train', 'LM Test', 'ARIMAX Test'],
    ['MAPE', mape_naive, mape_train_linear, mape_train_arimax, mape_linear, mape_ARIMAX],
    ['MAE', mae_naive, mae_train_linear, mae_train_arimax, mae_linear, mae_ARIMAX],
    ['RMSE', rmse_naive, rmse_train_linear, rmse_train_arimax, rmse_linear, rmse_ARIMAX]
]

print(tabulate(table, headers='firstrow', tablefmt='fancy_grid'))

### Group 4

In [25]:
# Group 4: collapse the raw readings to one row per (timestamp, description)
# carrying five summary statistics of `value`.
df_group_4 = (
    df_rovere[df_rovere['group_id'] == '4']
    .reset_index(drop=True)
    .groupby(['timestamp', 'description'])
    .agg({'value': ['min', 'max', 'mean', 'median', 'sum']})
    .reset_index()
)
df_group_4.columns = ['timestamp', 'description', 'val_min', 'val_max', 'val_avg', 'val_med', 'val_sum']

# Wide layout: one column per (statistic, description) pair.
df_pivot_4 = df_group_4.pivot(
    index='timestamp',
    columns='description',
    values=['val_min', 'val_max', 'val_avg', 'val_med', 'val_sum'],
).reset_index()
df_pivot_4.columns.name = None

# Flat names are assigned by position (statistic-major), so they rely on the
# order in which pivot() lays out the description columns.
stat_prefixes = ['min', 'max', 'avg', 'med', 'sum']
variable_suffixes = ['hum', 'temp', 'solar', 'irr', 'rain', 'tens30', 'tens60']
df_pivot_4.columns = ['date'] + [f'{stat}_{var}' for stat in stat_prefixes for var in variable_suffixes]

df_pivot_4 = df_pivot_4.dropna().reset_index(drop=True)

# Discard the columns that are not used as predictors.
columns_to_drop = ['min_irr', 'max_irr', 'avg_irr', 'med_irr', 'min_rain', 'sum_hum', 'sum_temp', 'sum_solar', 'avg_rain']
df = df_pivot_4.drop(columns=columns_to_drop).dropna().reset_index(drop=True)

# One frame per tensiometer: each keeps its own tens* columns and drops the
# other sensor's.
col_drop_30 = [f'{stat}_tens60' for stat in stat_prefixes]
df_30 = df.drop(columns=col_drop_30).dropna().reset_index(drop=True)

col_drop_60 = [f'{stat}_tens30' for stat in stat_prefixes]
df_60 = df.drop(columns=col_drop_60).dropna().reset_index(drop=True)

#### Sensor 74

In [26]:
# Target series for this sensor.
y = df_30['avg_tens30']

# Candidate predictors: every non-date column at lags 1, 2 and 3.
lag_source = df_30.drop(['date'], axis=1)
lag_frames = [lag_source.shift(lag).add_suffix(f'_lag{lag}') for lag in (1, 2, 3)]
X = lag_frames[0].join(lag_frames[1]).join(lag_frames[2])

# Remove the lagged copies of the target column itself.
columns_to_drop = [f'avg_tens30_lag{lag}' for lag in (1, 2, 3)]
X = X.drop(columns=columns_to_drop, errors='ignore')

# The first three rows hold the NaNs introduced by shifting.
y = y.iloc[3:].reset_index(drop=True)
X = X.iloc[3:].reset_index(drop=True)

# Chronological split: no shuffling, the last ~30% of rows form the test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False, test_size=0.29878)

# PLS component-count search (helper defined outside this cell).
optimize_ncomp_pls(X_train, y_train, 30, 'RMSE', 'min')

In [27]:
# Persistence baseline: each value is predicted by the previous observation;
# the first test point is predicted by the last training value.
naive_pred = pd.concat([y_train.shift(1).fillna(y_train.iloc[-1])[1:], y_test.shift(1).fillna(y_train.iloc[-1])])

# Project the lagged predictors onto PLS components. The component count is
# set once and the score column names are derived from it.
n_components = 14
pls_model = PLSRegression(n_components=n_components)
pls_model.fit(X_train, y_train)

pls_columns = [f'PC{k}' for k in range(1, n_components + 1)]
X_train_pls = pd.DataFrame(pls_model.transform(X_train), columns=pls_columns)
X_test_pls = pd.DataFrame(pls_model.transform(X_test), columns=pls_columns)

# OLS on the PLS scores. has_constant='add' guarantees the intercept column is
# present in every design matrix (the default 'skip' omits it whenever an
# input column happens to be constant, which would break predict()).
linear_reg_model = sm.OLS(y_train, sm.add_constant(X_train_pls, has_constant='add')).fit()

train_linear_pred = linear_reg_model.predict(sm.add_constant(X_train_pls, has_constant='add'))
linear_pred = linear_reg_model.predict(sm.add_constant(X_test_pls, has_constant='add'))


# ARIMAX order search over p, q in 0..10 with d fixed at 1.
ps = range(0, 11, 1)
qs = range(0, 11, 1)
d = 1

order_list = list(product(ps, qs))


result_df, order_arimax = optimize_ARIMAX(y_train, X_train_pls, order_list, d)
model = SARIMAX(endog=y_train, exog=X_train_pls, order=order_arimax)
results = model.fit(disp=False)

# In-sample fitted values recovered from the residuals.
res_train_arimax = results.resid
train_arimax_pred = y_train - res_train_arimax


# Walk-forward one-step forecasts: forecast the next test point, then refit
# the same order on all observations seen so far (including that point).
# The expanding windows are slices of frames concatenated once, instead of
# growing the series with pd.concat on every iteration. y_test continues
# y_train's integer index because the split is unshuffled on a reset index.
arimax_pred = []
n_train = len(y_train)
y_all = pd.concat([y_train, y_test])
X_all_pls = pd.concat([X_train_pls, X_test_pls], ignore_index=True)
y_new_train = y_train.copy()
X_new_train = X_train_pls.copy()

for i in range(len(y_test)):

    next_exog = X_test_pls.iloc[i, :]

    forecast = results.get_forecast(steps=1, exog=next_exog)
    forecast_value = forecast.predicted_mean.values[0]
    arimax_pred.append(forecast_value)

    y_new_train = y_all.iloc[:n_train + i + 1]
    X_new_train = X_all_pls.iloc[:n_train + i + 1]

    model = SARIMAX(endog=y_new_train, exog=X_new_train, order=order_arimax)
    results = model.fit(disp=False)


# The naive baseline is scored on train (minus its first point) plus test.
naive_true = pd.concat([y_train[1:], y_test])
mape_naive = round(mape(naive_true, naive_pred), 3)
mae_naive = round(mae(naive_true, naive_pred), 3)
rmse_naive = round(rmse(naive_true, naive_pred), 3)

mape_train_linear = round(mape(y_train, train_linear_pred), 3)
mae_train_linear = round(mae(y_train, train_linear_pred), 3)
rmse_train_linear = round(rmse(y_train, train_linear_pred), 3)

mape_train_arimax = round(mape(y_train, train_arimax_pred), 3)
mae_train_arimax = round(mae(y_train, train_arimax_pred), 3)
rmse_train_arimax = round(rmse(y_train, train_arimax_pred), 3)


mape_linear = round(mape(y_test, linear_pred), 3)
mae_linear = round(mae(y_test, linear_pred), 3)
rmse_linear = round(rmse(y_test, linear_pred), 3)

mape_ARIMAX = round(mape(y_test, arimax_pred), 3)
mae_ARIMAX = round(mae(y_test, arimax_pred), 3)
rmse_ARIMAX = round(rmse(y_test, arimax_pred), 3)


table = [
    ['Metric', 'Naive Method', 'LM Train', 'ARIMAX Train', 'LM Test', 'ARIMAX Test'],
    ['MAPE', mape_naive, mape_train_linear, mape_train_arimax, mape_linear, mape_ARIMAX],
    ['MAE', mae_naive, mae_train_linear, mae_train_arimax, mae_linear, mae_ARIMAX],
    ['RMSE', rmse_naive, rmse_train_linear, rmse_train_arimax, rmse_linear, rmse_ARIMAX]
]

print(tabulate(table, headers='firstrow', tablefmt='fancy_grid'))

#### Sensor 70

In [28]:
# Target series for this sensor.
y = df_60['avg_tens60']

# Candidate predictors: every non-date column at lags 1, 2 and 3.
lag_source = df_60.drop(['date'], axis=1)
lag_frames = [lag_source.shift(lag).add_suffix(f'_lag{lag}') for lag in (1, 2, 3)]
X = lag_frames[0].join(lag_frames[1]).join(lag_frames[2])

# Remove the lagged copies of the target column itself.
columns_to_drop = [f'avg_tens60_lag{lag}' for lag in (1, 2, 3)]
X = X.drop(columns=columns_to_drop, errors='ignore')

# The first three rows hold the NaNs introduced by shifting.
y = y.iloc[3:].reset_index(drop=True)
X = X.iloc[3:].reset_index(drop=True)

# Chronological split: no shuffling, the last ~30% of rows form the test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False, test_size=0.29878)

# PLS component-count search (helper defined outside this cell).
optimize_ncomp_pls(X_train, y_train, 30, 'RMSE', 'min')

In [29]:
# Persistence baseline: each value is predicted by the previous observation;
# the first test point is predicted by the last training value.
naive_pred = pd.concat([y_train.shift(1).fillna(y_train.iloc[-1])[1:], y_test.shift(1).fillna(y_train.iloc[-1])])

# Project the lagged predictors onto PLS components. The component count is
# set once and the score column names are derived from it.
n_components = 5
pls_model = PLSRegression(n_components=n_components)
pls_model.fit(X_train, y_train)

pls_columns = [f'PC{k}' for k in range(1, n_components + 1)]
X_train_pls = pd.DataFrame(pls_model.transform(X_train), columns=pls_columns)
X_test_pls = pd.DataFrame(pls_model.transform(X_test), columns=pls_columns)

# OLS on the PLS scores. has_constant='add' guarantees the intercept column is
# present in every design matrix (the default 'skip' omits it whenever an
# input column happens to be constant, which would break predict()).
linear_reg_model = sm.OLS(y_train, sm.add_constant(X_train_pls, has_constant='add')).fit()

train_linear_pred = linear_reg_model.predict(sm.add_constant(X_train_pls, has_constant='add'))
linear_pred = linear_reg_model.predict(sm.add_constant(X_test_pls, has_constant='add'))


# ARIMAX order search over p, q in 0..10 with d fixed at 1.
ps = range(0, 11, 1)
qs = range(0, 11, 1)
d = 1

order_list = list(product(ps, qs))


result_df, order_arimax = optimize_ARIMAX(y_train, X_train_pls, order_list, d)
model = SARIMAX(endog=y_train, exog=X_train_pls, order=order_arimax)
results = model.fit(disp=False)

# In-sample fitted values recovered from the residuals.
res_train_arimax = results.resid
train_arimax_pred = y_train - res_train_arimax


# Walk-forward one-step forecasts: forecast the next test point, then refit
# the same order on all observations seen so far (including that point).
# The expanding windows are slices of frames concatenated once, instead of
# growing the series with pd.concat on every iteration. y_test continues
# y_train's integer index because the split is unshuffled on a reset index.
arimax_pred = []
n_train = len(y_train)
y_all = pd.concat([y_train, y_test])
X_all_pls = pd.concat([X_train_pls, X_test_pls], ignore_index=True)
y_new_train = y_train.copy()
X_new_train = X_train_pls.copy()

for i in range(len(y_test)):

    next_exog = X_test_pls.iloc[i, :]

    forecast = results.get_forecast(steps=1, exog=next_exog)
    forecast_value = forecast.predicted_mean.values[0]
    arimax_pred.append(forecast_value)

    y_new_train = y_all.iloc[:n_train + i + 1]
    X_new_train = X_all_pls.iloc[:n_train + i + 1]

    model = SARIMAX(endog=y_new_train, exog=X_new_train, order=order_arimax)
    results = model.fit(disp=False)


# The naive baseline is scored on train (minus its first point) plus test.
naive_true = pd.concat([y_train[1:], y_test])
mape_naive = round(mape(naive_true, naive_pred), 3)
mae_naive = round(mae(naive_true, naive_pred), 3)
rmse_naive = round(rmse(naive_true, naive_pred), 3)

mape_train_linear = round(mape(y_train, train_linear_pred), 3)
mae_train_linear = round(mae(y_train, train_linear_pred), 3)
rmse_train_linear = round(rmse(y_train, train_linear_pred), 3)

mape_train_arimax = round(mape(y_train, train_arimax_pred), 3)
mae_train_arimax = round(mae(y_train, train_arimax_pred), 3)
rmse_train_arimax = round(rmse(y_train, train_arimax_pred), 3)


mape_linear = round(mape(y_test, linear_pred), 3)
mae_linear = round(mae(y_test, linear_pred), 3)
rmse_linear = round(rmse(y_test, linear_pred), 3)

mape_ARIMAX = round(mape(y_test, arimax_pred), 3)
mae_ARIMAX = round(mae(y_test, arimax_pred), 3)
rmse_ARIMAX = round(rmse(y_test, arimax_pred), 3)


table = [
    ['Metric', 'Naive Method', 'LM Train', 'ARIMAX Train', 'LM Test', 'ARIMAX Test'],
    ['MAPE', mape_naive, mape_train_linear, mape_train_arimax, mape_linear, mape_ARIMAX],
    ['MAE', mae_naive, mae_train_linear, mae_train_arimax, mae_linear, mae_ARIMAX],
    ['RMSE', rmse_naive, rmse_train_linear, rmse_train_arimax, rmse_linear, rmse_ARIMAX]
]

print(tabulate(table, headers='firstrow', tablefmt='fancy_grid'))

### Group 5

In [30]:
# Group 5: collapse the raw readings to one row per (timestamp, description)
# carrying five summary statistics of `value`.
df_group_5 = df_rovere[df_rovere['group_id'] == '5'].reset_index(drop=True)
df_group_5 = df_group_5.groupby(['timestamp', 'description']).agg({'value': ['min', 'max', 'mean', 'median', 'sum']}).reset_index()
df_group_5.columns = ['timestamp', 'description', 'val_min', 'val_max', 'val_avg', 'val_med', 'val_sum']

# Wide layout: one column per (statistic, description) pair. The flat names
# below are assigned by position (statistic-major), so they rely on the order
# in which pivot() lays out the description columns.
df_pivot_5 = df_group_5.pivot(index='timestamp', columns='description', values=['val_min', 'val_max', 'val_avg', 'val_med', 'val_sum']).reset_index()
df_pivot_5.columns.name = None
df_pivot_5.columns = ['date', 'min_hum', 'min_temp', 'min_solar', 'min_irr', 'min_rain', 'min_tens30', 'min_tens60',
                      'max_hum', 'max_temp', 'max_solar', 'max_irr', 'max_rain', 'max_tens30', 'max_tens60',
                      'avg_hum', 'avg_temp', 'avg_solar', 'avg_irr', 'avg_rain', 'avg_tens30', 'avg_tens60',
                      'med_hum', 'med_temp', 'med_solar', 'med_irr', 'med_rain', 'med_tens30', 'med_tens60',
                      'sum_hum', 'sum_temp', 'sum_solar', 'sum_irr', 'sum_rain', 'sum_tens30', 'sum_tens60']
df_pivot_5 = df_pivot_5.dropna().reset_index(drop=True)

# Discard the columns that are not used as predictors.
df = df_pivot_5
columns_to_drop = ['min_irr', 'max_irr', 'avg_irr', 'med_irr', 'min_rain', 'sum_hum', 'sum_temp', 'sum_solar', 'avg_rain']
df = df.drop(columns=columns_to_drop).dropna().reset_index(drop=True)


# One frame per tensiometer: each keeps its own tens* columns and drops the
# other sensor's.
col_drop_30 = ['min_tens60', 'max_tens60', 'avg_tens60', 'med_tens60', 'sum_tens60']
df_30 = df.drop(columns=col_drop_30).dropna().reset_index(drop=True)

# Fix: the last entry was 'sum_tens60', which left the 30 cm sum inside the
# 60 cm frame and removed the 60 cm sum, unlike Groups 4 and 7. Because the
# predictor set of df_60 changes, the PLS component count chosen for the
# 60 cm sensor should be re-checked.
col_drop_60 = ['min_tens30', 'max_tens30', 'avg_tens30', 'med_tens30', 'sum_tens30']
df_60 = df.drop(columns=col_drop_60).dropna().reset_index(drop=True)

#### Sensor 61

In [31]:
# Target series for this sensor.
y = df_30['avg_tens30']

# Candidate predictors: every non-date column at lags 1, 2 and 3.
lag_source = df_30.drop(['date'], axis=1)
lag_frames = [lag_source.shift(lag).add_suffix(f'_lag{lag}') for lag in (1, 2, 3)]
X = lag_frames[0].join(lag_frames[1]).join(lag_frames[2])

# Remove the lagged copies of the target column itself.
columns_to_drop = [f'avg_tens30_lag{lag}' for lag in (1, 2, 3)]
X = X.drop(columns=columns_to_drop, errors='ignore')

# The first three rows hold the NaNs introduced by shifting.
y = y.iloc[3:].reset_index(drop=True)
X = X.iloc[3:].reset_index(drop=True)

# Chronological split: no shuffling, the last ~30% of rows form the test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False, test_size=0.29878)

# PLS component-count search (helper defined outside this cell).
optimize_ncomp_pls(X_train, y_train, 30, 'RMSE', 'min')

In [32]:
# Persistence baseline: each value is predicted by the previous observation;
# the first test point is predicted by the last training value.
naive_pred = pd.concat([y_train.shift(1).fillna(y_train.iloc[-1])[1:], y_test.shift(1).fillna(y_train.iloc[-1])])

# Project the lagged predictors onto PLS components. The component count is
# set once and the score column names are derived from it.
n_components = 2
pls_model = PLSRegression(n_components=n_components)
pls_model.fit(X_train, y_train)

pls_columns = [f'PC{k}' for k in range(1, n_components + 1)]
X_train_pls = pd.DataFrame(pls_model.transform(X_train), columns=pls_columns)
X_test_pls = pd.DataFrame(pls_model.transform(X_test), columns=pls_columns)

# OLS on the PLS scores. has_constant='add' guarantees the intercept column is
# present in every design matrix (the default 'skip' omits it whenever an
# input column happens to be constant, which would break predict()).
linear_reg_model = sm.OLS(y_train, sm.add_constant(X_train_pls, has_constant='add')).fit()

train_linear_pred = linear_reg_model.predict(sm.add_constant(X_train_pls, has_constant='add'))
linear_pred = linear_reg_model.predict(sm.add_constant(X_test_pls, has_constant='add'))


# ARIMAX order search over p, q in 0..10 with d fixed at 1.
ps = range(0, 11, 1)
qs = range(0, 11, 1)
d = 1

order_list = list(product(ps, qs))


result_df, order_arimax = optimize_ARIMAX(y_train, X_train_pls, order_list, d)
model = SARIMAX(endog=y_train, exog=X_train_pls, order=order_arimax)
results = model.fit(disp=False)

# In-sample fitted values recovered from the residuals.
res_train_arimax = results.resid
train_arimax_pred = y_train - res_train_arimax


# Walk-forward one-step forecasts: forecast the next test point, then refit
# the same order on all observations seen so far (including that point).
# The expanding windows are slices of frames concatenated once, instead of
# growing the series with pd.concat on every iteration. y_test continues
# y_train's integer index because the split is unshuffled on a reset index.
arimax_pred = []
n_train = len(y_train)
y_all = pd.concat([y_train, y_test])
X_all_pls = pd.concat([X_train_pls, X_test_pls], ignore_index=True)
y_new_train = y_train.copy()
X_new_train = X_train_pls.copy()

for i in range(len(y_test)):

    next_exog = X_test_pls.iloc[i, :]

    forecast = results.get_forecast(steps=1, exog=next_exog)
    forecast_value = forecast.predicted_mean.values[0]
    arimax_pred.append(forecast_value)

    y_new_train = y_all.iloc[:n_train + i + 1]
    X_new_train = X_all_pls.iloc[:n_train + i + 1]

    model = SARIMAX(endog=y_new_train, exog=X_new_train, order=order_arimax)
    results = model.fit(disp=False)


# The naive baseline is scored on train (minus its first point) plus test.
naive_true = pd.concat([y_train[1:], y_test])
mape_naive = round(mape(naive_true, naive_pred), 3)
mae_naive = round(mae(naive_true, naive_pred), 3)
rmse_naive = round(rmse(naive_true, naive_pred), 3)

mape_train_linear = round(mape(y_train, train_linear_pred), 3)
mae_train_linear = round(mae(y_train, train_linear_pred), 3)
rmse_train_linear = round(rmse(y_train, train_linear_pred), 3)

mape_train_arimax = round(mape(y_train, train_arimax_pred), 3)
mae_train_arimax = round(mae(y_train, train_arimax_pred), 3)
rmse_train_arimax = round(rmse(y_train, train_arimax_pred), 3)


mape_linear = round(mape(y_test, linear_pred), 3)
mae_linear = round(mae(y_test, linear_pred), 3)
rmse_linear = round(rmse(y_test, linear_pred), 3)

mape_ARIMAX = round(mape(y_test, arimax_pred), 3)
mae_ARIMAX = round(mae(y_test, arimax_pred), 3)
rmse_ARIMAX = round(rmse(y_test, arimax_pred), 3)


table = [
    ['Metric', 'Naive Method', 'LM Train', 'ARIMAX Train', 'LM Test', 'ARIMAX Test'],
    ['MAPE', mape_naive, mape_train_linear, mape_train_arimax, mape_linear, mape_ARIMAX],
    ['MAE', mae_naive, mae_train_linear, mae_train_arimax, mae_linear, mae_ARIMAX],
    ['RMSE', rmse_naive, rmse_train_linear, rmse_train_arimax, rmse_linear, rmse_ARIMAX]
]

print(tabulate(table, headers='firstrow', tablefmt='fancy_grid'))

#### Sensor 62

In [33]:
# Target series for this sensor.
y = df_60['avg_tens60']

# Candidate predictors: every non-date column at lags 1, 2 and 3.
lag_source = df_60.drop(['date'], axis=1)
lag_frames = [lag_source.shift(lag).add_suffix(f'_lag{lag}') for lag in (1, 2, 3)]
X = lag_frames[0].join(lag_frames[1]).join(lag_frames[2])

# Remove the lagged copies of the target column itself.
columns_to_drop = [f'avg_tens60_lag{lag}' for lag in (1, 2, 3)]
X = X.drop(columns=columns_to_drop, errors='ignore')

# The first three rows hold the NaNs introduced by shifting.
y = y.iloc[3:].reset_index(drop=True)
X = X.iloc[3:].reset_index(drop=True)

# Chronological split: no shuffling, the last ~30% of rows form the test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False, test_size=0.29878)

# PLS component-count search (helper defined outside this cell).
optimize_ncomp_pls(X_train, y_train, 30, 'RMSE', 'min')

In [34]:
# Persistence baseline: each value is predicted by the previous observation;
# the first test point is predicted by the last training value.
naive_pred = pd.concat([y_train.shift(1).fillna(y_train.iloc[-1])[1:], y_test.shift(1).fillna(y_train.iloc[-1])])

# Project the lagged predictors onto PLS components. The component count is
# set once and the score column names are derived from it.
n_components = 2
pls_model = PLSRegression(n_components=n_components)
pls_model.fit(X_train, y_train)

pls_columns = [f'PC{k}' for k in range(1, n_components + 1)]
X_train_pls = pd.DataFrame(pls_model.transform(X_train), columns=pls_columns)
X_test_pls = pd.DataFrame(pls_model.transform(X_test), columns=pls_columns)

# OLS on the PLS scores. has_constant='add' guarantees the intercept column is
# present in every design matrix (the default 'skip' omits it whenever an
# input column happens to be constant, which would break predict()).
linear_reg_model = sm.OLS(y_train, sm.add_constant(X_train_pls, has_constant='add')).fit()

train_linear_pred = linear_reg_model.predict(sm.add_constant(X_train_pls, has_constant='add'))
linear_pred = linear_reg_model.predict(sm.add_constant(X_test_pls, has_constant='add'))


# ARIMAX order search over p, q in 0..10 with d fixed at 1.
ps = range(0, 11, 1)
qs = range(0, 11, 1)
d = 1

order_list = list(product(ps, qs))


result_df, order_arimax = optimize_ARIMAX(y_train, X_train_pls, order_list, d)
model = SARIMAX(endog=y_train, exog=X_train_pls, order=order_arimax)
results = model.fit(disp=False)

# In-sample fitted values recovered from the residuals.
res_train_arimax = results.resid
train_arimax_pred = y_train - res_train_arimax


# Walk-forward one-step forecasts: forecast the next test point, then refit
# the same order on all observations seen so far (including that point).
# The expanding windows are slices of frames concatenated once, instead of
# growing the series with pd.concat on every iteration. y_test continues
# y_train's integer index because the split is unshuffled on a reset index.
arimax_pred = []
n_train = len(y_train)
y_all = pd.concat([y_train, y_test])
X_all_pls = pd.concat([X_train_pls, X_test_pls], ignore_index=True)
y_new_train = y_train.copy()
X_new_train = X_train_pls.copy()

for i in range(len(y_test)):

    next_exog = X_test_pls.iloc[i, :]

    forecast = results.get_forecast(steps=1, exog=next_exog)
    forecast_value = forecast.predicted_mean.values[0]
    arimax_pred.append(forecast_value)

    y_new_train = y_all.iloc[:n_train + i + 1]
    X_new_train = X_all_pls.iloc[:n_train + i + 1]

    model = SARIMAX(endog=y_new_train, exog=X_new_train, order=order_arimax)
    results = model.fit(disp=False)


# The naive baseline is scored on train (minus its first point) plus test.
naive_true = pd.concat([y_train[1:], y_test])
mape_naive = round(mape(naive_true, naive_pred), 3)
mae_naive = round(mae(naive_true, naive_pred), 3)
rmse_naive = round(rmse(naive_true, naive_pred), 3)

mape_train_linear = round(mape(y_train, train_linear_pred), 3)
mae_train_linear = round(mae(y_train, train_linear_pred), 3)
rmse_train_linear = round(rmse(y_train, train_linear_pred), 3)

mape_train_arimax = round(mape(y_train, train_arimax_pred), 3)
mae_train_arimax = round(mae(y_train, train_arimax_pred), 3)
rmse_train_arimax = round(rmse(y_train, train_arimax_pred), 3)


mape_linear = round(mape(y_test, linear_pred), 3)
mae_linear = round(mae(y_test, linear_pred), 3)
rmse_linear = round(rmse(y_test, linear_pred), 3)

mape_ARIMAX = round(mape(y_test, arimax_pred), 3)
mae_ARIMAX = round(mae(y_test, arimax_pred), 3)
rmse_ARIMAX = round(rmse(y_test, arimax_pred), 3)


table = [
    ['Metric', 'Naive Method', 'LM Train', 'ARIMAX Train', 'LM Test', 'ARIMAX Test'],
    ['MAPE', mape_naive, mape_train_linear, mape_train_arimax, mape_linear, mape_ARIMAX],
    ['MAE', mae_naive, mae_train_linear, mae_train_arimax, mae_linear, mae_ARIMAX],
    ['RMSE', rmse_naive, rmse_train_linear, rmse_train_arimax, rmse_linear, rmse_ARIMAX]
]

print(tabulate(table, headers='firstrow', tablefmt='fancy_grid'))

### Group 6

In [35]:
# Group 6: collapse the raw readings to one row per (timestamp, description)
# carrying five summary statistics of `value`.
df_group_6 = df_rovere[df_rovere['group_id'] == '6'].reset_index(drop=True)
df_group_6 = df_group_6.groupby(['timestamp', 'description']).agg({'value': ['min', 'max', 'mean', 'median', 'sum']}).reset_index()
df_group_6.columns = ['timestamp', 'description', 'val_min', 'val_max', 'val_avg', 'val_med', 'val_sum']

# Wide layout: one column per (statistic, description) pair. The flat names
# below are assigned by position (statistic-major), so they rely on the order
# in which pivot() lays out the description columns.
df_pivot_6 = df_group_6.pivot(index='timestamp', columns='description', values=['val_min', 'val_max', 'val_avg', 'val_med', 'val_sum']).reset_index()
df_pivot_6.columns.name = None
df_pivot_6.columns = ['date', 'min_hum', 'min_temp', 'min_solar', 'min_irr', 'min_rain', 'min_tens30', 'min_tens60',
                      'max_hum', 'max_temp', 'max_solar', 'max_irr', 'max_rain', 'max_tens30', 'max_tens60',
                      'avg_hum', 'avg_temp', 'avg_solar', 'avg_irr', 'avg_rain', 'avg_tens30', 'avg_tens60',
                      'med_hum', 'med_temp', 'med_solar', 'med_irr', 'med_rain', 'med_tens30', 'med_tens60',
                      'sum_hum', 'sum_temp', 'sum_solar', 'sum_irr', 'sum_rain', 'sum_tens30', 'sum_tens60']
df_pivot_6 = df_pivot_6.dropna().reset_index(drop=True)

# Discard the columns that are not used as predictors.
df = df_pivot_6
columns_to_drop = ['min_irr', 'max_irr', 'avg_irr', 'med_irr', 'min_rain', 'sum_hum', 'sum_temp', 'sum_solar', 'avg_rain']
df = df.drop(columns=columns_to_drop).dropna().reset_index(drop=True)


# One frame per tensiometer: each keeps its own tens* columns and drops the
# other sensor's.
col_drop_30 = ['min_tens60', 'max_tens60', 'avg_tens60', 'med_tens60', 'sum_tens60']
df_30 = df.drop(columns=col_drop_30).dropna().reset_index(drop=True)

# Fix: the last entry was 'sum_tens60', which left the 30 cm sum inside the
# 60 cm frame and removed the 60 cm sum, unlike Groups 4 and 7. Because the
# predictor set of df_60 changes, the PLS component count chosen for the
# 60 cm sensor should be re-checked.
col_drop_60 = ['min_tens30', 'max_tens30', 'avg_tens30', 'med_tens30', 'sum_tens30']
df_60 = df.drop(columns=col_drop_60).dropna().reset_index(drop=True)

#### Sensor 63

In [36]:
# Target series for this sensor.
y = df_30['avg_tens30']

# Candidate predictors: every non-date column at lags 1, 2 and 3.
lag_source = df_30.drop(['date'], axis=1)
lag_frames = [lag_source.shift(lag).add_suffix(f'_lag{lag}') for lag in (1, 2, 3)]
X = lag_frames[0].join(lag_frames[1]).join(lag_frames[2])

# Remove the lagged copies of the target column itself.
columns_to_drop = [f'avg_tens30_lag{lag}' for lag in (1, 2, 3)]
X = X.drop(columns=columns_to_drop, errors='ignore')

# The first three rows hold the NaNs introduced by shifting.
y = y.iloc[3:].reset_index(drop=True)
X = X.iloc[3:].reset_index(drop=True)

# Chronological split: no shuffling, the last ~30% of rows form the test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False, test_size=0.29878)

# PLS component-count search (helper defined outside this cell).
optimize_ncomp_pls(X_train, y_train, 30, 'RMSE', 'min')

In [37]:
# Persistence baseline: each value is predicted by the previous observation;
# the first test point is predicted by the last training value.
naive_pred = pd.concat([y_train.shift(1).fillna(y_train.iloc[-1])[1:], y_test.shift(1).fillna(y_train.iloc[-1])])

# Project the lagged predictors onto PLS components. The component count is
# set once and the score column names are derived from it.
n_components = 15
pls_model = PLSRegression(n_components=n_components)
pls_model.fit(X_train, y_train)

pls_columns = [f'PC{k}' for k in range(1, n_components + 1)]
X_train_pls = pd.DataFrame(pls_model.transform(X_train), columns=pls_columns)
X_test_pls = pd.DataFrame(pls_model.transform(X_test), columns=pls_columns)

# OLS on the PLS scores. has_constant='add' guarantees the intercept column is
# present in every design matrix (the default 'skip' omits it whenever an
# input column happens to be constant, which would break predict()).
linear_reg_model = sm.OLS(y_train, sm.add_constant(X_train_pls, has_constant='add')).fit()

train_linear_pred = linear_reg_model.predict(sm.add_constant(X_train_pls, has_constant='add'))
linear_pred = linear_reg_model.predict(sm.add_constant(X_test_pls, has_constant='add'))


# ARIMAX order search over p, q in 0..10 with d fixed at 1.
ps = range(0, 11, 1)
qs = range(0, 11, 1)
d = 1

order_list = list(product(ps, qs))


result_df, order_arimax = optimize_ARIMAX(y_train, X_train_pls, order_list, d)
model = SARIMAX(endog=y_train, exog=X_train_pls, order=order_arimax)
results = model.fit(disp=False)

# In-sample fitted values recovered from the residuals.
res_train_arimax = results.resid
train_arimax_pred = y_train - res_train_arimax


# Walk-forward one-step forecasts: forecast the next test point, then refit
# the same order on all observations seen so far (including that point).
# The expanding windows are slices of frames concatenated once, instead of
# growing the series with pd.concat on every iteration. y_test continues
# y_train's integer index because the split is unshuffled on a reset index.
arimax_pred = []
n_train = len(y_train)
y_all = pd.concat([y_train, y_test])
X_all_pls = pd.concat([X_train_pls, X_test_pls], ignore_index=True)
y_new_train = y_train.copy()
X_new_train = X_train_pls.copy()

for i in range(len(y_test)):

    next_exog = X_test_pls.iloc[i, :]

    forecast = results.get_forecast(steps=1, exog=next_exog)
    forecast_value = forecast.predicted_mean.values[0]
    arimax_pred.append(forecast_value)

    y_new_train = y_all.iloc[:n_train + i + 1]
    X_new_train = X_all_pls.iloc[:n_train + i + 1]

    model = SARIMAX(endog=y_new_train, exog=X_new_train, order=order_arimax)
    results = model.fit(disp=False)


# The naive baseline is scored on train (minus its first point) plus test.
naive_true = pd.concat([y_train[1:], y_test])
mape_naive = round(mape(naive_true, naive_pred), 3)
mae_naive = round(mae(naive_true, naive_pred), 3)
rmse_naive = round(rmse(naive_true, naive_pred), 3)

mape_train_linear = round(mape(y_train, train_linear_pred), 3)
mae_train_linear = round(mae(y_train, train_linear_pred), 3)
rmse_train_linear = round(rmse(y_train, train_linear_pred), 3)

mape_train_arimax = round(mape(y_train, train_arimax_pred), 3)
mae_train_arimax = round(mae(y_train, train_arimax_pred), 3)
rmse_train_arimax = round(rmse(y_train, train_arimax_pred), 3)


mape_linear = round(mape(y_test, linear_pred), 3)
mae_linear = round(mae(y_test, linear_pred), 3)
rmse_linear = round(rmse(y_test, linear_pred), 3)

mape_ARIMAX = round(mape(y_test, arimax_pred), 3)
mae_ARIMAX = round(mae(y_test, arimax_pred), 3)
rmse_ARIMAX = round(rmse(y_test, arimax_pred), 3)


table = [
    ['Metric', 'Naive Method', 'LM Train', 'ARIMAX Train', 'LM Test', 'ARIMAX Test'],
    ['MAPE', mape_naive, mape_train_linear, mape_train_arimax, mape_linear, mape_ARIMAX],
    ['MAE', mae_naive, mae_train_linear, mae_train_arimax, mae_linear, mae_ARIMAX],
    ['RMSE', rmse_naive, rmse_train_linear, rmse_train_arimax, rmse_linear, rmse_ARIMAX]
]

print(tabulate(table, headers='firstrow', tablefmt='fancy_grid'))

#### Sensor 64

In [38]:
# Target series for this sensor.
y = df_60['avg_tens60']

# Candidate predictors: every non-date column at lags 1, 2 and 3.
lag_source = df_60.drop(['date'], axis=1)
lag_frames = [lag_source.shift(lag).add_suffix(f'_lag{lag}') for lag in (1, 2, 3)]
X = lag_frames[0].join(lag_frames[1]).join(lag_frames[2])

# Remove the lagged copies of the target column itself.
columns_to_drop = [f'avg_tens60_lag{lag}' for lag in (1, 2, 3)]
X = X.drop(columns=columns_to_drop, errors='ignore')

# The first three rows hold the NaNs introduced by shifting.
y = y.iloc[3:].reset_index(drop=True)
X = X.iloc[3:].reset_index(drop=True)

# Chronological split: no shuffling, the last ~30% of rows form the test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False, test_size=0.29878)

# PLS component-count search (helper defined outside this cell).
optimize_ncomp_pls(X_train, y_train, 30, 'RMSE', 'min')

In [39]:
# Persistence baseline: each value is predicted by the previous observation;
# the first test point is predicted by the last training value.
naive_pred = pd.concat([y_train.shift(1).fillna(y_train.iloc[-1])[1:], y_test.shift(1).fillna(y_train.iloc[-1])])

# Project the lagged predictors onto PLS components. The component count is
# set once and the score column names are derived from it.
n_components = 11
pls_model = PLSRegression(n_components=n_components)
pls_model.fit(X_train, y_train)

pls_columns = [f'PC{k}' for k in range(1, n_components + 1)]
X_train_pls = pd.DataFrame(pls_model.transform(X_train), columns=pls_columns)
X_test_pls = pd.DataFrame(pls_model.transform(X_test), columns=pls_columns)

# OLS on the PLS scores. has_constant='add' guarantees the intercept column is
# present in every design matrix (the default 'skip' omits it whenever an
# input column happens to be constant, which would break predict()).
linear_reg_model = sm.OLS(y_train, sm.add_constant(X_train_pls, has_constant='add')).fit()

train_linear_pred = linear_reg_model.predict(sm.add_constant(X_train_pls, has_constant='add'))
linear_pred = linear_reg_model.predict(sm.add_constant(X_test_pls, has_constant='add'))


# ARIMAX order search over p, q in 0..10 with d fixed at 1.
ps = range(0, 11, 1)
qs = range(0, 11, 1)
d = 1

order_list = list(product(ps, qs))


result_df, order_arimax = optimize_ARIMAX(y_train, X_train_pls, order_list, d)
model = SARIMAX(endog=y_train, exog=X_train_pls, order=order_arimax)
results = model.fit(disp=False)

# In-sample fitted values recovered from the residuals.
res_train_arimax = results.resid
train_arimax_pred = y_train - res_train_arimax


# Walk-forward one-step forecasts: forecast the next test point, then refit
# the same order on all observations seen so far (including that point).
# The expanding windows are slices of frames concatenated once, instead of
# growing the series with pd.concat on every iteration. y_test continues
# y_train's integer index because the split is unshuffled on a reset index.
arimax_pred = []
n_train = len(y_train)
y_all = pd.concat([y_train, y_test])
X_all_pls = pd.concat([X_train_pls, X_test_pls], ignore_index=True)
y_new_train = y_train.copy()
X_new_train = X_train_pls.copy()

for i in range(len(y_test)):

    next_exog = X_test_pls.iloc[i, :]

    forecast = results.get_forecast(steps=1, exog=next_exog)
    forecast_value = forecast.predicted_mean.values[0]
    arimax_pred.append(forecast_value)

    y_new_train = y_all.iloc[:n_train + i + 1]
    X_new_train = X_all_pls.iloc[:n_train + i + 1]

    model = SARIMAX(endog=y_new_train, exog=X_new_train, order=order_arimax)
    results = model.fit(disp=False)


# The naive baseline is scored on train (minus its first point) plus test.
naive_true = pd.concat([y_train[1:], y_test])
mape_naive = round(mape(naive_true, naive_pred), 3)
mae_naive = round(mae(naive_true, naive_pred), 3)
rmse_naive = round(rmse(naive_true, naive_pred), 3)

mape_train_linear = round(mape(y_train, train_linear_pred), 3)
mae_train_linear = round(mae(y_train, train_linear_pred), 3)
rmse_train_linear = round(rmse(y_train, train_linear_pred), 3)

mape_train_arimax = round(mape(y_train, train_arimax_pred), 3)
mae_train_arimax = round(mae(y_train, train_arimax_pred), 3)
rmse_train_arimax = round(rmse(y_train, train_arimax_pred), 3)


mape_linear = round(mape(y_test, linear_pred), 3)
mae_linear = round(mae(y_test, linear_pred), 3)
rmse_linear = round(rmse(y_test, linear_pred), 3)

mape_ARIMAX = round(mape(y_test, arimax_pred), 3)
mae_ARIMAX = round(mae(y_test, arimax_pred), 3)
rmse_ARIMAX = round(rmse(y_test, arimax_pred), 3)


table = [
    ['Metric', 'Naive Method', 'LM Train', 'ARIMAX Train', 'LM Test', 'ARIMAX Test'],
    ['MAPE', mape_naive, mape_train_linear, mape_train_arimax, mape_linear, mape_ARIMAX],
    ['MAE', mae_naive, mae_train_linear, mae_train_arimax, mae_linear, mae_ARIMAX],
    ['RMSE', rmse_naive, rmse_train_linear, rmse_train_arimax, rmse_linear, rmse_ARIMAX]
]

print(tabulate(table, headers='firstrow', tablefmt='fancy_grid'))

### Group 7

In [40]:
# Group 7: collapse the raw readings to one row per (timestamp, description)
# carrying five summary statistics of `value`.
df_group_7 = (
    df_rovere[df_rovere['group_id'] == '7']
    .reset_index(drop=True)
    .groupby(['timestamp', 'description'])
    .agg({'value': ['min', 'max', 'mean', 'median', 'sum']})
    .reset_index()
)
df_group_7.columns = ['timestamp', 'description', 'val_min', 'val_max', 'val_avg', 'val_med', 'val_sum']

# Wide layout: one column per (statistic, description) pair.
df_pivot_7 = df_group_7.pivot(
    index='timestamp',
    columns='description',
    values=['val_min', 'val_max', 'val_avg', 'val_med', 'val_sum'],
).reset_index()
df_pivot_7.columns.name = None

# Flat names are assigned by position (statistic-major), so they rely on the
# order in which pivot() lays out the description columns.
stat_prefixes = ['min', 'max', 'avg', 'med', 'sum']
variable_suffixes = ['hum', 'temp', 'solar', 'irr', 'rain', 'tens30', 'tens60']
df_pivot_7.columns = ['date'] + [f'{stat}_{var}' for stat in stat_prefixes for var in variable_suffixes]

df_pivot_7 = df_pivot_7.dropna().reset_index(drop=True)

# Discard the columns that are not used as predictors.
columns_to_drop = ['min_irr', 'max_irr', 'avg_irr', 'med_irr', 'min_rain', 'sum_hum', 'sum_temp', 'sum_solar', 'avg_rain']
df = df_pivot_7.drop(columns=columns_to_drop).dropna().reset_index(drop=True)

# One frame per tensiometer: each keeps its own tens* columns and drops the
# other sensor's.
col_drop_30 = [f'{stat}_tens60' for stat in stat_prefixes]
df_30 = df.drop(columns=col_drop_30).dropna().reset_index(drop=True)

col_drop_60 = [f'{stat}_tens30' for stat in stat_prefixes]
df_60 = df.drop(columns=col_drop_60).dropna().reset_index(drop=True)

#### Sensor 67

In [41]:
# Target series for this sensor.
y = df_30['avg_tens30']

# Candidate predictors: every non-date column at lags 1, 2 and 3.
lag_source = df_30.drop(['date'], axis=1)
lag_frames = [lag_source.shift(lag).add_suffix(f'_lag{lag}') for lag in (1, 2, 3)]
X = lag_frames[0].join(lag_frames[1]).join(lag_frames[2])

# Remove the lagged copies of the target column itself.
columns_to_drop = [f'avg_tens30_lag{lag}' for lag in (1, 2, 3)]
X = X.drop(columns=columns_to_drop, errors='ignore')

# The first three rows hold the NaNs introduced by shifting.
y = y.iloc[3:].reset_index(drop=True)
X = X.iloc[3:].reset_index(drop=True)

# Chronological split: no shuffling, the last ~30% of rows form the test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False, test_size=0.29878)

# PLS component-count search (helper defined outside this cell).
optimize_ncomp_pls(X_train, y_train, 30, 'RMSE', 'min')

In [42]:
# Persistence baseline: each value is predicted by the previous observation;
# the first test point is predicted by the last training value.
naive_pred = pd.concat([y_train.shift(1).fillna(y_train.iloc[-1])[1:], y_test.shift(1).fillna(y_train.iloc[-1])])

# Project the lagged predictors onto PLS components. The component count is
# set once and the score column names are derived from it.
n_components = 13
pls_model = PLSRegression(n_components=n_components)
pls_model.fit(X_train, y_train)

pls_columns = [f'PC{k}' for k in range(1, n_components + 1)]
X_train_pls = pd.DataFrame(pls_model.transform(X_train), columns=pls_columns)
X_test_pls = pd.DataFrame(pls_model.transform(X_test), columns=pls_columns)

# OLS on the PLS scores. has_constant='add' guarantees the intercept column is
# present in every design matrix (the default 'skip' omits it whenever an
# input column happens to be constant, which would break predict()).
linear_reg_model = sm.OLS(y_train, sm.add_constant(X_train_pls, has_constant='add')).fit()

train_linear_pred = linear_reg_model.predict(sm.add_constant(X_train_pls, has_constant='add'))
linear_pred = linear_reg_model.predict(sm.add_constant(X_test_pls, has_constant='add'))


# ARIMAX order search over p, q in 0..10 with d fixed at 1.
ps = range(0, 11, 1)
qs = range(0, 11, 1)
d = 1

order_list = list(product(ps, qs))


result_df, order_arimax = optimize_ARIMAX(y_train, X_train_pls, order_list, d)
model = SARIMAX(endog=y_train, exog=X_train_pls, order=order_arimax)
results = model.fit(disp=False)

# In-sample fitted values recovered from the residuals.
res_train_arimax = results.resid
train_arimax_pred = y_train - res_train_arimax


# Walk-forward one-step forecasts: forecast the next test point, then refit
# the same order on all observations seen so far (including that point).
# The expanding windows are slices of frames concatenated once, instead of
# growing the series with pd.concat on every iteration. y_test continues
# y_train's integer index because the split is unshuffled on a reset index.
arimax_pred = []
n_train = len(y_train)
y_all = pd.concat([y_train, y_test])
X_all_pls = pd.concat([X_train_pls, X_test_pls], ignore_index=True)
y_new_train = y_train.copy()
X_new_train = X_train_pls.copy()

for i in range(len(y_test)):

    next_exog = X_test_pls.iloc[i, :]

    forecast = results.get_forecast(steps=1, exog=next_exog)
    forecast_value = forecast.predicted_mean.values[0]
    arimax_pred.append(forecast_value)

    y_new_train = y_all.iloc[:n_train + i + 1]
    X_new_train = X_all_pls.iloc[:n_train + i + 1]

    model = SARIMAX(endog=y_new_train, exog=X_new_train, order=order_arimax)
    results = model.fit(disp=False)


# The naive baseline is scored on train (minus its first point) plus test.
naive_true = pd.concat([y_train[1:], y_test])
mape_naive = round(mape(naive_true, naive_pred), 3)
mae_naive = round(mae(naive_true, naive_pred), 3)
rmse_naive = round(rmse(naive_true, naive_pred), 3)

mape_train_linear = round(mape(y_train, train_linear_pred), 3)
mae_train_linear = round(mae(y_train, train_linear_pred), 3)
rmse_train_linear = round(rmse(y_train, train_linear_pred), 3)

mape_train_arimax = round(mape(y_train, train_arimax_pred), 3)
mae_train_arimax = round(mae(y_train, train_arimax_pred), 3)
rmse_train_arimax = round(rmse(y_train, train_arimax_pred), 3)


mape_linear = round(mape(y_test, linear_pred), 3)
mae_linear = round(mae(y_test, linear_pred), 3)
rmse_linear = round(rmse(y_test, linear_pred), 3)

mape_ARIMAX = round(mape(y_test, arimax_pred), 3)
mae_ARIMAX = round(mae(y_test, arimax_pred), 3)
rmse_ARIMAX = round(rmse(y_test, arimax_pred), 3)


table = [
    ['Metric', 'Naive Method', 'LM Train', 'ARIMAX Train', 'LM Test', 'ARIMAX Test'],
    ['MAPE', mape_naive, mape_train_linear, mape_train_arimax, mape_linear, mape_ARIMAX],
    ['MAE', mae_naive, mae_train_linear, mae_train_arimax, mae_linear, mae_ARIMAX],
    ['RMSE', rmse_naive, rmse_train_linear, rmse_train_arimax, rmse_linear, rmse_ARIMAX]
]

print(tabulate(table, headers='firstrow', tablefmt='fancy_grid'))

#### Sensor 68

In [43]:
# Target and lagged predictors for the `avg_tens60` series of this sensor.
y = df_60['avg_tens60']
X = df_60.drop(columns=['date'])

# Every remaining column is replicated at lags 1, 2 and 3, so each row only
# carries information from the three preceding rows.
lagged_frames = [X.shift(lag).add_suffix(f'_lag{lag}') for lag in (1, 2, 3)]
X = pd.concat(lagged_frames, axis=1)

# The lagged copies of the target column itself are not used as predictors.
columns_to_drop = [f'avg_tens60_lag{lag}' for lag in (1, 2, 3)]
X = X.drop(columns=columns_to_drop, errors='ignore')

# The first three rows contain NaNs introduced by the shifts.
y, X = (frame.iloc[3:].reset_index(drop=True) for frame in (y, X))

# shuffle=False keeps the row order, so the test set is the final block of rows.
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False, test_size=0.29878)

# Last expression of the cell: its result is what the notebook displays.
optimize_ncomp_pls(X_train, y_train, 30, 'RMSE', 'min')

In [44]:
# Benchmark for this sensor: one-step-ahead forecasts from a naive persistence
# rule, an OLS regression on PLS scores and an ARIMAX model on the same scores,
# summarised as MAPE / MAE / RMSE in a single table.

# --- Naive baseline ----------------------------------------------------------
# Forecast = previous observation. The first training point has no predecessor
# and is dropped by the [1:] slice; the first test point is predicted with the
# last training value.
naive_pred = pd.concat([
    y_train.shift(1).fillna(y_train.iloc[-1])[1:],
    y_test.shift(1).fillna(y_train.iloc[-1]),
])

# --- PLS projection (fitted on the training rows only) -----------------------
# NOTE(review): the component count is presumably read off the
# optimize_ncomp_pls output of the preceding cell -- confirm.
n_components = 1
pls_columns = [f'PC{k}' for k in range(1, n_components + 1)]

pls_model = PLSRegression(n_components=n_components)
pls_model.fit(X_train, y_train)

X_train_pls = pd.DataFrame(pls_model.transform(X_train), columns=pls_columns)
X_test_pls = pd.DataFrame(pls_model.transform(X_test), columns=pls_columns)

# --- Linear model on the PLS scores ------------------------------------------
linear_reg_model = sm.OLS(y_train, sm.add_constant(X_train_pls)).fit()

train_linear_pred = linear_reg_model.predict(sm.add_constant(X_train_pls))
linear_pred = linear_reg_model.predict(sm.add_constant(X_test_pls))


# --- ARIMAX order selection ---------------------------------------------------
ps = range(0, 11, 1)
qs = range(0, 11, 1)
d = 1

order_list = list(product(ps, qs))


# optimize_ARIMAX fits every (p, d, q) candidate and returns the table sorted
# by ascending AIC together with the best order.
result_df, order_arimax = optimize_ARIMAX(y_train, X_train_pls, order_list, d)
model = SARIMAX(endog=y_train, exog=X_train_pls, order=order_arimax)
results = model.fit(disp=False)

res_train_arimax = results.resid
train_arimax_pred = y_train - res_train_arimax


# --- Walk-forward ARIMAX forecasts -------------------------------------------
# Each test point is forecast one step ahead; the observed value is then added
# to the estimation window and the model is re-estimated from scratch.
arimax_pred = []
n_train = len(y_train)

# Build the full series once and slice an expanding window from it instead of
# appending one row per iteration. ignore_index keeps endog and exog on the
# same 0..N-1 index.
y_all = pd.concat([y_train, y_test], ignore_index=True)
X_all_pls = pd.concat([X_train_pls, X_test_pls], ignore_index=True)

for i in range(len(y_test)):

    next_exog = X_test_pls.iloc[i, :]

    forecast = results.get_forecast(steps=1, exog=next_exog)
    forecast_value = forecast.predicted_mean.values[0]
    arimax_pred.append(forecast_value)

    window_end = n_train + i + 1
    y_new_train = y_all.iloc[:window_end]
    X_new_train = X_all_pls.iloc[:window_end]

    # The refit after the final test point is not used for forecasting; it
    # leaves `model` / `results` estimated on the full sample.
    model = SARIMAX(endog=y_new_train, exog=X_new_train, order=order_arimax)
    results = model.fit(disp=False)


# --- Error metrics -------------------------------------------------------------
metric_funcs = (mape, mae, rmse)

y_naive_true = pd.concat([y_train[1:], y_test])
mape_naive, mae_naive, rmse_naive = (
    round(metric(y_naive_true, naive_pred), 3) for metric in metric_funcs
)

mape_train_linear, mae_train_linear, rmse_train_linear = (
    round(metric(y_train, train_linear_pred), 3) for metric in metric_funcs
)

# With d-th order differencing handled inside the state-space model, the first
# d in-sample predictions come from the diffuse initial state (the prediction
# is essentially only the exogenous term), so their residuals are about as
# large as the series level. They are excluded from the training scores;
# train_arimax_pred itself keeps its full length.
mape_train_arimax, mae_train_arimax, rmse_train_arimax = (
    round(metric(y_train.iloc[d:], train_arimax_pred.iloc[d:]), 3) for metric in metric_funcs
)


mape_linear, mae_linear, rmse_linear = (
    round(metric(y_test, linear_pred), 3) for metric in metric_funcs
)

mape_ARIMAX, mae_ARIMAX, rmse_ARIMAX = (
    round(metric(y_test, arimax_pred), 3) for metric in metric_funcs
)


table = [
    ['Metric', 'Naive Method', 'LM Train', 'ARIMAX Train', 'LM Test', 'ARIMAX Test'],
    ['MAPE', mape_naive, mape_train_linear, mape_train_arimax, mape_linear, mape_ARIMAX],
    ['MAE', mae_naive, mae_train_linear, mae_train_arimax, mae_linear, mae_ARIMAX],
    ['RMSE', rmse_naive, rmse_train_linear, rmse_train_arimax, rmse_linear, rmse_ARIMAX]
]

print(tabulate(table, headers='firstrow', tablefmt='fancy_grid'))

### Group 8

In [45]:
# Group 8 data set: summary statistics of `value` per (timestamp, description),
# pivoted to one row per timestamp and then split into one frame per tension
# target (df_30 / df_60).

# Output column name -> aggregation applied to `value`.
stat_funcs = {'val_min': 'min', 'val_max': 'max', 'val_avg': 'mean', 'val_med': 'median', 'val_sum': 'sum'}

# group_id is compared as the string '8'.
df_group_8 = df_rovere.loc[df_rovere['group_id'] == '8'].reset_index(drop=True)
df_group_8 = (
    df_group_8
    .groupby(['timestamp', 'description'])['value']
    .agg(**stat_funcs)
    .reset_index()
)

# Wide layout: one column per (statistic, description) pair.
df_pivot_8 = df_group_8.pivot(index='timestamp', columns='description', values=list(stat_funcs)).reset_index()
df_pivot_8.columns.name = None

# Flat names '<stat>_<variable>', statistic-major.
# NOTE(review): this assumes the pivot emits the seven descriptions in the
# order hum, temp, solar, irr, rain, tens30, tens60 -- confirm against the
# actual `description` values.
stat_prefixes = ('min', 'max', 'avg', 'med', 'sum')
var_suffixes = ('hum', 'temp', 'solar', 'irr', 'rain', 'tens30', 'tens60')
df_pivot_8.columns = ['date'] + [f'{stat}_{var}' for stat in stat_prefixes for var in var_suffixes]
df_pivot_8 = df_pivot_8.dropna().reset_index(drop=True)

# Columns excluded from the modelling frame.
columns_to_drop = ['min_irr', 'max_irr', 'avg_irr', 'med_irr', 'min_rain', 'sum_hum', 'sum_temp', 'sum_solar', 'avg_rain']
df = df_pivot_8.drop(columns=columns_to_drop).dropna().reset_index(drop=True)


# df_30 keeps the tens30 statistics only, df_60 the tens60 statistics only.
col_drop_30 = [f'{stat}_tens60' for stat in stat_prefixes]
df_30 = df.drop(columns=col_drop_30).dropna().reset_index(drop=True)

col_drop_60 = [f'{stat}_tens30' for stat in stat_prefixes]
df_60 = df.drop(columns=col_drop_60).dropna().reset_index(drop=True)

#### Sensor 65

In [46]:
# Target and lagged predictors for the `avg_tens30` series of this sensor.
y = df_30['avg_tens30']
X = df_30.drop(columns=['date'])

# Every remaining column is replicated at lags 1, 2 and 3, so each row only
# carries information from the three preceding rows.
lagged_frames = [X.shift(lag).add_suffix(f'_lag{lag}') for lag in (1, 2, 3)]
X = pd.concat(lagged_frames, axis=1)

# The lagged copies of the target column itself are not used as predictors.
columns_to_drop = [f'avg_tens30_lag{lag}' for lag in (1, 2, 3)]
X = X.drop(columns=columns_to_drop, errors='ignore')

# The first three rows contain NaNs introduced by the shifts.
y, X = (frame.iloc[3:].reset_index(drop=True) for frame in (y, X))

# shuffle=False keeps the row order, so the test set is the final block of rows.
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False, test_size=0.29878)

# Last expression of the cell: its result is what the notebook displays.
optimize_ncomp_pls(X_train, y_train, 30, 'RMSE', 'min')

In [47]:
# Benchmark for this sensor: one-step-ahead forecasts from a naive persistence
# rule, an OLS regression on PLS scores and an ARIMAX model on the same scores,
# summarised as MAPE / MAE / RMSE in a single table.

# --- Naive baseline ----------------------------------------------------------
# Forecast = previous observation. The first training point has no predecessor
# and is dropped by the [1:] slice; the first test point is predicted with the
# last training value.
naive_pred = pd.concat([
    y_train.shift(1).fillna(y_train.iloc[-1])[1:],
    y_test.shift(1).fillna(y_train.iloc[-1]),
])

# --- PLS projection (fitted on the training rows only) -----------------------
# NOTE(review): the component count is presumably read off the
# optimize_ncomp_pls output of the preceding cell -- confirm.
n_components = 2
pls_columns = [f'PC{k}' for k in range(1, n_components + 1)]

pls_model = PLSRegression(n_components=n_components)
pls_model.fit(X_train, y_train)

X_train_pls = pd.DataFrame(pls_model.transform(X_train), columns=pls_columns)
X_test_pls = pd.DataFrame(pls_model.transform(X_test), columns=pls_columns)

# --- Linear model on the PLS scores ------------------------------------------
linear_reg_model = sm.OLS(y_train, sm.add_constant(X_train_pls)).fit()

train_linear_pred = linear_reg_model.predict(sm.add_constant(X_train_pls))
linear_pred = linear_reg_model.predict(sm.add_constant(X_test_pls))


# --- ARIMAX order selection ---------------------------------------------------
ps = range(0, 11, 1)
qs = range(0, 11, 1)
d = 1

order_list = list(product(ps, qs))


# optimize_ARIMAX fits every (p, d, q) candidate and returns the table sorted
# by ascending AIC together with the best order.
result_df, order_arimax = optimize_ARIMAX(y_train, X_train_pls, order_list, d)
model = SARIMAX(endog=y_train, exog=X_train_pls, order=order_arimax)
results = model.fit(disp=False)

res_train_arimax = results.resid
train_arimax_pred = y_train - res_train_arimax


# --- Walk-forward ARIMAX forecasts -------------------------------------------
# Each test point is forecast one step ahead; the observed value is then added
# to the estimation window and the model is re-estimated from scratch.
arimax_pred = []
n_train = len(y_train)

# Build the full series once and slice an expanding window from it instead of
# appending one row per iteration. ignore_index keeps endog and exog on the
# same 0..N-1 index.
y_all = pd.concat([y_train, y_test], ignore_index=True)
X_all_pls = pd.concat([X_train_pls, X_test_pls], ignore_index=True)

for i in range(len(y_test)):

    next_exog = X_test_pls.iloc[i, :]

    forecast = results.get_forecast(steps=1, exog=next_exog)
    forecast_value = forecast.predicted_mean.values[0]
    arimax_pred.append(forecast_value)

    window_end = n_train + i + 1
    y_new_train = y_all.iloc[:window_end]
    X_new_train = X_all_pls.iloc[:window_end]

    # The refit after the final test point is not used for forecasting; it
    # leaves `model` / `results` estimated on the full sample.
    model = SARIMAX(endog=y_new_train, exog=X_new_train, order=order_arimax)
    results = model.fit(disp=False)


# --- Error metrics -------------------------------------------------------------
metric_funcs = (mape, mae, rmse)

y_naive_true = pd.concat([y_train[1:], y_test])
mape_naive, mae_naive, rmse_naive = (
    round(metric(y_naive_true, naive_pred), 3) for metric in metric_funcs
)

mape_train_linear, mae_train_linear, rmse_train_linear = (
    round(metric(y_train, train_linear_pred), 3) for metric in metric_funcs
)

# With d-th order differencing handled inside the state-space model, the first
# d in-sample predictions come from the diffuse initial state (the prediction
# is essentially only the exogenous term), so their residuals are about as
# large as the series level. They are excluded from the training scores;
# train_arimax_pred itself keeps its full length.
mape_train_arimax, mae_train_arimax, rmse_train_arimax = (
    round(metric(y_train.iloc[d:], train_arimax_pred.iloc[d:]), 3) for metric in metric_funcs
)


mape_linear, mae_linear, rmse_linear = (
    round(metric(y_test, linear_pred), 3) for metric in metric_funcs
)

mape_ARIMAX, mae_ARIMAX, rmse_ARIMAX = (
    round(metric(y_test, arimax_pred), 3) for metric in metric_funcs
)


table = [
    ['Metric', 'Naive Method', 'LM Train', 'ARIMAX Train', 'LM Test', 'ARIMAX Test'],
    ['MAPE', mape_naive, mape_train_linear, mape_train_arimax, mape_linear, mape_ARIMAX],
    ['MAE', mae_naive, mae_train_linear, mae_train_arimax, mae_linear, mae_ARIMAX],
    ['RMSE', rmse_naive, rmse_train_linear, rmse_train_arimax, rmse_linear, rmse_ARIMAX]
]

print(tabulate(table, headers='firstrow', tablefmt='fancy_grid'))

#### Sensor 66

In [48]:
# Target and lagged predictors for the `avg_tens60` series of this sensor.
y = df_60['avg_tens60']
X = df_60.drop(columns=['date'])

# Every remaining column is replicated at lags 1, 2 and 3, so each row only
# carries information from the three preceding rows.
lagged_frames = [X.shift(lag).add_suffix(f'_lag{lag}') for lag in (1, 2, 3)]
X = pd.concat(lagged_frames, axis=1)

# The lagged copies of the target column itself are not used as predictors.
columns_to_drop = [f'avg_tens60_lag{lag}' for lag in (1, 2, 3)]
X = X.drop(columns=columns_to_drop, errors='ignore')

# The first three rows contain NaNs introduced by the shifts.
y, X = (frame.iloc[3:].reset_index(drop=True) for frame in (y, X))

# shuffle=False keeps the row order, so the test set is the final block of rows.
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False, test_size=0.29878)

# Last expression of the cell: its result is what the notebook displays.
optimize_ncomp_pls(X_train, y_train, 30, 'RMSE', 'min')

In [49]:
# Benchmark for this sensor: one-step-ahead forecasts from a naive persistence
# rule, an OLS regression on PLS scores and an ARIMAX model on the same scores,
# summarised as MAPE / MAE / RMSE in a single table.

# --- Naive baseline ----------------------------------------------------------
# Forecast = previous observation. The first training point has no predecessor
# and is dropped by the [1:] slice; the first test point is predicted with the
# last training value.
naive_pred = pd.concat([
    y_train.shift(1).fillna(y_train.iloc[-1])[1:],
    y_test.shift(1).fillna(y_train.iloc[-1]),
])

# --- PLS projection (fitted on the training rows only) -----------------------
# NOTE(review): the component count is presumably read off the
# optimize_ncomp_pls output of the preceding cell -- confirm.
n_components = 8
pls_columns = [f'PC{k}' for k in range(1, n_components + 1)]

pls_model = PLSRegression(n_components=n_components)
pls_model.fit(X_train, y_train)

X_train_pls = pd.DataFrame(pls_model.transform(X_train), columns=pls_columns)
X_test_pls = pd.DataFrame(pls_model.transform(X_test), columns=pls_columns)

# --- Linear model on the PLS scores ------------------------------------------
linear_reg_model = sm.OLS(y_train, sm.add_constant(X_train_pls)).fit()

train_linear_pred = linear_reg_model.predict(sm.add_constant(X_train_pls))
linear_pred = linear_reg_model.predict(sm.add_constant(X_test_pls))


# --- ARIMAX order selection ---------------------------------------------------
ps = range(0, 11, 1)
qs = range(0, 11, 1)
d = 1

order_list = list(product(ps, qs))


# optimize_ARIMAX fits every (p, d, q) candidate and returns the table sorted
# by ascending AIC together with the best order.
result_df, order_arimax = optimize_ARIMAX(y_train, X_train_pls, order_list, d)
model = SARIMAX(endog=y_train, exog=X_train_pls, order=order_arimax)
results = model.fit(disp=False)

res_train_arimax = results.resid
train_arimax_pred = y_train - res_train_arimax


# --- Walk-forward ARIMAX forecasts -------------------------------------------
# Each test point is forecast one step ahead; the observed value is then added
# to the estimation window and the model is re-estimated from scratch.
arimax_pred = []
n_train = len(y_train)

# Build the full series once and slice an expanding window from it instead of
# appending one row per iteration. ignore_index keeps endog and exog on the
# same 0..N-1 index.
y_all = pd.concat([y_train, y_test], ignore_index=True)
X_all_pls = pd.concat([X_train_pls, X_test_pls], ignore_index=True)

for i in range(len(y_test)):

    next_exog = X_test_pls.iloc[i, :]

    forecast = results.get_forecast(steps=1, exog=next_exog)
    forecast_value = forecast.predicted_mean.values[0]
    arimax_pred.append(forecast_value)

    window_end = n_train + i + 1
    y_new_train = y_all.iloc[:window_end]
    X_new_train = X_all_pls.iloc[:window_end]

    # The refit after the final test point is not used for forecasting; it
    # leaves `model` / `results` estimated on the full sample.
    model = SARIMAX(endog=y_new_train, exog=X_new_train, order=order_arimax)
    results = model.fit(disp=False)


# --- Error metrics -------------------------------------------------------------
metric_funcs = (mape, mae, rmse)

y_naive_true = pd.concat([y_train[1:], y_test])
mape_naive, mae_naive, rmse_naive = (
    round(metric(y_naive_true, naive_pred), 3) for metric in metric_funcs
)

mape_train_linear, mae_train_linear, rmse_train_linear = (
    round(metric(y_train, train_linear_pred), 3) for metric in metric_funcs
)

# With d-th order differencing handled inside the state-space model, the first
# d in-sample predictions come from the diffuse initial state (the prediction
# is essentially only the exogenous term), so their residuals are about as
# large as the series level. They are excluded from the training scores;
# train_arimax_pred itself keeps its full length.
mape_train_arimax, mae_train_arimax, rmse_train_arimax = (
    round(metric(y_train.iloc[d:], train_arimax_pred.iloc[d:]), 3) for metric in metric_funcs
)


mape_linear, mae_linear, rmse_linear = (
    round(metric(y_test, linear_pred), 3) for metric in metric_funcs
)

mape_ARIMAX, mae_ARIMAX, rmse_ARIMAX = (
    round(metric(y_test, arimax_pred), 3) for metric in metric_funcs
)


table = [
    ['Metric', 'Naive Method', 'LM Train', 'ARIMAX Train', 'LM Test', 'ARIMAX Test'],
    ['MAPE', mape_naive, mape_train_linear, mape_train_arimax, mape_linear, mape_ARIMAX],
    ['MAE', mae_naive, mae_train_linear, mae_train_arimax, mae_linear, mae_ARIMAX],
    ['RMSE', rmse_naive, rmse_train_linear, rmse_train_arimax, rmse_linear, rmse_ARIMAX]
]

print(tabulate(table, headers='firstrow', tablefmt='fancy_grid'))