Time Series Modelling

Objective:

To monitor the current status of environmental factors and predict how each factor may change over the next hour due to the influence of other factors.
Due to limited data availability, we were able to model hourly temperature changes based on specific humidity levels during the seedling stage.

Input:

•	Specific Humidity Level: Captured from real-time sensor data for the current hour in the greenhouse.

Process:

•	Time Series Modeling: Time series analysis is used to examine the relationship between specific humidity and temperature changes over time. The model is trained on historical data and uses current data to forecast short-term specific humidity variations based on observed temperatures.

Output:

•	Predicted Temperature for the Next Hour: A forecasted temperature value for the greenhouse environment within the next hour.
This output will allow the decision support system to make proactive adjustments to maintain optimal specific humidity for the seedlings (optimum humidity for the seedling stage is defined at stage 2).



In [None]:
import pandas as pd
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.stattools import adfuller
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np
import itertools
import io

In [None]:
# Colab-only step: upload the input file through the notebook's upload widget.
# `uploaded` is indexed by file name in the next cell (uploaded['timeseries.csv']),
# where the stored value is wrapped in io.BytesIO for parsing.
from google.colab import files
uploaded = files.upload()

Saving timeseries.csv to timeseries.csv


In [None]:
# Parse the uploaded CSV directly from its in-memory bytes, then preview the first rows.
csv_buffer = io.BytesIO(uploaded['timeseries.csv'])
data = pd.read_csv(csv_buffer)
data.head()

Unnamed: 0,Date,Time,Relative_Humidity,Temperature
0,1/25/2024,00:00:00,6.247241,18.30388
1,1/25/2024,2:00:00,9.704286,17.50749
2,1/25/2024,4:00:00,8.391964,16.332457
3,1/25/2024,6:00:00,7.591951,21.748969
4,1/25/2024,8:00:00,4.936112,21.830393


In [None]:
# Build a datetime index from the separate Date and Time text columns.
# Dates are stripped of surrounding whitespace and all whitespace is removed from
# the times before parsing; unparseable rows become NaT (errors='coerce').
date_text = data['Date'].str.strip()
time_text = data['Time'].str.replace(r'\s+', '', regex=True)

data = (
    data
    .assign(Datetime=pd.to_datetime(date_text + ' ' + time_text, errors='coerce'))
    .set_index('Datetime')
    .drop(columns=['Date', 'Time'])
)

# Cyclical time-of-day encoding: map the hour onto a 24-hour circle so that
# hour 23 and hour 0 end up close together.
data['Hour'] = data.index.hour
hour_angle = 2 * np.pi * data['Hour'] / 24
data['Hour_sin'] = np.sin(hour_angle)
data['Hour_cos'] = np.cos(hour_angle)

In [None]:
# Inspect the column labels. The output below shows that 'Relative_Humidity ' and
# 'Temperature ' end with a trailing space, so later cells must include that
# space when selecting these columns.
print(data.columns)

Index(['Relative_Humidity ', 'Temperature ', 'Hour', 'Hour_sin', 'Hour_cos'], dtype='object')


In [None]:
# Chronological 80/20 split: the first 80% of rows train the model and the
# remaining rows are held out for evaluation (no shuffling).
train_size = int(len(data) * 0.8)
train = data.iloc[:train_size]
test = data.iloc[train_size:]

# Exogenous regressors: temperature plus the cyclical hour encoding.
exog_columns = ['Temperature ', 'Hour_sin', 'Hour_cos']
exog_train = train[exog_columns]
exog_test = test[exog_columns]

In [None]:
 #Define SARIMAX parameters and perform grid search
# Candidate non-seasonal orders: every (p, d, q) combination with values 0 or 1.
p = d = q = range(0, 2)
# Seasonal order with P = D = Q = 0 and a period of 24.
seasonal_order = (0, 0, 0, 24)
param_combinations = list(itertools.product(p, d, q))
best_mae, best_rmse = float("inf"), float("inf")
best_order = None
# order -> error text, so failed candidates stay visible instead of being silently skipped.
failed_orders = {}

# The column labels carry a trailing space (see the printed column index), so the
# target must be selected as 'Relative_Humidity '. Without the space the lookup
# raises KeyError on every iteration, which the previous blanket `except` hid,
# leaving best_order as None.
target_train = train['Relative_Humidity ']
target_test = test['Relative_Humidity ']

for order in param_combinations:
    try:
        model = SARIMAX(
            target_train,
            exog=exog_train,
            order=order,
            seasonal_order=seasonal_order,
            enforce_stationarity=False,
            enforce_invertibility=False
        )
        model_fit = model.fit(disp=False)

        # Forecast exactly len(test) steps past the training data. Counting steps
        # avoids looking up date labels in the model index.
        predictions = model_fit.forecast(steps=len(test), exog=exog_test)
        mae = mean_absolute_error(target_test, predictions)
        rmse = np.sqrt(mean_squared_error(target_test, predictions))
    except (ValueError, np.linalg.LinAlgError) as err:
        # Best effort: a candidate that cannot be fitted or scored is recorded and skipped.
        failed_orders[order] = repr(err)
        continue

    # Select on MAE; RMSE is kept alongside for reporting.
    if mae < best_mae:
        best_mae, best_rmse = mae, rmse
        best_order = order

if best_order is None:
    raise RuntimeError(f"Grid search failed for every candidate order: {failed_orders}")

print(f"Best order: {best_order} (MAE: {best_mae:.3f}, RMSE: {best_rmse:.3f})")
if failed_orders:
    print(f"Skipped orders: {failed_orders}")


In [None]:
# Re-check the column labels before fitting the final model: the humidity and
# temperature labels shown in the output end with a trailing space.
print(data.columns)

Index(['Relative_Humidity ', 'Temperature ', 'Hour', 'Hour_sin', 'Hour_cos'], dtype='object')


In [None]:
# Refit on the complete series (training and held-out rows together) using the
# order selected by the grid search and the same seasonal settings.
final_endog = data['Relative_Humidity ']  # label includes the trailing space
final_exog = data[['Temperature ', 'Hour_sin', 'Hour_cos']]

final_model = SARIMAX(
    final_endog,
    exog=final_exog,
    order=best_order,
    seasonal_order=seasonal_order,
    enforce_stationarity=False,
    enforce_invertibility=False,
)
final_model_fit = final_model.fit(disp=False)

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


In [None]:
# Forecast example for next 5 time steps with hypothetical future temperatures
n_steps = 5
future_temperatures = pd.DataFrame({'Temperature (°C)': [20, 21, 19, 18, 20]})

# get_forecast predicts the steps that immediately follow the fitted data, so the
# time-of-day features must describe those timestamps rather than arbitrary hours.
# Assumes the series is regularly spaced: the step is taken from the last two rows.
step = data.index[-1] - data.index[-2]
future_index = pd.DatetimeIndex([data.index[-1] + step * k for k in range(1, n_steps + 1)])

future_temperatures['Hour'] = future_index.hour
future_temperatures['Hour_sin'] = np.sin(2 * np.pi * future_temperatures['Hour'] / 24)
future_temperatures['Hour_cos'] = np.cos(2 * np.pi * future_temperatures['Hour'] / 24)

# NOTE(review): the temperature label here differs from the training column
# 'Temperature '; presumably the exogenous values are matched by position — confirm.
forecast = final_model_fit.get_forecast(
    steps=n_steps,
    exog=future_temperatures[['Temperature (°C)', 'Hour_sin', 'Hour_cos']]
)
forecast_summary = forecast.summary_frame(alpha=0.05)  # alpha=0.05 -> 95% interval

print("Forecasted Relative Humidity:")
print(forecast_summary[['mean', 'mean_ci_lower', 'mean_ci_upper']])


Forecasted Relative Humidity:
Relative_Humidity        mean  mean_ci_lower  mean_ci_upper
2024-02-21 02:00:00  5.799079       1.866337       9.731822
2024-02-21 04:00:00  5.642068       1.709326       9.574811
2024-02-21 06:00:00  4.797674       0.864931       8.730416
2024-02-21 08:00:00  4.579283       0.646540       8.512025
2024-02-21 10:00:00  5.588978       1.656236       9.521720


In [None]:
# Error metrics (MAE, MSE, RMSE) for the 5-step forecast.
# Real future observations are not available at forecast time, so the "actual"
# values below are hypothetical placeholders used only to demonstrate the calculation.
actual_values = np.array([8, 8.5, 8.2, 8, 8.3])  # Replace with your actual values if available
predicted_values = forecast_summary['mean'].to_numpy()

mse = mean_squared_error(actual_values, predicted_values)
rmse = np.sqrt(mse)
mae = mean_absolute_error(actual_values, predicted_values)

print("\nEvaluation Metrics:")
print(f"MAE: {mae:.3f}")
print(f"MSE: {mse:.3f}")
print(f"RMSE: {rmse:.3f}")


Evaluation Metrics:
MAE: 2.919
MSE: 8.728
RMSE: 2.954


In [None]:
# Function to predict relative humidity based on user input
def predict_relative_humidity():
    """Prompt for a temperature and an hour, then print a one-step humidity forecast.

    Reads two values from standard input: a temperature (float) and an hour of
    day (integer, 0-23). The hour is encoded as sine/cosine features, combined
    with the temperature into a single-row exogenous frame, and passed to the
    module-level ``final_model_fit`` for a ``steps=1`` forecast. The predicted
    mean and its confidence interval are printed.

    Invalid input (non-numeric text, a non-finite temperature, or an hour
    outside 0-23) prints a message and returns without forecasting. Errors
    raised by the model itself are not caught, so they are not misreported as
    bad user input.

    Returns:
        None. Results are printed.
    """
    # Only the parsing of user input is guarded by the ValueError handler.
    try:
        temperature = float(input("Enter Temperature (°C): "))
        time_of_day = int(input("Enter Time of Day (hour in 24-hour format): "))
    except ValueError:
        print("Invalid input. Please enter numeric values for temperature and hour.")
        return

    # float() accepts 'nan' and 'inf'; neither is a usable temperature.
    if not np.isfinite(temperature):
        print("Invalid input. Please enter numeric values for temperature and hour.")
        return

    # The cyclical encoding would silently wrap an out-of-range hour (e.g. 25 -> 1).
    if not 0 <= time_of_day <= 23:
        print("Invalid input. Hour must be between 0 and 23.")
        return

    # Encode time as cyclic features
    hour_sin = np.sin(2 * np.pi * time_of_day / 24)
    hour_cos = np.cos(2 * np.pi * time_of_day / 24)

    # Single-row frame of exogenous inputs, in the order temperature, sin, cos.
    user_input = pd.DataFrame({
        'Temperature (°C)': [temperature],
        'Hour_sin': [hour_sin],
        'Hour_cos': [hour_cos]
    })

    # One-step forecast from the fitted final model.
    forecast = final_model_fit.get_forecast(steps=1, exog=user_input)
    prediction = forecast.predicted_mean.iloc[0]
    conf_int = forecast.conf_int()

    # The interval bounds are read by position: column 0 is taken as the lower
    # bound and column 1 as the upper bound of the first (only) forecast row.
    print(f"\nPredicted Relative Humidity: {prediction:.2f} %")
    print(f"95% Confidence Interval: [{conf_int.iloc[0, 0]:.2f}, {conf_int.iloc[0, 1]:.2f}]")

# Run the prediction function (it prompts for a temperature and an hour via input())
predict_relative_humidity()

Enter Temperature (°C): 27
Enter Time of Day (hour in 24-hour format): 14

Predicted Relative Humidity: 7.31 %
95% Confidence Interval: [3.38, 11.24]
