In [None]:
import tensorflow as tf
import pandas as pd
import numpy as np
import joblib, os
import matplotlib.pyplot as plt
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional, LayerNormalization, RepeatVector
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [14]:
np.random.seed(42)
tf.random.set_seed(42)

In [15]:
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print(f"GPU is available: {len(gpus)} GPU(s) detected")
    except RuntimeError as e:
        print(e)
else:
    print("No GPU found, using CPU instead")

No GPU found, using CPU instead


In [16]:
from tensorflow.keras.metrics import Metric
import tensorflow as tf

class RSquare(Metric):
    def __init__(self, name='r_square', **kwargs):
        super(RSquare, self).__init__(name=name, **kwargs)
        self.total_sum = self.add_weight(name='total_sum', initializer='zeros')
        self.residual_sum = self.add_weight(name='residual_sum', initializer='zeros')

    def update_state(self, y_true, y_pred, sample_weight=None):
        y_true = tf.cast(y_true, tf.float32)
        y_pred = tf.cast(y_pred, tf.float32)
        
        # Calculate total sum of squares
        mean_y_true = tf.reduce_mean(y_true)
        total_error = tf.reduce_sum(tf.square(y_true - mean_y_true))
        
        # Calculate residual sum of squares
        unexplained_error = tf.reduce_sum(tf.square(y_true - y_pred))
        
        # Update state
        self.total_sum.assign_add(total_error)
        self.residual_sum.assign_add(unexplained_error)

    def result(self):
        return 1.0 - tf.math.divide_no_nan(self.residual_sum, self.total_sum)

    def reset_states(self):
        self.total_sum.assign(0.)
        self.residual_sum.assign(0.)

In [None]:
class IrrigationTimeSeriesForecaster:
    def __init__(self, look_back=18, forecast_horizon=6):
        self.look_back = look_back
        self.forecast_horizon = forecast_horizon
        self.temp_scaler = MinMaxScaler(feature_range=(0, 1))
        self.humidity_scaler = MinMaxScaler(feature_range=(0, 1))
        self.models = {
            'temperature': None,
            'humidity': None
        }
        
    def load_data(self):
        """Load and preprocess the dataset"""
        # Load data
        df = pd.read_csv("../Dataset/Better_Dataset.csv")
        
        # Convert date column to datetime
        df['date'] = pd.to_datetime(df['date'])
        
        # Set date as index
        df.set_index('date', inplace=True)
        
        # Check for missing values
        if df.isnull().sum().sum() > 0:
            print(f"Found {df.isnull().sum().sum()} missing values. Filling with forward fill method.")
            df = df.ffill()  # Forward fill
            # If there are still missing values at the beginning, fill them with backward fill
            if df.isnull().sum().sum() > 0:
                df = df.bfill()
        
        # Check for duplicated timestamps
        if df.index.duplicated().sum() > 0:
            print(f"Found {df.index.duplicated().sum()} duplicated timestamps. Keeping the first occurrence.")
            df = df[~df.index.duplicated(keep='first')]
        
        # Sort by date
        df = df.sort_index()
        
        # Check if data is evenly spaced
        time_diffs = df.index.to_series().diff().dropna()
        if len(time_diffs.unique()) > 1:
            print("Warning: Time series has irregular intervals. Consider resampling.")
            # Resample to 4-hour intervals
            df = df.resample('4H').mean().interpolate(method='time')
        
        # Extract relevant features
        self.df = df[['temperature', 'humidity']]
        
        # Normalize the data
        self.df['temperature_scaled'] = self.temp_scaler.fit_transform(df[['temperature']])
        self.df['humidity_scaled'] = self.humidity_scaler.fit_transform(df[['humidity']])
        
        joblib.dump(self.temp_scaler, "temp_scaler.pkl")
        joblib.dump(self.humidity_scaler, "humidity_scaler.pkl")
        
        return self.df
    
    def create_sequences(self, data, target_col):
        """Create input-output pairs for training the model"""
        X, y = [], []
        scaled_col = f"{target_col}_scaled"
        
        for i in range(len(data) - self.look_back - self.forecast_horizon + 1):
            # Input sequence (look_back days)
            X.append(data[scaled_col].values[i:(i + self.look_back)])
            
            # Output sequence (next forecast_horizon timestamps)
            y.append(data[scaled_col].values[(i + self.look_back):(i + self.look_back + self.forecast_horizon)])
        
        return np.array(X), np.array(y)
    
    def split_data(self, X, y, train_ratio=0.7, val_ratio=0.15):
        """Split data into training, validation, and test sets"""
        n = len(X)
        train_size = int(n * train_ratio)
        val_size = int(n * val_ratio)
        
        # Training set
        X_train, y_train = X[:train_size], y[:train_size]
        
        # Validation set
        X_val, y_val = X[train_size:train_size + val_size], y[train_size:train_size + val_size]
        
        # Test set
        X_test, y_test = X[train_size + val_size:], y[train_size + val_size:]
        
        return X_train, y_train, X_val, y_val, X_test, y_test
    
    def build_model(self, input_shape):
        from tensorflow.keras.layers import Input, Attention, concatenate
        from tensorflow.keras.models import Model
        from tensorflow.keras.losses import Huber
    
        # Input layer
        inputs = Input(shape=input_shape)
        
        # Encoder LSTM
        lstm1 = Bidirectional(LSTM(256, return_sequences=True))(inputs)
        norm1 = LayerNormalization()(lstm1)
        drop1 = Dropout(0.3)(norm1)
        
        # Context vector
        lstm2 = Bidirectional(LSTM(128, return_sequences=False))(drop1)
        norm2 = LayerNormalization()(lstm2)
        drop2 = Dropout(0.3)(norm2)
        
        # Repeat vector for sequence output
        repeated = RepeatVector(self.forecast_horizon)(drop2)
        
        # Decoder LSTM with attention
        decoder_lstm = LSTM(128, return_sequences=True)(repeated)
        
        # Attention mechanism
        attention = Attention()([decoder_lstm, decoder_lstm])
        concat = concatenate([decoder_lstm, attention])
        
        # Output layers
        dense1 = Dense(128, activation='relu')(concat)
        outputs = Dense(1)(dense1)
    
        model = Model(inputs=inputs, outputs=outputs)
        
        # Use Huber loss class instead of string identifier
        model.compile(optimizer=Adam(learning_rate=0.001),
                      loss=Huber(),
                      metrics=[RSquare()])
        return model
    
    def train_model(self, target_col, epochs=100, batch_size=512, verbose=1):
        """Train the forecasting model for a specific target column"""
        # Create sequences
        X, y = self.create_sequences(self.df, target_col)
        
        # Reshape for LSTM [samples, timesteps, features]
        X = X.reshape((X.shape[0], X.shape[1], 1))
        y = y.reshape((y.shape[0], y.shape[1], 1))
        
        # Split data
        X_train, y_train, X_val, y_val, X_test, y_test = self.split_data(X, y)
        
        # Build model
        model = self.build_model((self.look_back, 1))
        
        # Create callbacks
        callbacks = [
            EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True),
            ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=0.00001),
            ModelCheckpoint(f'best_{target_col}_model.keras', monitor='val_loss', save_best_only=True)
        ]
        
        # Train model
        history = model.fit(
            X_train, y_train,
            epochs=epochs,
            batch_size=batch_size,
            validation_data=(X_val, y_val),
            callbacks=callbacks,
            verbose=verbose
        )
        
        # Evaluate model
        mse = model.evaluate(X_test, y_test, verbose=0)[0]
        print(f"{target_col} Test MSE: {mse}")
        
        # Save model
        self.models[target_col] = model
        
        # # Plot training history
        # self.plot_training_history(history, target_col)
        
        # # Plot predictions
        # self.plot_predictions(X_test, y_test, model, target_col)
        
        return model, history
    
    def plot_training_history(self, history, target_col):
        """Plot training and validation loss"""
        plt.figure(figsize=(12, 5))
        plt.plot(history.history['loss'], label='Training Loss')
        plt.plot(history.history['val_loss'], label='Validation Loss')
        plt.title(f'{target_col} Model - Training and Validation Loss')
        plt.xlabel('Epochs')
        plt.ylabel('Loss')
        plt.legend()
        plt.savefig(f'{target_col}_training_history.png')
        plt.close()
    
    def plot_predictions(self, X_test, y_test, model, target_col):
        """Plot test predictions against actual values"""
        # Make predictions
        y_pred = model.predict(X_test)
        
        # Select a sample from test set
        sample_idx = np.random.randint(0, len(X_test))
        
        # Inverse transform the predictions and actual values for the sample
        if target_col == 'temperature':
            scaler = self.temp_scaler
        else:
            scaler = self.humidity_scaler
        
        y_pred_sample = y_pred[sample_idx].reshape(-1, 1)
        y_test_sample = y_test[sample_idx].reshape(-1, 1)
        
        y_pred_sample = scaler.inverse_transform(y_pred_sample)
        y_test_sample = scaler.inverse_transform(y_test_sample)
        
        # Plot
        plt.figure(figsize=(12, 5))
        plt.plot(range(self.forecast_horizon), y_test_sample, 'b-', label='Actual')
        plt.plot(range(self.forecast_horizon), y_pred_sample, 'r--', label='Predicted')
        plt.title(f'{target_col} - Actual vs Predicted (Sample)')
        plt.xlabel('Time Steps (4-hour intervals)')
        plt.ylabel(target_col)
        plt.legend()
        plt.savefig(f'{target_col}_predictions_sample.png')
        plt.close()
        
        # Calculate metrics
        mae = mean_absolute_error(y_test.reshape(-1), y_pred.reshape(-1))
        rmse = np.sqrt(mean_squared_error(y_test.reshape(-1), y_pred.reshape(-1)))
        
        print(f"{target_col} Test MAE: {mae}")
        print(f"{target_col} Test RMSE: {rmse}")
    
    def forecast_next_day(self, last_sequence):
        # Initialize dataframe for forecasts
        forecast_df = pd.DataFrame(index=pd.date_range(
            start=last_sequence.index[-1] + pd.Timedelta(hours=4), 
            periods=self.forecast_horizon, 
            freq='4H'
        ))
        
        # For each target (temperature and humidity)
        for target_col in ['temperature', 'humidity']:
            # Prepare input data
            scaled_col = f"{target_col}_scaled"
            input_seq = last_sequence[scaled_col].values[-self.look_back:].reshape(1, self.look_back, 1)
            
            # Make prediction
            scaled_pred = self.models[target_col].predict(input_seq)[0]
            
            # Inverse transform to get actual values
            if target_col == 'temperature':
                scaler = self.temp_scaler
            else:
                scaler = self.humidity_scaler
                
            predictions = scaler.inverse_transform(scaled_pred.reshape(-1, 1)).flatten()
            
            # Add to forecast dataframe
            forecast_df[target_col] = predictions
        
        return forecast_df
    
    def save_models(self, path='saved_models'):
        """Save trained models"""
        if not os.path.exists(path):
            os.makedirs(path)
            
        for target, model in self.models.items():
            if model is not None:
                model.save(f"{path}/{target}_model.h5")
                print(f"Saved {target} model to {path}/{target}_model.h5")
    
    def load_models(self, path='saved_models'):
        """Load saved models"""
        for target in self.models.keys():
            model_path = f"{path}/{target}_model.h5"
            if os.path.exists(model_path):
                self.models[target] = tf.keras.models.load_model(model_path)
                print(f"Loaded {target} model from {model_path}")
            else:
                print(f"Model file {model_path} not found.")

In [None]:
# Instantiate the forecaster
forecaster = IrrigationTimeSeriesForecaster(look_back=18, forecast_horizon=6)

print("Loading and preprocessing data...")
df = forecaster.load_data()
    
print("\nTraining temperature model...")
forecaster.train_model('temperature', epochs=150)
    
print("\nTraining humidity model...")
forecaster.train_model('humidity', epochs=150)
    
forecaster.save_models()
    
# Generate forecast for the next day
last_data = df.iloc[-forecaster.look_back:]
forecast = forecaster.forecast_next_day(last_data)
    
print("\nForecast for the next 24 hours (6 readings at 4-hour intervals):")
print(forecast)
    
print("\nModel training and forecasting completed successfully!")

Loading and preprocessing data...

Training temperature model...
Epoch 1/150
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 956ms/step - loss: 0.2147 - r_square: -18.7366 - val_loss: 0.0168 - val_r_square: -3.5825 - learning_rate: 0.0010
Epoch 2/150
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 704ms/step - loss: 0.0207 - r_square: -0.8773 - val_loss: 0.0128 - val_r_square: -2.4863 - learning_rate: 0.0010
Epoch 3/150
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 746ms/step - loss: 0.0150 - r_square: -0.3573 - val_loss: 0.0170 - val_r_square: -3.6348 - learning_rate: 0.0010
Epoch 4/150
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 847ms/step - loss: 0.0128 - r_square: -0.1576 - val_loss: 0.0248 - val_r_square: -5.7675 - learning_rate: 0.0010
Epoch 5/150
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 665ms/step - loss: 0.0123 - r_square: -0.1119 - val_loss: 0.0130 - val_r_square: -2.5427 - learning_rate: 0.0010



humidity Test MSE: 0.007569835055619478
Saved temperature model to saved_models/temperature_model.h5
Saved humidity model to saved_models/humidity_model.h5


  forecast_df = pd.DataFrame(index=pd.date_range(


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 550ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 928ms/step

Forecast for the next 24 hours (6 readings at 4-hour intervals):
                     temperature   humidity
2025-03-21 04:00:00    28.502573  55.275482
2025-03-21 08:00:00    27.221292  58.577694
2025-03-21 12:00:00    26.277782  59.852825
2025-03-21 16:00:00    26.288931  60.318851
2025-03-21 20:00:00    26.438679  60.494957
2025-03-22 00:00:00    26.546877  60.551815

Model training and forecasting completed successfully!
