In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Input, LSTM, Dense, Dropout, Flatten
from tensorflow.keras.optimizers import Adam

class WaterQualityLSTMForecaster:
    """Train one LSTM per water-quality parameter and forecast it ahead.

    The CSV is loaded once, every modelled column is min-max scaled on its
    own, and ``train_and_forecast`` then fits, saves and applies a separate
    model for each entry of ``quality_params``, using ``exogenous_vars`` as
    additional inputs.
    """

    def __init__(self, data_path):
        """Load the CSV at ``data_path`` and prepare the scaled series.

        Args:
            data_path: Path of a CSV file with a ``Date`` column plus one
                column per name in ``quality_params`` and ``exogenous_vars``.

        Raises:
            KeyError: If any required column is missing from the file.
        """
        # Load the data and index it by date
        self.df = pd.read_csv(data_path, parse_dates=['Date'])
        self.df = self.df.set_index('Date')

        # Check for missing values
        print("Missing values:\n", self.df.isnull().sum())

        # Fill missing values if any. `fillna(method='ffill')` is deprecated
        # in pandas, so call `ffill()` directly; the trailing `bfill()` covers
        # gaps at the very start of the series that a forward fill cannot
        # reach.
        self.df = self.df.ffill().bfill()

        # Identify water quality parameters and exogenous variables
        self.quality_params = [
            'Biochemical Oxygen Demand', 'Conductivity',
            'Turbidity', 'Dissolved Oxygen',
            'Fecal Coliform', 'Fecal Streptococci',
            'Nitrate', 'pH', 'Total Coliform'
        ]
        self.exogenous_vars = ['Temperature', 'Rainfall']

        # Fail early with a readable message instead of a KeyError raised
        # from deep inside the scaling loop.
        missing = [
            column
            for column in self.quality_params + self.exogenous_vars
            if column not in self.df.columns
        ]
        if missing:
            raise KeyError(f"Columns missing from {data_path}: {missing}")

        # Create models directory
        self.model_dir = 'water_quality_models'
        os.makedirs(self.model_dir, exist_ok=True)

        # Preprocessing
        self.scalers = {}
        self.scaled_data = self._preprocess_data()

    def _preprocess_data(self):
        """Min-max scale every modelled column independently.

        The fitted scaler of each column is stored in ``self.scalers`` so
        forecasts can be mapped back to original units later.

        Returns:
            dict mapping column name to the array returned by
            ``MinMaxScaler.fit_transform`` for that single column.
        """
        scaled_data = {}
        for column in self.quality_params + self.exogenous_vars:
            scaler = MinMaxScaler()
            # Double brackets keep the column two-dimensional for the scaler.
            scaled_data[column] = scaler.fit_transform(self.df[[column]])
            self.scalers[column] = scaler

        return scaled_data

    def _model_path(self, parameter):
        """Return the file path used to save/load the model of ``parameter``.

        Spaces in the parameter name are replaced by underscores so the
        saving and loading code always agree on the filename.
        """
        filename = f'{parameter.replace(" ", "_")}_water_quality_lstm_model.keras'
        return os.path.join(self.model_dir, filename)

    def _create_sequences(self, data, exogenous, seq_length, forecast_horizon):
        """Cut sliding training windows out of a series.

        Args:
            data: Target series, indexed by time step along axis 0.
            exogenous: Exogenous series aligned row-by-row with ``data``.
            seq_length: Number of past steps fed to the model.
            forecast_horizon: Number of future steps to predict.

        Returns:
            Tuple ``(X, y, X_exo)`` of arrays with one entry per window:
            the input history, the target values that follow it, and the
            exogenous values over those same target steps. All three are
            empty when the series is shorter than
            ``seq_length + forecast_horizon``.
        """
        X, y, X_exo = [], [], []

        n_windows = len(data) - seq_length - forecast_horizon + 1
        for start in range(n_windows):
            target_start = start + seq_length
            target_end = target_start + forecast_horizon

            # Input sequence
            X.append(data[start:target_start])

            # Target (forecast) sequence
            y.append(data[target_start:target_end])

            # Exogenous variables over the forecast period. The slice uses
            # the same bounds as the target; it previously started one step
            # early and so was misaligned with `y`.
            X_exo.append(exogenous[target_start:target_end])

        return (np.array(X), np.array(y), np.array(X_exo))

    def build_multivariate_model(self, seq_length, n_features, exo_features, forecast_horizon):
        """Build and compile the two-input forecasting network.

        Args:
            seq_length: Length of the historical input window.
            n_features: Number of features per historical time step.
            exo_features: Number of exogenous variables per forecast step.
            forecast_horizon: Number of steps predicted by the output layer.

        Returns:
            A compiled Keras ``Model`` taking ``[history, exogenous]`` and
            producing ``forecast_horizon`` values, trained with MSE loss.
        """
        # Input for historical time series
        input_series = Input(shape=(seq_length, n_features))

        # Input for exogenous variables
        input_exo = Input(shape=(forecast_horizon, exo_features))

        # LSTM processing of time series
        x = LSTM(64, return_sequences=True)(input_series)
        x = Dropout(0.2)(x)
        x = LSTM(32, return_sequences=False)(x)
        x = Dropout(0.2)(x)

        # Flatten exogenous input so it can be concatenated with the
        # dense representation of the history
        exo_flat = Flatten()(input_exo)

        # Combine time series features with exogenous variables
        merged = Dense(64, activation='relu')(x)
        merged = tf.keras.layers.Concatenate()([merged, exo_flat])

        # Output layer: one linear unit per forecast step
        output = Dense(forecast_horizon, activation='linear')(merged)

        # Create model
        model = Model(inputs=[input_series, input_exo], outputs=output)
        model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')

        return model

    def train_and_forecast(self, seq_length=10, forecast_horizon=5):
        """Fit, save and apply one model per water-quality parameter.

        Args:
            seq_length: Number of past steps used as model input.
            forecast_horizon: Number of future steps to forecast.

        Returns:
            dict keyed by parameter name. Each value is a dict with
            ``'forecast'`` and ``'historical'`` arrays in original units and
            ``'test_loss'``, the MSE (in scaled units) on the held-out split.

        Raises:
            ValueError: If the series is too short to build a single
                training window.
        """
        results = {}
        n_exo = len(self.exogenous_vars)

        # The exogenous matrix is the same for every parameter, so build it
        # once instead of on every loop iteration.
        exogenous = np.column_stack([self.scaled_data[var] for var in self.exogenous_vars])

        for param in self.quality_params:
            # Prepare data for this parameter
            data = self.scaled_data[param]

            # Create sequences
            X, y, X_exo = self._create_sequences(data, exogenous, seq_length, forecast_horizon)
            if len(X) == 0:
                raise ValueError(
                    f"Need at least {seq_length + forecast_horizon} rows to build "
                    f"training windows, got {len(data)}"
                )

            # Reshape inputs
            X = X.reshape(X.shape[0], X.shape[1], 1)
            y = y.reshape(y.shape[0], y.shape[1])
            X_exo = X_exo.reshape(X_exo.shape[0], X_exo.shape[1], n_exo)

            # Split all three arrays in a single call so the rows are
            # guaranteed to stay aligned.
            # NOTE(review): windows overlap, so a shuffled split lets the
            # held-out windows share time steps with training windows.
            (X_train, X_test, y_train, y_test,
             X_exo_train, X_exo_test) = train_test_split(
                X, y, X_exo, test_size=0.2, random_state=42
            )

            # Build model
            model = self.build_multivariate_model(
                seq_length=seq_length,
                n_features=1,
                exo_features=n_exo,
                forecast_horizon=forecast_horizon
            )

            # Train model
            model.fit(
                [X_train, X_exo_train], y_train,
                epochs=100,
                batch_size=32,
                validation_split=0.2,
                verbose=1
            )

            # Score the held-out split, which was previously created but
            # never used.
            test_loss = float(model.evaluate([X_test, X_exo_test], y_test, verbose=0))

            # Save model with a safe filename
            model_path = self._model_path(param)
            model.save(model_path)
            print(f"Model for {param} saved to {model_path}")

            # Forecast from the most recent window. Future exogenous values
            # are not available, so the last observed ones stand in for them.
            last_sequence = data[-seq_length:].reshape(1, seq_length, 1)
            last_exo = exogenous[-forecast_horizon:].reshape(1, forecast_horizon, n_exo)

            forecast = model.predict([last_sequence, last_exo])[0]

            # Inverse transform back to original units
            scaler = self.scalers[param]
            results[param] = {
                'forecast': scaler.inverse_transform(forecast.reshape(-1, 1)).flatten(),
                'historical': scaler.inverse_transform(data[-seq_length:].reshape(-1, 1)).flatten(),
                'test_loss': test_loss
            }

        # Plotting
        self._plot_results(results)

        return results

    def _plot_results(self, results):
        """Plot history and forecast per parameter and save the figure.

        The figure is written to ``water_quality_forecast.png`` in the
        current working directory. Nothing is written when ``results`` is
        empty.

        Args:
            results: dict as returned by ``train_and_forecast``.
        """
        if not results:
            return

        # Size the grid from the number of parameters rather than assuming
        # exactly nine; nine parameters still give the 3x3, 20x15 layout.
        n_cols = 3
        n_rows = -(-len(results) // n_cols)  # ceiling division
        fig, axes = plt.subplots(n_rows, n_cols, figsize=(20, 5 * n_rows), squeeze=False)
        axes = axes.ravel()

        for ax, (param, data) in zip(axes, results.items()):
            historical = data['historical']
            forecast = data['forecast']

            # The forecast starts on the step after the last historical
            # point, so its x-axis begins at len(historical).
            x_hist = range(len(historical))
            x_forecast = range(len(historical), len(historical) + len(forecast))

            ax.plot(x_hist, historical, label='Historical', color='blue')
            ax.plot(x_forecast, forecast, label='Forecast', color='red', linestyle='--')

            ax.set_title(param)
            ax.legend()

        # Hide grid cells that have no parameter to show
        for ax in axes[len(results):]:
            ax.set_visible(False)

        fig.tight_layout()
        fig.savefig('water_quality_forecast.png')
        plt.close(fig)

    def load_parameter_model(self, parameter):
        """
        Load a saved model for a specific parameter

        Args:
            parameter (str): Name of the water quality parameter

        Returns:
            Loaded Keras model
        """
        return load_model(self._model_path(parameter))

# Usage
if __name__ == "__main__":
    # Path to the CSV file. Set the WATER_QUALITY_CSV environment variable to
    # run against a different file without editing this cell; otherwise the
    # original location is used.
    data_path = os.environ.get(
        "WATER_QUALITY_CSV",
        r"C:\Users\New User\OneDrive\Desktop\Ganga_Project\Devprayag.csv",
    )

    # Create forecaster (loads and scales the data)
    forecaster = WaterQualityLSTMForecaster(data_path)

    # Train one model per parameter and forecast with it
    results = forecaster.train_and_forecast()

    # Print forecast results
    for param, data in results.items():
        print(f"{param} Forecast:")
        print("Historical:", data['historical'])
        print("Forecast:", data['forecast'])
        print("\n")

    # Example of loading a specific model later
    try:
        # Load a specific parameter's model from the models directory
        loaded_model = forecaster.load_parameter_model('Total Coliform')
        print("Model loaded successfully!")
    except Exception as e:
        # Report the failure instead of aborting: this step is only a demo
        print(f"Error loading model: {e}")

Missing values:
 Biochemical Oxygen Demand    0
Conductivity                 0
Temperature                  0
Turbidity                    0
Dissolved Oxygen             0
Fecal Coliform               0
Fecal Streptococci           0
Nitrate                      0
pH                           0
Rainfall                     0
Total Coliform               0
dtype: int64


  self.df.fillna(method='ffill', inplace=True)


Epoch 1/100
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 36ms/step - loss: 0.1213 - val_loss: 0.0154
Epoch 2/100
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - loss: 0.0163 - val_loss: 0.0105
Epoch 3/100
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - loss: 0.0126 - val_loss: 0.0097
Epoch 4/100
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - loss: 0.0110 - val_loss: 0.0083
Epoch 5/100
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - loss: 0.0099 - val_loss: 0.0088
Epoch 6/100
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - loss: 0.0098 - val_loss: 0.0074
Epoch 7/100
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - loss: 0.0101 - val_loss: 0.0071
Epoch 8/100
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - loss: 0.0092 - val_loss: 0.0073
Epoch 9/100
[1m36/36[0m [32m━━━━━━━━━

In [None]:
import os

# Show the kernel's working directory: the relative outputs written by the
# cell above ('water_quality_models', 'water_quality_forecast.png') are
# created relative to it.
working_dir = os.getcwd()
print(working_dir)


C:\Users\New User
