In [1]:
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt

# Set random seed for reproducibility
np.random.seed(42)

# Define simulation parameters
n_records = 1000
time_steps = pd.date_range(start="2024-01-01", periods=n_records, freq='H')

# Simulate sensor data for soil moisture, temperature, pressure, water flow, and rainfall
soil_moisture = np.random.normal(loc=50, scale=10, size=n_records)  # Soil moisture level in percentage
temperature = np.random.normal(loc=25, scale=5, size=n_records)  # Temperature in °C
rainfall = np.random.normal(loc=5, scale=2, size=n_records)  # Rainfall in mm
humidity = np.random.normal(loc=70, scale=15, size=n_records)  # Humidity in percentage
water_flow = np.random.normal(loc=100, scale=20, size=n_records)  # Water flow rate in liters/hour
pressure = np.random.normal(loc=3, scale=0.5, size=n_records)  # Pressure in bar

# Failure events: 1 means failure occurs at that time step, 0 means no failure
failure_event = np.random.choice([0, 1], size=n_records, p=[0.9, 0.1])  # 10% failure rate

# Create the dataframe
data = pd.DataFrame({
    'timestamp': time_steps,
    'soil_moisture': soil_moisture,
    'temperature': temperature,
    'rainfall': rainfall,
    'humidity': humidity,
    'water_flow': water_flow,
    'pressure': pressure,
    'failure_event': failure_event
})

# Save data to CSV
data.to_csv('enhanced_irrigation_data.csv', index=False)

# Display the first few records
print(data.head())

            timestamp  soil_moisture  temperature  rainfall   humidity  \
0 2024-01-01 00:00:00      54.967142    31.996777  3.649643  41.382887   
1 2024-01-01 01:00:00      48.617357    29.623168  4.710963  57.094225   
2 2024-01-01 02:00:00      56.476885    25.298152  3.415160  63.795917   
3 2024-01-01 03:00:00      65.230299    21.765316  4.384077  98.315315   
4 2024-01-01 04:00:00      47.658466    28.491117  1.212771  78.348297   

   water_flow  pressure  failure_event  
0   82.730128  2.788120              0  
1   99.375930  2.773293              0  
2  100.360337  2.102178              0  
3  109.452607  2.834955              0  
4   72.662833  3.366415              1  


  time_steps = pd.date_range(start="2024-01-01", periods=n_records, freq='H')


In [2]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the enhanced dataset
data = pd.read_csv('enhanced_irrigation_data.csv')

# Handle missing values (if any)
data = data.fillna(method='ffill')

# Feature engineering
data['temp_diff'] = data['temperature'].diff().fillna(0)  # Temperature difference from the previous step
data['soil_moisture_trend'] = data['soil_moisture'].rolling(window=5).mean().fillna(data['soil_moisture'])  # 5-hour rolling average of soil moisture

# Features and target variable
X = data[['soil_moisture', 'temperature', 'rainfall', 'humidity', 'water_flow', 'pressure', 'temp_diff', 'soil_moisture_trend']]
y = data['failure_event']

# Normalize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

print(f"Training set size: {X_train.shape}")
print(f"Test set size: {X_test.shape}")


Training set size: (800, 8)
Test set size: (200, 8)


  data = data.fillna(method='ffill')


In [6]:
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix

# Initialize models
rf_model = RandomForestClassifier(random_state=42)
gb_model = GradientBoostingClassifier(random_state=42)
xgb_model = XGBClassifier(random_state=42)

# Corrected param_grid for RandomForestClassifier
param_grid_rf = {
    'n_estimators': [100, 200, 300],
    'max_features': ['auto', 'sqrt', 'log2'],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'bootstrap': [True, False]
}

# GridSearchCV for RandomForestClassifier
grid_rf = GridSearchCV(estimator=rf_model, param_grid=param_grid_rf, cv=5, n_jobs=-1)
grid_rf.fit(X_train, y_train)

# GridSearchCV for GradientBoosting
grid_gb = GridSearchCV(estimator=gb_model, param_grid=param_grid, cv=5, n_jobs=-1)
grid_gb.fit(X_train, y_train)

# GridSearchCV for XGBoost
grid_xgb = GridSearchCV(estimator=xgb_model, param_grid=param_grid, cv=5, n_jobs=-1)
grid_xgb.fit(X_train, y_train)

# Display best parameters
print("Best parameters for RandomForest:", grid_rf.best_params_)
print("Best parameters for GradientBoosting:", grid_gb.best_params_)
print("Best parameters for XGBoost:", grid_xgb.best_params_)

# Evaluate models
y_pred_rf = grid_rf.predict(X_test)
y_pred_gb = grid_gb.predict(X_test)
y_pred_xgb = grid_xgb.predict(X_test)

print("RandomForest Classifier Evaluation:\n", classification_report(y_test, y_pred_rf))
print("GradientBoosting Classifier Evaluation:\n", classification_report(y_test, y_pred_gb))
print("XGBoost Classifier Evaluation:\n", classification_report(y_test, y_pred_xgb))

1080 fits failed out of a total of 3240.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
417 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/sklearn/model_selection/_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/base.py", line 1466, in wrapper
    estimator._validate_params()
  File "/usr/local/lib/python3.10/dist-packages/sklearn/base.py", line 666, in _validate_params
    validate_parameter_constraints(
  File "/usr/local/lib/python3.10/dist-packages/sklearn/utils/_param_validation.py", line 95, in validate_parameter_constraints
    raise InvalidParameterError(


Best parameters for RandomForest: {'bootstrap': True, 'max_depth': None, 'max_features': 'sqrt', 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 300}
Best parameters for GradientBoosting: {'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 50, 'subsample': 0.8}
Best parameters for XGBoost: {'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 50, 'subsample': 0.8}
RandomForest Classifier Evaluation:
               precision    recall  f1-score   support

           0       0.91      1.00      0.95       182
           1       0.00      0.00      0.00        18

    accuracy                           0.91       200
   macro avg       0.46      0.50      0.48       200
weighted avg       0.83      0.91      0.87       200

GradientBoosting Classifier Evaluation:
               precision    recall  f1-score   support

           0       0.91      1.00      0.95       182
           1       0.00      0.00      0.00        18

    accuracy                           0.91

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [7]:
from keras.models import Sequential
from keras.layers import Dense, Dropout
from sklearn.preprocessing import MinMaxScaler

# Scale the data for Autoencoder
scaler = MinMaxScaler()
X_scaled_autoencoder = scaler.fit_transform(X)

# Build Autoencoder model
autoencoder = Sequential()
autoencoder.add(Dense(64, activation='relu', input_dim=X_scaled_autoencoder.shape[1]))
autoencoder.add(Dropout(0.2))
autoencoder.add(Dense(32, activation='relu'))
autoencoder.add(Dropout(0.2))
autoencoder.add(Dense(16, activation='relu'))
autoencoder.add(Dense(X_scaled_autoencoder.shape[1], activation='sigmoid'))  # Output layer with same dimension as input

autoencoder.compile(optimizer='adam', loss='mse')

# Train the Autoencoder model
autoencoder.fit(X_scaled_autoencoder, X_scaled_autoencoder, epochs=20, batch_size=64)

# Use the trained Autoencoder to detect anomalies (i.e., high reconstruction error)
reconstruction_error = autoencoder.evaluate(X_scaled_autoencoder, X_scaled_autoencoder)

print(f"Reconstruction error: {reconstruction_error}")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - loss: 0.0262
Epoch 2/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0246
Epoch 3/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0247
Epoch 4/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0235
Epoch 5/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0217
Epoch 6/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0206
Epoch 7/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0203
Epoch 8/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0193
Epoch 9/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0180
Epoch 10/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0168 
Epoch 11

In [12]:
from statsmodels.tsa.arima.model import ARIMA
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# Build and train ARIMA model for forecasting
arima_model = ARIMA(data['soil_moisture'], order=(5,1,0))  # ARIMA(5,1,0)
arima_model_fit = arima_model.fit()

# Make ARIMA predictions
arima_forecast = arima_model_fit.forecast(steps=10)
print(f"ARIMA Forecast: {arima_forecast}")

# Build and train LSTM model for forecasting
X_lstm = data['soil_moisture'].values.reshape(-1, 1)
X_lstm = X_lstm.astype('float32')
X_lstm = MinMaxScaler(feature_range=(0, 1)).fit_transform(X_lstm)

# Prepare the data for LSTM input
X_lstm_data, y_lstm_data = [], []
for i in range(60, len(X_lstm)):
    X_lstm_data.append(X_lstm[i-60:i, 0])
    y_lstm_data.append(X_lstm[i, 0])
X_lstm_data = np.array(X_lstm_data) #Rename to avoid conflict
y_lstm_data = np.array(y_lstm_data)

# Check if X_lstm_data is empty and handle accordingly
if X_lstm_data.size == 0:
    print("Error: Not enough data points to create LSTM input.")
else:
    # Build LSTM model
    lstm_model = Sequential()
    lstm_model.add(LSTM(units=50, return_sequences=True, input_shape=(X_lstm_data.shape[1], 1)))
    lstm_model.add(LSTM(units=50, return_sequences=False))
    lstm_model.add(Dense(units=1))

    lstm_model.compile(optimizer='adam', loss='mean_squared_error')
    lstm_model.fit(X_lstm_data, y_lstm_data, epochs=10, batch_size=32)

    # Predict with LSTM
    lstm_forecast = lstm_model.predict(X_lstm_data)


ARIMA Forecast: 1000    50.677487
1001    53.818498
1002    55.653345
1003    52.349393
1004    52.204757
1005    53.357082
1006    52.835371
1007    53.494425
1008    53.380469
1009    52.881299
Name: predicted_mean, dtype: float64
Epoch 1/10


  super().__init__(**kwargs)


[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 56ms/step - loss: 0.1026
Epoch 2/10
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 57ms/step - loss: 0.0187
Epoch 3/10
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 57ms/step - loss: 0.0208
Epoch 4/10
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 62ms/step - loss: 0.0198
Epoch 5/10
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 89ms/step - loss: 0.0188
Epoch 6/10
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 56ms/step - loss: 0.0211
Epoch 7/10
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 59ms/step - loss: 0.0203
Epoch 8/10
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 57ms/step - loss: 0.0210
Epoch 9/10
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 72ms/step - loss: 0.0195
Epoch 10/10
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 89ms/step - loss: 0.0202
[1m30/30