In [1]:
import pandas as pd
import numpy as np
import joblib

# Load the new test dataset. Only the basic columns are expected:
# timestamp, temperature, humidity, moisture.
test_df = pd.read_csv('realistic_soil_moisture_scenario.csv', parse_dates=['timestamp'])

# ----------------------------
# Generate lag and rolling features (same as training)
# ----------------------------
# NOTE(review): shift() and rolling() operate on row position, so the rows are
# assumed to be in chronological order at a fixed INTERVAL_MINUTES spacing —
# confirm this for every new dataset.

N_LAGS = 6  # number of lag steps
FORECAST_HOURS = 6  # forecast horizon, in hours
INTERVAL_MINUTES = 30  # assumed spacing between consecutive rows, in minutes

# Columns are appended lag by lag (moisture, temperature, humidity) so the
# resulting column order stays exactly as before; the feature matrix built
# later inherits this order.
for lag in range(1, N_LAGS + 1):
    test_df[f'moisture_lag_{lag}'] = test_df['moisture'].shift(lag)
    test_df[f'temperature_lag_{lag}'] = test_df['temperature'].shift(lag)
    test_df[f'humidity_lag_{lag}'] = test_df['humidity'].shift(lag)

# Rolling mean / variance of moisture over the current row and the two before it.
test_df['moisture_ma_3'] = test_df['moisture'].rolling(window=3).mean()
test_df['moisture_var_3'] = test_df['moisture'].rolling(window=3).var()

# Create future moisture target for reference (not used for prediction).
# It is NaN for the last `steps_ahead` rows because their future is unknown.
steps_ahead = (FORECAST_HOURS * 60) // INTERVAL_MINUTES
future_col = f'moisture_future_{FORECAST_HOURS}h'
test_df[future_col] = test_df['moisture'].shift(-steps_ahead)

# Drop rows that are incomplete in any column EXCEPT the reference target.
# A plain dropna() would also discard the last `steps_ahead` rows (their
# future target is NaN by construction), i.e. exactly the most recent
# observations that a forecast is needed for.
required_cols = [col for col in test_df.columns if col != future_col]
test_df = test_df.dropna(subset=required_cols).reset_index(drop=True)

# ----------------------------
# Restore the persisted estimators
# ----------------------------
# NOTE: joblib.load unpickles the file, so only load model files from a
# trusted source.
reg_model = joblib.load('random_forest_regressor_best.pkl')
clf_model = joblib.load('random_forest_classifier_best.pkl')

# ----------------------------
# Build the regression feature matrix
# ----------------------------
# Everything that is not a raw target, label, or timestamp is a feature.
# Only the exclusions actually present in the frame are kept in the list.
reg_drop_cols = [
    name
    for name in (
        'moisture',
        'recommendation',
        'timestamp',
        f'moisture_future_{FORECAST_HOURS}h',
    )
    if name in test_df.columns
]
# Preserve the frame's own column order for the remaining features.
reg_feature_cols = [name for name in test_df.columns if name not in reg_drop_cols]

X_reg_test = test_df.loc[:, reg_feature_cols]

# ----------------------------
# Stage 1: regress soil moisture
# ----------------------------
regression_preds = reg_model.predict(X_reg_test)

# ----------------------------
# Stage 2: classify the recommendation
# ----------------------------
# The classifier input is the regression feature matrix plus the stage-1
# prediction appended as the last column.
X_clf_test = X_reg_test.assign(predicted_moisture=regression_preds)

clf_preds = clf_model.predict(X_clf_test)

# ----------------------------
# Assemble, preview, and persist the results
# ----------------------------
# Raw readings side by side with both model outputs, one row per kept sample.
results = test_df.loc[:, ['timestamp', 'moisture', 'temperature', 'humidity']].assign(
    predicted_moisture=regression_preds,
    predicted_recommendation=clf_preds,
)

# Quick look at the first ten rows.
preview = results.head(10)
print(preview)

# Write the full table to disk without the positional index.
results.to_csv('test_predictions_recommendations.csv', index=False)
print("Predictions & recommendations saved to 'test_predictions_recommendations.csv'")


            timestamp  moisture  temperature  humidity  predicted_moisture  \
0 2025-08-19 03:00:00    413.98        22.35     65.12          329.682367   
1 2025-08-19 03:30:00    409.19        21.27     72.96          404.621199   
2 2025-08-19 04:00:00    399.96        24.51     73.90          406.060354   
3 2025-08-19 04:30:00    411.19        21.64     75.10          408.374983   
4 2025-08-19 05:00:00    405.80        20.94     73.54          407.173586   
5 2025-08-19 05:30:00    397.49        22.45     76.25          403.630138   
6 2025-08-19 06:00:00    401.56        24.50     80.11          398.555479   
7 2025-08-19 06:30:00    394.94        23.24     71.32          396.314328   
8 2025-08-19 07:00:00    393.28        22.12     72.49          395.906551   
9 2025-08-19 07:30:00    396.02        23.25     80.09          396.591883   

  predicted_recommendation  
0  OK - No watering needed  
1  OK - No watering needed  
2  OK - No watering needed  
3  OK - No watering neede