In [1]:
# aqua_predict.py - Python script for ML Model Training

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
import joblib
import numpy as np

In [2]:
# --- 1. Simulate Data Generation (Replace with actual data loading) ---
def generate_data(n_samples=1000, seed=42):
    """Simulate a synthetic water-stress dataset.

    Parameters
    ----------
    n_samples : int, default 1000
        Number of rows to generate.
    seed : int or None, default 42
        Seed for the private random generator used by this function. The
        default reproduces the values previously obtained by calling
        ``np.random.seed(42)`` before drawing; ``None`` gives a fresh,
        non-reproducible draw.

    Returns
    -------
    pandas.DataFrame
        Columns ``Rainfall_Annual_mm``, ``Temp_Avg_C``, ``Soil_Moisture_Index``,
        ``Population_Density`` and the integer target ``WSI`` clipped to
        the range [0, 100].
    """
    # A private legacy RandomState yields the same stream as seeding the global
    # generator, but leaves NumPy's process-wide RNG state untouched for callers.
    rng = np.random.RandomState(seed)

    # Draw order matters for reproducibility: three uniforms, one randint, then
    # the noise term below.
    data = {
        'Rainfall_Annual_mm': rng.uniform(300, 1500, n_samples),
        'Temp_Avg_C': rng.uniform(25, 35, n_samples),
        'Soil_Moisture_Index': rng.uniform(0.1, 0.9, n_samples),
        'Population_Density': rng.randint(50, 500, n_samples),
    }
    df = pd.DataFrame(data)

    # Target Variable: Water Stress Index (WSI) - lower is better (less stress)
    # WSI is primarily driven by low rainfall and high temperature/density.
    # Note: Soil_Moisture_Index does not enter the formula; it is a feature
    # with no influence on the target.
    df['WSI'] = (100 - (df['Rainfall_Annual_mm'] / 15 + (100 - df['Temp_Avg_C'] * 2) - df['Population_Density'] / 10))

    # Add Gaussian noise (sd = 5), clamp to [0, 100], then truncate to integers.
    df['WSI'] = np.clip(df['WSI'] + rng.normal(0, 5, n_samples), 0, 100).astype(int)

    return df

In [3]:
# --- 2. Train and Save Model ---
def train_and_save_model(model_path='water_stress_model.joblib'):
    """Train a random-forest WSI regressor on simulated data and persist it.

    Parameters
    ----------
    model_path : str, default 'water_stress_model.joblib'
        Destination file for the serialized estimator.

    Returns
    -------
    list of str
        Names of the feature columns, in the order used for fitting.
    """
    df = generate_data()

    features = ['Rainfall_Annual_mm', 'Temp_Avg_C', 'Soil_Moisture_Index', 'Population_Density']
    X = df[features]
    y = df['WSI']

    # Use a robust regressor for prediction; the fixed random_state keeps the
    # fitted forest reproducible between runs.
    # NOTE(review): the model is fit on every generated row; no hold-out
    # evaluation is performed in this function.
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X, y)

    # Only the estimator is written to disk. The feature list is returned to
    # the caller rather than stored as a separate artifact.
    joblib.dump(model, model_path)
    print(f"Model trained and saved as {model_path}")
    return features

In [4]:
# Entry point: train the model, persist it to disk, and report the input
# columns it was fitted on. `features` is bound at module (notebook) scope.
if __name__ == '__main__':
    features = train_and_save_model()
    print("Required features:", features)

Model trained and saved as water_stress_model.joblib
Required features: ['Rainfall_Annual_mm', 'Temp_Avg_C', 'Soil_Moisture_Index', 'Population_Density']
