**Model for predicting both Scrcity and month**

In [3]:
import os
import random
import numpy as np
import pickle
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import LSTM, Dense
from flask import Flask, request, jsonify, send_file
import matplotlib.pyplot as plt
import io
from datetime import datetime, timedelta

app = Flask(__name__)

# Load trained RandomForest model & scalers
try:
    with open("scarcity_classifier.pkl", "rb") as f:
        clf = pickle.load(f)

    with open("label_encoders.pkl", "rb") as f:
        label_encoders = pickle.load(f)

    with open("scaler.pkl", "rb") as f:
        scaler = pickle.load(f)
except Exception as e:
    app.logger.error(f"Error loading models: {e}")
    raise

# Dictionary to cache loaded LSTM models
lstm_models = {}

def train_lstm_model(state, district):
    """Train and save an LSTM model dynamically if not found."""
    app.logger.info(f"Training new LSTM model for {state}, {district}...")

    # Generate synthetic training data (replace with actual data)
    num_samples = 100
    time_steps = 6
    features = 4

    X_train = np.random.rand(num_samples, time_steps, features)
    y_train = np.random.randint(12, 24, num_samples)  # Random scarcity months

    # Define the LSTM model
    model = Sequential([
        LSTM(50, activation='relu', return_sequences=True, input_shape=(time_steps, features)),
        LSTM(50, activation='relu'),
        Dense(1)  # Predict scarcity months
    ])

    model.compile(optimizer='adam', loss='mse')

    # Train the model
    model.fit(X_train, y_train, epochs=10, batch_size=16, verbose=0)

    # Save model
    model_path = f"lstm_scarcity_{state}_{district}.h5"
    model.save(model_path)
    app.logger.info(f"Saved new LSTM model: {model_path}")

    return model

@app.route("/predict", methods=["POST"])
def predict_scarcity():
    """Predict water scarcity and forecast when it will occur."""
    data = request.get_json()

    required_fields = ["State", "District", "Rainfall (mm)", "Groundwater Level (m)", "Temperature (°C)", "River Water Level (m)"]
    if not all(field in data for field in required_fields):
        return jsonify({"error": "Missing required fields"}), 400

    state = data["State"]
    district = data["District"]

    try:
        features = np.array([[float(data["Rainfall (mm)"]), float(data["Groundwater Level (m)"]), 
                              float(data["Temperature (°C)"]), float(data["River Water Level (m)"])]])
    except ValueError:
        return jsonify({"error": "Invalid numerical data for features"}), 400

    # Convert to DataFrame to apply scaling
    features_df = pd.DataFrame(features, columns=["Rainfall (mm)", "Groundwater Level (m)", "Temperature (°C)", "River Water Level (m)"])
    scaled_features = scaler.transform(features_df)

    # Predict scarcity occurrence using the classifier
    try:
        scarcity_prediction = clf.predict(scaled_features)[0]
    except Exception as e:
        app.logger.error(f"Error predicting scarcity: {e}")
        return jsonify({"error": "Prediction failed"}), 500

    # If no scarcity, return response immediately
    if scarcity_prediction == 0:
        return jsonify({
            "state": state,
            "district": district,
            "scarcity": False
        })

    # Load or train LSTM model
    model_key = (state, district)
    model_path = f"lstm_scarcity_{state}_{district}.h5"

    if model_key not in lstm_models:
        if os.path.exists(model_path):
            lstm_models[model_key] = load_model(model_path)
        else:
            lstm_models[model_key] = train_lstm_model(state, district)

    # Prepare input sequence for LSTM
    past_data = np.tile(scaled_features, (6, 1))  # Mock: Repeat input for 6 time steps
    input_seq = np.expand_dims(past_data, axis=0)  # Shape: (1, 6, 4)

    # Predict scarcity months using LSTM
    try:
        scarcity_months = int(lstm_models[model_key].predict(input_seq)[0][0])
        scarcity_months = max(12, min(24, scarcity_months))  # Ensure range 12-24
    except Exception as e:
        app.logger.error(f"LSTM prediction error: {e}")
        scarcity_months = random.randint(12, 24)  # Fallback random value

    return jsonify({
        "state": state,
        "district": district,
        "scarcity": True,
        "scarcity_months": scarcity_months
    })

# Load the dataset
df = pd.read_csv('water_scarcity.csv')

@app.route('/get_graphs', methods=['POST'])
def get_graphs():
    # Get user inputs from JSON payload
    data = request.get_json()
    state = data.get('state')
    district = data.get('district')

    if not state or not district:
        return jsonify({'error': 'Please provide both state and district'}), 400

    # Filter the dataset by state and district
    filtered_df = df[(df['State'] == state) & (df['District'] == district)]

    # Get the last two months from today's date
    end_date = datetime.today()
    start_date = end_date - timedelta(days=60)  # Last 2 months

    # Convert 'Date' to datetime type
    filtered_df['Date'] = pd.to_datetime(filtered_df['Date'])

    # Filter data for the last 2 months
    recent_data = filtered_df[filtered_df['Date'] >= start_date]

    if recent_data.empty:
        return jsonify({'error': 'No data found for the given state and district for the last 2 months'}), 404

    # Initialize list to store image files
    img_files = []

    # Plot graphs for each parameter
    parameters = ['Rainfall (mm)', 'Groundwater Level (m)', 'Temperature (°C)', 'River Water Level (m)']
    for param in parameters:
        plt.figure(figsize=(8, 5))
        plt.plot(recent_data['Date'], recent_data[param], marker='o', linestyle='-', color='b')
        plt.title(f'{param} over Last 2 Months')
        plt.xlabel('Date')
        plt.ylabel(param)
        plt.xticks(rotation=45)
        plt.grid(True)

        # Save plot to a BytesIO object
        img = io.BytesIO()
        plt.savefig(img, format='png')
        img.seek(0)
        img_files.append(img)
        plt.close()

    # Return all images as a zip file
    from zipfile import ZipFile
    import os

    # Create a temporary folder to store images
    temp_folder = 'temp_images'
    os.makedirs(temp_folder, exist_ok=True)

    # Save the images to files in the temporary folder
    img_paths = []
    for idx, img in enumerate(img_files):
        img_path = os.path.join(temp_folder, f'graph_{idx+1}.png')
        with open(img_path, 'wb') as f:
            f.write(img.getvalue())
        img_paths.append(img_path)

    # Create a zip file containing the images
    zip_filename = 'graphs.zip'
    with ZipFile(zip_filename, 'w') as zipf:
        for img_path in img_paths:
            zipf.write(img_path, os.path.basename(img_path))

    # Cleanup: remove the temporary image files
    for img_path in img_paths:
        os.remove(img_path)
    os.rmdir(temp_folder)

    # Send the zip file as a response
    return send_file(zip_filename, mimetype='application/zip', as_attachment=True, download_name='graphs.zip')

if __name__ == "__main__":
    app.run(debug=False)


 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
INFO:werkzeug:[33mPress CTRL+C to quit[0m
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['Date'] = pd.to_datetime(filtered_df['Date'])
127.0.0.1 - - [01/Feb/2025 18:25:34] "POST /get_graphs HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Feb/2025 18:25:34] "POST /get_graphs HTTP/1.1" 200 -
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['Date'] = pd.to_datetime(filtered_df['Date'])
127.0.0.1 - - [01/Feb/2025 18:25:45] "POST /get_graphs HTTP/1

**API for separate Scarcity and timing prediction**

In [2]:
import numpy as np
import pickle
import pandas as pd
import tensorflow as tf
from flask import Flask, request, jsonify,send_file
from tensorflow.keras.models import load_model
import matplotlib.pyplot as plt
import io
from datetime import datetime, timedelta

# Initialize Flask app
app = Flask(__name__)

# Load saved models
with open("scarcity_classifier.pkl", "rb") as f:
    rf_model = pickle.load(f)

with open("label_encoders.pkl", "rb") as f:
    label_encoders = pickle.load(f)

with open("scaler.pkl", "rb") as f:
    scaler = pickle.load(f)

# Features used for predictions
features = ["Rainfall (mm)", "Groundwater Level (m)", "Temperature (°C)", "River Water Level (m)"]
target_col = "Scarcity"

# Function to map forecast to month
def map_forecast_to_month(scarcity_forecast):
    if scarcity_forecast < 0.2:
        return "No immediate scarcity, expected in 12+ months"
    elif 0.2 <= scarcity_forecast < 0.5:
        return "Scarcity expected in 6–12 months"
    elif 0.5 <= scarcity_forecast < 0.8:
        return "Scarcity expected in 3–6 months"
    elif scarcity_forecast >= 0.8:
        return "Scarcity expected within the next 3 months"
    else:
        return "Invalid forecast value"

# Function to create sequences for LSTM model
def create_lstm_sequence(data, n_steps=6):
    seq = []
    for i in range(n_steps):
        seq.append(data.iloc[i][features + [target_col]].values)
    return np.array([seq])

# Endpoint for scarcity classification
@app.route("/predict_scarcity", methods=["POST"])
def predict_scarcity():
    try:
        # Get JSON data from request
        data = request.get_json()

        # Extract features
        input_data = np.array([[data[feature] for feature in features]])

        # Scale input
        input_data = scaler.transform(input_data)

        # Predict with Random Forest model
        prediction = rf_model.predict(input_data)[0]

        return jsonify({"scarcity_prediction": int(prediction)})

    except Exception as e:
        return jsonify({"error": str(e)})

# Endpoint for scarcity timing prediction using LSTM
@app.route("/predict_timing", methods=["POST"])
def predict_timing():
    try:
        # Get JSON data
        data = request.get_json()

        # Extract state & district
        state = data["State"]
        district = data["District"]

        # Encode state & district
        if state in label_encoders["State"].classes_ and district in label_encoders["District"].classes_:
            state_encoded = label_encoders["State"].transform([state])[0]
            district_encoded = label_encoders["District"].transform([district])[0]
        else:
            return jsonify({"error": "Invalid state or district"})

        # Load LSTM model for the state-district
        model_path = f"lstm_scarcity_{state}_{district}.h5"
        try:
            lstm_model = load_model(model_path)
        except:
            return jsonify({"error": "No LSTM model found for this location"})

        # Convert input data to a DataFrame (for consistency)
        df_input = pd.DataFrame(data["historical_data"])

        # Scale numerical data
        df_input[features] = scaler.transform(df_input[features])

        # Generate sequence for LSTM model
        X_input = create_lstm_sequence(df_input)

        # Ensure shape matches model input
        X_input = X_input.reshape(1, 6, len(features) + 1)  # Adjust shape if needed

        # Predict scarcity timing using LSTM model
        lstm_prediction = lstm_model.predict(X_input)[0][0]

        # Map the scarcity forecast to months
        forecast_month = map_forecast_to_month(lstm_prediction)

        return jsonify({
            "scarcity_forecast": float(lstm_prediction),
            "forecast_month": forecast_month
        })

    except Exception as e:
        return jsonify({"error": str(e)})

# Load the dataset
df = pd.read_csv('water_scarcity.csv')

@app.route('/get_graphs', methods=['POST'])
def get_graphs():
    # Get user inputs from JSON payload
    data = request.get_json()
    state = data.get('state')
    district = data.get('district')

    if not state or not district:
        return jsonify({'error': 'Please provide both state and district'}), 400

    # Filter the dataset by state and district
    filtered_df = df[(df['State'] == state) & (df['District'] == district)]

    # Get the last two months from today's date
    end_date = datetime.today()
    start_date = end_date - timedelta(days=60)  # Last 2 months

    # Convert 'Date' to datetime type
    filtered_df['Date'] = pd.to_datetime(filtered_df['Date'])

    # Filter data for the last 2 months
    recent_data = filtered_df[filtered_df['Date'] >= start_date]

    if recent_data.empty:
        return jsonify({'error': 'No data found for the given state and district for the last 2 months'}), 404

    # Initialize list to store image files
    img_files = []

    # Plot graphs for each parameter
    parameters = ['Rainfall (mm)', 'Groundwater Level (m)', 'Temperature (°C)', 'River Water Level (m)']
    for param in parameters:
        plt.figure(figsize=(8, 5))
        plt.plot(recent_data['Date'], recent_data[param], marker='o', linestyle='-', color='b')
        plt.title(f'{param} over Last 2 Months')
        plt.xlabel('Date')
        plt.ylabel(param)
        plt.xticks(rotation=45)
        plt.grid(True)

        # Save plot to a BytesIO object
        img = io.BytesIO()
        plt.savefig(img, format='png')
        img.seek(0)
        img_files.append(img)
        plt.close()

    # Return all images as a zip file
    from zipfile import ZipFile
    import os

    # Create a temporary folder to store images
    temp_folder = 'temp_images'
    os.makedirs(temp_folder, exist_ok=True)

    # Save the images to files in the temporary folder
    img_paths = []
    for idx, img in enumerate(img_files):
        img_path = os.path.join(temp_folder, f'graph_{idx+1}.png')
        with open(img_path, 'wb') as f:
            f.write(img.getvalue())
        img_paths.append(img_path)

    # Create a zip file containing the images
    zip_filename = 'graphs.zip'
    with ZipFile(zip_filename, 'w') as zipf:
        for img_path in img_paths:
            zipf.write(img_path, os.path.basename(img_path))

    # Cleanup: remove the temporary image files
    for img_path in img_paths:
        os.remove(img_path)
    os.rmdir(temp_folder)

    # Send the zip file as a response
    return send_file(zip_filename, mimetype='application/zip', as_attachment=True, download_name='graphs.zip')

# Run Flask app
if __name__ == "__main__":
    app.run(debug=False)


 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['Date'] = pd.to_datetime(filtered_df['Date'])
127.0.0.1 - - [01/Feb/2025 18:22:16] "POST /get_graphs HTTP/1.1" 200 -


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 407ms/step


127.0.0.1 - - [01/Feb/2025 18:22:29] "POST /predict_timing HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Feb/2025 18:22:29] "POST /predict_timing HTTP/1.1" 200 -
127.0.0.1 - - [01/Feb/2025 18:22:36] "POST /predict_scarcity HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Feb/2025 18:22:36] "POST /predict_scarcity HTTP/1.1" 200 -
127.0.0.1 - - [01/Feb/2025 18:22:44] "POST /predict_scarcity HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Feb/2025 18:22:44] "POST /predict_scarcity HTTP/1.1" 200 -


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 258ms/step


127.0.0.1 - - [01/Feb/2025 18:23:10] "POST /predict_timing HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Feb/2025 18:23:10] "POST /predict_timing HTTP/1.1" 200 -
