In [1]:
import pandas as pd
import joblib


In [2]:
# Load the trained models
migration_model = joblib.load(r"C:\Users\Deshan\Documents\IIT LECS\DSGP Models\Migration model\models\migration_prediction_model.pkl")
location_model = joblib.load(r"C:\Users\Deshan\Documents\IIT LECS\DSGP Models\Migration model\models\location_prediction_model.pkl")
time_models = joblib.load(r"C:\Users\Deshan\Documents\IIT LECS\DSGP Models\Migration model\models\time_prediction_model.pkl")


In [3]:
def prepare_input_for_model(user_input, model_type):
    """
    Prepares user input based on model type by selecting only required features.
    """
    input_df = pd.DataFrame([user_input])

    # ✅ Define expected feature sets
    if model_type == "migration":
        required_features = migration_model.feature_names_in_
    elif model_type == "location":
        required_features = location_model.feature_names_in_
    elif model_type == "time":
        return user_input  # No preprocessing needed for time models

    # ✅ Ensure only trained features are passed to model
    input_df = input_df[required_features]

    return input_df


In [4]:
# ✅ Migration Prediction
def predict_migration(user_input):
    input_features = prepare_input_for_model(user_input, "migration")
    probability = migration_model.predict_proba(input_features)[:, 1]  # Get probability
    return {"migration_probability": probability[0]}

# ✅ Location Prediction
def predict_location(user_input):
    input_features = prepare_input_for_model(user_input, "location")
    probabilities = location_model.predict_proba(input_features)[:, 1]
    
    species_list = location_model.classes_  # Get species names
    sorted_species = sorted(zip(species_list, probabilities), key=lambda x: x[1], reverse=True)

    top_species = [f"{species} (Probability: {prob:.2f})" for species, prob in sorted_species[:5]]

    return {"top_species_predictions": top_species}

# ✅ Time Prediction
def predict_best_time(user_input):
    location_encoded = time_models['locality_encoder'].transform([user_input["location"]])[0]
    
    best_month = time_models['month_model'].predict([[location_encoded]])[0]
    best_day = time_models['day_model'].predict([[location_encoded]])[0]
    best_hour = time_models['hour_model'].predict([[location_encoded]])[0]

    return {"best_time": {"month": best_month, "day": best_day, "hour": best_hour}}


In [5]:
def process_user_query(user_input):
    response = {}

    # Check for Migration Prediction
    if "Year" in user_input and "LATITUDE" in user_input and "LONGITUDE" in user_input:
        response.update(predict_migration(user_input))

    # Check for Location Prediction (Bird Species)
    if "COMMON NAME_Blue-tailed Bee-eater" in user_input or "COMMON NAME_Red-vented Bulbul" in user_input:
        response.update(predict_location(user_input))

    # Check for Time Prediction (Best time for bird observation)
    if "location" in user_input:
        response.update(predict_best_time(user_input))

    return response


In [9]:
test_input_location = {
    "Year": 2025, "Month": 3, "Day": 12, "Day_of_Week": 2, "Hour": 7,
    "LATITUDE": 6.287, "LONGITUDE": 81.261,
    "COMMON NAME_Blue-tailed Bee-eater": 1, "COMMON NAME_White-throated Kingfisher": 0,"COMMON NAME_Red-vented Bulbul": 0
}

result_location = predict_location(test_input_location)
print("Location Prediction:", result_location)


Location Prediction: {'top_species_predictions': ['0 (Probability: 0.00)']}
