In [7]:
import datetime
import os
import re
from difflib import get_close_matches

import joblib
import numpy as np
import pandas as pd
from rapidfuzz import process

# ✅ Load the trained model and encoders
# The path defaults to the original developer location, but it can be overridden with the
# FEATHERFIND_TIME_MODEL_PATH environment variable so the notebook also runs on other machines.
# NOTE(security): joblib.load unpickles the file, which can execute arbitrary code —
# only point this at model files that come from a trusted source.
model_path = os.environ.get(
    'FEATHERFIND_TIME_MODEL_PATH',
    r'C:\Users\Deshan\Documents\IIT LECS\Year 2 Sem 1\DSGP\Git hub\FeatherFind\Migration model\models\time_prediction_model.pkl',
)
model_data = joblib.load(model_path)

# The saved bundle is indexed by these four keys; a missing key raises KeyError here,
# at load time, rather than later during prediction.
month_model = model_data['month_model']              # model used to predict the month
hour_model = model_data['hour_model']                # model used to predict the hour
selected_features = model_data['selected_features']  # column names for the model input frame
label_encoders = model_data['label_encoders']        # indexed by 'LOCALITY' and 'COMMON NAME'

# ✅ Locality and Bird Name Handling
# Localities that user input is matched against.
valid_localities = [
    "Buckingham Place Hotel Tangalle",
    "Bundala NP General",
    "Bundala National Park",
    "Kalametiya",
    "Tissa Lake",
    "Yala National Park General",
    "Debarawewa Lake",
]

# Canonical bird species names.
valid_bird_names = [
    "Blue-tailed Bee-eater",
    "Red-vented Bulbul",
    "White-throated Kingfisher",
]

# Informal (lower-case) names mapped to their canonical species name.
bird_aliases = {
    alias: species
    for species, aliases in (
        ("Blue-tailed Bee-eater", ("blue tailed bird", "bee eater", "blue bird")),
        ("Red-vented Bulbul", ("red bird", "bulbul")),
        ("White-throated Kingfisher", ("white bird", "kingfisher")),
    )
    for alias in aliases
}

# Season keyword -> name of the one-hot feature flag it switches on.
season_aliases = dict(
    summer="Is_Summer",
    winter="Is_Winter",
    spring="Is_Spring",
    autumn="Is_Autumn",
)

# Time-of-day keyword -> name of the one-hot feature flag it switches on.
time_period_aliases = dict(
    morning="Is_Morning",
    afternoon="Is_Afternoon",
    evening="Is_Evening",
    night="Is_Night",
)

# ✅ Function: Handle Bird Name Variations
def correct_bird_name(name):
    """
    Resolve a user-supplied bird name to a canonical entry of ``valid_bird_names``.

    The lower-cased input is first looked up in ``bird_aliases``. If it is not an
    alias, the closest lower-cased species name (difflib similarity of at least 0.3)
    is chosen.

    Returns:
        The canonical species name, or "Unknown Bird" when nothing is close enough.
    """
    key = name.lower()

    # Aliases take priority over fuzzy matching.
    try:
        return bird_aliases[key]
    except KeyError:
        pass

    lowered = [candidate.lower() for candidate in valid_bird_names]
    closest = get_close_matches(key, lowered, n=1, cutoff=0.3)
    if not closest:
        return "Unknown Bird"

    # Map the lower-cased winner back to its original casing (first occurrence wins).
    return valid_bird_names[lowered.index(closest[0])]

# ✅ Function: Handle Locality Variations
def correct_locality(user_input):
    """
    Map free-text user input onto one of ``valid_localities``.

    Matching is attempted in two steps:
      1. case-insensitive exact match (after trimming surrounding whitespace);
      2. fuzzy match via ``rapidfuzz.process.extractOne`` with a score cutoff of 80.

    Args:
        user_input: Locality text typed by the user. ``None`` or blank text is
            treated as "no match" instead of raising.

    Returns:
        The matching entry of ``valid_localities`` (in its original casing), or the
        string "Unknown Location" when nothing matches well enough.
    """
    cleaned = (user_input or "").strip().lower()
    if not cleaned:
        return "Unknown Location"

    # ✅ 1. Check for exact match
    for loc in valid_localities:
        if cleaned == loc.lower():
            return loc

    # ✅ 2. Check for fuzzy matches
    # The query is lower-cased but the choices are not. Recent RapidFuzz versions do not
    # preprocess strings by default, so without a processor every capital letter in a
    # choice counts as a mismatch and lowers the score. processor=str.lower makes the
    # comparison case-insensitive; the returned choice keeps its original casing.
    best_match = process.extractOne(
        cleaned, valid_localities, processor=str.lower, score_cutoff=80
    )
    if best_match:
        return best_match[0]  # Return the closest valid match

    return "Unknown Location"
# ✅ Function: Convert Day Name to Integer
def day_name_to_int(day_name):
    """
    Translate an English weekday name (any casing) to its index, Monday=0 … Sunday=6.

    Returns None when the name is not a weekday.
    """
    weekdays = (
        "monday", "tuesday", "wednesday", "thursday",
        "friday", "saturday", "sunday",
    )
    wanted = day_name.lower()
    try:
        return weekdays.index(wanted)
    except ValueError:
        # Not a recognised weekday name.
        return None

# ✅ Function: Extract Features from Query
def _name_in_text(name, text):
    """Return True if ``name`` occurs in ``text`` as whole words, ignoring punctuation between its words."""
    words = re.findall(r'[a-z0-9]+', name.lower())
    if not words:
        return False
    # Words are purely alphanumeric, so no escaping is needed. Any run of non-alphanumeric
    # characters (space, hyphen, dash, ...) may separate them, and a plural "s" is tolerated.
    pattern = r'\b' + r'[\W_]*'.join(words) + r's?\b'
    return re.search(pattern, text) is not None


def _find_bird_in_query(query):
    """Return the canonical bird name mentioned in ``query`` (full names first, then aliases), or None."""
    for species in valid_bird_names:
        if _name_in_text(species, query):
            return species
    # Longest aliases first so the most specific alias wins.
    for alias in sorted(bird_aliases, key=len, reverse=True):
        if _name_in_text(alias, query):
            return bird_aliases[alias]
    return None


def _keyword_flags(text, aliases):
    """Return a {flag_name: 0/1} dict with 1 for every alias keyword contained in ``text``."""
    flags = {flag: 0 for flag in aliases.values()}
    for keyword, flag in aliases.items():
        if keyword in text:
            flags[flag] = 1
    return flags


def _ask_until_resolved(intro, prompt, resolve, attempts=3):
    """Print ``intro``, then read answers until ``resolve(answer)`` is not None; raise ValueError after ``attempts`` tries."""
    print(intro)
    for remaining in range(attempts - 1, -1, -1):
        resolved = resolve(input(prompt).strip())
        if resolved is not None:
            return resolved
        if remaining:
            print("⚠️ That entry was not recognised. Please try again.")
    raise ValueError("No valid entry was provided after %d attempts." % attempts)


def extract_query_features(query):
    """
    Turn a free-text birdwatching question into the feature dict used for prediction.

    The year, weekday, bird species, locality, season and time period are read from
    the query text. Season, time period, locality and bird species are mandatory: if
    one is missing from the query, the user is asked for it interactively (up to three
    attempts each).

    Args:
        query: The user's question, e.g.
            "best time to see a kingfisher in Kalametiya on a summer morning".

    Returns:
        dict with the keys "year", "day_of_week", "locality", "bird_name", plus one
        0/1 entry for every flag named in ``season_aliases`` and ``time_period_aliases``.

    Raises:
        ValueError: if a mandatory value is still missing or invalid after the
            interactive prompts. (Previously such input was accepted silently and
            produced all-zero flags or "Unknown ..." labels.)
    """
    # Lower-case and collapse whitespace runs so "Bundala  NP General" still matches.
    query = re.sub(r'\s+', ' ', query.lower()).strip()

    # Extract Year
    # NOTE(review): the fallback year is fixed at 2025 while the weekday falls back to
    # "today" — confirm whether the current year is intended here.
    year_match = re.search(r'\b(20[0-9]{2})\b', query)
    year = int(year_match.group()) if year_match else 2025

    # Extract Day of Week (defaults to today's weekday)
    day_name_match = re.search(r'\b(monday|tuesday|wednesday|thursday|friday|saturday|sunday)\b', query)
    day_of_week = day_name_to_int(day_name_match.group()) if day_name_match else datetime.datetime.today().weekday()

    # Extract Bird Name (canonical name or alias mentioned in the query)
    bird_name = _find_bird_in_query(query)

    # Extract Locality (first valid locality whose name appears verbatim in the query)
    locality = next((loc for loc in valid_localities if loc.lower() in query), None)

    # Extract Season
    season_flags = _keyword_flags(query, season_aliases)
    if not any(season_flags.values()):
        chosen_season = _ask_until_resolved(
            "⚠️ The season you are expecting to see the bird is necessary to run the model. Please select a season:\nSummer\nWinter\nSpring\nAutumn",
            "Enter the season: ",
            lambda answer: season_aliases.get(answer.lower()),
        )
        season_flags[chosen_season] = 1

    # Extract Time Period
    time_period_flags = _keyword_flags(query, time_period_aliases)
    if not any(time_period_flags.values()):
        chosen_period = _ask_until_resolved(
            "⚠️ The time period you are expecting to see the bird is necessary to run the model. Please select a time period:\nMorning\nAfternoon\nEvening\nNight",
            "Enter the time period: ",
            lambda answer: time_period_aliases.get(answer.lower()),
        )
        time_period_flags[chosen_period] = 1

    if locality is None:
        def _resolve_locality(answer):
            match = correct_locality(answer)
            return None if match == "Unknown Location" else match

        locality = _ask_until_resolved(
            "⚠️ A location should be entered to run the models. Please select a location from the list below:\n"
            + "\n".join(valid_localities),
            "Enter the correct location: ",
            _resolve_locality,
        )

    if bird_name is None:
        def _resolve_bird(answer):
            match = correct_bird_name(answer)
            return None if match == "Unknown Bird" else match

        bird_name = _ask_until_resolved(
            "⚠️ A bird species should be entered to run the models. Please select a bird species from the list below:\n"
            + "\n".join(valid_bird_names),
            "Enter the correct bird species: ",
            _resolve_bird,
        )

    return {
        "year": year,
        "day_of_week": day_of_week,
        "locality": locality,
        "bird_name": bird_name,
        **season_flags,
        **time_period_flags
    }

# ✅ Function: Predict Best Time for Birdwatching
def _encode_label(column, value):
    """Encode ``value`` with ``label_encoders[column]``, raising a descriptive ValueError for unseen labels."""
    try:
        return label_encoders[column].transform([value])[0]
    except ValueError as exc:
        raise ValueError(
            f"{value!r} is not a known {column} value for the trained model."
        ) from exc


def _clamp(value, low, high):
    """Limit ``value`` to the inclusive range [low, high]."""
    return max(low, min(high, value))


def predict_best_time(query):
    """
    Predict the best month and hour for birdwatching from a free-text query.

    The query is parsed with ``extract_query_features``, the locality and bird name
    are label-encoded, and the resulting single-row frame is passed to
    ``month_model`` and ``hour_model``.

    Args:
        query: The user's question in plain text.

    Returns:
        A multi-line, human-readable string with the predicted month (1-12) and
        hour (0-23).

    Raises:
        ValueError: if the locality or bird name is not known to the label encoders.
    """
    features = extract_query_features(query)

    # Encode categorical features
    locality_encoded = _encode_label('LOCALITY', features["locality"])
    bird_name_encoded = _encode_label('COMMON NAME', features["bird_name"])

    # Create input DataFrame
    # NOTE(review): the values are positional and must line up with the order of
    # ``selected_features``; the meaning of the leading constant 1 is not visible here —
    # confirm both against the training notebook.
    row = [
        1, features["year"], features["day_of_week"],
        locality_encoded, bird_name_encoded,
        features["Is_Summer"], features["Is_Winter"], features["Is_Spring"], features["Is_Autumn"],
        features["Is_Morning"], features["Is_Afternoon"], features["Is_Evening"], features["Is_Night"],
    ]
    input_data = pd.DataFrame([row], columns=selected_features)

    # Predict best month & hour. Rounding a raw prediction can land just outside the
    # valid calendar range (e.g. 12.7 -> 13, 23.6 -> 24), so clamp the results.
    predicted_month = _clamp(int(round(month_model.predict(input_data)[0])), 1, 12)
    predicted_hour = _clamp(int(round(hour_model.predict(input_data)[0])), 0, 23)

    return f"✅ Best Time for Birdwatching:\n📅 Month: {predicted_month}\n⏰ Hour: {predicted_hour}:00"




In [9]:
# Smoke test: a fully specified query (bird, locality, season, time period, weekday, year),
# so no interactive prompt is triggered.
test_query = (
    "When is the best time to see a Blue-tailed Bee-eater in Bundala NP General  "
    "during a summer morning on sunday in 2025?"
)
result = predict_best_time(test_query)
print(result)

✅ Best Time for Birdwatching:
📅 Month: 7
⏰ Hour: 7:00
