### Goals of this notebook

First, we need to clearly define what the user inputs will look like.
 
Then, we will be able to code a function that takes the user inputs and returns a pandas DataFrame with an extra ["country_user_score"] column.

Before coding the actual Python scripts, however, it's easier to simulate that flow in a notebook.

user inputs structure:
1. climate
    climate preference -> selects category between [cold, warm, hot]
    climate - how important is this for you? -> slider between 0 and 10
2. safety
    safety -> [average, safe, very safe]
    safety - how important is this for you? -> slider
3. healthcare
    healthcare quality -> [average, good, great]
    healthcare quality - how important is this for you? -> slider
4. cost of living
    Monthly cost preference -> [cheap, medium, expensive]
    cost of living - how important is this for you? -> slider
    optional: filter by a monthly cost budget -> give a max monthly budget in dollars
5. Internet speed
    Internet speed preference -> [medium, fast, very fast]
    internet speed - how important is this for you? -> slider


------

*structure of the API endpoint*
1. user sends get request to the API with user inputs as parameters
2. API function calls the "process_user_input" class / module
3. "process_user_input" module takes the user inputs and processes them to make them "understandable" to the dataset data and ready to insert in the weighted sum
4. API function calls the "weighted_sum" class / module
5. "weighted_sum" class / module takes the numerical inputs and calculates the ["country_user_score"] column for each country - returns the pandas df
6. lastly, API function returns the head(5) of the df sorted by country_user_score

## Pipeline

In [3]:
# Sample frontend payload used to exercise the pipeline in this notebook.
# Each topic has a categorical "<topic>_preference" (None when the user gave
# no preference) and a "<topic>_importance" rating.
example_user_inputs = dict(
    # Climate
    climate_preference="hot",
    climate_importance=10,
    # Cost of living (plus the optional budget filter)
    cost_of_living_preference=None,
    cost_of_living_importance=10,
    max_monthly_budget=None,
    # Healthcare
    healthcare_preference=None,
    healthcare_importance=5,
    # Safety
    safety_preference=None,
    safety_importance=8,
    # Internet speed
    internet_speed_preference=None,
    internet_speed_importance=8,
)

In [None]:
# Lookup table: preference type -> (label used in error messages, category -> value).
# The insertion order of each inner dict is also the order in which the valid
# options are listed in the error message.
_PREFERENCE_TABLES = {
    "climate": ("climate", {"hot": 25.0, "mild": 18.0, "cold": 11.0}),
    "cost_of_living": ("cost of living", {"low": 400.0, "moderate": 800.0, "high": 2500.0}),
    "healthcare": ("healthcare", {"excellent": 75.0, "good": 65.0, "fair": 50.0}),
    "safety": ("safety", {"very_safe": 75.0, "safe": 65.0, "moderate": 50.0}),
    "internet_speed": ("internet speed", {"fast": 200.0, "moderate": 100.0, "slow": 50.0}),
}


def map_preference(preference: str, preference_type: str) -> float:
    """
    Maps a categorical user preference to the numeric value configured for its type.

    Args:
        preference: The user preference as a string (e.g. "hot", "low", "very_safe").
        preference_type: The type of preference: "climate", "cost_of_living",
            "healthcare", "safety" or "internet_speed".

    Returns:
        The float associated with that category for the given preference type.

    Raises:
        ValueError: If preference_type is not one of the supported types, or if
            preference is not a valid category for that type (this includes
            None and any non-string value).
    """
    # isinstance guards keep non-string (possibly unhashable) inputs on the
    # ValueError path instead of failing inside the dict lookup.
    if not isinstance(preference_type, str) or preference_type not in _PREFERENCE_TABLES:
        raise ValueError(f"Invalid preference type: '{preference_type}'. Did you mean 'climate', 'cost_of_living', 'healthcare', 'safety', or 'internet_speed'?")

    label, values_by_category = _PREFERENCE_TABLES[preference_type]

    if not isinstance(preference, str) or preference not in values_by_category:
        expected = ", ".join(f"'{category}'" for category in values_by_category)
        raise ValueError(f"Invalid {label} preference: '{preference}'. Expected one of: {expected}")

    return values_by_category[preference]

In [None]:
import numpy as np
from typing import Dict, Any, Optional

def transform_user_inputs(user_input_dict: Dict[str, Any]) -> Dict[str, float]:
    """
    Transform user input dictionary received from the frontend into normalized values suitable for model / simple algorithm prediction.

    Args:
        user_input_dict: Dictionary with user preferences ("<topic>_preference",
            a category string or None) and importance ratings
            ("<topic>_importance", a number or None).

    Returns:
        Dictionary with the transformed preference values and the importance
        weights divided by 10.

    Raises:
        ValueError: Propagated from map_preference when a provided (non-None)
            preference is not a valid category for its type.
    """
    # Numeric fallbacks used when the user gave no preference for a topic.
    # NOTE(review): presumably on the same raw scale as the values returned by
    # map_preference -- confirm against the dataset.
    default_values = {
        "climate": 20.7,
        "cost_of_living": 300.0,
        "healthcare": 82.78,
        "safety": 84.5,
        "internet_speed": 345.3,
    }

    normalized_inputs = {}

    # Step 1: Transform categorical preferences to numerical values.
    # A missing key and an explicit None both mean "no preference": the numeric
    # default is used directly. It must not go through map_preference, which
    # only accepts category strings and would raise ValueError.
    for preference_type, default_value in default_values.items():
        key = f"{preference_type}_preference"
        preference = user_input_dict.get(key)
        if preference is None:
            normalized_inputs[key] = default_value
        else:
            normalized_inputs[key] = map_preference(preference, preference_type)

    # Step 2: Apply transformation pipeline to all preference values
    # This is intended to apply pre-fitted min-max scaling to the preference values
    # transform_pipeline is assumed to be imported from scaling_pipeline.py , but no script coded yet
    preferences_to_transform = dict(normalized_inputs)
    transformed_preferences = transform_pipeline.transform(preferences_to_transform)

    # Update normalized inputs with transformed values
    for key, value in transformed_preferences.items():
        normalized_inputs[key] = value

    # Step 3: Scale importance values by dividing by 10 (sliders are 0-10 per the notes above)
    for key, importance in user_input_dict.items():
        if not key.endswith("_importance"):
            continue
        # Default importance if None
        normalized_inputs[key] = 0.5 if importance is None else importance / 10.0

    # Step 4: Handle max_monthly_budget
    # TODO: not implemented yet. The idea is to apply the same transformation as
    # cost_of_living, but the pipeline was not fitted on this column, so it
    # needs additional logic to scale the value properly (with a mid-range
    # default such as 0.5 when no budget is given).

    return normalized_inputs


In [None]:
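# Save for later (draft snippet - NOTE: it reads internet_speed_pref but compares against the safety labels and writes "safety_preference"; fix the names before reusing)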
    # internet_speed_pref = user_input_dict["internet_speed_preference"]
    
    # if internet_speed_pref == 'average':
    #     normalized_inputs["safety_preference"] = 0.5
    # elif internet_speed_pref == 'safe':
    #     normalized_inputs["safety_preference"] = 0.75
    # elif internet_speed_pref == 'very safe':
    #     normalized_inputs["safety_preference"] = 1.0
    # else:
    #     normalized_inputs["safety_preference"] = 1.0  # Default value if no preference is provided

    # return normalized_inputs