In [1]:
import sys
import os
# Add the directory containing 'package_folder' to the Python path
sys.path.append(os.path.abspath('..'))


In [2]:
# The package_folder is a package, so we don't need to import it directly.
# Instead, we will import the specific modules we need.
from package_folder.scaling_pipeline import transform_user_inputs  # Make sure these are implemented
from package_folder.weighted_sum import weighted_sum  # Make sure these are implemented

In [3]:
#real example of user inputs passed fron the frontend
user_inputs = {
"climate_preference":"hot",
"climate_importance":2,
"cost_of_living_importance":3,
"max_monthly_budget":2000,
"healthcare_importance":4,
"safety_importance":5,
"internet_speed_importance":6}

In [4]:
import pickle
# After loading the pipeline
with open('../models/scaling_pipeline.pkl', 'rb') as f:
    pipe = pickle.load(f)
        
    # Inspect pipeline components
print("Pipeline steps:", pipe.named_steps)
    # If it's a ColumnTransformer, check the columns it expects
if hasattr(pipe, 'transformers_'):
    for name, transformer, columns in pipe.transformers_:
        print(f"Transformer '{name}' applied to columns: {columns}")

Pipeline steps: {'column_transformer': ColumnTransformer(transformers=[('minmax', MinMaxScaler(),
                                 Index(['average_monthly_cost_$', 'average_yearly_temperature',
       'internet_speed_mbps', 'safety_index', 'Healthcare Index'],
      dtype='object'))])}


In [5]:
processed_inputs = transform_user_inputs(user_inputs)
processed_inputs

{'climate_preference': 0.7702265372168284,
 'cost_of_living_preference': 0.053697885877613705,
 'healthcare_preference': 1.0,
 'safety_preference': 1.0,
 'internet_speed_preference': 0.9999123063431746,
 'climate_importance': 0.2,
 'cost_of_living_importance': 0.3,
 'healthcare_importance': 0.4,
 'safety_importance': 0.5,
 'internet_speed_importance': 0.6}

In [8]:
import pandas as pd
data = pd.read_csv("../raw_data/merged_country_level/scaled_merged_data_after_imputation.csv")
result_df = weighted_sum(data, processed_inputs)


In [9]:
result_df

Unnamed: 0,country,country_score
0,singapore,0.89783
1,united arab emirates,0.880842
2,hong kong,0.854372
3,taiwan,0.846616
4,japan,0.784453


In [17]:
def recommend_countries(user_inputs):
    
    # Step 1: Convert user inputs to numerical weights
    processed_inputs = transform_user_inputs(user_inputs)

    # Step 2: Score countries based on user preferences
    data = pd.read_csv("../raw_data/merged_country_level/scaled_merged_data_after_imputation.csv")
    result_df = weighted_sum(data, processed_inputs)

    # Step 3: Return top 5 countries (currently the weighted sum is already returning the top 5 countries sorted)
    #top_5 = result_df.sort_values(by="country_user_score", ascending=False).head(5)

    return result_df.to_dict(orient="records")

recommend_countries(user_inputs)

[{'country': 'singapore', 'country_score': 0.8978296204706148},
 {'country': 'united arab emirates', 'country_score': 0.8808420792797913},
 {'country': 'hong kong', 'country_score': 0.854371564744755},
 {'country': 'taiwan', 'country_score': 0.84661585175841},
 {'country': 'japan', 'country_score': 0.7844529538547678}]