In [None]:
import pandas as pd

# Read the travel-cities CSV (the path is relative, so it is resolved against
# the kernel's working directory) and preview the first rows.
dataset_path = '../data/Worldwide Travel Cities Dataset (Ratings and Climate).csv'
raw_data = pd.read_csv(dataset_path)
print("Cities dataset loaded successfully")
display(raw_data.head())

In [None]:
# Report how many null entries each column of the raw dataset contains
print("=== Missing Values ===")
null_counts = raw_data.isnull().sum()
print(null_counts)

In [None]:
import numpy as np
# Per-theme feature weights: theme name -> {feature column: weight}.
# build_ideal_vector multiplies each weight by 10 to form the theme's ideal
# city profile; features that a theme does not list contribute 0.
weight_config = {
    'Relaxation': {
        'wellness': 0.4,
        'seclusion': 0.4,
        'beaches': 0.2,
    },
    'Shopping': {
        'urban': 0.5,
        'cuisine': 0.3,
        'nightlife': 0.2,
    },
    'Adventure': {
        'adventure': 0.6,
        'nature': 0.3,
        'seclusion': 0.1,
    },
    'Cultural': {
        'culture': 0.7,
        'urban': 0.2,
        'cuisine': 0.1,
    },
    'Food': {
        'cuisine': 0.7,
        'nightlife': 0.2,
        'urban': 0.1,
    },
    'Nature': {
        'nature': 0.6,
        'seclusion': 0.3,
        'adventure': 0.1,
    },
}

def build_ideal_vector(theme, features_list, config):
    """Build the "ideal city" feature vector for a travel theme.

    Args:
        theme: Key to look up in ``config``. An unknown theme is treated as
            having no weights, so the result is all zeros.
        features_list: Ordered feature names; position ``i`` of the result
            corresponds to ``features_list[i]``.
        config: Mapping of theme -> {feature name: weight}.

    Returns:
        A float NumPy array of length ``len(features_list)`` holding
        ``weight * 10`` for every feature the theme lists and 0 elsewhere.
        Weights for features absent from ``features_list`` are ignored.
    """
    weights = config.get(theme, {})
    scaled = [weights.get(name, 0.0) * 10 for name in features_list]
    return np.array(scaled, dtype=float)

In [None]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import NearestNeighbors

# Rating columns that describe each city. Their order fixes the layout of
# every feature vector built for the recommender.
features = [
    'culture', 'adventure', 'nature',
    'beaches', 'nightlife', 'cuisine',
    'wellness', 'urban', 'seclusion',
]
X_cities = raw_data[features]

# Min-max normalise the rating columns; the fitted scaler is reused later to
# transform the theme's ideal vector into the same space.
scaler = MinMaxScaler().fit(X_cities)
X_cities_scaled = scaler.transform(X_cities)

# Exhaustive (brute-force) neighbour search using cosine distance
knn = NearestNeighbors(metric='cosine', algorithm='brute').fit(X_cities_scaled)

# Budget label -> ordinal tier (1 = cheapest, 3 = most expensive)
budget_map = {
    # Labels produced by the theme model
    'Low': 1,
    'Medium': 2,
    'High': 3,
    # Second vocabulary mapped onto the same tiers -- presumably the dataset's
    # `budget_level` values; TODO confirm against the CSV
    'Budget': 1,
    'Mid-range': 2,
    'Luxury': 3,
}

def get_initial_recommendations(predicted_theme, user_budget, top_n=5):
    """Rank cities for a travel theme, adjusted for how well they fit a budget.

    The theme's ideal feature vector is scaled with the fitted ``scaler`` and
    its nearest cities (cosine distance) form a candidate pool. Each candidate's
    cosine similarity is then multiplied by a budget-fit factor.

    Args:
        predicted_theme: Theme key of ``weight_config``. An unknown theme
            yields an all-zero ideal vector before scaling.
        user_budget: Budget label looked up in ``budget_map``; an unrecognised
            label is treated as tier 2.
        top_n: Number of cities to return.

    Returns:
        DataFrame with up to ``top_n`` rows of ``raw_data`` plus the columns
        ``budget_score``, ``similarity`` and ``final_score``, sorted by
        ``final_score`` in descending order.
    """
    ideal_vector = build_ideal_vector(predicted_theme, features, weight_config)

    # The scaler was fitted on a DataFrame, so hand it the same column names;
    # a bare list makes scikit-learn warn about missing feature names.
    ideal_frame = pd.DataFrame([ideal_vector], columns=features)
    ideal_scaled = scaler.transform(ideal_frame)

    # Candidate pool: 30 by default, widened when the caller asks for more and
    # capped at the dataset size (kneighbors raises if n_neighbors exceeds the
    # number of fitted samples).
    pool_size = min(max(30, top_n), len(raw_data))
    distances, indices = knn.kneighbors(ideal_scaled, n_neighbors=pool_size)

    candidates = raw_data.iloc[indices[0]].copy()

    # Loop-invariant: resolve the user's tier once instead of once per row.
    user_budget_val = budget_map.get(user_budget, 2)

    def calculate_budget_fit(city_budget):
        """Return the multiplier for a city's budget label versus the user's tier."""
        city_val = budget_map.get(city_budget, 2)
        if city_val == user_budget_val:
            return 1.5  # exact tier match
        if user_budget_val > city_val:
            return 1.2  # city is cheaper than the user's tier
        return 0.8      # city is more expensive than the user's tier

    candidates['budget_score'] = candidates['budget_level'].apply(calculate_budget_fit)

    # Cosine distance -> cosine similarity
    candidates['similarity'] = 1 - distances[0]
    candidates['final_score'] = candidates['similarity'] * candidates['budget_score']

    return candidates.sort_values(by='final_score', ascending=False).head(top_n)


In [None]:
import json

# Temperature bands used to label a month's average temperature.
# Each entry is label -> (low, high); get_climate_label matches a band when
# low <= temp < high, so the lower bound is inclusive and the upper exclusive.
# NOTE(review): thresholds look like degrees Celsius -- confirm against the dataset.
climate_modes = {
    'Cold': (-float('inf'), 5),
    'Cool': (5, 18),
    'Pleasant': (18, 26),
    'Hot': (26, float('inf')),
}

def get_monthly_temp(temp_json, month):
    """Extract one month's average temperature from a JSON-encoded record.

    Args:
        temp_json: JSON text expected to decode to an object keyed by month
            number as a string, each value an object with an ``'avg'`` entry.
        month: Month identifier; it is converted with ``str()`` before lookup.

    Returns:
        The ``'avg'`` value for the month, or ``None`` when the input is not
        valid JSON text, does not have the expected shape, or has no entry
        for that month.
    """
    try:
        monthly = json.loads(temp_json)
        return monthly.get(str(month), {}).get('avg', None)
    except (TypeError, ValueError, AttributeError):
        # TypeError: input is not str/bytes (e.g. None or a NaN float);
        # ValueError: malformed JSON (json.JSONDecodeError subclasses it);
        # AttributeError: decoded value or month entry is not a dict.
        # Anything else (KeyboardInterrupt, SystemExit, ...) must propagate.
        return None
    
def get_climate_label(temp, modes):
    """Map a temperature to the name of the first band that contains it.

    Args:
        temp: Temperature value, or ``None`` when unavailable.
        modes: Mapping of label -> ``(low, high)``; a band matches when
            ``low <= temp < high``. Bands are tried in the mapping's order.

    Returns:
        ``'Unknown'`` when ``temp`` is ``None``, the matching label when a
        band contains ``temp``, otherwise ``'Other'``.
    """
    if temp is None:
        return 'Unknown'

    for name, (lower, upper) in modes.items():
        if lower <= temp < upper:
            return name
    return 'Other'
    
def get_city_climate_calendar(city_row, climate_modes):
    """Group a city's twelve months by climate label.

    Args:
        city_row: Row (or mapping) whose ``'avg_temp_monthly'`` entry holds
            the JSON text consumed by ``get_monthly_temp``.
        climate_modes: Mapping of label -> ``(low, high)`` temperature band,
            as accepted by ``get_climate_label``.

    Returns:
        Dict with one key per label in ``climate_modes`` (in the same order),
        each mapping to a list of ``{'month': m, 'temp': t}`` entries for the
        months 1-12 that fall in that band. Months with no temperature, or
        whose temperature matches no band, are omitted.
    """
    # Derive the buckets from the mode table itself so a custom table does not
    # silently lose every month whose label is outside a hard-coded key list.
    calendar = {label: [] for label in climate_modes}

    for m in range(1, 13):
        temp = get_monthly_temp(city_row['avg_temp_monthly'], m)
        if temp is None:
            continue

        label = get_climate_label(temp, climate_modes)
        # 'Other' (no band matched) is not a bucket and is skipped.
        if label in calendar:
            calendar[label].append({'month': m, 'temp': temp})

    return calendar

In [None]:
def get_complete_recommendations(predicted_theme, user_budget, top_n=5):
    """Return the top city recommendations enriched with a climate calendar.

    Args:
        predicted_theme: Travel theme passed to ``get_initial_recommendations``.
        user_budget: Budget label passed to ``get_initial_recommendations``.
        top_n: Number of cities to request.

    Returns:
        List of dicts, one per recommended city in ranked order, with the keys
        ``city``, ``country``, ``region``, ``short_description``,
        ``budget_level`` and ``climate_calendar``.
    """
    initial_recs = get_initial_recommendations(predicted_theme, user_budget, top_n=top_n)

    city_recommendations = []
    for _, row in initial_recs.iterrows():
        # get_city_climate_calendar requires the mode table as its second
        # argument; omitting it raised TypeError on every call.
        climate_calendar = get_city_climate_calendar(row, climate_modes)
        city_recommendations.append({
            'city': row['city'],
            'country': row['country'],
            'region': row['region'],
            'short_description': row['short_description'],
            'budget_level': row['budget_level'],
            'climate_calendar': climate_calendar,
        })

    return city_recommendations

In [None]:
# Smoke test: recommendations for a culture-themed trip on a medium budget
get_complete_recommendations(predicted_theme='Cultural', user_budget='Medium')

In [None]:
import joblib
import os

# Persist the fitted scaler and kNN index so they can be loaded outside the notebook.
os.makedirs('../../ml_logic/models/city_recommend/', exist_ok=True)

for artifact, target_path in (
    (scaler, '../../ml_logic/models/city_recommend/city_scaler.pkl'),
    (knn, '../../ml_logic/models/city_recommend/city_knn_model.pkl'),
):
    joblib.dump(artifact, target_path)

print('City recommendation components are saved successfully!')