# In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MultiLabelBinarizer, StandardScaler
from collections import defaultdict
import time
import ast
from typing import List, Dict, Set, Tuple
from sklearn.model_selection import train_test_split
from sklearn.decomposition import TruncatedSVD
from scipy.sparse import csr_matrix

from IPython.display import display
from IPython.display import Image

class PlaceRecommender:
    """Hybrid place recommender blending content-based and collaborative scores.

    Content-based part: every place is described by standardized tag/category
    indicator features; a user profile is the interaction-weighted sum of the
    features of the places the user interacted with, and places are scored by
    cosine similarity to that profile.

    Collaborative part: a truncated SVD of the weighted user x place
    interaction matrix; places are scored by the dot product of the user's and
    the place's latent factors.

    Both models are fitted on ``fit_interactions`` (every interaction except
    the held-out ``test_interactions``) so that the evaluation methods do not
    score the model on data it was fitted on.

    Place ids are taken from a ``place_id`` column of ``places_df`` when that
    column exists; otherwise places are assumed to be numbered 1..n in row
    order.
    """

    # Relative strength of each interaction type.
    INTERACTION_WEIGHTS = {"like": 1, "visit": 3, "add_to_list": 2}
    # Upper bound on the number of latent factors of the SVD model.
    MAX_SVD_COMPONENTS = 50

    def __init__(self):
        self.users_df = None
        self.places_df = None
        self.interactions_df = None
        self.place_features = None
        self.user_profiles = None
        self.train_interactions = pd.DataFrame()
        self.test_interactions = pd.DataFrame()
        self.svd_model = None
        self.interaction_matrix = None
        self.user_factors = None
        self.place_factors = None
        # Interactions the models are fitted on (all rows minus the test rows).
        self.fit_interactions = None
        # user_id -> row of ``interaction_matrix`` / ``user_factors``.
        self._user_row = {}
        # Place id of every column of ``interaction_matrix``, in column order.
        self._cf_place_ids = []
        # TODO: consider adding a popularity prior for highly popular places.

    # ------------------------------------------------------------------
    # Preprocessing
    # ------------------------------------------------------------------
    def preprocess_places_data(self, places_df):
        """Build indicator features for places from their tags and category.

        Args:
            places_df: Frame with a comma-separated ``tags`` column and a
                ``category`` column. It is not modified.

        Returns:
            ``(places_df, place_features)`` where ``places_df`` is a new frame
            with the raw 0/1 tag and ``category_*`` columns appended, and
            ``place_features`` holds the same columns standardized with
            ``StandardScaler`` (same index as the input).
        """
        # Split every tag string into exact tags; a plain substring test on the
        # raw string would let "bar" match "barbecue".
        tag_sets = [
            {piece.strip() for piece in raw.split(',') if piece.strip()}
            for raw in places_df['tags'].fillna('').astype(str)
        ]
        # Sorted so the feature column order is reproducible between runs.
        all_tags = sorted(set().union(*tag_sets))
        tag_features = pd.DataFrame(
            {tag: [int(tag in place_tags) for place_tags in tag_sets] for tag in all_tags},
            index=places_df.index,
        )
        category_features = pd.get_dummies(places_df['category'], prefix='category', dtype=int)
        raw_features = pd.concat([tag_features, category_features], axis=1)

        place_features = pd.DataFrame(
            StandardScaler().fit_transform(raw_features),
            index=raw_features.index,
            columns=raw_features.columns,
        )
        places_df = pd.concat([places_df, raw_features], axis=1)
        return places_df, place_features

    def preprocess_user_interaction_data(self, user_interactions_df):
        """Add a ``weighted_interaction`` column derived from ``interaction_type``.

        The column is added to ``user_interactions_df`` in place and the same
        frame is returned.

        Raises:
            KeyError: If an interaction type has no entry in
                ``INTERACTION_WEIGHTS``.
        """
        weights = user_interactions_df['interaction_type'].map(self.INTERACTION_WEIGHTS)
        unknown = weights.isna()
        if unknown.any():
            offending = sorted(set(user_interactions_df.loc[unknown, 'interaction_type'].astype(str)))
            raise KeyError(f"Unknown interaction_type value(s): {offending}")
        user_interactions_df['weighted_interaction'] = weights.astype(int)
        return user_interactions_df

    def preprocess_users_data(self, user_df):
        """Parse the string-encoded ``list_of_places`` column into Python objects.

        Values that are not strings (e.g. already parsed lists) are left
        untouched, so calling this twice on the same frame is safe. The frame
        is modified in place and returned.
        """
        user_df['list_of_places'] = user_df['list_of_places'].apply(
            lambda value: ast.literal_eval(value) if isinstance(value, str) else value
        )
        return user_df

    def load_and_preprocess_data(self, users_data, places_data, interactions_data, test_size=0.2):
        """Preprocess all inputs, split interactions and fit the SVD model.

        Args:
            users_data: Users frame (see ``preprocess_users_data``).
            places_data: Places frame (see ``preprocess_places_data``).
            interactions_data: Interactions frame with ``user_id``,
                ``place_id`` and ``interaction_type`` columns. Its index is
                assumed to be unique (as produced by ``pd.read_csv``).
            test_size: Fraction of each eligible user's interactions that is
                held out for evaluation.
        """
        self.users_df = self.preprocess_users_data(users_data)
        self.places_df, self.place_features = self.preprocess_places_data(places_data)
        self.interactions_df = self.preprocess_user_interaction_data(interactions_data)

        # Per-user split. Users with fewer than three interactions are not
        # split at all (cold-start users); their rows are only used for fitting.
        train_parts, test_parts = [], []
        for _, group in self.interactions_df.groupby('user_id'):
            if len(group) > 2:
                train, test = train_test_split(group, test_size=test_size, random_state=42)
                train_parts.append(train)
                test_parts.append(test)
        # Rebuilt from scratch on every call, so repeated loads do not pile up.
        empty = self.interactions_df.iloc[0:0]
        self.train_interactions = pd.concat(train_parts) if train_parts else empty
        self.test_interactions = pd.concat(test_parts) if test_parts else empty

        # Fit on everything that is not held out, to keep the evaluation honest.
        held_out = self.interactions_df.index.isin(self.test_interactions.index)
        self.fit_interactions = self.interactions_df.loc[~held_out]

        # Category codes give dense row/column numbers for arbitrary ids.
        user_cat = self.fit_interactions['user_id'].astype('category')
        place_cat = self.fit_interactions['place_id'].astype('category')
        user_ids = list(user_cat.cat.categories)
        self._cf_place_ids = list(place_cat.cat.categories)
        self._user_row = {uid: row for row, uid in enumerate(user_ids)}

        # Repeated (user, place) pairs are summed by the COO-style constructor.
        self.interaction_matrix = csr_matrix(
            (
                self.fit_interactions['weighted_interaction'].to_numpy(dtype=float),
                (user_cat.cat.codes.to_numpy(), place_cat.cat.codes.to_numpy()),
            ),
            shape=(len(user_ids), len(self._cf_place_ids)),
        )

        if min(self.interaction_matrix.shape) == 0:
            # Nothing to factorize; recommendations fall back to content only.
            self.svd_model = None
            self.user_factors = None
            self.place_factors = None
            return

        # The SVD cannot extract more components than there are place columns.
        n_components = max(1, min(self.MAX_SVD_COMPONENTS, len(self._cf_place_ids) - 1))
        self.svd_model = TruncatedSVD(n_components=n_components, random_state=42)
        self.user_factors = self.svd_model.fit_transform(self.interaction_matrix)
        self.place_factors = self.svd_model.components_.T

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
    def _place_id_by_row(self):
        """Return the place id of every ``places_df`` row, in row order."""
        if 'place_id' in self.places_df.columns:
            return self.places_df['place_id'].tolist()
        # No explicit id column: places are assumed to be numbered 1..n.
        return list(range(1, len(self.places_df) + 1))

    @staticmethod
    def _places_of_user(interactions, user_id):
        """Return the set of place ids ``user_id`` has in ``interactions``."""
        if interactions is None or 'user_id' not in interactions.columns:
            return set()
        return set(interactions.loc[interactions['user_id'] == user_id, 'place_id'])

    def _evaluation_cases(self, k):
        """Yield ``(ranked_place_ids, held_out_places)`` per evaluated user.

        Every user with held-out interactions is evaluated; users for whom no
        recommendation can be produced are skipped.
        """
        if 'user_id' not in self.test_interactions.columns:
            return
        for user_id in self.test_interactions['user_id'].unique():
            recommendations = self.get_recommendations(user_id, k)
            if not recommendations:
                continue
            ranked = [rec['place_id'] for rec in recommendations]
            yield ranked, self._places_of_user(self.test_interactions, user_id)

    # ------------------------------------------------------------------
    # Modelling
    # ------------------------------------------------------------------
    def build_user_profiles(self):
        """Build an L2-normalized content profile for every user.

        A profile is the sum of the feature vectors of the places the user
        interacted with, weighted by ``weighted_interaction``. Interactions
        with places missing from ``places_df`` are ignored. Uses
        ``fit_interactions`` when available, otherwise ``interactions_df``.
        """
        feature_matrix = self.place_features.to_numpy(dtype=np.float64)
        n_features = feature_matrix.shape[1]
        self.user_profiles = defaultdict(lambda: np.zeros(n_features))

        # Map ids to row positions explicitly instead of assuming that the
        # frame index and the place ids line up.
        row_of_place = {pid: row for row, pid in enumerate(self._place_id_by_row())}

        source = self.fit_interactions if self.fit_interactions is not None else self.interactions_df
        triples = zip(source['user_id'], source['place_id'], source['weighted_interaction'])
        for user_id, place_id, weight in triples:
            row = row_of_place.get(place_id)
            if row is None:
                continue
            self.user_profiles[user_id] += weight * feature_matrix[row]

        # Normalize; all-zero profiles are left as they are.
        for user_id, profile in self.user_profiles.items():
            norm = np.linalg.norm(profile)
            if norm > 0:
                self.user_profiles[user_id] = profile / norm

    def get_recommendations(self, user_id, n_recommendations=1, content_weight=0.5, cf_weight=0.5):
        """Return the top places for ``user_id`` that the user has not seen yet.

        The score of a place is
        ``content_weight * cosine_similarity + cf_weight * cf_score``; both
        weights default to 0.5 and are hyperparameters that can be tuned.

        Args:
            user_id: Id of the user to recommend for.
            n_recommendations: Maximum number of places to return.
            content_weight: Weight of the content-based cosine similarity.
            cf_weight: Weight of the collaborative-filtering (SVD) score.

        Returns:
            A list of dicts (best first) with ``place_id``, ``place_name``,
            ``category``, ``tags``, ``location``, ``similarity_score`` and
            ``response_time``; empty when the user has no profile.
        """
        start_time = time.time()

        if not self.user_profiles or user_id not in self.user_profiles:
            return []

        place_ids = self._place_id_by_row()
        row_of_place = {pid: row for row, pid in enumerate(place_ids)}

        # Content-based score: similarity between the profile and every place.
        similarities = cosine_similarity([self.user_profiles[user_id]], self.place_features)[0]

        # Collaborative score, scattered from matrix columns to places_df rows.
        # Places without any fitted interaction keep a score of zero.
        cf_scores = np.zeros(len(place_ids))
        user_row = self._user_row.get(user_id)
        if user_row is not None and self.user_factors is not None:
            cf_predictions = np.dot(self.user_factors[user_row], self.place_factors.T)
            for place_id, score in zip(self._cf_place_ids, cf_predictions):
                row = row_of_place.get(place_id)
                if row is not None:
                    cf_scores[row] = score

        combined_scores = content_weight * similarities + cf_weight * cf_scores

        # Places the model already knows the user interacted with.
        known = self.fit_interactions if self.fit_interactions is not None else self.train_interactions
        seen = self._places_of_user(known, user_id)

        candidates = [
            (place_id, score)
            for place_id, score in zip(place_ids, combined_scores)
            if place_id not in seen
        ]
        best = sorted(candidates, key=lambda item: item[1], reverse=True)[:n_recommendations]

        detailed_recommendations = []
        for place_id, score in best:
            place = self.places_df.iloc[row_of_place[place_id]]
            detailed_recommendations.append({
                'place_id': place_id,
                'place_name': place['place_name'],
                'category': place['category'],
                'tags': place['tags'],
                'location': place['location'],
                'similarity_score': score,
                'response_time': time.time() - start_time
            })
        return detailed_recommendations

    # ------------------------------------------------------------------
    # Evaluation
    # ------------------------------------------------------------------
    def evaluate_precision_at_k(self, test_users, k):
        """Return mean precision@k over all users with held-out interactions.

        NOTE: ``test_users`` is accepted for signature symmetry but is not
        used; the users come from ``test_interactions``.
        """
        precisions = [
            len(held_out.intersection(ranked)) / len(set(ranked))
            for ranked, held_out in self._evaluation_cases(k)
        ]
        return np.mean(precisions) if precisions else 0.0

    def evaluate_recall_at_k(self, test_users, k):
        """Return mean recall@k over all users with held-out interactions.

        NOTE: ``test_users`` is accepted for signature symmetry but is not
        used; the users come from ``test_interactions``.
        """
        recalls = [
            len(held_out.intersection(ranked)) / len(held_out)
            for ranked, held_out in self._evaluation_cases(k)
            if held_out
        ]
        return np.mean(recalls) if recalls else 0.0

    def evaluate_map(self, test_users, k):
        """Return mean average precision over all users with held-out interactions.

        A user's average precision is the sum of precision@i over the ranks i
        that hit a held-out place, divided by the number of held-out places.
        Users without any hit contribute 0 instead of being skipped.

        NOTE: ``test_users`` is accepted for signature symmetry but is not
        used; the users come from ``test_interactions``.
        """
        ap_scores = []
        for ranked, held_out in self._evaluation_cases(k):
            if not held_out:
                continue
            hits = 0
            precision_sum = 0.0
            for rank, place_id in enumerate(ranked, 1):
                if place_id in held_out:
                    hits += 1
                    precision_sum += hits / rank
            ap_scores.append(precision_sum / len(held_out))
        return np.mean(ap_scores) if ap_scores else 0.0

    def evaluate_response_time(self, test_users, k):
        """Time ``get_recommendations`` for every user in ``test_users``.

        Returns:
            Dict with the mean, max, min and standard deviation of the
            response time in seconds (all 0.0 when ``test_users`` is empty).
        """
        response_times = []
        for user_id in test_users:
            started = time.perf_counter()
            self.get_recommendations(user_id, n_recommendations=k)
            response_times.append(time.perf_counter() - started)

        if not response_times:
            # np.max / np.min raise on empty input.
            response_times = [0.0]

        return {
            'mean_response_time': np.mean(response_times),
            'max_response_time': np.max(response_times),
            'min_response_time': np.min(response_times),
            'std_response_time': np.std(response_times)
        }

    def run_comprehensive_evaluation(self, test_users, k=1):
        """Run every evaluation and return the results keyed by metric name."""
        return {
            'precision_at_k': self.evaluate_precision_at_k(test_users, k),
            'recall_at_k': self.evaluate_recall_at_k(test_users, k),
            'mean_average_precision': self.evaluate_map(test_users, k),
            'response_time_metrics': self.evaluate_response_time(test_users, k)
        }

def main(data_dir="/kaggle/input/travel", k=4):
    """Load the travel dataset, fit the recommender and print its evaluation.

    Args:
        data_dir: Directory containing ``places.csv``,
            ``user_interactions.csv`` and ``users.csv``.
        k: Cut-off used for every ``@k`` metric and for the number of
            recommendations requested when timing responses.
    """
    # Local import so this function does not depend on file-level imports.
    from pathlib import Path

    data_dir = Path(data_dir)

    # Initialize recommender
    recommender = PlaceRecommender()

    # Load and preprocess data
    places_df = pd.read_csv(data_dir / "places.csv")
    user_interactions_df = pd.read_csv(data_dir / "user_interactions.csv")
    users_df = pd.read_csv(data_dir / "users.csv")

    recommender.load_and_preprocess_data(users_df, places_df, user_interactions_df)
    recommender.build_user_profiles()

    # User ids passed to the evaluation run.
    test_users = list(range(1, 5))
    evaluation_results = recommender.run_comprehensive_evaluation(test_users, k)

    # Print evaluation results
    print("\nEvaluation Results:")
    print(f"Precision@{k}: {evaluation_results['precision_at_k']:.3f}")
    print(f"Recall@{k}: {evaluation_results['recall_at_k']:.3f}")
    print(f"Mean Average Precision: {evaluation_results['mean_average_precision']:.3f}")
    print("\nResponse Time Metrics:")
    for metric, value in evaluation_results['response_time_metrics'].items():
        # Timings are collected in seconds; report them in milliseconds.
        print(f"{metric}: {value*1000:.2f}ms")

# Run the evaluation only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()    