# In [None]:
# Tourism Attraction Recommendation System using Collaborative Filtering (SVD), 
# Content-Based Filtering (TF-IDF), and Hybrid Approach

import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split

# Load Dataset
df = pd.read_csv("tourism_data.csv")

# Preprocessing: Select Relevant Columns
selected_columns = ['UserId', 'AttractionId', 'Attraction', 'AttractionType', 'VisitModeName', 'Rating']

# Handling Missing Values
# Build a fresh frame instead of calling dropna(inplace=True) on a column
# selection, and renumber the rows afterwards: once incomplete rows are
# removed the old index has gaps, so index labels would no longer match row
# positions for any positional lookup done later in this file.
df = df[selected_columns].dropna().reset_index(drop=True)

### COLLABORATIVE FILTERING (USING SVD) ###

# Prepare Data for Surprise Library
# Surprise expects exactly three columns in (user, item, rating) order.
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(df[['UserId', 'AttractionId', 'Rating']], reader)
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

# Train SVD Model
# Seed the model as well as the split so repeated runs give the same
# factors and therefore the same recommendations.
svd = SVD(random_state=42)
svd.fit(trainset)

# Collaborative Filtering Recommendation
def collaborative_recommend(user_id, df, model, top_n=5):
    """Recommend attractions the user has not rated, best predicted rating first.

    Args:
        user_id: Identifier matched against the 'UserId' column of ``df``.
        df: Ratings frame with 'UserId', 'AttractionId', 'Attraction' and
            'AttractionType' columns (one row per rating).
        model: Object exposing ``predict(user_id, attraction_id)`` whose
            result has an ``est`` attribute holding the predicted rating.
        top_n: Maximum number of attractions to return.

    Returns:
        DataFrame with columns ['Attraction', 'AttractionType'], one row per
        recommended attraction, ordered from highest to lowest predicted
        rating, with a fresh 0..k-1 index. Empty when the user has already
        rated every attraction.
    """
    columns = ['Attraction', 'AttractionType']

    # A set gives constant-time membership tests for the "already rated" check.
    seen = set(df.loc[df['UserId'] == user_id, 'AttractionId'])

    # One metadata row per attraction; ``df`` holds one row per *rating*, so
    # filtering it directly would repeat popular attractions many times.
    catalog = df.drop_duplicates(subset='AttractionId').set_index('AttractionId')

    scored = [
        (attraction_id, model.predict(user_id, attraction_id).est)
        for attraction_id in catalog.index
        if attraction_id not in seen
    ]
    best = sorted(scored, key=lambda pair: pair[1], reverse=True)[:top_n]
    best_ids = [attraction_id for attraction_id, _ in best]

    # Label-based lookup with an ordered list keeps the ranking order.
    return catalog.loc[best_ids, columns].reset_index(drop=True)

### CONTENT-BASED FILTERING ###

def content_based_recommend(user_attractions, df, top_n=5):
    """Recommend attractions whose type text is most similar to the given ones.

    Similarity is the cosine similarity between TF-IDF vectors of the
    'AttractionType' text, averaged over the seed attractions.

    Args:
        user_attractions: Iterable of attraction names (values of the
            'Attraction' column) to use as seeds.
        df: Frame with 'Attraction' and 'AttractionType' columns; repeated
            rows for the same attraction are collapsed to the first one.
        top_n: Maximum number of attractions to return.

    Returns:
        DataFrame with columns ['Attraction', 'AttractionType'], most similar
        first, never containing the seed attractions themselves, with a fresh
        0..k-1 index. Empty when ``top_n`` is not positive or when none of
        the seeds occur in ``df``.
    """
    columns = ['Attraction', 'AttractionType']

    # Work on one row per attraction with a 0..n-1 index so that row
    # positions, matrix rows and index labels all agree. Using the caller's
    # index labels as matrix positions breaks as soon as the index has gaps.
    catalog = df[columns].drop_duplicates(subset='Attraction').reset_index(drop=True)

    seed_mask = catalog['Attraction'].isin(user_attractions).to_numpy()
    if top_n <= 0 or not seed_mask.any():
        # No usable seed: averaging over zero rows would only produce NaNs.
        return catalog.iloc[0:0]

    vectorizer = TfidfVectorizer(stop_words='english')
    tfidf_matrix = vectorizer.fit_transform(catalog['AttractionType'])

    # Compare only the seed rows against the catalog instead of building the
    # full all-pairs matrix; the averaged scores are the same.
    seed_rows = np.flatnonzero(seed_mask)
    similarity = cosine_similarity(tfidf_matrix[seed_rows], tfidf_matrix)
    scores = similarity.mean(axis=0)

    # A seed is always maximally similar to itself; exclude seeds so the
    # result contains only new suggestions.
    scores[seed_mask] = -np.inf

    ranked = np.argsort(-scores, kind='stable')[:top_n]
    ranked = ranked[np.isfinite(scores[ranked])]
    return catalog.iloc[ranked].reset_index(drop=True)

### HYBRID RECOMMENDATION ###

def hybrid_recommend(user_id, df, model, top_n=5):
    """Blend content-based and collaborative recommendations for one user.

    The two ranked lists are interleaved (content #1, collaborative #1,
    content #2, ...) before de-duplication, so both recommenders contribute
    to the first ``top_n`` rows. Simply stacking the lists and truncating
    would let the content-based results fill almost every slot.

    Args:
        user_id: Identifier matched against the 'UserId' column of ``df``.
        df: Ratings frame passed through to both recommenders.
        model: Rating predictor passed to ``collaborative_recommend``.
        top_n: Maximum number of rows to return; also the number requested
            from each underlying recommender.

    Returns:
        DataFrame with columns ['Attraction', 'AttractionType'], at most
        ``top_n`` distinct rows, with a fresh 0..k-1 index.
    """
    visited = df.loc[df['UserId'] == user_id, 'Attraction'].tolist()

    collab_rec = collaborative_recommend(user_id, df, model, top_n=top_n)
    collab_rec = collab_rec.drop_duplicates().reset_index(drop=True)

    if not visited:
        # Cold start: with no visited attractions there is nothing to seed the
        # content-based recommender with, so rely on the collaborative list.
        return collab_rec.head(top_n)

    content_rec = content_based_recommend(visited, df, top_n=top_n)
    content_rec = content_rec.drop_duplicates().reset_index(drop=True)

    # Tag every row with its rank inside its own list and with its source
    # (0 = content, 1 = collaborative); sorting on (rank, source) interleaves
    # the two lists while keeping each list's internal order.
    content_rec = content_rec.assign(_rank=list(range(len(content_rec))), _source=0)
    collab_rec = collab_rec.assign(_rank=list(range(len(collab_rec))), _source=1)

    blended = pd.concat([content_rec, collab_rec], ignore_index=True)
    blended = blended.sort_values(['_rank', '_source']).drop(columns=['_rank', '_source'])

    # drop_duplicates keeps the first occurrence, i.e. the better-ranked one.
    hybrid_rec = blended.drop_duplicates().head(top_n).reset_index(drop=True)
    return hybrid_rec

# Example Usage
user_id = 101  # Replace with actual user ID

# Each section pairs a heading with a deferred call, so the heading is printed
# before its recommendations are computed, one section after another.
demo_sections = [
    ("\nCollaborative Filtering Recommendations:",
     lambda: collaborative_recommend(user_id, df, svd)),
    ("Content-Based Recommendations:",
     lambda: content_based_recommend(['Eiffel Tower', 'Louvre Museum'], df)),
    ("\nHybrid Recommendations:",
     lambda: hybrid_recommend(user_id, df, svd)),
]

for heading, build_recommendations in demo_sections:
    print(heading)
    print(build_recommendations())
