In [3]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import mean_squared_error
from scipy.sparse import csr_matrix

# -------------------------------
# 1. Load data
# -------------------------------
df = pd.read_csv(r"C:\Users\C Sutharsan\Downloads\GUVI class notes AIML\Capstone_project\Project_4\Final\Final_dataset.csv")
df = df.sample(frac=0.2, random_state=42).reset_index(drop=True)

# -------------------------------
# 2. Collaborative Filtering (CF)
# -------------------------------
user_type_matrix = df.pivot_table(index='UserId',
                                  columns='AttractionType',
                                  values='Rating',
                                  aggfunc='mean').fillna(0)

user_type_sparse = csr_matrix(user_type_matrix.values)
user_similarity = cosine_similarity(user_type_sparse, dense_output=False)

user_index_map = {uid: idx for idx, uid in enumerate(user_type_matrix.index)}

def recommend_cf(user_id, top_n=5):
    if user_id not in user_index_map:
        return []
    idx = user_index_map[user_id]
    sim_scores = list(enumerate(user_similarity[idx].toarray()[0]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)[1:]
    similar_users = [user_type_matrix.index[i] for i, _ in sim_scores[:10]]
    similar_users_data = df[df['UserId'].isin(similar_users)]
    avg_ratings = similar_users_data.groupby('AttractionType')['Rating'].mean()
    top_attractions = avg_ratings.sort_values(ascending=False).head(top_n).index
    return df[df['AttractionType'].isin(top_attractions)]['Attraction'].unique().tolist()

# -------------------------------
# 3. Content-Based Filtering (CBF)
# -------------------------------
df['combined_features'] = df['AttractionType'].fillna('') + " " + df['Attraction'].fillna('')
tfidf = TfidfVectorizer()
feature_matrix = tfidf.fit_transform(df['combined_features'])

attraction_similarity = cosine_similarity(feature_matrix, dense_output=False)
attr_index_map = {atype: idx for idx, atype in enumerate(df['AttractionType'])}

def recommend_cbf(user_id, top_n=5):
    user_data = df[df['UserId'] == user_id]
    visited_ids = user_data['AttractionType'].tolist()
    sim_scores = np.zeros(attraction_similarity.shape[0])
    for aid in visited_ids:
        if aid in attr_index_map:
            idx = attr_index_map[aid]
            sim_scores += attraction_similarity[idx].toarray()[0]
    visited_idx = [attr_index_map[aid] for aid in visited_ids if aid in attr_index_map]
    sim_scores[visited_idx] = 0
    top_indices = sim_scores.argsort()[::-1][:top_n]
    return df.iloc[top_indices]['Attraction'].unique().tolist()

# -------------------------------
# 4. Hybrid Recommendation
# -------------------------------
def recommend_hybrid(user_id, top_n=5):
    cf_recs = recommend_cf(user_id, top_n=top_n*2)
    cbf_recs = recommend_cbf(user_id, top_n=top_n*2)
    combined = list(dict.fromkeys(cf_recs + cbf_recs))
    return combined[:top_n]

# -------------------------------
# 5. Evaluation Metrics
# -------------------------------
def mean_average_precision(actual_items, recommended_items):
    hits = 0
    sum_precisions = 0
    for i, rec in enumerate(recommended_items):
        if rec in actual_items:
            hits += 1
            sum_precisions += hits / (i + 1)
    return sum_precisions / max(1, min(len(actual_items), len(recommended_items)))

def evaluate_rmse(user_ids, recommend_func):
    y_true, y_pred = [], []
    for uid in user_ids:
        actual_ratings = df[df['UserId'] == uid]['Rating'].values
        # Simulate prediction: here using mean rating from recommendations
        recs = recommend_func(uid, top_n=5)
        if not recs:
            continue
        pred_rating = np.mean(actual_ratings)  # placeholder for CF prediction
        y_true.extend(actual_ratings)
        y_pred.extend([pred_rating] * len(actual_ratings))
    return np.sqrt(mean_squared_error(y_true, y_pred))

def evaluate_map(user_ids, recommend_func):
    scores = []
    for uid in user_ids:
        actual_items = df[df['UserId'] == uid]['Attraction'].tolist()
        recs = recommend_func(uid, top_n=5)
        if not recs:
            continue
        scores.append(mean_average_precision(actual_items, recs))
    return np.mean(scores)

# -------------------------------
# 6. Example Usage & Evaluation
# -------------------------------
test_user = df['UserId'].iloc[0]
print("CF Recommendations:", recommend_cf(test_user))
print("CBF Recommendations:", recommend_cbf(test_user))
print("Hybrid Recommendations:", recommend_hybrid(test_user))

test_users = df['UserId'].unique()[:20]  # sample users for eval
print("\nEvaluation Results:")
print("CF  - MAP:", evaluate_map(test_users, recommend_cf), "RMSE:", evaluate_rmse(test_users, recommend_cf))
print("CBF - MAP:", evaluate_map(test_users, recommend_cbf), "RMSE:", evaluate_rmse(test_users, recommend_cbf))
print("Hybrid - MAP:", evaluate_map(test_users, recommend_hybrid), "RMSE:", evaluate_rmse(test_users, recommend_hybrid))

# -------------------------------
# 6. Save Models
# -------------------------------
import pickle
with open("cf_data.pkl", "wb") as f:
    pickle.dump((user_type_matrix, user_similarity, user_index_map), f)

with open("cbf_data.pkl", "wb") as f:
    pickle.dump((tfidf, feature_matrix, attraction_similarity, attr_index_map), f)

print("✅ Models saved as cf_data.pkl and cbf_data.pkl")


CF Recommendations: ['Sacred Monkey Forest Sanctuary', 'Tegalalang Rice Terrace', 'Water Castle (Tamansari)', 'Malang City Square', 'Sempu Island']
CBF Recommendations: ['Sacred Monkey Forest Sanctuary', 'Tegalalang Rice Terrace']
Hybrid Recommendations: ['Sacred Monkey Forest Sanctuary', 'Tegalalang Rice Terrace', 'Water Castle (Tamansari)', 'Malang City Square', 'Sempu Island']

Evaluation Results:
CF  - MAP: 0.8267261904761904 RMSE: 0.46210568776705907
CBF - MAP: 0.6625 RMSE: 0.46210568776705907
Hybrid - MAP: 0.8133333333333332 RMSE: 0.46210568776705907
✅ Models saved as cf_data.pkl and cbf_data.pkl
