In [1]:
import pickle
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
import numpy as np

In [2]:
# Load the pickled `events` and `users` objects from the model directory.
# NOTE(review): pickle.load can execute arbitrary code during deserialization;
# only load artifacts that were produced by this project.
# The `with` blocks close each file handle as soon as its object is loaded
# (the bare open(...) call left the handles open until garbage collection).
with open("../model/events.pkl", 'rb') as fh:
    events = pickle.load(fh)
with open("../model/users.pkl", 'rb') as fh:
    users = pickle.load(fh)

In [3]:
# Load the pickled per-user `events` table; the context manager closes the
# file handle deterministically instead of leaking it.
# NOTE(review): only unpickle files from a trusted source.
with open("../model/users_event.pkl", 'rb') as fh:
    users_event = pickle.load(fh)

In [5]:
# Load the pickled similarity objects for events and users; each file is
# closed as soon as it has been read.
# NOTE(review): only unpickle files from a trusted source.
with open("../model/events_similarity.pkl", 'rb') as fh:
    event_similarity = pickle.load(fh)
with open("../model/users_similarity.pkl", 'rb') as fh:
    users_similarity = pickle.load(fh)

In [6]:
# Load the pickled vector objects for events and users; each file is closed
# as soon as it has been read.
# NOTE(review): only unpickle files from a trusted source.
with open("../model/events_vector.pkl", 'rb') as fh:
    events_vector = pickle.load(fh)
with open("../model/users_vector.pkl", 'rb') as fh:
    users_vector = pickle.load(fh)

In [7]:
# Preview the first two rows of the users table.
users.head(n=2)

Unnamed: 0,user_id,name,key
0,68e0e01c6fbdb8a90b76129b,Jacob Jennings,Game Development Blockchain FinTech Cloud Comp...
1,68e0e01c6fbdb8a90b76129c,Mrs. Julie Martinez,Sports Entrepreneurship DevOps Robotics Cultur...


In [8]:
# Preview the first two rows of the events table.
events.head(n=2)

Unnamed: 0,event_id,key
0,68e0d68abee8a0e226e6fe96,versatile homogeneous interface jaclynborough ...
1,68e0d68abee8a0e226e6fe97,intuitive empowering orchestration new mariabe...


In [9]:
def similar_user(user_id, top_n=5):
    """Return the user_ids of the users most similar to ``user_id``.

    Similarity scores are read from the module-level ``users_similarity``
    object, ranked from highest to lowest.

    Args:
        user_id: Value to look up in ``users['user_id']``.
        top_n: Maximum number of similar users to return (default 5, the
            number the original implementation hard-coded).

    Returns:
        list: Up to ``top_n`` user_ids, most similar first. The queried user
        is never included.

    Raises:
        IndexError: If ``user_id`` does not occur in ``users['user_id']``.
    """
    # Use the row *position* rather than the index label, so the lookup stays
    # correct even if `users` does not carry a default 0..n-1 index.
    # NOTE(review): assumes row i of users_similarity corresponds to the i-th
    # row of `users` -- confirm against the code that built the matrix.
    matches = np.flatnonzero((users['user_id'] == user_id).to_numpy())
    if matches.size == 0:
        raise IndexError(f"user_id {user_id!r} not found in users")
    position = int(matches[0])

    distances = np.asarray(users_similarity[position])
    # Stable descending order: ties keep ascending row order, matching
    # sorted(..., reverse=True) in the previous implementation.
    ranked = np.argsort(-distances, kind='stable')

    # Drop the queried user explicitly instead of assuming it sorts first;
    # a duplicate profile with an equal score could otherwise displace it.
    neighbours = [int(i) for i in ranked if i != position][:max(top_n, 0)]
    return users['user_id'].iloc[neighbours].tolist()

In [10]:
# Look up the users most similar to a sample user.
a = similar_user(user_id='68e0e01c6fbdb8a90b76129b')

In [11]:
# Print the list of similar user_ids computed in the previous cell.
print(a)

['68e0e01c6fbdb8a90b761330', '68e0e01c6fbdb8a90b7613cf', '68e0e01c6fbdb8a90b761434', '68e0e01c6fbdb8a90b761319', '68e0e01c6fbdb8a90b7612cc']


In [12]:
# Preview the first four rows of the per-user events table.
users_event.head(n=4)

Unnamed: 0,user_id,events
0,68e0e01c6fbdb8a90b76129b,"[Game Development, Blockchain, FinTech, Cloud ..."
1,68e0e01c6fbdb8a90b76129c,"[Sports, Entrepreneurship, DevOps, Robotics, C..."
2,68e0e01c6fbdb8a90b76129d,"[Robotics, DevOps, EdTech, Leadership, React]"
3,68e0e01c6fbdb8a90b76129e,"[AI/ML, FinTech, DevOps, EdTech, Game Developm..."


In [13]:
def events_of_user(user_id):
    """Return the ``events`` entry of the first ``users_event`` row for a user.

    Args:
        user_id: Value to look up in ``users_event['user_id']``.

    Returns:
        The value stored in the ``events`` column of the first matching row,
        or ``None`` when no row has this ``user_id``.
    """
    # Vectorised mask instead of a Python-level row loop; .iloc[0] is
    # positional, so this also works when the frame has a non-default index
    # (the previous label-based users_event['user_id'][i] did not).
    matches = users_event.loc[users_event['user_id'] == user_id, 'events']
    if matches.empty:
        return None
    return matches.iloc[0]


In [14]:
#Collaborative Filtering

def recommend(user_id):
    """Collect items that similar users have but the given user does not.

    For every user returned by ``similar_user(user_id)``, the entries of that
    user's ``events`` list (via ``events_of_user``) are gathered, skipping
    entries the target user already has.

    NOTE(review): the ``users_event`` preview in this notebook shows
    topic-like labels in the ``events`` column; confirm whether these are
    event ids before comparing them with ``events['event_id']``.

    Args:
        user_id: Id of the user to recommend for.

    Returns:
        set: De-duplicated entries taken from the similar users' lists.
    """
    similar_users = similar_user(user_id)

    # events_of_user returns None for an unknown user; treat that as
    # "nothing seen yet" instead of failing on the membership test.
    own_events = events_of_user(user_id)
    already_seen = set(own_events) if own_events is not None else set()

    suggestions = set()
    for neighbour_id in similar_users:
        neighbour_events = events_of_user(neighbour_id)
        if neighbour_events is None:
            # A neighbour without a users_event row contributes nothing.
            continue
        suggestions.update(item for item in neighbour_events if item not in already_seen)
    return suggestions
    

In [15]:
# Count the collaborative-filtering suggestions for a sample user.
len(recommend(user_id='68e0e01c6fbdb8a90b76129b'))

11

In [16]:
# Content-Based Filtering Implementation

def content_based_filtering(user_id, num_recommendations=5):
    """Rank events by TF-IDF cosine similarity to a user's profile text.

    A ``TfidfVectorizer`` is fitted on ``events['key']``; the user's ``key``
    text is transformed with the same vocabulary and compared with every
    event.

    Args:
        user_id: Value to look up in ``users['user_id']``.
        num_recommendations: Maximum number of events to return. Values
            <= 0 yield an empty frame.

    Returns:
        DataFrame with columns ``event_id`` and ``similarity_score``,
        ordered from most to least similar.

    Raises:
        IndexError: If ``user_id`` does not occur in ``users['user_id']``.
    """
    # Select the profile text directly by mask + position; the previous code
    # took an index *label* and fed it to .iloc, which is only valid for a
    # default 0..n-1 index.
    profiles = users.loc[users['user_id'] == user_id, 'key']
    if profiles.empty:
        raise IndexError(f"user_id {user_id!r} not found in users")
    user_profile = profiles.iloc[0]

    # Fit the vocabulary on the event texts, then project the user profile
    # into the same feature space.
    event_tfidf = TfidfVectorizer(stop_words='english', max_features=5000)
    event_vectors = event_tfidf.fit_transform(events['key'])
    user_vector = event_tfidf.transform([user_profile])

    # One score per event row.
    similarity_scores = cosine_similarity(user_vector, event_vectors).flatten()

    # Reverse first, then truncate: argsort()[-n:] would return *every*
    # event for n == 0 because [-0:] is the full slice.
    limit = max(num_recommendations, 0)
    top_indices = similarity_scores.argsort()[::-1][:limit]

    recommended_events = events.iloc[top_indices].copy()
    recommended_events['similarity_score'] = similarity_scores[top_indices]

    return recommended_events[['event_id', 'similarity_score']]

In [17]:
# Enhanced Content-Based Filtering with event filtering

def content_based_filtering_enhanced(user_id, num_recommendations=5, exclude_attended=True):
    """Rank events by TF-IDF similarity to a user, optionally skipping attended ones.

    Uses unigrams and bigrams (``ngram_range=(1, 2)``) fitted on
    ``events['key']`` and compares the user's ``key`` text with every event.

    Args:
        user_id: Value to look up in ``users['user_id']``.
        num_recommendations: Maximum number of events to return. Values
            <= 0 yield an empty frame.
        exclude_attended: When True, events whose ``event_id`` appears in
            ``events_of_user(user_id)`` are removed from the candidates.

    Returns:
        DataFrame with columns ``event_id`` and ``similarity_score``,
        ordered from most to least similar. Fewer rows than requested are
        returned when not enough eligible events exist.

    Raises:
        IndexError: If ``user_id`` does not occur in ``users['user_id']``.
    """
    # Mask + positional lookup; valid for any index on `users`.
    profiles = users.loc[users['user_id'] == user_id, 'key']
    if profiles.empty:
        raise IndexError(f"user_id {user_id!r} not found in users")
    user_profile = profiles.iloc[0]

    # Entries the user already has (only consulted when excluding).
    # NOTE(review): the users_event preview shows topic-like labels here;
    # confirm they are comparable to events['event_id'].
    attended_events = []
    if exclude_attended:
        attended_events = events_of_user(user_id) or []

    event_tfidf = TfidfVectorizer(stop_words='english', max_features=5000, ngram_range=(1, 2))
    event_vectors = event_tfidf.fit_transform(events['key'])
    user_vector = event_tfidf.transform([user_profile])

    similarity_scores = cosine_similarity(user_vector, event_vectors).flatten()

    # Positional boolean mask of eligible rows. This replaces writing -1 at
    # an index *label*, which (a) is only correct for a default index,
    # (b) masked just the first duplicate, and (c) still let masked rows
    # leak into the result when too few candidates remained.
    eligible = np.ones(len(events), dtype=bool)
    if exclude_attended and len(attended_events) > 0:
        eligible = ~events['event_id'].isin(list(attended_events)).to_numpy()

    # Rank all rows best-first, drop ineligible ones, then truncate.
    # Truncating after the reversal also makes num_recommendations == 0
    # return nothing (argsort()[-0:] would return everything).
    ranked = similarity_scores.argsort()[::-1]
    ranked = ranked[eligible[ranked]]
    top_indices = ranked[:max(num_recommendations, 0)]

    recommended_events = events.iloc[top_indices].copy()
    recommended_events['similarity_score'] = similarity_scores[top_indices]

    return recommended_events[['event_id', 'similarity_score']]

In [18]:
# Hybrid Recommendation System (Collaborative + Content-Based)

def hybrid_recommendation(user_id, num_recommendations=10, collaborative_weight=0.6, content_weight=0.4):
    """Combine collaborative and content-based scores into one ranking.

    Each content-based candidate scores ``content_weight * similarity``.
    Every id returned by ``recommend(user_id)`` additionally receives
    ``collaborative_weight / len(collab_events)``.

    Args:
        user_id: ID of the user to recommend events for.
        num_recommendations: Maximum number of events to return.
        collaborative_weight: Weight for collaborative filtering (0-1).
        content_weight: Weight for content-based filtering (0-1).

    Returns:
        DataFrame with the single column ``event_id``, ordered by combined
        score (highest first). The scores themselves are not returned.
    """
    # NOTE(review): the users_event preview shows topic-like labels in the
    # lists recommend() draws from; confirm they are event ids, otherwise the
    # collaborative boost never matches a row of `events`.
    collab_events = recommend(user_id)

    # Ask for at least as many content candidates as the caller wants back;
    # a fixed pool of 20 capped the output for larger requests.
    candidate_pool = max(20, num_recommendations)
    content_recs = content_based_filtering_enhanced(
        user_id, num_recommendations=candidate_pool, exclude_attended=True
    )

    # Content-based contribution.
    event_scores = {
        event_id: content_weight * similarity
        for event_id, similarity in zip(content_recs['event_id'], content_recs['similarity_score'])
    }

    # Collaborative contribution: the weight is shared equally across ids.
    collab_boost = collaborative_weight / len(collab_events) if collab_events else 0
    for event_id in collab_events:
        event_scores[event_id] = event_scores.get(event_id, 0) + collab_boost

    # Keep only ids that exist in `events` *before* truncating. Previously
    # unknown ids could occupy top-N slots and were then silently dropped by
    # the isin() lookup, returning fewer rows than requested.
    known_ids = set(events['event_id'])
    ranked = sorted(
        ((event_id, score) for event_id, score in event_scores.items() if event_id in known_ids),
        key=lambda item: item[1],
        reverse=True,
    )[:max(num_recommendations, 0)]

    score_dict = dict(ranked)
    result_events = events[events['event_id'].isin(list(score_dict))].copy()
    result_events['hybrid_score'] = result_events['event_id'].map(score_dict)
    result_events = result_events.sort_values('hybrid_score', ascending=False)

    return result_events[['event_id']]

In [19]:
# Run the hybrid recommender for a sample user and print the ranked event ids.
hybrid_recs = hybrid_recommendation(
    user_id='68e0e01c6fbdb8a90b76129b',
    num_recommendations=8,
)
print(hybrid_recs)

                     event_id
226  68e0d79a3048252621b7d89f
358  68e0d79a3048252621b7d923
38   68e0d7821f7dc4e22f39d51a
301  68e0d79a3048252621b7d8ea
27   68e0d7821f7dc4e22f39d50f
60   68e0d7821f7dc4e22f39d530
378  68e0d79a3048252621b7d937
324  68e0d79a3048252621b7d901
