In [1]:
import numpy as np
import pandas as pd
import sklearn
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

ratings = pd.read_csv("Event-ratings.csv")
ratings.head()

Unnamed: 0,User_ID,Event_ID,Rating
0,1,2,4.3
1,1,3,5.0
2,1,4,3.4
3,1,6,4.6
4,1,8,4.7


In [3]:
events = pd.read_csv("Events.csv")
events.head()

Unnamed: 0,Event_ID,Event_description,Event_reward
0,1,Machine learning and model development,15
1,2,Backend Discussion,10
2,3,Frontend Discussion,10
3,4,Fullstack Discussion,13
4,5,Algorithm development,12


In [4]:
user_freq = ratings[['User_ID', 'Event_ID']].groupby('User_ID').count().reset_index()
user_freq.columns = ['User_ID', 'n_ratings']
user_freq.head()

Unnamed: 0,User_ID,n_ratings
0,1,6
1,2,5
2,3,7
3,4,7
4,5,7


In [5]:
mean_rating = ratings.groupby('Event_ID')[['Rating']].mean()

In [6]:
lowest_rated = mean_rating['Rating'].idxmin()
events.loc[events['Event_ID'] == lowest_rated]

Unnamed: 0,Event_ID,Event_description,Event_reward
1,2,Backend Discussion,10


In [7]:
highest_rated = mean_rating['Rating'].idxmax()
events.loc[events['Event_ID'] == highest_rated]

Unnamed: 0,Event_ID,Event_description,Event_reward
6,7,System architecture discussion,13


In [8]:
ratings[ratings['Event_ID']==highest_rated]

Unnamed: 0,User_ID,Event_ID,Rating
9,2,7,4.0
16,3,7,4.9
24,4,7,4.5
29,5,7,5.0
36,6,7,4.5
51,8,7,5.0
58,9,7,4.5
66,10,7,5.0


In [9]:
ratings[ratings['Event_ID']==lowest_rated]

Unnamed: 0,User_ID,Event_ID,Rating
0,1,2,4.3
7,2,2,4.5
12,3,2,4.0
19,4,2,3.5
25,5,2,2.0
32,6,2,3.4
47,8,2,3.4
53,9,2,2.9


In [10]:
event_stats = ratings.groupby('Event_ID')[['Rating']].agg(['count', 'mean'])
event_stats.columns = event_stats.columns.droplevel()

In [11]:
from scipy.sparse import csr_matrix

In [12]:
def create_matrix(df):

    N = len(df['User_ID'].unique())
    M = len(df['Event_ID'].unique())

    user_mapper = dict(zip(np.unique(df["User_ID"]), list(range(N))))
    event_mapper = dict(zip(np.unique(df["Event_ID"]), list(range(M))))
    
    user_inv_mapper = dict(zip(list(range(N)), np.unique(df["User_ID"])))
    event_inv_mapper = dict(zip(list(range(M)), np.unique(df["Event_ID"])))

    user_index = [user_mapper[i] for i in df['User_ID']]
    event_index = [event_mapper[i] for i in df['User_ID']]

    X = csr_matrix((df["Rating"], (event_index, user_index)), shape=(M, N))
    
    return X, user_mapper, event_mapper, user_inv_mapper, event_inv_mapper


In [13]:
X, user_mapper, event_mapper, user_inv_mapper, event_inv_mapper = create_matrix(ratings)


In [14]:
from sklearn.neighbors import NearestNeighbors


In [15]:
def find_similar_events(event_id, X, k, metric='cosine', show_distance=False):

    neighbour_ids = []
    
    event_ind = event_mapper[event_id]
    event_vec = X[event_ind]
    k+=1
    kNN = NearestNeighbors(n_neighbors=k, algorithm="brute", metric=metric)
    kNN.fit(X)
    event_vec = event_vec.reshape(1,-1)
    neighbour = kNN.kneighbors(event_vec, return_distance=show_distance)
    for i in range(0,k):
        n = neighbour.item(i)
        neighbour_ids.append(event_inv_mapper[n])
    neighbour_ids.pop(0)
    return neighbour_ids



In [16]:
event_titles = dict(zip(events['Event_ID'], events['Event_description']))

event_id = 1

similar_ids = find_similar_events(event_id, X, k=5)
event_title = event_titles[event_id]

In [17]:
print(f"Since you liked : {event_title}  ")
print("You should try : ")
for i in similar_ids:
    print(event_titles[i],",")

Since you liked : Machine learning and model development  
You should try : 
System architecture discussion ,
Debugging principles  ,
Ethics Discussion ,
Algorithm development ,
CyberSecurity ,
