In [2]:
import pandas as pd
import string
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer

In [3]:
# Load the events dataset (the path is relative to the notebook's working directory)
events = pd.read_json("Datasets/events.json")

In [4]:
# Peek at a single record to see which columns the raw data provides
events.head(1)

Unnamed: 0,event_id,title,description,tags,location,duration,organizer_name,event_type,domain,price,free_entry,Expected_attendance
0,e111,Deep Learning Bootcamp,Hands-on deep learning with transformers and P...,"[AI, ML, Workshop, deep learning, transformers...",Delhi,1 day / 24 hr,CcpC,Workshop,Artificial Intelligence,499.0,False,100


In [5]:
# Build one underscore-delimited string per event from its descriptive text fields
# (the underscore is the separator that is split on again further down)
events['Keywords'] = events['title'].str.cat(
    events[['location', 'organizer_name', 'event_type', 'domain']],
    sep='_',
)

In [6]:
# Remove the five fields already folded into Keywords, together with the
# attendance, price and duration columns, which are not used afterwards
events = events.drop(
    columns=[
        'title', 'location', 'organizer_name', 'event_type', 'domain',
        'Expected_attendance', 'price', 'duration',
    ]
)

In [7]:
# Undo the underscore join: every Keywords cell becomes a list of its field values
events['Keywords'] = events['Keywords'].map(lambda joined: joined.split('_'))


In [8]:
# Concatenate each event's field list with its tag list into a single
# `keywords` list, then discard the two source columns
events = (
    events
    .assign(keywords=events['Keywords'].add(events['tags']))
    .drop(columns=['Keywords', 'tags'])
)

In [9]:
# free_entry is not used for the text features, so remove it
events = events.drop(columns='free_entry')

In [10]:
# Check the reduced frame: only event_id, description and keywords should remain
events.head(5)

Unnamed: 0,event_id,description,keywords
0,e111,Hands-on deep learning with transformers and P...,"[Deep Learning Bootcamp, Delhi, CcpC, Workshop..."
1,e113,Explore the transformative impact of AI in hea...,"[AI in Healthcare Symposium, Bangalore, Health..."
2,e114,Join us for a conference focused on leveraging...,"[AI for Social Good Conference, Mumbai, Global..."
3,e115,This workshop will delve into the ethical impl...,"[AI Ethics and Governance Workshop, Chennai, E..."
4,e116,Discover the latest advancements in AI and rob...,"[AI and Robotics Expo, Hyderabad, Tech Innovat..."


In [11]:
# Break each description into a list of space-separated words so it can be
# concatenated with the keywords list
events['description'] = events['description'].map(lambda text: text.split(' '))

In [12]:
# Merge the keywords list and the description word list into one `key` list,
# then discard the two source columns
events = (
    events
    .assign(key=events['keywords'].add(events['description']))
    .drop(columns=['keywords', 'description'])
)

In [13]:
# Show the frame after the merge: one event_id and one combined key list per event
events

Unnamed: 0,event_id,key
0,e111,"[Deep Learning Bootcamp, Delhi, CcpC, Workshop..."
1,e113,"[AI in Healthcare Symposium, Bangalore, Health..."
2,e114,"[AI for Social Good Conference, Mumbai, Global..."
3,e115,"[AI Ethics and Governance Workshop, Chennai, E..."
4,e116,"[AI and Robotics Expo, Hyderabad, Tech Innovat..."
...,...,...
95,e285,"[DevOps Meetup, Gurgaon, Tech Titans, Meetup, ..."
96,e286,"[Blockchain Forum, Chennai, Tech Titans, Forum..."
97,e287,"[Augmented Reality Workshop, Gurgaon, Cloud Ni..."
98,e288,"[Game Development Bootcamp, Pune, Quantum Leap..."


In [14]:
# Flatten each token list back into a single space-separated string
events['key'] = events['key'].map(" ".join)



In [15]:
# Confirm that key now holds plain strings rather than lists
events

Unnamed: 0,event_id,key
0,e111,Deep Learning Bootcamp Delhi CcpC Workshop Ar...
1,e113,AI in Healthcare Symposium Bangalore HealthTec...
2,e114,AI for Social Good Conference Mumbai Global AI...
3,e115,AI Ethics and Governance Workshop Chennai Ethi...
4,e116,AI and Robotics Expo Hyderabad Tech Innovators...
...,...,...
95,e285,DevOps Meetup Gurgaon Tech Titans Meetup DevOp...
96,e286,Blockchain Forum Chennai Tech Titans Forum Blo...
97,e287,Augmented Reality Workshop Gurgaon Cloud Ninja...
98,e288,Game Development Bootcamp Pune Quantum Leap In...


In [16]:
# Normalise the text: lowercase everything, then delete every character
# listed in string.punctuation
events['key'] = (
    events['key']
    .str.lower()
    .str.translate(str.maketrans('', '', string.punctuation))
)

In [17]:
# Bag-of-words vectorizer: vocabulary capped at 5000 terms, with scikit-learn's
# built-in English stop-word list removed
cv = CountVectorizer(max_features=5000,stop_words='english')

In [18]:
# Learn the vocabulary from the key strings and build the term-count matrix,
# converted from sparse to a dense array
event_vector = cv.fit_transform(events['key']).toarray()

In [19]:
# Display the count matrix (one row per event, one column per vocabulary term)
event_vector

array([[0, 0, 0, ..., 2, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 1, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 4, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [20]:
# Pairwise cosine similarity between every pair of event count vectors;
# entry [i][j] is the similarity between the events at row positions i and j
event_similarity = cosine_similarity(event_vector)

In [None]:
# Similarity of the first event (row 0) to every event, as (row position, score) pairs
list(enumerate(event_similarity[0]))

[(0, np.float64(1.0)),
 (1, np.float64(0.10144749970923667)),
 (2, np.float64(0.10988845115895121)),
 (3, np.float64(0.22200819711023184)),
 (4, np.float64(0.09177788727679406)),
 (5, np.float64(0.10302042296151677)),
 (6, np.float64(0.21028955778451666)),
 (7, np.float64(0.15959855297967945)),
 (8, np.float64(0.08453958309103057)),
 (9, np.float64(0.681475042298805)),
 (10, np.float64(0.01886792452830189)),
 (11, np.float64(0.14882641804174754)),
 (12, np.float64(0.016536268671305282)),
 (13, np.float64(0.01886792452830189)),
 (14, np.float64(0.01886792452830189)),
 (15, np.float64(0.14882641804174754)),
 (16, np.float64(0.016536268671305282)),
 (17, np.float64(0.016536268671305282)),
 (18, np.float64(0.01886792452830189)),
 (19, np.float64(0.016536268671305282)),
 (20, np.float64(0.01886792452830189)),
 (21, np.float64(0.01886792452830189)),
 (22, np.float64(0.08268134335652641)),
 (23, np.float64(0.01607683798406202)),
 (24, np.float64(0.169811320754717)),
 (25, np.float64(0.0165362

In [26]:
def recommend_event(event_id, top_n=5):
    """Print the ids of the events most similar to ``event_id``.

    Relies on two notebook-level globals: ``events`` (a DataFrame with an
    ``event_id`` column) and ``event_similarity`` (a square similarity matrix
    whose row/column order matches the row order of ``events``).

    Parameters
    ----------
    event_id : str
        Identifier of the event to find recommendations for. If the id occurs
        more than once, the first matching row is used.
    top_n : int, default 5
        Maximum number of recommendations to print.

    Returns
    -------
    None
        The recommended event ids are printed, one per line, most similar first.

    Raises
    ------
    IndexError
        If ``event_id`` is not present in ``events``.
    ValueError
        If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    # Look the event up by *row position*, not by index label: the similarity
    # matrix is positional, so a non-default DataFrame index must not leak in.
    matches = (events['event_id'] == event_id).to_numpy().nonzero()[0]
    if len(matches) == 0:
        # IndexError is kept as the exception type an unknown id has always produced.
        raise IndexError(f"event_id {event_id!r} not found in events")
    position = int(matches[0])

    scores = event_similarity[position]

    # Exclude the queried event explicitly instead of assuming it sorts first;
    # another event with an identical score (e.g. a duplicate listing) could
    # otherwise push the query itself into its own recommendations.
    ranked = sorted(
        (pair for pair in enumerate(scores) if pair[0] != position),
        key=lambda pair: pair[1],
        reverse=True,
    )[:top_n]

    for other_position, _score in ranked:
        print(events['event_id'].iloc[other_position])

    return

In [27]:
# Example: print the events recommended for event e220
recommend_event('e220')

e203
e208
e238
e267
e230


In [24]:
import pickle

# Persist the similarity matrix. The context manager guarantees the file is
# flushed and closed even if pickling fails, instead of leaking the handle.
with open('Models/events_similarity.pkl', 'wb') as similarity_file:
    pickle.dump(event_similarity, similarity_file)

In [25]:
# Persist the processed events frame; `with` closes the file deterministically
# rather than leaving the handle open.
with open('Models/events.pkl', 'wb') as events_file:
    pickle.dump(events, events_file)