In [5]:
import pandas as pd

In [11]:
# Load the two input tables from CSV files in the current working directory:
# exhibitors with their categories, and visitors with their questionnaire answers.
exhibitor_final=pd.read_csv("exhibitor_final.csv")
visitors_final=pd.read_csv("final_analysis_data_visitors.csv")

In [13]:
# Inspect the column names of both tables before building text features from them.
print(visitors_final.columns)
print(exhibitor_final.columns)

Index(['visitor_id', 'email', 'gender', 'question', 'answer', 'answer.1',
       'answerType'],
      dtype='object')
Index(['exhibitorid', 'Name', 'categoryId', 'categoryName'], dtype='object')


In [15]:
# Preview the first two visitor rows; the preview shows one question/answer pair per row,
# so a single visitor_id can span several rows.
visitors_final.head(2)

Unnamed: 0,visitor_id,email,gender,question,answer,answer.1,answerType
0,67b70a9f2d21f543a1096602,emilija@bss.mk,F,Reason for Attending the Event,To obtain general information,To obtain general information,Answer
1,67b70a9f2d21f543a1096602,emilija@bss.mk,F,Which of the following best describes your job...,Media,Media,Answer


In [17]:
def get_visitor_answers_by_email(email):
    """Look up a visitor by email and concatenate all of their answers.

    Args:
        email: Email address compared for exact equality against the ``email``
            column of ``visitors_final``.

    Returns:
        A ``(visitor_id, answers_text)`` tuple, where ``answers_text`` is every
        non-null ``answer`` recorded for that visitor id, joined with single spaces.
        ``(None, None)`` is returned (after printing a notice) when no row matches.
    """
    email_mask = visitors_final['email'] == email
    if not email_mask.any():
        print("Visitor email not found.")
        return None, None

    # The first matching row decides which visitor id the email belongs to.
    visitor_id = visitors_final.loc[email_mask, 'visitor_id'].values[0]

    # Collect every answer stored under that visitor id, skipping missing ones.
    id_mask = visitors_final['visitor_id'] == visitor_id
    answer_texts = visitors_final.loc[id_mask, 'answer'].dropna().astype(str)
    return visitor_id, " ".join(answer_texts.tolist())


In [19]:
# Group all category names into a single string per exhibitor.
# Missing category names are dropped before joining, the same way visitor answers are
# handled: astype(str) alone would turn NaN into the literal token "nan" in the profile
# text (and on pandas versions where missing values survive the cast, ' '.join would
# raise TypeError).
exhibitor_profiles = (
    exhibitor_final
    .groupby(['exhibitorid', 'Name'])['categoryName']
    .apply(lambda names: ' '.join(names.dropna().astype(str)))
    .reset_index()
)



In [35]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity



In [37]:
def recommend_exhibitors(visitor_email, top_n=7):
    """Rank exhibitors by TF-IDF cosine similarity to a visitor's answers.

    Args:
        visitor_email: Email used to look the visitor up via
            ``get_visitor_answers_by_email``.
        top_n: Maximum number of exhibitors to return (default 7).

    Returns:
        A new DataFrame with the ``top_n`` rows of ``exhibitor_profiles`` plus a
        ``similarity_score`` column, sorted by descending score. An empty DataFrame
        is returned when the visitor lookup yields no text.
    """
    _, visitor_text = get_visitor_answers_by_email(visitor_email)
    if visitor_text is None:
        return pd.DataFrame()

    # Row 0 of the corpus is the visitor; the remaining rows are the exhibitor
    # profiles in their existing order, so scores line up with the frame's rows.
    corpus = [visitor_text] + exhibitor_profiles['categoryName'].tolist()

    vectorizer = TfidfVectorizer(stop_words='english')
    tfidf_matrix = vectorizer.fit_transform(corpus)

    similarities = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:]).flatten()

    # Attach the scores to a copy instead of writing a column into the shared
    # module-level frame: the call then leaves exhibitor_profiles untouched, so
    # re-running cells or scoring another visitor cannot leak stale scores.
    scored_profiles = exhibitor_profiles.assign(similarity_score=similarities)
    return scored_profiles.sort_values(by='similarity_score', ascending=False).head(top_n)



In [39]:
# Example: show the best-matching exhibitors (default top_n=7) for one visitor email
recommend_exhibitors("emilija@bss.mk")


Unnamed: 0,exhibitorid,Name,categoryName,similarity_score
32,97818,Sunny Trips Adventures,3.2 Authorized travel agency 12.1 TIC: Travel ...,0.230892
8,27827,Exotic Tours Holidays,14.8 Other type of specialized tourism 12.1 TI...,0.187253
21,74870,Exotic Tours Journeys,6. Motorhomes & caravans 3.3 Independent trave...,0.111944
20,72153,Elite Adventures Holidays,3.3 Independent travel agency 5.2 Bus services...,0.108708
17,55311,Sunny Adventures Journeys,15.1 Print media 1.4 Boarding house 5.3 Railwa...,0.084167
26,92492,Asia Tourism,1.1 Hotel / Hotel chain / Inn 1.5 Resort hotel...,0.071852
6,18960,Elite Travel Services,10.3 Cultural heritage site 15.1 Print media 1...,0.065562


In [43]:
# Smoke test for recommend_exhibitors, driven by a visitor email taken from the data
print("Starting test for: recommend_exhibitors_by_visitor_email")

# Use the first non-null email in the dataset so the lookup is guaranteed to succeed
sample_email = visitors_final['email'].dropna().iloc[0]
requested_top_n = 5

# Get recommendations for exhibitors using the sample visitor email
recommendations_email = recommend_exhibitors(sample_email, top_n=requested_top_n)

# Ensure that recommendations are returned and not empty
assert not recommendations_email.empty, "❌ No recommendations returned for a valid visitor email"

# Check if the 'similarity_score' column exists in the returned recommendations
assert 'similarity_score' in recommendations_email.columns, "❌ 'similarity_score' column is missing in the recommendations"

# The top_n limit must be honoured: never more rows than requested
assert len(recommendations_email) <= requested_top_n, "❌ More recommendations returned than requested"

# Results are a ranking, so scores must be in non-increasing order
assert recommendations_email['similarity_score'].is_monotonic_decreasing, "❌ Recommendations are not sorted by similarity_score"

# If all checks pass, print a success message
print("✅ Test passed for recommend_exhibitors_by_visitor_email\n")


Starting test for: recommend_exhibitors_by_visitor_email
✅ Test passed for recommend_exhibitors_by_visitor_email

