In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load the final cleaned CSVs. The file names are kept in named constants so
# the inputs of this notebook are visible (and editable) in one place.
EXHIBITOR_CSV = "exhibitor_final.csv"
VISITORS_CSV = "final_analysis_data_visitors.csv"

exhibitor_final = pd.read_csv(EXHIBITOR_CSV)
visitors_final = pd.read_csv(VISITORS_CSV)


In [2]:
# Preview the first two rows to check the loaded columns. In the rendered
# output the same exhibitorid appears on both rows with different categories.
exhibitor_final.head(2)

Unnamed: 0,exhibitorid,Name,categoryId,categoryName
0,90556,Turkey Travels,52276,1.5 Resort hotel
1,90556,Turkey Travels,52280,2.1 Inbound tour operator


In [3]:
# Group exhibitor categories: one row per (exhibitorid, Name) pair whose
# categoryName column holds all of that exhibitor's category names joined by
# single spaces.
# Missing category names are dropped before joining; without dropna(),
# astype(str) would turn NaN into the literal token "nan" and that token would
# end up in the text that is later fed to TF-IDF. The visitor grouping in this
# notebook drops NaN answers in the same way.
exhibitor_profiles = (
    exhibitor_final
    .groupby(['exhibitorid', 'Name'])['categoryName']
    .apply(lambda names: ' '.join(names.dropna().astype(str)))
    .reset_index()
)


In [7]:
# Group visitor answers: one row per (visitor_id, email) pair whose answer
# column is every non-null answer of that visitor joined by single spaces.
visitor_profiles = (
    visitors_final
    .groupby(['visitor_id', 'email'])['answer']
    .apply(lambda answers: ' '.join(answers.dropna().astype(str)))
    .reset_index()
)


In [9]:
def get_exhibitor_category_text_by_id(exhibitor_id):
    """Look up the joined category text of one exhibitor.

    Parameters
    ----------
    exhibitor_id
        Value compared for equality against ``exhibitor_profiles['exhibitorid']``.

    Returns
    -------
    The ``categoryName`` value of the first matching row of
    ``exhibitor_profiles``, or ``None`` (after printing a notice) when no row
    matches.
    """
    id_mask = exhibitor_profiles['exhibitorid'] == exhibitor_id
    matching_text = exhibitor_profiles.loc[id_mask, 'categoryName']

    if not matching_text.empty:
        # Several rows may share an id; only the first one is used.
        return matching_text.values[0]

    print("Exhibitor ID not found.")
    return None


In [11]:
def recommend_visitors(exhibitor_id, top_n=7):
    """Rank visitors by textual similarity to one exhibitor's categories.

    The exhibitor's joined category text and every visitor's joined answers
    are vectorised together with TF-IDF (English stop words removed), and each
    visitor is scored by cosine similarity to the exhibitor document.

    Parameters
    ----------
    exhibitor_id
        Identifier passed to ``get_exhibitor_category_text_by_id``.
    top_n : int, default 7
        Maximum number of rows to return.

    Returns
    -------
    pandas.DataFrame
        The rows of ``visitor_profiles`` with an added ``similarity_score``
        column, sorted from highest to lowest score and truncated to
        ``top_n`` rows. An empty DataFrame (no columns) is returned when the
        exhibitor ID is not found.

    Notes
    -----
    The module-level ``visitor_profiles`` frame is not modified: scores are
    attached to a copy, so repeated calls for different exhibitors cannot
    leave stale scores behind in shared state.
    """
    exhibitor_text = get_exhibitor_category_text_by_id(exhibitor_id)
    if exhibitor_text is None:
        return pd.DataFrame()

    # Document 0 is the exhibitor; documents 1..N are the visitors, in the
    # same order as the rows of visitor_profiles.
    corpus = [exhibitor_text] + visitor_profiles['answer'].tolist()

    # The vectorizer is fitted on every call, on this exhibitor plus all
    # visitors, so the vocabulary and IDF weights are specific to this corpus.
    vectorizer = TfidfVectorizer(stop_words='english')
    tfidf_matrix = vectorizer.fit_transform(corpus)

    # Compare the exhibitor row against every visitor row; flatten the
    # resulting (1, N) matrix into one score per visitor.
    similarities = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:]).flatten()

    # assign() returns a new DataFrame, leaving visitor_profiles untouched.
    scored_visitors = visitor_profiles.assign(similarity_score=similarities)

    return scored_visitors.sort_values(by='similarity_score', ascending=False).head(top_n)


In [14]:
# Example: the top matches (default top_n=7) for exhibitor 90556, the
# exhibitor ID shown in the exhibitor_final preview output above.
recommend_visitors(90556)  


Unnamed: 0,visitor_id,email,answer,similarity_score
81,sl0pqtnqavydiqidf8nxzrea,3990147_sens_09hr@gmail.com,To source products and services Tour Operator ...,0.393862
23,67b5e0f7774d9e718c7541db,3990147_sens@gmail.com,To source products and services Tour Operator ...,0.393862
91,wgf8glx8axdaq94290uynav9,3990147_sens_mvzi@gmail.com,To source products and services Tour Operator ...,0.393862
45,bqg1ucjqk14u5av22n0jfhjb,aleksandar.dimkov@bss.com.mk,To source products and services Visa support T...,0.262447
87,unfq2oxirwh1d7iqfgqxwdcd,aleksandar.dimkov@bss.com.mk,To source products and services Visa support T...,0.262447
4,3p80z1iocd67z0qvg8ju1cc0,aleksandar.dimkov@bss.com.mk,To source products and services Visa support T...,0.262447
25,67b5f1392d21f543a10965f1,aleksandar.dimkov@bss.com.mk,To source products and services Visa support T...,0.262447


## Unit test

In [21]:
# Smoke test for recommend_visitors: for a known exhibitor ID the function
# must return a non-empty, score-sorted frame of at most `requested_top_n` rows.
print("Starting test for: recommend_visitors_by_exhibitorID")

# Number of recommendations requested; reused by the size check below.
requested_top_n = 5

# Pick a valid exhibitor ID from the dataset
sample_exhibitor_id = exhibitor_final['exhibitorid'].dropna().iloc[0]

# Get recommended visitors for that exhibitor
recommendations = recommend_visitors(sample_exhibitor_id, top_n=requested_top_n)

# Check if we received any recommendations
assert not recommendations.empty, "❌ No recommendations returned for a valid exhibitor ID"

# Check if similarity scores are present
assert 'similarity_score' in recommendations.columns, "❌ 'similarity_score' column is missing in the recommendations"

# Check that top_n is honoured (fewer rows are fine when there are few visitors)
assert len(recommendations) <= requested_top_n, "❌ More recommendations returned than requested via top_n"

# Check that the best matches come first (ties are allowed)
assert recommendations['similarity_score'].is_monotonic_decreasing, "❌ Recommendations are not sorted by descending similarity_score"

# If all checks pass
print("✅ Test passed for recommend_visitors_by_exhibitorID\n")


Starting test for: recommend_visitors_by_exhibitorID
✅ Test passed for recommend_visitors_by_exhibitorID

