# recommend_exhibitors_by_answers

In [60]:

visitors_answers = pd.read_csv("visitors_answers.csv")

exhibitor_final=pd.read_csv("exhibitor_final.csv")

In [62]:
visitors_answers.head(2)

Unnamed: 0,id,answer,questionId
0,5c8a78336d41a10da4f73103,Personal interest,5c8a78336d41a10da4f730fe
1,5c8a78336d41a10da4f73100,To obtain general information,5c8a78336d41a10da4f730fe


In [39]:
exhibitor_final.head(2)

Unnamed: 0,exhibitorid,Name,categoryId,categoryName
0,90556,Turkey Travels,52276,1.5 Resort hotel
1,90556,Turkey Travels,52280,2.1 Inbound tour operator


In [64]:
# Extract unique category names
category_texts = exhibitor_final['categoryName'].dropna().unique()

# Extract unique visitor answers
answer_texts = visitors_answers['answer'].dropna().unique()


In [55]:
# Clean up and deduplicate category names
category_set = set()
for cat_string in category_texts:
    for cat in str(cat_string).split(','):
        category_set.add(cat.strip().lower())

category_list = list(category_set)


In [66]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Lowercase and clean answer texts
answer_list = [str(ans).strip().lower() for ans in answer_texts]

# Combine all text for fitting vectorizer
all_text = answer_list + category_list

# TF-IDF
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(all_text)

# Split TF-IDF into answers and categories
answer_vecs = tfidf_matrix[:len(answer_list)]
category_vecs = tfidf_matrix[len(answer_list):]

# Cosine similarity
similarity_matrix = cosine_similarity(answer_vecs, category_vecs)


In [68]:
# For each answer, get top matching category
top_matches = []
for i, ans in enumerate(answer_list):
    sims = similarity_matrix[i]
    best_idx = sims.argmax()
    top_category = category_list[best_idx]
    top_score = sims[best_idx]
    top_matches.append({'Answer': ans, 'Matched_Category': top_category, 'Score': round(top_score, 4)})

mapping_df = pd.DataFrame(top_matches)


In [70]:
mapping_df

Unnamed: 0,Answer,Matched_Category,Score
0,personal interest,5.4 transfer services,0.0
1,to obtain general information,12.1 tic: travel information centre,0.2388
2,to source products and services,8.6 medical products and machinery manufacture...,0.3112
3,to promote products and services,8.6 medical products and machinery manufacture...,0.3112
4,educational purposes,5.4 transfer services,0.0
5,formation of tourist products,12. tourist board / government / nto,0.2377
6,guided tour services,2.6 specialized tour operator,0.2727
7,event management,11. mice and event management,0.6375
8,marketing,5.4 transfer services,0.0
9,visa support,5.4 transfer services,0.0


In [72]:
mapping_df.to_csv('mapping_answers_to_categories.csv', index=False)
