In [3]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


students_data = pd.read_csv(r"students_data.csv")
workshops_data = pd.read_csv(r"workshop.csv")


In [5]:

def preprocess_interests(data_column):
    return data_column.str.replace(r"[\[\]]", "", regex=True).str.lower()

students_data["Interests"] = preprocess_interests(students_data["Interests"])
workshops_data["tags"] = preprocess_interests(workshops_data["tags"])


tfidf_vectorizer = TfidfVectorizer()
student_interest_vectors = tfidf_vectorizer.fit_transform(students_data["Interests"])
workshop_tag_vectors = tfidf_vectorizer.transform(workshops_data["tags"])


similarity_scores = cosine_similarity(student_interest_vectors, workshop_tag_vectors)


recommendations = {}
for student_idx, student in students_data.iterrows():
    student_name = student["Name"]
    student_scores = similarity_scores[student_idx]
    top_workshop_indices = student_scores.argsort()[-3:][::-1]  
    recommended_workshops = workshops_data.iloc[top_workshop_indices]["title"].tolist()
    recommendations[student_name] = recommended_workshops

for student, workshop_list in recommendations.items():
    print(f"Recommendations for {student}:")
    for workshop in workshop_list:
        print(f"- {workshop}")
    print()

Recommendations for Quincy Brown:
- Introduction to Game Development
- Creating Mobile Apps with Flutter
- Data Structures and Algorithms

Recommendations for Quincy Taylor:
- Quantum Computing Basics
- Exploring Artificial Neural Networks
- Deep Dive into Robotics

Recommendations for Alice Martinez:
- Exploring Cloud Computing
- Building Scalable Web Applications
- Introduction to Web Development

Recommendations for Nathan Miller:
- Mastering Python for Data Science
- Quantum Computing Basics
- Understanding Blockchain Technology

Recommendations for Jane Johnson:
- Deep Dive into Robotics
- Robotics for Beginners
- Building Your First Robot

Recommendations for Nathan Martin:
- Introduction to Artificial Intelligence
- Building Scalable Web Applications
- Introduction to Web Development

Recommendations for Charlie White:
- Quantum Computing Basics
- Building Scalable Web Applications
- Introduction to Web Development

Recommendations for Hannah Miller:
- AI in Healthcare
- Data Vi

In [6]:
similarity_scores

array([[0.        , 0.        , 0.        , ..., 0.        , 0.53260357,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.63529861, ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.34651903, 0.30945368, ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.63529861, ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.32583897, 0.29098566, ..., 0.        , 0.34872807,
        0.        ]])

In [8]:
def get_average_embedding(text, model, embedding_dim):
    words = text.split(", ")
    embeddings = [model[word] for word in words if word in model]
    if embeddings:
        return np.mean(embeddings, axis=0)
    else:
        return np.zeros(embedding_dim)


tfidf_vectorizer = TfidfVectorizer()
student_interest_vectors_tfidf = tfidf_vectorizer.fit_transform(students_data["Interests"])
workshop_tag_vectors_tfidf = tfidf_vectorizer.transform(workshops_data["tags"])

similarity_scores_tfidf = cosine_similarity(student_interest_vectors_tfidf, workshop_tag_vectors_tfidf)

# word2vec 
word2vec_model = KeyedVectors.load_word2vec_format("GoogleNews-vectors-negative300.bin", binary=True)
embedding_dim_word2vec = word2vec_model.vector_size

students_data["word2vec"] = students_data["Interests"].apply(lambda x: get_average_embedding(x, word2vec_model, embedding_dim_word2vec))
workshops_data["word2vec"] = workshops_data["tags"].apply(lambda x: get_average_embedding(x, word2vec_model, embedding_dim_word2vec))

student_interest_vectors_word2vec = np.stack(students_data["word2vec"])
workshop_tag_vectors_word2vec = np.stack(workshops_data["word2vec"])

similarity_scores_word2vec = cosine_similarity(student_interest_vectors_word2vec, workshop_tag_vectors_word2vec)

# glove 
glove_model = KeyedVectors.load_word2vec_format("glove.6B.300d.txt", binary=False, no_header=True)
embedding_dim_glove = glove_model.vector_size

students_data["glove"] = students_data["Interests"].apply(lambda x: get_average_embedding(x, glove_model, embedding_dim_glove))
workshops_data["glove"] = workshops_data["tags"].apply(lambda x: get_average_embedding(x, glove_model, embedding_dim_glove))

student_interest_vectors_glove = np.stack(students_data["glove"])
workshop_tag_vectors_glove = np.stack(workshops_data["glove"])

similarity_scores_glove = cosine_similarity(student_interest_vectors_glove, workshop_tag_vectors_glove)


def generate_recommendations(similarity_scores, method_name):
    recommendations = {}
    for student_idx, student in students_data.iterrows():
        student_name = student["Name"]
        student_scores = similarity_scores[student_idx]
        top_workshop_indices = student_scores.argsort()[-3:][::-1]  # Top 3 workshops
        recommended_workshops = workshops_data.iloc[top_workshop_indices]["title"].tolist()
        recommendations[student_name] = recommended_workshops

    print(f"\nRecommendations using {method_name}:")
    for student, workshop_list in recommendations.items():
        print(f"Recommendations for {student}:")
        for workshop in workshop_list:
            print(f"- {workshop}")
        print()

# Display results
print("TF-IDF Recommendations:")
generate_recommendations(similarity_scores_tfidf, "TF-IDF")

print("Word2Vec Recommendations:")
generate_recommendations(similarity_scores_word2vec, "Word2Vec")

print("GloVe Recommendations:")
generate_recommendations(similarity_scores_glove, "GloVe")


NameError: name 'KeyedVectors' is not defined