In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found.
for folder, _, file_names in os.walk('/kaggle/input'):
    full_paths = (os.path.join(folder, file_name) for file_name in file_names)
    for full_path in full_paths:
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [3]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from gensim.models import KeyedVectors
import numpy as np


# Read the student and workshop tables from the attached Kaggle dataset.
students_data, workshops_data = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/kaggle/input/student-data/students_data.csv",
        "/kaggle/input/student-data/workshop.csv",
    )
)


def preprocess_interests(data_column):
    """Return a copy of a string column with square brackets removed and text lowercased.

    Parameters
    ----------
    data_column : pandas.Series
        Column of strings, accessed through the ``.str`` accessor.

    Returns
    -------
    pandas.Series
        Same index as the input; every ``[`` and ``]`` character is dropped
        and the remaining text is converted to lower case.
    """
    # Drop the list-style brackets first, then normalise the case.
    without_brackets = data_column.str.replace(r"[\[\]]", "", regex=True)
    lowered = without_brackets.str.lower()
    return lowered

# Overwrite both free-text columns with their cleaned form
# (see preprocess_interests for the exact normalisation applied).
students_data["Interests"] = students_data["Interests"].pipe(preprocess_interests)
workshops_data["tags"] = workshops_data["tags"].pipe(preprocess_interests)


def get_average_embedding(text, model, embedding_dim):
    """Average the embedding vectors of the comma-separated terms in ``text``.

    Parameters
    ----------
    text : str
        Comma-separated terms, e.g. ``"python, robotics"``. Whitespace around
        each term is ignored and empty terms are skipped. A non-string value
        (such as the NaN pandas produces for an empty CSV cell) is treated as
        containing no terms.
    model : mapping-like
        Any object supporting ``term in model`` and ``model[term]``, for
        example a gensim ``KeyedVectors`` instance.
    embedding_dim : int
        Length of the zero vector returned when no term is found in ``model``.

    Returns
    -------
    numpy.ndarray
        Element-wise mean of the vectors of all terms present in ``model``,
        or ``np.zeros(embedding_dim)`` when none of the terms is known.
    """
    # Missing cells arrive as float NaN; calling .split on them would raise.
    if not isinstance(text, str):
        return np.zeros(embedding_dim)

    # Split on the bare comma and strip each piece so "a,b", "a, b" and
    # "a ,  b" all yield the same terms instead of one unknown token.
    terms = (piece.strip() for piece in text.split(","))
    embeddings = [model[term] for term in terms if term and term in model]

    if embeddings:
        return np.mean(embeddings, axis=0)
    return np.zeros(embedding_dim)


# --- GloVe embeddings ---
# Load the plain-text GloVe vectors; the file has no word2vec header line.
glove_model = KeyedVectors.load_word2vec_format(
    "/kaggle/input/glove6b300dtxt/glove.6B.300d.txt",
    binary=False,
    no_header=True,
)
embedding_dim_glove = glove_model.vector_size

# One averaged vector per student (from "Interests") and per workshop (from "tags").
students_data["glove"] = students_data["Interests"].apply(
    get_average_embedding, args=(glove_model, embedding_dim_glove)
)
workshops_data["glove"] = workshops_data["tags"].apply(
    get_average_embedding, args=(glove_model, embedding_dim_glove)
)

# Stack the per-row vectors into 2-D matrices (one row per student / workshop).
student_interest_vectors_glove = np.stack(students_data["glove"].tolist(), axis=0)
workshop_tag_vectors_glove = np.stack(workshops_data["glove"].tolist(), axis=0)

# Pairwise cosine similarity: student rows against workshop rows.
similarity_scores_glove = cosine_similarity(
    student_interest_vectors_glove,
    workshop_tag_vectors_glove,
)

# recommendations
def generate_recommendations(similarity_scores, method_name, top_n=3, students=None, workshops=None):
    """Print the highest-scoring workshops for every student.

    Parameters
    ----------
    similarity_scores : array-like, 2-D
        Score matrix whose row ``i`` belongs to the ``i``-th row (by position)
        of ``students`` and whose column ``j`` belongs to the ``j``-th row
        (by position) of ``workshops``.
    method_name : str
        Label shown in the printed header.
    top_n : int, default 3
        Number of workshops to list per student, best first.
    students : pandas.DataFrame, optional
        Frame with a ``"Name"`` column. Defaults to the notebook-level
        ``students_data``.
    workshops : pandas.DataFrame, optional
        Frame with a ``"title"`` column. Defaults to the notebook-level
        ``workshops_data``.

    Returns
    -------
    None
        The recommendations are only printed.

    Raises
    ------
    ValueError
        If ``top_n`` is smaller than 1, or if the number of score rows does
        not match the number of students.
    """
    if top_n < 1:
        # A slice of [-0:] would select every workshop rather than none.
        raise ValueError("top_n must be at least 1")

    # Fall back to the notebook globals so existing two-argument calls keep working.
    students = students_data if students is None else students
    workshops = workshops_data if workshops is None else workshops

    scores = np.asarray(similarity_scores)
    if scores.shape[0] != len(students):
        raise ValueError(
            f"similarity_scores has {scores.shape[0]} rows but there are {len(students)} students"
        )

    # Keep a list of (name, titles) pairs instead of a dict keyed by name:
    # two students sharing a name must not overwrite each other.
    recommendations = []
    # enumerate gives the row position; the DataFrame index label may differ
    # from it (e.g. after filtering or sorting), so it must not index `scores`.
    for row_position, student_name in enumerate(students["Name"]):
        student_scores = scores[row_position]
        top_workshop_indices = student_scores.argsort()[-top_n:][::-1]
        recommended_workshops = workshops.iloc[top_workshop_indices]["title"].tolist()
        recommendations.append((student_name, recommended_workshops))

    print(f"\nRecommendations using {method_name}:")
    for student, workshop_list in recommendations:
        print(f"Recommendations for {student}:")
        for workshop in workshop_list:
            print(f"- {workshop}")
        print()

# Announce the method, then print each student's recommended workshops.
print("GloVe Recommendations:")
generate_recommendations(
    similarity_scores=similarity_scores_glove,
    method_name="GloVe",
)


GloVe Recommendations:

Recommendations using GloVe:
Recommendations for Quincy Brown:
- Quantum Computing Basics
- Introduction to Game Development
- Data Structures and Algorithms

Recommendations for Quincy Taylor:
- Quantum Computing Basics
- Deep Dive into Robotics
- Robotics for Beginners

Recommendations for Alice Martinez:
- Robotics for Beginners
- Deep Dive into Robotics
- Building Your First Robot

Recommendations for Nathan Miller:
- Mastering Python for Data Science
- Data Visualization with Python
- Building Scalable Web Applications

Recommendations for Jane Johnson:
- Deep Dive into Robotics
- Building Your First Robot
- Robotics for Beginners

Recommendations for Nathan Martin:
- Exploring Artificial Neural Networks
- Quantum Computing Basics
- Deep Dive into Robotics

Recommendations for Charlie White:
- Data Visualization with Python
- Mastering Python for Data Science
- Exploring Cloud Computing

Recommendations for Hannah Miller:
- Exploring Artificial Neural Netwo

In [4]:
# Display the GloVe cosine-similarity matrix: rows follow
# student_interest_vectors_glove, columns follow workshop_tag_vectors_glove.
similarity_scores_glove

array([[0.13821168, 0.45320291, 0.25052392, ..., 0.38688249, 0.69841899,
        0.13821168],
       [0.13523568, 0.4716388 , 0.26324975, ..., 0.33974557, 0.29960725,
        0.13523568],
       [0.08543238, 0.17698846, 1.        , ..., 0.18358636, 0.14104156,
        0.08543238],
       ...,
       [0.143864  , 0.61901546, 0.65521752, ..., 0.47498734, 0.32131789,
        0.143864  ],
       [0.08543238, 0.17698846, 1.        , ..., 0.18358636, 0.14104156,
        0.08543238],
       [0.12933957, 0.56019571, 0.65930506, ..., 0.48423478, 0.60088142,
        0.12933957]])