In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [4]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from gensim.models import KeyedVectors
import numpy as np


students_data = pd.read_csv("/kaggle/input/student-data/students_data.csv")
workshops_data = pd.read_csv("/kaggle/input/student-data/workshop.csv")


def preprocess_interests(data_column):
    return data_column.str.replace(r"[\[\]]", "", regex=True).str.lower()

students_data["Interests"] = preprocess_interests(students_data["Interests"])
workshops_data["tags"] = preprocess_interests(workshops_data["tags"])


def get_average_embedding(text, model, embedding_dim):
    words = text.split(", ")
    embeddings = [model[word] for word in words if word in model]
    if embeddings:
        return np.mean(embeddings, axis=0)
    else:
        return np.zeros(embedding_dim)


tfidf_vectorizer = TfidfVectorizer()
student_interest_vectors_tfidf = tfidf_vectorizer.fit_transform(students_data["Interests"])
workshop_tag_vectors_tfidf = tfidf_vectorizer.transform(workshops_data["tags"])

similarity_scores_tfidf = cosine_similarity(student_interest_vectors_tfidf, workshop_tag_vectors_tfidf)

# word2vec
word2vec_model = KeyedVectors.load_word2vec_format("/kaggle/input/googlenewsvectorsnegative300/GoogleNews-vectors-negative300.bin", binary=True)
embedding_dim_word2vec = word2vec_model.vector_size

students_data["word2vec"] = students_data["Interests"].apply(lambda x: get_average_embedding(x, word2vec_model, embedding_dim_word2vec))
workshops_data["word2vec"] = workshops_data["tags"].apply(lambda x: get_average_embedding(x, word2vec_model, embedding_dim_word2vec))

student_interest_vectors_word2vec = np.stack(students_data["word2vec"])
workshop_tag_vectors_word2vec = np.stack(workshops_data["word2vec"])

similarity_scores_word2vec = cosine_similarity(student_interest_vectors_word2vec, workshop_tag_vectors_word2vec)


# recommendations
def generate_recommendations(similarity_scores, method_name):
    recommendations = {}
    for student_idx, student in students_data.iterrows():
        student_name = student["Name"]
        student_scores = similarity_scores[student_idx]
        top_workshop_indices = student_scores.argsort()[-3:][::-1]  # Top 3 workshops
        recommended_workshops = workshops_data.iloc[top_workshop_indices]["title"].tolist()
        recommendations[student_name] = recommended_workshops

    print(f"\nRecommendations using {method_name}:")
    for student, workshop_list in recommendations.items():
        print(f"Recommendations for {student}:")
        for workshop in workshop_list:
            print(f"- {workshop}")
        print()


print("TF-IDF Recommendations:")
generate_recommendations(similarity_scores_tfidf, "TF-IDF")

print("Word2Vec Recommendations:")
generate_recommendations(similarity_scores_word2vec, "Word2Vec")



TF-IDF Recommendations:

Recommendations using TF-IDF:
Recommendations for Quincy Brown:
- Introduction to Game Development
- Creating Mobile Apps with Flutter
- Data Structures and Algorithms

Recommendations for Quincy Taylor:
- Quantum Computing Basics
- Exploring Artificial Neural Networks
- Deep Dive into Robotics

Recommendations for Alice Martinez:
- Exploring Cloud Computing
- Building Scalable Web Applications
- Introduction to Web Development

Recommendations for Nathan Miller:
- Mastering Python for Data Science
- Quantum Computing Basics
- Understanding Blockchain Technology

Recommendations for Jane Johnson:
- Deep Dive into Robotics
- Robotics for Beginners
- Building Your First Robot

Recommendations for Nathan Martin:
- Introduction to Artificial Intelligence
- Building Scalable Web Applications
- Introduction to Web Development

Recommendations for Charlie White:
- Quantum Computing Basics
- Building Scalable Web Applications
- Introduction to Web Development

Recommen

In [6]:
print("TF-IDF Recommendations:",similarity_scores_tfidf)


print("Word2Vec Recommendations:",similarity_scores_word2vec)

TF-IDF Recommendations: [[0.         0.         0.         ... 0.         0.53260357 0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.63529861 ... 0.         0.         0.        ]
 ...
 [0.         0.34651903 0.30945368 ... 0.         0.         0.        ]
 [0.         0.         0.63529861 ... 0.         0.         0.        ]
 [0.         0.32583897 0.29098566 ... 0.         0.34872807 0.        ]]
Word2Vec Recommendations: [[0.09277781 0.46457559 0.13026545 ... 0.29316622 0.6539642  0.09277781]
 [0.03850658 0.46637317 0.14322483 ... 0.25822956 0.27694068 0.03850658]
 [0.04203538 0.10429342 1.         ... 0.06314851 0.09804105 0.04203538]
 ...
 [0.05955985 0.60567836 0.59565216 ... 0.33211403 0.2936772  0.05955985]
 [0.04203538 0.10429342 1.         ... 0.06314851 0.09804105 0.04203538]
 [0.10023169 0.53077381 0.62132624 ... 0.32020048 0.55566521 0.10023169]]
