Glove Using Stop-word removal,stemming and tokenization.


In [3]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from gensim.models import KeyedVectors
import numpy as np
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
import nltk

nltk.download('punkt')
nltk.download('stopwords')

stemmer = PorterStemmer()
stop_words = set(stopwords.words('english'))


students_data = pd.read_csv("students_data.csv")
workshops_data = pd.read_csv("workshop.csv")

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\hrith\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\hrith\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [4]:


def preprocess_interests(data_column):
    return data_column.str.replace(r"[\[\]]", "", regex=True).str.lower()

def apply_stemming_and_stopword_removal(text):
    tokens = word_tokenize(text)
    filtered_tokens = [token for token in tokens if token not in stop_words]
    stemmed_tokens = [stemmer.stem(token) for token in filtered_tokens]
    return ', '.join(stemmed_tokens)

students_data["Interests"] = preprocess_interests(students_data["Interests"])
students_data["Interests"] = students_data["Interests"].apply(apply_stemming_and_stopword_removal)

workshops_data["tags"] = preprocess_interests(workshops_data["tags"])
workshops_data["tags"] = workshops_data["tags"].apply(apply_stemming_and_stopword_removal)

def get_average_embedding(text, model, embedding_dim):
    words = text.split(", ")
    embeddings = [model[word] for word in words if word in model]
    if embeddings:
        return np.mean(embeddings, axis=0)
    else:
        return np.zeros(embedding_dim)



In [5]:
glove_model = KeyedVectors.load_word2vec_format(
    r"C:\Users\hrith\Downloads\glove.6B.300d.txt\glove.6B.300d.txt", binary=False, no_header=True)
embedding_dim_glove = glove_model.vector_size

students_data["glove"] = students_data["Interests"].apply(lambda x: get_average_embedding(x, glove_model, embedding_dim_glove))
workshops_data["glove"] = workshops_data["tags"].apply(lambda x: get_average_embedding(x, glove_model, embedding_dim_glove))

student_interest_vectors_glove = np.stack(students_data["glove"])
workshop_tag_vectors_glove = np.stack(workshops_data["glove"])

similarity_scores_glove = cosine_similarity(student_interest_vectors_glove, workshop_tag_vectors_glove)


def generate_recommendations(similarity_scores, method_name):
    recommendations = {}
    for student_idx, student in students_data.iterrows():
        student_name = student["Name"]
        student_scores = similarity_scores[student_idx]
        top_workshop_indices = student_scores.argsort()[-3:][::-1]  
        recommended_workshops = workshops_data.iloc[top_workshop_indices]["title"].tolist()
        recommendations[student_name] = recommended_workshops

    print(f"\nRecommendations using {method_name}:")
    for student, workshop_list in recommendations.items():
        print(f"Recommendations for {student}:")
        for workshop in workshop_list:
            print(f"- {workshop}")
        print()

print("GloVe Recommendations:")
generate_recommendations(similarity_scores_glove, "GloVe")

GloVe Recommendations:

Recommendations using GloVe:
Recommendations for Quincy Brown:
- Quantum Computing Basics
- Introduction to Game Development
- Mastering Python for Data Science

Recommendations for Quincy Taylor:
- Quantum Computing Basics
- Building Your First Robot
- Robotics for Beginners

Recommendations for Alice Martinez:
- Exploring Cloud Computing
- Building Scalable Web Applications
- Introduction to Web Development

Recommendations for Nathan Miller:
- Mastering Python for Data Science
- Understanding Blockchain Technology
- Introduction to Cybersecurity

Recommendations for Jane Johnson:
- Deep Dive into Robotics
- Building Your First Robot
- Quantum Computing Basics

Recommendations for Nathan Martin:
- Building Scalable Web Applications
- Introduction to Web Development
- Introduction to Game Development

Recommendations for Charlie White:
- Introduction to Cybersecurity
- Mastering Python for Data Science
- Building Scalable Web Applications

Recommendations for H

In [6]:
similarity_scores_glove

array([[ 0.4094774 ,  0.32413885,  0.36299271, ...,  0.46292319,
         0.55912699,  0.45419971],
       [-0.11098897, -0.11748979, -0.1472138 , ..., -0.19257822,
        -0.17903839, -0.07370431],
       [-0.10704012, -0.1327812 ,  0.27740602, ..., -0.21335175,
        -0.21222696, -0.12117955],
       ...,
       [ 0.47642455,  0.56133925,  0.53035468, ...,  0.49166105,
         0.32649132,  0.39768337],
       [-0.10704012, -0.1327812 ,  0.27740602, ..., -0.21335175,
        -0.21222696, -0.12117955],
       [ 0.56586623,  0.61913659,  0.65846672, ...,  0.6685367 ,
         0.65243174,  0.53813895]])