# GIT LOST 

## Centralized Student-Alumni Engagement Platform 

### Feed Personalization

In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# Posts are referenced by ID keys instead of their titles: the model runs in the backend, so we don't need to print the content every time in the code output.

# Sample posts, one (id, title, content) row per post, so each item's fields
# can be read together.
_POSTS = [
    (
        'item-1',
        'Introduction to Competitive Programming',
        'Competitive Programming is all about speed and accuracy',
    ),
    (
        'item-2',
        'Hackathons are a platform for competitive programming',
        'Learn to code in competitons with higher accuracy and better precision',
    ),
    (
        'item-3',
        'Introduction to Machine Learning',
        'Machine learning is a field of AI. This introduces key concepts and algorithms',
    ),
    (
        'item-4',
        'Programming made easy using python. Helps in competitions',
        'Python is heavily used in the industry. Here is a tutorial on how to use python and use it in competitive hackathons',
    ),
    (
        'item-5',
        'Understanding Neural Networks',
        'Neural networks are a key part of deep learning, a subfield of machine learning',
    ),
]

# Pivot the rows into a column-oriented mapping: column name -> list of values,
# in the same order as the rows above.
data = {
    column: [post[position] for post in _POSTS]
    for position, column in enumerate(('id', 'title', 'content'))
}

# Our dataset: one row per post, columns 'id', 'title' and 'content'.
df = pd.DataFrame(data)

# TF-IDF model over the post bodies. Common English stop words
# (such as 'the', 'a', 'is') are dropped so they do not influence similarity.
tfidf = TfidfVectorizer(stop_words='english')

# Learn the vocabulary from every post's content and vectorise it in one pass.
# The resulting matrix has shape (number of items, number of unique words).
corpus = df['content']
tfidf_matrix = tfidf.fit_transform(corpus)

# Item-to-item similarity matrix: entry [i, j] compares item i with item j.
# TfidfVectorizer L2-normalises each row by default, so the plain dot product
# computed by linear_kernel is the cosine similarity, without the extra
# normalisation step.
cosine_sim = linear_kernel(X=tfidf_matrix, Y=tfidf_matrix)

# Get Recommendations for a Specific Item
def get_recommendations(item_id, cosine_sim=cosine_sim, df=df, top_n=10):
    """Return the IDs of the items most similar to ``item_id``.

    Args:
        item_id: Value to look up in ``df['id']``. If several rows share the
            ID, the first matching row is used as the query.
        cosine_sim: Item-by-item similarity matrix. Row and column ``i`` are
            expected to describe the ``i``-th row of ``df``.
        df: DataFrame with an ``'id'`` column.
        top_n: Maximum number of recommendations to return (default 10).

    Returns:
        A ``pandas.Series`` of item IDs ordered from most to least similar,
        excluding the queried row. Items with equal scores keep their
        original row order. If ``item_id`` is not present in ``df``, an error
        message string is returned instead; this is kept so existing callers
        that check for the string keep working.

    Raises:
        ValueError: If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    # Locate the item by *position*, not by index label: cosine_sim is indexed
    # positionally, so a filtered or re-indexed df must not shift the lookup.
    positions = df['id'].eq(item_id).to_numpy().nonzero()[0]
    if positions.size == 0:
        return f"Error: Item with ID '{item_id}' not found."
    idx = int(positions[0])

    # Pairwise similarity scores between the queried item and every item.
    scores = cosine_sim[idx]

    # Exclude the queried row explicitly instead of assuming it sorts first:
    # with tied scores (duplicate content, or a post whose vector is all
    # zeros) another item can rank ahead of it.
    others = [(i, score) for i, score in enumerate(scores) if i != idx]

    # Stable sort, so ties stay in original row order.
    others.sort(key=lambda pair: pair[1], reverse=True)

    top_positions = [i for i, _ in others[:top_n]]
    return df['id'].iloc[top_positions]

# Demo: fetch the recommendations for two sample posts, then print both.
recs_1 = get_recommendations('item-1')  # the Competitive Programming post
recs_3 = get_recommendations('item-3')  # the Machine Learning post

print("Recommendations for 'item-1':")
print(recs_1, "\n")
print("Recommendations for 'item-3':")
print(recs_3)

Recommendations for 'item-1':
1    item-2
3    item-4
2    item-3
4    item-5
Name: id, dtype: object 

Recommendations for 'item-3':
4    item-5
0    item-1
1    item-2
3    item-4
Name: id, dtype: object
