In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


In [2]:
# Read the two inputs stored under ../data: the saved user vector (.npy)
# and the processed news table (.csv).
user_vector = np.load(file='../data/user_vector.npy')
news_df = pd.read_csv(filepath_or_buffer='../data/processed_news.csv')


In [3]:
# Build the TF-IDF matrix for the articles.
# This fits a *new* vectorizer on 'processed_content'; nothing is loaded from
# disk here.
# NOTE(review): the similarity step only makes sense if user_vector was built
# with the same vectorizer settings on the same text column - confirm.
#
# An empty text field is read back from CSV as NaN, and fit_transform rejects
# NaN documents, so restore those entries as empty strings before fitting.
documents = news_df['processed_content'].fillna('')
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(documents)


In [4]:
# Compute the cosine similarity between the user vector and every article.
# cosine_similarity only accepts 2-D inputs, and a vector saved as a flat
# array comes back from np.load as 1-D, so coerce it to a single row first.
user_row = np.asarray(user_vector).reshape(1, -1)

# The user vector must have one entry per TF-IDF feature; fail early with a
# readable message instead of a generic shape error from scikit-learn.
if user_row.shape[1] != tfidf_matrix.shape[1]:
    raise ValueError(
        f"user_vector has {user_row.shape[1]} features but the TF-IDF matrix "
        f"has {tfidf_matrix.shape[1]}; rebuild the user vector with the same "
        "vectorizer"
    )

# The result has shape (1, n_articles); flatten it to one score per article.
similarities = cosine_similarity(user_row, tfidf_matrix).flatten()


In [5]:
# Attach each article's score as the 'similarity' column
news_df = news_df.assign(similarity=similarities)

In [6]:
# Rank the articles from most to least similar
news_df = news_df.sort_values(
    by=['similarity'],
    ascending=False,
)

In [7]:
# Save the ranked table. Every row is written (not only the top few), with
# the best matches first because news_df is already sorted by similarity.
output_path = Path('../results/sample_recommendations.csv')
# to_csv does not create missing folders, so make sure ../results exists.
output_path.parent.mkdir(parents=True, exist_ok=True)
news_df.to_csv(output_path, index=False)

# Display the ten highest-scoring articles
news_df[['title', 'category', 'similarity']].head(10)

Unnamed: 0,title,category,similarity
4835,Best Looks: Queen Maxima of the Netherlands,lifestyle,0.516158
104,Queen Elizabeth's Cousin Says Royal Family 'Do...,lifestyle,0.458662
46107,The Queen Will No Longer Wear Real Fur,lifestyle,0.44528
36134,The Queen is apparently a fan of this royal,video,0.412667
28486,The Latest Celebrity to Go Fur Free? Queen Eli...,lifestyle,0.412479
27854,9 royal tour rules the royal family has to follow,lifestyle,0.39993
44399,British royals attend Remembrance event,news,0.399813
30025,Best looks: Queen Letizia of Spain,lifestyle,0.398844
102,"When royals lose their tempers, from the Queen...",lifestyle,0.392164
39,25 Photos of the Royal Family at Balmoral Cast...,lifestyle,0.383096
