In [16]:
import pandas as pd
from scipy import sparse
import pickle

# --- Corpus artifacts (../data) ----------------------------------------------

# Cleaned news articles, stored as a pickled DataFrame.
news_df = pd.read_pickle("../data/cleaned_news_df.pkl")

# TF-IDF matrix, stored in SciPy's sparse .npz format.
tfidf_matrix = sparse.load_npz("../data/tfidf_matrix.npz")


def _read_pickle(path):
    """Open `path` in binary mode and return the object unpickled from it."""
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Fitted TF-IDF vectorizer (only needed if new text has to be vectorized).
tfidf_vectorizer = _read_pickle("../data/tfidf_vectorizer.pkl")

# --- Per-user results (../results) -------------------------------------------

# User profile vector (assumed to have been saved earlier, in step 02).
user_profile_vector = _read_pickle('../results/user_profile_vector.pkl')

# Cosine-similarity scores computed in step 03.
similarity_scores = _read_pickle('../results/similarity_scores.pkl')


In [17]:
# 04_ranking_and_recommendation.ipynb
# Rank and display recommendations

import pandas as pd
import numpy as np

# Rank the articles by similarity score and keep the best `top_n`.
#
# Relies on `news_df` and `similarity_scores` having been loaded by an earlier
# cell. Positions in the score array are used directly as row positions in
# `news_df` (via .iloc), so both must describe the same articles in the same
# order.

# Number of recommendations to produce (adjust as needed).
top_n = 10

# Collapse the scores to a plain 1-D ndarray. np.asarray(...).ravel() also
# handles list and np.matrix input, where a bare .flatten() would either be
# missing or leave the data 2-D.
similarity_scores = np.asarray(similarity_scores).ravel()

# A score vector whose length differs from the article table would make .iloc
# pick the wrong rows (or raise an opaque IndexError), so fail early and
# clearly instead.
if similarity_scores.shape[0] != len(news_df):
    raise ValueError(
        f"similarity_scores has {similarity_scores.shape[0]} entries but "
        f"news_df has {len(news_df)} rows; they must be aligned one-to-one."
    )

# Article positions ordered from highest to lowest score. NumPy places NaN at
# the end of an ascending sort, so after the reversal NaN entries would lead
# the ranking; filter them out so an undefined score is never recommended.
ranked_indices = similarity_scores.argsort()[::-1]
ranked_indices = ranked_indices[~np.isnan(similarity_scores[ranked_indices])]
top_indices = ranked_indices[:top_n]

# Look up the recommended articles by row position.
recommended_articles = news_df.iloc[top_indices]

# Export the recommendations to a CSV file (without the DataFrame index).
recommended_articles[['news_id', 'title', 'category', 'abstract']].to_csv("../results/sample_recommendations.csv", index=False)

# Display the top recommended articles (news_id, title and category only).
print(recommended_articles[['news_id', 'title', 'category']])


      news_id                                              title      category
51184  N47901  10 things that didn't suck in the Lions' loss ...        sports
3507   N46478  Man Charged With Assault For Punch Outside Tru...          news
50296  N19646                  The Bear's Den, November 12, 2019        sports
45517  N21519  RHOP's Monique Samuels Charged With Assault Af...            tv
27388  N52316  Man Facing Charges In North Side Assault, Accu...          news
10893  N62234  Protesters burn MAGA hats, police use pepper s...          news
20332    N256  Minneapolis Man Charged With Absentee Ballot F...          news
10380  N23604  State corrections officer from Foley charged i...  foodanddrink
27055  N40103  Man Killed In Minneapolis Assault; Homicide In...          news
49116  N10759  Bears vs. Lions: How to get tickets, channel i...        sports


In [18]:
# Show the flattened similarity scores under a heading; the heading and the
# array are emitted on separate lines by using a newline separator.
heading = "Similarity Scores:"
print(heading, similarity_scores.flatten(), sep="\n")


Similarity Scores:
[0.         0.         0.01972448 ... 0.         0.         0.        ]
