
# 📌 04 - Ranking and Recommendation (Simplified Notebook)
This notebook runs the full pipeline of a content-based news recommendation system:
- Load and preprocess data
- Extract TF-IDF features
- Build a user profile
- Compute cosine similarity
- Rank and recommend top articles


In [None]:

# 📚 Import Required Libraries
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


In [None]:

# 📥 Load and Preprocess News Data
# Read the articles, replace missing titles/abstracts with empty strings,
# then join both fields into one `content` text column.
news_df = (
    pd.read_csv('../data/news.csv')
    .assign(
        title=lambda frame: frame['title'].fillna(''),
        abstract=lambda frame: frame['abstract'].fillna(''),
    )
    # Second step so `content` is built from the already-filled columns.
    .assign(content=lambda frame: frame['title'] + ' ' + frame['abstract'])
)


In [None]:

# 🧠 TF-IDF Vectorization
# Drop English stop words and cap the vocabulary at 5000 terms.
tfidf_settings = {'stop_words': 'english', 'max_features': 5000}
vectorizer = TfidfVectorizer(**tfidf_settings)

# Learn the vocabulary from the article text and encode every article
# in a single pass.
article_corpus = news_df['content']
tfidf_matrix = vectorizer.fit_transform(article_corpus)


In [None]:

# 🧑‍💻 Create a User Profile Based on Interests
# Free-text description of the user's interests.
user_input_text = """
    politics economy international government education
"""

# Encode the interests with the vectorizer fitted on the articles so the
# profile lives in the same feature space; `transform` expects an iterable
# of documents, hence the one-element list.
user_documents = [user_input_text]
user_profile_vector = vectorizer.transform(user_documents)


In [None]:

# 📈 Compute Cosine Similarity
# Compare the single user profile against every article; the result is a
# 2-D array, so flatten it into one score per article.
pairwise_scores = cosine_similarity(user_profile_vector, tfidf_matrix)
similarity_scores = pairwise_scores.flatten()

# Store each article's score next to its metadata for ranking.
news_df['similarity_score'] = similarity_scores


In [None]:

# 🏆 Rank and Display Top Recommendations
top_n = 10

# Rank articles from most to least similar and keep the best `top_n`.
# mergesort is a stable algorithm, so articles with equal scores keep their
# original row order instead of an unspecified one.
recommendations = news_df.sort_values(
    by='similarity_score', ascending=False, kind='mergesort'
).head(top_n)

print("✅ Top News Recommendations for You:\n")
# itertuples avoids building a Series per row and the unused index variable.
for article in recommendations.itertuples(index=False):
    print(f"📰 Title: {article.title}")
    print(f"📂 Category: {article.category} - {article.subcategory}")
    print(f"⭐ Similarity Score: {article.similarity_score:.4f}")
    print("------")

# Save to file (optional)
# to_csv does not create missing directories, so make sure the output folder
# exists first; otherwise the cell fails on a fresh checkout.
output_path = Path('../results/sample_recommendations.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
recommendations.to_csv(output_path, index=False)
