# Ranking and Recommendation

In [4]:
import pandas as pd
import numpy as np
import pickle
from sklearn.metrics.pairwise import cosine_similarity

# Read the pre-computed similarity tables for the two user-profile variants
# (category-based and keyword-based) from the results directory.
similarity_df_category = pd.read_pickle(
    r'C:\Users\PC\Desktop\CONTENT FILTER\results\similarity_by_category.pkl'
)
similarity_df_keywords = pd.read_pickle(
    r'C:\Users\PC\Desktop\CONTENT FILTER\results\similarity_by_keywords.pkl'
)

In [5]:
# Function to get top-N recommendations
def get_recommendations(similarity_df, top_n=10):
    """Return the ``top_n`` rows of ``similarity_df`` with the highest scores.

    Parameters
    ----------
    similarity_df : pandas.DataFrame
        Must contain the columns ``nid``, ``title``, ``category`` and
        ``similarity_score``; any other columns are dropped from the result.
    top_n : int, default 10
        Number of rows to keep after ranking.

    Returns
    -------
    pandas.DataFrame
        The selected rows, ordered by ``similarity_score`` from highest to
        lowest, restricted to the four columns listed above.
    """
    output_columns = ['nid', 'title', 'category', 'similarity_score']

    # Rank every article, best match first, then cut the list to size.
    ranked = similarity_df.sort_values(by='similarity_score', ascending=False)
    best_matches = ranked.head(top_n)

    return best_matches[output_columns]

# Number of articles to keep per profile. The printed headers are built from
# the same value so they cannot drift from what get_recommendations returns.
TOP_N = 10

# Get recommendations based on category profile
category_recommendations = get_recommendations(similarity_df_category, top_n=TOP_N)

# Get recommendations based on keyword profile
keyword_recommendations = get_recommendations(similarity_df_keywords, top_n=TOP_N)

# NOTE: DataFrame.to_markdown needs the optional `tabulate` package.
print(f"Top {TOP_N} recommendations by category profile:")
print(category_recommendations.to_markdown(index=False))

print(f"\nTop {TOP_N} recommendations by keyword profile:")
print(keyword_recommendations.to_markdown(index=False))

# Save recommendations to CSV (index omitted; it carries no information here)
category_recommendations.to_csv(r'C:\Users\PC\Desktop\CONTENT FILTER\results\category_recommendations.csv', index=False)
keyword_recommendations.to_csv(r'C:\Users\PC\Desktop\CONTENT FILTER\results\keyword_recommendations.csv', index=False)

Top 10 recommendations by category profile:
| nid    | title                                                                     | category   |   similarity_score |
|:-------|:--------------------------------------------------------------------------|:-----------|-------------------:|
| N25490 | Former Michigan players rack up the sacks in NFL Week 7                   | sports     |           0.31883  |
| N44162 | Patriots vs. Ravens: 3 keys to the game, fun facts, prediction            | sports     |           0.316671 |
| N59403 | Texas college football rankings: Watch out for Baylor                     | sports     |           0.309627 |
| N16339 | John McClain's 2019 NFL power rankings: Week 10                           | sports     |           0.298919 |
| N35408 | Iowa State Football: JaQuan Bailey expected to return to Cyclones in 2020 | sports     |           0.298356 |
| N13476 | Cal's win over Washington State prompts new hope for postseason berth     | sports     |          

In [7]:
# Interactive function to get personalized recommendations

# Artifacts read by the interactive flow. Both menu branches use the same
# files, so each location is defined exactly once.
_NEWS_PKL = r'C:\Users\PC\Desktop\CONTENT FILTER\results\processed_news.pkl'
_TFIDF_FEATURES_PKL = r'C:\Users\PC\Desktop\CONTENT FILTER\results\tfidf_features.pkl'
_TFIDF_VECTORIZER_PKL = r'C:\Users\PC\Desktop\CONTENT FILTER\results\tfidf_vectorizer.pkl'

_MAX_RECOMMENDATIONS = 20
_DEFAULT_RECOMMENDATIONS = 10


def _split_user_terms(raw_text):
    """Split comma-separated user input into lower-cased, non-empty terms."""
    return [term.strip().lower() for term in raw_text.split(',') if term.strip()]


def _prompt_top_n():
    """Ask how many rows to show; return a count in [1, _MAX_RECOMMENDATIONS]."""
    answer = input(
        f"How many recommendations do you want? (max {_MAX_RECOMMENDATIONS}) "
    ).strip()
    try:
        requested = int(answer)
    except ValueError:
        print(f"'{answer}' is not a whole number; showing {_DEFAULT_RECOMMENDATIONS} instead.")
        return _DEFAULT_RECOMMENDATIONS
    # Clamp on both sides: head() with 0 or a negative count would not give a
    # "top N" list at all.
    return max(1, min(_MAX_RECOMMENDATIONS, requested))


def _load_tfidf_matrix(expected_rows):
    """Load the TF-IDF feature matrix and check it has one row per article."""
    tfidf_matrix = pd.read_pickle(_TFIDF_FEATURES_PKL).values
    if tfidf_matrix.shape[0] != expected_rows:
        raise ValueError(
            f"TF-IDF matrix has {tfidf_matrix.shape[0]} rows but the news table "
            f"has {expected_rows}; the two pickles are out of sync."
        )
    return tfidf_matrix


def _show_top_recommendations(news_df, similarity_scores):
    """Prompt for a count and print the best-scoring articles as a table."""
    # Columns are taken positionally so scores line up with articles no matter
    # what index news_df carries.
    recommendations_df = pd.DataFrame({
        'title': news_df['title'].to_numpy(),
        'category': news_df['category'].to_numpy(),
        'score': similarity_scores,
    })

    top_n = _prompt_top_n()
    recommendations = recommendations_df.sort_values('score', ascending=False).head(top_n)

    # Report the number of rows actually shown (may be fewer than requested).
    print(f"\nTop {len(recommendations)} recommendations:")
    print(recommendations.to_markdown(index=False))


def _recommend_by_categories():
    """Build a profile from the chosen categories and show similar articles."""
    news_df = pd.read_pickle(_NEWS_PKL)
    categories = news_df['category'].dropna().unique()
    print("Available categories:")
    print(", ".join(map(str, categories)))

    user_categories = _split_user_terms(
        input("Enter your preferred categories (comma separated): ")
    )

    # Boolean row mask rather than index labels: the TF-IDF matrix is a plain
    # NumPy array and can only be addressed by position.
    selected_rows = news_df['category'].str.lower().isin(user_categories).to_numpy()
    if not selected_rows.any():
        print("No articles found for the selected categories.")
        return

    tfidf_matrix = _load_tfidf_matrix(len(news_df))
    # User profile = mean TF-IDF vector of every article in the chosen categories.
    user_profile = np.mean(tfidf_matrix[selected_rows], axis=0)

    similarity_scores = cosine_similarity(user_profile.reshape(1, -1), tfidf_matrix)[0]
    _show_top_recommendations(news_df, similarity_scores)


def _recommend_by_keywords():
    """Vectorize the entered keywords and show the most similar articles."""
    keywords = _split_user_terms(input("Enter your keywords (comma separated): "))
    if not keywords:
        print("No keywords entered.")
        return

    # SECURITY: pickle.load can execute arbitrary code; only load a vectorizer
    # file that this project produced itself.
    with open(_TFIDF_VECTORIZER_PKL, 'rb') as f:
        vectorizer = pickle.load(f)
    keywords_vec = vectorizer.transform([' '.join(keywords)]).toarray()[0]
    if not keywords_vec.any():
        # An all-zero query vector scores 0 against every article, so any
        # "ranking" built from it would be arbitrary.
        print("None of the keywords are known to the vectorizer; no recommendations to show.")
        return

    news_df = pd.read_pickle(_NEWS_PKL)
    tfidf_matrix = _load_tfidf_matrix(len(news_df))

    similarity_scores = cosine_similarity([keywords_vec], tfidf_matrix)[0]
    _show_top_recommendations(news_df, similarity_scores)


def interactive_recommendations():
    """Run a one-shot, text-based recommendation session.

    Prints a menu, reads the user's choice with ``input()`` and dispatches:

    * ``1`` - recommend by categories (profile = mean TF-IDF vector of the
      articles in the chosen categories);
    * ``2`` - recommend by free-text keywords (vectorized with the pickled
      vectorizer);
    * ``3`` - placeholder, prints a "coming soon" message;
    * anything else - prints "Invalid choice!".

    Results are printed as a markdown table; nothing is returned.

    Raises
    ------
    ValueError
        If the TF-IDF matrix and the news table have different row counts.
    OSError
        If one of the pickled artifacts cannot be opened.
    """
    print("Welcome to the News Recommendation System!")
    print("How would you like to get recommendations?")
    print("1. By categories")
    print("2. By keywords")
    print("3. By article IDs")

    # Tolerate stray whitespace around the menu choice.
    choice = input("Enter your choice (1-3): ").strip()

    if choice == '1':
        _recommend_by_categories()
    elif choice == '2':
        _recommend_by_keywords()
    elif choice == '3':
        print("Feature coming soon!")
    else:
        print("Invalid choice!")

# Start the interactive session; this call waits for keyboard input via input().
interactive_recommendations()

Welcome to the News Recommendation System!
How would you like to get recommendations?
1. By categories
2. By keywords
3. By article IDs
Available categories:
lifestyle, health, news, sports, weather, entertainment, autos, travel, foodanddrink, tv, finance, movies, video, music, kids, middleeast, northamerica

Top 10 recommendations:
| title                                                                     | category   |    score |
|:--------------------------------------------------------------------------|:-----------|---------:|
| Former Michigan players rack up the sacks in NFL Week 7                   | sports     | 0.31883  |
| Patriots vs. Ravens: 3 keys to the game, fun facts, prediction            | sports     | 0.316671 |
| Texas college football rankings: Watch out for Baylor                     | sports     | 0.309627 |
| John McClain's 2019 NFL power rankings: Week 10                           | sports     | 0.298919 |
| Iowa State Football: JaQuan Bailey expected to retu