In [343]:
# For Building recommender systems
!pip install scikit-surprise



In [344]:
import pandas as pd
import numpy as np
import joblib
from surprise import SVD, Reader, Dataset, accuracy

In [345]:
news_df = pd.read_csv('/content/news.csv')
rec_items_df = pd.read_csv('/content/rec_items.csv')
rec_feedback_df = pd.read_csv('/content/rec_feedback.csv')
users_df = pd.read_csv('/content/users.csv')
rec_users_df = pd.read_csv('/content/rec_users.csv')

In [346]:
# Load the CBF model (TF-IDF and Cosine Similarity Matrix)
tfidf = joblib.load('/content/tfidf_vectorizer.pkl')
cosine_sim = joblib.load('/content/cosine_similarity_matrix.pkl')

# Load the CF model (e.g., a pre-trained collaborative filtering model)
cf_model = joblib.load('/content/svd_recommender_model.pkl')

In [347]:
# Define custom interaction weights
interaction_weights = {'click': 1, 'read': 2, 'like': 3}

# Map interaction types to their corresponding weights
rec_feedback_df['rating'] = rec_feedback_df['feedback_type'].map(interaction_weights)

rec_feedback_df['rating'] = rec_feedback_df['rating'].fillna(0)

In [348]:
# Function to get CBF (Content-Based Filtering) recommendations
def get_cbf_recommendations(user_id, top_n=5):
    user_interactions = rec_feedback_df[rec_feedback_df['user_id'] == user_id]
    interacted_article_ids = user_interactions['item_id'].unique()

    if len(interacted_article_ids) == 0:
        return "No interactions found for this user."

    interacted_article_indices = news_df[news_df['id'].isin(interacted_article_ids)].index.tolist()
    if len(interacted_article_indices) == 0:
        return "No articles found for user interactions."

    sim_scores = []
    for idx in interacted_article_indices:
        sim_scores.extend(list(enumerate(cosine_sim[idx])))

    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    recommended_articles = []

    # Ensure diversity by checking different topics (optional)
    recommended_articles_set = set()
    for score in sim_scores:
        article_idx = score[0]
        if news_df.iloc[article_idx]['id'] not in interacted_article_ids:
            recommended_articles.append(news_df.iloc[article_idx][['id', 'title', 'description']])
            recommended_articles_set.add(news_df.iloc[article_idx]['id'])
        if len(recommended_articles) == top_n:
            break

    return pd.DataFrame(recommended_articles)

In [349]:
# Function to get CF (Collaborative Filtering) recommendations
def get_cf_recommendations(user_id, top_n=5):
    user_interactions = rec_feedback_df[rec_feedback_df['user_id'] == user_id]
    if user_interactions.empty:
        return "No interactions found for this user."

    reader = Reader(rating_scale=(0, 3))  # Custom rating scale
    data = Dataset.load_from_df(rec_feedback_df[['user_id', 'item_id', 'rating']], reader)
    trainset = data.build_full_trainset()

    all_items = news_df['id'].tolist()
    predictions = []
    for item_id in all_items:
        prediction = cf_model.predict(user_id, item_id)  # Predict the rating for user and item
        predictions.append((item_id, prediction.est))  # Collect item and predicted rating

    predictions.sort(key=lambda x: x[1], reverse=True)
    recommended_articles = []
    for item_id, _ in predictions[:top_n]:
        article = news_df[news_df['id'] == item_id][['id', 'title', 'description']].iloc[0]
        recommended_articles.append(article)

    return pd.DataFrame(recommended_articles)

In [350]:
# Hybrid Recommendation Function (combining CF and CBF)
def get_hybrid_recommendations(user_id, top_n=5, alpha=0.5):
    """
    Combine CF and CBF recommendations with a weight parameter alpha.
    alpha: weight to combine CF and CBF (0.5 for equal weight).
    """
    # Get CBF recommendations
    cbf_recs = get_cbf_recommendations(user_id, top_n)

    # If no CBF recommendations, return CF recommendations
    if isinstance(cbf_recs, str):
        return get_cf_recommendations(user_id, top_n)

    # Get CF recommendations
    cf_recs = get_cf_recommendations(user_id, top_n)

    # Merge and add score for hybrid model
    cbf_recs['score'] = alpha
    cf_recs['score'] = 1 - alpha

    combined_recommendations = pd.concat([cbf_recs, cf_recs], ignore_index=True)
    combined_recommendations = combined_recommendations.drop_duplicates(subset='id', keep='first')

    # Sorting by score (hybrid recommendation)
    combined_recommendations = combined_recommendations.sort_values(by='score', ascending=False)

    # Get the top-N recommendations
    return combined_recommendations.head(top_n)

In [351]:
user_ids = [2329, 645, 678]

for user_id in user_ids:
    print(f"Recommendations for User {user_id}:\n")
    hybrid_recommendations = get_hybrid_recommendations(user_id, top_n=5, alpha=0.5)
    print(hybrid_recommendations)
    print("\n---\n")

Recommendations for User 2329:

      id                                              title  \
0  23269  ලාංකිකයින් 3065ක් අමෙරිකාවෙන් පිටුවහල් කිරීමට ...   
1  23484            ඉන්ධන සැපයූ සමාගමක් ලංකාවෙන් ඉවත්ව යයි.   
2  23848                   ට්‍රම්ප්ගෙන් 25%ක අලුත් බද්දක්\n   
3  23521           සංචාරකයින්ගේ පැමිණීමේ වාර්තාගත වැඩිවීමක්   
4  23826        ජනපතිගේ ඩුබායි නිල සංචාරයේ දෙවැනි දිනය අදයි   

                                         description  score  
0  නව අමෙරිකානු පරිපාලනය විසින් පිටුවහල් කිරීමට න...    0.5  
1  මෙරට ඉන්ධන වෙලදාම සදහා පැමිණි ඕස්ට්‍රේලියාවේ ...    0.5  
2  අමෙරිකාවට ආනයනය කරන සියලු‍ම වානේ සහ ඇලු‍මිනියම...    0.5  
3  මෙරටට පැමිණි සංචාරකයින්ගේ වාර්තාගත වැඩි වීමක් ...    0.5  
4   ජනාධිපති අනුර කුමාර දිසානායක මහතාගේ එක්සත් අර...    0.5  

---

Recommendations for User 645:

      id                                              title  \
0  23854  පරිභෝජනයට නුසුදුසු කඩල පරිප්පු වෙළෙඳ පොළට නිකු...   
1  23852        CID වැඩවලට අලුත් විමර්ශන කොට්ඨාසයක් එක