<a href="https://colab.research.google.com/github/RedietNegash/Machine-Learning/blob/main/Collaborative_Filtering.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
df_articles = pd.read_csv("/content/drive/MyDrive/Machine-Learning/Recommendation-Systems/articles_metadata.csv", low_memory=False)
df_ratings=pd.read_csv("/content/drive/MyDrive/Machine-Learning/Recommendation-Systems/article_ratings_small.csv")
df_ratings.head(10)

Unnamed: 0,userId,articleId,rating,timestamp
0,1,1,4.0,1622555555
1,1,2,5.0,1622555556
2,1,3,4.0,1622555557
3,1,4,3.0,1622555558
4,1,5,5.0,1622555559
5,2,1,3.0,1622555560
6,2,3,4.0,1622555561
7,2,6,2.0,1622555562
8,2,7,5.0,1622555563
9,2,10,4.0,1622555564


In [5]:
df_merged = pd.merge(df_ratings, df_articles, on="articleId", how="left")
print(df_merged.head())

   userId  articleId  rating   timestamp                           title  \
0       1          1     4.0  1622555555     The Impact of AI on Society   
1       1          2     5.0  1622555556  Understanding Machine Learning   
2       1          3     4.0  1622555557         Deep Learning Explained   
3       1          4     3.0  1622555558          The Future of Robotics   
4       1          5     5.0  1622555559                       AI Ethics   

                                             content  
0              AI is transforming various sectors...  
1              Machine learning is a subset of AI...  
2  Deep learning is a technique in machine learni...  
3  Robots are increasingly becoming part of our d...  
4  Ethical considerations in AI development are c...  


In [6]:
df_merged.head()

Unnamed: 0,userId,articleId,rating,timestamp,title,content
0,1,1,4.0,1622555555,The Impact of AI on Society,AI is transforming various sectors...
1,1,2,5.0,1622555556,Understanding Machine Learning,Machine learning is a subset of AI...
2,1,3,4.0,1622555557,Deep Learning Explained,Deep learning is a technique in machine learni...
3,1,4,3.0,1622555558,The Future of Robotics,Robots are increasingly becoming part of our d...
4,1,5,5.0,1622555559,AI Ethics,Ethical considerations in AI development are c...


In [7]:

pivoted_data =df_ratings.pivot(index='userId', columns='articleId', values='rating')
pivoted_data



articleId,1,2,3,4,5,6,7,8,9,10,11
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,4.0,5.0,4.0,3.0,5.0,,,,,,
2,3.0,,4.0,,,2.0,5.0,,,4.0,
3,,,,5.0,2.0,4.0,,5.0,,,3.0
4,,4.0,5.0,,,,4.0,,3.0,,
5,4.0,,,,,5.0,,,,,


In [8]:
pivoted_data_filled = pivoted_data.fillna(0)


In [9]:
user_similarity = cosine_similarity(pivoted_data_filled)
user_similarity

array([[1.        , 0.35082321, 0.29485315, 0.51613977, 0.26194334],
       [0.35082321, 1.        , 0.10757898, 0.58848989, 0.41065937],
       [0.29485315, 0.10757898, 1.        , 0.        , 0.35141842],
       [0.51613977, 0.58848989, 0.        , 1.        , 0.        ],
       [0.26194334, 0.41065937, 0.35141842, 0.        , 1.        ]])

In [10]:
user_similarity_df = pd.DataFrame(user_similarity, index=pivoted_data_filled.index, columns=pivoted_data_filled.index)
user_similarity_df

userId,1,2,3,4,5
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,1.0,0.350823,0.294853,0.51614,0.261943
2,0.350823,1.0,0.107579,0.58849,0.410659
3,0.294853,0.107579,1.0,0.0,0.351418
4,0.51614,0.58849,0.0,1.0,0.0
5,0.261943,0.410659,0.351418,0.0,1.0


In [11]:
def get_k_nearest_neighbors(similarity_matrix, target_user, k):
    similarities = similarity_matrix.loc[target_user]
    similar_users = similarities.sort_values(ascending=False).index[1:k+1]
    return similar_users

In [12]:
def recommend_articles(ratings, similarity_matrix, target_user, k):

    if target_user not in similarity_matrix.index:
        print(f"Target user {target_user} not found in similarity matrix.")
        return None


    nearest_neighbors = get_k_nearest_neighbors(similarity_matrix, target_user, k)
    neighbor_ratings = ratings.loc[nearest_neighbors]
    avg_ratings = neighbor_ratings.mean()
    user_ratings = ratings.loc[target_user]



    recommendations = avg_ratings

    print("Recommendations including the user seen articles")
    print(recommendations)

    recommendations = avg_ratings[user_ratings == 0]
    print("Filtered recommendations (excluding articles already rated by target user):")
    print(recommendations)




    return recommendations


In [13]:
k = 2
target_user = 3
recommended_articles = recommend_articles(pivoted_data_filled, user_similarity_df, target_user, k)


Recommendations including the user seen articles
articleId
1     4.0
2     2.5
3     2.0
4     1.5
5     2.5
6     2.5
7     0.0
8     0.0
9     0.0
10    0.0
11    0.0
dtype: float64
Filtered recommendations (excluding articles already rated by target user):
articleId
1     4.0
2     2.5
3     2.0
7     0.0
9     0.0
10    0.0
dtype: float64


In [14]:
def evaluate_collaborative_filtering(k=10, relevance_threshold=3.5):
    precision_sum = 0
    recall_sum = 0
    user_count = 0

    for user_id in user_similarity_df.index:
        user_count += 1
        recommended_articles = recommend_articles(pivoted_data_filled, user_similarity_df, user_id, k)
        if recommended_articles is None:
            continue

        # Relevant articles for the user
        user_ratings = df_ratings[df_ratings['userId'] == user_id]
        relevant_articles = set(user_ratings[user_ratings['rating'] >= relevance_threshold]['articleId'])

        # Extracting the list of recommended article IDs from the recommended articles
        recommended_ids = set(recommended_articles.index)
        relevant_and_recommended = recommended_ids & relevant_articles

        # Precision and Recall calculation
        precision = len(relevant_and_recommended) / k
        recall = len(relevant_and_recommended) / len(relevant_articles) if len(relevant_articles) > 0 else 0

        precision_sum += precision
        recall_sum += recall

    precision_at_k = precision_sum / user_count
    recall_at_k = recall_sum / user_count
    f1_at_k = (2 * precision_at_k * recall_at_k) / (precision_at_k + recall_at_k) if (precision_at_k + recall_at_k) > 0 else 0

    return precision_at_k, recall_at_k, f1_at_k
precision_at_k, recall_at_k, f1_at_k = evaluate_collaborative_filtering(k=10)

print(f"Precision@{k}: {precision_at_k}")
print(f"Recall@{k}: {recall_at_k}")
print(f"F1 Score@{k}: {f1_at_k}")


Recommendations including the user seen articles
articleId
1     1.75
2     1.00
3     2.25
4     1.25
5     0.50
6     2.75
7     2.25
8     1.25
9     0.75
10    1.00
11    0.75
dtype: float64
Filtered recommendations (excluding articles already rated by target user):
articleId
6     2.75
7     2.25
8     1.25
9     0.75
10    1.00
11    0.75
dtype: float64
Recommendations including the user seen articles
articleId
1     2.00
2     2.25
3     2.25
4     2.00
5     1.75
6     2.25
7     1.00
8     1.25
9     0.75
10    0.00
11    0.75
dtype: float64
Filtered recommendations (excluding articles already rated by target user):
articleId
2     2.25
4     2.00
5     1.75
8     1.25
9     0.75
11    0.75
dtype: float64
Recommendations including the user seen articles
articleId
1     2.75
2     2.25
3     3.25
4     0.75
5     1.25
6     1.75
7     2.25
8     0.00
9     0.75
10    1.00
11    0.00
dtype: float64
Filtered recommendations (excluding articles already rated by target user):
artic