In [7]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import mean_squared_error
import numpy as np

In [9]:
# Read the first 20,000 review rows and keep only the columns the recommender uses.
data = pd.read_csv('combined_data_with_sentiment.csv', nrows=20000).loc[
    :,
    ['name', 'course_url', 'course_id', 'reviewers', 'rating', 'avg_rating', 'review_count'],
]

In [11]:
# Preview the first five rows of the loaded review data.
data.head(5)

Unnamed: 0,name,course_url,course_id,reviewers,rating,avg_rating,review_count
0,Machine Learning,https://www.coursera.org/learn/machine-learning,machine-learning,By Deleted A,1,4.750522,35895
1,Machine Learning,https://www.coursera.org/learn/machine-learning,machine-learning,By Bruno C,1,4.750522,35895
2,Machine Learning,https://www.coursera.org/learn/machine-learning,machine-learning,By Fadi,1,4.750522,35895
3,Machine Learning,https://www.coursera.org/learn/machine-learning,machine-learning,By Mathew L,1,4.750522,35895
4,Machine Learning,https://www.coursera.org/learn/machine-learning,machine-learning,By Rui C,1,4.750522,35895


In [20]:
# Create User-Item Interaction Matrix
# Rows are reviewers, columns are course ids, values are ratings. pivot_table's
# default aggregation is the mean, so repeated (reviewer, course) pairs are averaged.
# NaN marks a course the reviewer has no rating for; this raw matrix is kept so
# that information survives the imputation below.
user_item_raw = data.pivot_table(
    index='reviewers', columns='course_id', values='rating'
)

# Fill each reviewer's missing ratings with that reviewer's own mean rating.
# `where(..., axis=0)` aligns the per-row means on the index, which replaces the
# slower row-by-row `apply` with a single vectorised operation.
user_item_matrix = user_item_raw.where(
    user_item_raw.notna(), user_item_raw.mean(axis=1), axis=0
)

In [22]:
# Pairwise cosine similarity between reviewers' rating vectors (rows of the matrix).
# NOTE(review): in the recorded run the matrix shows a single course column, and the
# similarities printed further down are all 1.0 -- confirm the similarity is
# informative once more courses are loaded.
user_similarity = cosine_similarity(user_item_matrix)

# Label both axes with reviewer ids so similarities can be looked up by name.
user_similarity_df = pd.DataFrame(
    data=user_similarity,
    index=user_item_matrix.index,
    columns=user_item_matrix.index,
)

In [24]:
def recommend_courses(user_id, num_recommendations=5, ratings=None,
                      similarity=None, interactions=None):
    """Recommend courses a reviewer has not reviewed yet, ranked by predicted rating.

    The predicted rating of a course is the similarity-weighted average of the
    other reviewers' ratings for that course.

    Parameters
    ----------
    user_id : hashable
        Reviewer label; must be present in the index of ``ratings``.
    num_recommendations : int, default 5
        Maximum number of (course, score) pairs to return.
    ratings : pandas.DataFrame, optional
        Reviewer x course rating matrix. Defaults to the notebook-level
        ``user_item_matrix``.
    similarity : pandas.DataFrame, optional
        Reviewer x reviewer similarity matrix. Defaults to the notebook-level
        ``user_similarity_df``.
    interactions : pandas.DataFrame, optional
        Raw review rows with ``reviewers``, ``course_id`` and ``rating`` columns,
        used to decide which courses the reviewer has really rated. Defaults to
        the notebook-level ``data``.

    Returns
    -------
    list of (course_id, score) tuples
        Sorted by descending score; empty when the user is unknown, has reviewed
        every course, or has no neighbour with non-zero similarity.
    """
    # Fall back to the notebook-level objects when no explicit inputs are given.
    if ratings is None:
        ratings = user_item_matrix
    if similarity is None:
        similarity = user_similarity_df
    if interactions is None:
        interactions = data

    if user_id not in ratings.index:
        print(f"User {user_id} not found in the dataset.")
        return []

    # The notebook's rating matrix is mean-imputed, so no sentinel value (0 or NaN)
    # marks an unrated course there; the raw review rows are the source of truth.
    own_reviews = interactions[
        (interactions['reviewers'] == user_id) & interactions['rating'].notna()
    ]
    reviewed_courses = set(own_reviews['course_id'])
    unrated_courses = [
        course for course in ratings.columns if course not in reviewed_courses
    ]
    if not unrated_courses:
        print(f"User {user_id} has already rated all courses.")
        return []

    # Similarity to every *other* reviewer; the user's own entry is dropped so it
    # is excluded from both the weighted sum and the normaliser.
    neighbour_weights = similarity[user_id].drop(labels=user_id)
    weight_total = neighbour_weights.abs().sum()
    if not weight_total > 0:
        # No neighbours, or only zero-similarity ones: nothing to base a score on.
        print(f"No similar users available for {user_id}.")
        return []

    # Similarity-weighted average rating per course, computed in one vectorised pass.
    neighbour_ratings = ratings.loc[neighbour_weights.index]
    predicted = (
        neighbour_ratings.mul(neighbour_weights, axis=0).sum(axis=0) / weight_total
    )

    # Rank only the courses the reviewer has not reviewed and keep the top N.
    ranked = sorted(
        ((course, predicted[course]) for course in unrated_courses),
        key=lambda pair: pair[1],
        reverse=True,
    )
    return ranked[:num_recommendations]


In [26]:
# Recommendations for a target user
# `user_id` is left at notebook level because the diagnostic cell below reads it.
user_id = 'By A.S.M. P'
recommendations = recommend_courses(user_id, num_recommendations=5)
print(f"Recommendations for {user_id}: {recommendations}")

User By A.S.M. P has already rated all courses.
Recommendations for By A.S.M. P: []


In [103]:
# Check how many courses the user has rated.
# The user-item matrix is mean-imputed, so a missing rating is never 0 there;
# the courses the user really reviewed are read from the raw review rows instead.
# Relies on `user_id` set in the recommendation cell above.
reviewed_course_ids = data.loc[
    (data['reviewers'] == user_id) & data['rating'].notna(), 'course_id'
]
user_row = user_item_matrix.loc[user_id]
was_reviewed = user_row.index.isin(reviewed_course_ids)

rated_courses = user_row[was_reviewed]
print(f"Rated courses for {user_id}:\n{rated_courses}")

# Check for unrated courses: every matrix column the user has no review row for.
unrated_courses = user_row[~was_reviewed]
if unrated_courses.empty:
    print(f"User {user_id} has rated all available courses. No recommendations can be generated.")
else:
    print(f"Unrated courses for {user_id}: {list(unrated_courses.index)}")


Rated courses for By A.S.M. P:
course_id
machine-learning    4.0
Name: By A.S.M. P, dtype: float64
User By A.S.M. P has rated all available courses. No recommendations can be generated.


In [105]:
# Show the five highest similarity scores in this reviewer's column.
print(
    user_similarity_df.loc[:, 'By AMARJEET K']
    .sort_values(ascending=False)
    .head(5)
)


reviewers
By   M D M      1.0
By Rahul p      1.0
By Rahul M      1.0
By Rahul N R    1.0
By Rahul P      1.0
Name: By AMARJEET K, dtype: float64
