In [8]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import OneHotEncoder
import numpy as np

# Step 1: Create Sample Data

# Sample user profiles
user_profiles = pd.DataFrame({
    'user_id': [1, 2, 3],
    'interests': ['Data Science', 'Web Development', 'Machine Learning'],
    'skills': ['Python, SQL', 'HTML, CSS, JavaScript', 'Python, R'],
    'learning_preference': ['Video', 'Text', 'Video']
})

# Sample courses
courses = pd.DataFrame({
    'course_id': [101, 102, 103],
    'title': ['Introduction to Data Science', 'Web Development Bootcamp', 'Machine Learning Basics'],
    'subject': ['Data Science', 'Web Development', 'Machine Learning'],
    'difficulty_level': ['Beginner', 'Intermediate', 'Beginner']
})

# Sample user-course interactions
user_course_interactions = pd.DataFrame({
    'user_id': [1, 1, 2, 3, 3],
    'course_id': [101, 103, 102, 101, 103],
    'rating': [5, 4, 5, 3, 4]
})

# Step 2: Load and Preprocess Data

# One-hot encoding for categorical features
encoder = OneHotEncoder(sparse_output=False)  # Use sparse_output=False for dense output

# Encode user interests
user_interests_encoded = encoder.fit_transform(user_profiles[['interests']])
user_interests_df = pd.DataFrame(user_interests_encoded, columns=encoder.get_feature_names_out(['interests']))

# Encode course subjects
course_subjects_encoded = encoder.fit_transform(courses[['subject']])
course_subjects_df = pd.DataFrame(course_subjects_encoded, columns=encoder.get_feature_names_out(['subject']))

# Create user-item interaction matrix
interaction_matrix = user_course_interactions.pivot(index='user_id', columns='course_id', values='rating').fillna(0)

# Step 3: Collaborative Filtering

# Calculate user similarity
user_similarity = cosine_similarity(interaction_matrix)
user_similarity_df = pd.DataFrame(user_similarity, index=interaction_matrix.index, columns=interaction_matrix.index)

# Step 4: Content-Based Filtering

# Combine user interests and skills for content-based filtering
user_features = pd.concat([user_profiles[['user_id']], user_interests_df], axis=1)

# Combine course subjects and difficulty level for content-based filtering
course_features = pd.concat([courses[['course_id']], course_subjects_df], axis=1)

# Step 5: Recommendation Function

def recommend_courses(user_id, num_recommendations=2):
    # Get similar users
    similar_users = user_similarity_df[user_id].sort_values(ascending=False).index[1:num_recommendations + 1]

    # Get courses rated by similar users
    recommended_courses = interaction_matrix.loc[similar_users].mean(axis=0).sort_values(ascending=False).index[:num_recommendations]

    # Get course titles
    recommended_course_titles = courses[courses['course_id'].isin(recommended_courses)]['title'].tolist()

    return recommended_course_titles

# Example usage
user_id = 1
recommended_courses = recommend_courses(user_id)
print(f"Recommended Courses for User {user_id}: {recommended_courses}")

Recommended Courses for User 1: ['Web Development Bootcamp', 'Machine Learning Basics']
