In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
df = pd.read_csv('Datasets/online_courses_updated.csv')
df = df.drop(columns=['Unnamed: 0'])

In [3]:
max_enrollments, min_enrollments = df['enrollment_numbers'].max(), df['enrollment_numbers'].min()
threshold_score = max_enrollments * 0.80
threshold_score

39999.200000000004

In [4]:
df = df[df['enrollment_numbers']>threshold_score]
df.shape

(20041, 16)

In [6]:
df['course_name'].unique(), df['course_name'].nunique(), df['instructor'].unique(), df['instructor'].nunique()

(array(['Project Management Fundamentals',
        'Networking and System Administration',
        'Photography and Video Editing', 'Python for Beginners',
        'Fitness and Nutrition Coaching', 'Graphic Design with Canva',
        'Data Visualization with Tableau', 'Advanced Machine Learning',
        'Stock Market and Trading Strategies',
        'Cybersecurity for Professionals',
        'DevOps and Continuous Deployment',
        'Mobile App Development with Swift',
        'Personal Finance and Wealth Building',
        'Game Development with Unity', 'AI for Business Leaders',
        'Ethical Hacking Masterclass', 'Cloud Computing Essentials',
        'Public Speaking Mastery',
        'Blockchain and Decentralized Applications',
        'Fundamentals of Digital Marketing'], dtype=object),
 20,
 array(['Benjamin Lewis', 'Dr. Robert Davis', 'Daniel White',
        'Charlotte King', 'Prof. Emily Johnson', 'James Clark',
        'Olivia Taylor', 'Michael Brown', 'Jessica Martinez

In [7]:
df['text_features'] = df['course_name'] + ' with ' + df['instructor']
df['text_features'] = df['text_features'].str.lower().str.strip()

In [8]:
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['text_features'])

In [9]:
collab_features = ['rating', 'course_price', 'feedback_score', 'time_spent_hours']
scaler = MinMaxScaler()
numeric_matrix = scaler.fit_transform(df[collab_features])

In [16]:
from scipy.sparse import hstack
combined_features = hstack([tfidf_matrix, numeric_matrix])
combined_features = combined_features.tocsr()

In [47]:
combined_features

<20041x104 sparse matrix of type '<class 'numpy.float64'>'
	with 182308 stored elements in Compressed Sparse Row format>

In [17]:
knn = NearestNeighbors(metric='cosine', algorithm='brute')
knn.fit(combined_features)

In [45]:
def recommend_courses(course_name, instructor, top_n=5):
    input_str = f"{course_name.strip()} with {instructor.strip()}".lower()
    ## match the course
    match = df[df['text_features'] == input_str]
    if match.empty:
        return "Course not found."

    idx = match.index[0]
    distances, indices = knn.kneighbors(combined_features[idx], n_neighbors=len(df))

    ## Flatten and remove self
    recommended_indices = indices.flatten()
    recommended_indices = recommended_indices[recommended_indices != idx]

    ## Build unique recommendations by course name + instructor
    seen = set()
    unique_recommendations = []
    for rec_idx in recommended_indices:
        row = df.iloc[rec_idx]
        key = (row['course_name'].strip().lower(), row['instructor'].strip().lower())
        if key != (course_name.strip().lower(), instructor.strip().lower()) and key not in seen:
            seen.add(key)
            unique_recommendations.append(row)
        if len(unique_recommendations) == top_n:
            break

    rec_df =  pd.DataFrame(unique_recommendations)[[
        'course_name','instructor','course_duration_hours','certification_offered','difficulty_level',	'rating','enrollment_numbers','course_price','feedback_score','study_material_available','time_spent_hours','previous_courses_taken','course_images','instructor_images'
    ]]
    return rec_df.sort_values(by='rating', ascending=False).head(top_n)


In [46]:
recommend_courses('Networking and System Administration', 'Dr. Mia Walker', top_n=5)

Unnamed: 0,course_name,instructor,course_duration_hours,certification_offered,difficulty_level,rating,enrollment_numbers,course_price,feedback_score,study_material_available,time_spent_hours,previous_courses_taken,course_images,instructor_images
74677,Mobile App Development with Swift,Sophia Anderson,8.2,Yes,Intermediate,4.0,44443,230.3,0.886,No,28.89,3,https://images.unsplash.com/photo-163335612254...,https://images.unsplash.com/photo-143876168103...
2446,Mobile App Development with Swift,Ethan Hall,81.9,Yes,Advanced,3.8,40941,359.53,1.0,Yes,32.61,2,https://images.unsplash.com/photo-163335612254...,https://images.pexels.com/photos/1043471/pexel...
86597,Mobile App Development with Swift,William Thomas,14.0,Yes,Beginner,3.6,41154,461.8,1.0,Yes,47.39,3,https://images.unsplash.com/photo-163335612254...,https://images.unsplash.com/photo-1557862921-3...
52147,Mobile App Development with Swift,Emma Harris,46.3,Yes,Beginner,3.6,41244,297.77,0.948,Yes,44.52,4,https://images.unsplash.com/photo-163335612254...,https://images.pexels.com/photos/712521/pexels...
80825,Mobile App Development with Swift,Liam Adams,53.9,No,Advanced,3.0,47439,396.98,0.878,Yes,48.3,7,https://images.unsplash.com/photo-163335612254...,https://images.pexels.com/photos/220453/pexels...


In [37]:
df.sample(5)

Unnamed: 0,user_id,course_id,course_name,instructor,course_duration_hours,certification_offered,difficulty_level,rating,enrollment_numbers,course_price,feedback_score,study_material_available,time_spent_hours,previous_courses_taken,course_images,instructor_images,text_features
7346,29512,3673,Data Visualization with Tableau,Dr. Mia Walker,59.0,Yes,Beginner,4.6,47798,162.99,0.848,Yes,1.0,5,https://images.pexels.com/photos/265087/pexels...,https://images.pexels.com/photos/733872/pexels...,data visualization with tableau with dr. mia w...
70169,8650,4708,Personal Finance and Wealth Building,Ethan Hall,93.3,No,Beginner,3.5,46346,215.48,0.616,Yes,26.24,3,https://images.unsplash.com/photo-1554224155-6...,https://images.pexels.com/photos/1043471/pexel...,personal finance and wealth building with etha...
95827,36882,1478,Stock Market and Trading Strategies,Jessica Martinez,96.8,Yes,Beginner,3.6,43597,255.75,0.711,Yes,1.0,7,https://images.unsplash.com/photo-161197478985...,https://images.pexels.com/photos/1181686/pexel...,stock market and trading strategies with jessi...
28947,1512,9504,Photography and Video Editing,James Clark,85.5,Yes,Beginner,4.3,42433,236.09,0.762,Yes,17.84,4,https://images.unsplash.com/photo-151603506937...,https://images.pexels.com/photos/1222271/pexel...,photography and video editing with james clark
34922,19076,9655,Game Development with Unity,Michael Brown,52.6,Yes,Advanced,3.8,47625,427.67,0.653,Yes,31.58,5,https://images.unsplash.com/photo-1542751371-a...,https://images.unsplash.com/photo-1560250097-0...,game development with unity with michael brown
