In [18]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Dense, Concatenate, Flatten, Dropout
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

In [19]:
df = pd.read_csv('Datasets/online_courses_updated.csv')
df = df.drop(columns=['Unnamed: 0'])
df.head()

Unnamed: 0,user_id,course_id,course_name,instructor,course_duration_hours,certification_offered,difficulty_level,rating,enrollment_numbers,course_price,feedback_score,study_material_available,time_spent_hours,previous_courses_taken,course_images,instructor_images
0,15796,9366,Python for Beginners,Emma Harris,39.1,Yes,Beginner,5.0,21600,317.5,0.797,Yes,17.6,4,https://images.unsplash.com/photo-152637909509...,https://images.pexels.com/photos/712521/pexels...
1,861,1928,Cybersecurity for Professionals,Alexander Young,36.3,Yes,Beginner,4.3,15379,40.99,0.77,Yes,28.97,9,https://images.pexels.com/photos/577585/pexels...,https://images.unsplash.com/photo-150064876779...
2,38159,9541,DevOps and Continuous Deployment,Dr. Mia Walker,13.4,Yes,Beginner,3.9,6431,380.81,0.772,Yes,52.44,4,https://images.pexels.com/photos/270404/pexels...,https://images.pexels.com/photos/733872/pexels...
3,44733,3708,Project Management Fundamentals,Benjamin Lewis,58.3,Yes,Beginner,3.1,48245,342.8,0.969,No,22.29,6,https://images.unsplash.com/photo-157316471371...,https://images.unsplash.com/photo-151908536075...
4,11285,3361,Ethical Hacking Masterclass,Daniel White,30.8,Yes,Beginner,2.8,34556,381.01,0.555,Yes,22.01,5,https://images.unsplash.com/photo-156398676860...,https://images.pexels.com/photos/2379004/pexel...


In [20]:
max_enrollments, min_enrollments = df['enrollment_numbers'].max(), df['enrollment_numbers'].min()
threshold_score = max_enrollments * 0.80
threshold_score

39999.200000000004

In [21]:
df = df[df['enrollment_numbers']>threshold_score]
df.shape

(20041, 16)

In [22]:
df.head()

Unnamed: 0,user_id,course_id,course_name,instructor,course_duration_hours,certification_offered,difficulty_level,rating,enrollment_numbers,course_price,feedback_score,study_material_available,time_spent_hours,previous_courses_taken,course_images,instructor_images
3,44733,3708,Project Management Fundamentals,Benjamin Lewis,58.3,Yes,Beginner,3.1,48245,342.8,0.969,No,22.29,6,https://images.unsplash.com/photo-157316471371...,https://images.unsplash.com/photo-151908536075...
6,16851,7887,Networking and System Administration,Dr. Robert Davis,44.9,Yes,Beginner,4.9,41050,389.32,0.893,Yes,15.66,3,https://images.unsplash.com/photo-157316471398...,https://images.unsplash.com/photo-1545167622-3...
14,770,534,Photography and Video Editing,Daniel White,74.0,Yes,Advanced,4.1,40437,388.7,0.62,Yes,14.13,3,https://images.unsplash.com/photo-151603506937...,https://images.pexels.com/photos/2379004/pexel...
16,5312,3455,Python for Beginners,Charlotte King,11.1,Yes,Beginner,4.6,43655,426.0,0.966,Yes,22.8,5,https://images.unsplash.com/photo-152637909509...,https://images.pexels.com/photos/774909/pexels...
22,6397,1759,Fitness and Nutrition Coaching,Prof. Emily Johnson,88.5,No,Beginner,3.6,44312,178.6,0.598,No,14.45,1,https://images.unsplash.com/photo-157101961345...,https://images.pexels.com/photos/38554/girl-pe...


In [24]:
original_df = df.copy()

In [25]:
# Encode categorical columns
course_le = LabelEncoder()
instructor_le = LabelEncoder()
difficulty_le = LabelEncoder()

df['course_name_enc'] = course_le.fit_transform(df['course_name'])
df['instructor_enc'] = instructor_le.fit_transform(df['instructor'])
df['difficulty_enc'] = difficulty_le.fit_transform(df['difficulty_level'])

# Normalize numeric features
scaler = MinMaxScaler()
num_cols = ['course_duration_hours', 'rating', 'feedback_score',
            'course_price', 'enrollment_numbers', 'time_spent_hours',
            'previous_courses_taken']
df[num_cols] = scaler.fit_transform(df[num_cols])


In [26]:
X_cat = df[['course_name_enc', 'instructor_enc', 'difficulty_enc']]
X_num = df[num_cols]

In [27]:
# Inputs
input_course = Input(shape=(1,))
input_instructor = Input(shape=(1,))
input_difficulty = Input(shape=(1,))
input_numeric = Input(shape=(X_num.shape[1],))

# Embeddings
emb_course = Embedding(input_dim=df['course_name_enc'].nunique()+1, output_dim=8)(input_course)
emb_instr = Embedding(input_dim=df['instructor_enc'].nunique()+1, output_dim=8)(input_instructor)
emb_diff = Embedding(input_dim=df['difficulty_enc'].nunique()+1, output_dim=4)(input_difficulty)

# Flatten embeddings
flat_course = Flatten()(emb_course)
flat_instr = Flatten()(emb_instr)
flat_diff = Flatten()(emb_diff)

# Concatenate all
x = Concatenate()([flat_course, flat_instr, flat_diff, input_numeric])

# Dense Layers
x = Dense(128, activation='relu')(x)
x = Dropout(0.3)(x)
x = Dense(64, activation='relu')(x)
embedding_output = Dense(32, activation='relu', name='embedding')(x)

# Final model
model = Model(inputs=[input_course, input_instructor, input_difficulty, input_numeric], outputs=embedding_output)


In [28]:
# Get embeddings for all courses
course_embeddings = model.predict([
    df['course_name_enc'],
    df['instructor_enc'],
    df['difficulty_enc'],
    df[num_cols]
], verbose=0)


In [48]:
course_embeddings.shape

(20041, 32)

In [49]:
from sklearn.metrics.pairwise import cosine_similarity

def recommend_dl(course_name, instructor_name, top_n=6):
    try:
        course_idx = df[
            (original_df['course_name'] == course_name) &
            (original_df['instructor'] == instructor_name)
        ].index[0]
    except IndexError:
        return "Course not found."

    input_vec = course_embeddings[course_idx].reshape(1, -1)
    sims = cosine_similarity(input_vec, course_embeddings).flatten()

    similar_idxs = np.argsort(sims)[::-1]
    similar_idxs = [i for i in similar_idxs if i != course_idx][:top_n]

    final_df = original_df.iloc[similar_idxs][['course_name', 'instructor', 'rating', 'course_images', 'instructor_images']].reset_index()
    final_df = final_df.drop(columns=['index'])
    return final_df


In [53]:
recommends = recommend_dl("Advanced Machine Learning", "Liam Adams", top_n=6)
recommends[['course_name', 'instructor']]

Unnamed: 0,course_name,instructor
0,Data Visualization with Tableau,Ethan Hall
1,Photography and Video Editing,Isabella Scott
2,Graphic Design with Canva,Liam Adams
3,Project Management Fundamentals,Emma Harris
4,Networking and System Administration,David Wilson
5,Graphic Design with Canva,Benjamin Lewis


In [36]:
df.sample(5)

Unnamed: 0,user_id,course_id,course_name,instructor,course_duration_hours,certification_offered,difficulty_level,rating,enrollment_numbers,course_price,feedback_score,study_material_available,time_spent_hours,previous_courses_taken,course_images,instructor_images,course_name_enc,instructor_enc,difficulty_enc
3079,18323,5349,AI for Business Leaders,David Wilson,0.683158,Yes,Intermediate,0.6,0.307331,0.918373,0.638452,No,0.0,0.157895,https://images.pexels.com/photos/8438974/pexel...,https://images.unsplash.com/photo-150700321116...,0,4,2
43999,7211,4543,Fitness and Nutrition Coaching,Jessica Martinez,0.336842,Yes,Intermediate,0.775,0.257626,0.994708,0.669891,Yes,0.417962,0.263158,https://images.unsplash.com/photo-157101961345...,https://images.pexels.com/photos/1181686/pexel...,8,12,2
85548,32597,1805,Mobile App Development with Swift,Charlotte King,0.66,No,Intermediate,1.0,0.265627,0.47676,0.76058,Yes,0.38164,0.210526,https://images.unsplash.com/photo-163335612254...,https://images.pexels.com/photos/774909/pexels...,12,2,2
72776,1387,8092,AI for Business Leaders,Jessica Martinez,0.015789,No,Intermediate,0.875,0.610661,0.159795,0.688029,Yes,0.184476,0.368421,https://images.pexels.com/photos/8438974/pexel...,https://images.pexels.com/photos/1181686/pexel...,0,12,2
43089,15232,1433,Fundamentals of Digital Marketing,Ethan Hall,0.224211,No,Intermediate,0.675,0.478948,0.041584,0.713422,No,0.215077,0.421053,https://images.unsplash.com/photo-1551288049-b...,https://images.pexels.com/photos/1043471/pexel...,9,9,2


In [44]:
recommend_dl("DevOps and Continuous Deployment", "Olivia Taylor", top_n=5)

Unnamed: 0,course_name,instructor,rating,course_images,instructor_images
0,Cloud Computing Essentials,Jessica Martinez,3.5,https://images.pexels.com/photos/19867468/pexe...,https://images.pexels.com/photos/1181686/pexel...
1,Cloud Computing Essentials,Emma Harris,3.7,https://images.pexels.com/photos/19867468/pexe...,https://images.pexels.com/photos/712521/pexels...
2,Networking and System Administration,William Thomas,3.1,https://images.unsplash.com/photo-157316471398...,https://images.unsplash.com/photo-1557862921-3...
3,Project Management Fundamentals,Jessica Martinez,3.3,https://images.unsplash.com/photo-157316471371...,https://images.pexels.com/photos/1181686/pexel...
4,Personal Finance and Wealth Building,Liam Adams,3.9,https://images.unsplash.com/photo-1554224155-6...,https://images.pexels.com/photos/220453/pexels...


In [45]:
df.head()

Unnamed: 0,user_id,course_id,course_name,instructor,course_duration_hours,certification_offered,difficulty_level,rating,enrollment_numbers,course_price,feedback_score,study_material_available,time_spent_hours,previous_courses_taken,course_images,instructor_images,course_name_enc,instructor_enc,difficulty_enc
3,44733,3708,Project Management Fundamentals,Benjamin Lewis,0.561053,Yes,Beginner,0.525,0.824582,0.672493,0.962515,No,0.264834,0.315789,https://images.unsplash.com/photo-157316471371...,https://images.unsplash.com/photo-151908536075...,16,1,1
6,16851,7887,Networking and System Administration,Dr. Robert Davis,0.42,Yes,Beginner,0.975,0.105011,0.769412,0.870617,Yes,0.182361,0.157895,https://images.unsplash.com/photo-157316471398...,https://images.unsplash.com/photo-1545167622-3...,13,7,1
14,770,534,Photography and Video Editing,Daniel White,0.726316,Yes,Advanced,0.775,0.043704,0.76812,0.540508,Yes,0.163329,0.157895,https://images.unsplash.com/photo-151603506937...,https://images.pexels.com/photos/2379004/pexel...,15,3,0
16,5312,3455,Python for Beginners,Charlotte King,0.064211,Yes,Beginner,0.9,0.365537,0.84583,0.958888,Yes,0.271178,0.263158,https://images.unsplash.com/photo-152637909509...,https://images.pexels.com/photos/774909/pexels...,18,2,1
22,6397,1759,Fitness and Nutrition Coaching,Prof. Emily Johnson,0.878947,No,Beginner,0.65,0.431243,0.330403,0.513906,No,0.167309,0.052632,https://images.unsplash.com/photo-157101961345...,https://images.pexels.com/photos/38554/girl-pe...,8,16,1


In [46]:
original_df.head()

Unnamed: 0,user_id,course_id,course_name,instructor,course_duration_hours,certification_offered,difficulty_level,rating,enrollment_numbers,course_price,feedback_score,study_material_available,time_spent_hours,previous_courses_taken,course_images,instructor_images
3,44733,3708,Project Management Fundamentals,Benjamin Lewis,58.3,Yes,Beginner,3.1,48245,342.8,0.969,No,22.29,6,https://images.unsplash.com/photo-157316471371...,https://images.unsplash.com/photo-151908536075...
6,16851,7887,Networking and System Administration,Dr. Robert Davis,44.9,Yes,Beginner,4.9,41050,389.32,0.893,Yes,15.66,3,https://images.unsplash.com/photo-157316471398...,https://images.unsplash.com/photo-1545167622-3...
14,770,534,Photography and Video Editing,Daniel White,74.0,Yes,Advanced,4.1,40437,388.7,0.62,Yes,14.13,3,https://images.unsplash.com/photo-151603506937...,https://images.pexels.com/photos/2379004/pexel...
16,5312,3455,Python for Beginners,Charlotte King,11.1,Yes,Beginner,4.6,43655,426.0,0.966,Yes,22.8,5,https://images.unsplash.com/photo-152637909509...,https://images.pexels.com/photos/774909/pexels...
22,6397,1759,Fitness and Nutrition Coaching,Prof. Emily Johnson,88.5,No,Beginner,3.6,44312,178.6,0.598,No,14.45,1,https://images.unsplash.com/photo-157101961345...,https://images.pexels.com/photos/38554/girl-pe...
