In [16]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Load required datasets
jobs = pd.read_csv("combined_jobs_cleaned.csv")
courses = pd.read_csv("coursera_cleaned.csv")
uni_courses = pd.read_csv("uni_courses_aligned.csv")

# Load embeddings
job_embeddings = np.load("job_embeddings.npy")
course_embeddings = np.load("course_embeddings.npy")
uni_course_embeddings = np.load("uni_course_embeddings.npy")

# Later cells pair row i of each embedding matrix with row i of its table,
# so fail fast here if a table and its embeddings have drifted out of sync
# instead of silently recommending the wrong rows.
assert len(jobs) == len(job_embeddings), (
    f"jobs has {len(jobs)} rows but job_embeddings has {len(job_embeddings)}"
)
assert len(courses) == len(course_embeddings), (
    f"courses has {len(courses)} rows but course_embeddings has {len(course_embeddings)}"
)
assert len(uni_courses) == len(uni_course_embeddings), (
    f"uni_courses has {len(uni_courses)} rows but "
    f"uni_course_embeddings has {len(uni_course_embeddings)}"
)


In [18]:
def normalise_career(label):
    """Map a free-text career label to a coarse career-group key.

    Parameters
    ----------
    label : object
        Raw career label. Anything that is not a ``str`` (for example a
        missing value) is mapped to the empty string.

    Returns
    -------
    str
        One of ``"software_engineering"``, ``"data_analytics"``,
        ``"data_science"`` or ``"network_engineering"`` when the label
        contains the matching keyword; otherwise the label itself,
        lower-cased with surrounding whitespace removed and internal runs
        of whitespace collapsed to a single space.
    """
    if not isinstance(label, str):
        return ""

    # Lower-case, trim and collapse whitespace so that "Sales Executive " and
    # "sales  executive" end up in the same group, and so that the two-word
    # keywords below still match when the source text has irregular spacing.
    label = " ".join(label.lower().split())

    # Checked in order: the first keyword found in the label wins.
    keyword_groups = (
        ("software", "software_engineering"),
        ("data analyst", "data_analytics"),
        ("data scientist", "data_science"),
        ("network", "network_engineering"),
    )
    for keyword, group in keyword_groups:
        if keyword in label:
            return group

    # No keyword matched: the cleaned label is its own group.
    return label

# Derive the coarse career group for every job row from its raw career label.
jobs['career_group'] = jobs['career_label'].map(normalise_career)


In [19]:
def _top_similar_rows(query_embedding, item_embeddings, catalogue, columns, top_n):
    """Return the `top_n` rows of `catalogue` most cosine-similar to `query_embedding`."""
    scores = cosine_similarity(query_embedding.reshape(1, -1), item_embeddings)[0]
    # argsort is ascending, so reverse it to rank the highest similarity first.
    best = scores.argsort()[::-1][:top_n]
    return catalogue.iloc[best][columns].reset_index(drop=True)


def recommend_courses_for_career(career_group, top_n=5):
    """Recommend Coursera courses and university programmes for a career group.

    The embeddings of every job in the group are averaged into one query
    vector, and each catalogue is ranked by cosine similarity to that vector.

    Parameters
    ----------
    career_group : str
        Career group to look up. String values are passed through
        ``normalise_career`` first, so a raw title such as
        ``"Sales Executive"`` or ``"Software Engineer"`` resolves to the same
        key that is stored in ``jobs['career_group']``.
    top_n : int, default 5
        Maximum number of rows to return per catalogue. Must not be negative.

    Returns
    -------
    dict or None
        ``{"Coursera Courses": DataFrame, "University Programmes": DataFrame}``,
        each ordered from most to least similar, or ``None`` (after printing
        a message) when no job belongs to the requested group.

    Raises
    ------
    ValueError
        If ``top_n`` is negative.
    """
    # A negative value would be treated as "all but the last |top_n|" by the
    # slice used for ranking, which is never what the caller meant.
    if top_n is not None and top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    # Normalise the query exactly like the stored labels so lookups do not
    # depend on the caller's casing or wording.
    group_key = (
        normalise_career(career_group)
        if isinstance(career_group, str)
        else career_group
    )

    # NOTE(review): the index labels of `jobs` are used as row positions in
    # `job_embeddings`; this assumes `jobs` still has the default index it
    # had when loaded - confirm if `jobs` is ever filtered or re-indexed.
    job_idxs = jobs[jobs['career_group'] == group_key].index

    if len(job_idxs) == 0:
        print(f"No jobs found for career group: {career_group}")
        return None

    avg_job_embedding = job_embeddings[job_idxs].mean(axis=0)

    return {
        "Coursera Courses": _top_similar_rows(
            avg_job_embedding,
            course_embeddings,
            courses,
            ['course_title', 'course_organization', 'course_difficulty', 'course_rating'],
            top_n,
        ),
        "University Programmes": _top_similar_rows(
            avg_job_embedding,
            uni_course_embeddings,
            uni_courses,
            ['course_name', 'University', 'Specialization'],
            top_n,
        ),
    }


In [22]:
# Count the job rows in each career group. Labels that match no keyword in
# normalise_career are kept as their own lower-cased group.
jobs['career_group'].value_counts()


career_group
software_engineering                                                      215
sales executive                                                           138
account executive                                                          91
network_engineering                                                        55
accounts executive                                                         52
                                                                         ... 
health supplement chemist                                                   1
english teacher (january 2026)                                              1
general manager, air freight                                                1
manager - retail                                                            1
director of engineering - avani kota kinabalu hotel (pre opening team)      1
Name: count, Length: 11288, dtype: int64

In [23]:
# Example query: the five closest Coursera courses and university programmes
# for the "sales executive" career group.
recommend_courses_for_career(career_group="sales executive", top_n=5)


{'Coursera Courses':                                  course_title             course_organization  \
 0                   Excel Skills for Business            Macquarie University   
 1      Excel/VBA for Creative Problem Solving  University of Colorado Boulder   
 2  Excel Skills for Business: Intermediate II            Macquarie University   
 3   Excel Skills for Business: Intermediate I            Macquarie University   
 4         Excel Skills for Business: Advanced            Macquarie University   
 
   course_difficulty  course_rating  
 0          Beginner            4.9  
 1          Beginner            4.8  
 2      Intermediate            4.8  
 3      Intermediate            4.9  
 4      Intermediate            4.7  ,
 'University Programmes':                                          course_name University Specialization
 0    BA132 - Diploma in Office Technology Management       UITM            NaN
 1                              BA290 - Double Degree       UITM        