In [3]:
import numpy as np
import gymnasium as gym
from gymnasium import spaces
import random
import sys
import json
sys.path.append('../src/recommendation')
import matchings

from SyntheticDataset import SyntheticDataset
from stable_baselines3.common.env_checker import check_env
from stable_baselines3 import DQN, A2C, PPO, SAC, TD3, DDPG

from stable_baselines3.common.callbacks import BaseCallback

from CourseRecEnv import CourseRecEnv, EvaluateCallback

In [2]:
# Build the synthetic dataset object from the ../data/synthetic/ directory;
# it is passed to the CourseRecEnv environments created in the training cell.
dataset = SyntheticDataset('../data/synthetic/')

In [3]:
# Training budget (environment steps) per algorithm, and the frequency that
# is handed to the evaluation callback.
total_steps = 1000
eval_freq = 1000
# Fixed seed so that re-running this cell gives comparable numbers.
SEED = 0

# Algorithms to compare, each paired with its constructor keyword arguments.
SB_models = [
    (DQN, {"policy": "MlpPolicy"}),
    (A2C, {"policy": "MlpPolicy", "device": 'cpu'}),
    (PPO, {"policy": "MlpPolicy"}),
]
for SB_model, kwargs in SB_models:
    # Separate environment instances for training and for evaluation.
    train_env = CourseRecEnv(dataset, k=5)
    eval_env = CourseRecEnv(dataset, k=5)
    print(SB_model)

    eval_callback = EvaluateCallback(eval_env, eval_freq=eval_freq)

    # A "seed" entry in kwargs, if one is ever added, overrides SEED.
    model_kwargs = {"seed": SEED, **kwargs}
    model = SB_model(env=train_env, verbose=0, **model_kwargs)
    # NOTE(review): the recorded PPO run reports evaluations at 1000 and 2000
    # steps although total_steps is 1000 - presumably PPO's rollout size
    # exceeds total_steps; confirm against the PPO defaults.
    model.learn(total_timesteps=total_steps, callback=eval_callback)

<class 'stable_baselines3.dqn.dqn.DQN'>
1000 3.38
<class 'stable_baselines3.a2c.a2c.A2C'>
1000 3.9
<class 'stable_baselines3.ppo.ppo.PPO'>
1000 3.3
2000 3.3


In [21]:
import csv
import sys
import json
import pandas as pd

from collections import Counter

sys.path.append('../src/recommendation')

from Dataset import Dataset

In [5]:
# Build the Dataset object from the V1 YAML configuration file.
# NOTE: this rebinds `dataset`, which an earlier cell bound to a
# SyntheticDataset instance.
dataset = Dataset("../config/dataset_V1.yaml")

In [34]:
def _read_json(path):
    """Parse the JSON file at `path` and return the decoded object.

    The file is opened inside a `with` block so the handle is always closed,
    and decoded explicitly as UTF-8 instead of relying on the platform's
    default encoding.
    """
    with open(path, encoding="utf-8") as handle:
        return json.load(handle)


# Raw V1 inputs: CVs, job postings and courses.
cvs = _read_json("../data/V1/kaggle_cvs.json")
jobs = _read_json("../data/V1/tech_jobs_english.json")
courses = _read_json("../data/V1/coco_courses_tech_english.json")

In [14]:
# Collect the distinct mastery-level labels that appear in the CVs.
# Each CV is iterated as (skill, level) pairs; entries whose level is not a
# string are printed with their CV key and left out of the set.
mastery_levels = set()
for key, cv in cvs.items():
    for skill, level in cv:
        if isinstance(level, str):
            mastery_levels.add(level)
        else:
            print(key, level)

32138 ['participated in requirements gathering and design development meetings', 'reviewed code and debugged errors to improve performance', 'coordinated with systems partners to finalize designs and confirm requirements', 'consistently met deadlines and requirements for all production work orders', 'collaborated with other developers to identify and alleviate the number of bugs in the software', 'provided maintenance and development of bug fixes and patch sets for existing applications']


In [28]:
# Count how often each mastery-level label occurs across all CVs.
# Each CV is iterated as (skill, level) pairs; entries whose level is not a
# string are printed with their CV key and not counted.
cv_mastery_levels = Counter()
for key, cv in cvs.items():
    for skill, level in cv:
        if isinstance(level, str):
            cv_mastery_levels[level] += 1
        else:
            print(key, level)

32138 ['participated in requirements gathering and design development meetings', 'reviewed code and debugged errors to improve performance', 'coordinated with systems partners to finalize designs and confirm requirements', 'consistently met deadlines and requirements for all production work orders', 'collaborated with other developers to identify and alleviate the number of bugs in the software', 'provided maintenance and development of bug fixes and patch sets for existing applications']


In [29]:
# Display the ten most frequent mastery-level labels counted in the CVs.
cv_mastery_levels.most_common(10)

[('unknown', 15876),
 ('intermediate', 325),
 ('expert', 246),
 ('beginner', 163),
 ('basics', 102),
 ('less than 1 year', 69),
 ('basic', 58),
 ('12 months experience', 17),
 ('10 months', 17),
 ('advanced', 10)]

In [31]:
# Count how often each mastery-level label occurs across all job postings.
# Each job is iterated as (skill, level) pairs; entries whose level is not a
# string are printed with their job key and not counted.
job_mastery_levels = Counter()
for key, job in jobs.items():
    for skill, level in job:
        if isinstance(level, str):
            job_mastery_levels[level] += 1
        else:
            print(key, level)

In [32]:
# Display the ten most frequent mastery-level labels counted in the jobs.
job_mastery_levels.most_common(10)

[('unknown', 75127),
 ('expert', 41527),
 ('intermediate', 14897),
 ('beginner', 738),
 ('advanced', 81),
 ('fluent', 69),
 ('good', 42),
 ('intermÃ©diaire', 29),
 ('proficient', 17),
 ('advantageous', 15)]

In [40]:
def _tally_levels(course_key, pairs, tally):
    """Add the string levels found in `pairs` to the `tally` counter.

    `pairs` is iterated as (skill, level) tuples. A level that is not a
    string is printed together with `course_key` and is not counted.
    """
    for _skill, level in pairs:
        if isinstance(level, str):
            tally[level] += 1
        else:
            print(course_key, level)


# Mastery-level label frequencies for the skills a course teaches
# ("to_acquire") and for the skills it presupposes ("required").
to_aquire_course_mastery_levels = Counter()
required_course_mastery_levels = Counter()
for key, course in courses.items():
    # Either section may be missing from a course; a missing one adds nothing.
    _tally_levels(key, course.get("to_acquire", ()), to_aquire_course_mastery_levels)
    _tally_levels(key, course.get("required", ()), required_course_mastery_levels)

50563 ['organize slides into sections', 'edit movie clips', 'apply animation effects', 'learn how to broadcast slide shows']
653868 [{'view name': 'courseskills', 'definition': 'select courses.coursename, skills.skillname, skills.masterylevel from courses inner join skills on courses.courseid = skills.courseid'}]
678298 ['quick tables', 'tables formatting']
678298 ['insert clip art']
678298 ['insert shapes', 'insert shapes and edit']
737700 ['internet connection', 'up to date web browser']
790908 ['computer', 'internet connection', 'jdk (java development kit)', 'netbeans ide or any other ide (preferably netbeans)', 'mysql wamp server or xamp server', 'jar files (downloadable from internet)']


In [41]:
# Display the ten most frequent labels among the courses' "to_acquire" skills.
to_aquire_course_mastery_levels.most_common(10)

[('unknown', 219615),
 ('beginner', 30651),
 ('intermediate', 5513),
 ('expert', 2151),
 ('advanced', 380),
 ('basic', 45),
 ('beginner to advanced', 38),
 ('beginner to intermediate', 22),
 ('beginner to expert', 15),
 ('beginner or intermediate', 14)]

In [42]:
# Display the ten most frequent labels among the courses' "required" skills.
required_course_mastery_levels.most_common(10)

[('unknown', 17800),
 ('beginner', 8289),
 ('intermediate', 1571),
 ('expert', 175),
 ('basic', 79),
 ('advanced', 44),
 ('beginner to intermediate', 29),
 ('modest', 20),
 ('beginner or experienced', 16),
 ('beginner/intermediate', 15)]

In [46]:
# Numeric encoding of the mastery labels that is written to disk.
# "unknown" is given the same value as "intermediate" (2).
mastery_levels = {'beginner': 1, 'intermediate': 2, 'expert': 3, "unknown": 2}
# The `with` block closes the file, so the JSON is flushed to disk before
# the cell finishes.
with open("../data/V1/mastery_levels.json", 'w') as out_file:
    json.dump(mastery_levels, out_file, indent=4)