# Importing the required modules

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import re
import spacy

# Reading the dataset

In [51]:
curr = pd.read_csv("../data/curriculum.csv")

# Glimpse into the dataset

In [38]:
curr.head()

Unnamed: 0,Courses,Topic,Duration,Effort,Total Hours Lower Bound,Start Date,End Date Estimate Lower Bound,Total Hours Upper Bound,Start Date.1,End Date Estimate Upper Bound,Actual End Date,Prerequisites
0,Python for Everybody,Intro CS,10 weeks,10 hours/week,100,28-07-2022,01-09-2022,100,28-07-2022,01-09-2022,,-
1,Introduction to Computer Science and Programmi...,Intro CS,9 weeks,15 hours/week,135,01-09-2022,18-10-2022,135,01-09-2022,18-10-2022,,high school algebra
2,How to Code - Simple Data,Core Programming,7 weeks,8-10 hours/week,56,18-10-2022,06-11-2022,70,18-10-2022,11-11-2022,,-
3,How to Code - Complex Data,Core Programming,6 weeks,8-10 hours/week,48,06-11-2022,23-11-2022,60,11-11-2022,02-12-2022,,How to Code - Simple Data
4,"Programming Languages, Part A",Core Programming,5 weeks,4-8 hours/week,20,23-11-2022,30-11-2022,40,02-12-2022,16-12-2022,,How to Code (Hear instructor)


In [39]:
curr.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 43 entries, 0 to 42
Data columns (total 12 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   Courses                        43 non-null     object 
 1   Topic                          43 non-null     object 
 2   Duration                       42 non-null     object 
 3   Effort                         42 non-null     object 
 4   Total Hours Lower Bound        43 non-null     int64  
 5   Start Date                     43 non-null     object 
 6   End Date Estimate Lower Bound  43 non-null     object 
 7   Total Hours Upper Bound        43 non-null     int64  
 8   Start Date.1                   43 non-null     object 
 9   End Date Estimate Upper Bound  43 non-null     object 
 10  Actual End Date                0 non-null      float64
 11  Prerequisites                  42 non-null     object 
dtypes: float64(1), int64(2), object(9)
memory usage: 4.2

# Alloting course codes to each course

In [5]:
def get_course_code(row):
    topic = row[1].title()
    topic = "".join(re.findall("[A-Z]",topic))
    courses_in_topic = list(curr[curr["Topic"] == row[1]]["Courses"])
    course_code = str(courses_in_topic.index(row[0]) + 1).rjust(3,"0")
    return topic + course_code

In [6]:
curr["Course Code"] = curr.apply(get_course_code,axis=1)

# Handling Course Pre-requisites

## Finding most relevant courses for prequisites using Cosine Similiarity

In [7]:
df = pd.DataFrame(columns=["Prerequisite"] + list(curr["Courses"]) + list(curr["Topic"]))

In [8]:
df

Unnamed: 0,Prerequisite,Python for Everybody,Introduction to Computer Science and Programming using Python,How to Code - Simple Data,How to Code - Complex Data,"Programming Languages, Part A","Programming Languages, Part B","Programming Languages, Part C",Object-Oriented Design,Design Patterns,...,Core Ethics,Core Ethics.1,Core Ethics.2,Advanced Programming,Advanced Programming.1,Advanced Programming.2,Advanced Programming.3,Advanced Programming.4,Advanced Programming.5,Final Project


In [9]:
nlp = spacy.load("en_core_web_sm")
for i in set(curr["Prerequisites"]): 
    if str(type(i)) != "<class 'float'>":
        for k in i.split(";"):
            prereq = nlp(i.strip())
            row = {"Prerequisite":k}
            if i in df["Prerequisite"]:
                continue
            for j in list(curr["Courses"]) + list(curr["Topic"]):
                course = nlp(j.strip())
                row[j] = prereq.similarity(course)
            df = df.append(row,ignore_index=True)

  row[j] = prereq.similarity(course)


In [10]:
#df = df.set_index("Prerequisite")

In [11]:
df

Unnamed: 0,Prerequisite,Python for Everybody,Introduction to Computer Science and Programming using Python,How to Code - Simple Data,How to Code - Complex Data,"Programming Languages, Part A","Programming Languages, Part B","Programming Languages, Part C",Object-Oriented Design,Design Patterns,...,Core Ethics,Core Ethics.1,Core Ethics.2,Advanced Programming,Advanced Programming.1,Advanced Programming.2,Advanced Programming.3,Advanced Programming.4,Advanced Programming.5,Final Project
0,Java for Everybody,0.887163,0.527122,0.267216,0.272803,0.163338,0.119332,0.168569,0.135117,0.236908,...,0.253042,0.253042,0.253042,0.284305,0.284305,0.284305,0.284305,0.284305,0.284305,0.321096
1,C++ for Everybody,0.621831,0.449858,0.333551,0.35269,0.488279,0.438433,0.499209,0.144365,0.438784,...,0.431127,0.431127,0.431127,0.345829,0.345829,0.345829,0.345829,0.345829,0.345829,0.462576
2,Java for Everybody,0.621831,0.449858,0.333551,0.35269,0.488279,0.438433,0.499209,0.144365,0.438784,...,0.431127,0.431127,0.431127,0.345829,0.345829,0.345829,0.345829,0.345829,0.345829,0.462576
3,Linear Algebra,0.621831,0.449858,0.333551,0.35269,0.488279,0.438433,0.499209,0.144365,0.438784,...,0.431127,0.431127,0.431127,0.345829,0.345829,0.345829,0.345829,0.345829,0.345829,0.462576
4,Python for Everybody,1.0,0.614067,0.284196,0.278327,0.222033,0.178415,0.236059,0.180903,0.287245,...,0.295037,0.295037,0.295037,0.320114,0.320114,0.320114,0.320114,0.320114,0.320114,0.373142
5,How to Code: Simple Data,0.347886,0.349117,0.747667,0.712122,0.480058,0.480928,0.519834,0.211499,0.517171,...,0.529959,0.529959,0.529959,0.410602,0.410602,0.410602,0.410602,0.410602,0.410602,0.510676
6,-,-0.016595,-0.165618,0.104927,0.105341,0.076003,0.024623,0.073966,0.159847,-0.026023,...,0.004199,0.004199,0.004199,-0.102842,-0.102842,-0.102842,-0.102842,-0.102842,-0.102842,-0.053804
7,From Nand to Tetris Part I,0.658241,0.709123,0.451229,0.459345,0.448134,0.400455,0.40159,0.132073,0.395237,...,0.477251,0.477251,0.477251,0.407808,0.407808,0.407808,0.407808,0.407808,0.407808,0.508955
8,high school algebra,0.369998,0.367067,0.441401,0.427629,0.333702,0.357487,0.348972,0.357612,0.490888,...,0.468916,0.468916,0.468916,0.584847,0.584847,0.584847,0.584847,0.584847,0.584847,0.549081
9,Mathematics for Computer Science,0.664016,0.763913,0.518698,0.558219,0.546759,0.544172,0.546839,0.353599,0.553546,...,0.588003,0.588003,0.588003,0.530212,0.530212,0.530212,0.530212,0.530212,0.530212,0.555373


In [12]:
def get_most_relevant_course(row):
    best_similarity = max(row[1:])
    cols = list(df.columns)
    i = list(row[1:]).index(best_similarity)
    #print(row[0],"\t",cols[i + 1],"\t",best_similarity)
    #print(row.index,"\n")
    if best_similarity > 0.74:
        return {"Pre":row[0],"Rel":cols[i + 1],"simi":best_similarity,"incorrect":None}
    else:
        return {"Pre":row[0],"Rel":None,"simi":best_similarity,"incorrect":cols[i+1]}

In [13]:
d = pd.DataFrame(columns=["Pre","Rel","simi","incorrect"])

In [14]:
for i in df.apply(get_most_relevant_course,axis=1):
    d = d.append(i, ignore_index=True)

In [15]:
d

Unnamed: 0,Pre,Rel,simi,incorrect
0,Java for Everybody,Python for Everybody,0.887163,
1,C++ for Everybody,,0.626999,Software Engineering: Introduction
2,Java for Everybody,,0.626999,Software Engineering: Introduction
3,Linear Algebra,,0.626999,Software Engineering: Introduction
4,Python for Everybody,Python for Everybody,1.0,
5,How to Code: Simple Data,How to Code - Simple Data,0.747667,
6,-,,0.228649,Computer Networking: a Top-Down Approach
7,From Nand to Tetris Part I,Build a Modern Computer from First Principles:...,0.829135,
8,high school algebra,,0.601529,Software Architecture
9,Mathematics for Computer Science,Mathematics for Computer Science,1.0,


## Adding Prerequisites to the catalog

In [16]:
def add_prereq(row):
    if row[1] == None and row[0] != "-":
        return {"Courses" : row[0].title(),"Topic":"Prerequisites"}
    return None

In [17]:
for i in d.apply(add_prereq,axis=1):
    if i != None:
        curr = curr.append(i,ignore_index=True)

In [18]:
curr = curr[["Courses","Topic","Duration","Effort"]]

In [19]:
curr.head()

Unnamed: 0,Courses,Topic,Duration,Effort
0,Python for Everybody,Intro CS,10 weeks,10 hours/week
1,Introduction to Computer Science and Programmi...,Intro CS,9 weeks,15 hours/week
2,How to Code - Simple Data,Core Programming,7 weeks,8-10 hours/week
3,How to Code - Complex Data,Core Programming,6 weeks,8-10 hours/week
4,"Programming Languages, Part A",Core Programming,5 weeks,4-8 hours/week


In [20]:
curr.to_csv("../data/curr.csv")

In [40]:
curr = pd.read_csv("../data/curr.csv")

In [41]:
curr = curr.drop(["Unnamed: 0"],axis=1)

In [42]:
curr

Unnamed: 0,Courses,Topic,Duration,Effort,Course Code
0,Python for Everybody,Intro CS,10 weeks,10 hours/week,IC001
1,Introduction to Computer Science and Programmi...,Intro CS,9 weeks,15 hours/week,IC002
2,How to Code - Simple Data,Core Programming,7 weeks,8-10 hours/week,CP001
3,How to Code - Complex Data,Core Programming,6 weeks,8-10 hours/week,CP002
4,"Programming Languages, Part A",Core Programming,5 weeks,4-8 hours/week,CP003
5,"Programming Languages, Part B",Core Programming,3 weeks,4-8 hours/week,CP004
6,"Programming Languages, Part C",Core Programming,3 weeks,4-8 hours/week,CP005
7,Object-Oriented Design,Core Programming,4 weeks,4 hours/week,CP006
8,Design Patterns,Core Programming,4 weeks,4 hours/week,CP007
9,Software Architecture,Core Programming,4 weeks,2-5 hours/week,CP008


In [43]:
def prereq2coursecodes(course : str):
    s = list(c[c["Courses"] == course]["Prerequisites"])
    if len(s) > 0 or s != ["-"]:
        return str(s[0]).split(";")
    return None

In [44]:
curr.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 53 entries, 0 to 52
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Courses      53 non-null     object
 1   Topic        53 non-null     object
 2   Duration     52 non-null     object
 3   Effort       42 non-null     object
 4   Course Code  53 non-null     object
dtypes: object(5)
memory usage: 2.2+ KB


In [45]:
c = pd.read_csv("../data/curriculum.csv")

In [46]:
c

Unnamed: 0,Courses,Topic,Duration,Effort,Total Hours Lower Bound,Start Date,End Date Estimate Lower Bound,Total Hours Upper Bound,Start Date.1,End Date Estimate Upper Bound,Actual End Date,Prerequisites
0,Python for Everybody,Intro CS,10 weeks,10 hours/week,100,28-07-2022,01-09-2022,100,28-07-2022,01-09-2022,,-
1,Introduction to Computer Science and Programmi...,Intro CS,9 weeks,15 hours/week,135,01-09-2022,18-10-2022,135,01-09-2022,18-10-2022,,high school algebra
2,How to Code - Simple Data,Core Programming,7 weeks,8-10 hours/week,56,18-10-2022,06-11-2022,70,18-10-2022,11-11-2022,,-
3,How to Code - Complex Data,Core Programming,6 weeks,8-10 hours/week,48,06-11-2022,23-11-2022,60,11-11-2022,02-12-2022,,How to Code - Simple Data
4,"Programming Languages, Part A",Core Programming,5 weeks,4-8 hours/week,20,23-11-2022,30-11-2022,40,02-12-2022,16-12-2022,,How to Code (Hear instructor)
5,"Programming Languages, Part B",Core Programming,3 weeks,4-8 hours/week,12,30-11-2022,04-12-2022,24,16-12-2022,25-12-2022,,"Programming Languages, Part A"
6,"Programming Languages, Part C",Core Programming,3 weeks,4-8 hours/week,12,04-12-2022,09-12-2022,24,25-12-2022,02-01-2023,,"Programming Languages, Part B"
7,Object-Oriented Design,Core Programming,4 weeks,4 hours/week,16,09-12-2022,14-12-2022,16,02-01-2023,08-01-2023,,Java for Everybody
8,Design Patterns,Core Programming,4 weeks,4 hours/week,16,14-12-2022,20-12-2022,16,08-01-2023,13-01-2023,,Object-Oriented Design
9,Software Architecture,Core Programming,4 weeks,2-5 hours/week,8,20-12-2022,23-12-2022,20,13-01-2023,20-01-2023,,Design Patterns


In [52]:
curr

Unnamed: 0,Courses,Topic,Duration,Effort,Total Hours Lower Bound,Start Date,End Date Estimate Lower Bound,Total Hours Upper Bound,Start Date.1,End Date Estimate Upper Bound,Actual End Date,Prerequisites
0,Python for Everybody,Intro CS,10 weeks,10 hours/week,100,28-07-2022,01-09-2022,100,28-07-2022,01-09-2022,,-
1,Introduction to Computer Science and Programmi...,Intro CS,9 weeks,15 hours/week,135,01-09-2022,18-10-2022,135,01-09-2022,18-10-2022,,high school algebra
2,How to Code - Simple Data,Core Programming,7 weeks,8-10 hours/week,56,18-10-2022,06-11-2022,70,18-10-2022,11-11-2022,,-
3,How to Code - Complex Data,Core Programming,6 weeks,8-10 hours/week,48,06-11-2022,23-11-2022,60,11-11-2022,02-12-2022,,How to Code - Simple Data
4,"Programming Languages, Part A",Core Programming,5 weeks,4-8 hours/week,20,23-11-2022,30-11-2022,40,02-12-2022,16-12-2022,,How to Code (Hear instructor)
5,"Programming Languages, Part B",Core Programming,3 weeks,4-8 hours/week,12,30-11-2022,04-12-2022,24,16-12-2022,25-12-2022,,"Programming Languages, Part A"
6,"Programming Languages, Part C",Core Programming,3 weeks,4-8 hours/week,12,04-12-2022,09-12-2022,24,25-12-2022,02-01-2023,,"Programming Languages, Part B"
7,Object-Oriented Design,Core Programming,4 weeks,4 hours/week,16,09-12-2022,14-12-2022,16,02-01-2023,08-01-2023,,Java for Everybody
8,Design Patterns,Core Programming,4 weeks,4 hours/week,16,14-12-2022,20-12-2022,16,08-01-2023,13-01-2023,,Object-Oriented Design
9,Software Architecture,Core Programming,4 weeks,2-5 hours/week,8,20-12-2022,23-12-2022,20,13-01-2023,20-01-2023,,Design Patterns


In [31]:
curr.to_csv("../data/curr.csv")

In [53]:
course_codes = pd.read_csv("../data/curr.csv")

In [54]:
course_codes

Unnamed: 0.1,Unnamed: 0,Courses,Topic,Duration,Effort,Course Code
0,0,Python for Everybody,Intro CS,10 weeks,10 hours/week,IC001
1,1,Introduction to Computer Science and Programmi...,Intro CS,9 weeks,15 hours/week,IC002
2,2,How to Code - Simple Data,Core Programming,7 weeks,8-10 hours/week,CP001
3,3,How to Code - Complex Data,Core Programming,6 weeks,8-10 hours/week,CP002
4,4,"Programming Languages, Part A",Core Programming,5 weeks,4-8 hours/week,CP003
5,5,"Programming Languages, Part B",Core Programming,3 weeks,4-8 hours/week,CP004
6,6,"Programming Languages, Part C",Core Programming,3 weeks,4-8 hours/week,CP005
7,7,Object-Oriented Design,Core Programming,4 weeks,4 hours/week,CP006
8,8,Design Patterns,Core Programming,4 weeks,4 hours/week,CP007
9,9,Software Architecture,Core Programming,4 weeks,2-5 hours/week,CP008


In [55]:
curr

Unnamed: 0,Courses,Topic,Duration,Effort,Total Hours Lower Bound,Start Date,End Date Estimate Lower Bound,Total Hours Upper Bound,Start Date.1,End Date Estimate Upper Bound,Actual End Date,Prerequisites
0,Python for Everybody,Intro CS,10 weeks,10 hours/week,100,28-07-2022,01-09-2022,100,28-07-2022,01-09-2022,,-
1,Introduction to Computer Science and Programmi...,Intro CS,9 weeks,15 hours/week,135,01-09-2022,18-10-2022,135,01-09-2022,18-10-2022,,high school algebra
2,How to Code - Simple Data,Core Programming,7 weeks,8-10 hours/week,56,18-10-2022,06-11-2022,70,18-10-2022,11-11-2022,,-
3,How to Code - Complex Data,Core Programming,6 weeks,8-10 hours/week,48,06-11-2022,23-11-2022,60,11-11-2022,02-12-2022,,How to Code - Simple Data
4,"Programming Languages, Part A",Core Programming,5 weeks,4-8 hours/week,20,23-11-2022,30-11-2022,40,02-12-2022,16-12-2022,,How to Code (Hear instructor)
5,"Programming Languages, Part B",Core Programming,3 weeks,4-8 hours/week,12,30-11-2022,04-12-2022,24,16-12-2022,25-12-2022,,"Programming Languages, Part A"
6,"Programming Languages, Part C",Core Programming,3 weeks,4-8 hours/week,12,04-12-2022,09-12-2022,24,25-12-2022,02-01-2023,,"Programming Languages, Part B"
7,Object-Oriented Design,Core Programming,4 weeks,4 hours/week,16,09-12-2022,14-12-2022,16,02-01-2023,08-01-2023,,Java for Everybody
8,Design Patterns,Core Programming,4 weeks,4 hours/week,16,14-12-2022,20-12-2022,16,08-01-2023,13-01-2023,,Object-Oriented Design
9,Software Architecture,Core Programming,4 weeks,2-5 hours/week,8,20-12-2022,23-12-2022,20,13-01-2023,20-01-2023,,Design Patterns


In [56]:
curr = curr[["Courses","Prerequisites"]]

In [57]:
def capitalize(s : str):
    return s.upper().strip()

In [58]:
course_codes["Courses"] = course_codes["Courses"].map(capitalize)

In [59]:
def prereq2coursecode(preqs : str):
    if str(type(preqs)) != str(type(0.9)):
        preqs = preqs.split(";")
        a = []
        for i in preqs:
            a.append(course_codes[course_codes["Courses"] == i.upper().strip()]["Course Code"])
        a = np.array(a).flatten()
        return ";".join(a)

In [60]:
curr["Prerequisite Codes"] = curr["Prerequisites"].map(prereq2coursecode)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  curr["Prerequisite Codes"] = curr["Prerequisites"].map(prereq2coursecode)


In [61]:
curr

Unnamed: 0,Courses,Prerequisites,Prerequisite Codes
0,Python for Everybody,-,
1,Introduction to Computer Science and Programmi...,high school algebra,P004
2,How to Code - Simple Data,-,
3,How to Code - Complex Data,How to Code - Simple Data,CP001
4,"Programming Languages, Part A",How to Code (Hear instructor),P007
5,"Programming Languages, Part B","Programming Languages, Part A",CP003
6,"Programming Languages, Part C","Programming Languages, Part B",CP004
7,Object-Oriented Design,Java for Everybody,P002
8,Design Patterns,Object-Oriented Design,CP006
9,Software Architecture,Design Patterns,CP007


In [32]:
curr.to_csv("../data/curriculum_with_prerequisites.csv")

In [33]:
curr = pd.read_csv("../data/curriculum_with_prerequisites.csv")

In [34]:
curr

Unnamed: 0.1,Unnamed: 0,Courses,Prerequisites,Prerequisite Codes
0,0,Python for Everybody,-,
1,1,Introduction to Computer Science and Programmi...,high school algebra,P004
2,2,How to Code - Simple Data,-,
3,3,How to Code - Complex Data,How to Code - Simple Data,CP001
4,4,"Programming Languages, Part A",How to Code (Hear instructor),P007
5,5,"Programming Languages, Part B","Programming Languages, Part A",CP003
6,6,"Programming Languages, Part C","Programming Languages, Part B",CP004
7,7,Object-Oriented Design,Java for Everybody,P002
8,8,Design Patterns,Object-Oriented Design,CP006
9,9,Software Architecture,Design Patterns,CP007


In [77]:
curr.to_csv("../data/curriculum_with_prerequisites.csv")

In [100]:
y.to_csv("../data/edge_list.csv")

In [7]:
df = pd.read_csv("../data/curr.csv")

df

Unnamed: 0.1,Unnamed: 0,Courses,Topic,Duration,Effort,Course Code
0,0,Python for Everybody,Intro CS,10 weeks,10 hours/week,IC001
1,1,Introduction to Computer Science and Programmi...,Intro CS,9 weeks,15 hours/week,IC002
2,2,How to Code - Simple Data,Core Programming,7 weeks,8-10 hours/week,CP001
3,3,How to Code - Complex Data,Core Programming,6 weeks,8-10 hours/week,CP002
4,4,"Programming Languages, Part A",Core Programming,5 weeks,4-8 hours/week,CP003
5,5,"Programming Languages, Part B",Core Programming,3 weeks,4-8 hours/week,CP004
6,6,"Programming Languages, Part C",Core Programming,3 weeks,4-8 hours/week,CP005
7,7,Object-Oriented Design,Core Programming,4 weeks,4 hours/week,CP006
8,8,Design Patterns,Core Programming,4 weeks,4 hours/week,CP007
9,9,Software Architecture,Core Programming,4 weeks,2-5 hours/week,CP008


In [8]:
x = df.to_dict()

In [9]:
x

{'Unnamed: 0': {0: 0,
  1: 1,
  2: 2,
  3: 3,
  4: 4,
  5: 5,
  6: 6,
  7: 7,
  8: 8,
  9: 9,
  10: 10,
  11: 11,
  12: 12,
  13: 13,
  14: 14,
  15: 15,
  16: 16,
  17: 17,
  18: 18,
  19: 19,
  20: 20,
  21: 21,
  22: 22,
  23: 23,
  24: 24,
  25: 25,
  26: 26,
  27: 27,
  28: 28,
  29: 29,
  30: 30,
  31: 31,
  32: 32,
  33: 33,
  34: 34,
  35: 35,
  36: 36,
  37: 37,
  38: 38,
  39: 39,
  40: 40,
  41: 41,
  42: 42,
  43: 43,
  44: 44,
  45: 45,
  46: 46,
  47: 47,
  48: 48,
  49: 49,
  50: 50,
  51: 51,
  52: 52},
 'Courses': {0: 'Python for Everybody',
  1: 'Introduction to Computer Science and Programming using Python',
  2: 'How to Code - Simple Data',
  3: 'How to Code - Complex Data',
  4: 'Programming Languages, Part A',
  5: 'Programming Languages, Part B',
  6: 'Programming Languages, Part C',
  7: 'Object-Oriented Design',
  8: 'Design Patterns',
  9: 'Software Architecture',
  10: 'Calculus 1A: Differentiation',
  11: 'Calculus 1B: Integration',
  12: 'Calculus 1C: Coord

In [10]:
data = dict()

In [11]:
df2 = pd.read_csv("../data/curriculum_with_prerequisites.csv")
y = df2.to_dict()

In [12]:
df2

Unnamed: 0.1,Unnamed: 0,Courses,Topic,Duration,Effort,Total Hours Lower Bound,Start Date,End Date Estimate Lower Bound,Total Hours Upper Bound,Start Date.1,End Date Estimate Upper Bound,Actual End Date,Prerequisites,Prerequisite Codes,Course Code
0,0,Python for Everybody,Intro CS,10 weeks,10 hours/week,100,28-07-2022,01-09-2022,100,28-07-2022,01-09-2022,,-,,IC001
1,1,Introduction to Computer Science and Programmi...,Intro CS,9 weeks,15 hours/week,135,01-09-2022,18-10-2022,135,01-09-2022,18-10-2022,,high school algebra,P004,IC002
2,2,How to Code - Simple Data,Core Programming,7 weeks,8-10 hours/week,56,18-10-2022,06-11-2022,70,18-10-2022,11-11-2022,,-,,CP001
3,3,How to Code - Complex Data,Core Programming,6 weeks,8-10 hours/week,48,06-11-2022,23-11-2022,60,11-11-2022,02-12-2022,,How to Code - Simple Data,CP001,CP002
4,4,"Programming Languages, Part A",Core Programming,5 weeks,4-8 hours/week,20,23-11-2022,30-11-2022,40,02-12-2022,16-12-2022,,How to Code (Hear instructor),P007,CP003
5,5,"Programming Languages, Part B",Core Programming,3 weeks,4-8 hours/week,12,30-11-2022,04-12-2022,24,16-12-2022,25-12-2022,,"Programming Languages, Part A",CP003,CP004
6,6,"Programming Languages, Part C",Core Programming,3 weeks,4-8 hours/week,12,04-12-2022,09-12-2022,24,25-12-2022,02-01-2023,,"Programming Languages, Part B",CP004,CP005
7,7,Object-Oriented Design,Core Programming,4 weeks,4 hours/week,16,09-12-2022,14-12-2022,16,02-01-2023,08-01-2023,,Java for Everybody,P002,CP006
8,8,Design Patterns,Core Programming,4 weeks,4 hours/week,16,14-12-2022,20-12-2022,16,08-01-2023,13-01-2023,,Object-Oriented Design,CP006,CP007
9,9,Software Architecture,Core Programming,4 weeks,2-5 hours/week,8,20-12-2022,23-12-2022,20,13-01-2023,20-01-2023,,Design Patterns,CP007,CP008


In [14]:
for i in x['Course Code']:
      key = x['Course Code'][i]
      data[key] = dict()
      data[key]["Course Name"] = x["Courses"][i]
      d = x["Duration"][i]
      if type(d) != type(3.45):
         if "-" in d:
            data[key]["Duration"] = int(np.mean(list(map(int, d.replace("weeks","").split("-")))))
         else:
            data[key]["Duration"] = int(d.replace("weeks","").strip())
      else:
         data[key]["Duration"] = 30
      data[key]["Prerequisites"] = []
#print(data)

for i in y['Course Code']:
      p = y["Prerequisite Codes"][i]
      #print(y['Course Code'][i], p)
      if type(p) != type(3.45):
            data[y["Course Code"][i]]["Prerequisites"] = p.split(";")

In [37]:
import json
json.dump(data, open("../data/course_data.json", "w"), indent=4)

In [6]:
for i in x['Course Code']:
    if type(x["Duration"][i]) != type(3.4):
        print(x["Course Code"][i], x["Duration"][i].replace("weeks",""))
    else:
        print(x["Course Code"][i], 30)

IC001 10 
IC002 9 
CP001 7 
CP002 6 
CP003 5 
CP004 3 
CP005 3 
CP006 4 
CP007 4 
CP008 4 
CM001 13 
CM002 13 
CM003 6 
CM004 13 
CST001 2 
CRS001 6 
CRS002 6 
CRS003 10-12 
CRS004 8 
CT001 4 
CT002 4 
CT003 4 
CT004 4 
CS001 8 
CS002 4 
CS003 4 
CS004 4 
CA001 2 
CA002 2 
CA003 2 
CA004 11 
CA005 6 
CA006 6 
CE001 9 
CE002 4 
CE003 3 
AP001 4 
AP002 9 
AP003 14 
AP004 12 
AP005 8 
AP006 4 
FP001 30
P001 13 
P002 12 
P003 11 
P004 2 
P005 8 
P006 6 
P007 4 
P008 2 
P009 18 
P010 11 
