In [64]:
from bs4 import BeautifulSoup
import pandas as pd
import networkx as nx
import re
import os
import matplotlib.pyplot as plt
from networkx.drawing.nx_pydot import write_dot
from tqdm.notebook import tqdm

In [65]:
# Path to the raw course-data XML file that is opened and parsed into `tree`.
RCDATA = "data/cos_course_data_raw.xml"

In [66]:
# Read the whole course XML file into one string, then parse it.
# NOTE(review): no parser is named here, so BeautifulSoup uses whichever
# parser it finds installed - confirm which one this notebook expects.
with open(RCDATA) as xml_file:
    s = xml_file.read()
tree = BeautifulSoup(s)

In [67]:
def _tag_text(parent, tag_name):
    """Return `.string` of the first `tag_name` tag found under `parent`.

    Returns None when no such tag exists (`.string` itself may also be None).
    """
    return getattr(parent.find(tag_name), "string", None)


# Flatten the parsed XML into one record per <course>. Each record also
# carries the `department` attribute of its enclosing <courses> element.
rows = []
for dep in tree.find_all("courses"):
    department = dep.get("department")
    for course in dep.find_all("course"):
        # Key order fixes the column order of `df`.
        rows.append({
            "code": _tag_text(course, "code"),
            "name": _tag_text(course, "name"),
            "credits": _tag_text(course, "credits"),
            "credit-structure": _tag_text(course, "credit-structure"),
            "pre-requisites": _tag_text(course, "pre-requisites"),
            "overlap": _tag_text(course, "overlap"),
            "department": department,
            "description": _tag_text(course, "description"),
        })

df = pd.DataFrame(rows)

In [68]:
# Preview the parsed course table (one row per course).
df

Unnamed: 0,code,name,credits,credit-structure,pre-requisites,overlap,department,description
0,APL100,Engineering Mechanics,4,3-1-0,,,Department of Applied Mechanics,"Kinematics, Statics, Equations of Motion, Rigi..."
1,APL101,Applied Mathematics in Engineering Applications,3,3-0-0,,,Department of Applied Mechanics,ordinary Differential Equation: Second order o...
2,APL102,Introduction to Materials Science and Engineering,4,3-0-2,,,Department of Applied Mechanics,Structure of Solids: atomic and inter-atomic b...
3,APL103,Experimental Methods,4,3-0-2,,,Department of Applied Mechanics,Experimental Analysis: Types of measurements a...
4,APL104,Solid Mechanics,4,3-1-0,APL100,"APL105, APL108",Department of Applied Mechanics,"Introduction, State of stress at a point, equa..."
...,...,...,...,...,...,...,...,...
2387,JOL794,Selected Topics-II,3,3-0-0,,,Department of Textile and Fibre Engineering,
2388,JOS795,Independent Study,3,0-3-0,,,Department of Textile and Fibre Engineering,
2389,JOV796,Selected Topics in Photonics,1,1-0-0,,,Department of Textile and Fibre Engineering,
2390,JOD801,Major Project Part-I,6,0-0-12,,,Department of Textile and Fibre Engineering,


## Odd/Even sem course offering check 

Go over each course and the odd- and even-semester course data lists, and check whether the course was offered in odd or even semesters. Do nothing if it was offered in both, but if the offerings are not consistent, raise a conflict.

In [69]:
# Year tags used in the offering-list file names and in the `sem_<year>` column names.
# NOTE(review): presumably academic years 2021-22, 2020-21 and 2019-20 - confirm.
YEARS = ["2122", "2021", "1920"]
# Directory holding the offering CSVs, read as `1_<year>.csv` (odd) and `2_<year>.csv` (even).
COURSES_OFFERED_PATH = "data/courses_offered_anon"

In [79]:
# For every year, add a `sem_<year>` column encoding when each course was offered:
#   0 = in neither list, 1 = odd semester only, 2 = even semester only, 3 = both.
for year in YEARS:
    odd_sem = pd.read_csv(f"{COURSES_OFFERED_PATH}/1_{year}.csv")
    even_sem = pd.read_csv(f"{COURSES_OFFERED_PATH}/2_{year}.csv")

    # Only the first six characters of 'Course Code' are compared against `code`.
    odd_codes = odd_sem['Course Code'].str[0:6]
    even_codes = even_sem['Course Code'].str[0:6]

    offered_odd = df['code'].isin(odd_codes).astype(int)
    offered_even = df['code'].isin(even_codes).astype(int)
    df[f'sem_{year}'] = offered_odd + 2*(offered_even)

In [80]:
# Disabled exploratory query: Department of Physics courses whose offering code
# is identical in all three years and is non-zero.
#df.loc[(df['sem_2122'] == df['sem_2021']) & (df['sem_2021'] == df['sem_1920']) & (df['sem_2122'] != 0) & (df['department'] == "Department of Physics")]
# Show the first rows, now including the per-year `sem_<year>` columns.
df.head()

Unnamed: 0,code,name,credits,credit-structure,pre-requisites,overlap,department,description,sem_2122,sem_2021,sem_1920
0,APL100,Engineering Mechanics,4,3-1-0,,,Department of Applied Mechanics,"Kinematics, Statics, Equations of Motion, Rigi...",3,3,3
1,APL101,Applied Mathematics in Engineering Applications,3,3-0-0,,,Department of Applied Mechanics,ordinary Differential Equation: Second order o...,1,0,0
2,APL102,Introduction to Materials Science and Engineering,4,3-0-2,,,Department of Applied Mechanics,Structure of Solids: atomic and inter-atomic b...,0,3,3
3,APL103,Experimental Methods,4,3-0-2,,,Department of Applied Mechanics,Experimental Analysis: Types of measurements a...,3,3,3
4,APL104,Solid Mechanics,4,3-1-0,APL100,"APL105, APL108",Department of Applied Mechanics,"Introduction, State of stress at a point, equa...",3,3,1


## Analysis and Graph Generation

Read in the programme XML files and generate a prerequisite dependency graph for each programme, with every course node coloured by its `sem_2122` offering code.

In [81]:
# Input directory: one course-plan file per programme.
PROGRAMMES_PATH = "data/course_plans"
# Output directory: the rendered dependency-graph PNGs are written here.
DOTFILES_PATH = "data/dependencies"
# Every entry of the input directory; entries starting with '.' are skipped when this list is iterated.
PROGRAMMES = os.listdir(PROGRAMMES_PATH)

In [90]:
# Node fill colour, indexed by the `sem_2122` offering code:
#   0 = in neither list, 1 = odd semester only, 2 = even semester only, 3 = both.
color_dict = ["gray", "lightblue", "lightgreen", "orange"]

# Create the output directory up front so write_png does not fail on a fresh checkout.
os.makedirs(DOTFILES_PATH, exist_ok=True)

for prog in tqdm(PROGRAMMES):
    if prog.startswith('.'):
        continue  # skip hidden entries in the programmes directory

    with open(f"{PROGRAMMES_PATH}/{prog}") as cdata:
        s = cdata.read()
    # NOTE(review): no parser is named, so BeautifulSoup uses whichever one is installed.
    soup = BeautifulSoup(s)

    # Programme courses come from the <dc> and <pl> groups, plus <pc> when the
    # programme's `type` attribute is "5y". `.get` tolerates a missing attribute.
    course_groups = [soup.courses.dc, soup.courses.pl]
    if soup.program.get('type') == "5y":
        course_groups.append(soup.courses.pc)
    clist = [course.string for group in course_groups for course in group.find_all("course")]

    # One filled node per programme course, and one edge prerequisite -> course.
    G = nx.DiGraph()
    G.add_nodes_from(clist, style='filled')
    for course in clist:
        course_data = df.loc[df['code'] == course]
        if course_data.empty:
            # Some codes (e.g. MTL782, MTL783) have no course data/description/prereqs in CoS.
            continue
        prereqs = course_data.iloc[0]['pre-requisites']
        # A missing value may be None or NaN; only real strings are scanned for codes.
        prereqlist = re.findall(r'[A-Z]{3}[0-9]{3}', prereqs) if isinstance(prereqs, str) else []
        for pr in prereqlist:
            # add_edge creates `pr` if it is not in the graph yet; such nodes
            # are added without style='filled', exactly as before.
            G.add_edge(pr, course)

    # Colour every node by its 2122 offering code; codes missing from `df` fall back to 0.
    for node in G:
        course_data = df.loc[(df['code'] == node)]
        color = course_data.iloc[0]['sem_2122'] if not course_data.empty else 0
        G.nodes[node]['fillcolor'] = color_dict[color]

    # Render to <DOTFILES_PATH>/<file name up to its first '.'>.png
    nx.nx_pydot.to_pydot(G).write_png(f'{DOTFILES_PATH}/{prog.split(".")[0]}.png')

  0%|          | 0/17 [00:00<?, ?it/s]

5y
5y
5y


In [83]:
# Spot-check a single course row, including its per-year offering codes.
df.loc[df['code'] == "COL106"]

Unnamed: 0,code,name,credits,credit-structure,pre-requisites,overlap,department,description,sem_2122,sem_2021,sem_1920
553,COL106,Data Structures and Algorithms,5,3-0-4,COL100,,Department of Computer Science and Engineering,Introduction to object-oriented programming th...,3,3,3


In [209]:
# Spot-check a single course row.
# NOTE(review): the saved output of this cell shows a single `sem` column rather
# than the `sem_<year>` columns created earlier in the notebook, so it appears
# to come from an older run - re-execute to refresh it.
df.loc[df['code'] == "MTL712"]

Unnamed: 0,code,name,credits,credit-structure,pre-requisites,overlap,department,description,sem
1551,MTL712,Computational Methods for Differential Equations,4,3-0-2,MTL107,,Department of Mathematics,Numerical methods for solving IVPs for oDEs: D...,1
