In [16]:
from bs4 import BeautifulSoup
import pandas as pd
import networkx as nx
import re
import os
import matplotlib.pyplot as plt
from networkx.drawing.nx_pydot import write_dot
from tqdm.notebook import tqdm

In [17]:
RCDATA = "data/cos_course_data_raw.xml"

In [18]:
with open(RCDATA) as cdata:
    s = "".join(cdata.readlines())
    tree = BeautifulSoup(s)

In [19]:
rows = []
for dep in tree.findAll("courses"):
    for course in dep.findAll("course"):
        rows.append({
            "code": getattr(course.find("code"), "string", None),
            "name": getattr(course.find("name"), "string", None),
            "credits": getattr(course.find("credits"), "string", None),
            "credit-structure": getattr(course.find("credit-structure"), "string", None),
            "pre-requisites": getattr(course.find("pre-requisites"), "string", None),
            "overlap": getattr(course.find("overlap"), "string", None),
            "department": dep.get("department"),
            "description": getattr(course.find("description"), "string", None)
        })
        
df = pd.DataFrame(rows)

In [20]:
df.head()

Unnamed: 0,code,name,credits,credit-structure,pre-requisites,overlap,department,description
0,APL100,Engineering Mechanics,4,3-1-0,,,Department of Applied Mechanics,"Kinematics, Statics, Equations of Motion, Rigi..."
1,APL101,Applied Mathematics in Engineering Applications,3,3-0-0,,,Department of Applied Mechanics,ordinary Differential Equation: Second order o...
2,APL102,Introduction to Materials Science and Engineering,4,3-0-2,,,Department of Applied Mechanics,Structure of Solids: atomic and inter-atomic b...
3,APL103,Experimental Methods,4,3-0-2,,,Department of Applied Mechanics,Experimental Analysis: Types of measurements a...
4,APL104,Solid Mechanics,4,3-1-0,APL100,"APL105, APL108",Department of Applied Mechanics,"Introduction, State of stress at a point, equa..."


## Odd/Even sem course offering check 

Go over each course and the odd course data lists, and check whether the course was offered in odd or even semesters. Do nothing if offered in both, but if not consistent with both, raise a conflict.

In [21]:
YEARS = ["2122", "2021", "1920"]
COURSES_OFFERED_PATH = "data/courses_offered_anon"
odd_sem = pd.read_csv(f"{COURSES_OFFERED_PATH}/1_2122.csv")
odd_sem.head()

Unnamed: 0,S.No,Course Name,Slot Name,Units,Type,Instructor,Instructor Email,Lecture Time,Tutorial Time,Practical Time,Vacancy,Current Strength,Course Code
0,1,MAJOR PROJECT,P,0.0-0.0-80.0,,EMIR BHATIA,divit70@am.iitd.ac.in,,,,60,0,AMD895
1,2,DESIGN PROJECT,X,0.0-0.0-20.0,,RASHA TALWAR,suhana55@am.iitd.ac.in,,,,60,25,AMD899
2,3,ENGG. MATHEMATICS & MECHANICS,F,3.0-0.0-0.0,,TAIMUR SAMI,shankarmiraya@am.iitd.ac.in,TThF 11:00-12:00,,,90,25,AML701
3,4,APPLIED FLUID DYNAMICS,J,3.0-1.0-0.0,,JIVIKA BERA,randhawaanay@am.iitd.ac.in,MTF 12:00-13:00,,,90,32,AML713
4,5,SOLID MECHANICS,E,3.0-0.0-0.0,,ISHITA GHOSE,cghosh@am.iitd.ac.in,TWF 10:00-11:00,,,60,22,AML732


In [22]:
for year in YEARS:
    for i in [1,2]:
        sem_yr_data = pd.read_csv(f"{COURSES_OFFERED_PATH}/{i}_{year}.csv")
        sem_yr_data = sem_yr_data.rename(columns={'Course Code': 'code'})
        df = df.merge(sem_yr_data[['code','Slot Name', 'Vacancy', 'Current Strength']],on='code',how='left')
        df = df.rename(columns={'Slot Name': f"{i}_{year}_slot", 'Vacancy': f"{i}_{year}_vacancy", 'Current Strength': f"{i}_{year}_strength"})

df.head()

Unnamed: 0,code,name,credits,credit-structure,pre-requisites,overlap,department,description,1_2122_slot,1_2122_vacancy,...,1_2021_strength,2_2021_slot,2_2021_vacancy,2_2021_strength,1_1920_slot,1_1920_vacancy,1_1920_strength,2_1920_slot,2_1920_vacancy,2_1920_strength
0,APL100,Engineering Mechanics,4,3-1-0,,,Department of Applied Mechanics,"Kinematics, Statics, Equations of Motion, Rigi...",A,500.0,...,24.0,A,600.0,0.0,A,500.0,589.0,A,250.0,568.0
1,APL100,Engineering Mechanics,4,3-1-0,,,Department of Applied Mechanics,"Kinematics, Statics, Equations of Motion, Rigi...",A,500.0,...,24.0,A,600.0,0.0,A,500.0,589.0,SU1,0.0,0.0
2,APL100,Engineering Mechanics,4,3-1-0,,,Department of Applied Mechanics,"Kinematics, Statics, Equations of Motion, Rigi...",A,500.0,...,24.0,A,600.0,0.0,A,500.0,589.0,X,0.0,0.0
3,APL101,Applied Mathematics in Engineering Applications,3,3-0-0,,,Department of Applied Mechanics,ordinary Differential Equation: Second order o...,B,200.0,...,,,,,,,,,,
4,APL102,Introduction to Materials Science and Engineering,4,3-0-2,,,Department of Applied Mechanics,Structure of Solids: atomic and inter-atomic b...,,,...,371.0,E,225.0,149.0,E,350.0,307.0,E,300.0,134.0


## Analysis and Graph Generation

Read in the Programme XML files, and generate dependency graphs for all programmes

In [81]:
PROGRAMMES_PATH = "data/course_plans"
DOTFILES_PATH = "data/dependencies"
PROGRAMMES = os.listdir(PROGRAMMES_PATH)

In [91]:
color_dict = ["gray", "lightblue", "lightgreen", "orange"]

for prog in tqdm(PROGRAMMES):
    if prog.startswith('.'):
        continue
    with open(f"{PROGRAMMES_PATH}/{prog}") as cdata:
        s = "".join(cdata.readlines())
        soup = BeautifulSoup(s)
    clist = []
    for course in soup.courses.dc.find_all("course"):
        clist.append(course.string)
    for course in soup.courses.pl.find_all("course"):
        clist.append(course.string)
        
    if soup.program.attrs['type'] == "5y":
        for course in soup.courses.pc.find_all("course"):
            clist.append(course.string)

    G = nx.DiGraph()
    G.add_nodes_from(clist, style='filled')
    for course in clist:
        course_data = df.loc[df['code'] == course]
        if not course_data.empty: # MTL782, 783 don't have course data/description/prereqs in CoS, wow bhaiya
            prereqs = course_data.iloc[0]['pre-requisites']
            prereqlist = [] if prereqs is None else re.findall(r'[A-Z]{3}[0-9]{3}', prereqs)
            #print(f"{course}: {prereqlist}")
            for pr in prereqlist:
                if pr not in G:
                    G.add_node(pr)
                G.add_edge(pr, course)

    for node in G:
        course_data = df.loc[(df['code'] == node)]
        color = course_data.iloc[0]['sem_2122'] if not course_data.empty else 0
        G.nodes[node]['fillcolor'] = color_dict[color]

    nx.nx_pydot.to_pydot(G).write_png(f'{DOTFILES_PATH}/{prog.split(".")[0]}.png')

  0%|          | 0/17 [00:00<?, ?it/s]

In [94]:
df.loc[df['code'] == "MLL341"]

Unnamed: 0,code,name,credits,credit-structure,pre-requisites,overlap,department,description,sem_2122,sem_2021,sem_1920
1442,MLL341,Materials Processing,3,2-0-2,,,Department of Materials Science and Engineering,Introduction to primary processing of material...,0,0,0


In [209]:
df.loc[df['code'] == "MTL712"]

Unnamed: 0,code,name,credits,credit-structure,pre-requisites,overlap,department,description,sem
1551,MTL712,Computational Methods for Differential Equations,4,3-0-2,MTL107,,Department of Mathematics,Numerical methods for solving IVPs for oDEs: D...,1
