In [288]:
import configparser
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re

In [None]:
config = configparser.ConfigParser()

# ConfigParser.read() does not raise when the file is missing; it returns the
# list of files it actually parsed. Check that list so a missing config.ini
# fails here with a clear message rather than later as a NoSectionError.
loaded_config_files = config.read('config.ini')
if not loaded_config_files:
    raise FileNotFoundError(
        "Could not read 'config.ini' from the current working directory"
    )

# Active course list of the OMSCS program

In [292]:
# URL of the page listing the currently offered courses, taken from config.ini
current_courses_url = config.get('Paths', 'current_gatech_omscs_courses_url')

# Fetch the courses currently offered by GT.
# Use the URL read above (the previous code referenced a name that is not
# defined in this cell), bound the wait with a timeout, and fail loudly on
# HTTP errors instead of parsing an error page.
response = requests.get(current_courses_url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')

# Find the h4 heading that says 'Current & Ongoing OMS Courses'
h4_element = soup.find('h4', string='Current & Ongoing OMS Courses')
if h4_element is None:
    raise ValueError(
        "Heading 'Current & Ongoing OMS Courses' not found at "
        f"{current_courses_url}; the page layout may have changed"
    )

# Get the next ul element, which contains all active courses
ul_element = h4_element.find_next('ul')
if ul_element is None:
    raise ValueError(
        "No <ul> element follows the 'Current & Ongoing OMS Courses' heading"
    )

# Remove sup tags before extracting text so their content does not end up
# inside the course strings
for sup in ul_element.find_all('sup'):
    sup.decompose()

# Extract the list item content, collapsing every run of whitespace
# (including newlines) into a single space
course_list = [' '.join(li.get_text().split()) for li in ul_element.find_all('li')]

In [293]:
# Function to extract old course code and clean course name
def extract_old_course_code(course_name):
    """Split a "(formerly <code>)" note off a course name.

    Parameters
    ----------
    course_name : str or missing value
        Course title, optionally containing a parenthesised
        "(formerly <old code>)" note. Non-string values (None, NaN) are
        accepted and passed through untouched.

    Returns
    -------
    tuple
        ``(clean_name, old_code)``. ``clean_name`` is the text before the
        "(formerly ...)" note, or the whole name when there is no note, with
        surrounding whitespace removed. ``old_code`` is the text after
        "formerly ", or ``None`` when there is no note.
    """
    # Missing values cannot be searched; hand them back unchanged instead of
    # letting re.search raise a TypeError.
    if not isinstance(course_name, str):
        return course_name, None

    # Non-greedy capture stops at the first closing parenthesis.
    match = re.search(r'\(formerly (.+?)\)', course_name)
    if match is None:
        # Strip here as well so both branches return a consistently clean name.
        return course_name.strip(), None

    new_name = course_name[:match.start()].strip()
    old_code = match.group(1).strip()
    return new_name, old_code

In [294]:
# Convert raw course list to initial dataframe
df = pd.DataFrame(course_list, columns=['Raw Course'])

# Remove a single leading asterisk if present, then trim stray whitespace
df['Raw Course'] = df['Raw Course'].str.replace(r'^\*', '', regex=True).str.strip()

# Split into Course Code and Course Name on the FIRST colon only, so a course
# title that itself contains a colon is kept whole. The split is done once
# and reused for both columns.
code_and_name = df['Raw Course'].str.split(':', n=1)
df['Course Code'] = code_and_name.str[0].str.strip()

# Separate the clean Course Name from the Old Course Code.
# Entries without a colon yield a missing name; those are passed through with
# no old code instead of being handed to the regex helper. Names are stripped
# first so the text after "<code>: " carries no leading space.
parsed_names = [
    extract_old_course_code(name.strip()) if isinstance(name, str) else (name, None)
    for name in code_and_name.str[1]
]
df['Course Name'] = [clean_name for clean_name, _ in parsed_names]
df['Old Course Code'] = [old_code for _, old_code in parsed_names]

In [295]:
# Display the parsed course table as this cell's output for a visual check
# of the Course Code / Course Name / Old Course Code columns
df

Unnamed: 0,Raw Course,Course Code,Course Name,Old Course Code
0,CS 6035: Introduction to Information Security,CS 6035,Introduction to Information Security,
1,CS 6150: Computing for Good,CS 6150,Computing for Good,
2,CS 6200: Introduction to Operating Systems (fo...,CS 6200,Introduction to Operating Systems,CS 8803 O02
3,CS 6210: Advanced Operating Systems,CS 6210,Advanced Operating Systems,
4,CS 6211: System Design for Cloud Computing (fo...,CS 6211,System Design for Cloud Computing,CS 8803 O12
5,CS 6238: Secure Computer Systems,CS 6238,Secure Computer Systems,
6,CS 6250: Computer Networks,CS 6250,Computer Networks,
7,CS 6260: Applied Cryptography,CS 6260,Applied Cryptography,
8,CS 6261: Security Incident Response (formerly ...,CS 6261,Security Incident Response,CS 8803 O22
9,CS 6262: Network Security,CS 6262,Network Security,
