In [1]:
import pandas as pd
import re

In [5]:
core_course_titles = pd.read_csv("ENVR Courses - Core Courses.csv", header=None, names=['course_codes'])
core_course_titles = core_course_titles['course_codes'].tolist()

In [6]:
import json

with open('all_science_courses.json', 'r') as file:
    all_courses = json.load(file)

all_courses[0]

{'course_code': 'ANAT 392',
 'course_title': 'Gross Anatomy of the Limbs and Trunk',
 'description': 'Lectures and laboratory sessions on the human gross and functional anatomy of the limbs and trunk. The course includes the study of predissected specimens. For credit only in the Department of Physical Therapy.',
 'prerequisites': [],
 'corequisites': [],
 'themes': []}

In [11]:
pattern = re.compile(r'\b[A-Z]{3,}_V\s\d{3}\b')

valid_courses = []
for course in core_course_titles:
    if pattern.fullmatch(course):
        valid_course = course.replace('_V', '')
        valid_courses.append(valid_course)

print(valid_courses)

['BIOL 121', 'ENVR 100', 'CHEM 121', 'CHEM 111', 'CHEM 141', 'SCIE 113', 'DSCI 100', 'MATH 100', 'MATH 102', 'MATH 104', 'MATH 180', 'MATH 184', 'MATH 120', 'MATH 101', 'MATH 103', 'MATH 105', 'MATH 121', 'ENVR 200', 'ENVR 205', 'ENVR 240', 'EOSC 340', 'STAT 200', 'STAT 201', 'BIOL 300', 'ENVR 300', 'ENVR 305', 'ENVR 350', 'ENVR 400', 'ENVR 450', 'EOSC 345']


In [12]:
print(len(core_course_titles), len(valid_courses))

31 30


In [15]:
def build_course_lookup(course_list):
    return {course['course_code']: course for course in course_list}

def get_recursive_courses(start_codes, course_lookup, valid_courses):
    visited = set()
    stack = list(start_codes)

    while stack:
        code = stack.pop()
        if code in visited:
            continue
        visited.add(code)

        course = course_lookup.get(code)
        if course:
            related = course.get('prerequisites', []) + course.get('corequisites', [])
            for rel_code in related:
                if rel_code not in visited:
                    stack.append(rel_code)
        else:
            # Still add to visited even if not found in original JSON
            continue

    return visited

def extract_full_course_info(course_lookup, valid_courses):
    all_needed_codes = get_recursive_courses(valid_courses, course_lookup, valid_courses)
    all_needed_codes.update(valid_courses)  # Include any valid course not reachable through prereqs/coreqs

    filtered_courses = []
    added = set()

    for code in all_needed_codes:
        if code in added:
            continue
        added.add(code)

        if code in course_lookup:
            course_data = course_lookup[code].copy()
        else:
            course_data = {
                'course_code': code,
                'course_title': '',
                'description': '',
                'prerequisites': [],
                'corequisites': [],
                'themes': []
            }

        course_data['envr_course'] = code in valid_courses
        filtered_courses.append(course_data)

    return filtered_courses

# Example usage:
course_lookup = build_course_lookup(all_courses)
final_courses = extract_full_course_info(course_lookup, valid_courses)

print(final_courses)

[{'course_code': 'BIOL 121', 'course_title': 'Genetics, Evolution and Ecology', 'description': 'Principles of storage and transmission of genetic variation; origin and evolution of species and their ecological interactions. Credit will be granted for only one of BIOL 121 or BIOL 344. [3-0-0] Prerequisite: One of BIOL 11, BIOL 12, BIOL 111.', 'prerequisites': ['BIOL 111'], 'corequisites': [], 'themes': [], 'envr_course': True}, {'course_code': 'MATH 104', 'course_title': '', 'description': '', 'prerequisites': [], 'corequisites': [], 'themes': [], 'envr_course': True}, {'course_code': 'MATH 105', 'course_title': '', 'description': '', 'prerequisites': [], 'corequisites': [], 'themes': [], 'envr_course': True}, {'course_code': 'BIOL 111', 'course_title': 'Introduction to Modern Biology', 'description': 'Concepts fundamental to biological issues, such as the genetic basis of biological variation, evolution, infectious diseases, causes of cancer, population growth, and human effects on eco

In [14]:
len(final_courses)

39

In [17]:
with open("envr_major_core.json", "w") as final:
	json.dump(final_courses, final, indent=4)