In [21]:
from pathlib import Path
import pandas as pd
import numpy as np
import json

# Notebook-wide pandas display settings: show every column, cap printed rows
# at 100, keep wide frames on a single line and never truncate cell text.
for option_name, option_value in (
    ('display.max_columns', None),
    ('display.max_rows', 100),
    ('display.expand_frame_repr', False),
    ('max_colwidth', None),
):
    pd.set_option(option_name, option_value)

def trim_all_columns(df):
    """
    Trim whitespace from ends of each value across all series in dataframe

    Only ``str`` values are stripped; every other value (numbers, NaN, None,
    ...) is passed through unchanged.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose cells should be cleaned. It is not modified.

    Returns
    -------
    pandas.DataFrame
        A new frame of the same shape with string cells stripped.
    """
    def _trim(value):
        return value.strip() if isinstance(value, str) else value

    # DataFrame.applymap was deprecated in pandas 2.1 in favour of
    # DataFrame.map. The check is made on the class (not the instance) so a
    # column that happens to be named "map" cannot shadow the method lookup.
    if hasattr(type(df), "map"):
        return df.map(_trim)
    return df.applymap(_trim)

In [2]:
# Grade points awarded per letter grade (same weights the notebook has always
# used; "F" contributes students but no points). Dict order is also the order
# of the grade columns in the resulting frame.
GRADE_POINTS = {
    "A+": 4, "A": 4, "A-": 3.67,
    "B+": 3.33, "B": 3, "B-": 2.67,
    "C+": 2.33, "C": 2, "C-": 1.67,
    "D+": 1.33, "D": 1, "D-": 0.67,
    "F": 0,
}
GRADE_COLUMNS = list(GRADE_POINTS)

# Load the per-section grade distribution and normalise the key column names.
df_gpa = pd.read_csv("https://github.com/wadefagen/datasets/raw/master/gpa/uiuc-gpa-dataset.csv").rename(columns={"Year":"year","Term":"term","Subject":"subject","Number":"number","Primary Instructor":"instructor"})

# Reduce the instructor to the "Lastname, F" form captured by the regex;
# rows where the pattern does not match become NaN.
df_gpa["instructor"] = df_gpa['instructor'].str.extract(r'(\w+, \w)', expand=False)

# Collapse sections to one row per (year, term, course, instructor) by summing
# the grade counts. groupby drops rows whose instructor is NaN.
df_gpa = (
    df_gpa
    .groupby(["year", "term", "subject", "number", "instructor"], as_index=False)[GRADE_COLUMNS]
    .sum()
    .assign(
        total_students=lambda d: d[GRADE_COLUMNS].sum(axis=1),
        # GPA is derived from the *aggregated* counts so that it is weighted by
        # enrolment. Averaging per-section GPAs (the previous approach) gave a
        # 5-student section the same weight as a 300-student one and produced a
        # value inconsistent with the grade counts stored on the same row.
        gpa=lambda d: d[GRADE_COLUMNS].mul(pd.Series(GRADE_POINTS), axis="columns").sum(axis=1) / d["total_students"],
        course=lambda d: d["subject"] + " " + d["number"].astype(str),
    )
    [["year", "term", "subject", "number", "instructor", "gpa", "total_students", *GRADE_COLUMNS, "course"]]
)

df_gpa

Unnamed: 0,year,term,subject,number,instructor,gpa,total_students,A+,A,A-,...,B,B-,C+,C,C-,D+,D,D-,F,course
0,2010,Fall,AAS,100,"Arnaldo, C",3.463613,69,0,22,21,...,8,3,1,2,1,0,0,1,0,AAS 100
1,2010,Fall,AAS,100,"Kwon, Y",3.358982,61,6,10,14,...,5,2,3,1,0,0,0,1,1,AAS 100
2,2010,Fall,AAS,100,"Manalansan, M",3.980294,34,21,12,0,...,0,0,0,0,0,0,0,0,0,AAS 100
3,2010,Fall,AAS,100,"Winkelmann, M",3.422059,34,1,12,11,...,0,0,1,2,0,0,1,0,1,AAS 100
4,2010,Fall,AAS,120,"Lee, A",3.127315,65,8,11,3,...,13,5,2,5,1,0,4,0,0,AAS 120
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39791,2020,Summer,TAM,212,"Chang, W",2.808077,52,5,7,7,...,6,6,0,5,2,3,1,0,4,TAM 212
39792,2020,Summer,TAM,251,"Kim, S",3.417083,48,5,14,8,...,10,2,1,3,1,0,0,0,0,TAM 251
39793,2020,Summer,TAM,335,"Ramlawi, N",3.097500,24,2,4,4,...,3,3,1,1,0,0,1,0,1,TAM 335
39794,2020,Summer,THEA,101,"Morrissette, J",3.586707,59,35,4,4,...,2,5,2,1,0,1,0,0,1,THEA 101


In [3]:
# Every "<year>-<term>.csv" catalog export that could exist for the years
# covered by the GPA data (plus one extra year); only files actually present
# on disk are kept.
candidate_paths = (
    "../raw/{}-{}.csv".format(year, term)
    for year in range(df_gpa["year"].min(), df_gpa["year"].max() + 2)
    for term in ["Winter", "Spring", "Summer", "Fall"]
)
terms = [csv_path for csv_path in candidate_paths if Path(csv_path).is_file()]

# Stack all catalogs into one frame.
df_catalogs = pd.concat([pd.read_csv(csv_path) for csv_path in terms], ignore_index=True)

# Ordered categorical so that, within a year, terms sort Fall -> Summer -> Spring -> Winter.
df_catalogs["term"] = pd.Categorical(
    df_catalogs["term"],
    categories=["Fall","Summer","Spring","Winter"],
    ordered=True,
)
# Years descending, everything else ascending.
df_catalogs = df_catalogs.sort_values(
    by=["year", "term", "subject", "number", "crn", "meeting"],
    ascending=[False, True, True, True, True, True],
    ignore_index=True,
)
df_catalogs["course"] = df_catalogs["subject"] + " " + df_catalogs["number"].astype(str)

# Fix typos in descriptions
description_fixes = {
    "HIST 574": "Immerses students in major works of recent American religious history. Written from multiple disciplinary perspectives and wrestling with the knotty problems in which religion has been interwoven, these books will give the student a solid foundation in American religious history. 4 graduate hours. No professional credit.",
    "ASST 104": "Same as REL 104. See REL 104.",
    "EPOL 551": "Same as EOL 570. See EOL 570.",
}
for course_id, fixed_description in description_fixes.items():
    df_catalogs.loc[df_catalogs["course"] == course_id, "description"] = fixed_description

# Strip stray whitespace, then attach GPA data; the left join keeps every
# catalog row whether or not a GPA row matches.
df_catalogs = trim_all_columns(df_catalogs).merge(
    df_gpa,
    how="left",
    on=["year", "term", "course", "subject", "number", "instructor"],
)

# The bare string below is disabled code (section/meeting id construction);
# as a string expression it has no effect when the cell runs.
"""
df_catalogs["sectionId"] = (df_catalogs["year"].astype(str) + 
                            df_catalogs["term"].astype(str) +
                            df_catalogs["subject"].astype(str) + 
                            df_catalogs["number"].astype(str) +
                            df_catalogs["crn"].astype(str) +
                            df_catalogs["section"].astype(str))
df_catalogs["meetingId"] = (df_catalogs["year"].astype(str) + 
                            df_catalogs["term"].astype(str) +
                            df_catalogs["crn"].astype(str) +
                            df_catalogs["subject"].astype(str) + 
                            df_catalogs["number"].astype(str) +
                            df_catalogs["section"].astype(str) +
                            df_catalogs["meeting"].astype(str))
"""

df_catalogs

Unnamed: 0,year,term,college,subject,subject_name,number,name,description,credit_hours,gen_ed,...,B+,B,B-,C+,C,C-,D+,D,D-,F
0,2021,Spring,KV,AAS,Asian American Studies,100,Intro Asian American Studies,Interdisciplinary introduction to the basic co...,3 hours.,1US,...,,,,,,,,,,
1,2021,Spring,KV,AAS,Asian American Studies,100,Intro Asian American Studies,Interdisciplinary introduction to the basic co...,3 hours.,1SS,...,,,,,,,,,,
2,2021,Spring,KV,AAS,Asian American Studies,100,Intro Asian American Studies,Interdisciplinary introduction to the basic co...,3 hours.,1US,...,,,,,,,,,,
3,2021,Spring,KV,AAS,Asian American Studies,100,Intro Asian American Studies,Interdisciplinary introduction to the basic co...,3 hours.,1SS,...,,,,,,,,,,
4,2021,Spring,KV,AAS,Asian American Studies,100,Intro Asian American Studies,Interdisciplinary introduction to the basic co...,3 hours.,1US,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
415066,2010,Spring,KV,YDSH,Yiddish,420,Jewish Life-Writing,Jewish life-writing from the late 18th century...,3 OR 4 hours.,,...,,,,,,,,,,
415067,2010,Spring,KV,ZULU,Zulu,404,Intermediate Zulu II,Continuation of ZULU 403; emphasis on increasi...,4 hours.,,...,,,,,,,,,,
415068,2010,Spring,KV,ZULU,Zulu,404,Intermediate Zulu II,Continuation of ZULU 403; emphasis on increasi...,4 hours.,,...,,,,,,,,,,
415069,2010,Spring,KV,ZULU,Zulu,406,Advanced Zulu II,Continuation of Zulu 405 with increased emphas...,3 hours.,,...,,,,,,,,,,


In [4]:
# Read the college JSON with its keys as the index, then turn that index into
# a "collegeId" column next to the college "name".
df_colleges = pd.read_json("../Colleges.json", orient="index")
df_colleges = df_colleges.reset_index().rename(columns={"index": "collegeId", 0: "name"})

# Export the college node list for the Neo4j import.
df_colleges.to_csv("../neo4j/nodes/college_nodes.csv", index=False)
df_colleges

Unnamed: 0,collegeId,name
0,KL,"Agricultural, Consumer and Environmental Sciences"
1,KY,Applied Health Sciences
2,LD,Armed Forces
3,KT,College of Media
4,KW,Division of General Studies
5,KN,Education
6,KP,Engineering
7,LK,Environmental Council
8,KR,Fine and Applied Arts
9,KM,Gies College of Business


In [5]:
# Catalog column -> node property name for subject nodes.
subject_columns = {"subject": "subjectId", "subject_name": "name"}

# Distinct (subject code, subject name) pairs, without incomplete rows.
df_subjects = df_catalogs[list(subject_columns)].drop_duplicates(ignore_index=True)
df_subjects = df_subjects.dropna().rename(columns=subject_columns)

# Export the subject node list for the Neo4j import.
df_subjects.to_csv("../neo4j/nodes/subject_nodes.csv", index=False)
df_subjects

Unnamed: 0,subjectId,name
0,AAS,Asian American Studies
1,ABE,Agricultural and Biological Engineering
2,ACCY,Accountancy
3,ACE,Agricultural and Consumer Economics
4,ACES,"Agricultural, Consumer and Environmental Sciences"
...,...,...
208,CINE,Cinema Studies
209,MS,Media Studies
210,ESES,"ZZZ Earth Sys, Environ & Socie"
211,LGLA,Lingala


In [6]:
# Captures the course a description points at, e.g. "See REL 104." -> "REL 104".
# NOTE(review): the pattern is not anchored, so any description that contains
# such a phrase anywhere is treated as a pointer and replaced wholesale.
SEE_COURSE_PATTERN = r"See\s*([A-Z]{2,4}\s*[0-9]{3})"

# One row per course (first occurrence in df_catalogs wins), indexed by
# courseId; courses missing any of the selected fields are dropped.
df_courses = (df_catalogs[["course", "number", "name", "description", "credit_hours"]]
              .drop_duplicates(["course"], ignore_index=True)
              .dropna()
              .rename(columns={"course": "courseId", "credit_hours": "creditHours"})
              .set_index(["courseId"]))

# Courses whose description refers the reader to another course.
see_course = df_courses["description"].str.extract(SEE_COURSE_PATTERN)[0].dropna()

# .copy() so the helper columns are added to an independent frame rather than
# to a slice of df_courses (avoids SettingWithCopyWarning).
df_bad_descriptions = df_courses.loc[see_course.index].copy()
df_bad_descriptions["see_course"] = see_course.values
# reindex (instead of .loc) yields NaN rather than raising KeyError when the
# referenced course is not present in df_courses.
df_bad_descriptions["better_description"] = df_courses["description"].reindex(see_course.values).values

# Borrow the referenced course's description. Inside the borrowed text, any
# mention of the current course id is swapped for the referenced course id.
# Pointers to unknown courses keep their original description.
resolvable = df_bad_descriptions.dropna(subset=["better_description"])
if not resolvable.empty:
    df_courses.loc[resolvable.index, "description"] = [
        better.replace(course_id, target)
        for course_id, target, better in zip(
            resolvable.index, resolvable["see_course"], resolvable["better_description"]
        )
    ]

df_courses = df_courses.reset_index()
# Export the course node list for the Neo4j import.
df_courses.to_csv("../neo4j/nodes/course_nodes.csv", index=False)
df_courses

Unnamed: 0,courseId,number,name,description,creditHours
0,AAS 100,100,Intro Asian American Studies,Interdisciplinary introduction to the basic co...,3 hours.
1,AAS 201,201,US Racial & Ethnic Politics,Examines efforts by racial and ethnic communit...,3 hours.
2,AAS 215,215,US Citizenship Comparatively,"Examines the racial, gendered, and sexualized ...",3 hours.
3,AAS 246,246,Asian American Youth in Film,Examines both mainstream and independent films...,3 hours.
4,AAS 258,258,Muslims in America,Introduction to the study of Muslims in the Un...,3 hours.
...,...,...,...,...,...
10193,VCM 658,658,Clinical Procedure/Problem I,Course is designed to train students in physic...,1 hours.
10194,VCM 683,683,Advanced Soft Tissue Surgery,"Advanced instruction in the pathophysiology, d...",1 hours.
10195,VCM 684,684,Client Relations,"Introduction to client relations, including te...",1 hours.
10196,VCM 691,691,Adv Orthopedics Fract Fixation,Advanced instruction in the pathophysiology of...,1 hours.


In [22]:
# Pull the text that follows "Prerequisite:" out of each description, up to
# (not including) the next period; descriptions without the marker get NaN.
# Raw string: "\s" in a normal string literal is an invalid escape sequence.
df_courses["prerequisites"] = df_courses["description"].str.extract(r"Prerequisite:\s*([^.]*)", expand=False)
df_courses

Unnamed: 0,courseId,number,name,description,creditHours,prerequisites
0,AAS 100,100,Intro Asian American Studies,"Interdisciplinary introduction to the basic concepts and approaches in Asian American Studies. Surveys the various dimensions of Asian American experiences including history, social organization, literature, arts, and politics.",3 hours.,
1,AAS 201,201,US Racial & Ethnic Politics,"Examines efforts by racial and ethnic communities to organize politically and by society to allocate resources based on race or ethnicity. Topical focus includes African Americans, Latinos, Asian Americans, Native Americans, and white ethnics. The primary goal of the course is to develop a more comprehensive understanding of racial and ethnic politics by identifying commonalities and differences among these groups and their relationship to the state. Same as PS 201, AFRO 201, and LLS 201.",3 hours.,
2,AAS 215,215,US Citizenship Comparatively,"Examines the racial, gendered, and sexualized aspects of US citizenship historically and comparatively. Interdisciplinary course taught from a humanities perspective. Readings draw from critical legal studies, history, literature, literary criticism, and ethnography. Same as AFRO 215, AIS 295, GWS 215, and LLS 215. Prerequisite: One of: AAS 100, AAS 120, AFRO 100 AIS 101, GWS 250, LLS 100.",3 hours.,"One of: AAS 100, AAS 120, AFRO 100 AIS 101, GWS 250, LLS 100"
3,AAS 246,246,Asian American Youth in Film,Examines both mainstream and independent films and documentaries representing and/or produced by Asian American youth. Explores the role of multiculturalism and diversity issues in informing young people's experiences.,3 hours.,
4,AAS 258,258,Muslims in America,"Introduction to the study of Muslims in the United States and broadly the history of Islam in the Americas. Using a comparative approach, we study how the historical narrative of African American and Latino Muslims relates to newer immigrant populations, primarily Arab American and South Asian American Muslim communities. Same as LLS 258 and REL 258.",3 hours.,
...,...,...,...,...,...,...
10193,VCM 658,658,Clinical Procedure/Problem I,"Course is designed to train students in physical examination, procedural and other diagnostic skills required to diagnose and treat common diseases of companion animals. Students will also be introduced to problem-based medical problems solving and evidence-based medicine. Prerequisite: Second-year standing in the veterinary medicine curriculum.",1 hours.,Second-year standing in the veterinary medicine curriculum
10194,VCM 683,683,Advanced Soft Tissue Surgery,"Advanced instruction in the pathophysiology, diagnosis and treatment of soft tissue surgical disorders of the small animal patient. Lectures will incorporate clinical case presentations and discussion. The laboratory sessions will be used to teach surgical procedures which are commonly performed in small animal clinical practice and which are not taught in the core curriculum. Prerequisite: Concurrent registration in VCM 654.",1 hours.,Concurrent registration in VCM 654
10195,VCM 684,684,Client Relations,"Introduction to client relations, including techniques of effective verbal and nonverbal communication and applications of these techniques for veterinary students.",1 hours.,
10196,VCM 691,691,Adv Orthopedics Fract Fixation,"Advanced instruction in the pathophysiology of bone fracture and healing, techniques of fracture fixation, and complications of fracture repair. Prerequisite: VCM 654; third year standing in the veterinary medicine curriculum.",1 hours.,VCM 654; third year standing in the veterinary medicine curriculum


In [28]:
# Courses whose prerequisite text mentions "one of" (case-insensitive);
# courses without prerequisite text are excluded.
df_courses.loc[lambda courses: courses["prerequisites"].str.contains("One of", case=False, na=False)]

Unnamed: 0,courseId,number,name,description,creditHours,prerequisites
2,AAS 215,215,US Citizenship Comparatively,"Examines the racial, gendered, and sexualized aspects of US citizenship historically and comparatively. Interdisciplinary course taught from a humanities perspective. Readings draw from critical legal studies, history, literature, literary criticism, and ethnography. Same as AFRO 215, AIS 295, GWS 215, and LLS 215. Prerequisite: One of: AAS 100, AAS 120, AFRO 100 AIS 101, GWS 250, LLS 100.",3 hours.,"One of: AAS 100, AAS 120, AFRO 100 AIS 101, GWS 250, LLS 100"
17,ABE 225,225,ABE Principles: Bioenvironment,"Principles of environmental control for biological structures: psychrometrics; mass and heat transfer through buildings; ventilation requirements. Prerequisite: One of MATH 220, MATH 221, MATH 234.",2 hours.,"One of MATH 220, MATH 221, MATH 234"
18,ABE 226,226,ABE Principles: Bioprocessing,"Principles of bioprocess engineering applied to food and agricultural products: material balances; fluid flow; heat and mass transfers; drying; evaporation; fermentation; distillation; process simulation. Prerequisite: One of MATH 220, MATH 221, MATH 234.",2 hours.,"One of MATH 220, MATH 221, MATH 234"
27,ABE 469,469,Industry-Linked Design Project,"Industry-submitted and sponsored design projects which utilize principles of design, engineering analysis and functional operation of engineering systems. Design teams develop concepts, evaluate alternatives, model and analyze solutions, and build and test a final product. Emphases on communication skills, technical writing, and interaction with industry representatives. 4 undergraduate hours. 4 graduate hours. Prerequisite: One of ABE 361, CHBE 421, TAM 335; or credit or concurrent registration in ME 370.",4 hours.,"One of ABE 361, CHBE 421, TAM 335; or credit or concurrent registration in ME 370"
111,ACE 444,444,Financial Services & Investing Planning,"Advanced skills in and understanding of asset pricing, equity and debt investment, portfolio theory and diversification, asset allocation, financial risk management, and financial intermediation and regulation emphasizing applications in financial planning and agricultural finance. 3 undergraduate hours. 3 or 4 graduate hours. Prerequisite: One of ACE 240, ACE 345, or FIN 221 or consent of instructor.",3 OR 4 hours.,"One of ACE 240, ACE 345, or FIN 221 or consent of instructor"
...,...,...,...,...,...,...
10074,AVI 542,542,Cooperative Problem Solving,"Advanced graduate seminar on problem-solving models and taxonomies, models of coordination of activity and communication among multiple agents, design of human-machine cooperative problem-solving systems, adaptive automation, and intelligent decision support. Readings drawn from work in pragmatics, distributed artificial intelligence, cognitive engineering, and other related areas. Same as IE 542. Prerequisite: Credit or concurrent registration in at least one of CS 440, IE 540, PSYC 527.",4 hours.,"Credit or concurrent registration in at least one of CS 440, IE 540, PSYC 527"
10079,CEE 527,527,Constr Conflict Resolution,"Basic theories and applications of dispute avoidance and resolution techniques in the construction industry. Mechanisms to promote collaborative environments and resolve disputes in construction projects; the different steps in the Dispute Resolution Ladder and the main features of a conflict management plan; case studies of practical applications of disputes avoidance and resolution techniques in the construction industry throughout the world. Prerequisite: One of CEE 420, CEE 421, CEE 422.",4 hours.,"One of CEE 420, CEE 421, CEE 422"
10096,ESES 208,208,History of the Earth System,"Presents systematic analysis of formation and evolution of the Earth and its dynamic systems (lithosphere, hydrosphere, atmosphere, and biosphere). Also introduces methods of reconstructing Earth's history through use of geochronology, paleontology, and the stratigraphic records. Introduces the geological history of life evolution, mountain belts and continents, geochemical systems, climate, sea level, and the Earth's interior. Field trip required. Same as ESE 208. Additional fees may apply. See Class Schedule. Prerequisite: One of GEOL 100, GEOL 101, GEOL 103, GEOL 104 or GEOL 107; or consent of instructor.",4 hours.,"One of GEOL 100, GEOL 101, GEOL 103, GEOL 104 or GEOL 107; or consent of instructor"
10099,ESES 333,333,Earth Materials and the Env,"Studies the origin, identification, and environmental significance of earth materials (minerals, rocks, and soil). Environmental topics include: mineral resources; acid mine drainage; volcanic hazards; swelling soils; engineering strength, porosity/permeability, and architectural uses of earth materials; and asbestos. One day field trip is required. Same as ESE 333. Additional fees may apply. See Class Schedule. Credit is not given for both GEOL 333 and GEOL 432. Prerequisite: CHEM 102 and CHEM 103; GEOL 100 and GEOL 110, or one of GEOL 101, GEOL 103, GEOL 104 or GEOL 107; or consent of instructor.",4 hours.,"CHEM 102 and CHEM 103; GEOL 100 and GEOL 110, or one of GEOL 101, GEOL 103, GEOL 104 or GEOL 107; or consent of instructor"


In [29]:
# Courses whose prerequisite text mentions "both" (case-insensitive);
# courses without prerequisite text are excluded.
df_courses.loc[lambda courses: courses["prerequisites"].str.contains("both", case=False, na=False)]

Unnamed: 0,courseId,number,name,description,creditHours,prerequisites
390,ANTH 471,471,Ethnography through Language,"Overview of theoretical perspectives and methodologies in linguistic anthropology, including sociolinguistics, ethnography of communication, performance and poetics, discursive practices, and structural analyses. 3 undergraduate hours. 4 graduate hours. Prerequisite: ANTH 230 or ANTH 270 and preferably both.",3 OR 4 hours.,ANTH 230 or ANTH 270 and preferably both
908,CHEM 315,315,Instrumental Chem Systems Lab,Laboratory course emphasizes the application of modern instrumental techniques for characterizing the kinetic behavior and equilibrium properties of chemical systems. Prerequisite: Either CHEM 237 or both CHEM 223 and CHEM 233.,2 hours.,Either CHEM 237 or both CHEM 223 and CHEM 233
934,CHEM 538,538,Topics in Organic Chemistry,"Advanced course dealing with subject matter not ordinarily covered by regularly scheduled courses, such as natural product synthesis and biosynthesis, organic photochemistry, chemistry of special families of organic compounds, etc. May be repeated. Prerequisite: CHEM 532 and CHEM 534, both of which may be taken concurrently.",2 TO 4 hours.,"CHEM 532 and CHEM 534, both of which may be taken concurrently"
3626,PSYC 336,336,Topics in Clin/Comm Psych,"Survey and critical review of subdisciplines in clinical/community psychology; concepts, methods, and assessments, intervention strategies and tactics. Subdisciplines addressed will vary. See Class Schedule for current titles. May be repeated with approval to a maximum of 6 undergraduate hours in same term, or to a maximum of 9 undergraduate hours in subsequent terms. Prerequisite: PSYC 238 or PSYC 239 or both depending on topic.",3 hours.,PSYC 238 or PSYC 239 or both depending on topic
4526,ARCH 598,598,Specialized Architectural Practice,"This course adds an academic dimension to professionally supervised field experiences in which problems in architectural design and technology are defined, researched, and solved. Advanced doctoral students are introduced to applied research processes in any of architecture's sub-disciplines. Sites of applied research may include commercial firms, not-for-profit organizations, and government agencies. Students work in school-approved firms or agencies of their choice. Written accounts of work accomplished, documentation of research questions developed and pursued, and reflective experiential learning reports must be submitted for evaluation. Field experiences may be part time or full time. 0 graduate hours. No professional credit. Approved for S/U grading only. May be repeated for up to 11 months total of training. This course is intended to facilitate CPT in professionally focused environmental design research for students in the PhD program. Prerequisite: Required research methods course (ARCH 505/LA 505 or ARCH 563/LA 563 or equivalent) and approval of both PhD program chair and student's PhD adviser. For PhD students who have completed stage 1 of coursework.",0 hours.,Required research methods course (ARCH 505/LA 505 or ARCH 563/LA 563 or equivalent) and approval of both PhD program chair and student's PhD adviser
5615,MSE 470,470,Design and Use of Biomaterials,"Characterization and use of biomaterials in medical applications. Concepts of biocompatibility in terms of structure and properties of materials and interactions between materials and proteins, cells, and tissue. Issues related to the design of biomaterials. Design of biomaterials to meet specific medical needs. 3 undergraduate hours. 3 graduate hours. Prerequisite: Credit or concurrent registration in both MCB 252 and either CHEM 232 or MSE 403.",3 hours.,Credit or concurrent registration in both MCB 252 and either CHEM 232 or MSE 403
7545,ABE 463,463,Electrohydraulic Systems,"Engineering principles of electrohydraulic control systems related to off-road vehicles. Basics of fluid power systems, concepts of electrohydraulic systems and controls, analysis and design of electrohydraulic control systems, and applications of electrohydraulic control. Additional fees may apply. See Class Schedule. 3 undergraduate hours. 3 graduate hours. Prerequisite: ECE 110 or both ECE 205 and ECE 206; ME 310 or TAM 335.",3 hours.,ECE 110 or both ECE 205 and ECE 206; ME 310 or TAM 335
9482,NPRE 556,556,Reactor Theory II,"Advanced treatment of the theory of slow-neutron scattering, neutron thermalization, Doppler broadening, fuel depletion and fuel loadings, properties of neutron migration operators, and mathematical neutron transport theory; interpretation of related experiments; advanced topics. Prerequisite: NPRE 521 and NPRE 555 (both waived for Physics majors).",4 hours.,NPRE 521 and NPRE 555 (both waived for Physics majors)
9999,PHYS 521,521,Reactor Theory II,"Advanced treatment of the theory of slow-neutron scattering, neutron thermalization, Doppler broadening, fuel depletion and fuel loadings, properties of neutron migration operators, and mathematical neutron transport theory; interpretation of related experiments; advanced topics. Prerequisite: NPRE 521 and NPRE 555 (both waived for Physics majors).",4 hours.,NPRE 521 and NPRE 555 (both waived for Physics majors)


In [56]:
# Courses whose description mentions "professional hour" (case-insensitive).
df_courses.loc[lambda courses: courses["description"].str.contains("professional hour", case=False, na=False)]

Unnamed: 0,courseId,number,name,description,creditHours,prerequisites,undergraduate,graduate
660,BADM 394,394,Senior Research I,Research and readings course for students majoring in business administration. May be taken by students in the college honors program in partial fulfillment of the honors requirements. May be repeated in the same or separate terms for unlimited undergraduate hours. Not applicable to graduate or professional hours.,2 TO 4 hours.,,,
661,BADM 395,395,Senior Research II,Research and readings course for students majoring in business administration. May be taken by students in the college honors program in partial fulfillment of the honors requirements. May be repeated if topics vary. Not applicable to graduate or professional hours.,1 TO 4 hours.,,,
758,BSE 634,634,Clinical Neuroscience,"Topics include neurovascular disorders, seizures, brain injury, dementia, tumors of the brain, disorders involving neuroinflammation, psychotic disorders, affective disorders, anxiety disorders, as well as disorders of the peripheral nervous system and neuromuscular junction. No graduate credit. 5 professional hours. Approved for S/U grading only. May be repeated in separate semesters, for a total of 15 credit hours with approval from the Student Progress and Promotions Committee. Prerequisite: Restricted to students enrolled in the MD program at Carle Illinois College of Medicine.",5 hours.,Restricted to students enrolled in the MD program at Carle Illinois College of Medicine,,
759,BSE 635,635,Musculoskeletal,"Topics include primary inflammatory diseases such as rheumatoid arthritis, lupus, polymyalgia rheumatica and associated disorders, degenerative diseases of the joints such as osteoarthritis, primary diseases of muscle, primary diseases of bone such as osteoporosis and osteogenesis imperfecta as well as mechanical trauma to bone leading to fracture. No graduate credit. 4 professional hours. Approved for S/U grading only. May be repeated in separate semesters for a total of 12 credit hours with approval from the Student Progress and Promotions Committee. Prerequisite: Restricted to students enrolled in the MD program at Carle Illinois College of Medicine.",4 hours.,Restricted to students enrolled in the MD program at Carle Illinois College of Medicine,,
760,BSE 636,636,"Digestion, Nutrition, & Metabolism","Topics include malabsorption syndrome, vitamin D deficiency, G6PD deficiency, TPN, obesity, GERD with stricture/Barretts, Crohn's disease, peptic ulcer disease with hemorrhage, chronic diarrhea, pyloric stenosis – peds hepatitis C, and colonic polyposis. No graduate credit. 1 to 5 professional hours. Approved for S/U grading only. May be repeated in separate semesters for a total of 15 credit hours with approval from the Student Progress and Promotions Committee. Available for honors grade. Prerequisite: Participation in ongoing study of the digestive system. Restricted to students enrolled in the MD program at Carle Illinois College of Medicine.",1 TO 5 hours.,Participation in ongoing study of the digestive system,,
...,...,...,...,...,...,...,...,...
10152,PATH 637,637,Veterinary Virology,Fundamental principles of animal virology; mechanisms of virus-cell and virus-host interactions; explores properties of the major groups of animal virus in relation to replication and pathogenesis of viral disease. Lecture and laboratory. 3 graduate or professional hours. Prerequisite: Second-year standing in the veterinary curriculum or consent of instructor.,3 hours.,Second-year standing in the veterinary curriculum or consent of instructor,,
10153,PATH 638,638,Veterinary Clinical Pathology,"Discussion of the function and interpretation of hematological, serum and urine, chemical and certain other analyses as aids in the diagnosis of animal diseases; emphasis is on the correlation of laboratory and clinical manifestations of disease. 4 professional hours. Prerequisite: Second-year standing in veterinary curriculum.",4 hours.,Second-year standing in veterinary curriculum,,
10154,PATH 641,641,Food Safety and Public Health,Introduction to public health; diseases of animals transmissible to man; and procedures and techniques used in inspection of food of animal origin. 2 graduate or professional hours. Prerequisite: Second-year standing in veterinary curriculum or consent of instructor.,2 hours.,Second-year standing in veterinary curriculum or consent of instructor,,
10186,VB 619,619,Pharmacology II,Lecture-discussion on the action of chemical agents on physiological processes and disease-producing organisms. 3 graduate or professional hours. Prerequisite: VB 618 or equivalent.,3 hours.,VB 618 or equivalent,,


In [57]:
# Courses whose description mentions "repeated" (case-insensitive).
df_courses.loc[lambda courses: courses["description"].str.contains("repeated", case=False, na=False)]

Unnamed: 0,courseId,number,name,description,creditHours,prerequisites,undergraduate,graduate
8,AAS 299,299,Begin Topics Asian Am Studies,May be repeated in the same or subsequent terms to a maximum of 6 hours.,3 hours.,,,
16,ABE 199,199,Undergraduate Open Seminar,May be repeated to a maximum of 12 hours.,1 TO 5 hours.,,,
19,ABE 397,397,Independent Study,"Individual research, special problems, thesis, development or design work under the supervision of a member of the faculty. May be repeated to a maximum of 8 hours. Prerequisite: Consent of instructor.",1 TO 4 hours.,Consent of instructor,,
28,ABE 497,497,Independent Study,"Individual research, special problems, thesis, development or design work under the supervision of a member of the faculty. 1 to 4 undergraduate hours. No graduate credit. May be repeated to a maximum of 8 hours. Prerequisite: Consent of instructor.",1 TO 4 hours.,Consent of instructor,4,
29,ABE 498,498,Special Topics,Subject offerings of new and developing areas of knowledge in agricultural and biological engineering intended to augment the existing curriculum. See Class Schedule or departmental course information for topics and prerequisites. 1 to 4 undergraduate hours. 1 to 4 graduate hours. May be repeated in the same or separate terms if topics vary to a maximum of 16 hours.,1 to 4 hours.,,4,4
...,...,...,...,...,...,...,...,...
10162,PSYC 411,411,Bio Psych Lab,"For students doing research in biological, behavioral and cognitive neuroscience. This course will provide in-depth background knowledge for their research, and teach students to make effective oral and written presentations of their findings. The course may be taken for two terms with the first term emphasizing a review of the literature and the second term concentrating on the presentation of the results. Same as NEUR 411. No graduate credit. May be repeated to a maximum of 6 hours. Prerequisite: PSYC 311 or equivalent and students must arrange to do a research project with a faculty member.",3 hours.,PSYC 311 or equivalent and students must arrange to do a research project with a faculty member,,
10170,SNSK 199,199,Undergraduate Open Seminar,May be repeated.,1 TO 5 hours.,,,
10179,UP 408,408,Law and Planning,"This course examines the legal framework within which planning takes place in urban areas of this country. It emphasizes the role of law in structuring local government responses to social, economic and physical planning issues and in allocating power among local governments, between local governments and state and federal governments, and between governments and the private sectors of society. Course may not be repeated for credit. Prerequisite: Senior standing.",4 hours.,Senior standing,,
10185,VB 591,591,Biosciences Seminar Series,"Review and discussion of selected topics. Students are required to participate in weekly discussions and present one formal seminar per year, on a topic approved by the instructor. Approved for S/U grading. May be repeated to a maximum of 4 hours. Prerequisite: Enrollment in VB graduate program or consent of instructor.",0 TO 1 hours.,Enrollment in VB graduate program or consent of instructor,,


In [33]:
# Rows whose creditHours text contains "to" in any letter case
# (e.g. "1 TO 4 hours."); missing values count as "no match".
credit_hours_has_to = df_courses["creditHours"].str.contains("to", case=False, na=False)
df_courses.loc[credit_hours_has_to]

Unnamed: 0,courseId,number,name,description,creditHours,prerequisites
16,ABE 199,199,Undergraduate Open Seminar,May be repeated to a maximum of 12 hours.,1 TO 5 hours.,
19,ABE 397,397,Independent Study,"Individual research, special problems, thesis, development or design work under the supervision of a member of the faculty. May be repeated to a maximum of 8 hours. Prerequisite: Consent of instructor.",1 TO 4 hours.,Consent of instructor
28,ABE 497,497,Independent Study,"Individual research, special problems, thesis, development or design work under the supervision of a member of the faculty. 1 to 4 undergraduate hours. No graduate credit. May be repeated to a maximum of 8 hours. Prerequisite: Consent of instructor.",1 TO 4 hours.,Consent of instructor
29,ABE 498,498,Special Topics,Subject offerings of new and developing areas of knowledge in agricultural and biological engineering intended to augment the existing curriculum. See Class Schedule or departmental course information for topics and prerequisites. 1 to 4 undergraduate hours. 1 to 4 graduate hours. May be repeated in the same or separate terms if topics vary to a maximum of 16 hours.,1 to 4 hours.,
31,ABE 597,597,Independent Study,Individual investigations or studies of any phases of agricultural engineering selected by the student and approved by the advisor and the faculty member who will supervise the study. May be repeated to a maximum of 16 hours. Prerequisite: Consent of instructor.,1 TO 4 hours.,Consent of instructor
...,...,...,...,...,...,...
10117,HIST 437,437,Middle East in 20th Century,"Political and ideological developments in Egypt, Arabia, the Fertile Crescent (including Israel), Iran, and Turkey from World War I to the present, with emphasis on the period to the 1960s; economic, social, and cultural trends in the region also addressed. 3 undergraduate hours. 2 or 4 graduate hours. Prerequisite: One year of college history or political science, or consent of instructor. HIST 135 is recommended.",2 TO 4 hours.,"One year of college history or political science, or consent of instructor"
10170,SNSK 199,199,Undergraduate Open Seminar,May be repeated.,1 TO 5 hours.,
10182,VB 533,533,Repro Physiology Lab Methods,"Laboratory methods used in reproductive physiology studies, such as blood sampling, large animal surgery, collection of tissues and gametes, embryo recovery, in vitro fertilization, tissue culture, hormone measurements, and directed individual research problems. Same as MCB 533 and CB 533. Prerequisite: Consent of instructor.",1 TO 3 hours.,Consent of instructor
10185,VB 591,591,Biosciences Seminar Series,"Review and discussion of selected topics. Students are required to participate in weekly discussions and present one formal seminar per year, on a topic approved by the instructor. Approved for S/U grading. May be repeated to a maximum of 4 hours. Prerequisite: Enrollment in VB graduate program or consent of instructor.",0 TO 1 hours.,Enrollment in VB graduate program or consent of instructor


In [46]:
# Pull the undergraduate credit count out of descriptions such as
# "... 3 undergraduate hours. 4 graduate hours.".
#  - raw string: "\d" and "\s" are not valid escapes in a plain string literal
#  - the whitespace sits outside the capture group, so values are "3" rather than "3 "
#  - expand=False yields a Series for the single capture group
df_courses["undergraduate"] = df_courses["description"].str.extract(
    r"(\d+)\s+undergraduate hour", expand=False
)

# Display only the courses for which a count was found.
df_courses[df_courses["undergraduate"].notna()]

Unnamed: 0,courseId,number,name,description,creditHours,prerequisites,undergraduate
13,AAS 400,400,Critical Ethnic Studies,"Examines the formation of the field of Critical Ethnic Studies and elaborates its key concepts, such as settler colonialism, indigeneity, heteropatriarchy, decolonization, and liberation. Same as LLS 460. 3 undergraduate hours. 4 graduate hours.",3 OR 4 hours.,,3
14,AAS 479,479,"Race, Medicine, and Society","The idea of race has historically been central to how Western cultures conceptualize and think about human difference. This course examines the historical significance of race through one domain of knowledge: medicine. Specifically, it will be concerned with ""race"" as a central category in the medical construction and management of individuals and populations. Case studies might focus on colonial medicine, race and public health, sexuality and reproduction, global health disparities, and genetics and genomics. Same as LLS 479 and ANTH 479. 3 undergraduate hours. 4 graduate hours. Prerequisite: LLS 100 or consent of instructor.",3 OR 4 hours.,LLS 100 or consent of instructor,3
20,ABE 424,424,Principles of Mobile Robotics,"The objective of this course is to prepare students in design, navigation, control, and autonomy of aerial and ground robots that operate in harsh, uncertain, and changing field environments. The course will cover three primary aspects of field robotics: perception (sensing), motion control, and data analytics, and bring everything together through labs involving ground robots and flying unmanned aircraft (drones). 4 undergraduate hours. 4 graduate hours. Approved for Letter and S/U grading. Prerequisite: MATH 221, MATH 225, and MATH 285 , or ABE 415, or ABE 440, IE 300, or STAT 400 or equiv, CS 125 or equiv., or graduate standing.",4 hours.,"MATH 221, MATH 225, and MATH 285 , or ABE 415, or ABE 440, IE 300, or STAT 400 or equiv, CS 125 or equiv",4
21,ABE 425,425,Engrg Measurement Systems,"Principles of instrumentation systems, including sensing, signal conditioning, computerized data acquisition, test design, data analysis and synthesis. Additional fees may apply. See Class Schedule. 4 undergraduate hours. 4 graduate hours. Credit is not given for both ABE 425 and ME 360. Prerequisite: ECE 205.",4 hours.,ECE 205,4
22,ABE 440,440,Applied Statistical Methods I,"Statistical methods involving relationships between populations and samples; collection, organization, and analysis of data; and techniques in testing hypotheses with an introduction to regression, correlation, and analysis of variance limited to the completely randomized design and the randomized complete-block design. Same as CPSC 440, ANSC 440, FSHN 440, and NRES 440. 4 undergraduate hours. 4 graduate hours. Prerequisite: MATH 112 or equivalent.",4 hours.,MATH 112 or equivalent,4
...,...,...,...,...,...,...,...
10165,RLST 451,451,Postmodern Religious Thought,"Examination of postmodern religious themes, including the death of God, the critique of ontotheology, ""¿criture,"" the Face of the Other, and messianicity. Authors to be studied will include S¿ren Kierkegaard, Martin Heidegger, Franz Rosenzweig, Emmanuel Levinas, and Jacques Derrida, with attention to how their thought deconstructs the traditional boundaries between reason and faith. 3 undergraduate hours. 4 graduate hours. Prerequisite: At least one course beyond the 100 level in Philosophy or Religion.",3 OR 4 hours.,At least one course beyond the 100 level in Philosophy or Religion,3
10166,RLST 464,464,Modern Japanese Drama,"Modern Japanese culture as seen through drama. Special emphasis is given to the period after World War II. Readings in English supplemented by films and videotapes. Same as CWL 462, EALC 464, and THEA 487. 3 undergraduate hours. 3 or 4 graduate hours. No knowledge of Japanese required. Prerequisite: At least one course on Japanese culture or consent of instructor.",3 OR 4 hours.,At least one course on Japanese culture or consent of instructor,3
10167,RLST 483,483,Salvation in Islamic Thought,"Introduction to salvation in Islamic thought, with emphasis on discussions of the fate of ""Others"" (i.e. non-Muslims). Begins with a study of the origins and sources of this discourse, followed by an examination of evolving orientations from the formative to the post-formative periods. Important controversies generated along the way, including exclusivist-inclusivist, universalist-anti-universalist, and Sufi-anti-Sufi debates, will be explored. This is followed by an assessment of the new approaches to salvation in modern Islamic thought, with particular emphasis on the contemporary pluralist-inclusivist debate. Finally, alternative approaches to the topic of salvation, including reincarnation, will be examined. 3 undergraduate hours. 4 graduate hours. Prerequisite: Previous coursework on Islam or consent of instructor.",3 OR 4 hours.,Previous coursework on Islam or consent of instructor,3
10177,THEA 487,487,Modern Japanese Drama,"Modern Japanese culture as seen through drama. Special emphasis is given to the period after World War II. Readings in English supplemented by films and videotapes. Same as CWL 462, RLST 464, and EALC 464. 3 undergraduate hours. 3 or 4 graduate hours. No knowledge of Japanese required. Prerequisite: At least one course on Japanese culture or consent of instructor.",3 OR 4 hours.,At least one course on Japanese culture or consent of instructor,3


In [48]:
# Pull the graduate credit count out of descriptions such as
# "... 3 undergraduate hours. 4 graduate hours.".
# "N undergraduate hour" cannot match here: the digits must be followed by
# whitespace and then directly by "graduate".
#  - raw string: "\d" and "\s" are not valid escapes in a plain string literal
#  - the whitespace sits outside the capture group, so values are "4" rather than "4 "
#  - expand=False yields a Series for the single capture group
df_courses["graduate"] = df_courses["description"].str.extract(
    r"(\d+)\s+graduate hour", expand=False
)

# Reference Cypher (uses apoc.text.regexGroups) that derives min/max undergraduate
# and graduate credit hours from Course.description, falling back to
# Course.creditHours. It is not executed by this cell; it is kept as a raw string
# so the regex backslashes are stored exactly as written.
# NOTE(review): "[or|OR|to|TO]*" is a character class, not an alternation --
# confirm that is the intended pattern before running this against the graph.
CREDIT_HOURS_CYPHER = r"""
MATCH (course:Course)
WITH course, apoc.text.regexGroups(course.description, "[.]\s*(\d*\s*?[or|OR|to|TO]*\s*?\d*)\s*undergraduate hour") as undergradMatches, apoc.text.regexGroups(course.description, "[.]\s*(\d*\s*?[or|OR|to|TO]*\s*?\d*)\s*graduate hour") as gradMatches
WITH course, 
CASE 
    WHEN size(undergradMatches) = 0 OR size(undergradMatches[0]) < 2 THEN course.creditHours
    ELSE undergradMatches[0][1]
END AS undergradCredits,
CASE 
    WHEN size(gradMatches) = 0 OR size(gradMatches[0]) < 2 THEN course.creditHours
    ELSE gradMatches[0][1]
END AS gradCredits
WITH course, apoc.text.regexGroups(undergradCredits, "\d+([\.]\d+)?") as undergradCreditMatches, apoc.text.regexGroups(gradCredits, "\d+([\.]\d+)?") as gradCreditMatches
WITH course, undergradCreditMatches, gradCreditMatches, undergradCreditMatches[0][0] as undergradMin, gradCreditMatches[0][0] as gradMin
WITH course, undergradMin, gradMin,
CASE
    WHEN size(undergradCreditMatches) = 1 THEN undergradMin
    WHEN size(undergradCreditMatches) > 1 THEN undergradCreditMatches[1][0]
    ELSE 0
END AS undergradMax,
CASE
    WHEN size(gradCreditMatches) = 1 THEN gradMin
    WHEN size(gradCreditMatches) > 1 THEN gradCreditMatches[1][0]
    ELSE 0
END AS gradMax
RETURN course, {
    undergraduate: {
        max: undergradMax,
        min: undergradMin
    },
    graduate: {
        max: gradMax,
        min: gradMin
    }
} AS result
"""

# Display only the courses for which a graduate count was found.
df_courses[df_courses["graduate"].notna()]

Unnamed: 0,courseId,number,name,description,creditHours,prerequisites,undergraduate,graduate
13,AAS 400,400,Critical Ethnic Studies,"Examines the formation of the field of Critical Ethnic Studies and elaborates its key concepts, such as settler colonialism, indigeneity, heteropatriarchy, decolonization, and liberation. Same as LLS 460. 3 undergraduate hours. 4 graduate hours.",3 OR 4 hours.,,3,4
14,AAS 479,479,"Race, Medicine, and Society","The idea of race has historically been central to how Western cultures conceptualize and think about human difference. This course examines the historical significance of race through one domain of knowledge: medicine. Specifically, it will be concerned with ""race"" as a central category in the medical construction and management of individuals and populations. Case studies might focus on colonial medicine, race and public health, sexuality and reproduction, global health disparities, and genetics and genomics. Same as LLS 479 and ANTH 479. 3 undergraduate hours. 4 graduate hours. Prerequisite: LLS 100 or consent of instructor.",3 OR 4 hours.,LLS 100 or consent of instructor,3,4
20,ABE 424,424,Principles of Mobile Robotics,"The objective of this course is to prepare students in design, navigation, control, and autonomy of aerial and ground robots that operate in harsh, uncertain, and changing field environments. The course will cover three primary aspects of field robotics: perception (sensing), motion control, and data analytics, and bring everything together through labs involving ground robots and flying unmanned aircraft (drones). 4 undergraduate hours. 4 graduate hours. Approved for Letter and S/U grading. Prerequisite: MATH 221, MATH 225, and MATH 285 , or ABE 415, or ABE 440, IE 300, or STAT 400 or equiv, CS 125 or equiv., or graduate standing.",4 hours.,"MATH 221, MATH 225, and MATH 285 , or ABE 415, or ABE 440, IE 300, or STAT 400 or equiv, CS 125 or equiv",4,4
21,ABE 425,425,Engrg Measurement Systems,"Principles of instrumentation systems, including sensing, signal conditioning, computerized data acquisition, test design, data analysis and synthesis. Additional fees may apply. See Class Schedule. 4 undergraduate hours. 4 graduate hours. Credit is not given for both ABE 425 and ME 360. Prerequisite: ECE 205.",4 hours.,ECE 205,4,4
22,ABE 440,440,Applied Statistical Methods I,"Statistical methods involving relationships between populations and samples; collection, organization, and analysis of data; and techniques in testing hypotheses with an introduction to regression, correlation, and analysis of variance limited to the completely randomized design and the randomized complete-block design. Same as CPSC 440, ANSC 440, FSHN 440, and NRES 440. 4 undergraduate hours. 4 graduate hours. Prerequisite: MATH 112 or equivalent.",4 hours.,MATH 112 or equivalent,4,4
...,...,...,...,...,...,...,...,...
10165,RLST 451,451,Postmodern Religious Thought,"Examination of postmodern religious themes, including the death of God, the critique of ontotheology, ""¿criture,"" the Face of the Other, and messianicity. Authors to be studied will include S¿ren Kierkegaard, Martin Heidegger, Franz Rosenzweig, Emmanuel Levinas, and Jacques Derrida, with attention to how their thought deconstructs the traditional boundaries between reason and faith. 3 undergraduate hours. 4 graduate hours. Prerequisite: At least one course beyond the 100 level in Philosophy or Religion.",3 OR 4 hours.,At least one course beyond the 100 level in Philosophy or Religion,3,4
10166,RLST 464,464,Modern Japanese Drama,"Modern Japanese culture as seen through drama. Special emphasis is given to the period after World War II. Readings in English supplemented by films and videotapes. Same as CWL 462, EALC 464, and THEA 487. 3 undergraduate hours. 3 or 4 graduate hours. No knowledge of Japanese required. Prerequisite: At least one course on Japanese culture or consent of instructor.",3 OR 4 hours.,At least one course on Japanese culture or consent of instructor,3,4
10167,RLST 483,483,Salvation in Islamic Thought,"Introduction to salvation in Islamic thought, with emphasis on discussions of the fate of ""Others"" (i.e. non-Muslims). Begins with a study of the origins and sources of this discourse, followed by an examination of evolving orientations from the formative to the post-formative periods. Important controversies generated along the way, including exclusivist-inclusivist, universalist-anti-universalist, and Sufi-anti-Sufi debates, will be explored. This is followed by an assessment of the new approaches to salvation in modern Islamic thought, with particular emphasis on the contemporary pluralist-inclusivist debate. Finally, alternative approaches to the topic of salvation, including reincarnation, will be examined. 3 undergraduate hours. 4 graduate hours. Prerequisite: Previous coursework on Islam or consent of instructor.",3 OR 4 hours.,Previous coursework on Islam or consent of instructor,3,4
10177,THEA 487,487,Modern Japanese Drama,"Modern Japanese culture as seen through drama. Special emphasis is given to the period after World War II. Readings in English supplemented by films and videotapes. Same as CWL 462, RLST 464, and EALC 464. 3 undergraduate hours. 3 or 4 graduate hours. No knowledge of Japanese required. Prerequisite: At least one course on Japanese culture or consent of instructor.",3 OR 4 hours.,At least one course on Japanese culture or consent of instructor,3,4


In [7]:
# Section nodes: one row per (crn, year, term) taken from the catalog frame.
section_key = ["crn", "year", "term"]
section_columns = [
    "crn", "year", "term", "part_of_term",
    "gpa", "A+", "A", "A-", "B+", "B", "B-",
    "C+", "C", "C-", "D+", "D", "D-", "F",
    "section", "section_info", "section_notes",
    "section_attributes", "section_capp_area",
    "section_co_request", "section_special_approval",
]
# snake_case source columns -> camelCase names written to the CSV
section_renames = {
    "part_of_term": "partOfTerm",
    "section_info": "sectionInfo",
    "section_notes": "sectionNotes",
    "section_attributes": "sectionAttributes",
    "section_capp_area": "sectionCappArea",
    "section_co_request": "sectionCoRequest",
    "section_special_approval": "sectionSpecialApproval",
}

df_sections = df_catalogs[section_columns]
df_sections = df_sections.drop_duplicates(subset=section_key)  # keep the first row per key
df_sections = df_sections.dropna(subset=section_key)           # a section needs a complete key
df_sections = df_sections.rename(columns=section_renames)
# Grade-count columns are exported as-is; missing values are not filled with 0.
df_sections.to_csv("../neo4j/nodes/section_nodes.csv", index=False)

df_sections

Unnamed: 0,crn,year,term,partOfTerm,gpa,A+,A,A-,B+,B,...,D,D-,F,section,sectionInfo,sectionNotes,sectionAttributes,sectionCappArea,sectionCoRequest,sectionSpecialApproval
0,30106,2021,Spring,1,,,,,,,...,,,,AL1,,,"Social & Beh Sci - Soc Sci, and Cultural Studi...",,,
2,30107,2021,Spring,1,,,,,,,...,,,,AD1,,,"Social & Beh Sci - Soc Sci, and Cultural Studi...",,,
4,41729,2021,Spring,1,,,,,,,...,,,,AD2,,,"Social & Beh Sci - Soc Sci, and Cultural Studi...",,,
6,43832,2021,Spring,1,,,,,,,...,,,,AD3,,,"Social & Beh Sci - Soc Sci, and Cultural Studi...",,,
8,48232,2021,Spring,1,,,,,,,...,,,,AD4,,,"Social & Beh Sci - Soc Sci, and Cultural Studi...",,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
415063,53863,2010,Spring,1,,,,,,,...,,,,C,,,Advanced Composition course.,,,
415065,51059,2010,Spring,1,,,,,,,...,,,,U3,This section for undergraduate students for 3 ...,,,,,
415066,51060,2010,Spring,1,,,,,,,...,,,,G4,This section for graduate students for 4 credi...,Restricted to Graduate - Urbana-Champaign.,,,,
415067,30990,2010,Spring,1,,,,,,,...,,,,A,,,,,,


In [8]:
# Meeting nodes: one row per (crn, year, term, meeting), with all instructors of
# that meeting collapsed into a single ":"-separated string.
meeting_key = ["crn", "year", "term", "meeting"]

df_meetings = (df_catalogs[["crn", "year", "term",
                            "start_date", "end_date", "start_time",
                            "end_time", "type", "type_name", "meeting",
                            "days", "room", "building"]]
               .drop_duplicates(subset=meeting_key)
               .dropna(subset=["crn", "year", "term", "type", "meeting"])
               .rename(columns={"start_date": "startDate", "end_date": "endDate",
                                "start_time": "startTime", "end_time": "endTime",
                                "type": "typeId", "type_name": "name"}))

# Distinct instructors per meeting. Rows without an instructor are dropped
# *before* joining so that a missing name never contributes an empty segment
# (e.g. ":Smith, J"). Meetings with no instructor at all get NaN from the left
# merge below, which to_csv writes as an empty field.
meeting_instructors = (df_catalogs[meeting_key + ["instructor"]]
                       .dropna(subset=["instructor"])
                       .drop_duplicates()
                       .groupby(meeting_key)
                       .agg({"instructor": ":".join}))

# Join on the key as index levels so the key columns stay first after reset_index().
df_meetings = (df_meetings
               .set_index(meeting_key)
               .merge(meeting_instructors, how="left", on=meeting_key)
               .reset_index())
df_meetings.to_csv("../neo4j/nodes/meeting_nodes.csv", index=False)

df_meetings

Unnamed: 0,crn,year,term,meeting,startDate,endDate,startTime,endTime,typeId,name,days,room,building,instructor
0,30106,2021,Spring,0,2021-01-25Z,2021-05-05Z,01:00 PM,01:50 PM,OLC,Online Lecture,MW,,,"Tabares, L"
1,30107,2021,Spring,0,2021-01-25Z,2021-05-05Z,09:00 AM,09:50 AM,OD,Online Discussion,F,,,"Boonsripaisal, S"
2,41729,2021,Spring,0,2021-01-25Z,2021-05-05Z,10:00 AM,10:50 AM,OD,Online Discussion,F,,,"Boonsripaisal, S"
3,43832,2021,Spring,0,2021-01-25Z,2021-05-05Z,10:00 AM,10:50 AM,OD,Online Discussion,F,,,"Guruparan, A"
4,48232,2021,Spring,0,2021-01-25Z,2021-05-05Z,11:00 AM,11:50 AM,OD,Online Discussion,F,,,"Guruparan, A"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
285452,53863,2010,Spring,1,2010-01-19Z,2010-05-05Z,02:00 PM,03:15 PM,LEC,Lecture,T,122,1203 1/2 W Nevada,"O'Connor, J"
285453,51059,2010,Spring,0,2010-01-19Z,2010-05-05Z,02:00 PM,04:50 PM,LCD,Lecture-Discussion,M,313,Davenport Hall,"Harris, R"
285454,51060,2010,Spring,0,2010-01-19Z,2010-05-05Z,02:00 PM,04:50 PM,LCD,Lecture-Discussion,M,313,Davenport Hall,"Harris, R"
285455,30990,2010,Spring,0,2010-01-19Z,2010-05-05Z,10:00 AM,10:50 AM,LCD,Lecture-Discussion,MTWR,243,Armory,"Bokamba, E:Hlongwa, T"


In [10]:
# Combined section + meeting export: section attributes, grade counts and
# meeting details side by side in one flat table.
sections_meetings_columns = [
    "crn", "year", "term", "part_of_term",
    "gpa", "A+", "A", "A-", "B+", "B", "B-",
    "C+", "C", "C-", "D+", "D", "D-", "F",
    "start_date", "end_date", "start_time",
    "end_time", "type", "type_name",
    "days", "room", "building", "meeting",
    "section", "section_info", "section_notes",
    "section_attributes", "section_capp_area",
    "section_co_request", "section_special_approval",
]
# snake_case source columns -> camelCase names written to the CSV
sections_meetings_renames = {
    "part_of_term": "partOfTerm",
    "section_info": "sectionInfo",
    "section_notes": "sectionNotes",
    "section_attributes": "sectionAttributes",
    "section_capp_area": "sectionCappArea",
    "section_co_request": "sectionCoRequest",
    "section_special_approval": "sectionSpecialApproval",
    "start_date": "startDate",
    "end_date": "endDate",
    "start_time": "startTime",
    "end_time": "endTime",
    "type": "typeId",
    "type_name": "name",
}
# A row is kept only when all of these identifying fields are present.
sections_meetings_required = ["crn", "section", "year", "term", "part_of_term", "type"]

df_sections_meetings = df_catalogs[sections_meetings_columns].drop_duplicates()
df_sections_meetings = df_sections_meetings.dropna(subset=sections_meetings_required)
df_sections_meetings = df_sections_meetings.rename(columns=sections_meetings_renames)
df_sections_meetings.to_csv("../neo4j/sections_meetings.csv", index=False)
df_sections_meetings

Unnamed: 0,crn,year,term,partOfTerm,gpa,A+,A,A-,B+,B,...,room,building,meeting,section,sectionInfo,sectionNotes,sectionAttributes,sectionCappArea,sectionCoRequest,sectionSpecialApproval
0,30106,2021,Spring,1,,,,,,,...,,,0,AL1,,,"Social & Beh Sci - Soc Sci, and Cultural Studi...",,,
2,30107,2021,Spring,1,,,,,,,...,,,0,AD1,,,"Social & Beh Sci - Soc Sci, and Cultural Studi...",,,
4,41729,2021,Spring,1,,,,,,,...,,,0,AD2,,,"Social & Beh Sci - Soc Sci, and Cultural Studi...",,,
6,43832,2021,Spring,1,,,,,,,...,,,0,AD3,,,"Social & Beh Sci - Soc Sci, and Cultural Studi...",,,
8,48232,2021,Spring,1,,,,,,,...,,,0,AD4,,,"Social & Beh Sci - Soc Sci, and Cultural Studi...",,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
415064,53863,2010,Spring,1,,,,,,,...,122,1203 1/2 W Nevada,1,C,,,Advanced Composition course.,,,
415065,51059,2010,Spring,1,,,,,,,...,313,Davenport Hall,0,U3,This section for undergraduate students for 3 ...,,,,,
415066,51060,2010,Spring,1,,,,,,,...,313,Davenport Hall,0,G4,This section for graduate students for 4 credi...,Restricted to Graduate - Urbana-Champaign.,,,,
415067,30990,2010,Spring,1,,,,,,,...,243,Armory,0,A,,,,,,


In [11]:
# Instructor nodes: every distinct, non-missing instructor string in the catalog,
# exported under the column name "name".
instructor_names = df_catalogs[["instructor"]].drop_duplicates(ignore_index=True)
df_instructors = instructor_names.dropna().rename(columns={"instructor": "name"})
df_instructors.to_csv("../neo4j/nodes/instructor_nodes.csv", index=False)
df_instructors

Unnamed: 0,name
0,"Tabares, L"
1,"Boonsripaisal, S"
2,"Guruparan, A"
3,"Kang, Y"
4,"Wang, Y"
...,...
16538,"Pope, B"
16539,"Shchur, O"
16540,"Naughton, J"
16541,"Troutt, H"


In [12]:
# GenEd nodes: distinct (gen_ed, gen_ed_name) pairs with neither value missing.
gen_ed_pairs = df_catalogs[["gen_ed", "gen_ed_name"]].drop_duplicates(ignore_index=True)
df_gen_eds = gen_ed_pairs.dropna()
df_gen_eds = df_gen_eds.rename(columns={"gen_ed": "genEdId", "gen_ed_name": "name"})
df_gen_eds.to_csv("../neo4j/nodes/gen_ed_nodes.csv", index=False)
df_gen_eds

Unnamed: 0,genEdId,name
0,1US,Cultural Studies - US Minority
1,1SS,Social & Beh Sci - Soc Sci
2,1HP,Humanities – Hist & Phil
4,1CLL,Advanced Composition
6,1NW,Cultural Studies - Non-West
7,1QR1,Quantitative Reasoning I
9,1LS,Nat Sci & Tech - Life Sciences
11,1WCC,Cultural Studies - Western
13,1LA,Humanities – Lit & Arts
18,1PS,Nat Sci & Tech - Phys Sciences


In [13]:
# College -> Subject relationships: distinct (college, subject) pairs;
# pairs with either side missing are removed.
college_subject_pairs = df_catalogs[["college", "subject"]].drop_duplicates(ignore_index=True)
df_colleges_to_subjects = (
    college_subject_pairs
    .dropna()
    .rename(columns={"college": "collegeId", "subject": "subjectId"})
)
df_colleges_to_subjects.to_csv("../neo4j/relationships/colleges_to_subjects.csv", index=False)
df_colleges_to_subjects

Unnamed: 0,collegeId,subjectId
0,KV,AAS
1,KL,ABE
2,KM,ACCY
3,KL,ACE
4,KL,ACES
...,...,...
211,KV,CINE
212,KT,MS
213,KV,ESES
214,KV,LGLA


In [14]:
# Subject -> Course relationships: distinct (subject, course) pairs;
# pairs with either side missing are removed.
subject_course_renames = {"subject": "subjectId", "course": "courseId"}
df_subjects_to_courses = df_catalogs[["subject", "course"]]
df_subjects_to_courses = df_subjects_to_courses.drop_duplicates(ignore_index=True)
df_subjects_to_courses = df_subjects_to_courses.dropna().rename(columns=subject_course_renames)
df_subjects_to_courses.to_csv("../neo4j/relationships/subjects_to_courses.csv", index=False)
df_subjects_to_courses

Unnamed: 0,subjectId,courseId
0,AAS,AAS 100
1,AAS,AAS 201
2,AAS,AAS 215
3,AAS,AAS 246
4,AAS,AAS 258
...,...,...
10198,VCM,VCM 658
10199,VCM,VCM 683
10200,VCM,VCM 684
10201,VCM,VCM 691


In [15]:
# Course -> Section relationships: distinct (course, crn, year, term) rows;
# rows with any of the four values missing are removed.
course_section_rows = df_catalogs[["course", "crn", "year", "term"]]
course_section_rows = course_section_rows.drop_duplicates(ignore_index=True)
df_courses_to_sections = course_section_rows.dropna().rename(columns={"course": "courseId"})
df_courses_to_sections.to_csv("../neo4j/relationships/courses_to_sections.csv", index=False)
df_courses_to_sections

Unnamed: 0,courseId,crn,year,term
0,AAS 100,30106,2021,Spring
1,AAS 100,30107,2021,Spring
2,AAS 100,41729,2021,Spring
3,AAS 100,43832,2021,Spring
4,AAS 100,48232,2021,Spring
...,...,...,...,...
269200,WRIT 303,53863,2010,Spring
269201,YDSH 420,51059,2010,Spring
269202,YDSH 420,51060,2010,Spring
269203,ZULU 404,30990,2010,Spring


In [None]:
# GenEd -> Course relationships: distinct (course, gen_ed) pairs;
# pairs with either side missing are removed.
course_gen_ed_pairs = df_catalogs[["course", "gen_ed"]].drop_duplicates(ignore_index=True)
df_gen_eds_to_courses = (
    course_gen_ed_pairs
    .dropna()
    .rename(columns={"course": "courseId", "gen_ed": "genEdId"})
)
df_gen_eds_to_courses.to_csv("../neo4j/relationships/gen_eds_to_courses.csv", index=False)
df_gen_eds_to_courses

In [None]:
# Instructor -> Meeting relationships: distinct rows of the meeting-identifying
# fields plus the instructor; rows with any of these values missing are removed.
instructor_meeting_columns = ["year", "term", "part_of_term", "crn", "type", "meeting", "instructor"]
instructor_meeting_renames = {"part_of_term": "partOfTerm", "type": "typeId"}

df_instructors_to_meetings = df_catalogs[instructor_meeting_columns]
df_instructors_to_meetings = df_instructors_to_meetings.drop_duplicates(ignore_index=True)
df_instructors_to_meetings = df_instructors_to_meetings.dropna()
df_instructors_to_meetings = df_instructors_to_meetings.rename(columns=instructor_meeting_renames)
df_instructors_to_meetings.to_csv("../neo4j/relationships/instructors_to_meetings.csv", index=False)
df_instructors_to_meetings