In [15]:
import numpy as np
import plotly

def add_classes(self, additional_classes):
    '''
    Merge additional_classes into self.classes and return the result.

    The existing and the additional values are joined with np.append
    (which flattens both inputs) and then passed through np.unique, so
    the stored array is sorted and free of duplicates.
    '''
    merged = np.append(self.classes, additional_classes)
    self.classes = np.unique(merged)
    return self.classes

def assign_attributes(self, **kwargs):
    '''
    Attach every keyword argument to self as an attribute of the same name.
    '''
    for attribute_name, attribute_value in kwargs.items():
        setattr(self, attribute_name, attribute_value)

class Course:
    '''
    A course described entirely by the keyword arguments given at creation.

    Recommended kwargs: "department_number", "name", "class_times", "terms_offered", "years_offered"
    '''

    def __init__(self, **kwargs):
        # Each keyword argument becomes an attribute of the same name.
        assign_attributes(self, **kwargs)
        
class Major:
    '''
    Each major is an object with the following attributes:

    name: string of the name of major (the constructor stores str(name))

    only_minor: boolean, True if this object is only a minor, False otherwise

    requirements: dictionary of requirements for the major; starts out
    empty and is filled through add_requirements

    abbreviation: string of the abbreviation of major (supplied as an
    extra keyword argument)

    required_course: numpy array of all the courses that can
    be used to satisfy that specific requirement.
    '''

    def __init__(self, name, only_minor, **kwargs):
        self.name = str(name)
        self.only_minor = only_minor
        self.requirements = {}
        # Extra keyword arguments are attached last, so they may override
        # any of the attributes set above.
        assign_attributes(self, **kwargs)

    def add_requirements(self, **kwargs):
        '''
        Store each keyword argument in self.requirements under its own
        name, replacing any entry that already uses that name.
        '''
        for requirement_name, requirement in kwargs.items():
            self.requirements[requirement_name] = requirement

            

In [16]:
class Schedule:
    '''
    A set of terms that share one set of time constraints.

    time_constraints: stored as given and handed to every Term created
    by the constructor.

    number_of_terms: how many Term objects to create up front. Defaults
    to 0, which leaves the schedule empty.

    terms: numpy object array holding the terms of this schedule.
    '''

    def __init__(self, time_constraints, number_of_terms=0):
        if number_of_terms < 0:
            raise ValueError("number_of_terms must be non-negative")
        self.time_constraints = time_constraints
        # dtype=object lets the array hold arbitrary term objects without
        # numpy trying to coerce them to a numeric or string dtype.
        self.terms = np.empty(number_of_terms, dtype=object)
        for index in range(number_of_terms):
            # NOTE(review): Term is not defined in the visible part of this
            # notebook; it has to be in scope before number_of_terms > 0
            # is used.
            self.terms[index] = Term(time_constraints)

    def add_terms(self, additional_terms):
        '''
        Append additional_terms (a single term or a sequence of terms)
        to self.terms.
        '''
        # np.append returns a new array, so the result must be stored back.
        self.terms = np.append(self.terms, additional_terms)

    #def assign_classes(term):

In [17]:
'''
This next codeblock will use TQFRs and the major requirements 
to create a schedule for a given major.
'''

class Catalog:
    '''
    Text of a course catalog PDF together with the list of majors
    scraped from a web page.

    Attributes set by the methods below:

    file_path, want_entire_text: stored as passed to the constructor

    entire_text: text of every PDF page with newlines, carriage returns
    and tabs replaced by spaces (only set when want_entire_text is true)

    html: decoded HTML of the page fetched by make_list_of_all_majors

    pattern, major_preface: the markers used to cut major names out of html

    list_of_all_majors: numpy array of the major names parsed from html

    all_snippets: numpy array of catalog snippets built by get_all_snippets
    '''

    # Markers used when no pattern / major_preface attribute has been set.
    DEFAULT_PATTERN = 'sidebar-menu-block__level-3__item'
    DEFAULT_MAJOR_PREFACE = ">\n                          "

    def __init__(self, file_path, want_entire_text, **kwargs):
        self.file_path = file_path
        self.want_entire_text = want_entire_text
        assign_attributes(self, **kwargs)
        if want_entire_text:
            self.entire_text = self._read_pdf_text(file_path)

    @staticmethod
    def _read_pdf_text(file_path):
        '''
        Return the text of every page of the PDF at file_path with
        newlines, carriage returns and tabs replaced by single spaces.
        '''
        # Imported lazily so PyPDF2 is only needed when the text is wanted.
        # NOTE(review): PdfFileReader / numPages / getPage / extractText are
        # the older PyPDF2 names; confirm they exist in the installed version.
        import PyPDF2
        # The context manager closes the file handle once the pages are read.
        with open(file_path, 'rb') as pdf:
            pdf_reader = PyPDF2.PdfFileReader(pdf)
            pdf_string = "".join(
                pdf_reader.getPage(page).extractText()
                for page in range(pdf_reader.numPages))
        for whitespace in ('\n', '\r', '\t'):
            pdf_string = pdf_string.replace(whitespace, ' ')
        return pdf_string

    def make_list_of_all_majors(self, list_of_all_majors_url, **kwargs):
        '''
        Fetch list_of_all_majors_url and store the major names found on
        that page in self.list_of_all_majors (a numpy array).

        pattern and major_preface are optional keyword arguments; like any
        other keyword argument they are stored as attributes. pattern is
        the marker the HTML is split on (one piece per major) and
        major_preface is the text that directly precedes the major name
        inside each piece.

        The decoded page is kept in self.html.
        '''
        from urllib.request import urlopen
        self.list_of_all_majors_url = list_of_all_majors_url
        assign_attributes(self, **kwargs)
        with urlopen(list_of_all_majors_url) as page:
            html = page.read().decode('utf-8')
        # Stored on this instance rather than on a module-level variable.
        self.html = html
        self.list_of_all_majors = self._parse_major_names(html)

    def _parse_major_names(self, html):
        '''
        Return a numpy array of the major names contained in html, using
        self.pattern and self.major_preface or their defaults.
        '''
        pattern = getattr(self, 'pattern', None) or self.DEFAULT_PATTERN
        self.pattern = pattern
        # Everything before the first marker is page header, not a major.
        chunks = html.split(pattern)[1:]
        preface = (getattr(self, 'major_preface', None)
                   or self.DEFAULT_MAJOR_PREFACE)
        try:
            names = self._names_after_preface(chunks, preface)
        except IndexError:
            # The given preface is missing from at least one chunk: start
            # over with the default so no partial results are kept.
            preface = self.DEFAULT_MAJOR_PREFACE
            names = self._names_after_preface(chunks, preface)
        self.major_preface = preface
        return np.array(names)

    @staticmethod
    def _names_after_preface(chunks, preface):
        '''
        Return, for each chunk, the text between preface and the next
        newline. Raises IndexError if a chunk does not contain preface.
        '''
        return [chunk.split(preface)[1].split('\n')[0] for chunk in chunks]

    def get_all_snippets(self):
        '''
        Build self.all_snippets, a numpy array with one catalog snippet
        per major, from self.entire_text and self.list_of_all_majors.

        A snippet runs from one character before "<major>Option" to one
        character past the start of "<next major>Option". If either
        heading is missing, the snippet is the text that precedes
        "<major>Minor" instead.
        '''
        text = self.entire_text
        majors = self.list_of_all_majors
        snippets = []
        # NOTE(review): the last major has no following heading to stop at,
        # so no snippet is produced for it.
        for current, following in zip(majors[:-1], majors[1:]):
            try:
                # str.index raises ValueError on a miss (str.find would
                # silently return -1 and corrupt the slice below).
                beginning = text.index(current + "Option")
                end = text.index(following + "Option")
            except ValueError:
                # NOTE(review): this keeps everything before the Minor
                # heading; confirm that is the intended span.
                snippet = text.split(current + "Minor")[0]
            else:
                # max() stops a heading at offset 0 from wrapping around
                # to the end of the text.
                snippet = text[max(beginning - 1, 0): end + 1]
            snippets.append(snippet)
        self.all_snippets = np.array(snippets)

# Location of the catalog PDF; Catalog only opens it when want_entire_text is True.
# NOTE(review): this is an absolute path on one machine - consider a configurable data directory.
CATALOG_PDF_PATH = 'C:/Users/engin/Documents/GitHub/Computing-and-Formatting/catalogUGinfo.pdf'
# Web page the list of majors is scraped from.
MAJORS_URL = "https://www.admissions.caltech.edu/explore/academics/majors-minors"

astro = Major(name='Astrophysics', only_minor=False, abbreviation='Ay')

catalog = Catalog(file_path=CATALOG_PDF_PATH, want_entire_text=False)
catalog.make_list_of_all_majors(list_of_all_majors_url=MAJORS_URL)
print(catalog.list_of_all_majors)
#catalog.get_all_snippets()

'''
Make array of all major objects using that webpage from Caltech. Then use their official names
and whether they are a major or minor to find their snippet in the catalog. 
Then parse each snippet to list all of the major requirements. 
'''



['Aerospace' 'Applied and Computational Mathematics' 'Applied Physics'
 'Astrophysics' 'Bioengineering' 'Biology'
 'Business, Economics, and Management' 'Chemical Engineering' 'Chemistry'
 'Computational and Neural Systems' 'Computer Science'
 'Control and Dynamical Systems' 'Economics' 'Electrical Engineering'
 'English' 'Engineering and Applied Science'
 'Geobiology, Geochemistry, Geology, Geophysics, and Planetary Science'
 'History' 'History and Philosophy of Science'
 'Information and Data Sciences' 'Materials Science' 'Mathematics'
 'Mechanical Engineering' 'Philosophy' 'Physics' 'Political Science'
 'Structural Mechanics']


'\nMake array of all major objects using that webpage from Caltech. Then use their official names\nand whether they are a major or minor to find their snippet in the catalog. \nThen parse each snippet to list all of the major requirements. \n'

In [23]:
import re

def get_indices_of_substring(string, substring):
    '''
    Return a list with the start index of every non-overlapping
    occurrence of substring in string, in ascending order.

    substring is matched literally: it is escaped before being handed
    to the regular-expression engine, so characters such as ".", "+" or
    "(" have no special meaning. The list is empty when there is no
    occurrence.
    '''
    literal_pattern = re.escape(substring)
    return [match.start() for match in re.finditer(literal_pattern, string)]

# Offsets in the fetched page HTML at which '<b>Minors</b>' matches.
minor_index = get_indices_of_substring(catalog.html, '<b>Minors</b>')
print(minor_index)

# Maps each scraped major name to the list of offsets at which that name
# matches in the page HTML.
major_indices_dict = {}

# NOTE: `major` stays bound to the last name once this loop finishes, and
# a later cell reads it.
for major in catalog.list_of_all_majors:
    major_indices_dict[major] = get_indices_of_substring(catalog.html, major)
major_indices_dict

[161, 3361, 3467, 141685, 160956, 172120, 172217, 174071, 174328, 174514, 189405]


{'Aerospace': [174946, 189481],
 'Applied and Computational Mathematics': [175250, 186672],
 'Applied Physics': [175560, 186781],
 'Astrophysics': [175845, 186865],
 'Bioengineering': [176129, 186948],
 'Biology': [176408, 185841, 187026],
 'Business, Economics, and Management': [176706],
 'Chemical Engineering': [177019, 185952, 187236],
 'Chemistry': [177306, 185938, 187322, 189556],
 'Computational and Neural Systems': [177605, 187420],
 'Computer Science': [177911, 187525, 189638],
 'Control and Dynamical Systems': [178214, 189740],
 'Economics': [176716, 178510, 187133, 187607],
 'Electrical Engineering': [178799, 187695],
 'English': [179086, 187900, 189833],
 'Engineering and Applied Science': [179382, 186031, 187805],
 'Geobiology, Geochemistry, Geology, Geophysics, and Planetary Science': [179735],
 'History': [180068, 180363, 188494, 188588, 190182, 190276],
 'History and Philosophy of Science': [180363, 188588, 190276],
 'Information and Data Sciences': [180683, 188707, 1903

In [None]:
# The signature is (string, substring): search the page HTML for the major
# name, not the other way round. `major` is the loop variable left bound by
# the earlier indexing loop.
get_indices_of_substring(catalog.html, major)

<h2><b>Majors</b></h2><ul><li><a href="/explore/academics/majors-minors/applied-and-computational-mathematics">Applied and Computational Mathematics</a></li><li><a href="/explore/academics/majors-minors/applied-physics">Applied Physics</a></li><li><a href="/explore/academics/majors-minors/astrophysics">Astrophysics</a></li><li><a href="/explore/academics/majors-minors/bioengineering">Bioengineering</a></li><li><a href="/explore/academics/majors-minors/biology">Biology</a></li><li><a href="/explore/academics/majors-minors/business-economics-and-management">Business, Economics &amp; Management</a></li><li><a href="/explore/academics/majors-minors/chemical-engineering">Chemical Engineering</a></li><li><a href="/explore/academics/majors-minors/chemistry">Chemistry</a></li><li><a href="/explore/academics/majors-minors/computational-and-neural-systems">Computational and Neural Systems</a></li><li><a href="/explore/academics/majors-minors/computer-science">Computer Science</a></li><li><a href="/explore/academics/majors-minors/economics">Economics</a></li><li><a href="/explore/academics/majors-minors/electrical-engineering">Electrical Engineering</a></li><li><a href="/explore/academics/majors-minors/engineering-and-applied-science">Engineering and Applied Science</a></li><li><a href="/explore/academics/majors-minors/english">English</a></li><li><a href="/explore/academics/majors-minors/geobiology-geochemistry-geology-geophysics-and-planetary-science">Geobiology</a></li><li><a href="/explore/academics/majors-minors/geobiology-geochemistry-geology-geophysics-and-planetary-science">Geochemistry</a></li><li><a href="/explore/academics/majors-minors/geobiology-geochemistry-geology-geophysics-and-planetary-science">Geology</a></li><li><a href="/explore/academics/majors-minors/geobiology-geochemistry-geology-geophysics-and-planetary-science">Geophysics</a></li><li><a href="/explore/academics/majors-minors/history">History</a></li><li><a 
href="/explore/academics/majors-minors/history-and-philosophy-science">History and Philosophy of Science</a></li><li><a href="/explore/academics/majors-minors/information-and-data-sciences">Information and Data Sciences</a></li><li><a href="/explore/academics/majors-minors/materials-science">Materials Science</a></li><li><a href="/explore/academics/majors-minors/mathematics">Mathematics</a></li><li><a href="/explore/academics/majors-minors/mechanical-engineering">Mechanical Engineering</a></li><li><a href="/explore/academics/majors-minors/philosophy">Philosophy</a></li><li><a href="/explore/academics/majors-minors/physics">Physics</a></li><li><a href="/explore/academics/majors-minors/geobiology-geochemistry-geology-geophysics-and-planetary-science">Planetary Science</a></li><li><a href="/explore/academics/majors-minors/political-science">Political Science</a></li></ul><h2><b>Minors</b></h2><ul><li><a href="/explore/academics/majors-minors/aerospace">Aerospace</a></li><li><a href="/explore/academics/majors-minors/chemistry">Chemistry</a></li><li><a href="/explore/academics/majors-minors/computer-science">Computer Science</a></li><li><a href="/explore/academics/majors-minors/control-and-dynamical-systems">Control and Dynamical Systems</a></li><li><a href="/explore/academics/majors-minors/english">English</a></li><li><a href="/explore/academics/majors-minors/engineering-and-applied-science">Environmental Science and Engineering</a></li><li><a href="/explore/academics/majors-minors/geobiology-geochemistry-geology-geophysics-and-planetary-science">Geological and Planetary Systems</a></li><li><a href="/explore/academics/majors-minors/history">History</a></li><li><a href="/explore/academics/majors-minors/history-and-philosophy-science">History and Philosophy of Science</a></li><li><a href="/explore/academics/majors-minors/information-and-data-sciences">Information and Data Sciences</a></li><li><a href="/explore/academics/majors-minors/philosophy">Philosophy</a></li><li><a 
href="/explore/academics/majors-minors/structural-mechanics">Structural Mechanics</a></li></ul>
