In [6]:
import astropy.units as units
import astropy.constants as constants
import matplotlib.pyplot as plt
import sympy as sym
import numpy as np 
import pandas as pd
import plotly.express as px
import plotly.figure_factory as ff
import requests
import re
import os


def add_classes(self, additional_classes):
    self.classes = np.append(self.classes, additional_classes)
    self.classes = np.unique(self.classes)
    return self.classes

def assign_attributes(self, **kwargs):
    for key in kwargs:
        setattr(self, key, kwargs[key])

class Course:
    '''
    Recommended kwargs: "department_number", "name", "class_times", "terms_offered", "years_offered"
    '''
    def __init__(self, **kwargs):
        assign_attributes(self, **kwargs)
        
class Major:
    '''
    Each major is an object with the following attributes:

    name: string of the name of major

    requirements: dictionary of requirements for the major.

    only_minor: boolean, True if this object is only a minor, False otherwise

    abbreviation: string of the abbreviation of major

    required_course: numpy array of all the courses that can 
    be used to satify that specific requirement. 
    '''

    def __init__(self, name, only_minor, **kwargs):
        self.name = str(name)
        self.only_minor = only_minor
        self.requirements = {}
        assign_attributes(self, **kwargs)

    def add_requirements(self, **kwargs):
        for key in kwargs:
            self.requirements[key] = kwargs[key]

            

In [7]:
class Schedule:
    def __init__(self, time_constraints):
        self.time_constraints = time_constraints
        self.terms = np.array([])

        for i in range():
            self.terms.append(Term(time_constraints))
    
    def add_terms(self, additional_terms):
        np.append(self.terms, additional_terms)
    
    #def assign_classes(term):

In [23]:
'''
This next codeblock will use TQFRs and the major requirements 
to create a schedule for a given major.
'''

class Catalog:
    def __init__(self, file_path, want_entire_text, **kwargs):
        self.file_path = file_path
        self.want_entire_text = want_entire_text
        assign_attributes(self, **kwargs)
        if (want_entire_text):
            import PyPDF2
            pdf = open(file_path, 'rb')
            pdf_reader = PyPDF2.PdfFileReader(pdf)
            pdf_string = ""
            for page in range(pdf_reader.numPages):
                pdf_string += pdf_reader.getPage(page).extractText()
            self.entire_text = pdf_string
            self.entire_text = self.entire_text.replace('\n', ' ')
            self.entire_text = self.entire_text.replace('\r', ' ')
            self.entire_text = self.entire_text.replace('\t', ' ')

    def make_list_of_all_majors(self, **kwargs):
        pass

astro_cat = Catalog(file_path = "catalogUGinfo.pdf", want_entire_text = True)
major_indicator = "Option Requirements" 
track_indicators = ["Track Requirements:", "track:"]
minor_indicator = "Minor Requirements"

def get_indices_of_substring(string, substring):
    indices_object = re.finditer(pattern=substring, string=string)
    indices = [index.start() for index in indices_object]
    return np.array(indices)

major_indices = get_indices_of_substring(astro_cat.entire_text, major_indicator)[1:]
for indicator in track_indicators:
    major_indices = np.append(major_indices, 
        get_indices_of_substring(astro_cat.entire_text, indicator))
minor_indices = get_indices_of_substring(astro_cat.entire_text, minor_indicator)
white_space_indices = get_indices_of_substring(astro_cat.entire_text, ' ')


In [24]:
#find nearest period before index
def find_nearest_symbol_before(index, string, symbol_list):
    for i in range(index, 0, -1):
        if string[i] in symbol_list:
            return i + 1
    return 0

symbol_list = ['.', ':', ';', '!', '?', '-', ',', '\n', '\r', '\t',
            '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '+']

#remove whitespace at beginning of string
def remove_whitespace_at_beginning(string):
    for i in range(0, len(string)):
        if string[i] != ' ':
            return string[i:]
    return string

for index in major_indices:
    start_index = find_nearest_symbol_before(index, astro_cat.entire_text, symbol_list)
    print(remove_whitespace_at_beginning(astro_cat.entire_text[start_index:index]))

ACM 
APh 
Ay 
BE 
Bi 
BEM 
ChE 
Ch 
CNS 
CS 
Ec 
EE 
EAS 
En 
GPS 
GPS Option Requirements Geology 
Geobiology 
Geochemistry 
Geophysics 
Planetary Science 
H 
HPS 
IDS 
MS 
Ma 
ME 
PI 
Ph 
PS 
Chemistry 
Physics 
Biomolecular 
Sustainability 
Process systems 
Materials 
Computational 


In [25]:
def get_major_requirements(major, major_indicator = "Option Requirements",
    major_ending = 'Passing grades must be earned in a total of 486 units', want_list = True,
    requirement_separator = '1. ', lines_end_in_periods = False):
    major_index = get_indices_of_substring(astro_cat.entire_text, 
                major + " " + major_indicator)
                #in case the major name is a substring of another major        
    for index in major_index:
            start_index = find_nearest_symbol_before(index, astro_cat.entire_text, symbol_list)
            end_index = start_index + astro_cat.entire_text[start_index:].index(major_ending)
            requirements = remove_whitespace_at_beginning(astro_cat.entire_text[start_index:end_index])
            if (requirements[:len(major)] == major): #it's a match
                if (want_list):
                    return separate_major_requirements(requirements, requirement_separator, lines_end_in_periods)
                return requirements

def separate_major_requirements(major_requirements, requirement_separator, lines_end_in_periods = False):
    if not lines_end_in_periods:
        requirement_separator = requirement_separator[1:]
    requirements_list = []
    i = 1
    while True:
        new = major_requirements.split(str(i) + requirement_separator)
        if(len(new) < 2 or new[1] == ''):
            try: 
                requirements_list.append(new[0][0:new[0].index('.')])
            except:
                pass #the final requirement was not a complete sentence
            print(new)
            return requirements_list
        requirements_list.append(new[0])
        i += 1
        if(lines_end_in_periods):
            index = new[1].index('.')
            new[1] = new[1][0:index] + new[1][index+1:]        
        major_requirements = new[1]

#The way the sections for each minor are written are too irregular to end them cleanly (so far)
#The math major is not formatted regularly
get_major_requirements('Ay')

[' ']


['Ay Option Requirements ',
 ' Ay 20, 21, 101, 102, 30 or one term of Ay 141, Ay 31, Ma 2, Ma  3, Ph 2 abc or Ph 12 abc, Ph 125 ab, and Ph 106 abc.',
 ' Any three of Ph 3, Ph 5, Ph 6, Ph 7, or Ay 105. APh 23 and 24  taken as a pair may be substituted for one of these labs.  ',
 ' 63 additional units of Ay or Ph courses. Ph 127 a, Ph 136 bc and  one of Ph 21, Ph 22, Ph 121 abc are strongly recommended. Undergraduate Information 251',
 ' 27 additional units of science or engineering electives, of which  18 must be outside the Division of Physics, Mathematics and  Astronomy. ACM 95 ab is a popular choice. Core classes ( see  pages 240Œ244) or other introductory-level courses such as CS 1  do not count toward fulfillment of this requirement. ']

Now given a set of major requirements, we will return a schedule using the TQFRs and course schedules.

In [44]:
first_course_index = astro_cat.entire_text.index("Ae 100. Research in Aerospace")
start_index = find_nearest_symbol_before(first_course_index, astro_cat.entire_text, symbol_list)
last_course_instructor = astro_cat.entire_text.index("to feedback from peer reviewers.") 
#if this isn't the only class they teach use the find substrin function
courses_section = remove_whitespace_at_beginning(astro_cat.entire_text[first_course_index:last_course_instructor])


48743

In [57]:
start = courses_section.index("Ay 20")
end = start + courses_section[start:].index(";   ")
courses_section[start:end]

'Ay 20.  Basic Astronomy and the Galaxy.   -requisites: Ma 1 abc, Ph 1 abc or instructor™s permission.  The elec-tromagnetic spectrum and basic radiative transfer; ground and space  observing techniques; ﬁpictorial Fourier descriptionﬂ of astrophysical  485 -tions; the birth, structure, evolution, and death of stars; the structure   of-magnitude calculations and scaling arguments in order to elucidate  the physics of astrophysical phenomena. Short labs will introduce   Ay 21. Galaxies and Cosmology.  9 units (3-0-6); second term. Pre -requisites: Ma 1 abc, Ph 1 abc or instructor™s permission.  Cosmological tests; constituents of the universe, dark matter, and dark energy; ther -mal history of the universe, cosmic nucleosynthesis, recombination, and  cosmic microwave background; formation and evolution of structure   - universe; quasars and other active galactic nuclei, and their evolution'

In [66]:
#import .txt file as string
with open('C://Users//engin//Documents//GitHub//Computing-and-Formatting//wi2020schedule.txt', 'r') as myfile:
    new_schedule = myfile.read()
new_schedule = new_schedule.replace('\n', ' ').replace('\r', ' ').replace('\t', ' ')


19136

In [2]:
from multiprocessing import Process, freeze_support
from automate_web_typing import *

TabError: inconsistent use of tabs and spaces in indentation (automate_web_typing.py, line 119)

In [2]:
get_webpage_text()

opening  http://schedules.caltech.edu/WI2020-21.html
started the loop
Ran <function click_search_bar at 0x0000010E2394CD30> ( C:/Users/engin/Downloads/caltechLogoToClick.PNG ) for  5 seconds


"\r\n\r\nOffice of the Registrar - California Institute of Technology Student Affairs - California Institute of Technology\r\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\r\n\tCOURSE SCHEDULE \t\r\n\t\r\n\tWI 2020-21 \t\r\n\t\r\n\t  \t\r\n\t\r\n\tCaltechNotesGIF \t\r\n\t\r\n\t\r\n\t\r\n\t\r\n\t\r\n\t\r\n\t\r\n\t\r\n\t\r\n\tKey to Abbreviations \t\tPrintable PDF Version of Schedule \t\r\n\t\t\r\n\tDEPARTMENTS \t\r\n\t\r\n\t\t\r\n\tAerospace (Ae) \t\tEngineering (E) \t\r\n\t\t\r\n\tAnthropology (An) \t\tEnglish (En) \t\r\n\t\t\r\n\tApplied & Computational Math (ACM) \t\tEnglish as a Second Language (ESL) \t\r\n\t\t\r\n\tApplied Mechanics (AM) \t\tEnvironmental Science & Engineering (ESE) \t\r\n\t\t\r\n\tApplied Physics (APh) \t\tFreshman Seminars (FS) \t\r\n\t\t\r\n\tAstrophysics (Ay) \t\tGeology (Ge) \t\r\n\t\t\r\n\tBiochemistry & Molecular Biophysics (BMB) \t\tHistory (H) \t\r\n\t\t\r\n\tBioengineering (BE) \t\tHistory and Philosophy of Science (HPS) \t\r\n\t\t\r\n\tBiology (Bi) \t\tHumanities (Hu