<a href="https://colab.research.google.com/github/SjSterling/Cosmology/blob/main/credithour.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import requests
from bs4 import BeautifulSoup

# Catalog page that main() scans for links to course-listing pages.
start_url = "http://catalog.valdosta.edu/undergraduate/courses-instruction/"

def get_links(url, *, timeout=30):
    """Collect the course-listing links found on a catalog page.

    Args:
        url: Address of the page to download and scan.
        timeout: Value passed to ``requests.get`` as its ``timeout``
            (seconds). Without one, a stalled server would block the
            call indefinitely.

    Returns:
        A list of ``(href, link_text)`` tuples, one for each distinct
        anchor whose ``href`` starts with
        ``/undergraduate/courses-instruction/``, in the order the anchors
        appear on the page. ``link_text`` is stripped of surrounding
        whitespace.
    """
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.content, "html.parser")
    anchors = soup.select('a[href^="/undergraduate/courses-instruction/"]')
    pairs = ((anchor["href"], anchor.text.strip()) for anchor in anchors)
    # dict.fromkeys drops exact repeats while keeping first-seen order, so
    # the caller does not download the same page more than once.
    return list(dict.fromkeys(pairs))

def _parse_course_title(title):
    """Split a course title into ``(course_code, credit_hours)``.

    The title is assumed to look like
    "ASTR 1010K - Astronomy of the Solar System (3-0-3)".
    NOTE(review): confirm this matches the titles the catalog actually
    serves; titles in another shape fall back to the defaults below.

    Returns:
        ``course_code`` is the text before the first ``-``. ``credit_hours``
        is the text after the last ``-`` inside the last parenthesized
        group (the whole group when it holds no ``-``). A title without
        any ``-`` yields ``("No Course Code Available", "0")``; a title
        without a parenthesized group, or with an empty one, yields
        ``"0"`` credit hours.
    """
    code_part, dash, remainder = title.partition('-')
    if not dash:
        return "No Course Code Available", "0"
    course_code = code_part.strip()

    # Only look inside the trailing "(...)" group, so a hyphen in the course
    # name (e.g. "Pre-Calculus") cannot leak into the credit hours.
    open_at = remainder.rfind('(')
    if open_at == -1:
        return course_code, "0"
    close_at = remainder.find(')', open_at + 1)
    if close_at == -1:
        # Tolerate a missing closing parenthesis: read to the end of the title.
        hours_group = remainder[open_at + 1:]
    else:
        hours_group = remainder[open_at + 1:close_at]

    credit_hours = hours_group.rpartition('-')[2].strip()
    return course_code, credit_hours or "0"


def scrape_class_data(url, name, *, timeout=30):
    """Download a course-listing page and extract one record per course.

    Args:
        url: Address of the course-listing page.
        name: Label stored unchanged under the ``'Name'`` key of every
            record produced from this page.
        timeout: Value passed to ``requests.get`` as its ``timeout``
            (seconds). Without one, a stalled server would block the
            call indefinitely.

    Returns:
        A list of dicts, one per ``div.courseblock`` element, with the keys
        ``'Name'``, ``'Course Code'``, ``'Title'``, ``'Description'`` and
        ``'Credit Hours'``. Missing title or description elements are
        replaced by placeholder strings.
    """
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.content, "html.parser")

    class_data_list = []
    for class_block in soup.select('div.courseblock'):
        title_element = class_block.select_one('p.courseblocktitle')
        title = title_element.text.strip() if title_element else "No Title Available"

        description_element = class_block.select_one('p.courseblockdesc')
        description = description_element.text.strip() if description_element else "No Description Available"

        course_code, credit_hours = _parse_course_title(title)

        class_data_list.append({
            'Name': name,
            'Course Code': course_code,
            'Title': title,
            'Description': description,
            'Credit Hours': credit_hours,
        })
    return class_data_list

def main():
    """Scrape every linked course page and print each course's credit hours.

    Links are gathered from ``start_url``; each linked page is scraped, the
    records are reduced to a course-code -> credit-hours mapping (a later
    record with the same code overwrites the earlier value), and one line
    per course code is printed.
    """
    collected = []
    for href, subject in get_links(start_url):
        page_url = f"http://catalog.valdosta.edu{href}"
        collected += scrape_class_data(page_url, subject)

    # Map each course code to its credit hours.
    hours_by_code = {}
    for record in collected:
        hours_by_code[record['Course Code']] = record['Credit Hours']

    # Report the mapping for reference.
    for code in hours_by_code:
        print(f"Course Code: {code}, Credit Hours: {hours_by_code[code]}")

# Start the scrape only when this module is the entry point, not on import.
if __name__ == "__main__":
    main()
