In [1]:
from io import StringIO
from bs4 import BeautifulSoup, NavigableString
import pandas as pd
import requests
import re
import os
from WebScrapingProject.Schools.ai_utils import *

# Lift pandas' display limits: show every column and never truncate cell text.
pd.options.display.max_columns = None
pd.options.display.max_colwidth = None

In [2]:
# Download the tuition & fees page that the cells below parse.
url = 'https://www.westcliff.edu/financial-aid/tuition-fees'
# Without a timeout requests waits indefinitely, so an unresponsive server
# would hang this cell.
page = requests.get(url, timeout=30)
# Stop here on a 4xx/5xx reply rather than parsing an error page, which would
# only surface later as empty extraction results.
page.raise_for_status()

In [3]:
# Parse the downloaded HTML using the parser from Python's standard library.
soup = BeautifulSoup(markup=page.text, features='html.parser')

In [4]:
def extract_content(soup, start_string, end_string, include_end=False):
    """Collect the page text found between two marker strings.

    The text nodes that follow the start node's parent tag are visited in
    document order (this walk includes the start node itself). Each node is
    whitespace-stripped and put on its own line until a node whose text equals
    ``end_string`` is reached. Runs of newlines are then collapsed to one and
    every comma is removed (so ``$1,000`` becomes ``$1000``).

    Args:
        soup: Parsed document offering ``find(string=...)``, e.g. a
            BeautifulSoup object.
        start_string: Exact text of the node that marks where to begin.
        end_string: Exact text of the node that marks where to stop.
        include_end: When True, the end node's text is kept as the last line.

    Returns:
        The collected text, one node per line, or an empty string when either
        marker cannot be found in ``soup``.
    """
    start = soup.find(string=start_string)
    end = soup.find(string=end_string)
    if not (start and end):
        return ''

    pieces = []
    # string=True yields text nodes only, so no per-element type check is needed.
    for element in start.parent.find_all_next(string=True):
        # Text nodes compare by their text, so the walk stops at the first
        # string after the start whose text equals end_string.
        # NOTE(review): that is not necessarily the very node soup.find() returned.
        if element == end:
            if include_end:
                pieces.append(element.strip())
            break
        pieces.append(element.strip())

    # One node per line; join once instead of growing a string inside the loop.
    content = ''.join(piece + '\n' for piece in pieces)
    # Whitespace-only nodes leave empty lines behind; squeeze them out.
    content = re.sub('\n+', '\n', content)
    return content.replace(",", "")

def extract_inner_string(content, start_marker, end_marker, include_end=False):
    """Return the part of ``content`` from ``start_marker`` up to ``end_marker``.

    The slice begins at the first occurrence of ``start_marker`` (which is
    included) and runs to the first occurrence of ``end_marker`` after it.

    Args:
        content: Text to search.
        start_marker: Substring marking where the result begins.
        end_marker: Substring marking where the result stops.
        include_end: When True, ``end_marker`` is appended to the result.

    Returns:
        ``start_marker`` followed by the text between the two markers, plus
        ``end_marker`` when ``include_end`` is True.

    Raises:
        ValueError: If either marker is absent from ``content``, or if
            ``end_marker`` does not occur after the first ``start_marker``.
    """
    if start_marker not in content or end_marker not in content:
        raise ValueError('Start or end marker not found in content.')
    _, _, after_start = content.partition(start_marker)
    inner_content, found_end, _ = after_start.partition(end_marker)
    # partition() returns an empty separator when end_marker is missing from
    # the remainder, i.e. it only occurs before the start marker. Returning a
    # result then would invent an end marker that never followed the start.
    if not found_end:
        raise ValueError('End marker not found after start marker.')
    result_content = start_marker + inner_content
    if include_end:
        result_content += end_marker
    return result_content

In [5]:
# 2023-2024 schedule: text from the heading up to the first 'F-1 Students' string.
x1 = extract_content(
    soup,
    start_string='The 2023-2024 Tuition & Fees',
    end_string='F-1 Students',
)
# Everything between the 2023-2024 heading and the 2024-2025 heading.
x2 = extract_content(
    soup,
    start_string='The 2023-2024 Tuition & Fees',
    end_string='The 2024-2025 Tuition & Fees',
)
# Narrow that down to the International (F-1) part. The end marker is written
# without a comma because extract_content strips commas from its result.
x3 = extract_inner_string(
    x2,
    start_marker='International (F-1) Students',
    end_marker='$0.00 per $1000',
    include_end=True,
)
x_final = ''.join((x1, x3))
print(x_final)

The 2023-2024 Tuition & Fees
Effective Fall Session 1 2023
International (F-1) Students
Degree Program Tuition
Bachelor Programs
120 Credits
$695 per credit hour
Master Programs
36 Credits
$795 per credit hour
Master Programs (Professional)
36 Credits
$855 per credit hour
Doctorate Programs
60 Credits
$855 per credit hour
Doctorate Programs (Professional)
60 Credits
$940 per credit hour
Certificate Program Tuition
Graduate Certificates (8 months)
12 Credits
$9540
Coding Bootcamp Undergraduate Certificate
18 Credits
$12000
Undergraduate Certificate
18 Credits
$12510
English As an Additional Language (EAL/ESL) – REAL Intensive English
$2600 per course
$5200
Undergraduate Communication Pathway
$2600 Intensive English + Concurrent Course
$9370
Graduate Communication Pathway
$2600 Intensive English + Concurrent Course
$9970
Graduate Communication Pathway
$2600 Intensive English + Concurrent Doctoral Course
$10330
Schedule of Fees
Application (non-refundable)
$50 one time
Registration (non-r

In [11]:
# 2024-2025 schedule: text from the heading up to the first 'F-1 Students' string.
y1 = extract_content(
    soup,
    start_string='The 2024-2025 Tuition & Fees',
    end_string='F-1 Students',
)
# From the 2024-2025 heading through the '$0.00 per $1,000' string; the comma
# is kept here because this marker is matched against the page text itself.
y2 = extract_content(
    soup,
    start_string='The 2024-2025 Tuition & Fees',
    end_string='$0.00 per $1,000',
    include_end=True,
)
# Narrow that down to the International (F-1) part. The end marker is written
# without a comma because extract_content strips commas from its result.
y3 = extract_inner_string(
    y2,
    start_marker='International (F-1) Students',
    end_marker='$0.00 per $1000',
    include_end=True,
)
y_final = ''.join((y1, y3))
print(y_final)

The 2024-2025 Tuition & Fees
Effective Fall Session 1 2024
International (F-1) Students
Degree Program Tuition
Bachelor Programs
120 Credits
$730 per credit hour
Master Programs
36 Credits
$819 per credit hour
Master Programs (Professional)
36 Credits
$855 per credit hour
Doctorate Programs
60 Credits
$881 per credit hour
Doctorate Programs (Professional)
60 Credits
$940 per credit hour
Certificate Program Tuition
Undergrad Leadership Cert/Bus Admin Cert
18 Credits
$13140
Undergrad TESOL Cert
18 Credits
$13140
Gcert Marketing
12 Credits
$10572
Gcert Exec. Mgmt
12 Credits
$10572
Gcert TESOL
12 Credits
$10572
UG Coding Bootcamp
24 Credits
$12000
Undergraduate Communication Pathway
$2600/REAL Course + Concurrent Course
$9580
Graduate Communication Pathway (Master level)
$2600/REAL Course + Concurrent Course
$10114
Graduate Communication Pathway (Doctoral level)
$2600/REAL Course + Concurrent Doctoral Course
$10446
Schedule of Fees
Application (non-refundable)
$50 one time
Registration (no

In [16]:
# Put the two academic years' text together, joined by two newlines.
final_text = '\n\n'.join((x_final, y_final))
print(final_text)

The 2023-2024 Tuition & Fees
Effective Fall Session 1 2023
International (F-1) Students
Degree Program Tuition
Bachelor Programs
120 Credits
$695 per credit hour
Master Programs
36 Credits
$795 per credit hour
Master Programs (Professional)
36 Credits
$855 per credit hour
Doctorate Programs
60 Credits
$855 per credit hour
Doctorate Programs (Professional)
60 Credits
$940 per credit hour
Certificate Program Tuition
Graduate Certificates (8 months)
12 Credits
$9540
Coding Bootcamp Undergraduate Certificate
18 Credits
$12000
Undergraduate Certificate
18 Credits
$12510
English As an Additional Language (EAL/ESL) – REAL Intensive English
$2600 per course
$5200
Undergraduate Communication Pathway
$2600 Intensive English + Concurrent Course
$9370
Graduate Communication Pathway
$2600 Intensive English + Concurrent Course
$9970
Graduate Communication Pathway
$2600 Intensive English + Concurrent Doctoral Course
$10330
Schedule of Fees
Application (non-refundable)
$50 one time
Registration (non-r