In [11]:
# import relevant libraries
import requests
from bs4 import BeautifulSoup
from datetime import datetime, timedelta
import pandas as pd


In [3]:
def scrape_page(page_url):
    # fetch the webpage content
    response = requests.get(page_url)
    # Check if the request was successful
    if response.status_code != 200:
        print(f"Failed to retrieve the webpage. Status code: {response.status_code}")
        return None
    # Parse the HTML content
    soup = BeautifulSoup(response.content, 'html.parser')
    # Find all the span elements with class 'text-warning'
    spans = soup.find_all('span', class_='text-warning')
    # Extract the text from each span element
    venue_info = [span.text.strip() for span in spans]
    return venue_info

In [4]:
# scraping a single page
url = "https://sts.ug.edu.gh/timetable/thedate/2024-04-03"
venueInfo = scrape_page(url)

In [5]:
if venueInfo:
    for info in venueInfo:
        print(info)

MAIN-CAMPUS
UGCS Lab 3 - Main, UGCS Lab 4A, UGCS Lab 4B, Balme-Knowledge Commons, Balme- IAC Training Lab, UGBS GRADUATE SCHOOL LAB - Opposite PRESEC, School of Engineering Lab, Dept. of Information Studies - Lab 1, Dept. of Information Studies - Lab 2, J. R. A  ICT Centre Next to  Kofi Dra Conf. Centre (Behind Psycology Department), School of Nursing, COE-SCDE LAB, Communications Studies Lab - Opposite JQB, Akuafo Hall Lab,
MAIN-CAMPUS
UGCS Lab 3 - Main, UGCS Lab 4A, Balme-Knowledge Commons, Balme- IAC Training Lab, UGBS GRADUATE SCHOOL LAB - Opposite PRESEC, School of Engineering Lab, Dept. of Information Studies - Lab 1, Dept. of Information Studies - Lab 2, J. R. A  ICT Centre Next to  Kofi Dra Conf. Centre (Behind Psyc. Dept), School of Nursing, COE-SCDE LAB, Communications Studies Lab - Opposite JQB, Akuafo Hall Lab,
MAIN-CAMPUS
UGCS Lab 4B,
MAIN-CAMPUS
UGCS Lab 3 - Main, UGCS Lab 4A, UGCS Lab 4B, Balme-Knowledge Commons, Balme- IAC Training Lab, UGBS GRADUATE SCHOOL LAB - Opposi

In [6]:
# function for date range linked to page
def generate_dates(start_date, end_date):
    current_date = start_date
    while current_date <= end_date:
        yield current_date
        current_date += timedelta(days=1)

In [7]:
# scraping multiple pages using generate_dates()
startDate = datetime(2024, 4, 3)
endDate = datetime(2024, 4, 21)
allVenues = set()

for date in generate_dates(startDate, endDate):
    formatted_date = date.strftime('%Y-%m-%d')
    url = f"https://sts.ug.edu.gh/timetable/thedate/{formatted_date}"
    print(f"Scraping URL: {url}")
    venueInfo = scrape_page(url)
    if venueInfo:
        print(f"Date: {formatted_date}")
        for info in venueInfo:
            # Split the info string by commas and add to the set
            venues = info.split(',')
            for venue in venues:
                cleaned_venue = venue.strip()  # Remove leading/trailing whitespace
                if cleaned_venue:  
                    allVenues.add(cleaned_venue)
        print("\n")
    else:
        print(f"No venue information found for Date: {formatted_date}\n")

Scraping URL: https://sts.ug.edu.gh/timetable/thedate/2024-04-03
Date: 2024-04-03


Scraping URL: https://sts.ug.edu.gh/timetable/thedate/2024-04-04
Date: 2024-04-04


Scraping URL: https://sts.ug.edu.gh/timetable/thedate/2024-04-05
Date: 2024-04-05


Scraping URL: https://sts.ug.edu.gh/timetable/thedate/2024-04-06
Date: 2024-04-06


Scraping URL: https://sts.ug.edu.gh/timetable/thedate/2024-04-07
Date: 2024-04-07


Scraping URL: https://sts.ug.edu.gh/timetable/thedate/2024-04-08
Date: 2024-04-08


Scraping URL: https://sts.ug.edu.gh/timetable/thedate/2024-04-09
Date: 2024-04-09


Scraping URL: https://sts.ug.edu.gh/timetable/thedate/2024-04-10
No venue information found for Date: 2024-04-10

Scraping URL: https://sts.ug.edu.gh/timetable/thedate/2024-04-11
No venue information found for Date: 2024-04-11

Scraping URL: https://sts.ug.edu.gh/timetable/thedate/2024-04-12
Date: 2024-04-12


Scraping URL: https://sts.ug.edu.gh/timetable/thedate/2024-04-13
Date: 2024-04-13


Scraping URL: ht

In [8]:
print("Unique Venues:")
for venue in sorted(allVenues):
    print(venue)

Unique Venues:
Akuafo Hall Computer Lab
Akuafo Hall Lab
BALME - IAC TRAINING LAB
Balme- IAC Training Lab
Balme-IAC Training Lab
Balme-Knowledge Commons
CENTRAL CAFETERIA
CENTRL CAFETERIA
COE-SCDE LAB
CoE-SCDE Computer Lab
Communications Studies Lab - Opposite JQB
Dept. of Information Studies - Lab 1
Dept. of Information Studies - Lab 2
Dept. of Information Studies Lab 1
GCB BLDG.
GCB BUILDING
J. R. A  ICT Centre Next to  Kofi Dra Conf. Centre (Behind Psyc. Dept)
J. R. A  ICT Centre Next to  Kofi Dra Conf. Centre (Behind Psycology Department)
J. R. A  ICT Centre Next to  Kofi Drah Conf. Centre (Behind Psyc. Dept)
J.R.A ICT CENTER (NEXT TO KOFI DRA CONFRENCE CENTER - BEHIND DEPT OF PSYCH - AROUND N'BLOCK)
J.R.A. ICT Centre Next to Kofi Drah Conference Centre (Behind Psychology Dept)
JONES QUARTEY BLDG. (FIRST FLOOR)
JONES QUARTEY BLDG. (GROUND FLOOR)
JONES QUARTEY BUILDING      (FIRST FLOOR)
JONES QUARTEY BUILDING      (GROUND FLOOR)
JONES QUARTEY BUILDING     (GROUND FLOOR)
JONES QUARTE

In [12]:
uniqueVenues = sorted(allVenues)
df = pd.DataFrame(uniqueVenues, columns=["Venue"])
df.to_excel("uniqueVenues.xlsx", index=False)
print("Unique venues have been saved to 'uniqueVenues.xlsx'")

Unique venues have been saved to 'uniqueVenues.xlsx'
