## Raleigh Event Web Scraper
### -Dhillon Patel

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

### Web scraper for Goodnights Comedy Club

In [2]:
# Scrape the Goodnights Comedy Club events page into `events_df`
# (columns: Title, Date, Time, Ticket Link). Missing fields are stored as the
# literal string "None Found".

# URL of the Goodnights Comedy events page
url = "https://www.goodnightscomedy.com/events"

# Add crawl delay (seconds) to avoid overloading website
sleep_time = 5
if isinstance(sleep_time, (int, float)):
    time.sleep(sleep_time)

# Make a request to fetch the webpage content.
# The timeout stops the cell from hanging forever on a stalled connection.
response = requests.get(url, timeout=30)


if response.status_code == 200:
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find all event items on the page by targeting the 'event-list-item' class
    events = soup.find_all('div', class_='event-list-item')

    # Initialize a list to hold the extracted event details
    event_list = []

    # Loop through each event item to extract relevant details
    for event in events:

        # Title lives in the <h3 class="el-header"> tag
        title_element = event.find('h3', class_='el-header')

        # Date and time share one <h6 class="event-date"> tag, separated by a comma
        time_element = event.find('h6', class_='event-date')

        # Ticket link is the <a> tag whose text is exactly 'Buy Tickets'
        ticket_link_element = event.find('a', string='Buy Tickets')

        title = title_element.text.strip() if title_element else "None Found"

        # Split "<date>, <time>" defensively: a tag without a comma yields only
        # a date instead of raising IndexError.
        date = "None Found"
        # NOTE: deliberately not named `time` -- that would rebind the `time`
        # module to a string and break every later `time.sleep(...)` call.
        event_time = "None Found"
        if time_element:
            date_time_parts = [part.strip() for part in time_element.text.split(",")]
            date = date_time_parts[0]
            if len(date_time_parts) > 1:
                event_time = date_time_parts[1]

        # `.get` avoids a KeyError on an <a> tag that has no href attribute
        href = ticket_link_element.get('href') if ticket_link_element else None
        if not href:
            ticket_link = "None Found"
        elif href.startswith(("http://", "https://")):
            # Already absolute: prefixing the site root would corrupt it
            ticket_link = href
        else:
            # Site-relative path: prepend the site root to build the full URL
            ticket_link = f"https://www.goodnightscomedy.com{href}"

        # Append the extracted details as a dictionary to the list
        event_list.append({
            "Title": title,             # Event title
            "Date": date,               # Event date
            "Time": event_time,         # Event time
            "Ticket Link": ticket_link  # Full ticket URL, or "None Found"
        })

    # Convert the list of extracted event details into a pandas DataFrame
    events_df = pd.DataFrame(event_list)

    # Display the DataFrame to the user
    display(events_df)

else:
    # Print an error message if the webpage could not be fetched successfully
    print("Failed to retrieve the webpage. Please check the URL or try again later.")

Unnamed: 0,Title,Date,Time,Ticket Link
0,Special Event: Natasha Leggero,None Found,None Found,https://www.goodnightscomedy.com/events/104690
1,Special Event: New Year's Eve with Gabriel Rut...,None Found,None Found,https://www.goodnightscomedy.com/events/100750
2,Room 861 at Goodnights: New Year's Eve with Re...,None Found,None Found,https://www.goodnightscomedy.com/events/106166
3,Special Event: Frankie Quiñones,None Found,None Found,https://www.goodnightscomedy.com/events/100793
4,Room 861 at Goodnights: James Adomian,None Found,None Found,https://www.goodnightscomedy.com/events/102003
5,Special Event: Cat Ce,Wed Jan 8 2025,7:15 PM,https://www.goodnightscomedy.com/shows/278619
6,Goodnights Presents: Zainab Johnson,Thu Jan 9 2025,8:00 PM,https://www.goodnightscomedy.com/shows/267654
7,Room 861 at Goodnights: Joe Machi,None Found,None Found,https://www.goodnightscomedy.com/events/106323
8,Special Event: DC Curry,None Found,None Found,https://www.goodnightscomedy.com/events/100796
9,Special Event: Casey Rocket,None Found,None Found,https://www.goodnightscomedy.com/events/101744


### Web scraper for Lenovo Center

In [3]:
# Scrape the Lenovo Center events page into `events_df2`
# (columns: Title, Date, Time, Ticket Link). Missing fields are stored as the
# literal string "None Found".

# Re-import so `time` is the module again, in case an earlier cell rebound the
# name to a string.
import time
# URL of the Lenovo Center events page
url2 = "https://www.lenovocenter.com/events"

# Add crawl delay (seconds) to avoid overloading website
sleep_time = 5
if isinstance(sleep_time, (int, float)):
    time.sleep(sleep_time)

# Make a request to fetch the webpage content.
# The timeout stops the cell from hanging forever on a stalled connection.
response2 = requests.get(url2, timeout=30)


if response2.status_code == 200:
    soup2 = BeautifulSoup(response2.content, 'html.parser')

    # Find all event items on the page by targeting the class 'eventItem'
    events2 = soup2.find_all('div', class_='eventItem')

    # Initialize a list to hold the extracted event details
    event_list2 = []

    # Loop through each event item to extract relevant details
    for event2 in events2:

        # Look up every element first; any of them may be absent on a given
        # card, and one incomplete card should not abort the whole scrape.
        title_element = event2.find('h3', class_='title')
        date_element = event2.find('span', class_='m-date__singleDate')
        time_element = event2.find('h5', class_='time')
        link_element = event2.find('a', class_='more')

        title = title_element.text.strip() if title_element else "None Found"
        date = date_element.text.strip() if date_element else "None Found"

        # The <h5 class="time"> text is split on a double space and the second
        # piece is taken as the time; fall back when that piece is missing.
        # NOTE: deliberately not named `time` -- that would rebind the `time`
        # module to a string and break later `time.sleep(...)` calls.
        event_time = "None Found"
        if time_element:
            time_parts = time_element.text.strip().split("  ")
            if len(time_parts) > 1:
                event_time = time_parts[1].strip()

        # `.get` avoids a KeyError on an <a> tag that has no href attribute
        link = (link_element.get('href') if link_element else None) or "None Found"

        # Append the extracted details as a dictionary to the list
        event_list2.append({
            "Title": title,          # Event title
            "Date": date,            # Event date
            "Time": event_time,      # Event time
            "Ticket Link": link      # Link to buy tickets
        })

    # Convert the list of extracted event details into a pandas DataFrame for easier manipulation
    events_df2 = pd.DataFrame(event_list2)

    # Display the DataFrame to the user
    display(events_df2)

else:
    # Print an error message if the webpage could not be fetched successfully
    print("Failed to retrieve the webpage. Please check the URL or try again later.")


Unnamed: 0,Title,Date,Time,Ticket Link
0,New Jersey Devils vs. Carolina Hurricanes,Dec 28 / 2024,7:00 PM,https://www.lenovocenter.com/events/detail/new...
1,Minnesota Wild vs. Carolina Hurricanes,Jan 04 / 2025,7:00 PM,https://www.lenovocenter.com/events/detail/min...
2,Pittsburgh Penguins vs. Carolina Hurricanes,Jan 05 / 2025,6:00 PM,https://www.lenovocenter.com/events/detail/pit...
3,Notre Dame vs. NC State,Jan 08 / 2025,7:00 PM,https://www.lenovocenter.com/events/detail/not...
4,Toronto Maple Leafs vs. Carolina Hurricanes,Jan 09 / 2025,7:30 PM,https://www.lenovocenter.com/events/detail/tor...
5,Vancouver Canucks vs. Carolina Hurricanes,Jan 10 / 2025,7:00 PM,https://www.lenovocenter.com/events/detail/van...
6,North Carolina vs. NC State,Jan 11 / 2025,4:00 PM,https://www.lenovocenter.com/events/detail/nor...
7,Anaheim Ducks vs. Carolina Hurricanes,Jan 12 / 2025,5:00 PM,https://www.lenovocenter.com/events/detail/ana...
8,2025 Hurricanes Alumni Game,Jan 13 / 2025,7:00 PM,https://www.lenovocenter.com/events/detail/202...
9,Vegas Golden Knights vs. Carolina Hurricanes,Jan 17 / 2025,7:00 PM,https://www.lenovocenter.com/events/detail/veg...


### Web scraper for Coastal Credit Union Music Park

In [6]:
# Scrape the Coastal Credit Union Music Park shows page into `events_df3`
# (columns: Title, Date, Time, Ticket Link). Title, Date and Ticket Link fall
# back to "None Found" when missing; Time is always None because no selector
# for it is known yet.

# Re-import so `time` is the module again, in case an earlier cell rebound the
# name to a string.
import time
# URL of the site
url3 = "https://www.coastalcreditunionmusicpark.com/shows"

# Add crawl delay (seconds) to avoid overloading website
sleep_time = 5
if isinstance(sleep_time, (int, float)):
    time.sleep(sleep_time)

# The timeout stops the cell from hanging forever on a stalled connection.
response3 = requests.get(url3, timeout=30)


if response3.status_code == 200:
    # Parse only after the status check, so an error page is never parsed
    soup3 = BeautifulSoup(response3.text, 'html.parser')

    # Find all event cards on the page targeting the class 'chakra-card css-1jq78gh'
    events3 = soup3.find_all('div', class_='chakra-card css-1jq78gh')
    event_list3 = []  # Initialize an empty list to store event details

    # Loop through each event card to extract the relevant content.
    # No per-event sleep: the loop only reads the already-downloaded HTML and
    # sends no further requests, so a delay here would not spare the website.
    for event3 in events3:

        # Look up every element first; any of them may be absent on a given
        # card, and one incomplete card should not abort the whole scrape.
        name_element3 = event3.find('p', class_='chakra-text css-zvlevn')
        date_element3 = event3.find('p', class_='chakra-text css-l141uf')
        link_element3 = event3.find('a', string='Buy Tickets')

        # Extract the name of the event
        name3 = name_element3.string if name_element3 else "None Found"

        # Extract the date of the event
        date3 = date_element3.string if date_element3 else "None Found"

        # Extract the ticket link; `.get` avoids a KeyError on a missing href
        ticket_link3 = (link_element3.get('href') if link_element3 else None) or "None Found"

        # TODO: time extraction -- the class name of the time element is not
        # known yet. Store an explicit None instead of running a lookup with an
        # empty tag name and class, which could put an unrelated Tag in the
        # Time column.
        time_element3 = None

        # Append the extracted details as a dictionary to the list
        event_list3.append({
            "Title": name3,          # Event title
            "Date": date3,           # Event date
            "Time": time_element3,   # Event time (currently not specified)
            "Ticket Link": ticket_link3  # Link to buy tickets
        })

    # Convert the list of events into a pandas DataFrame for easier manipulation and analysis
    events_df3 = pd.DataFrame(event_list3)

    # Display the DataFrame to the user
    display(events_df3)
else:
    # Print an error message if the request failed
    print("Failed to retrieve the webpage. Please check the URL or try again later.")


Unnamed: 0,Title,Date,Time,Ticket Link
0,Tyler Childers: On The Road,"Tue Apr 15, 2025",,https://concerts.livenation.com/tyler-childers...
1,RÜFÜS DU SOL Inhale / Exhale World Tour 2025,"Fri May 9, 2025",,https://concerts.livenation.com/rufus-du-sol-i...
2,Keith Urban: High and Alive World Tour,"Sat May 31, 2025",,https://concerts.livenation.com/keith-urban-hi...
3,Riley Green: Damn Country Music Tour,"Sat Jun 14, 2025",,https://concerts.livenation.com/riley-green-da...
4,Avril Lavigne: The Greatest Hits,"Tue Jun 17, 2025",,https://concerts.livenation.com/avril-lavigne-...
5,Rod Stewart: One Last Time,"Tue Jul 8, 2025",,https://concerts.livenation.com/rod-stewart-on...
6,Styx & Kevin Cronin + Don Felder The Brotherho...,"Wed Jul 9, 2025",,https://concerts.livenation.com/styx-kevin-cro...
7,Thomas Rhett: Better In Boots Tour 2025,"Thu Jul 31, 2025",,https://concerts.livenation.com/thomas-rhett-b...
8,"Nelly with Ja Rule, Eve and Special Guests: Wh...","Sat Sep 13, 2025",,https://concerts.livenation.com/nelly-with-ja-...
9,2025 Premium Season Tickets Wait List,"Wed Dec 31, 2025",,https://premium.livenation.com/venue/coastal-c...


### Combined DataFrame of events from all three websites

In [5]:
# Stack the three per-venue tables into one frame, renumbering rows from 0
allevents = pd.concat(
    objs=(events_df, events_df2, events_df3),
    ignore_index=True,
)

# Show the combined table, then write it to Events.csv in the working directory
display(allevents)
allevents.to_csv(path_or_buf='Events.csv')


Unnamed: 0,Title,Date,Time,Ticket Link
0,Special Event: Natasha Leggero,None Found,None Found,https://www.goodnightscomedy.com/events/104690
1,Special Event: New Year's Eve with Gabriel Rut...,None Found,None Found,https://www.goodnightscomedy.com/events/100750
2,Room 861 at Goodnights: New Year's Eve with Re...,None Found,None Found,https://www.goodnightscomedy.com/events/106166
3,Special Event: Frankie Quiñones,None Found,None Found,https://www.goodnightscomedy.com/events/100793
4,Room 861 at Goodnights: James Adomian,None Found,None Found,https://www.goodnightscomedy.com/events/102003
...,...,...,...,...
66,Rod Stewart: One Last Time,"Tue Jul 8, 2025",,https://concerts.livenation.com/rod-stewart-on...
67,Styx & Kevin Cronin + Don Felder The Brotherho...,"Wed Jul 9, 2025",,https://concerts.livenation.com/styx-kevin-cro...
68,Thomas Rhett: Better In Boots Tour 2025,"Thu Jul 31, 2025",,https://concerts.livenation.com/thomas-rhett-b...
69,"Nelly with Ja Rule, Eve and Special Guests: Wh...","Sat Sep 13, 2025",,https://concerts.livenation.com/nelly-with-ja-...
