In [10]:
# Install required packages. `%pip` installs into the environment of the running
# kernel; requests and beautifulsoup4 are listed because this notebook imports them.
%pip install pandas sqlalchemy mysqlclient requests beautifulsoup4



In [11]:
import requests  # For making HTTP requests
from bs4 import BeautifulSoup  # For parsing HTML
import pandas as pd  # For working with dataframes
from sqlalchemy import create_engine  # For creating a database engine


In [12]:
# Pages to scrape, as (reviews-page URL, Ticketmaster artist ID) pairs.
# The ID is the number that follows "/artist/" in the URL.
ticketmaster_urls = [
    (
        'https://www.ticketmaster.com/imagine-dragons-tickets/artist/1435919#reviews',
        '1435919',
    ),
    (
        'https://www.ticketmaster.com/pnk-tickets/artist/718655#reviews',
        '718655',
    ),
    (
        'https://www.ticketmaster.com/aventura-tickets/artist/973009#reviews',
        '973009',
    ),
]

In [13]:
## Rating element (sample markup from the artist page, kept for reference; the review scraper below does not use it)
## <span class="VisuallyHidden-sc-8buqks-0 lmhoCy">Rating: 5 out of 5</span>

## Review element (sample markup; its class string is what the review scraper passes to find_all)
## <p class="sc-1xx96t9-2 hzQyrn">If you haven’t seen then shame… really really top notch. Tight , smooth and Dan is mesmerizing. Hope to see you guys soon Peace and Love</p>

In [15]:
# Function to scrape reviews and return them as a list of dictionaries
def scrape_ticketmaster_reviews(url, event_id, timeout=10):
    """Download one Ticketmaster page and extract the review texts it contains.

    Args:
        url: Address of the page to fetch.
        event_id: Identifier stored with every review under the "Artist_ID" key.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely on a stalled
            connection, which would hang the whole notebook.

    Returns:
        A list of ``{"Artist_ID": event_id, "Review": text}`` dictionaries, one
        per matching ``<p>`` element. The list is empty when the request fails
        or when no element matches.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as exc:
        # Best effort: report the cause and let the caller continue with other pages
        print(f"Failed to fetch reviews for event ID {event_id}: {exc}")
        return []

    soup = BeautifulSoup(response.text, 'html.parser')
    # NOTE(review): these class names look build-generated and may change when the
    # site is redeployed -- confirm the selector if this starts returning nothing.
    review_tags = soup.find_all('p', class_='sc-1xx96t9-2 hzQyrn')
    return [
        {"Artist_ID": event_id, "Review": tag.text.strip()}
        for tag in review_tags
    ]

# List to collect all reviews
all_reviews = []

# Loop over each URL and event ID to scrape reviews
for url, event_id in ticketmaster_urls:
    # Append reviews from each URL to the all_reviews list
    all_reviews.extend(scrape_ticketmaster_reviews(url, event_id))

# Convert list of dictionaries to DataFrame. The columns are named explicitly so
# that a scrape returning no reviews still yields a frame with the expected
# header instead of a column-less one (which would be written as an empty CSV).
df_reviews = pd.DataFrame(all_reviews, columns=["Artist_ID", "Review"])

# Display the DataFrame
df_reviews


Unnamed: 0,Artist_ID,Review
0,1435919,If you haven’t seen then shame… really really ...
1,1435919,This is by far the best concert I've ever seen...
2,1435919,Absolutely amazing if your looking for great b...
3,1435919,"Saw them in Tampa November 11, 2023. So impres..."
4,1435919,I cannot tell you how amazing Imagine Dragons ...
5,1435919,I went to the Hero& Headliners Show in Tampa. ...
6,1435919,Imagine Dragons really put everything into the...
7,1435919,"Such amazing artists, I'm so grateful to have ..."
8,1435919,Great show!!! I had so much fun! Imagine drago...
9,1435919,Everything was absolutely perfect!! Macklemore...


In [21]:
# Write the scraped reviews to a CSV file, leaving out the DataFrame index
df_reviews.to_csv(path_or_buf='reviews.csv', index=False)

In [17]:
# Create Database Engine
def create_db_engine(user, password, host, port, database):
    """Build a SQLAlchemy engine for a MySQL database via the mysqlclient driver.

    Args:
        user: Database user name, given in plain (not URL-encoded) form.
        password: Password for ``user``, given in plain (not URL-encoded) form.
        host: Server host name or address.
        port: Server port.
        database: Name of the database (schema) to connect to.

    Returns:
        The engine returned by ``sqlalchemy.create_engine``.
    """
    from urllib.parse import quote

    # Credentials are percent-encoded because characters such as '@', ':', '/'
    # or '%' would otherwise be read as URL delimiters and corrupt the address.
    # quote() with safe='' is used rather than quote_plus() so a space becomes
    # '%20' and never '+'.
    encoded_user = quote(str(user), safe='')
    encoded_password = quote(str(password), safe='')
    return create_engine(
        f'mysql+mysqldb://{encoded_user}:{encoded_password}@{host}:{port}/{database}'
    )


In [20]:
# Function to load CSV into DataFrame
def load_csv(file_path):
    """Read the CSV file at ``file_path`` with pandas' default settings.

    Args:
        file_path: Location of the CSV file to read.

    Returns:
        The DataFrame produced by ``pd.read_csv``.
    """
    frame = pd.read_csv(filepath_or_buffer=file_path)
    return frame

# Function to upload DataFrame to MySQL
def upload_to_mysql(df, engine, table_name):
    """Append the rows of ``df`` to the table ``table_name`` through ``engine``.

    Args:
        df: DataFrame whose rows are written; its index is not stored.
        engine: Connection or engine object handed to ``DataFrame.to_sql``.
        table_name: Name of the destination table.

    Rows are added to any existing table contents and are sent in batches of
    500. Nothing is returned.
    """
    write_options = {
        "con": engine,
        "if_exists": 'append',
        "index": False,
        "chunksize": 500,
    }
    df.to_sql(table_name, **write_options)

# Main script execution
if __name__ == "__main__":
    import os
    from getpass import getpass

    # Database connection details. They are read from environment variables so
    # that no secret is stored in the notebook; the password has no default and
    # is asked for interactively when DB_PASSWORD is not set.
    USER = os.environ.get('DB_USER', 'admin')
    PASSWORD = os.environ.get('DB_PASSWORD') or getpass('MySQL password: ')
    HOST = os.environ.get('DB_HOST', 'isba-dev-01.ctpnruf7xrki.us-east-1.rds.amazonaws.com')
    PORT = os.environ.get('DB_PORT', '3306')
    DATABASE = os.environ.get('DB_NAME', 'sql_project')
    TABLE_NAME = 'Reviews'  # Make sure this table is already created in your MySQL database

    # Path to the CSV file: the same relative path the export cell writes to, so
    # the notebook does not depend on a particular absolute working directory
    CSV_FILE_PATH = 'reviews.csv'

    # Create a database engine
    engine = create_db_engine(USER, PASSWORD, HOST, PORT, DATABASE)
    try:
        # Load data from CSV
        dataframe = load_csv(CSV_FILE_PATH)

        # Upload data to MySQL
        upload_to_mysql(dataframe, engine, TABLE_NAME)
    finally:
        # Release pooled connections whether or not the upload succeeded
        engine.dispose()
    print("CSV data has been uploaded successfully to MySQL!")

CSV data has been uploaded successfully to MySQL!
