In [21]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# URL of the Songkick page for Lexington events this month
url = 'https://www.songkick.com/metro-areas/24580-us-lexington/july-2024'

# Fetch the webpage content
response = requests.get(url)
content = response.content

# Parse the HTML content using BeautifulSoup
soup = BeautifulSoup(content, 'html.parser')

# Initialize lists to store the extracted data
artists = []
locations = []
dateTimes = []
artist_links = []
artist_images = []

# Extract event details
for event in soup.find_all('li', class_='event-listings-element'):
    
    # Extract artist name
    artist_tag = event.find('p', class_='artists')
    artist_name = artist_tag.strong.get_text(strip=True) if artist_tag else None
    
    #extract artist link
    artist_link_tag = artist_tag.find('a') if artist_tag else None
    artist_link = f"https://www.songkick.com{artist_link_tag['href']}" if artist_link_tag else None

    # Extract event location
    locate_tag = event.find('p', class_='location')
    locate_name = locate_tag.get_text(strip=True) if locate_tag else None
    
    # Extract date and time
    time_element = event.find('time')
    if time_element:
        datetime_value = time_element['datetime']
        time_text = time_element.get_text(strip=True)
    else:
        datetime_value = 'N/A'
        time_text = 'N/A'
        
   # Extract artist image
    image_tag = event.find('a', class_='thumb')
    image_url = image_tag['data-src'] if image_tag and 'data-src' in image_tag.attrs else None
    if image_url:
        image_filename = f"artist_images/{artist_name.replace(' ', '_')}.jpg"
        image_response = requests.get(image_url)
        with open(image_filename, 'wb') as img_file:
            img_file.write(image_response.content)
        artist_images.append(image_filename)
    else:
        artist_images.append(None)
    
    print(image_tag)
    
    # Append extracted data to lists
    artists.append(artist_name)
    locations.append(locate_name)
    dateTimes.append(datetime_value)
    artist_links.append(artist_link)

# Create a DataFrame from the extracted data
data = {
    'Artist': artists,
    'Location': locations,
    'Datetime': dateTimes,
    'Artist Link': artist_links,
    'Artist Image': artist_images
}

df = pd.DataFrame(data)

# Append the new data to the existing CSV file
csv_filename = 'lexington_events2.csv'

try:
    existing_df = pd.read_csv(csv_filename)
    updated_df = pd.concat([existing_df, df], ignore_index=True)
except FileNotFoundError:
    updated_df = df

# Save the updated DataFrame to the CSV file
updated_df.to_csv(csv_filename, index=False)

print(f'Data saved to {csv_filename}')


<a class="thumb" href="/concerts/41948676-steel-woods-at-burl">
<img alt="The Steel Woods Concert Tickets - 2024 Tour Dates" class="artist-profile-image artist lazyload" data-src="//images.sk-static.com/images/media/profile_images/artists/8906034/large_avatar" height="48" src="//assets.sk-static.com/images/default_images/large_avatar/default-artist.png" width="48"/>
</a>
<a class="thumb" href="/concerts/42013289-tyler-deaver-at-lake-reba-recreational-complex-park">
<img alt="Tyler Deaver Concert Tickets - 2024 Tour Dates" class="artist-profile-image artist lazyload" data-src="//images.sk-static.com/images/media/profile_images/artists/10196820/large_avatar" height="48" src="//assets.sk-static.com/images/default_images/large_avatar/default-artist.png" width="48"/>
</a>
<a class="thumb" href="/concerts/41912553-town-mountain-at-burl">
<img alt="Town Mountain Concert Tickets - 2024 Tour Dates" class="artist-profile-image artist lazyload" data-src="//images.sk-static.com/images/media/profil

In [9]:
    # Extract artist image 
    image_tag = event.find('a', class_='thumb')
    print(image_tag)

<a class="thumb" href="/concerts/42005174-breed-official-at-als-bar">
<img alt="Breed (Official) Concert Tickets - 2024 Tour Dates" class="artist-profile-image artist lazyload" data-src="//images.sk-static.com/images/media/profile_images/artists/10236976/large_avatar" height="48" src="//assets.sk-static.com/images/default_images/large_avatar/default-artist.png" width="48"/>
</a>


## Take the output csv and split the Datetime field to Date and Time. Convert the time from Military Time to Imperial

In [22]:

# Load the CSV file
file_path = 'lexington_events2.csv'
data = pd.read_csv(file_path)

# Function to separate datetime into date and time
def split_datetime(row):
    if 'T' in row['Datetime']:
        date_time = row['Datetime'].split('T')[0]
        time = row['Datetime'].split('T')[1][:5]  # assuming the time format is HH:MM:SS
    else:
        date_time = row['Datetime']
        time = None  # No time provided
    return pd.Series([date_time, time])

# Function to convert time from military to imperial
def convert_to_imperial(time_str):
    if time_str is not None:
        time_obj = pd.to_datetime(time_str, format='%H:%M')
        return time_obj.strftime('%I:%M %p')
    return None

# Apply the function to split Datetime into Date and Time columns
data[['Date', 'Time']] = data.apply(split_datetime, axis=1)

# Convert Time to imperial format
data['Time'] = data['Time'].apply(convert_to_imperial)

# Saving the modified DataFrame to a new CSV file
output_file_path = 'lexington_events_time_imperial.csv'  # Change this to your desired file path
data.to_csv(output_file_path, index=False)

print(f"File saved successfully at {output_file_path}")


File saved successfully at lexington_events_time_imperial.csv
