In [1]:
import os
import requests
from bs4 import BeautifulSoup
import pandas as pd

# URL of the Songkick page for Lexington events this month
url = 'https://www.songkick.com/metro-areas/24580-us-lexington/november-2024'

# Fetch the webpage content. The timeout keeps a stalled connection from
# hanging the kernel, and raise_for_status() stops the cell on an HTTP error
# instead of silently parsing an error page into an empty table.
response = requests.get(url, timeout=30)
response.raise_for_status()
content = response.content

# Parse the HTML content using BeautifulSoup
soup = BeautifulSoup(content, 'html.parser')

# One entry per event is appended to each list, so they stay index-aligned
artists = []
locations = []
dateTimes = []
artist_links = []
artist_images = []

# Create a directory for artist images if it doesn't exist
os.makedirs('artist_images', exist_ok=True)

# Extract event details
for event in soup.find_all('li', class_='event-listings-element'):
    # Artist name: both the <p class="artists"> and its <strong> may be absent
    artist_tag = event.find('p', class_='artists')
    strong_tag = artist_tag.strong if artist_tag else None
    artist_name = strong_tag.get_text(strip=True) if strong_tag else None

    # Artist link: only build the absolute URL when the anchor carries an href
    artist_link_tag = artist_tag.find('a') if artist_tag else None
    artist_href = artist_link_tag.get('href') if artist_link_tag else None
    artist_link = f"https://www.songkick.com{artist_href}" if artist_href else None

    # Event location
    locate_tag = event.find('p', class_='location')
    locate_name = locate_tag.get_text(strip=True) if locate_tag else None

    # Date and time; 'N/A' marks an event without a usable <time> element
    time_element = event.find('time')
    if time_element:
        datetime_value = time_element.get('datetime', 'N/A')
        time_text = time_element.get_text(strip=True)
    else:
        datetime_value = 'N/A'
        time_text = 'N/A'

    # Artist image: downloaded only when both an image source and an artist
    # name (used for the file name) are available.
    thumb_tag = event.find('a', class_='thumb')
    image_tag = thumb_tag.find('img', class_='artist-profile-image') if thumb_tag else None
    image_src = image_tag.get('data-src') if image_tag else None
    image_filename = None
    if image_src and artist_name:
        # Protocol-relative sources ("//host/...") need a scheme prepended
        image_url = "https:" + image_src if image_src.startswith('//') else image_src
        # Path separators in a name would otherwise point into a missing folder
        safe_name = artist_name.replace(' ', '_').replace('/', '_').replace('\\', '_')
        candidate_filename = f"artist_images/{safe_name}.jpg"
        try:
            image_response = requests.get(image_url, timeout=30)
            image_response.raise_for_status()
        except requests.RequestException as exc:
            # Best effort: a failed image must not abort the whole scrape
            print(f'Could not download image for {artist_name}: {exc}')
        else:
            with open(candidate_filename, 'wb') as img_file:
                img_file.write(image_response.content)
            image_filename = candidate_filename
    artist_images.append(image_filename)

    # Append extracted data to lists
    artists.append(artist_name)
    locations.append(locate_name)
    dateTimes.append(datetime_value)
    artist_links.append(artist_link)

# Create a DataFrame from the extracted data
data = {
    'Artist': artists,
    'Location': locations,
    'Datetime': dateTimes,
    'Artist Link': artist_links,
    'Artist Image': artist_images
}

df = pd.DataFrame(data)

# Append the new data to the existing CSV file
csv_filename = 'lexington_events2.csv'

try:
    existing_df = pd.read_csv(csv_filename)
    updated_df = pd.concat([existing_df, df], ignore_index=True)
except FileNotFoundError:
    updated_df = df

# Re-running this cell would otherwise append the same events again, so keep
# only the first occurrence of each event.
updated_df = updated_df.drop_duplicates(
    subset=['Artist', 'Location', 'Datetime', 'Artist Link'],
    ignore_index=True,
)

# Save the updated DataFrame to the CSV file
updated_df.to_csv(csv_filename, index=False)

print(f'Data saved to {csv_filename}')


Data saved to lexington_events2.csv


In [2]:
# Reformat event times from 24-hour to 12-hour (AM/PM) notation.
# Start by reading the events CSV into a DataFrame.
file_path = 'lexington_events2.csv'
data = pd.read_csv(filepath_or_buffer=file_path)

# Function to separate datetime into date and time
def split_datetime(row):
    """Split a row's 'Datetime' value into a date part and an HH:MM time part.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide a 'Datetime' entry.

    Returns
    -------
    pandas.Series
        Two elements, ``[date, time]``:
        * ``'<date>T<time>...'`` -> text before the 'T', and the first five
          characters after it (HH:MM);
        * a string without 'T'   -> the string itself as the date, no time;
        * a non-string value     -> ``[None, None]``.
    """
    value = row['Datetime']

    # pandas.read_csv turns blank cells and markers such as 'N/A' into NaN
    # (a float), and `'T' in <float>` raises TypeError, so non-string values
    # are reported as "no date, no time".
    if not isinstance(value, str):
        return pd.Series([None, None])

    if 'T' not in value:
        # Date-only value: no time provided
        return pd.Series([value, None])

    parts = value.split('T')
    # Keep only HH:MM, dropping seconds and the UTC offset
    return pd.Series([parts[0], parts[1][:5]])

# Function to convert time from 24-hour (military) to 12-hour AM/PM format
def convert_to_imperial(time_str):
    """Convert a 24-hour 'HH:MM' string to 12-hour 'HH:MM AM/PM' notation.

    Despite the name, this has nothing to do with imperial units; it only
    switches clock notation (e.g. '20:00' -> '08:00 PM').

    Parameters
    ----------
    time_str : str or None or NaN
        Time in '%H:%M' format, or a missing value.

    Returns
    -------
    str or None
        The formatted time, or None when the input is missing.

    Raises
    ------
    ValueError
        If a non-missing value does not match '%H:%M'.
    """
    # Treat NaN like None: a missing value may arrive as either, and
    # pd.to_datetime(NaN) yields NaT, which cannot be formatted with strftime.
    if pd.isna(time_str):
        return None
    time_obj = pd.to_datetime(time_str, format='%H:%M')
    return time_obj.strftime('%I:%M %p')

# Derive the Date and Time columns from each row's Datetime value
date_time_parts = data.apply(split_datetime, axis=1)
data[['Date', 'Time']] = date_time_parts

# Re-express Time on the 12-hour clock
data['Time'] = data['Time'].apply(convert_to_imperial)

# Write the result to its own CSV file
output_file_path = 'lexington_events_time_imperial.csv'  # Change this to your desired file path
data.to_csv(output_file_path, index=False)

print(f"File saved successfully at {output_file_path}")


File saved successfully at lexington_events_time_imperial.csv


In [3]:
# Remove any towns or extra address from the bar/restaurant 
def update_csv_file(input_csv, output_csv, suffixes=None):
    """Strip town/state text from the 'Location' column and save the result.

    Parameters
    ----------
    input_csv : str
        Path of the CSV file to read; it must contain a 'Location' column.
    output_csv : str
        Path the cleaned CSV is written to (without the index).
    suffixes : iterable of str, optional
        Literal substrings to delete from every 'Location' value, applied in
        order. Defaults to the Kentucky town endings used for the Lexington
        listings.

    Returns
    -------
    pandas.DataFrame
        The DataFrame that was written to ``output_csv``.
    """
    if suffixes is None:
        suffixes = (
            ',Lexington, KY, US',
            ',Georgetown, KY, US',
            ',London, KY, US',
            ',North Lexington, KY, US',
            ',Richmond, KY, US',
        )

    # Load the CSV file
    df = pd.read_csv(input_csv)

    # Each entry is matched literally (regex=False), so the commas and any
    # other punctuation carry no special meaning.
    for suffix in suffixes:
        df['Location'] = df['Location'].str.replace(suffix, '', regex=False)

    # Save the modified DataFrame to a new CSV file
    df.to_csv(output_csv, index=False)
    print(f'Data saved to {output_csv}')
    return df

# Source file (12-hour times already added) and destination for the cleaned copy
input_csv = 'lexington_events_time_imperial.csv'
output_csv = 'lexington_events_time_imperial_modified.csv'

# Clean the Location column and keep the resulting DataFrame
updated_df = update_csv_file(input_csv=input_csv, output_csv=output_csv)

# Preview the first rows of the cleaned data
updated_df.head()

Data saved to lexington_events_time_imperial_modified.csv


Unnamed: 0,Artist,Location,Datetime,Artist Link,Artist Image,Date,Time
0,JESSIE MURPH,Manchester Music Hall,2024-11-02T20:00:00-0500,https://www.songkick.com/concerts/42127792-jes...,artist_images/JESSIE_MURPH.jpg,2024-11-02,08:00 PM
1,Into the Fog,"Winchester, KY, US",2024-11-02T19:00:00-0500,https://www.songkick.com/concerts/42235574-int...,artist_images/Into_the_Fog.jpg,2024-11-02,07:00 PM
2,Bedford and Mule Haggard,Al's Bar,2024-11-02T21:00:00-0500,https://www.songkick.com/concerts/42252263-bed...,artist_images/Bedford_and_Mule_Haggard.jpg,2024-11-02,09:00 PM
3,JESSIE MURPH,Manchester Music Hall,2024-11-03T20:00:00-0600,https://www.songkick.com/concerts/42134056-jes...,artist_images/JESSIE_MURPH.jpg,2024-11-03,08:00 PM
4,Cardenales De Nuevo Leon,Royal Legacy Bailes,2024-11-03T20:00:00-0600,https://www.songkick.com/concerts/42086134-car...,artist_images/Cardenales_De_Nuevo_Leon.jpg,2024-11-03,08:00 PM


In [4]:
import pandas as pd
import geopandas as gpd

# Paths: cleaned events table, venue shapefile, and the GeoJSON to produce
csv_path = 'lexington_events_time_imperial_modified.csv'
shapefile_path = 'shp/venues.shp'
output_path = 'shp/merged_venues_events.geojson'

# Read both inputs
events_df = pd.read_csv(csv_path)
venues_gdf = gpd.read_file(shapefile_path)

# Join each venue to its events: 'Venue' (shapefile) against 'Location' (CSV).
# No `how` is given, so merge's default join type applies.
merged_gdf = venues_gdf.merge(events_df, left_on='Venue', right_on='Location')

# Drop the spaces from the 'Artist Link' and 'Artist Image' column names
column_renames = {'Artist Link': 'ArtistLink', 'Artist Image': 'ArtistImage'}
merged_gdf = merged_gdf.rename(columns=column_renames)

# Export the merged layer as GeoJSON
merged_gdf.to_file(output_path, driver='GeoJSON')

# Show the first few merged rows
print(merged_gdf.head())


   id                  Venue                    geometry              Artist  \
0 NaN               The Burl  POINT (-84.51887 38.05698)         Futurebirds   
1 NaN               The Burl  POINT (-84.51887 38.05698)       HAPPY LANDING   
2 NaN               The Burl  POINT (-84.51887 38.05698)  Magnolia Boulevard   
3 NaN               The Burl  POINT (-84.51887 38.05698)        Wayne Graham   
4 NaN  Manchester Music Hall  POINT (-84.50944 38.05320)        JESSIE MURPH   

                Location                  Datetime  \
0               The Burl  2024-11-15T19:00:00-0600   
1               The Burl  2024-11-22T19:00:00-0600   
2               The Burl  2024-11-29T19:00:00-0600   
3               The Burl                2024-11-30   
4  Manchester Music Hall  2024-11-02T20:00:00-0500   

                                          ArtistLink  \
0  https://www.songkick.com/concerts/42185331-fut...   
1  https://www.songkick.com/concerts/42075491-hap...   
2  https://www.songkick.co