# Importing Libraries

In [1]:
#!pip install requests beautifulsoup4 folium geopy
#!pip install ipywidgets
#!pip install spotipy

In [2]:
import datetime
import os
import time

import folium
import ipywidgets as widgets
import requests
import spotipy
from bs4 import BeautifulSoup
from geopy.geocoders import Nominatim
from IPython.display import display, clear_output
from IPython.display import display, HTML
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from spotipy import Spotify
from spotipy.oauth2 import SpotifyClientCredentials

# Scraping TicketMaster (first 5 concerts, for now)

In [3]:
# Path to your WebDriver (e.g., ChromeDriver)
webdriver_path = 'C:/Users/annaz/Downloads/Downloads/chromedriver-win64/chromedriver-win64/chromedriver.exe'

# Initialize a Selenium WebDriver. The driver path is wrapped in a Service
# object because passing it positionally is deprecated in Selenium 4.
driver = webdriver.Chrome(service=Service(webdriver_path))

try:
    # Go to the Ticketmaster Paris page
    driver.get('https://www.ticketmaster.fr/fr/paris')
    time.sleep(5)  # Wait for JavaScript to load the content

    # Get the page source
    html = driver.page_source
finally:
    # Always close the browser, even if loading the page failed
    driver.quit()

# Parse the page with BeautifulSoup
soup = BeautifulSoup(html, 'html.parser')

# Collect the first 5 events as dicts with "title", "location" and "date"
concerts = []
for i in range(5):
    event = soup.find('li', id=f'event-result-{i}')
    if not event:
        continue

    title_tag = event.find('h3', class_='event-result-title')
    date_tag = event.find('time', itemprop='startDate')
    if title_tag is None or date_tag is None:
        # Skip incomplete entries instead of crashing on a missing tag
        continue
    title = title_tag.get_text(strip=True)
    date = date_tag.get_text(strip=True)

    # Extract the arena name
    arena_tag = event.find('span', itemprop='name')
    arena = arena_tag.get_text(strip=True) if arena_tag else "Unknown Arena"

    # Extract the city location
    city_tag = event.find('span', itemprop='addressLocality')
    city_location = city_tag.get_text(strip=True) if city_tag else "Unknown City"

    location = f"{arena}, {city_location}"
    concerts.append({"title": title, "location": location, "date": date})


# Print the concert information
for concert in concerts:
    print(concert)


  driver = webdriver.Chrome(webdriver_path)


{'title': 'KAROL G', 'location': 'ACCOR ARENA, PARIS', 'date': '22 juin 2024'}
{'title': 'PACKAGE KAROL G', 'location': 'ACCOR ARENA, PARIS', 'date': '22 juin 2024'}
{'title': 'PACKAGE KAROL G', 'location': 'ACCOR ARENA, PARIS', 'date': '23 juin 2024'}
{'title': 'SLIPKNOT', 'location': 'ACCOR ARENA, PARIS', 'date': '12 décembre 2024'}
{'title': 'SLIPKNOT', 'location': 'ACCOR ARENA, PARIS', 'date': '12 décembre 2024'}


# Adding pins for each concert on the map

In [4]:
def geocode_location(location):
    """Geocode ``location`` with Nominatim.

    Args:
        location: Free-text place description (e.g. "ACCOR ARENA, PARIS").

    Returns:
        A ``(latitude, longitude)`` tuple, or ``None`` when the geocoder
        finds no match or the request fails.
    """
    geolocator = Nominatim(user_agent="geoapiExercises")
    try:
        loc = geolocator.geocode(location)
    except Exception as e:
        # Best effort: report the failure and let the caller fall back
        print(f"Error: {e}")
        return None
    if loc is None:
        # No match for this query
        return None
    return (loc.latitude, loc.longitude)

# Geocode each concert location, querying the API once per distinct venue
coords_by_location = {}
for concert in concerts:
    venue = concert['location']
    if venue not in coords_by_location:
        # (0, 0) is the default when geocoding fails, adjust as necessary
        coords_by_location[venue] = geocode_location(venue) or (0, 0)
        time.sleep(1)  # To avoid hitting request limits on the geocoding API
    concert['coords'] = coords_by_location[venue]

# Create a map centered on Paris (named concert_map so the builtin map() is not shadowed)
concert_map = folium.Map(location=[48.8566, 2.3522], zoom_start=12)

# Group concerts by coordinates
grouped_concerts = {}
for concert in concerts:
    grouped_concerts.setdefault(concert['coords'], []).append(concert)

# Add one marker per coordinate; its popup lists every concert held there
for coords, concerts_at_location in grouped_concerts.items():
    popup_items = "".join(
        f"<li>{concert['title']} - {concert['date']}</li>"
        for concert in concerts_at_location
    )
    popup_content = "<ul>" + popup_items + "</ul>"

    folium.Marker(
        location=coords,
        popup=popup_content,
        tooltip="Concerts at this location"
    ).add_to(concert_map)

display(concert_map)


For now (and at the time we tested the code), as we only scraped the first 5 concerts, they are all at the ACCOR ARENA; therefore there is only one pin, but we made sure to make a toggle list of all the concerts at that location, with the corresponding dates.

# Scraping Wikipedia to get additional information about the artists

In [5]:
def get_artist_info(artist_name, webdriver_path='C:/Users/annaz/Downloads/Downloads/chromedriver-win64/chromedriver-win64/chromedriver.exe'):
    """Scrape basic facts about an artist from their French Wikipedia page.

    Args:
        artist_name: Artist name; spaces are replaced by underscores to
            build the page URL.
        webdriver_path: Path to the ChromeDriver executable.

    Returns:
        A dict with the keys ``'genre'``, ``'birth_date'``, ``'top_song'``
        and ``'country'`` (each ``'N/A'`` when the matching element is
        missing), or ``None`` if an error occurred while loading or
        parsing the page.
    """
    # Wrap the path in a Service: passing it positionally is deprecated in Selenium 4
    driver = webdriver.Chrome(service=Service(webdriver_path))

    try:
        # Search for the artist on Wikipedia
        search_url = f'https://fr.wikipedia.org/wiki/{artist_name.replace(" ", "_")}'
        driver.get(search_url)

        # Set Selenium's implicit wait to 5 seconds
        driver.implicitly_wait(5)

        # Extract information using BeautifulSoup
        soup = BeautifulSoup(driver.page_source, 'html.parser')

        def span_text(css_class):
            """Return the stripped text of the first <span> with this class, or 'N/A'."""
            tag = soup.find('span', {'class': css_class})
            return tag.text.strip() if tag else 'N/A'

        # The wd_p* classes presumably correspond to Wikidata property ids
        # rendered in the infobox -- TODO confirm against the page markup.
        return {
            'genre': span_text('wd_p136'),
            'birth_date': span_text('wd_p569'),
            'top_song': span_text('wd_p800'),
            'country': span_text('wd_p27')
        }
    except Exception as e:
        print(f"Error: {e}")
        return None
    finally:
        # Close the browser whether or not scraping succeeded
        driver.quit()

# Demo: look up a single artist and print whatever was found
artist_name = 'DJ Snake'
artist_info = get_artist_info(artist_name)

if not artist_info:
    print(f"No information found for {artist_name}")
else:
    # One print call, one line per field
    print(
        f"Information for {artist_name}:",
        f"Genre: {artist_info['genre']}",
        f"Birth: {artist_info['birth_date']}",
        f"Nationality: {artist_info['country']}",
        f"Top Songs: {artist_info['top_song']}",
        sep="\n",
    )


  driver = webdriver.Chrome(webdriver_path)


Information for DJ Snake:
Genre: EDM trap music, electro house, moombahton, hip-hop, rap
Birth: 13 juin 1986
Nationality: françaisealgérienne
Top Songs: Turn Down for What, Middle, Let Me Love You, Taki Taki, Loco Contigo


# Incorporating this information for artists that have upcoming concerts

In [18]:
# Function to get artist information from Wikipedia
def get_artist_info(artist_name, webdriver_path='C:/Users/annaz/Downloads/Downloads/chromedriver-win64/chromedriver-win64/chromedriver.exe'):
    """Scrape basic facts about an artist from their French Wikipedia page.

    Args:
        artist_name: Artist name; it is title-cased and its spaces are
            replaced by underscores to build the page URL (so "KAROL G"
            becomes "Karol_G").
        webdriver_path: Path to the ChromeDriver executable.

    Returns:
        A dict with the keys ``'genre'``, ``'birth_date'``, ``'top_song'``
        and ``'country'`` (each ``'N/A'`` when the matching element is
        missing), or ``None`` if an error occurred while loading or
        parsing the page.
    """
    # Wrap the path in a Service: passing it positionally is deprecated in Selenium 4
    driver = webdriver.Chrome(service=Service(webdriver_path))

    try:
        # Search for the artist on Wikipedia
        search_url = f'https://fr.wikipedia.org/wiki/{artist_name.title().replace(" ", "_")}'
        driver.get(search_url)

        # Set Selenium's implicit wait to 5 seconds
        driver.implicitly_wait(5)

        # Extract information using BeautifulSoup
        soup = BeautifulSoup(driver.page_source, 'html.parser')

        def span_text(css_class):
            """Return the stripped text of the first <span> with this class, or 'N/A'."""
            tag = soup.find('span', {'class': css_class})
            return tag.text.strip() if tag else 'N/A'

        # The wd_p* classes presumably correspond to Wikidata property ids
        # rendered in the infobox -- TODO confirm against the page markup.
        return {
            'genre': span_text('wd_p136'),
            'birth_date': span_text('wd_p569'),
            'top_song': span_text('wd_p800'),
            'country': span_text('wd_p27')
        }
    except Exception as e:
        print(f"Error: {e}")
        return None
    finally:
        # Close the browser whether or not scraping succeeded
        driver.quit()
        
        
# Path to your WebDriver (e.g., ChromeDriver)
webdriver_path = 'C:/Users/annaz/Downloads/Downloads/chromedriver-win64/chromedriver-win64/chromedriver.exe'

# Initialize a Selenium WebDriver. The driver path is wrapped in a Service
# object because passing it positionally is deprecated in Selenium 4.
driver = webdriver.Chrome(service=Service(webdriver_path))

try:
    # Go to the Ticketmaster Paris page
    driver.get('https://www.ticketmaster.fr/fr/paris')
    time.sleep(5)  # Wait for JavaScript to load the content

    # Get the page source
    html = driver.page_source
finally:
    # Always close the browser, even if loading the page failed
    driver.quit()

# Parse the page with BeautifulSoup
soup = BeautifulSoup(html, 'html.parser')

# Collect the first 10 events as dicts with "title", "location" and "date"
concerts = []
for i in range(10):
    event = soup.find('li', id=f'event-result-{i}')
    if not event:
        continue

    title_tag = event.find('h3', class_='event-result-title')
    date_tag = event.find('time', itemprop='startDate')
    if title_tag is None or date_tag is None:
        # Skip incomplete entries instead of crashing on a missing tag
        continue
    title = title_tag.get_text(strip=True)
    date = date_tag.get_text(strip=True)

    # Extract the arena name
    arena_tag = event.find('span', itemprop='name')
    arena = arena_tag.get_text(strip=True) if arena_tag else "Unknown Arena"

    # Extract the city location
    city_tag = event.find('span', itemprop='addressLocality')
    city_location = city_tag.get_text(strip=True) if city_tag else "Unknown City"

    location = f"{arena}, {city_location}"
    concerts.append({"title": title, "location": location, "date": date})

def geocode_location(location):
    """Geocode ``location`` with Nominatim.

    Args:
        location: Free-text place description (e.g. "ACCOR ARENA, PARIS").

    Returns:
        A ``(latitude, longitude)`` tuple, or ``None`` when the geocoder
        finds no match or the request fails.
    """
    geolocator = Nominatim(user_agent="geoapiExercises")
    try:
        loc = geolocator.geocode(location)
    except Exception as e:
        # Best effort: report the failure and let the caller fall back
        print(f"Error: {e}")
        return None
    if loc is None:
        # No match for this query
        return None
    return (loc.latitude, loc.longitude)

    
# Several listings share the same venue or artist, so look each one up once:
# every Wikipedia lookup launches a browser and every geocode hits the API.
coords_cache = {}
artist_info_cache = {}

# Print the concert information
for concert in concerts:
    print(concert)

    # Geocode each concert location
    venue = concert['location']
    if venue not in coords_cache:
        # (0, 0) is the default when geocoding fails, adjust as necessary
        coords_cache[venue] = geocode_location(venue) or (0, 0)
        time.sleep(1)  # To avoid hitting request limits on the geocoding API
    concert['coords'] = coords_cache[venue]

    # Get artist information from Wikipedia (text before the first '-' is the artist name)
    artist_name = concert['title'].split('-')[0].strip()
    if artist_name not in artist_info_cache:
        artist_info_cache[artist_name] = get_artist_info(artist_name)
    artist_info = artist_info_cache[artist_name]

    if artist_info:
        print(f"Information for {artist_name}:")
        print(f"Genre: {artist_info['genre']}")
        print(f"Country of Origin: {artist_info['country']}")
        print(f"Date of Birth: {artist_info['birth_date']}")
        print(f"Top Songs: {artist_info['top_song']}")


  driver = webdriver.Chrome(webdriver_path)


{'title': 'KAROL G', 'location': 'ACCOR ARENA, PARIS', 'date': '22 juin 2024'}


  driver = webdriver.Chrome(webdriver_path)


Information for KAROL G:
Genre: N/A
Country of Origin: N/A
Date of Birth: N/A
Top Songs: N/A
{'title': 'PACKAGE KAROL G', 'location': 'ACCOR ARENA, PARIS', 'date': '22 juin 2024'}
Information for PACKAGE KAROL G:
Genre: N/A
Country of Origin: N/A
Date of Birth: N/A
Top Songs: N/A
{'title': 'PACKAGE KAROL G', 'location': 'ACCOR ARENA, PARIS', 'date': '23 juin 2024'}
Information for PACKAGE KAROL G:
Genre: N/A
Country of Origin: N/A
Date of Birth: N/A
Top Songs: N/A
{'title': 'SLIPKNOT', 'location': 'ACCOR ARENA, PARIS', 'date': '12 décembre 2024'}
Information for SLIPKNOT:
Genre: N/A
Country of Origin: N/A
Date of Birth: N/A
Top Songs: N/A
{'title': 'SLIPKNOT', 'location': 'ACCOR ARENA, PARIS', 'date': '12 décembre 2024'}
Information for SLIPKNOT:
Genre: N/A
Country of Origin: N/A
Date of Birth: N/A
Top Songs: N/A
{'title': 'DOJA CAT', 'location': 'ACCOR ARENA, PARIS', 'date': '21 juin 2024'}
Information for DOJA CAT:
Genre: RnB contemporain, hip-hop, pop, pop rap, indie pop, RnB altern

This step is complicated because we realised that all the Wikipedia pages are structured differently, so it's hard to write code that works for every artist's page.

For now we're going to leave this step aside and focus on other features.


# Connecting to Spotify API to retrieve Top Artists of User

In [7]:
from spotipy import Spotify
from spotipy.oauth2 import SpotifyOAuth


# Spotify API credentials are read from the environment so that no secret is
# stored in the notebook. Set SPOTIPY_CLIENT_ID and SPOTIPY_CLIENT_SECRET
# before running this cell (a KeyError is raised if either is missing).
# NOTE: the client secret that was previously hard-coded here should be
# regenerated in the Spotify developer dashboard.
SPOTIPY_CLIENT_ID = os.environ['SPOTIPY_CLIENT_ID']
SPOTIPY_CLIENT_SECRET = os.environ['SPOTIPY_CLIENT_SECRET']
REDIRECT_URI = os.environ.get('SPOTIPY_REDIRECT_URI', 'http://localhost:8889/callback')

# Set up Spotipy with OAuth
sp = spotipy.Spotify(auth_manager=SpotifyOAuth(
    client_id=SPOTIPY_CLIENT_ID,
    client_secret=SPOTIPY_CLIENT_SECRET,
    redirect_uri=REDIRECT_URI,
    scope='user-top-read',  # Request permission to read user's top artists
))

# Get current user's top artists
def get_top_artists():
    """Return the names of the current user's top 10 Spotify artists.

    Uses the notebook-level ``sp`` client. Any exception is printed and
    ``None`` is returned instead of a list.
    """
    try:
        names = []
        for item in sp.current_user_top_artists(limit=10)['items']:
            names.append(item['name'])
    except Exception as e:
        print(f"Error: {e}")
        return None
    return names

# Fetch the top artists and print them as a numbered list
top_artists = get_top_artists()

if top_artists is not None:
    print("Your Top 10 Artists:")
    ranks = range(1, len(top_artists) + 1)
    for index, artist in zip(ranks, top_artists):
        print(f"{index}. {artist}")


Your Top 10 Artists:
1. Taylor Swift
2. Olivia Dean
3. Hozier
4. Ruel
5. Jungle
6. THE DRIVER ERA
7. The Weeknd
8. Mac DeMarco
9. KAYTRANADA
10. Fred again..


### Filter and display concert info filtered on Top Artists

In [8]:
# Filter concerts based on top artists
if "Karol G" not in top_artists:
    # Adding an artist that has a concert soon for the example
    # (guarded so that re-running the cell does not append it again)
    top_artists.append("Karol G")

# Compare case-insensitively: Spotify names such as "THE DRIVER ERA" or
# "KAYTRANADA" would never match a title-cased concert title.
top_artist_keys = [artist.casefold() for artist in top_artists]
filtered_concerts = [
    concert for concert in concerts
    if any(key in concert['title'].casefold() for key in top_artist_keys)
]
filtered_concerts

[{'title': 'KAROL G',
  'location': 'ACCOR ARENA, PARIS',
  'date': '22 juin 2024',
  'coords': (48.83870845, 2.3787666192264645)},
 {'title': 'PACKAGE KAROL G',
  'location': 'ACCOR ARENA, PARIS',
  'date': '22 juin 2024',
  'coords': (48.83870845, 2.3787666192264645)},
 {'title': 'PACKAGE KAROL G',
  'location': 'ACCOR ARENA, PARIS',
  'date': '23 juin 2024',
  'coords': (48.83870845, 2.3787666192264645)}]

# Scrape Ticketmaster + Add Filter + Display Map

In [20]:
# Path to your WebDriver (e.g., ChromeDriver)
webdriver_path = 'C:/Users/annaz/Downloads/Downloads/chromedriver-win64/chromedriver-win64/chromedriver.exe'

# Initialize a Selenium WebDriver. The driver path is wrapped in a Service
# object because passing it positionally is deprecated in Selenium 4.
driver = webdriver.Chrome(service=Service(webdriver_path))

# Event links on the page are relative; they are joined onto this prefix
base_url = "https://www.ticketmaster.fr"

try:
    # Go to the Ticketmaster Paris page
    driver.get('https://www.ticketmaster.fr/fr/paris')
    time.sleep(5)  # Wait for JavaScript to load the content

    # Get the page source
    html = driver.page_source
finally:
    # Always close the browser, even if loading the page failed
    driver.quit()

# Parse the page with BeautifulSoup
soup = BeautifulSoup(html, 'html.parser')

# Collect the first 20 events as dicts with
# "title", "location", "date", "event_url" and "genre"
concerts = []
for i in range(20):
    event = soup.find('li', id=f'event-result-{i}')
    if not event:
        continue

    title_tag = event.find('h3', class_='event-result-title')
    date_tag = event.find('time', itemprop='startDate')
    if title_tag is None or date_tag is None:
        # Skip incomplete entries instead of crashing on a missing tag
        continue
    title = title_tag.get_text(strip=True)
    date = date_tag.get_text(strip=True)

    # Extract the arena name
    arena_tag = event.find('span', itemprop='name')
    arena = arena_tag.get_text(strip=True) if arena_tag else "Unknown Arena"

    # Extract the city location
    city_tag = event.find('span', itemprop='addressLocality')
    city_location = city_tag.get_text(strip=True) if city_tag else "Unknown City"

    # Extract the event URL
    event_link = event.find('a', class_='event-result-title-link')
    url = base_url + event_link['href'] if event_link and event_link.has_attr('href') else "URL not found"

    # Keep only the part of the genre label after the last ' - '
    # (split() returns the whole label when the separator is absent)
    genre_item = event.find('p', class_='event-result-genre-item')
    full_genre = genre_item.get_text(strip=True) if genre_item else "Unknown Genre"
    genre = full_genre.split(' - ')[-1]

    location = f"{arena}, {city_location}"
    concerts.append({"title": title, "location": location, "date": date, "event_url": url, "genre": genre})


# Print the concert information
#for concert in concerts:
#    print(concert)

  driver = webdriver.Chrome(webdriver_path)


In [10]:
# Function to convert French date strings to datetime objects
def translate_month_fr_to_en(date_str):
    """Swap the French month name found in ``date_str`` for its English name.

    Months are checked in calendar order; only the first month that occurs
    in the string is translated (all of its occurrences are replaced).
    The input is returned unchanged when it contains no French month name.
    """
    month_pairs = (
        ('janvier', 'January'),
        ('février', 'February'),
        ('mars', 'March'),
        ('avril', 'April'),
        ('mai', 'May'),
        ('juin', 'June'),
        ('juillet', 'July'),
        ('août', 'August'),
        ('septembre', 'September'),
        ('octobre', 'October'),
        ('novembre', 'November'),
        ('décembre', 'December'),
    )
    first_hit = next((pair for pair in month_pairs if pair[0] in date_str), None)
    if first_hit is None:
        return date_str
    french_name, english_name = first_hit
    return date_str.replace(french_name, english_name)

def str_to_date(date_str):
    """Parse a French "day month year" string (e.g. "22 juin 2024") into a ``datetime.date``."""
    english_date_str = translate_month_fr_to_en(date_str)
    parsed = datetime.datetime.strptime(english_date_str, '%d %B %Y')
    return parsed.date()


# Function to geocode locations
def geocode_location(location):
    """Geocode ``location`` with Nominatim.

    Args:
        location: Free-text place description (e.g. "ACCOR ARENA, PARIS").

    Returns:
        A ``(latitude, longitude)`` tuple, or ``None`` when the geocoder
        finds no match or the request fails.
    """
    geolocator = Nominatim(user_agent="geoapiExercises")
    try:
        loc = geolocator.geocode(location)
    except Exception as e:
        # Best effort: report the failure and let the caller fall back
        print(f"Error: {e}")
        return None
    if loc is None:
        # No match for this query
        return None
    return (loc.latitude, loc.longitude)

# Geocode each concert location, querying the API once per distinct venue
coords_by_location = {}
for concert in concerts:
    venue = concert['location']
    if venue not in coords_by_location:
        # (0, 0) is the default when geocoding fails
        coords_by_location[venue] = geocode_location(venue) or (0, 0)
        time.sleep(1)  # To avoid hitting request limits on the geocoding API
    concert['coords'] = coords_by_location[venue]


In [21]:
from spotipy import Spotify
from spotipy.oauth2 import SpotifyOAuth


# Spotify API credentials are read from the environment so that no secret is
# stored in the notebook. Set SPOTIPY_CLIENT_ID and SPOTIPY_CLIENT_SECRET
# before running this cell (a KeyError is raised if either is missing).
# NOTE: the client secret that was previously hard-coded here should be
# regenerated in the Spotify developer dashboard.
SPOTIPY_CLIENT_ID = os.environ['SPOTIPY_CLIENT_ID']
SPOTIPY_CLIENT_SECRET = os.environ['SPOTIPY_CLIENT_SECRET']
REDIRECT_URI = os.environ.get('SPOTIPY_REDIRECT_URI', 'http://localhost:8889/callback')

# Set up Spotipy with OAuth
sp = spotipy.Spotify(auth_manager=SpotifyOAuth(
    client_id=SPOTIPY_CLIENT_ID,
    client_secret=SPOTIPY_CLIENT_SECRET,
    redirect_uri=REDIRECT_URI,
    scope='user-top-read',  # Request permission to read user's top artists
))

# Get current user's top artists
def get_top_artists():
    """Return the upper-cased names of the current user's top 10 Spotify artists.

    Uses the notebook-level ``sp`` client. Any exception is printed and
    ``None`` is returned instead of a list.
    """
    try:
        names = []
        for item in sp.current_user_top_artists(limit=10)['items']:
            names.append(item['name'].upper())
    except Exception as e:
        print(f"Error: {e}")
        return None
    return names

# Fetch the user's top artists; get_top_artists() returns None when the
# Spotify request fails, so fall back to an empty list before appending.
top_artists = get_top_artists() or []
if "KAROL G" not in top_artists:
    # Example artist with an upcoming concert, so the filter has a match
    top_artists.append("KAROL G")


### With artist_filter containing all top 10 artists

### With artist_filter containing only "All" or "Top Artists" (grouped)

In [15]:
# Dropdown choices are the distinct values found in the scraped concerts
genre_options = sorted({concert['genre'] for concert in concerts})
location_options = sorted({concert['location'] for concert in concerts})

# Filter controls: date range, genre, location and artist group
date_start_filter = widgets.DatePicker(description='Start Date')
date_end_filter = widgets.DatePicker(description='End Date')
genre_filter = widgets.Dropdown(options=['All'] + genre_options)
location_filter = widgets.Dropdown(options=['All'] + location_options)
artist_filter = widgets.Dropdown(options=['All', 'Top Artists'])


def filter_concerts(change):
    """Redraw the filter widgets and the concert map for the current selections.

    Registered as the observer callback of the filter widgets and also
    called directly to draw the initial map. Reads the notebook-level
    ``concerts`` list, ``top_artists`` and the filter widgets.

    Args:
        change: The widget change event (or ``None``); it is not inspected.
    """
    # Clear the current output and show the controls again
    clear_output(wait=True)
    display(widgets.Label('Filter by Date:'), date_start_filter, date_end_filter)
    display(widgets.Label('Filter by Genre:'), genre_filter)
    display(widgets.Label('Filter by Location:'), location_filter)
    display(widgets.Label('Filter by Artists:'), artist_filter)

    filtered_concerts = concerts

    # Date range: either bound may be left empty
    start_date = date_start_filter.value
    end_date = date_end_filter.value
    if start_date or end_date:
        def in_date_range(concert):
            concert_date = str_to_date(concert['date'])
            if start_date and concert_date < start_date:
                return False
            if end_date and concert_date > end_date:
                return False
            return True

        filtered_concerts = [concert for concert in filtered_concerts if in_date_range(concert)]

    if genre_filter.value != 'All':
        filtered_concerts = [concert for concert in filtered_concerts if concert['genre'] == genre_filter.value]
    if location_filter.value != 'All':
        filtered_concerts = [concert for concert in filtered_concerts if concert['location'] == location_filter.value]

    if artist_filter.value == 'Top Artists':
        # Case-insensitive substring match, so a title such as
        # "PACKAGE KAROL G" is kept for the artist "KAROL G".
        # top_artists may be None if the Spotify request failed.
        artist_keys = [artist.casefold() for artist in (top_artists or [])]
        filtered_concerts = [
            concert for concert in filtered_concerts
            if any(key in concert['title'].casefold() for key in artist_keys)
        ]

    # Group filtered concerts by coordinates
    filtered_concerts_by_location = {}
    for concert in filtered_concerts:
        entry = filtered_concerts_by_location.setdefault(
            concert['coords'], {'name': concert['location'], 'concerts': []}
        )
        entry['concerts'].append(concert)

    # Sort concerts by date within each location
    for location_data in filtered_concerts_by_location.values():
        location_data['concerts'].sort(key=lambda x: str_to_date(x['date']))

    # Create a new map with filtered concerts
    # (named concert_map so the builtin map() is not shadowed)
    concert_map = folium.Map(location=[48.8566, 2.3522], zoom_start=12)
    for coords, data in filtered_concerts_by_location.items():
        location_name = data['name']

        popup_content = f"<h3>{location_name}</h3><ul>"
        for concert in data['concerts']:
            # Create a hyperlink for each concert
            concert_link = f"<a href='{concert['event_url']}' target='_blank'>{concert['title']}</a>"
            popup_content += f"<li>{concert_link} - {concert['date']}</li>"
        popup_content += '</ul>'

        # Round-trip through BeautifulSoup to normalise the popup HTML
        popup_content = str(BeautifulSoup(popup_content, 'html.parser'))

        folium.Marker(
            location=coords,
            popup=folium.Popup(popup_content, max_width=300),
            tooltip=location_name
        ).add_to(concert_map)

    # Display the new filtered map
    display(concert_map)

# Re-run filter_concerts whenever any filter widget's value changes
for filter_widget in (
    date_start_filter,
    date_end_filter,
    genre_filter,
    location_filter,
    artist_filter,
):
    filter_widget.observe(filter_concerts, names='value')


# Show each labelled group of controls, then draw the initial map
for label_text, controls in (
    ('Filter by Date:', (date_start_filter, date_end_filter)),
    ('Filter by Genre:', (genre_filter,)),
    ('Filter by Location:', (location_filter,)),
    ('Filter by Artists:', (artist_filter,)),
):
    display(widgets.Label(label_text), *controls)
filter_concerts(None)

Label(value='Filter by Date:')

DatePicker(value=None, description='Start Date')

DatePicker(value=None, description='End Date')

Label(value='Filter by Genre:')

Dropdown(options=('All', 'COMEDIE MUSICALE', 'DANSE HIP-HOP', 'HARD METAL', 'POP ROCK FOLK', 'RAP HIP-HOP SLAM…

Label(value='Filter by Location:')

Dropdown(options=('All', 'ACCOR ARENA, PARIS', 'CASINO DE PARIS, PARIS', 'Dome de Paris - Palais des sports, P…

Label(value='Filter by Artists:')

Dropdown(options=('All', 'Top Artists'), value='All')