# Importing Libraries

In [2]:
#pip install requests beautifulsoup4 folium geopy

Collecting folium
  Downloading folium-0.15.1-py2.py3-none-any.whl (97 kB)
Collecting geopy
  Downloading geopy-2.4.1-py3-none-any.whl (125 kB)
Collecting xyzservices
  Downloading xyzservices-2023.10.1-py3-none-any.whl (56 kB)
Collecting branca>=0.6.0
  Downloading branca-0.7.0-py3-none-any.whl (25 kB)
Collecting geographiclib<3,>=1.52
  Downloading geographiclib-2.0-py3-none-any.whl (40 kB)
Installing collected packages: xyzservices, geographiclib, branca, geopy, folium
Successfully installed branca-0.7.0 folium-0.15.1 geographiclib-2.0 geopy-2.4.1 xyzservices-2023.10.1
Note: you may need to restart the kernel to use updated packages.


In [12]:
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from geopy.geocoders import Nominatim
import folium
import time
from IPython.display import display

# Scraping TicketMaster (first 5 concerts, for now)

In [9]:
# Path to your WebDriver (e.g., ChromeDriver)
webdriver_path = 'C:/Users/annaz/Downloads/Downloads/chromedriver-win64/chromedriver-win64/chromedriver.exe'


# Initialize a Selenium WebDriver
driver = webdriver.Chrome(webdriver_path)

# Go to the Ticketmaster Paris page
driver.get('https://www.ticketmaster.fr/fr/paris')
time.sleep(5)  

# Get the page source and close the browser
html = driver.page_source
driver.quit()

# Parse the page with BeautifulSoup
soup = BeautifulSoup(html, 'html.parser')

concerts = []
for i in range(5):
    event = soup.find('li', id=f'event-result-{i}')
    if event:
        title = event.find('h3', class_='event-result-title').get_text(strip=True)
        date = event.find('time', itemprop='startDate').get_text(strip=True)

        # Extract the arena name
        arena = event.find('span', itemprop='name').get_text(strip=True) if event.find('span', itemprop='name') else "Unknown Arena"

        # Extract the city location
        city_location = event.find('span', itemprop='addressLocality').get_text(strip=True) if event.find('span', itemprop='addressLocality') else "Unknown City"

        location = f"{arena}, {city_location}"
        concerts.append({"title": title, "location": location, "date": date})


# Print the concert information
for concert in concerts:
    print(concert)


  driver = webdriver.Chrome(webdriver_path)


{'title': 'DJ SNAKE', 'location': 'ACCOR ARENA, PARIS', 'date': '10 mai 2025'}
{'title': 'KAROL G', 'location': 'ACCOR ARENA, PARIS', 'date': '22 juin 2024'}
{'title': 'KAROL G', 'location': 'ACCOR ARENA, PARIS', 'date': '22 juin 2024'}
{'title': 'DOJA CAT', 'location': 'ACCOR ARENA, PARIS', 'date': '21 juin 2024'}
{'title': 'PACKAGE DOJA CAT', 'location': 'ACCOR ARENA, PARIS', 'date': '21 juin 2024'}


We get those 5 concerts, they are all located at ACCOR ARENA, PARIS.

# Adding pins for each concert on the map

In [18]:
def geocode_location(location):
    geolocator = Nominatim(user_agent="geoapiExercises")
    try:
        loc = geolocator.geocode(location)
        return (loc.latitude, loc.longitude)
    except:
        return None

# Geocode each concert location
for concert in concerts:
    coords = geocode_location(concert['location'])
    if coords:
        concert['coords'] = coords
    else:
        concert['coords'] = (0, 0)  # Default coords, adjust as necessary
    time.sleep(1)  # To avoid hitting request limits on the geocoding API

# Create a map and add markers
map = folium.Map(location=[48.8566, 2.3522], zoom_start=12)  # Centered on Paris

# Group concerts by coordinates
grouped_concerts = {}
for concert in concerts:
    coords = concert['coords']
    if coords in grouped_concerts:
        grouped_concerts[coords].append(concert)
    else:
        grouped_concerts[coords] = [concert]

# Add markers for grouped concerts
for coords, concerts_at_location in grouped_concerts.items():
    popup_content = "<ul>"
    for concert in concerts_at_location:
        popup_content += f"<li>{concert['title']} - {concert['date']}</li>"
    popup_content += "</ul>"

    folium.Marker(
        location=coords,
        popup=popup_content,
        tooltip="Concerts at this location"
    ).add_to(map)

display(map)


For now, as the first 5 concerts are in ACCOR ARENA, there is only one pin but we made sure to made a toggle list of all the concert of the location, with the corresponding dates.

# Scraping Wikipedia to get additional information about the artists

In [34]:
def get_artist_info(artist_name):
    webdriver_path = 'C:/Users/annaz/Downloads/Downloads/chromedriver-win64/chromedriver-win64/chromedriver.exe'
    driver = webdriver.Chrome(webdriver_path)

    try:
        # Search for the artist on Wikipedia
        search_url = f'https://en.wikipedia.org/wiki/{artist_name.replace(" ", "_")}'
        driver.get(search_url)

        # Wait for the page to load (you may need to adjust the wait time)
        driver.implicitly_wait(5)

        # Extract information using BeautifulSoup
        soup = BeautifulSoup(driver.page_source, 'html.parser')

        # Extracting information from the infobox
        infobox = soup.find('table', {'class': 'infobox'})

        # Extract genre, country of origin, and other relevant information
        genre = infobox.find('th', text='Genres').find_next('td').text.strip() if infobox and infobox.find('th', text='Genres') else 'N/A'
        country_of_origin = soup.find('span', {'class': 'wd_p27'}).text.strip() if soup.find('span', {'class': 'wd_p27'}) else 'N/A'
        birth_date = infobox.find('th', text='Born').find_next('td').text.strip() if infobox and infobox.find('th', text='Born') else 'N/A'

        # Additional information you may want to extract
        top_song = soup.find('span', {'class': 'wd_p800'}).text.strip() if soup.find('span', {'class': 'wd_p800'}) else 'N/A'

        return {
            'genre': genre,
            'country' : country,
            'country_of_origin': country_of_origin,
            'birth_date': birth_date,
            'top_song': top_song,
        }
    except Exception as e:
        print(f"Error: {e}")
        return None
    finally:
        driver.quit()

# Example usage
artist_name = 'DJ Snake'
artist_info = get_artist_info(artist_name)

if artist_info:
    print(f"Information for {artist_name}:")
    print(f"Genre: {artist_info['genre']}")
    print(f"Country: {artist_info['country']}")
    print(f"Country of Origin: {artist_info['country_of_origin']}")
    print(f"Date of Birth: {artist_info['birth_date']}")
    print(f"Top Songs: {artist_info['top_song']}")
else:
    print(f"No information found for {artist_name}")


  driver = webdriver.Chrome(webdriver_path)


Error: name 'country' is not defined
No information found for DJ Snake


# Incorporating those information for artists that have upcoming concerts

In [28]:
# Function to get artist information from Wikipedia
def get_artist_info(artist_name):
    webdriver_path = 'C:/Users/annaz/Downloads/Downloads/chromedriver-win64/chromedriver-win64/chromedriver.exe'
    driver = webdriver.Chrome(webdriver_path)

    try:
        # Search for the artist on Wikipedia
        search_url = f'https://en.wikipedia.org/wiki/{artist_name.replace(" ", "_")}'
        driver.get(search_url)

        # Wait for the page to load (you may need to adjust the wait time)
        driver.implicitly_wait(5)

        # Extract information using BeautifulSoup
        soup = BeautifulSoup(driver.page_source, 'html.parser')

        # Extracting information from the infobox
        infobox = soup.find('table', {'class': 'infobox'})

        # Extract genre, country of origin, and other relevant information
        genre = infobox.find('th', text='Genres').find_next('td').text.strip() if infobox and infobox.find('th', text='Genres') else 'N/A'
        country_of_origin = soup.find('span', {'class': 'wd_p27'}).text.strip() if soup.find('span', {'class': 'wd_p27'}) else 'N/A'
        birth_date = infobox.find('th', text='Born').find_next('td').text.strip() if infobox and infobox.find('th', text='Born') else 'N/A'

        # Additional information you may want to extract
        top_song = soup.find('span', {'class': 'wd_p800'}).text.strip() if soup.find('span', {'class': 'wd_p800'}) else 'N/A'

        return {
            'genre': genre,
            'country_of_origin': country_of_origin,
            'birth_date': birth_date,
            'top_song': top_song,
        }
    except Exception as e:
        print(f"Error: {e}")
        return None
    finally:
        driver.quit()
        
        
# Path to your WebDriver (e.g., ChromeDriver)
webdriver_path = 'C:/Users/annaz/Downloads/Downloads/chromedriver-win64/chromedriver-win64/chromedriver.exe'

# Initialize a Selenium WebDriver
driver = webdriver.Chrome(webdriver_path)

# Go to the Ticketmaster Paris page
driver.get('https://www.ticketmaster.fr/fr/paris')
time.sleep(5)

# Get the page source and close the browser
html = driver.page_source
driver.quit()

# Parse the page with BeautifulSoup
soup = BeautifulSoup(html, 'html.parser')

concerts = []
for i in range(5):
    event = soup.find('li', id=f'event-result-{i}')
    if event:
        title = event.find('h3', class_='event-result-title').get_text(strip=True)
        date = event.find('time', itemprop='startDate').get_text(strip=True)

        # Extract the arena name
        arena = event.find('span', itemprop='name').get_text(strip=True) if event.find('span', itemprop='name') else "Unknown Arena"

        # Extract the city location
        city_location = event.find('span', itemprop='addressLocality').get_text(strip=True) if event.find('span', itemprop='addressLocality') else "Unknown City"

        location = f"{arena}, {city_location}"
        concerts.append({"title": title, "location": location, "date": date})

# Print the concert information
for concert in concerts:
    print(concert)

def geocode_location(location):
    geolocator = Nominatim(user_agent="geoapiExercises")
    try:
        loc = geolocator.geocode(location)
        return (loc.latitude, loc.longitude)
    except:
        return None

# Geocode each concert location
for concert in concerts:
    coords = geocode_location(concert['location'])
    if coords:
        concert['coords'] = coords
    else:
        concert['coords'] = (0, 0)  # Default coords, adjust as necessary
    time.sleep(1)  # To avoid hitting request limits on the geocoding API

# Get artist information from Wikipedia
for concert in concerts:
    artist_name = concert['title'].split('-')[0].strip()
    artist_info = get_artist_info(artist_name)

    if artist_info:
        print(f"Information for {artist_name}:")
        print(f"Genre: {artist_info['genre']}")
        print(f"Country of Origin: {artist_info['country_of_origin']}")
        print(f"Date of Birth: {artist_info['birth_date']}")
        print(f"Top Songs: {artist_info['top_song']}")


  driver = webdriver.Chrome(webdriver_path)


{'title': 'DJ SNAKE', 'location': 'ACCOR ARENA, PARIS', 'date': '10 mai 2025'}
{'title': 'KAROL G', 'location': 'ACCOR ARENA, PARIS', 'date': '22 juin 2024'}
{'title': 'KAROL G', 'location': 'ACCOR ARENA, PARIS', 'date': '22 juin 2024'}
{'title': 'DOJA CAT', 'location': 'ACCOR ARENA, PARIS', 'date': '21 juin 2024'}
{'title': 'PACKAGE DOJA CAT', 'location': 'ACCOR ARENA, PARIS', 'date': '21 juin 2024'}


  driver = webdriver.Chrome(webdriver_path)


Information for DJ SNAKE:
Genre: N/A
Country of Origin: N/A
Date of Birth: N/A
Top Songs: N/A
Information for KAROL G:
Genre: N/A
Country of Origin: N/A
Date of Birth: N/A
Top Songs: N/A
Information for KAROL G:
Genre: N/A
Country of Origin: N/A
Date of Birth: N/A
Top Songs: N/A
Information for DOJA CAT:
Genre: N/A
Country of Origin: N/A
Date of Birth: N/A
Top Songs: N/A
Information for PACKAGE DOJA CAT:
Genre: N/A
Country of Origin: N/A
Date of Birth: N/A
Top Songs: N/A


Next Step is to fix bugs and display those information on the map.