# Web Scraping Project: Concerts Map

## Scraping concert data from the offi.fr website

### Imports

In [1]:
import re
import time
from pathlib import Path

import folium
import matplotlib.pyplot as plt
import pandas as pd
import requests
import seaborn as sns
from bs4 import BeautifulSoup
from geopy.geocoders import Nominatim
from IPython.display import display
from IPython.core.display import display, HTML
from pyngrok import ngrok
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait


### Extracting data 

In [2]:
# Download the "upcoming concerts" listing page and parse it.
url        = "https://www.offi.fr/concerts/prochainement.html"
# A timeout keeps the cell from hanging forever if the site does not answer.
requesting = requests.get(url, timeout=30)
# Stop here on an HTTP error instead of parsing an error page further down.
requesting.raise_for_status()
soup       = BeautifulSoup(requesting.content, "html.parser")

In [3]:
# Container holding one card per concert, with all of the concert details.
concerts_layout = soup.find(class_='mini-fiche-container populaire-layout')
if concerts_layout is None:
    # Without this check the loop below fails with an opaque
    # "'NoneType' object has no attribute 'find_all'".
    raise RuntimeError("Concert container not found in the parsed page.")

concerts = []

for concert in concerts_layout.find_all(class_='column'):
    title = concert.find('span', itemprop='name').get_text(strip=True)
    location = concert.find(class_='event-place pt-2').get_text(strip=True)
    date = concert.find('b').get_text(strip=True)
    genre = [tag.text.strip() for tag in concert.find_all('span', class_='has-border item-info')]
    # Stored under its own name so the listing-page `url` defined in the
    # previous cell is not overwritten by the last concert's link.
    concert_url = concert.find(itemprop='url')['href']
    concerts.append({"title": title, "location": location, "date": date, "genre": genre, "url": concert_url})


print("Nombre de concerts : ", len(concerts),"\n")
for concert in concerts:
    print (concert)

Nombre de concerts :  100 

{'title': 'Boudchart', 'location': "L'Olympia", 'date': 'Lundi 1er Janvier : 16h00, 20h30, Mardi 2 Janvier : 20h30', 'genre': ['Oriental', 'Monde / Traditionnel'], 'url': 'https://www.offi.fr/concerts/lolympia-2874/boudchart-2410651.html'}
{'title': 'Umberto Tozzi', 'location': "L'Olympia", 'date': 'Dimanche 7 Janvier : 20h00', 'genre': ['Variété internationale', 'Pop / Rock'], 'url': 'https://www.offi.fr/concerts/lolympia-2874/umberto-tozzi-2335509.html'}
{'title': 'The Musical Story of Elvis', 'location': "L'Olympia", 'date': 'Lundi 8 Janvier : 20h00', 'genre': ['Rock', 'Pop / Rock'], 'url': 'https://www.offi.fr/concerts/lolympia-2874/the-musical-story-of-elvis-2311439.html'}
{'title': 'Kery James', 'location': 'Salle Pleyel', 'date': 'Mercredi 10 Janvier : 20h00', 'genre': ['Rap', 'Musiques urbaines'], 'url': 'https://www.offi.fr/concerts/salle-pleyel-3159/kery-james-2331357.html'}
{'title': 'Louis Bertignac', 'location': 'Salle Pleyel', 'date': 'Samedi 1

### Map of concerts

In [12]:
def geocode_location(location, geolocator=None):
    """Return the ``(latitude, longitude)`` of a place name, or ``None``.

    Args:
        location: Free-text place name handed to the geocoder's ``geocode``
            method.
        geolocator: Optional object exposing ``geocode(query)``. When omitted,
            a ``Nominatim`` client is created for this call, as before.
            Passing one in lets callers reuse a single client across lookups.

    Returns:
        A ``(latitude, longitude)`` tuple, or ``None`` when the geocoder finds
        nothing or the lookup raises an error.
    """
    if geolocator is None:
        geolocator = Nominatim(user_agent="geoapiExercises")
    try:
        loc = geolocator.geocode(location)
    except Exception:
        # Best-effort lookup: any geocoding failure means "no coordinates".
        # Unlike a bare ``except:``, this still lets KeyboardInterrupt and
        # SystemExit stop a long geocoding loop.
        return None
    if loc is None:
        # The geocoder answered but found no match for this name.
        return None
    return (loc.latitude, loc.longitude)

# Geocode each venue once: many concerts share a venue, so caching the result
# avoids sending the same query to the geocoding service again and again.
FALLBACK_COORDS = (0, 0)  # placeholder stored when a venue cannot be geocoded
coords_by_venue = {}
for concert in concerts:
    venue = concert['location']
    if venue not in coords_by_venue:
        coords_by_venue[venue] = geocode_location(venue) or FALLBACK_COORDS
    concert['coords'] = coords_by_venue[venue]

# Group concerts by location
concerts_by_location = {}
unplaced_venues = set()
for concert in concerts:
    key = concert['coords']
    if key == FALLBACK_COORDS:
        # A marker at (0, 0) would sit far away from Paris and would lump
        # every venue that failed to geocode under a single venue name.
        unplaced_venues.add(concert['location'])
        continue
    group = concerts_by_location.setdefault(key, {'name': concert['location'], 'concerts': []})
    group['concerts'].append(concert)

if unplaced_venues:
    print("Venues without coordinates (not shown on the map):", sorted(unplaced_venues))

# Create a map and add markers.
# NOTE: the name `map` shadows the Python builtin; it is kept because the
# cell that saves the map refers to it.
map = folium.Map(location=[48.8566, 2.3522], zoom_start=12)  # Centered on Paris

for coords, data in concerts_by_location.items():
    location_name = data['name']
    concerts_at_location = data['concerts']

    # Build the popup from BeautifulSoup tags so scraped titles, dates and
    # URLs are escaped instead of being pasted into the HTML as-is. The
    # document gets its own name so the page-level `soup` is not overwritten
    # (re-running the scraping cell would otherwise break).
    popup_doc = BeautifulSoup("", 'html.parser')
    heading = popup_doc.new_tag("h3")
    heading.string = location_name
    listing = popup_doc.new_tag("ul")
    for concert in concerts_at_location:
        # One list item per concert: a link to its page followed by its date.
        item = popup_doc.new_tag("li")
        link = popup_doc.new_tag("a", href=concert['url'], target="_blank")
        link.string = concert['title']
        item.append(link)
        item.append(f" - {concert['date']}")
        listing.append(item)
    popup_content = str(heading) + str(listing)

    folium.Marker(
        location=coords,
        popup=folium.Popup(popup_content, max_width=300),
        tooltip=location_name
    ).add_to(map)

# Add layer control and display the map
folium.LayerControl().add_to(map)
display(map)

A screenshot of the map is available in the GitHub repository: https://github.com/Timothevtl/concertify

#### Save the map in html

In [21]:
# Save the map as an HTML file in the templates/ folder.
map_output_path = Path("templates/concert_map.html")
# Create the folder first: saving fails if templates/ does not exist yet.
map_output_path.parent.mkdir(parents=True, exist_ok=True)
map.save(str(map_output_path))

### API

# NOTE(review): Flask, render_template and jsonify are not imported anywhere in
# the visible notebook, and `concerts` must already exist — confirm that
# app.py provides them.
app = Flask(__name__)

# Coordinates closer than this (in degrees) are treated as the same point, so
# a lookup does not depend on exact floating-point equality.
COORD_TOLERANCE = 1e-6


@app.route('/')
def index():
    """Render the ``concert_map.html`` template."""
    return render_template('concert_map.html')


# signed=True lets the route accept negative latitudes/longitudes as well;
# the default float converter only matches unsigned values.
@app.route('/api/concerts/<float(signed=True):lat>/<float(signed=True):lon>')
def get_concerts_at_location(lat, lon):
    """Return, as JSON, the title and date of each concert located at (lat, lon)."""
    concerts_at_location = []
    for concert in concerts:
        coords = concert.get('coords')
        if coords is None:
            # Concert without coordinates: it cannot match any point.
            continue
        same_lat = abs(coords[0] - lat) <= COORD_TOLERANCE
        same_lon = abs(coords[1] - lon) <= COORD_TOLERANCE
        if same_lat and same_lon:
            concerts_at_location.append({'title': concert['title'], 'date': concert['date']})
    return jsonify(concerts_at_location)


if __name__ == '__main__':
    app.run()


#### Update concert data in the Flask app

In [8]:
# Execute app.py inside the notebook kernel (presumably the Flask script listed
# above — confirm). %run executes the file as __main__.
%run app.py

{'title': 'Boris Grebenshchikov', 'location': 'Le Trianon', 'date': 'Samedi 23 Décembre : 19h00'}
{'title': 'Boudchart', 'location': "L'Olympia", 'date': 'Lundi 1er Janvier : 16h00, 20h30, Mardi 2 Janvier : 20h30'}
{'title': 'Umberto Tozzi', 'location': "L'Olympia", 'date': 'Dimanche 7 Janvier : 20h00'}
{'title': 'The Musical Story of Elvis', 'location': "L'Olympia", 'date': 'Lundi 8 Janvier : 20h00'}
{'title': 'Kery James', 'location': 'Salle Pleyel', 'date': 'Mercredi 10 Janvier : 20h00'}
{'title': 'Louis Bertignac', 'location': 'Salle Pleyel', 'date': 'Samedi 13 Janvier : 20h00'}
{'title': 'Jay Chou', 'location': 'Paris La Défense Arena', 'date': 'Samedi 13 Janvier : 20h00'}
{'title': 'Hugues Aufray', 'location': "L'Olympia", 'date': 'Dimanche 14 Janvier : 20h00'}
{'title': 'One Night of Queen', 'location': 'Le Zénith Paris - La Villette', 'date': 'Mar 16 Janvier : 20h00'}
{'title': 'Slowdive', 'location': 'La Cigale', 'date': 'Mercredi 17 Janvier : 20h00'}
{'title': 'Simple Plan', 

 * Restarting with windowsapi reloader
 * Restarting with windowsapi reloader


# Ideas for future improvement

Use data from Apple Music and then scrape the user's favourite artists: https://medium.com/swlh/apple-music-activity-analyser-part-1-dd02173f095f

# Pushing updates to the GitHub repository

In [None]:
# Clone the project repository into a local "concertify" folder.
!git clone https://github.com/Timothevtl/concertify.git

Cloning into 'concertify'...


In [None]:
cd concertify


c:\Users\Cyprien\OneDrive - De Vinci\Documents\ESILV\A5\Webscrapping\Project\concertify


In [5]:
# Fetch and merge the latest remote changes before committing local work.
!git pull

Merge made by the 'ort' strategy.
 Concertify.ipynb | 369 ++++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 267 insertions(+), 102 deletions(-)


In [7]:
# Stage, commit and push the local changes to the `main` branch.
# `git add .` stages every change under the current directory, so check what
# is in the working tree before running this cell.
# NOTE(review): the commit message mentions project.ipynb while the pull output
# above lists Concertify.ipynb — confirm the intended file name.
!git add .
!git commit -m "Update project.ipynb with new changes"
!git push origin main




[main fe4b7e2] Update project.ipynb with new changes
 1 file changed, 129 insertions(+), 111 deletions(-)


To https://github.com/Timothevtl/concertify.git
   dc73481..fe4b7e2  main -> main
