# Les _notebooks_ et `Git`

::: {.callout-important}
## À ne pas reproduire chez vous

Ce document vise à illustrer les enjeux du contrôle de version avec un _notebook_. Il ne correspond absolument pas à un modèle à reproduire, bien au contraire.
:::

In [3]:
import requests
from tqdm import tqdm
import tempfile

# URL of the Parquet file fetched by this notebook; it is passed to
# download_file_to_tempfile() in a later cell.
# NOTE(review): presumably INSEE's 2023 "BPE" dataset, judging by the file name — confirm.
url_bpe = "https://www.insee.fr/fr/statistiques/fichier/8217525/BPE23.parquet"


def download_file_to_tempfile(url: str, timeout: float = 30.0) -> "str | None":
    """Stream the file at ``url`` into a new temporary ``.parquet`` file.

    Args:
        url: Address of the file to download.
        timeout: Value forwarded to ``requests.get`` as its ``timeout``
            argument, so that a stalled server cannot block forever.

    Returns:
        The path of the temporary file on success, or ``None`` when the
        download fails with a ``requests`` exception (the error is printed).
        The file is created with ``delete=False``: the caller owns it and is
        responsible for removing it once it is no longer needed.

    Any partially written temporary file is removed when the download does
    not complete.
    """
    import os  # local import: only needed to clean up a partial download

    block_size = 1024  # 1 Kilobyte
    temp_filename = None
    completed = False
    try:
        # Using the response as a context manager releases the connection
        # even when streaming is interrupted.
        with requests.get(url, stream=True, timeout=timeout) as response:
            # Raise an exception for HTTP errors
            response.raise_for_status()

            # Get the total file size from the headers (if available)
            total_size = int(response.headers.get('content-length', 0))

            # Create the temporary file only once the request has succeeded
            temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.parquet')
            temp_filename = temp_file.name

            # Both context managers exit on error, so neither the file handle
            # nor the progress bar is left open.
            with temp_file as file, tqdm(
                total=total_size, unit='iB', unit_scale=True, desc="Downloading file"
            ) as progress_bar:
                for chunk in response.iter_content(chunk_size=block_size):
                    progress_bar.update(len(chunk))
                    file.write(chunk)

        completed = True
        print(f"File downloaded successfully to temp file: {temp_filename}")
        return temp_filename  # Return the path of the temporary file

    except requests.exceptions.RequestException as e:
        print(f"Failed to download the file: {e}")
        return None

    finally:
        # delete=False means nothing else will remove a half-written file.
        if not completed and temp_filename is not None:
            try:
                os.remove(temp_filename)
            except OSError:
                pass

In [4]:
temp_file_path = download_file_to_tempfile(url_bpe)

# download_file_to_tempfile() returns None on failure; stop here rather than
# letting a later cell try to read a file literally named 'None'.
if temp_file_path is None:
    raise RuntimeError(f"Could not download {url_bpe}")

Downloading file: 173MiB [00:10, 16.2MiB/s] 

File downloaded successfully to temp file: /tmp/tmpgw8ctvx6.parquet





In [None]:
import duckdb

# Build the query text first, then execute it. The query keeps only the rows
# whose TYPEQU value starts with 'F1' and whose longitude/latitude fall inside
# the ranges given below, reading directly from the downloaded Parquet file.
bpe_query = f"""
        SELECT *
        FROM read_parquet('{temp_file_path}') 
        WHERE
            starts_with(TYPEQU, 'F1')
            AND longitude BETWEEN 2.327943 AND 2.399054
            AND latitude BETWEEN 48.912967 AND 48.939077
    """
df = duckdb.sql(bpe_query).to_df()

In [11]:
# Display the first rows of the table (at most 20) as the cell output.
df.head(20)

Unnamed: 0,AN,NOMRS,CNOMRS,NUMVOIE,INDREP,TYPVOIE,LIBVOIE,CADR,CODPOS,DEPCOM,...,ZUS,QUALI_ZUS,EPCI,UU2020,BV2022,AAV2020,DENS3,DENS7,LIBCOM,geometry
0,2023,COMPLEXE SPORTIF JEAN MOULIN,TERRAIN DE BOULES COUVERT,,,AV,JEAN MOULIN,,92390,92078,...,HZ,3,200054781,851,75056,1,1,1,VILLENEUVE-LA-GARENNE,POINT (2.33038 48.939)
1,2023,COMPLEXE SPORTIF JEAN MOULIN,TERRAIN DE TENNIS EXTERIEUR 3,,,AV,JEAN MOULIN,,92390,92078,...,HZ,3,200054781,851,75056,1,1,1,VILLENEUVE-LA-GARENNE,POINT (2.33038 48.939)
2,2023,STADE GASTON BOUILLANT,STADE D'ATHLETISME,,,AV,PIERRE DE COUBERTIN,,92390,92078,...,HZ,3,200054781,851,75056,1,1,1,VILLENEUVE-LA-GARENNE,POINT (2.32919 48.9387)
3,2023,ECOLE FOND DE LA NOUE,TERRAIN EXTERIEUR,1.0,,RUE,FOND DE LA NOUE,ETABLISSEMENT SCOLAIRE,92390,92078,...,1121030,3,200054781,851,75056,1,1,1,VILLENEUVE-LA-GARENNE,POINT (2.33054 48.93137)
4,2023,ESPACE PIERRE BROSSOLETTE,SALLE DE DANSE,,,RUE,PIERRE BROSSOLETTE,,92390,92078,...,HZ,3,200054781,851,75056,1,1,1,VILLENEUVE-LA-GARENNE,POINT (2.33435 48.93304)
5,2023,CENTRE CHOREGRAPHIQUE,SALLE DE DANSE,23.0,,QUAI,D ASNIERES,,92390,92078,...,HZ,3,200054781,851,75056,1,1,1,VILLENEUVE-LA-GARENNE,POINT (2.33511 48.93171)
6,2023,STADE GASTON BOUILLANT,TERRAIN DE FOOTBALL,,,AV,PIERRE DE COUBERTIN,,92390,92078,...,HZ,3,200054781,851,75056,1,1,1,VILLENEUVE-LA-GARENNE,POINT (2.32919 48.9387)
7,2023,COMPLEXE SPORTIF JEAN MOULIN,TERRAIN DE FOOTBALL,,,AV,JEAN MOULIN,,92390,92078,...,HZ,3,200054781,851,75056,1,1,1,VILLENEUVE-LA-GARENNE,POINT (2.33038 48.939)
8,2023,ECOLE FOND DE LA NOUE,GRAND GYMNASE,1.0,,RUE,FOND DE LA NOUE,ETABLISSEMENT SCOLAIRE,92390,92078,...,1121030,3,200054781,851,75056,1,1,1,VILLENEUVE-LA-GARENNE,POINT (2.33171 48.93107)
9,2023,ESPACE PIERRE BROSSOLETTE,GYMNASE,,,RUE,PIERRE BROSSOLETTE,,92390,92078,...,HZ,3,200054781,851,75056,1,1,1,VILLENEUVE-LA-GARENNE,POINT (2.33435 48.93304)


Bad pipe message: %s [b'36%2Ct%3D3%2Cp%3D4%24ZcqsKEZtfSq752VDrFf8qw%24Q1pYdw', b'QYAQXEmLGd54voNFZk6RdUsJA26%2Bd4kTqFE\r\npriority: u=0, i']
Bad pipe message: %s [b'accept-langu']
Bad pipe message: %s [b'e: fr-FR,fr;q=0.9,en-US;q=0.8,en;q=0.7\r\naccept-encoding: gzip, deflate, br, zstd\r\nreferer: https', b'/user-louis123-615770-0.user.lab.sspcloud.fr/?folder=/hom', b'onyxia/work\r\nsec-fetch-dest: document\r\nsec-fetch-user: ?1\r\nsec-fetch-mode: navigate\r\nsec-fetch-site:', b'ame-origin\r\naccept: text/html,a', b'lication/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange']
Bad pipe message: %s [b'=b3;q=0.7\r\nuser-agent: Mozilla/5.0 (Windows NT 10.0; Win64']
Bad pipe message: %s [b'x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0']
Bad pipe message: %s [b' Safari/537.36\r\nupgrade-insecure-requests: 1\r']
Bad pipe message: %s [b'ec-ch-ua-', b'atform: "Windows"\r\nsec-ch-ua-mobile: ?0\r\nsec-ch-ua: "Google Chrome";v="129"

In [7]:
!pip install geopandas



In [8]:
import geopandas as gpd

In [9]:
# Rebind df to a GeoDataFrame whose geometry column holds one point per row,
# built from the LONGITUDE / LATITUDE columns and tagged with CRS EPSG:4326.
df = gpd.GeoDataFrame(
    data=df,
    geometry=gpd.points_from_xy(x=df["LONGITUDE"], y=df["LATITUDE"]),
    crs="EPSG:4326",
)

In [10]:
import folium

import html  # stdlib; escapes data values before they are embedded in popup HTML

# Create a base map centered around the average of the coordinates provided
map_center = [df['geometry'].y.mean(), df['geometry'].x.mean()]
folium_map = folium.Map(location=map_center, zoom_start=15)

# Add one marker per row. Iterating over just the three needed columns avoids
# building a full Series for every row, as iterrows() does.
for point, name, typequ in zip(df['geometry'], df['NOMRS'], df['TYPEQU']):
    # The popup is rendered as HTML: escape the values coming from the data and
    # use <br> for the line break (a bare "\n" is not rendered as one in HTML).
    # str() keeps missing values displayable instead of raising in html.escape.
    popup_html = (
        f"<b>{html.escape(str(name))}</b><br>"
        f"<b>Catégorie</b>: {html.escape(str(typequ))}"
    )

    # Add a marker to the map at (latitude, longitude)
    folium.Marker(
        location=[point.y, point.x],
        popup=popup_html,
        icon=folium.Icon(icon="info-sign"),
    ).add_to(folium_map)

# Last expression of the cell: displays the map
folium_map