In [1]:
# data
import numpy as np
import pandas as pd

# machine learning
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# data visualization
import plotly.express as px
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
path = "proj_adad.xlsx"

small_dataset = pd.read_excel(path, header=0)

In [3]:
#@title Code — Sélectionner les colonnes
# df = small_dataset.loc[:, ('User_ID', 'Age', 'Gender')]

print(f'Nombre total de lignes : {len(small_dataset.index)}\n\n')
small_dataset.head()

Nombre total de lignes : 5374




Unnamed: 0,Nom,Code_Postal,libelle_geographique,code_insee,Code_Insee_Arrondt,Type_equipement_ou_lieu,Label_et_appellation,Domaine,Adresse_postale,Departement,Precision_equipement,N_Departement,GCD,AAV,Latitude,Longitude,coordonnees_geo
0,village,5460,Abriès-Ristolas,5001,51,Monument,Architecture contemporaine remarquable,Patrimoine,05460 Abriès-Ristolas,Hautes-Alpes,Urbanisme et espaces aménagés,5,6 - Rural à habitat dispersé,30 - Hors attraction des villes,44.7747,6.953637,"44.7747, 6.953637"
1,Ensemble paroissial d'Abriès,5460,Abriès-Ristolas,5001,51,Monument,Monument historique,Patrimoine,pl. de l'Eglise 05460 Abriès-Ristolas,Hautes-Alpes,Monument historique inscrit,5,6 - Rural à habitat dispersé,30 - Hors attraction des villes,44.797697,6.928415,"44.797697, 6.928415"
2,Vieille Halle,5460,Abriès-Ristolas,5001,51,Monument,Monument historique,Patrimoine,05460 Abriès-Ristolas,Hautes-Alpes,,5,6 - Rural à habitat dispersé,30 - Hors attraction des villes,44.804252,6.935427,"44.804252, 6.935427"
3,Tunnel de la Traversette ou Pertuis du Viso,5460,Abriès-Ristolas,5001,51,Monument,Monument historique,Patrimoine,05460 Abriès-Ristolas,Hautes-Alpes,Monument historique inscrit,5,6 - Rural à habitat dispersé,30 - Hors attraction des villes,44.7747,6.953637,"44.7747, 6.953637"
4,Bibliothèque d' Abriès,5460,Abriès-Ristolas,5001,51,Bibliothèque,,Lecture publique,pl. de l'Eglise 05460 Abriès-Ristolas,Hautes-Alpes,Bibliothèque municipale,5,6 - Rural à habitat dispersé,30 - Hors attraction des villes,44.794455,6.928294,"44.7944554765678, 6.92829357048155"


In [4]:
# pre-filter (data pre-processing)

small_dataset_pre_filter = small_dataset[
    (small_dataset["Latitude"].between(42.8, 45.2)) &
    (small_dataset["Longitude"].between(4.2, 7.8))
]

In [5]:
def precise_coords(x):
    s = str(x)
    return len(s.split(".")[1]) if "." in s else 0

In [6]:
mask_keep_precise = (
    small_dataset_pre_filter["Latitude"].apply(precise_coords) >= 3
) & (
    small_dataset_pre_filter["Longitude"].apply(precise_coords) >= 3
)

df = small_dataset_pre_filter[mask_keep_precise]

In [7]:
colors = [
    "red", "blue", "green", "purple", "orange",
    "darkred", "lightred", "beige", "darkblue",
    "darkgreen", "cadetblue", "darkpurple",
    "white", "pink", "lightblue", "lightgreen",
    "gray", "black", "lightgray"
]

df["color"] = (
    df["Type_equipement_ou_lieu"]
      .astype("category")
      .cat.codes
      .apply(lambda i: colors[i % len(colors)])
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["color"] = (


In [8]:
df.describe(include='all')

Unnamed: 0,Nom,Code_Postal,libelle_geographique,code_insee,Code_Insee_Arrondt,Type_equipement_ou_lieu,Label_et_appellation,Domaine,Adresse_postale,Departement,Precision_equipement,N_Departement,GCD,AAV,Latitude,Longitude,coordonnees_geo,color
count,5350,5350.0,5350,5350.0,5350.0,5350,3194,5350,5350,5350,4490,5350.0,5350,5350,5350.0,5350.0,5350,5350
unique,4542,,769,,,19,32,8,4198,6,83,,7,9,,,4304,19
top,Eglise,,Nice,,,Monument,Monument historique,Patrimoine,13104 Arles,Bouches-du-Rhône,Monument historique inscrit,,1 - Grands centres urbains,13 - Pôles de 200 000 à moins de 700 000 hab.,,,"43.543797, 4.630504",cadetblue
freq,75,,256,,,2659,2372,3034,37,1667,1455,,1820,922,,,33,2659
mean,,34858.427477,,34663.165421,347.872336,,,,,,,34.580374,,,43.729772,5.878745,,
std,,35569.585967,,35531.724412,355.376677,,,,,,,35.541136,,,0.402935,0.852002,,
min,,4000.0,,4004.0,41.0,,,,,,,4.0,,,42.999315,4.422904,,
25%,,6230.0,,6088.0,62.0,,,,,,,6.0,,,43.44705,5.161563,,
50%,,13104.0,,13058.0,132.0,,,,,,,13.0,,,43.693506,5.713092,,
75%,,83400.0,,83098.0,832.0,,,,,,,83.0,,,43.943698,6.632803,,


In [9]:
import folium

In [10]:
m = folium.Map(
    location=[43.9, 6.0], # centre PACA
    zoom_start=7
)

for _, row in df.iterrows():
    folium.Marker(
        location=[row["Latitude"], row["Longitude"]],
        popup=f"""
        <b>{row['Label_et_appellation']}</b><br>
        Type: {row['Type_equipement_ou_lieu']}
        """,
        icon=folium.Icon(
            color=row["color"],
            icon="info-sign"
        )
    ).add_to(m)

m

In [11]:
m.save("paca_monuments.html")

In [12]:
grands_centres_urbains_df = (
    df.loc[df["GCD"] == "1 - Grands centres urbains", ["libelle_geographique"]]
      .drop_duplicates()
)
grands_centres_urbains_df.head(100)

Unnamed: 0,libelle_geographique
9,Antibes
11,Cagnes-sur-Mer
12,Cannes
14,Le Cannet
17,Mandelieu-la-Napoule
20,Nice
22,Saint-Laurent-du-Var
24,Vallauris
25,Villeneuve-Loubet
29,Aix-en-Provence


In [13]:
ceintures_urbaines_df = (
    df.loc[df["GCD"] == "4 - Ceintures urbaines", ["libelle_geographique"]]
      .drop_duplicates()
)
ceintures_urbaines_df.head(100)

Unnamed: 0,libelle_geographique
19,Mougins
70,La Fare-les-Oliviers
77,Saint-Chamas
98,Velaux
116,Beaulieu-sur-Mer
...,...
925,Saint-Mandrier-sur-Mer
1330,Opio
1725,Saint-Paul-de-Vence
3510,La Celle
