# ITINERAIRE DE VACANCE - EXPLORATION DES DONNÉES

## 0. Import des modules

In [1]:
import pandas as pd
import json
from pprint import pprint
import seaborn as sns
import geopandas
import contextily as cx
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
import folium
from geopy.geocoders import Nominatim # Pour trouver les coordonées d'une ville
from sklearn.neighbors import KNeighborsRegressor
import joblib
from ortools.constraint_solver import routing_enums_pb2
from ortools.constraint_solver import pywrapcp
from sklearn.neighbors import NearestNeighbors
from pyroutelib3 import Router
import streamlit as st
from streamlit_folium import st_folium
import warnings
warnings.filterwarnings('ignore')

## 1. Import des données
### 1.1 JSON
#### 1.1.1 Index

In [2]:
# Locations of the DATAtourisme export: one JSON file per POI plus an index listing them.
objects_path = '../Data/JSON/objects/'
index_path = '../Data/JSON/index.json'

# Open explicitly as UTF-8 so accented French labels decode the same way on every
# platform instead of depending on the locale's default encoding.
with open(index_path, encoding='utf-8') as json_data:
    index = json.load(json_data)

L'index contient plusieurs informations:
* `file`: chemin d'accès vers les objets json
* `label`: nom du Point Of Interest (POI)
* `lastUpdateDatatourisme`: dernière mise à jour du POI sur Datatourisme

In [3]:
# Show the first two index entries to illustrate the structure described above.
pprint(index[:2])

#### 1.1.2 Exploration d'un objet json

In [4]:
# Path of the first POI referenced by the index.
jsonObject_path = objects_path + index[0]['file']

# Explicit UTF-8 so the decoding does not depend on the platform's default encoding.
with open(jsonObject_path, encoding='utf-8') as json_data:
    jsonObject = json.load(json_data)

Nous avons 15 clés dans notre objet json:
* **`@id`: Lien vers le POI.**
* `dc:identifier`: Identifiant unique
* **`@type`: La classification de POI. Par exemple : festival, musée, chambre d’hôtes...**
* `rdfs:comment`: Commentaire du POI
* **`rdfs:label`: Nom du Point Of Interest**
* `hasBeenCreatedBy`: L'agent qui a créé ce POI dans le système d'information :
    * `@id`: Lien vers l'agent sur DATAtourisme
    * `dc:identifier`: Identifiant unique
    * `schema:email`: E-mail de l'agent
    * `schema:legalName`: Nom officiel de l'agent
    * `@type`: Type de l'agent
    * `foaf:homepage`: Lien vers le site de l'agent
* `hasBeenPublishedBy`: L'agent qui a publié ce POI dans le système d'information:
    * `@id`: Lien vers l'agent sur DATAtourisme
    * `schema:legalName`: Nom officiel de l'agent
    * `@type`: Type de l'agent
    * `foaf:homepage`: Lien vers le site de l'agent
* **`hasContact`: L'agent à contacter pour affaires générales relatives à ce POI:**
    * `@id`: Lien vers l'agent sur DATAtourisme
    * `schema:email`: E-mail de l'agent
    * **`schema:telephone`: Telephone de l'agent**
    * `@type`: Type de l'agent
    * **`foaf:homepage`: Lien vers le site de l'agent**
* **`hasDescription`: Description textuelle courte ou longue du POI pouvant être associée à une audience. Par exemple, un POI peut avoir une description dédiée aux écoles et une autre dédiée au grand public. Si une description n'a aucune audience renseignée, on suppose qu'elle est dédiée à tous les publics:**
    * `@id`: Lien vers la description
    * `@type`: Type(s) de la description
    * `hasTranslatedProperty`: Informations de traductions:
        * `@id`: Lien vers les informations de la traduction de la description
        * `dc:contributor`: Traducteur (api.deepl.com/v2/translate)
        * `rdf:language`: Langue de la traduction
        * `rdf:predicate`: Type de la description
    * **`shortDescription`: Dictionnaire des descriptions traduit en plusieurs langues:**
        * `fr`: En fr
        * `de`: ...
* **`hasTheme`: Liste des thèmes:**
    * `@id`: Identifiant du thème au format kb:[theme en anglais sans espace]
    * `@type`: Type du thème
    * `rdfs:label`: Thème en différente langue:
        * `fr`: fr
        * `de`: ...
* `hasTranslatedProperty`: liste d'informations de traductions sur le commentaire:
    * `@id`: Lien vers les informations de la traduction du commentaire
    * `dc:contributor`: Traducteur (api.deepl.com/v2/translate)
    * `rdf:language`: Langue de la traduction
    * `rdf:predicate`: Table de jointure entre @id et comment
* `isLocatedAt`: Informations sur l'emplacement géographique du POI:
    * `@id`: Lien vers les informations sur l'emplacement
    * `schema:address`: Informations sur l'adresse postale
        * `@id`: Lien vers les informations de l'adresse postale
        * **`schema:addressLocality`: Ville**
        * **`schema:postalCode`: Code postal**
        * **`schema:streetAddress`: Adresse postale (certaines données ne sont pas propres)**
        * `@type`: Type de l'adresse
        * `hasAddressCity`: Informations complémentaires sur l'adresse postale
            * `@id`: Identifiant
            * `@type`: Type de localisation (ex: City)
            * **`rdfs:label`: Liste de la ville traduite ['fr']**
            * `insee`: Code Insee
            * `isPartOfDepartment`: Informations du département
                * `@id`: Identifiant
                * `@type`: Type de localisation (ex: Department)
                * **`rdfs:label`: Liste du Department traduite ['fr']**
                * `insee`: Code Insee
                * `isPartOfRegion`: Informations de la région:
                    * `@id`: Identifiant
                    * `@type`: Type de localisation (ex: Region)
                    * **`rdfs:label`: Liste de la Region traduite ['fr']**
                    * `insee`: Code Insee
                    * `isPartOfCountry`: Informations du Pays:
                        * `@id`: Identifiant
                        * `@type`: Type de localisation (ex: Country)
                        * **`rdfs:label`: Liste Du Pays traduite ['fr']**
    * `schema:geo`:
        * `@id`: Lien vers les informations de coordonnées géographiques
        * **`schema:latitude`: Latitude**
        * **`schema:longitude`: Longitude**
        * `@type`: Type
    * `schema:openingHoursSpecification`: Informations des horaires d'ouverture et de fermeture
        * `@id`: Lien vers les informations des horaires
        * **`schema:closes`: Heure de fermeture**
        * **`schema:opens`: Heure d'ouverture**
        * **`schema:validFrom`: Date de début de période d'ouverture**
        * **`schema:validThrough`: Date de fin de période d'ouverture**
        * `@type`: Type
        * `additionalInformation`: Liste d'informations supplémentaires traduites en plusieurs langues
        * `hasTranslatedProperty`: Liste des informations de traduction des infos supplémentaires
            * `@id`: Lien vers les infos de trad
            * `dc:contributor`: Traducteur (api.deepl.com/v2/translate)
            * `rdf:language`: Langue de la traduction
            * `rdf:predicate`: Table de jointure entre @id et additionalInformation
    * `@type`: Type de place
* `isOwnedBy`: Informations de l'agent propriétaire
    * `@id`: Lien vers l'agent sur DATAtourisme
    * `schema:email`: E-mail de l'agent
    * `schema:legalName`: Nom de l'agent
    * `@type`: Type de l'agent
    * `foaf:homepage`: Lien vers le site de l'agent    
* **`lastUpdate`: Dernière mise à jour du POI**
* `lastUpdateDatatourisme`: Dernière mise à jour du POI sur DATAtourisme

In [5]:
# pprint(jsonObject)

## 2. Préparation des données

### 2.1 Sélection des colonnes et Aplatissement du json

In [6]:
def dataframeFromJson(objects_path, index_path):
    """Load every POI listed in the index file into a single DataFrame.

    Parameters
    ----------
    objects_path : str
        Prefix prepended to each ``file`` entry of the index to build the path
        of a POI JSON file (plain string concatenation, so it must end with a
        path separator).
    index_path : str
        Path of the JSON index: a list of objects that each carry a ``file`` key.

    Returns
    -------
    pandas.DataFrame
        One row per POI with the columns ``id``, ``nom``, ``type``, ``contact``,
        ``description``, ``theme``, ``location`` and ``maj``. Nested values are
        kept as-is (lists / dicts). A selected field that is absent from every
        POI yields an all-NaN column instead of raising ``KeyError``.
    """
    # Raw JSON key -> short column name. The key order is the output column order.
    clean_name = {
        '@id': 'id',
        'rdfs:label.fr': 'nom',
        '@type': 'type',
        'hasContact': 'contact',
        'hasDescription': 'description',
        'hasTheme': 'theme',
        'isLocatedAt': 'location',
        'lastUpdate': 'maj'
    }

    # Read the index from ``index_path`` instead of relying on a notebook-level
    # ``index`` variable, so the function works on a fresh kernel and on any index.
    with open(index_path, encoding='utf-8') as json_data:
        index_entries = json.load(json_data)

    listOfJson = []
    for entry in index_entries:
        with open(objects_path + entry['file'], encoding='utf-8') as json_data:
            listOfJson.append(json.load(json_data))

    # json_normalize flattens nested dicts ('rdfs:label' -> 'rdfs:label.fr') but
    # leaves lists untouched; reindex keeps only the selected columns and
    # tolerates fields that no POI provides.
    df = pd.json_normalize(listOfJson, errors='ignore').reindex(columns=list(clean_name))

    return df.rename(columns=clean_name)

In [7]:
# Build the raw (still nested) POI table and preview its first two rows.
df_raw = dataframeFromJson(objects_path, index_path)
df_raw.head(2)

### 2.2 Preprocessing des données

In [8]:
def preprocessData(df):
    """Flatten the nested POI table into analysis-ready frames.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with the columns ``id``, ``nom``, ``type``, ``contact``,
        ``description``, ``theme`` and ``location`` holding the nested JSON
        values (lists of dicts), missing values being NaN.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame, pandas.DataFrame)
        * the POI table with scalar columns (``nom``, ``type``, ``type2``,
          ``email``, ``telephone``, ``homepage``, ``description``,
          ``latitude``, ``longitude``, ``Ville``, ``CodePostale``,
          ``Departement``, ``Region``, ``Close``, ``Open``, ``validFrom``,
          ``validThrough`` ...);
        * the unique themes indexed by theme id (``typeTheme``, ``nomTheme``);
        * the POI/theme link table (one row per ``id`` / ``theme`` pair).

    Notes
    -----
    Only the first contact, description, location, address and opening-hours
    entry of each POI is used. Every POI is expected to have a location with an
    address and an opening-hours specification; a POI without them raises.
    """
    def is_missing(value):
        # Missing nested values show up as float NaN (or None) in object columns.
        return value is None or isinstance(value, float)

    def first_or_none(value):
        return None if is_missing(value) else value[0]

    df_preprocessed = df.copy()

    # Name: keep the first French label
    df_preprocessed['nom'] = df_preprocessed['nom'].map(lambda x: x[0])

    # Type: the list ends with generic classes ("PointOfInterest", optionally
    # preceded by "PlaceOfInterest"); keep the class just before them.
    def prepoType(types):
        if types[-1] == "PointOfInterest" and types[-2] != "PlaceOfInterest":
            return types[-2]
        elif types[-1] == "PointOfInterest" and types[-2] == "PlaceOfInterest":
            return types[-3]
        else:
            return types[-1]

    df_preprocessed['type'] = df_preprocessed['type'].map(prepoType)

    # Coarser grouping of the detailed types, stored in `type2`
    replace_type_dict = {
        "HolidayResort": "Accommodation",
        "SportsCompetition": "SportsAndLeisurePlace",
        "CampingAndCaravanning": "Accommodation",
        "HotelTrade": "Accommodation",
        "SwimmingPool": "SportsAndLeisurePlace",
        "LodgingBusiness": "Accommodation",
        "WalkingTour": "Activity",
        "ZooAnimalPark": "Activity",
        "Tour": "Activity",
        "TastingProvider": "FoodEstablishment",
        "ReligiousSite": "PlaceToSee",
        "EntertainmentAndEvent": "Activity",
        "District": "PlaceToSee",
        "SquashCourt": "SportsAndLeisurePlace",
        "ThemePark": "Activity",
        "TechnicalHeritage": "PlaceToSee",
        "ParkAndGarden": "PlaceToSee",
        "CollectiveAccommodation": "Accommodation",
        "Ruins": "PlaceToSee",
        "TennisComplex": "SportsAndLeisurePlace",
        "DefenceSite": "PlaceToSee",
        "TouristTrain": "Transport",
        "Transporter": "Transport",
        "CulturalSite": "PlaceToSee",
        "Restaurant": "FoodEstablishment",
        "Museum": "PlaceToSee"
    }
    df_preprocessed['type2'] = df_preprocessed['type'].replace(replace_type_dict)

    # Contact: join the first contact of each POI on its id
    df_preprocessed['contactId'] = df_preprocessed['contact'] \
        .map(lambda x: None if is_missing(x) else x[0]['@id'])

    # Built from the function argument (not a notebook global) and concatenated
    # once instead of growing a frame inside the loop.
    contact_frames = [pd.json_normalize(el[0])
                      for el in df_preprocessed['contact']
                      if not is_missing(el)]
    df_contact = pd.concat(contact_frames).set_index("@id")

    # A contact shared by several POIs appears several times in df_contact, so
    # the join can multiply rows: keep one row per POI.
    df_preprocessed = df_preprocessed.join(df_contact, on='contactId', how='left') \
        .drop_duplicates(subset='id') \
        .drop(columns=['contactId', 'contact', '@type'])

    df_preprocessed = df_preprocessed.rename(columns={
        'schema:email': 'email',
        'schema:telephone': 'telephone',
        'foaf:homepage': 'homepage'
    })

    # Per-column Series.map rather than the deprecated DataFrame.applymap
    for col in ('email', 'telephone', 'homepage'):
        df_preprocessed[col] = df_preprocessed[col].map(first_or_none)

    # Description: first French short description
    df_preprocessed['description'] = df_preprocessed['description'] \
        .map(lambda x: None if is_missing(x) else x[0]["shortDescription"]["fr"][0])

    # Unique themes
    theme_frames = [pd.json_normalize(el)
                    for el in df_preprocessed['theme']
                    if not is_missing(el)]
    df_theme = pd.concat(theme_frames).drop_duplicates(['@id'])

    df_theme_unique = df_theme.set_index("@id")[['@type', 'rdfs:label.fr']]
    df_theme_unique = df_theme_unique.rename(columns={
        '@type': 'typeTheme',
        'rdfs:label.fr': 'nomTheme'
    })
    for col in ('typeTheme', 'nomTheme'):
        df_theme_unique[col] = df_theme_unique[col].map(first_or_none)

    # POI <-> theme link table (one row per pair)
    df_theme = df_preprocessed[['id', 'theme']].copy()
    df_theme['theme'] = df_theme['theme'] \
        .map(lambda x: None if is_missing(x) else [el['@id'] for el in x])
    df_theme = df_theme.explode('theme')

    # Location: first location of each POI, indexed by its id
    location_frames = [pd.json_normalize(el[0])
                       for el in df_preprocessed['location']
                       if not is_missing(el)]
    df_location = pd.concat(location_frames).set_index("@id")

    # Address: first address of each location
    address_records = []
    for el in df_location['schema:address']:
        address = el[0]
        department = address['hasAddressCity']['isPartOfDepartment']
        address_records.append({
            'id_adresse': address['@id'],
            'Ville': address['schema:addressLocality'],
            'CodePostale': address['schema:postalCode'],
            'Departement': department['rdfs:label']['fr'][0],
            'Region': department['isPartOfRegion']['rdfs:label']['fr'][0],
        })

    df_adresse = pd.DataFrame(
        address_records,
        columns=['id_adresse', 'Ville', 'CodePostale', 'Departement', 'Region']
    ).set_index("id_adresse")
    df_location['id_adresse'] = df_location['schema:address'].map(lambda x: x[0]['@id'])
    df_location = df_location.join(df_adresse, how='left', on='id_adresse')

    # Opening hours: first specification of each location; the four schedule
    # fields are optional, hence dict.get (None when absent) instead of a bare except.
    opening_records = []
    for el in df_location['schema:openingHoursSpecification']:
        spec = el[0]
        opening_records.append({
            'id_opening': spec['@id'],
            'Close': spec.get('schema:closes'),
            'Open': spec.get('schema:opens'),
            'validFrom': spec.get('schema:validFrom'),
            'validThrough': spec.get('schema:validThrough'),
        })

    df_opening = pd.DataFrame(
        opening_records,
        columns=['id_opening', 'Close', 'Open', 'validFrom', 'validThrough']
    ).set_index("id_opening")
    df_location['id_opening'] = df_location['schema:openingHoursSpecification'] \
        .map(lambda x: x[0]['@id'])
    df_location = df_location.join(df_opening, how='left', on='id_opening')[[
        "schema:geo.schema:latitude",
        "schema:geo.schema:longitude",
        "Ville",
        "CodePostale",
        "Departement",
        "Region",
        "Close",
        "Open",
        "validFrom",
        "validThrough",
    ]]

    # Attach the flattened location to each POI through the location id
    df_preprocessed['location'] = df_preprocessed['location'].map(lambda x: x[0]['@id'])
    df_preprocessed = df_preprocessed.join(df_location, on='location', how='left') \
        .drop_duplicates(subset='id') \
        .drop(columns=['location'])

    df_preprocessed = df_preprocessed.rename(columns={
        'schema:geo.schema:latitude': 'latitude',
        'schema:geo.schema:longitude': 'longitude'
    })

    # Coordinates are cast to float so they can be used numerically downstream
    df_preprocessed = df_preprocessed.astype({'latitude': float, 'longitude': float})

    return df_preprocessed, df_theme_unique, df_theme

In [9]:
# Flatten the raw table: POI table, unique themes, and POI/theme link table.
df_preprocessed, df_theme_unique, df_theme = preprocessData(df_raw)

In [10]:
# Persist the three preprocessed tables as CSV files under ../Data.
df_preprocessed.to_csv("../Data/POI.csv")
df_theme.to_csv("../Data/theme.csv")
df_theme_unique.to_csv("../Data/theme_unique.csv")

### Application
#### 1.Hotel

In [11]:
def find_coord(ville: str):
    """Geocode a place name with Nominatim.

    Parameters
    ----------
    ville : str
        Free-text place name (e.g. a city).

    Returns
    -------
    tuple
        ``(latitude, longitude)`` of the first match.

    Raises
    ------
    ValueError
        If the geocoder finds no match for ``ville``.
    """
    geolocator = Nominatim(user_agent="my_geocoder")
    location = geolocator.geocode(ville)

    # geocode() returns None when nothing matches; fail with an explicit message
    # instead of an AttributeError on `None.latitude`.
    if location is None:
        raise ValueError(f"Aucune coordonnée trouvée pour la ville : {ville!r}")

    return location.latitude, location.longitude

In [12]:
# 1-nearest-neighbour regressor; it is fitted with accommodation coordinates as
# both inputs and targets, so predict() returns the coordinates of the closest
# known accommodation.
knnHotel = KNeighborsRegressor(n_neighbors=1)

In [13]:
# Coordinates of every POI classified as an accommodation.
df_preprocessed.loc[df_preprocessed['type2'] == 'Accommodation', ['latitude', 'longitude']]

In [14]:
# Inputs and targets are the same accommodation coordinates: with one neighbour,
# predicting a point yields the coordinates of the nearest accommodation.
knnHotel.fit(
    df_preprocessed.loc[df_preprocessed['type2'] == 'Accommodation', ['latitude', 'longitude']],
    df_preprocessed.loc[df_preprocessed['type2'] == 'Accommodation', ['latitude', 'longitude']],
)

In [15]:
# joblib.dump(knnHotel, 'knnHotel.joblib')  

#### Itinéraire

In [16]:
def calcul_distance(point1: tuple, point2: tuple, moyenLocomtion: str, router=None):
    """Length of the route between two coordinates for a transport mode.

    Parameters
    ----------
    point1, point2 : tuple
        ``(latitude, longitude)`` of the start and of the end.
    moyenLocomtion : str
        Transport profile handed to ``Router`` (e.g. ``"foot"``, ``"car"``).
    router : optional
        An existing router to reuse. When omitted a new ``Router`` is created,
        as before; passing one avoids rebuilding it for every pair of points.

    Returns
    -------
    float or str
        Sum of ``router.distance`` over consecutive route points when a route is
        found (unit as returned by the router; presumably kilometres, to be
        confirmed), otherwise the message ``"Pas de route trouvé ..."``.
    """
    if router is None:
        router = Router(moyenLocomtion)

    p1 = router.findNode(*point1)
    p2 = router.findNode(*point2)
    status, route = router.doRoute(p1, p2)

    if status != "success":
        # Kept for backward compatibility: callers receive a message, not a number.
        return "Pas de route trouvé ..."

    route_latLon = list(map(router.nodeLatLon, route))

    # Add up the length of each leg between consecutive route points.
    return sum(router.distance(current_coord, next_coord)
               for current_coord, next_coord in zip(route_latLon, route_latLon[1:]))

In [17]:
def itineraire_villes(moyenLocomtion: str, *coordonneesHotels: tuple):
    """Order the given points into a round trip starting and ending at the first one.

    Parameters
    ----------
    moyenLocomtion : str
        Transport profile forwarded to ``calcul_distance``.
    *coordonneesHotels : tuple
        ``(latitude, longitude)`` of each point to visit; the first is the depot.

    Returns
    -------
    list of int
        Indices into ``coordonneesHotels`` in visiting order, beginning and
        ending with ``0``. Empty list when no point is given. If the solver
        finds no solution, the input order is returned.
    """
    listeCoordo = list(coordonneesHotels)
    nombreVilles = len(listeCoordo)

    if nombreVilles == 0:
        return []
    if nombreVilles == 1:
        return [0, 0]

    # The routing solver works on integers: scale distances before rounding so
    # that short legs do not all collapse to 0.
    ECHELLE = 1000
    matriceDistance = [[0] * nombreVilles for _ in range(nombreVilles)]
    pairesSansRoute = []

    for i in range(nombreVilles):
        for j in range(i + 1, nombreVilles):
            distance = calcul_distance(listeCoordo[i],
                                       listeCoordo[j],
                                       moyenLocomtion)
            if isinstance(distance, (int, float)):
                cout = int(round(distance * ECHELLE))
                # Fill both directions; leaving the lower triangle at 0 would
                # make every "return" leg look free to the solver.
                matriceDistance[i][j] = cout
                matriceDistance[j][i] = cout
            else:
                # calcul_distance returns a message when no route exists.
                pairesSansRoute.append((i, j))

    # Unroutable pairs get a cost larger than any complete tour over routable
    # legs, so they are only used when unavoidable.
    penalite = max(max(ligne) for ligne in matriceDistance) * nombreVilles + 1
    for i, j in pairesSansRoute:
        matriceDistance[i][j] = penalite
        matriceDistance[j][i] = penalite

    manager = pywrapcp.RoutingIndexManager(nombreVilles, 1, 0)
    routing = pywrapcp.RoutingModel(manager)

    def distance_callback(from_index, to_index):
        """Returns the distance between the two nodes."""
        # Convert from routing variable Index to distance matrix NodeIndex.
        from_node = manager.IndexToNode(from_index)
        to_node = manager.IndexToNode(to_index)
        return matriceDistance[from_node][to_node]

    transit_callback_index = routing.RegisterTransitCallback(distance_callback)
    # Without this call the registered callback is never used as travel cost and
    # the tour is not optimized on distance.
    routing.SetArcCostEvaluatorOfAllVehicles(transit_callback_index)

    # Setting first solution heuristic.
    search_parameters = pywrapcp.DefaultRoutingSearchParameters()
    search_parameters.first_solution_strategy = (
        routing_enums_pb2.FirstSolutionStrategy.PATH_CHEAPEST_ARC)

    # Solve the problem.
    solution = routing.SolveWithParameters(search_parameters)
    if solution is None:
        # No tour found: fall back to the order in which the points were given.
        return list(range(nombreVilles)) + [0]

    plan_output = []
    index = routing.Start(0)

    while not routing.IsEnd(index):
        plan_output.append(manager.IndexToNode(index))
        index = solution.Value(routing.NextVar(index))

    plan_output.append(manager.IndexToNode(index))
    return plan_output

In [30]:
def print_map(moyenLocomtion: str, coordonneesHotels):
    """Draw the optimized round trip through the given points on a folium map.

    Parameters
    ----------
    moyenLocomtion : str
        Transport profile used for routing (e.g. ``"foot"``, ``"car"``).
    coordonneesHotels : iterable of tuple
        ``(latitude, longitude)`` of the points to visit; the first one is the
        start of the tour and the centre of the map.

    Returns
    -------
    folium.Map
        Map with one polyline per leg and one marker per visited point (the
        popup is the position of the point in the tour).

    Raises
    ------
    ValueError
        If ``coordonneesHotels`` is empty.
    """
    listeCoord = list(coordonneesHotels)
    if not listeCoord:
        raise ValueError("coordonneesHotels ne doit pas être vide")

    router = Router(moyenLocomtion)
    listePoints = [router.findNode(*coord) for coord in listeCoord]

    optmizePlan = itineraire_villes(moyenLocomtion, *listeCoord)
    m = folium.Map(location=(router.nodeLatLon(listePoints[0])),
                   tiles="OpenStreetMap",
                   zoom_start=16)

    routeurPied = None  # created lazily, only if a leg cannot be routed

    for index in range(len(optmizePlan) - 1):
        depart = optmizePlan[index]
        arrivee = optmizePlan[index + 1]
        status, route = router.doRoute(listePoints[depart], listePoints[arrivee])

        if status == 'success':
            route_latLon = list(map(router.nodeLatLon, route))
        else:
            # Never reuse the previous leg's geometry: retry this leg on foot
            # with nodes looked up by the foot router itself.
            route_latLon = None
            if moyenLocomtion != "foot":
                if routeurPied is None:
                    routeurPied = Router("foot")
                status, route = routeurPied.doRoute(
                    routeurPied.findNode(*listeCoord[depart]),
                    routeurPied.findNode(*listeCoord[arrivee]))
                if status == 'success':
                    route_latLon = list(map(routeurPied.nodeLatLon, route))

            if route_latLon is None:
                # Last resort: straight segment between the two points.
                route_latLon = [listeCoord[depart], listeCoord[arrivee]]

        folium.PolyLine(route_latLon, weight=5, opacity=.4).add_to(m)
        folium.Marker(
            location=router.nodeLatLon(listePoints[depart]),
            popup=str(index), opacity=.8).add_to(m)

    return m

In [19]:
def find_iti_POI(hotel: tuple,
                 types: list,
                 max_poi,
                 algo,
                 df):
    """Select the POIs of the requested types that are closest to a point.

    Parameters
    ----------
    hotel : tuple
        ``(latitude, longitude)`` of the reference point.
    types : list
        Values of the ``type`` column to keep.
    max_poi : int
        Maximum number of POIs to return.
    algo : str
        Selection algorithm; only ``"NN"`` (nearest neighbours) is supported.
    df : pandas.DataFrame
        POI table with the columns ``type``, ``latitude`` and ``longitude``.

    Returns
    -------
    list of tuple
        ``(latitude, longitude)`` of the selected POIs, nearest first. Fewer
        than ``max_poi`` entries (possibly none) when not enough POIs match.

    Raises
    ------
    ValueError
        If ``algo`` is not ``"NN"`` (previously ``None`` was returned silently).
    """
    if algo != "NN":
        raise ValueError(f"Algorithme non supporté : {algo!r}")

    # Rows without coordinates cannot be used by the neighbour search.
    data = df[df['type'].isin(types)][['latitude', 'longitude']].dropna()

    # kneighbors() refuses n_neighbors larger than the number of fitted samples.
    nombreVoisins = min(int(max_poi), len(data))
    if nombreVoisins <= 0:
        return []

    # Neighbours are ranked on raw (latitude, longitude) values with the
    # estimator's default metric, i.e. an approximation of ground distance.
    knn = NearestNeighbors(n_neighbors=nombreVoisins)
    knn.fit(data)
    _, indices = knn.kneighbors([hotel])

    df_POI_selected = data.iloc[list(indices[0])]
    return [tuple(ligne)
            for ligne in df_POI_selected[['latitude', 'longitude']].values]

À voir...
```python
def hotelChoice(ville: str, df):
    lat, lon = find_coord(ville)
    if df[(df["Ville"] == "Calvi") & \
          (df["type2"] == "Accommodation")].shape[0] > 0:
```
Si on veut proposer des hôtels

In [54]:
def appOneDay(types: list, ville: str, modeTransport: str, maxPoi: int, df, knn):
    """Build the one-day itinerary map for a city.

    Parameters
    ----------
    types : list
        POI types (values of the ``type`` column) to include.
    ville : str
        City name; geocoded and matched against the ``Ville`` column.
    modeTransport : str
        Transport profile forwarded to ``print_map``.
    maxPoi : int
        Maximum number of POIs in the itinerary.
    df : pandas.DataFrame
        Preprocessed POI table (``Ville``, ``type``, ``type2``, ``latitude``,
        ``longitude``).
    knn :
        Fitted model whose ``predict([[lat, lon]])`` returns the coordinates of
        an accommodation; used when ``ville`` has none in ``df``.

    Returns
    -------
    folium.Map
        Map produced by ``print_map``.
    """
    latVille, lonVille = find_coord(ville)

    # Accommodations located in the requested city (filter evaluated once).
    df_hotels_ville = df[(df["Ville"] == ville) &
                         (df["type2"] == "Accommodation")]

    if df_hotels_ville.shape[0] > 0:
        # Fixed seed: the same accommodation is picked on every run.
        df_hotel = df_hotels_ville.sample(1, random_state=1)
        latHotel, lonHotel = df_hotel.latitude.values[0], \
            df_hotel.longitude.values[0]
    else:
        latHotel, lonHotel = knn.predict([[latVille, lonVille]])[0]

    listeCoordoneesPoi = find_iti_POI(hotel=(latHotel, lonHotel),
                                      types=types,
                                      max_poi=maxPoi,
                                      algo='NN',
                                      df=df)

    # NOTE(review): POIs are searched around the accommodation, but the tour
    # starts from the geocoded city coordinates. Confirm this is intended.
    total_coord = [(latVille, lonVille)] + listeCoordoneesPoi

    m = print_map(moyenLocomtion=modeTransport, coordonneesHotels=total_coord)
    return m

In [26]:
# Example run: Calvi on foot, up to 5 POIs, restaurants included.
# NB_JOUR is set here but not passed to appOneDay.
NB_JOUR = 0
TYPES = ['CulturalSite', 'WalkingTour', 'FoodEstablishment', 'SportsAndLeisurePlace']
VILLE = 'Calvi'
MODE_TRANSPORT = "foot"
MAX_POI = 5
# TYPES = list(df_preprocessed[df_preprocessed['Ville'] == 'Calvi'].type.unique())
appOneDay(TYPES, VILLE, MODE_TRANSPORT, MAX_POI, df_preprocessed, knnHotel)

In [22]:
# Example run: Calvi on foot, up to 5 POIs, without food establishments.
# NB_JOUR is set here but not passed to appOneDay.
NB_JOUR = 0
TYPES = ['CulturalSite', 'WalkingTour', 'SportsAndLeisurePlace']
VILLE = 'Calvi'
MODE_TRANSPORT = "foot"
MAX_POI = 5
# TYPES = list(df_preprocessed[df_preprocessed['Ville'] == 'Calvi'].type.unique())
appOneDay(TYPES, VILLE, MODE_TRANSPORT, MAX_POI, df_preprocessed, knnHotel)

In [55]:
# Example run: L'Île-Rousse on foot, up to 3 POIs.
# NB_JOUR is set here but not passed to appOneDay.
NB_JOUR = 0
TYPES = ['CulturalSite', 'WalkingTour', 'SportsAndLeisurePlace', 'FoodEstablishment']
VILLE = "L'Île-Rousse"
MODE_TRANSPORT = "foot"
MAX_POI = 3
# TYPES = list(df_preprocessed[df_preprocessed['Ville'] == 'Calvi'].type.unique())
appOneDay(TYPES, VILLE, MODE_TRANSPORT, MAX_POI, df_preprocessed, knnHotel)

In [41]:
# Distinct detailed POI types present in the data.
list(df_preprocessed['type'].unique())

['Store',
 'SportsAndLeisurePlace',
 'TouristInformationCenter',
 'CulturalSite',
 'HolidayResort',
 'SportsEvent',
 'CampingAndCaravanning',
 'FoodEstablishment',
 'Transporter',
 'HotelTrade',
 'Restaurant',
 'SwimmingPool',
 'Accommodation',
 'WalkingTour',
 'ZooAnimalPark',
 'Tour',
 'TastingProvider',
 'ReligiousSite',
 'EntertainmentAndEvent',
 'Transport',
 'District',
 'SquashCourt',
 'ThemePark',
 'TechnicalHeritage',
 'ParkAndGarden',
 'CollectiveAccommodation',
 'Ruins',
 'TennisComplex',
 'DefenceSite',
 'Museum',
 'TouristTrain']

In [44]:
# City of every POI that is not an accommodation.
df_preprocessed.loc[df_preprocessed['type2'] != 'Accommodation', 'Ville']

0                      Calvi
1              Porto-Vecchio
2                      Calvi
3      San-Gavino-di-Carbini
6               L'Île-Rousse
               ...          
618                    Pigna
619                  Corbara
620                    Calvi
621                    Calvi
622                    Calvi
Name: Ville, Length: 501, dtype: object

In [48]:
# Number of POIs per detailed type in Porto-Vecchio.
df_preprocessed.loc[df_preprocessed['Ville'] == 'Porto-Vecchio', 'type'].value_counts()

type
FoodEstablishment          38
SportsAndLeisurePlace      35
Store                      30
HolidayResort              14
CulturalSite               12
Restaurant                 11
Accommodation              11
CampingAndCaravanning      10
HotelTrade                  5
WalkingTour                 5
TastingProvider             2
District                    2
TechnicalHeritage           2
ThemePark                   1
ReligiousSite               1
Transport                   1
SportsEvent                 1
CollectiveAccommodation     1
Name: count, dtype: int64

In [60]:
# Cities ordered by decreasing number of POIs.
list(df_preprocessed['Ville'].value_counts().index)

['Porto-Vecchio',
 'Calvi',
 "L'Île-Rousse",
 'Lecci',
 'Bonifacio',
 'Monticello',
 'Algajola',
 'Galéria',
 'Belgodère',
 'Lumio',
 'Calenzana',
 'Figari',
 'Pigna',
 'Ota',
 'Saint-Florent',
 'Corbara',
 'Aregno',
 'Pianottoli-Caldarello',
 'Quenza',
 'Santo-Pietro-di-Tenda',
 'Lama',
 'Palasca',
 'Pioggiola',
 'Santa-Reparata-di-Balagna',
 'Cateri',
 'Montegrosso',
 'Speloncato',
 "Sant'Antonino",
 'Sotta',
 'Occhiatana',
 'Feliceto',
 "Monacia-d'Aullène",
 'Corte',
 'Sari-Solenzara',
 'Moncale',
 'Olmeta-di-Tuda',
 'Olmi-Cappella',
 'Pietralba',
 'Urtaca',
 'Calacuccia',
 'Furiani',
 'Propriano',
 'Ville-di-Paraso',
 'Castello-di-Rostino',
 'Manso',
 'San-Gavino-di-Carbini',
 'Avapessa',
 'Zilia',
 'Nessa',
 'Costa',
 'Muro',
 'Vivario',
 'Mausoléo',
 'Novella',
 'Chisa',
 'Asco',
 'Casaglione',
 'Serra-di-Scopamène']

In [56]:
# Maps the French transport label to the transport mode string used by the
# routing code (e.g. MODE_TRANSPORT = "foot").
trad_transport = {
    "À pied": 'foot',
    "Voiture": 'car',
    "Cheval": 'horse'
}

In [None]:
# An empty subscript is a SyntaxError; look up an actual label instead.
trad_transport["À pied"]