In [1]:
import pandas as pd
import numpy as np
import plotly.express as px

In [2]:
df_hotel = pd.read_csv('src/scrap_booking.csv')
df_meteo7j = pd.read_csv('src/meteo_villes_7j.csv', index_col=0)
df_meteo_daily = pd.read_csv('src/meteo_villes_daily.csv', index_col=0)

In [3]:
df_hotel['hotel_nb_reviews'] = df_hotel['hotel_note_nbr_review'].str.split(n=2, expand=True)[0]

In [4]:
df_hotel.head()

Unnamed: 0,hotel_city,hotel_name,hotel_address,hotel_note_name,hotel_note_nbr,hotel_note_nbr_review,hotel_link,hotel_desc,hotel_lat,hotel_lon,hotel_nb_reviews
0,Le Mont Saint Michel,Mont B&B,Pontorson,Superb,9.2,959 reviews,https://www.booking.com/hotel/fr/mont-b-amp-b....,Mont B&B in Pontorson features 3-star accommod...,48.552175,-1.509762,959
1,Le Mont Saint Michel,Maeva,Pontorson,Superb,9.0,13 reviews,https://www.booking.com/hotel/fr/maeva-pontors...,"Maeva is located in Pontorson, 10 km from Mont...",48.552925,-1.512163,13
2,Le Mont Saint Michel,Village de Gîtes de l'Anse de Moidrey,Moidrey,Very good,8.3,187 reviews,https://www.booking.com/hotel/fr/village-de-gi...,Offering an indoor swimming pool and a tennis ...,48.579988,-1.516531,187
3,Le Mont Saint Michel,Clos Margottières,Le Val-Saint-Père,Exceptional,9.6,57 reviews,https://www.booking.com/hotel/fr/clos-margotti...,Clos Margottières is located in Le Val-Saint-P...,48.663746,-1.394829,57
4,Le Mont Saint Michel,La Parenthèse de la Baie,Courtils,Good,7.4,203 reviews,https://www.booking.com/hotel/fr/la-parenthese...,"Set in Courtils in the Lower Normandy region, ...",48.629403,-1.428644,203


In [5]:
df_hotel['hotel_city'] = df_hotel['hotel_city'].replace('Ariège', 'Ariege')

In [6]:
df_hotel['hotel_city'].unique()

array(['Le Mont Saint Michel', 'Saint Malo', 'Bayeux', 'Le Havre',
       'Rouen', 'Paris', 'Amiens', 'Lille', 'Strasbourg',
       'Le Château du Haut-Koenigsbourg', 'Colmar', 'Eguisheim',
       'Besançon', 'Dijon', 'Annecy', 'Grenoble', 'Lyon',
       'Gorges du Verdon', 'Bormes-les-Mimosas', 'Cassis', 'Marseille',
       'Aix-en-Provence', 'Avignon', 'Uzès', 'Nîmes', 'Aigues-Mortes',
       'Saintes-Maries-de-la-Mer', 'Collioure', 'Carcassonne', 'Ariege',
       'Toulouse', 'Montauban', 'Biarritz', 'Bayonne', 'La Rochelle'],
      dtype=object)

In [7]:
df_meteo_daily.head()

Unnamed: 0,index,time,precipitation_sum,rain_sum,temperature_2m_min,temperature_2m_max,city,lat,lon
0,0,2022-12-18,14.8,14.7,-2.8,8.7,Mont Saint Michel,48.64,-1.51
1,1,2022-12-19,4.5,3.2,8.5,12.1,Mont Saint Michel,48.64,-1.51
2,2,2022-12-20,14.1,12.4,9.2,12.8,Mont Saint Michel,48.64,-1.51
3,3,2022-12-21,1.7,1.4,5.5,10.9,Mont Saint Michel,48.64,-1.51
4,4,2022-12-22,7.0,5.6,10.8,13.2,Mont Saint Michel,48.64,-1.51


In [8]:
df_meteo7j.head()

Unnamed: 0,City,verif,lat,lon,mean_7d_precipitation,mean_7d_rain,mean_7d_temp_min,mean_7d_temp_max
0,Mont Saint Michel,"Mont Saint-Michel, Avancée des Bombardes, Le M...",48.64,-1.51,8.67,7.5,7.89,12.31
1,St Malo,"Saint-Malo, Ille-et-Vilaine, Bretagne, France ...",48.65,-2.03,7.29,6.09,7.71,12.17
2,Bayeux,"Bayeux, Calvados, Normandie, France métropolit...",49.28,-0.7,5.57,5.24,7.6,12.26
3,Le Havre,"Le Havre, Seine-Maritime, Normandie, France mé...",49.49,0.11,6.24,5.4,7.83,11.57
4,Rouen,"Rouen, Seine-Maritime, Normandie, France métro...",49.44,1.09,7.49,6.86,6.81,10.94


In [9]:
fig = px.scatter_mapbox(df_meteo7j, lat="lat", lon="lon", size="mean_7d_precipitation", color='mean_7d_temp_min',
                        mapbox_style="open-street-map", zoom = 3.5, color_continuous_scale = 'Reds', width=800, height=500)
fig.show()

In [10]:
fig = px.scatter_mapbox(df_meteo_daily, lat="lat", lon="lon", size="precipitation_sum", color='temperature_2m_min',
                        mapbox_style="open-street-map", zoom = 3.5, color_continuous_scale = 'Reds', animation_frame='time', width=800, height=500)
fig.show()

In [11]:
top5 = [*df_meteo7j.sort_values('mean_7d_precipitation').head(5)['City']]

In [12]:
top5

['Carcassonne', 'Collioure', 'Marseille', 'Ariege', 'Cassis']

In [13]:
df_top_hotels = df_hotel.loc[:-1,:]

In [14]:
for city in top5:
    df = df_hotel[df_hotel['hotel_city'] == city]
    #Let's use only meaningful hotel notes, we will consider that any less than 50 reviews aren't enough to give a viable score
    df = df.dropna(subset='hotel_nb_reviews')
    df['hotel_nb_reviews'] = df['hotel_nb_reviews'].str.replace(',', '')
    df['hotel_nb_reviews'] = df['hotel_nb_reviews'].astype(int)
    df = df[df['hotel_nb_reviews'] > 50]
    #Let's sort the hotels by score
    df = df.sort_values(by = ['hotel_note_nbr'], ascending=[False])
    #And keep only the top 20 hotels
    df = df.iloc[:20,:]
    #Keep a ranking table to sort by best hotel amongst the 20 shortlisted hotels
    df['ranking'] = [x for x in range(len(df),0,-1)]
    #Append the global df
    df_top_hotels = df_top_hotels.append(df, ignore_index = True)


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.



In [15]:
df_top_hotels['hotel_city'].unique()

array(['Carcassonne', 'Collioure', 'Marseille', 'Ariege', 'Cassis'],
      dtype=object)

In [16]:
px.scatter_mapbox(df_top_hotels, lat="hotel_lat", lon="hotel_lon", color = 'hotel_city', mapbox_style="carto-positron", size = 'ranking', title = 'Top 20 hotels in the top 5 places where the weather is best in France in the next 7 days', width=800, height=500, zoom=6)

In [17]:
px.scatter_mapbox(df_top_hotels[df_top_hotels['hotel_city'] == top5[0]], lat="hotel_lat", lon="hotel_lon", mapbox_style="carto-positron", size = 'ranking', title = top5[0], width=800, height=500, zoom=12)

In [18]:
px.scatter_mapbox(df_top_hotels[df_top_hotels['hotel_city'] == top5[1]], lat="hotel_lat", lon="hotel_lon", mapbox_style="carto-positron", size = 'ranking', title = top5[1], width=800, height=500, zoom=12)

In [19]:
px.scatter_mapbox(df_top_hotels[df_top_hotels['hotel_city'] == top5[2]], lat="hotel_lat", lon="hotel_lon", mapbox_style="carto-positron", size = 'ranking', title = top5[2], width=800, height=500, zoom=12)

In [24]:
px.scatter_mapbox(df_top_hotels[df_top_hotels['hotel_city'] == top5[3]], lat="hotel_lat", lon="hotel_lon", mapbox_style="carto-positron", size = 'ranking', title = top5[3], width=800, height=500, zoom=8)

In [26]:
px.scatter_mapbox(df_top_hotels[df_top_hotels['hotel_city'] == top5[4]], lat="hotel_lat", lon="hotel_lon", mapbox_style="carto-positron", size = 'ranking', title = top5[4], width=800, height=500, zoom=12)