# Project: Plan your trip with Kayak #

## 1. Preparation steps ##

### Import libraries ###

In [1]:
import json
import os
from datetime import datetime

import pandas as pd
import requests

### List cities of interest ###

In [2]:
# Candidate destinations. Order matters: the geocoding loop walks this list
# from first to last and numbers the cities it manages to resolve in that order.
cities = [
    "Mont Saint Michel", "St Malo", "Bayeux", "Le Havre", "Rouen",
    "Paris", "Amiens", "Lille", "Strasbourg", "Chateau du Haut Koenigsbourg",
    "Colmar", "Eguisheim", "Besancon", "Dijon", "Annecy",
    "Grenoble", "Lyon", "Gorges du Verdon", "Bormes les Mimosas", "Cassis",
    "Marseille", "Aix en Provence", "Avignon", "Uzes", "Nimes",
    "Aigues Mortes", "Saintes Maries de la mer", "Collioure", "Carcassonne", "Ariege",
    "Toulouse", "Montauban", "Biarritz", "Bayonne", "La Rochelle",
]

## 2. Get weather data with an API ##

### Get cities GPS coordinates ###

In [3]:
### GET CITIES GPS COORDINATES ###

# Geocode every entry of `cities` with the Nominatim search endpoint and keep the
# first match of each one. Cities that cannot be resolved are collected in
# `cities_wo_info` and get no id, so ids stay consecutive (0, 1, 2, ...) and equal
# to the row position in `df_cities`.
# NOTE(review): Nominatim's usage policy asks for an identifying User-Agent and at
# most one request per second - consider adding both before running this at scale.

# Initialize variables
id_city = 0
cities_wo_info = []
rows_cities = []  # one dict per resolved city; turned into a dataframe once, after the loop

# Get informations about cities and store them into the df_cities dataframe
for city in cities:

    try:

        # Get city coordinates by an API request; the timeout keeps a stalled connection from hanging the cell
        coordinates = requests.get("https://nominatim.openstreetmap.org/search?", params = {"city":city, "country":"France", "format":"json"}, timeout = 10)
        coordinates.raise_for_status()

        # Parse the response body once and keep only the first match
        first_match = coordinates.json()[0]
        row_city = {'id':id_city, 'city':city, 'latitude':first_match['lat'], 'longitude':first_match['lon']}

    except (requests.exceptions.RequestException, ValueError, LookupError):

        # Network/HTTP failure, unparsable body, empty result list or missing key:
        # save city name without information and move on to the next one
        cities_wo_info.append(city)
        continue

    # Add city info to the collected rows and increment city id (only for resolved cities)
    rows_cities.append(row_city)
    id_city += 1

# Build the dataframe in one go instead of concatenating inside the loop
df_cities = pd.DataFrame(rows_cities, columns = ['id', 'city', 'latitude', 'longitude'])

print(f"La recherche des coordonnées GPS pour les destinations demandées est terminée. \
      \nPas d'informations trouvées pour les destinations suivantes (elles ne pourront donc être pas inclues dans les recherches suivantes): {', '.join(cities_wo_info)}.")

La recherche des coordonnées GPS pour les destinations demandées est terminée.       
Pas d'informations trouvées pour les destinations suivantes (elles ne pourront donc être pas inclues dans les recherches suivantes): Gorges du Verdon, Uzes, Ariege.


In [None]:
# Preview the first rows of the geocoded cities table
df_cities.head()

Unnamed: 0,id,city,latitude,longitude
0,0,Mont Saint Michel,48.6359541,-1.511459954959514
1,1,St Malo,48.649518,-2.0260409
2,2,Bayeux,49.2764624,-0.7024738
3,3,Le Havre,49.4938975,0.1079732
4,4,Rouen,49.4404591,1.0939658


### Get cities weather predictions ###

In [None]:
import numpy as np

In [None]:
### GET CITIES WEATHER PREDICTIONS FOR THE 5 NEXT DAYS ###

# For every row of `df_cities`, fetch the current conditions and the 3-hourly
# forecast from OpenWeatherMap and gather them in `df_weather`
# (one row per city and timestamp, keyed by the city `id`).

# Read the API key from the environment instead of committing it with the notebook
OPENWEATHER_API_KEY = os.environ.get('OPENWEATHER_API_KEY')
if not OPENWEATHER_API_KEY:
    raise RuntimeError("Set the OPENWEATHER_API_KEY environment variable before running this cell.")

# Initialize variables
rows_weather = []  # one dict per (city, timestamp); turned into a dataframe once, after the loop

# Get informations about weather in each city and store them into the df_weather dataframe
# Iterate over the stored ids rather than over row positions, so the cell does not
# depend on the dataframe index matching the city id
for id_city, latitude, longitude in zip(df_cities['id'], df_cities['latitude'], df_cities['longitude']):

    # Prepare parameters for API request
    params_current = {
        'lat':latitude,
        'lon':longitude,
        'units':'metric',
        'appid':OPENWEATHER_API_KEY,
    }
    # Same query plus the number of forecast timestamps requested (40 x 3h = 5 days)
    params_forecast = {**params_current, 'cnt':'40'}

    # Request API to obtain weather infos; fail loudly on HTTP errors (bad key, quota, ...)
    weather_current = requests.get("https://api.openweathermap.org/data/2.5/weather?", params = params_current, timeout = 10)
    weather_forecast = requests.get("https://api.openweathermap.org/data/2.5/forecast?", params = params_forecast, timeout = 10)
    weather_current.raise_for_status()
    weather_forecast.raise_for_status()

    # Parse each response body a single time
    current = weather_current.json()
    forecast = weather_forecast.json()

    # Store current weather; this endpoint's payload is not read for pop / rain, so both stay missing
    rows_weather.append({
        'id':id_city,
        'date_time':current['dt'],
        'temperature':current['main']['temp'],
        'humidity':float(current['main']['humidity']),
        'wind_speed':current['wind']['speed'],
        'pop':np.nan,
        'rain':np.nan,
    })

    # Store every forecast entry actually returned (up to 40), instead of assuming exactly 40
    for entry in forecast['list']:
        rows_weather.append({
            'id':id_city,
            'date_time':entry['dt'],
            'temperature':entry['main']['temp'],
            'humidity':float(entry['main']['humidity']),
            'wind_speed':entry['wind']['speed'],
            'pop':entry['pop'],
            # rain parameter isn't always available: fall back to NaN when it is absent
            'rain':entry.get('rain', {}).get('3h', np.nan),
        })

# Build the dataframe in one go instead of concatenating inside the loops
df_weather = pd.DataFrame(rows_weather, columns = ['id', 'date_time', 'temperature', 'humidity', 'wind_speed', 'pop', 'rain'])

In [None]:
# Preview the first rows of the collected weather table
df_weather.head()

Unnamed: 0,id,date_time,temperature,humidity,wind_speed,pop,rain
0,0,1681836406,12.39,79.0,6.14,,
1,0,1681840800,12.39,83.0,6.09,0.61,0.53
2,0,1681851600,10.72,87.0,4.19,0.0,
3,0,1681862400,9.01,91.0,4.75,0.0,
4,0,1681873200,6.53,95.0,3.84,0.0,


### Get the top 5 destinations ###

In [None]:
# Average each weather metric per city, then rank the cities with this priority:
# 1/ humidity (lowest first) 2/ rain (lowest first) 3/ pop (lowest first)
# 4/ temperature (highest first) 5/ wind speed (lowest first)
df_weather_mean = (
    df_weather
    .groupby('id')[['humidity', 'rain', 'pop', 'temperature', 'wind_speed']]
    .mean()
    .sort_values(
        ['humidity', 'rain', 'pop', 'temperature', 'wind_speed'],
        ascending = [True, True, True, False, True],
    )
    .reset_index()
)

# Attach the city details to the ranking and keep the five best-ranked rows
top5 = df_weather_mean.join(df_cities, on = 'id', lsuffix = '_weather', rsuffix = '_city').head(5)
display(top5)

Unnamed: 0,id_weather,humidity,rain,pop,temperature,wind_speed,id_city,city,latitude,longitude
0,20,51.804878,0.36,0.027,16.039268,3.055366,20,Aix en Provence,43.5298424,5.4474738
1,21,58.682927,0.183333,0.05875,15.487805,3.510732,21,Avignon,43.9492493,4.8059012
2,23,59.829268,0.26,0.04975,15.295122,3.4,23,Nimes,43.8374249,4.3600687
3,18,61.853659,,0.0115,15.998537,3.968537,18,Cassis,43.2140359,5.5396318
4,19,62.439024,0.19,0.0145,16.118537,4.275366,19,Marseille,43.2961743,5.3699525


### Group cities and mean weather information in one dataframe ###

In [None]:
### JOIN DATA FROM CITIES AND WEATHER IN ONE DATAFRAME ###

# Outer merge on the shared 'id' key, so rows present on only one side are kept too
df_cities_weather = df_cities.merge(
    df_weather_mean,
    on = 'id',
    how = 'outer',
    suffixes = ('_city', '_weather'),
)
df_cities_weather.head()

Unnamed: 0,id,city,latitude,longitude,humidity,rain,pop,temperature,wind_speed
0,0,Mont Saint Michel,48.6359541,-1.511459954959514,82.195122,0.402857,0.183,9.08878,4.631707
1,1,St Malo,48.649518,-2.0260409,79.878049,0.500833,0.15625,9.375366,5.451463
2,2,Bayeux,49.2764624,-0.7024738,83.04878,0.402308,0.20575,8.376829,5.00561
3,3,Le Havre,49.4938975,0.1079732,78.634146,0.450769,0.19625,8.998537,5.493659
4,4,Rouen,49.4404591,1.0939658,79.439024,0.502,0.222,8.737805,4.774878


### Graphical representation ###

In [None]:
import plotly.express as px

In [None]:
# Make sure both coordinate columns are floats before they are handed to the map
for coord_col in ('latitude', 'longitude'):
    df_cities_weather[coord_col] = df_cities_weather[coord_col].astype(float)

In [None]:
# Map of the cities: marker color follows the mean temperature,
# marker size follows the mean probability of precipitation
fig = px.scatter_mapbox(
    df_cities_weather,
    lat = "latitude",
    lon = "longitude",
    color = "temperature",
    size = "pop",
    size_max = 25,
    zoom = 4,
    mapbox_style = "carto-positron",
    color_continuous_scale = "Bluered",
    width = 700,
    height = 600,
)

# Centered two-line title and a small font so the text fits the 700px-wide figure
fig.update_layout(
    title = dict(
        text = "Top cities in France to visit according to the weather forecast for the next five days<br>- the color indicates the average temperature forecast and the size indicates the average probability of precipitation -",
        x = 0.5,
        xanchor = 'center',
    ),
    font = dict(size = 8),
)
fig.show()

## 3. Scrape Booking.com to obtain hotel information ##

*   Hotel name
*   URL of its Booking.com page
*   Its coordinates: latitude and longitude
*   Score given by the website users
*   Text description of the hotel

In [3]:
### SCRAPE BOOKING TO OBTAIN HOTEL LIST FOR ONE CITY

# Run the spider_booking.py script through a shell escape; its log is printed below the cell.
# NOTE(review): `python` is resolved by the shell, which may not be the kernel's interpreter - confirm.
!python spider_booking.py

2023-04-23 19:11:16 [scrapy.utils.log] INFO: Scrapy 2.6.2 started (bot: scrapybot)
2023-04-23 19:11:16 [scrapy.utils.log] INFO: Versions: lxml 4.9.1.0, libxml2 2.9.14, cssselect 1.1.0, parsel 1.6.0, w3lib 1.21.0, Twisted 22.2.0, Python 3.9.13 (main, Aug 25 2022, 23:51:50) [MSC v.1916 64 bit (AMD64)], pyOpenSSL 22.0.0 (OpenSSL 1.1.1q  5 Jul 2022), cryptography 37.0.1, Platform Windows-10-10.0.19045-SP0
2023-04-23 19:11:16 [scrapy.crawler] INFO: Overridden settings:
{'LOG_LEVEL': 20, 'USER_AGENT': 'Firefox/111.0.'}
2023-04-23 19:11:16 [scrapy.extensions.telnet] INFO: Telnet Password: 53270dcdd142864e
2023-04-23 19:11:16 [scrapy.middleware] INFO: Enabled extensions:
['scrapy.extensions.corestats.CoreStats',
 'scrapy.extensions.telnet.TelnetConsole',
 'scrapy.extensions.feedexport.FeedExporter',
 'scrapy.extensions.logstats.LogStats']
2023-04-23 19:11:17 [scrapy.middleware] INFO: Enabled downloader middlewares:
['scrapy.downloadermiddlewares.httpauth.HttpAuthMiddleware',
 'scrapy.downloade