In [1]:
from bs4 import BeautifulSoup
import requests

import pandas as pd
import json

from time import sleep
from datetime import datetime, timezone

import boto3

import plotly.io as pio
pio.renderers.default = "iframe_connected"
import plotly.express as px

In [2]:
cities_list = ["Mont Saint Michel", "St Malo", "Bayeux", "Le Havre", "Rouen", "Paris", "Amiens", "Lille", "Strasbourg", 
               "Chateau du Haut Koenigsbourg", "Colmar", "Eguisheim", "Besancon", "Dijon", "Annecy", "Grenoble", "Lyon", 
               "Gorges du Verdon", "Bormes les Mimosas", "Cassis", "Marseille", "Aix en Provence", "Avignon", "Uzes", 
               "Nimes", "Aigues Mortes", "Saintes Maries de la mer", "Collioure", "Carcassonne", "Ariege", "Toulouse", 
               "Montauban", "Biarritz", "Bayonne", "La Rochelle"]

path = './cities.csv'

# 2) Get weather data from each destination  

## 2.1) Get gps coordinates from nominatim API 🌍

In [3]:
base_url_geo = "https://nominatim.openstreetmap.org/search?"

def nominatim_geocode(address, format='json', limit=1, **kwargs):
    '''
    This wrapper around nominatim API
    Documentation : https://nominatim.org/release-docs/develop/api/Search/
    '''
    params = {"q":address, "format": format, "limit": limit, **kwargs}
    response = requests.get(base_url_geo, params=params)
    response.raise_for_status() # try except r.raise_for_status() ==  None
    sleep(1)
    return response.json()

🗒 **_raise_for_status_** is used to handle exceptions if the status code is not 200  
🗒 **_time.sleep_** is used to delay code execution for some amount of time. Many requests, fired in rapid succession can, depending on the server in question, quickly take up all of the free connections and effectively become a **DoS Attack**. To allow for breathing space, as well as to make sure we don't negatively impact either the users of the website or the website itself, we'd limit the number of requests sent by delaying each one.

## 2.2) Save and structure retrieved data 📚

In [4]:
columns = ['city', 'latitude', 'longitude']
data = []

for city in cities_list:
    response = nominatim_geocode(address = city, country = 'France')
    row =[city, response[0]['lat'], response[0]['lon']]
    data.append(row)
    
geo_df = pd.DataFrame(data=data, columns=columns)
geo_df

Unnamed: 0,city,latitude,longitude
0,Mont Saint Michel,48.6359541,-1.511459954959514
1,St Malo,48.649518,-2.0260409
2,Bayeux,49.2764624,-0.7024738
3,Le Havre,49.4938975,0.1079732
4,Rouen,49.4404591,1.0939658
5,Paris,48.8588897,2.3200410217200766
6,Amiens,49.8941708,2.2956951
7,Lille,50.6365654,3.0635282
8,Strasbourg,48.584614,7.7507127
9,Chateau du Haut Koenigsbourg,48.249489800000006,7.34429620253195


## 2.3) Get weather data from One Call API ⛅

In [5]:
base_url_weather = 'https://api.openweathermap.org/data/2.5//onecall?'

def OneCall_weather(lat, lon, exclude, API_key = '4553685c373893d94b854a6c35825c33', units ='metric'):
    '''
    API: One Call
    weather data params: (la,  lon, exclude, api key) 
    url : https://api.openweathermap.org/data/2.5/onecall?lat={lat}&lon={lon}&exclude={part}&appid={API key}&units={units}
    format : json (default)
    Documentation : https://openweathermap.org/api/one-call-api
    '''
    params= {'lat': lat, 'lon': lon, 'exclude': exclude, 'APPID':API_key, 'units': units}
    response = requests.get(base_url_weather, params=params)
    response.raise_for_status()
    sleep(1)
    return response.json()

In [6]:
def convertDt(unixDt):
    utc_time = datetime.fromtimestamp(unixDt, timezone.utc)
    local_time = utc_time.astimezone()
    
    return (local_time.strftime("%Y-%m-%d %H:%M:%S (%Z)"))

In [7]:
columns = ['city', 'latitude', 'longitude', 'day_time', 'temperature', 'precipitation_p', 'humidity', 'weather']
weather_desc =[]
for i in range(len(geo_df)):
    
    city = geo_df.loc[i, 'city']
    latitude = geo_df.loc[i, 'latitude']
    longitude = geo_df.loc[i, 'longitude']
    
    response_weather = OneCall_weather(lat = latitude, lon = longitude, exclude = 'current,minutely,hourly,alerts')

    for i in range(1, 8, 1):
        
        latitude = response_weather['lat']
        longitude = response_weather['lon']
        day_time = convertDt(int(response_weather['daily'][i]['dt']))
        
        temperature = response_weather['daily'][i]['temp']['day']
        precipitation_p = response_weather['daily'][i]['pop']
        humidity = response_weather['daily'][i]['humidity']
        weather = response_weather['daily'][i]['weather'][0]['description']
        
        weather_desc.append([city, latitude, longitude, day_time, temperature, precipitation_p, humidity, weather])
    
weather_df = pd.DataFrame(weather_desc, columns =columns)

# create a city unique identifer to be used as primary key later
weather_df['city_id'] = weather_df['latitude'].astype(str) + weather_df['longitude'].astype(str)
weather_df

Unnamed: 0,city,latitude,longitude,day_time,temperature,precipitation_p,humidity,weather,city_id
0,Mont Saint Michel,48.6360,-1.5115,2021-12-05 11:00:00 (UTC),8.35,0.94,71,moderate rain,48.636-1.5115
1,Mont Saint Michel,48.6360,-1.5115,2021-12-06 11:00:00 (UTC),6.63,1.00,87,light rain,48.636-1.5115
2,Mont Saint Michel,48.6360,-1.5115,2021-12-07 11:00:00 (UTC),7.49,1.00,83,moderate rain,48.636-1.5115
3,Mont Saint Michel,48.6360,-1.5115,2021-12-08 11:00:00 (UTC),6.35,0.93,76,moderate rain,48.636-1.5115
4,Mont Saint Michel,48.6360,-1.5115,2021-12-09 11:00:00 (UTC),9.14,0.98,82,light rain,48.636-1.5115
...,...,...,...,...,...,...,...,...,...
240,La Rochelle,46.1591,-1.1520,2021-12-07 11:00:00 (UTC),8.55,1.00,80,light rain,46.1591-1.152
241,La Rochelle,46.1591,-1.1520,2021-12-08 11:00:00 (UTC),8.48,0.99,65,light rain,46.1591-1.152
242,La Rochelle,46.1591,-1.1520,2021-12-09 11:00:00 (UTC),9.78,1.00,71,moderate rain,46.1591-1.152
243,La Rochelle,46.1591,-1.1520,2021-12-10 11:00:00 (UTC),8.40,1.00,86,moderate rain,46.1591-1.152


## 2.4) Cities where the weather will be the nicest ☀️ 😎

In [9]:
# mean and sd
weather_means = weather_df.groupby(['city', 'latitude', 'longitude']).mean()
weather_means = weather_means.sort_values(['temperature','precipitation_p', 'humidity'], ascending = (False, True, True))
weather_means.round(1)


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,temperature,precipitation_p,humidity
city,latitude,longitude,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Bormes les Mimosas,43.1572,6.3293,9.4,0.5,51.9
Saintes Maries de la mer,43.4523,4.4287,9.2,0.6,62.6
Collioure,42.5251,3.0832,9.2,0.5,64.0
Aigues Mortes,43.5658,4.1913,9.2,0.6,61.6
Bayonne,43.4933,-1.4751,9.0,1.0,85.3
Biarritz,43.4711,-1.5527,8.9,1.0,84.7
La Rochelle,46.1591,-1.152,8.8,1.0,74.1
Cassis,43.214,5.5396,8.8,0.5,64.7
Marseille,43.2962,5.37,8.7,0.5,66.3
Nimes,43.8374,4.3601,8.2,0.6,60.1


🗒 Comparing two cities with reference to the three criteria (temperature, humidity and percipitation_p) depends on one's perception of what could be a good weather. It depends on one's life style and especially its region climate. 
🗒  We could have used the Universal Thermal Climate Index (UTCI), however, the One Call API doesn't supply such data.
🗒  According to this article (Weather perception and its impact on out-of-home leisure activity participation decisions: https://www.tandfonline.com/doi/full/10.1080/21680566.2020.1733703), temperature, percipitation and UTCI are the most important factors that may influence someone's decision of going out. That's why, we will sort our data giving three available criteria in this order :  temperature, percipitation, humidity 

In [10]:
def select_best(n, list_):
    # select the first n cities 

    for i in range(n):
        list_.append(weather_means.index[i][0:3])
    return list_

n_best =5
cities_best = pd.DataFrame(select_best(n_best,[]), columns=['City', 'lat', 'lon'])

temp_list =[]

for i in cities_best.index:
    (city,lat,lon) = cities_best.loc[i]
    temp_list.append(weather_means.loc[(city,lat,lon),'temperature'])
    
cities_best['temperature'] = temp_list
cities_best
    

Unnamed: 0,City,lat,lon,temperature
0,Bormes les Mimosas,43.1572,6.3293,9.427143
1,Saintes Maries de la mer,43.4523,4.4287,9.24
2,Collioure,42.5251,3.0832,9.234286
3,Aigues Mortes,43.5658,4.1913,9.15
4,Bayonne,43.4933,-1.4751,9.042857


## 2.5) Export to a csv file 📁

In [11]:
def read_csv(path):
    return pd.read_csv(pathindex=False)  

def write_csv(data, path):
    data.to_csv(path)

In [12]:
# boto3
session = boto3.Session(aws_access_key_id="AKIA3EO3T37GTEZLQ4JX", 
                        aws_secret_access_key="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")

s3 = session.resource("s3")

bucket = s3.create_bucket(Bucket="jedha-project-3")

csv = weather_df.to_csv(index=False)

put_object = bucket.put_object(Key="cities_weather.csv", Body=csv)

## 2.6) Visualize results with map chart 📊

In [13]:
# read csv file or use dataframe 
cities_best

Unnamed: 0,City,lat,lon,temperature
0,Bormes les Mimosas,43.1572,6.3293,9.427143
1,Saintes Maries de la mer,43.4523,4.4287,9.24
2,Collioure,42.5251,3.0832,9.234286
3,Aigues Mortes,43.5658,4.1913,9.15
4,Bayonne,43.4933,-1.4751,9.042857


In [14]:
fig = px.scatter_mapbox(cities_best, lat="lat", lon="lon", color="temperature",zoom=10, mapbox_style="carto-positron")
#fig = px.scatter_mapbox(df_car, lat="centroid_lat", lon="centroid_lon", color="peak_hour", size="car_hours", zoom=10, mapbox_style="carto-positron")
fig.show()

In [15]:
df_car = px.data.carshare()
df_car.head()

Unnamed: 0,centroid_lat,centroid_lon,car_hours,peak_hour
0,45.471549,-73.588684,1772.75,2
1,45.543865,-73.562456,986.333333,23
2,45.48764,-73.642767,354.75,20
3,45.52287,-73.595677,560.166667,23
4,45.453971,-73.738946,2836.666667,19
