In [1]:
from bs4 import BeautifulSoup
import requests

import pandas as pd
import json

from time import sleep
from datetime import datetime, timezone

import boto3

#import plotly.io as pio
#pio.renderers.default = "svg"
#import plotly.express as px



In [2]:
# Full itinerary of candidate destinations.
ALL_CITIES = [
    "Mont Saint Michel", "St Malo", "Bayeux", "Le Havre", "Rouen", "Paris", "Amiens", "Lille", "Strasbourg",
    "Chateau du Haut Koenigsbourg", "Colmar", "Eguisheim", "Besancon", "Dijon", "Annecy", "Grenoble", "Lyon",
    "Gorges du Verdon", "Bormes les Mimosas", "Cassis", "Marseille", "Aix en Provence", "Avignon", "Uzes",
    "Nimes", "Aigues Mortes", "Saintes Maries de la mer", "Collioure", "Carcassonne", "Ariege", "Toulouse",
    "Montauban", "Biarritz", "Bayonne", "La Rochelle",
]

# Number of leading destinations to process. A small sample keeps the number
# of API calls low while developing; set to None to process the whole list.
N_CITIES = 5

cities_list = ALL_CITIES[:N_CITIES]
path = './cities.csv'

# 2) Get weather data from each destination  

## 2.1) Get gps coordinates from nominatim API 🌍

In [3]:
base_url_geo = "https://nominatim.openstreetmap.org/search?"

# Seconds to wait for Nominatim before giving up; without a timeout a stalled
# connection would block the notebook indefinitely.
NOMINATIM_TIMEOUT = 10

# Nominatim's usage policy asks clients to identify themselves instead of
# relying on the HTTP library's stock User-Agent.
# NOTE(review): replace the value with one that identifies your application.
NOMINATIM_HEADERS = {"User-Agent": "jedha-project-3-notebook"}

def nominatim_geocode(address, format='json', limit=1, **kwargs):
    '''
    Thin wrapper around the Nominatim search API.
    Documentation : https://nominatim.org/release-docs/develop/api/Search/

    Parameters
    ----------
    address : free-form query sent as the "q" parameter. Pass None to omit it
              (requests drops parameters whose value is None), e.g. when using
              structured parameters such as city=... / country=... via kwargs.
    format  : response format requested from the API (default 'json').
    limit   : maximum number of results requested (default 1).
    kwargs  : any additional query-string parameters, forwarded as-is.

    Returns
    -------
    The decoded JSON body of the response.

    Raises
    ------
    requests.HTTPError if the server answers with a 4xx/5xx status code.
    '''
    params = {"q": address, "format": format, "limit": limit, **kwargs}
    response = requests.get(
        base_url_geo,
        params=params,
        headers=NOMINATIM_HEADERS,
        timeout=NOMINATIM_TIMEOUT,
    )
    # Fail loudly on HTTP errors rather than trying to parse an error page.
    response.raise_for_status()
    results = response.json()
    # Pause between consecutive calls to stay within the public rate limit.
    sleep(1)
    return results

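🗒 **_raise_for_status_** raises an `HTTPError` when the response status code signals a client or server error (4xx or 5xx), so a failed request is never silently treated as valid data.  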
🗒 **_time.sleep_** is used to delay code execution for some amount of time. Many requests, fired in rapid succession can, depending on the server in question, quickly take up all of the free connections and effectively become a **DoS Attack**. To allow for breathing space, as well as to make sure we don't negatively impact either the users of the website or the website itself, we'd limit the number of requests sent by delaying each one.

## 2.2) Save and structure retrieved data 📚

In [4]:
columns = ['city', 'latitude', 'longitude']
data = []

for city in cities_list:
    # address=None leaves the free-form "q" parameter out of the request so
    # the structured city/country parameters are used instead.
    response = nominatim_geocode(address=None, city=city, country='France')

    # An unknown place yields an empty result list; report which city failed
    # instead of surfacing a bare IndexError from response[0].
    if not response:
        raise LookupError(f"Nominatim returned no result for city {city!r}")

    best_match = response[0]
    data.append([city, best_match['lat'], best_match['lon']])

geo_df = pd.DataFrame(data=data, columns=columns)
geo_df

Unnamed: 0,city,latitude,longitude
0,Mont Saint Michel,48.6359541,-1.511459954959514
1,St Malo,48.649518,-2.0260409
2,Bayeux,49.2764624,-0.7024738
3,Le Havre,49.4938975,0.1079732
4,Rouen,49.4411823,1.085229501995006


## 2.3) Get weather data from One Call API ⛅

In [5]:
base_url_weather = 'https://api.openweathermap.org/data/2.5/onecall?'

def OneCall_weather(lat, lon, exclude, API_key=None, units='metric'):
    '''
    Query the OpenWeatherMap One Call API for one location.

    url : https://api.openweathermap.org/data/2.5/onecall?lat={lat}&lon={lon}&exclude={part}&appid={API key}&units={units}
    format : json (default)
    Documentation : https://openweathermap.org/api/one-call-api
    NOTE(review): One Call 2.5 may have been retired in favour of 3.0 - confirm
    the endpoint is still served before relying on it.

    Parameters
    ----------
    lat, lon : coordinates of the location.
    exclude  : comma-separated parts of the response to leave out.
    API_key  : OpenWeatherMap API key. When None, the key is read from the
               OPENWEATHER_API_KEY environment variable so that no secret is
               stored in the notebook.
    units    : unit system requested from the API (default 'metric').

    Returns
    -------
    The decoded JSON body of the response.

    Raises
    ------
    ValueError if no API key is supplied and the environment variable is unset.
    requests.HTTPError if the server answers with a 4xx/5xx status code.
    '''
    import os

    if API_key is None:
        API_key = os.environ.get('OPENWEATHER_API_KEY')
    if not API_key:
        raise ValueError(
            "No OpenWeatherMap API key: pass API_key=... or set the "
            "OPENWEATHER_API_KEY environment variable."
        )

    params = {'lat': lat, 'lon': lon, 'exclude': exclude, 'APPID': API_key, 'units': units}
    # The timeout prevents a stalled connection from blocking the notebook.
    response = requests.get(base_url_weather, params=params, timeout=10)
    response.raise_for_status()
    payload = response.json()
    # Pause between consecutive calls to avoid hammering the API.
    sleep(1)
    return payload

In [6]:
def convertDt(unixDt, tz=None):
    """
    Format a Unix timestamp as 'YYYY-MM-DD HH:MM:SS (TZ)'.

    Parameters
    ----------
    unixDt : seconds since the Unix epoch (UTC).
    tz     : optional datetime.tzinfo to express the result in. When None
             (the default) the system's local timezone is used, so the output
             then depends on the machine running the notebook; pass an
             explicit timezone (e.g. timezone.utc) for reproducible results.

    Returns
    -------
    str : the formatted date-time followed by the timezone name in parentheses.
    """
    utc_time = datetime.fromtimestamp(unixDt, timezone.utc)
    # astimezone(None) converts to the system local timezone.
    local_time = utc_time.astimezone(tz)
    return local_time.strftime("%Y-%m-%d %H:%M:%S (%Z)")

In [7]:
columns = ['city', 'latitude', 'longitude', 'day_time', 'temperature', 'precipitation_p', 'humidity', 'weather']
weather_desc = []

# Iterate over the geocoded cities by row rather than by positional label, so
# the loop does not depend on geo_df having a default 0..n-1 index.
for place in geo_df.itertuples(index=False):

    response_weather = OneCall_weather(
        lat=place.latitude,
        lon=place.longitude,
        exclude='current,minutely,hourly,alerts',
    )

    # Store the coordinates echoed back by the weather API (they are the ones
    # the forecast actually refers to), not the geocoder's values.
    latitude = response_weather['lat']
    longitude = response_weather['lon']

    # Skip the first daily entry (presumably today's forecast - confirm) and
    # keep up to the next seven; slicing tolerates a shorter list.
    for day in response_weather['daily'][1:8]:
        weather_desc.append([
            place.city,
            latitude,
            longitude,
            convertDt(int(day['dt'])),
            day['temp']['day'],
            day['pop'],
            day['humidity'],
            day['weather'][0]['description'],
        ])

weather_df = pd.DataFrame(weather_desc, columns=columns)

# Create a unique city identifier to be used as primary key later.
# NOTE(review): plain concatenation of latitude and longitude can be ambiguous
# when the longitude is positive (no separator); consider adding one if no
# consumer depends on the current format.
weather_df['city_id'] = weather_df['latitude'].astype(str) + weather_df['longitude'].astype(str)
weather_df

Unnamed: 0,city,latitude,longitude,day_time,temperature,precipitation_p,humidity,weather,city_id
0,Mont Saint Michel,48.636,-1.5115,2021-12-05 11:00:00 (UTC),8.78,0.98,68,moderate rain,48.636-1.5115
1,Mont Saint Michel,48.636,-1.5115,2021-12-06 11:00:00 (UTC),6.82,1.0,90,light rain,48.636-1.5115
2,Mont Saint Michel,48.636,-1.5115,2021-12-07 11:00:00 (UTC),7.84,1.0,80,moderate rain,48.636-1.5115
3,Mont Saint Michel,48.636,-1.5115,2021-12-08 11:00:00 (UTC),6.73,1.0,75,moderate rain,48.636-1.5115
4,Mont Saint Michel,48.636,-1.5115,2021-12-09 11:00:00 (UTC),6.59,1.0,83,light rain,48.636-1.5115
5,Mont Saint Michel,48.636,-1.5115,2021-12-10 11:00:00 (UTC),8.4,1.0,85,moderate rain,48.636-1.5115
6,Mont Saint Michel,48.636,-1.5115,2021-12-11 11:00:00 (UTC),7.55,0.96,76,light rain,48.636-1.5115
7,St Malo,48.6495,-2.026,2021-12-05 11:00:00 (UTC),9.54,1.0,63,light rain,48.6495-2.026
8,St Malo,48.6495,-2.026,2021-12-06 11:00:00 (UTC),8.67,1.0,90,moderate rain,48.6495-2.026
9,St Malo,48.6495,-2.026,2021-12-07 11:00:00 (UTC),8.15,1.0,81,moderate rain,48.6495-2.026


## 2.4) Cities where the weather will be the nicest ☀️ 😎

In [8]:
# Mean forecast per city over the retrieved days.
# The criteria are selected explicitly: the frame also holds text columns
# (day_time, weather, city_id) and recent pandas versions raise a TypeError
# when asked to average those instead of silently dropping them.
criteria = ['temperature', 'precipitation_p', 'humidity']
weather_means = weather_df.groupby(['city', 'latitude', 'longitude'])[criteria].mean()
# Warmest first; ties broken by lowest precipitation probability, then humidity.
weather_means = weather_means.sort_values(criteria, ascending=[False, True, True])
weather_means.round(1)


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,temperature,precipitation_p,humidity
city,latitude,longitude,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
St Malo,48.6495,-2.026,8.7,1.0,75.0
Mont Saint Michel,48.636,-1.5115,7.5,1.0,79.6
Le Havre,49.4939,0.108,6.3,0.9,78.6
Bayeux,49.2765,-0.7025,5.8,0.9,82.6
Rouen,49.4412,1.0852,5.1,0.9,80.6


🗒 Comparing two cities on the three criteria (temperature, humidity and precipitation_p) depends on one's perception of what good weather is, which in turn depends on one's lifestyle and especially on the climate of one's home region. 
🗒  We could have used the Universal Thermal Climate Index (UTCI); however, the One Call API doesn't supply such data.
🗒  According to this article (Weather perception and its impact on out-of-home leisure activity participation decisions: https://www.tandfonline.com/doi/full/10.1080/21680566.2020.1733703), temperature, precipitation and UTCI are the most important factors that may influence someone's decision to go out. That is why we sort our data using the three available criteria in this order: temperature, precipitation, humidity. 

In [9]:
# Mean forecast per city over the retrieved days (unrounded values).
# The criteria are selected explicitly: the frame also holds text columns
# (day_time, weather, city_id) and recent pandas versions raise a TypeError
# when asked to average those instead of silently dropping them.
criteria = ['temperature', 'precipitation_p', 'humidity']
weather_means = weather_df.groupby(['city', 'latitude', 'longitude'])[criteria].mean()
# Warmest first; ties broken by lowest precipitation probability, then humidity.
weather_means = weather_means.sort_values(criteria, ascending=[False, True, True])
weather_means

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,temperature,precipitation_p,humidity
city,latitude,longitude,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
St Malo,48.6495,-2.026,8.658571,1.0,75.0
Mont Saint Michel,48.636,-1.5115,7.53,0.991429,79.571429
Le Havre,49.4939,0.108,6.295714,0.922857,78.571429
Bayeux,49.2765,-0.7025,5.774286,0.938571,82.571429
Rouen,49.4412,1.0852,5.14,0.861429,80.571429


In [10]:
def select_best(n, list_, ranking=None):
    """
    Append the names of the n top-ranked cities to list_ and return it.

    Parameters
    ----------
    n       : number of cities to keep. If the ranking holds fewer than n
              rows, all of them are used.
    list_   : list that receives the city names; it is modified in place.
    ranking : frame whose index has the city name as its first level and whose
              rows are already sorted best-first. Defaults to the notebook's
              global weather_means.

    Returns
    -------
    list : the same list_ object, extended with the selected city names.

    Raises
    ------
    ValueError if n is negative.
    """
    if n < 0:
        raise ValueError("n must be non-negative")
    if ranking is None:
        ranking = weather_means

    # Each index entry is a (city, latitude, longitude) tuple; keep the city.
    list_.extend(key[0] for key in ranking.index[:n])
    return list_

# Collect the three top-ranked destinations into a fresh list and display it.
cities_best = select_best(n=3, list_=[])
cities_best

['St Malo', 'Mont Saint Michel', 'Le Havre']

## 2.5) Export to a csv file 📁

In [11]:
def read_csv(path):
    """
    Load a CSV file into a DataFrame.

    Parameters
    ----------
    path : location of the CSV file to read.

    Returns
    -------
    pandas.DataFrame with the file's content.
    """
    return pd.read_csv(path)

def write_csv(data, path, index=True):
    """
    Write a DataFrame to a CSV file.

    Parameters
    ----------
    data  : DataFrame to export.
    path  : destination of the CSV file.
    index : whether to write the index as leading column(s). Defaults to True;
            pass False for frames whose index carries no information, so that
            reading the file back does not produce an extra unnamed column.
    """
    data.to_csv(path, index=index)

In [12]:
# Upload the forecast table to S3 with boto3.
# Credentials are deliberately NOT written in the notebook: a Session created
# without arguments lets boto3 resolve them from its default chain (e.g. the
# AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY environment variables or the
# shared ~/.aws/credentials file).
# NOTE(review): the access key that used to be embedded in this cell should be
# treated as compromised and rotated.
session = boto3.Session()

s3 = session.resource("s3")

bucket = s3.create_bucket(Bucket="jedha-project-3")

# Serialise without the index: it is a plain row counter with no meaning.
csv = weather_df.to_csv(index=False)

put_object = bucket.put_object(Key="cities_weather.csv", Body=csv)

## 2.6) Visualize results with map chart 📊

In [13]:
# read csv file or use dataframe 