# Jedha Kayak project #block1
Notebook for development

In [4]:
# import
import os
import uuid
import requests
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from dotenv import load_dotenv
from IPython.display import display
from geopy.geocoders import Nominatim

In [5]:
# env variables
# Let python-dotenv populate the process environment before the lookup below.
load_dotenv()
# OpenWeather API key; None when "Open_weather_token" is not defined.
OPEN_WEATHER_TOKEN = os.environ.get('Open_weather_token')

## 1. Get weather data

In [6]:
# Destinations to compare, in the order used as the dataframe's row index.
french_top_35 = [
    "Mont Saint Michel",
    "St Malo",
    "Bayeux",
    "Le Havre",
    "Rouen",
    "Paris",
    "Amiens",
    "Lille",
    "Strasbourg",
    "Chateau du Haut Koenigsbourg",
    "Colmar",
    "Eguisheim",
    "Besancon",
    "Dijon",
    "Annecy",
    "Grenoble",
    "Lyon",
    "Gorges du Verdon",
    "Bormes les Mimosas",
    "Cassis",
    "Marseille",
    "Aix en Provence",
    "Avignon",
    "Uzes",
    "Nimes",
    "Aigues Mortes",
    "Saintes Maries de la mer",
    "Collioure",
    "Carcassonne",
    "Ariege",
    "Toulouse",
    "Montauban",
    "Biarritz",
    "Bayonne",
    "La Rochelle",
]

In [7]:
# init dataframe
# One row per destination; later cells add columns to this frame.
weather_df = pd.DataFrame({"cities": french_top_35})
display(weather_df.sample(2))

Unnamed: 0,cities
31,Montauban
0,Mont Saint Michel


In [8]:
# coordinates of cities
def cities_to_coords(city_name, geolocator=None):
    """Geocode a place name into its full address and coordinates.

    Parameters
    ----------
    city_name : str
        Free-text place name handed to the geocoder.
    geolocator : object, optional
        Any object exposing ``geocode(query)``. When omitted, a new
        ``Nominatim(user_agent="app")`` client is created for the call.
        Pass a shared instance to avoid rebuilding the client per city.

    Returns
    -------
    pd.Series
        ``[full_address, latitude, longitude]``, in that order.

    Raises
    ------
    ValueError
        If the geocoder returns no match for ``city_name``.
    """
    # geocoder
    if geolocator is None:
        geolocator = Nominatim(user_agent="app")

    location = geolocator.geocode(city_name)

    # A failed lookup yields None; fail with the offending name rather than
    # an AttributeError on `.address`.
    if location is None:
        raise ValueError(f"No geocoding result for city {city_name!r}")

    # full_address, latitude, longitude
    return pd.Series([location.address, location.latitude, location.longitude])

## RUN
# Geocode only once: skip the lookups when the columns are already present.
if "full_address" in weather_df.columns:
    display(weather_df.sample(2))
    print("full_address, latitude & longitude columns exists !")

else:
    geocoded = weather_df["cities"].apply(cities_to_coords)
    weather_df[["full_address", "latitude", "longitude"]] = geocoded
    display(weather_df.sample(2))

Unnamed: 0,cities,full_address,latitude,longitude
31,Montauban,"Montauban, Tarn-et-Garonne, Occitanie, France ...",44.017584,1.354999
26,Saintes Maries de la mer,"Saintes-Maries-de-la-Mer, Arles, Bouches-du-Rh...",43.452277,4.428717


In [9]:
def volume_rain(lat, lon, exclude, appid, session=None, timeout=10):
    """Average expected rain volume per forecast day at a location.

    Queries the OpenWeather One Call endpoint and, over the returned
    ``daily`` entries, computes ``sum(rain * pop) / number_of_days``.

    Parameters
    ----------
    lat, lon : float
        Coordinates sent as the ``lat`` / ``lon`` query parameters.
    exclude : str
        Value of the ``exclude`` query parameter.
    appid : str
        OpenWeather API key.
    session : object, optional
        Object exposing ``get(url, params=..., timeout=...)`` (for example a
        ``requests.Session``). Defaults to the ``requests`` module itself.
    timeout : float, optional
        Seconds to wait for the HTTP call before giving up.

    Returns
    -------
    float
        The average rounded to 3 decimals, or NaN when the response holds
        no daily entries.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status (raised by
        ``raise_for_status`` when the default ``requests`` client is used).
    """
    # open weather one call api
    # NOTE(review): the 2.5 One Call endpoint may have been superseded by
    # 3.0 - confirm it is still served for this API key.
    params = {
        'lat': lat,
        'lon': lon,
        'exclude': exclude,
        'appid': appid,
    }
    http = requests if session is None else session
    response = http.get(
        'https://api.openweathermap.org/data/2.5/onecall',
        params=params,
        timeout=timeout,
    )
    # Surface a bad key / quota error directly instead of a KeyError on "daily".
    response.raise_for_status()

    # normalize response
    daily = pd.json_normalize(response.json()["daily"])
    if daily.empty:
        return float("nan")

    # clean rain column: days without a "rain" entry count as 0, and the
    # column is absent altogether when no day has one.
    if "rain" in daily.columns:
        rain = daily["rain"].fillna(0)
    else:
        rain = pd.Series(0.0, index=daily.index)

    # calculate volume
    volume_rain_7days = (rain * daily["pop"]).sum() / len(daily)

    return round(volume_rain_7days, 3)

In [10]:
# fill weather_df
exclude = 'current,minutely,hourly,alerts'
appid = OPEN_WEATHER_TOKEN
# Skip the API calls when the column is already there (cell re-run).
if "volume_rain_7days" not in weather_df.columns:
    # Pair the coordinates row by row. The previous transpose + x[0]/x[1]
    # used integer keys on a label-indexed Series, a positional fallback
    # that pandas has deprecated.
    weather_df["volume_rain_7days"] = [
        volume_rain(lat, lon, exclude, appid)
        for lat, lon in zip(weather_df["latitude"], weather_df["longitude"])
    ]

display(weather_df.sample(2))

Unnamed: 0,cities,full_address,latitude,longitude,volume_rain_7days
25,Aigues Mortes,"Aigues-Mortes, Nîmes, Gard, Occitanie, France ...",43.565823,4.191284,2.645
23,Uzes,"Uzès, Nîmes, Gard, Occitanie, France métropoli...",44.012128,4.419672,2.102


In [11]:
# unique identifier (uuid)
# Assign one random UUID per row, only if the column is not there yet.
if "uuid" not in weather_df.columns:
    row_ids = [uuid.uuid4() for _ in weather_df.index]
    weather_df['uuid'] = pd.Series(row_ids, index=weather_df.index)

display(weather_df.sample(2))

Unnamed: 0,cities,full_address,latitude,longitude,volume_rain_7days,uuid
21,Aix en Provence,"Aix-en-Provence, Bouches-du-Rhône, Provence-Al...",43.529842,5.447474,2.566,fc388387-8378-47b6-9c60-2834649475ee
15,Grenoble,"Grenoble, Isère, Auvergne-Rhône-Alpes, France ...",45.18756,5.735782,7.462,590c8222-37f8-443a-b577-38826ab507e7


In [12]:
# reorder columns
# Identifier first, then place description, coordinates and the rain metric.
keep_col = [
    'uuid',
    'cities',
    'full_address',
    'latitude',
    'longitude',
    'volume_rain_7days',
]
weather_df = weather_df.loc[:, keep_col]

display(weather_df.sample(2))

Unnamed: 0,uuid,cities,full_address,latitude,longitude,volume_rain_7days
25,30fa5619-03b3-45aa-845e-c46de7589a71,Aigues Mortes,"Aigues-Mortes, Nîmes, Gard, Occitanie, France ...",43.565823,4.191284,2.645
27,db214571-646b-4e91-bab1-8e7475e57327,Collioure,"Collioure, Céret, Pyrénées-Orientales, Occitan...",42.52505,3.083155,1.168


In [13]:
# save dataframe
weather_data = "../data/temp/weather_data.csv"
overwrite = False  # set to True to replace an existing export

if overwrite or not os.path.exists(weather_data):
    # to_csv does not create missing parent folders; make sure the target
    # directory exists so the cell also works on a fresh checkout.
    os.makedirs(os.path.dirname(weather_data), exist_ok=True)
    weather_df.to_csv(weather_data)

else:
    print(f"{weather_data} exists !")

../data/temp/weather_data.csv exists !


In [18]:
# fetch best destinations
# Ascending sort on the rain metric: the driest forecasts come first.
top_5_destinations = weather_df.sort_values(by='volume_rain_7days').head(5)
top_20_destinations = weather_df.sort_values(by='volume_rain_7days').head(20)
display(top_5_destinations.sample(2), top_20_destinations.sample(2))


Unnamed: 0,uuid,cities,full_address,latitude,longitude,volume_rain_7days
9,bb9ce686-e536-4202-ada1-9ee4db6d47d8,Chateau du Haut Koenigsbourg,"Château du Haut-Kœnigsbourg, Chemin fermé suit...",48.24949,7.344296,2.275
17,a7ad6334-307c-451f-9a55-449e22544a73,Gorges du Verdon,"Gorges du Verdon, Route des Crêtes, Les Ferrai...",43.749656,6.328562,1.45


Unnamed: 0,uuid,cities,full_address,latitude,longitude,volume_rain_7days
20,03752e13-f4e7-4ae6-ada0-9c63311a42b7,Marseille,"Marseille, Bouches-du-Rhône, Provence-Alpes-Cô...",43.296174,5.369953,2.306
23,7c8e4332-0cbf-41af-aa1b-1bec0dadbd8b,Uzes,"Uzès, Nîmes, Gard, Occitanie, France métropoli...",44.012128,4.419672,2.102


In [21]:
# plot function
def plot(df, lat_col, lon_col, color_col, mapbox_token_file, zoom=5) :
    # read token file
    px.set_mapbox_access_token(open(mapbox_token_file).read())

    # fig
    fig = px.scatter_mapbox(df, lat=lat_col, lon=lon_col,     
                        color=color_col, size=color_col,
                        color_continuous_scale=px.colors.cyclical.IceFire, 
                        size_max=15, zoom=zoom)

    fig.show()

In [22]:
# top 5 destinations plot
# cols
lat_col, lon_col = "latitude", "longitude"
color_col = "Volume of rain"
# token
mapbox_token_file = "../.mapbox_token"

# df: work on a renamed copy so the legend shows a readable label
df = top_5_destinations.rename(columns={"volume_rain_7days": "Volume of rain"})

plot(df, lat_col, lon_col, color_col, mapbox_token_file)

In [26]:
# top 20 destinations plot
# cols
lat_col, lon_col = "latitude", "longitude"
color_col = "Volume of rain"
# token
mapbox_token_file = "../.mapbox_token"

# df: work on a renamed copy so the legend shows a readable label
df = top_20_destinations.rename(columns={"volume_rain_7days": "Volume of rain"})

# wider view than the default zoom
plot(df, lat_col, lon_col, color_col, mapbox_token_file, zoom=3.5)