# Project : Plan your trip with Kayak

In [1]:
import pandas as pd
# Render up to 100 columns so that wide DataFrames are not truncated on display.
pd.set_option("display.max_columns", 100)

## 'Extract' : Collect the data

In [2]:
# Destinations to evaluate; the names are used verbatim as geocoding queries
# and as the 'city' key when joining geolocation and weather data.
cities = [
    "Mont Saint Michel",
    "St Malo",
    "Bayeux",
    "Le Havre",
    "Rouen",
    "Paris",
    "Amiens",
    "Lille",
    "Strasbourg",
    "Chateau du Haut Koenigsbourg",
    "Colmar",
    "Eguisheim",
    "Besancon",
    "Dijon",
    "Annecy",
    "Grenoble",
    "Lyon",
    "Gorges du Verdon",
    "Bormes les Mimosas",
    "Cassis",
    "Marseille",
    "Aix en Provence",
    "Avignon",
    "Uzes",
    "Nimes",
    "Aigues Mortes",
    "Saintes Maries de la mer",
    "Collioure",
    "Carcassonne",
    "Ariege",
    "Toulouse",
    "Montauban",
    "Biarritz",
    "Bayonne",
    "La Rochelle",
]

### Get weather data via an API :

In [3]:
import requests
import time

# Nominatim's usage policy asks clients to identify themselves; the stock
# python-requests User-Agent may be rejected.
nominatim_headers = {'User-Agent': 'kayak-trip-planner-notebook'}

city = cities[0]
# Passing the query through `params` lets requests URL-encode the city name
# (spaces, accents) instead of concatenating it raw into the URL.
r = requests.get(
    'https://nominatim.openstreetmap.org/search',
    params={'format': 'json', 'city': city},
    headers=nominatim_headers,
    timeout=10,
)
r.raise_for_status()  # fail on HTTP errors instead of trying to parse an error body
results = r.json()
if not results:
    # An empty result list would otherwise surface as a bare IndexError.
    raise ValueError(f"Nominatim returned no result for city={city!r}")
city_json = results[0] #We consider the first search result as the most relevant one

city_json

{'place_id': 156094736,
 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright',
 'osm_type': 'way',
 'osm_id': 211285890,
 'boundingbox': ['48.6349172', '48.637031', '-1.5133292', '-1.5094796'],
 'lat': '48.6359541',
 'lon': '-1.511459954959514',
 'display_name': 'Mont Saint-Michel, Le Mont-Saint-Michel, Avranches, Manche, Normandie, France métropolitaine, 50170, France',
 'class': 'place',
 'type': 'islet',
 'importance': 0.755436556781574}

In [4]:
# Pull the display name and the coordinates out of the selected search result.
name, lat, lon = (city_json[field] for field in ('display_name', 'lat', 'lon'))
print(f"{name} : \n -- latitude : {lat} \n -- longitude : {lon}")

Mont Saint-Michel, Le Mont-Saint-Michel, Avranches, Manche, Normandie, France métropolitaine, 50170, France : 
 -- latitude : 48.6359541 
 -- longitude : -1.511459954959514


#### Generalization to the list of 35 cities - collecting the geolocation data and storing it in a list of JSON records

In [5]:
import os
import logging

# First attempt: query Nominatim with the structured `city=` parameter for each
# entry of `cities` and keep the top search result.
geolocs_info = []

for city in cities:
    response = requests.get('https://nominatim.openstreetmap.org/search?format=json&city='+city)
    top_hit = response.json()[0]  # raises IndexError when the search returns nothing
    print(city)
    geolocs_info.append(top_hit)
    time.sleep(1) #wait for 1sec between requests to respect Nominatim's Usage Policy
    

Mont Saint Michel
St Malo
Bayeux
Le Havre
Rouen
Paris
Amiens
Lille
Strasbourg
Chateau du Haut Koenigsbourg
Colmar
Eguisheim
Besancon
Dijon
Annecy
Grenoble
Lyon


IndexError: list index out of range

We have a problem with "Gorges du Verdon", probably because it is not literally a city, so the API finds nothing in response to a `city=` request.  
We will therefore use a `q=` request, which performs a more general free-form search:

In [6]:
import os
import logging

geolocs_info = []

# Nominatim's usage policy asks clients to identify themselves; the stock
# python-requests User-Agent may be rejected.
nominatim_headers = {'User-Agent': 'kayak-trip-planner-notebook'}

for city in cities:
    # `params` URL-encodes the query (spaces, accents) instead of concatenating it raw.
    r = requests.get(
        'https://nominatim.openstreetmap.org/search',
        params={'format': 'json', 'q': 'France,' + city}, #We add 'France' in the query to be more specific
        headers=nominatim_headers,
        timeout=10,
    )
    r.raise_for_status()  # fail on HTTP errors instead of trying to parse an error body
    results = r.json()
    if not results:
        # Stop with an explicit message: a silently skipped city would leave
        # geolocs_info shorter than `cities` and break any positional pairing.
        raise ValueError(f"Nominatim returned no result for query 'France,{city}'")
    city_json = results[0]  # the first search result is taken as the most relevant one
    city_json['city'] = city  # keep the original label to join with the weather data
    print(city, "...done")
    geolocs_info.append(city_json)
    time.sleep(1) #wait for 1sec between requests to respect Nominatim's Usage Policy
    

Mont Saint Michel ...done
St Malo ...done
Bayeux ...done
Le Havre ...done
Rouen ...done
Paris ...done
Amiens ...done
Lille ...done
Strasbourg ...done
Chateau du Haut Koenigsbourg ...done
Colmar ...done
Eguisheim ...done
Besancon ...done
Dijon ...done
Annecy ...done
Grenoble ...done
Lyon ...done
Gorges du Verdon ...done
Bormes les Mimosas ...done
Cassis ...done
Marseille ...done
Aix en Provence ...done
Avignon ...done
Uzes ...done
Nimes ...done
Aigues Mortes ...done
Saintes Maries de la mer ...done
Collioure ...done
Carcassonne ...done
Ariege ...done
Toulouse ...done
Montauban ...done
Biarritz ...done
Bayonne ...done
La Rochelle ...done


In [7]:
# Inspect the raw geocoding records (one dict per city, with the added 'city' key).
geolocs_info

[{'place_id': 156094680,
  'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright',
  'osm_type': 'way',
  'osm_id': 211285890,
  'boundingbox': ['48.6349172', '48.637031', '-1.5133292', '-1.5094796'],
  'lat': '48.6359541',
  'lon': '-1.511459954959514',
  'display_name': 'Mont Saint-Michel, Plateforme du Saut-\xadGaultier, Le Mont-Saint-Michel, Avranches, Manche, Normandie, France métropolitaine, 50170, France',
  'class': 'tourism',
  'type': 'attraction',
  'importance': 0.8654365567815739,
  'icon': 'https://nominatim.openstreetmap.org/ui/mapicons/poi_point_of_interest.p.20.png',
  'city': 'Mont Saint Michel'},
 {'place_id': 297756747,
  'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright',
  'osm_type': 'relation',
  'osm_id': 905534,
  'boundingbox': ['48.5979853', '48.6949736', '-2.0765246', '-1.9367259'],
  'lat': '48.649518',
  'lon': '-2.0260409',
  'display_name': 'Saint-Malo, Ille-et-Vilaine, Bretagne, France métro

Formatting the data into a Pandas dataframe : 

In [8]:
# One row per geocoding record, one column per key of the records.
geolocs_df = pd.DataFrame(data=geolocs_info)
display(geolocs_df)

Unnamed: 0,place_id,licence,osm_type,osm_id,boundingbox,lat,lon,display_name,class,type,importance,icon,city
0,156094680,"Data © OpenStreetMap contributors, ODbL 1.0. h...",way,211285890,"[48.6349172, 48.637031, -1.5133292, -1.5094796]",48.6359541,-1.511459954959514,"Mont Saint-Michel, Plateforme du Saut-­Gaultie...",tourism,attraction,0.865437,https://nominatim.openstreetmap.org/ui/mapicon...,Mont Saint Michel
1,297756747,"Data © OpenStreetMap contributors, ODbL 1.0. h...",relation,905534,"[48.5979853, 48.6949736, -2.0765246, -1.9367259]",48.649518,-2.0260409,"Saint-Malo, Ille-et-Vilaine, Bretagne, France ...",boundary,administrative,0.786467,https://nominatim.openstreetmap.org/ui/mapicon...,St Malo
2,297981358,"Data © OpenStreetMap contributors, ODbL 1.0. h...",relation,145776,"[49.2608124, 49.2934736, -0.7275671, -0.6757378]",49.2764624,-0.7024738,"Bayeux, Calvados, Normandie, France métropolit...",boundary,administrative,0.7927,https://nominatim.openstreetmap.org/ui/mapicon...,Bayeux
3,298137491,"Data © OpenStreetMap contributors, ODbL 1.0. h...",relation,104492,"[49.4516697, 49.5401463, 0.0667992, 0.1955556]",49.4938975,0.1079732,"Le Havre, Seine-Maritime, Normandie, France mé...",boundary,administrative,0.932333,https://nominatim.openstreetmap.org/ui/mapicon...,Le Havre
4,297518815,"Data © OpenStreetMap contributors, ODbL 1.0. h...",relation,75628,"[49.4172001, 49.4652601, 1.0300648, 1.1521157]",49.4404591,1.0939658,"Rouen, Seine-Maritime, Normandie, France métro...",boundary,administrative,0.860073,https://nominatim.openstreetmap.org/ui/mapicon...,Rouen
5,297417241,"Data © OpenStreetMap contributors, ODbL 1.0. h...",relation,7444,"[48.8155755, 48.902156, 2.224122, 2.4697602]",48.8588897,2.3200410217200766,"Paris, Île-de-France, France métropolitaine, F...",boundary,administrative,1.05171,https://nominatim.openstreetmap.org/ui/mapicon...,Paris
6,297534793,"Data © OpenStreetMap contributors, ODbL 1.0. h...",relation,114347,"[49.846837, 49.9505487, 2.2235574, 2.3457767]",49.8941708,2.2956951,"Amiens, Somme, Hauts-de-France, France métropo...",boundary,administrative,0.844949,https://nominatim.openstreetmap.org/ui/mapicon...,Amiens
7,297472400,"Data © OpenStreetMap contributors, ODbL 1.0. h...",relation,58404,"[50.6008264, 50.6612596, 2.9679677, 3.125725]",50.6365654,3.0635282,"Lille, Nord, Hauts-de-France, France métropoli...",boundary,administrative,0.873204,https://nominatim.openstreetmap.org/ui/mapicon...,Lille
8,297508568,"Data © OpenStreetMap contributors, ODbL 1.0. h...",relation,71033,"[48.491861, 48.6461896, 7.6881371, 7.8360646]",48.584614,7.7507127,"Strasbourg, Bas-Rhin, Grand Est, France métrop...",boundary,administrative,0.894805,https://nominatim.openstreetmap.org/ui/mapicon...,Strasbourg
9,120791766,"Data © OpenStreetMap contributors, ODbL 1.0. h...",way,61044809,"[48.249302, 48.2496794, 7.3436964, 7.3449443]",48.249489800000006,7.34429620253195,"Château du Haut-Kœnigsbourg, Chemin fermé suit...",historic,castle,0.663955,https://nominatim.openstreetmap.org/ui/mapicon...,Chateau du Haut Koenigsbourg


In [9]:
# Drop the licence/OSM bookkeeping columns, which the rest of the notebook does not use.
# errors='ignore' keeps the cell re-runnable: without it a second execution
# raises KeyError because the columns are already gone.
geolocs_df = geolocs_df.drop(columns = ['licence','osm_type','osm_id'], errors = 'ignore')

In [10]:
!pip install python-dotenv



In [11]:
from dotenv import load_dotenv
import os
load_dotenv()  # load variables from a .env file into the process environment
api_key = os.getenv('api_key') #API_key obtained after free subscription
if not api_key:
    # Without this check the request is still sent and the failure only shows
    # up later as a missing key in the JSON response.
    raise RuntimeError("Environment variable 'api_key' is not set (expected in the .env file)")
units = 'metric'
# Coordinates of the first city (row label 0 of geolocs_df).
lat, lon = geolocs_df.loc[0, ['lat', 'lon']]
r = requests.get(
    'https://api.openweathermap.org/data/2.5/onecall',
    params={'lat': lat, 'lon': lon, 'appid': api_key, 'units': units},
    timeout=10,
)
r.raise_for_status()  # surface HTTP errors (e.g. a rejected key) immediately
weather_json = r.json()
weather_json['city']=cities[0]  # tag the response with the city label used for joins
weather_json

{'lat': 48.636,
 'lon': -1.5115,
 'timezone': 'Europe/Paris',
 'timezone_offset': 7200,
 'current': {'dt': 1659476127,
  'sunrise': 1659415270,
  'sunset': 1659469402,
  'temp': 18.39,
  'feels_like': 18.77,
  'pressure': 1017,
  'humidity': 95,
  'dew_point': 17.58,
  'uvi': 0,
  'clouds': 48,
  'visibility': 10000,
  'wind_speed': 3.23,
  'wind_deg': 349,
  'wind_gust': 6.05,
  'weather': [{'id': 802,
    'main': 'Clouds',
    'description': 'scattered clouds',
    'icon': '03n'}]},
 'minutely': [{'dt': 1659476160, 'precipitation': 0},
  {'dt': 1659476220, 'precipitation': 0},
  {'dt': 1659476280, 'precipitation': 0},
  {'dt': 1659476340, 'precipitation': 0},
  {'dt': 1659476400, 'precipitation': 0},
  {'dt': 1659476460, 'precipitation': 0},
  {'dt': 1659476520, 'precipitation': 0},
  {'dt': 1659476580, 'precipitation': 0},
  {'dt': 1659476640, 'precipitation': 0},
  {'dt': 1659476700, 'precipitation': 0},
  {'dt': 1659476760, 'precipitation': 0},
  {'dt': 1659476820, 'precipitation'

In [12]:
# List the top-level sections of the weather response.
weather_json.keys()

dict_keys(['lat', 'lon', 'timezone', 'timezone_offset', 'current', 'minutely', 'hourly', 'daily', 'city'])

In [13]:
# Number of entries in the 'daily' forecast list.
len(weather_json['daily'])

8

In [14]:
# Fields available for a single forecast day.
weather_json['daily'][0].keys()

dict_keys(['dt', 'sunrise', 'sunset', 'moonrise', 'moonset', 'moon_phase', 'temp', 'feels_like', 'pressure', 'humidity', 'dew_point', 'wind_speed', 'wind_deg', 'wind_gust', 'weather', 'clouds', 'pop', 'uvi'])

The dates are in UNIX (=POSIX) format (number of seconds since 1st January 1970).
To convert it to datetime format we can use datetime.fromtimestamp method :

In [15]:
from datetime import datetime, timezone
# Without an explicit tz, fromtimestamp() converts to the local timezone of the
# machine running the notebook, so the printed time would differ between hosts.
date_example = datetime.fromtimestamp(weather_json['current']['dt'], tz=timezone.utc)
print(date_example)

2022-08-02 21:35:27


In [16]:
from datetime import datetime, timezone
# Convert the first and last 'daily' timestamps with an explicit UTC timezone;
# a naive fromtimestamp() would use the host's local timezone instead.
first_day_example = datetime.fromtimestamp(weather_json['daily'][0]['dt'], tz=timezone.utc)
last_day_example = datetime.fromtimestamp(weather_json['daily'][-1]['dt'], tz=timezone.utc)
print(first_day_example)
print(last_day_example)

2022-08-02 12:00:00
2022-08-09 12:00:00


We see that the 'daily' information is available from current day to the day D+7 included.  
Reminder: the times shown are in UTC. Paris is UTC+1 in winter and UTC+2 in summer; the response above reports `timezone_offset: 7200`, i.e. UTC+2.

In [17]:
# Keep only the city label and the daily forecast from the full response.
weather_0 = {'city': weather_json['city'], 'daily': weather_json['daily']}
weather_0

{'city': 'Mont Saint Michel',
 'daily': [{'dt': 1659441600,
   'sunrise': 1659415270,
   'sunset': 1659469402,
   'moonrise': 1659432600,
   'moonset': 1659477300,
   'moon_phase': 0.15,
   'temp': {'day': 26.03,
    'min': 15.66,
    'max': 27.71,
    'night': 18.35,
    'eve': 23.36,
    'morn': 17.17},
   'feels_like': {'day': 26.03, 'night': 18.72, 'eve': 23.5, 'morn': 17.24},
   'pressure': 1018,
   'humidity': 53,
   'dew_point': 15.2,
   'wind_speed': 5.74,
   'wind_deg': 334,
   'wind_gust': 8.24,
   'weather': [{'id': 804,
     'main': 'Clouds',
     'description': 'overcast clouds',
     'icon': '04d'}],
   'clouds': 100,
   'pop': 0,
   'uvi': 7.44},
  {'dt': 1659528000,
   'sunrise': 1659501751,
   'sunset': 1659555713,
   'moonrise': 1659523260,
   'moonset': 0,
   'moon_phase': 0.18,
   'temp': {'day': 25.43,
    'min': 16.66,
    'max': 26.5,
    'night': 18.21,
    'eve': 22.5,
    'morn': 18.09},
   'feels_like': {'day': 25.39, 'night': 18.57, 'eve': 22.61, 'morn': 18.

In [18]:
# 'feels_like' of the first forecast day is itself a dict (day / night / eve / morn).
weather_0['daily'][0]['feels_like']

{'day': 26.03, 'night': 18.72, 'eve': 23.5, 'morn': 17.24}

In [19]:
# Naive conversion: the scalar 'city' is repeated on every row and each
# forecast day stays a single dict in the 'daily' column (see output).
weather_0_df = pd.DataFrame(weather_0)
weather_0_df

Unnamed: 0,city,daily
0,Mont Saint Michel,"{'dt': 1659441600, 'sunrise': 1659415270, 'sun..."
1,Mont Saint Michel,"{'dt': 1659528000, 'sunrise': 1659501751, 'sun..."
2,Mont Saint Michel,"{'dt': 1659614400, 'sunrise': 1659588232, 'sun..."
3,Mont Saint Michel,"{'dt': 1659700800, 'sunrise': 1659674714, 'sun..."
4,Mont Saint Michel,"{'dt': 1659787200, 'sunrise': 1659761196, 'sun..."
5,Mont Saint Michel,"{'dt': 1659873600, 'sunrise': 1659847678, 'sun..."
6,Mont Saint Michel,"{'dt': 1659960000, 'sunrise': 1659934161, 'sun..."
7,Mont Saint Michel,"{'dt': 1660046400, 'sunrise': 1660020645, 'sun..."


In [20]:
# Iterating a dict yields its keys, in insertion order.
list(weather_0)

['city', 'daily']

In [None]:
# Flatten the response: one row per element of 'daily', with every other
# top-level key of weather_0 carried along as a metadata column.
meta_keys = [key for key in weather_0 if key != 'daily']
weather_0_df = pd.json_normalize(weather_0, record_path = 'daily', meta = meta_keys)
weather_0_df

In [None]:
# Number of entries in the 'weather' list of the first forecast day.
len(pd.json_normalize(weather_0, record_path = ['daily'])['weather'][0])

In [None]:
# explode() gives each element of the per-day 'weather' lists its own row.
weather_0_df['weather'].explode()

In [None]:
# Expand the exploded weather dicts into one column per key.
pd.json_normalize(weather_0_df['weather'].explode())

In [None]:
# Replace the nested 'weather' column by flat 'weather.*' columns.
# The guard keeps the cell re-runnable: once 'weather' has been dropped, a
# second execution would otherwise raise KeyError.
if 'weather' in weather_0_df.columns:
    # Take the first weather entry of each day so exactly one row is produced
    # per forecast day; exploding a day with several entries would add rows
    # that can no longer be matched one-to-one with the forecast frame.
    first_entries = [entries[0] if entries else {} for entries in weather_0_df['weather']]
    weather_flat = pd.json_normalize(first_entries).add_prefix('weather.')
    weather_flat.index = weather_0_df.index  # align explicitly for the column-wise concat
    weather_0_df = pd.concat(
        [
            weather_0_df.drop(columns = ['weather']),
            weather_flat
        ],
        axis = 1)

In [None]:
# Show the forecast frame after the 'weather' column has been flattened.
weather_0_df

In [None]:
# Attach the geolocation columns to every forecast row of the matching city.
city_df_0 = pd.merge(geolocs_df, weather_0_df, on = 'city')
city_df_0

### Generalization to the 35 cities - storing all localization and weather data in a single dataframe

In [None]:
# Build one DataFrame per city: geolocation columns joined with the flattened
# daily forecast, collected in `city_dataframes`.
city_dataframes = []
# Iterate over the geolocation rows themselves so each city name is paired with
# its own coordinates, instead of relying on `cities` and geolocs_df sharing
# the same positional order.
for city, lat, lon in geolocs_df[['city', 'lat', 'lon']].itertuples(index = False, name = None):
    r = requests.get(
        'https://api.openweathermap.org/data/2.5/onecall',
        params = {'lat': lat, 'lon': lon, 'appid': api_key, 'units': units},
        timeout = 10,
    )
    r.raise_for_status()  # surface HTTP errors instead of failing later on a missing 'daily' key
    # Keep only the daily forecast, tagged with the city label used for the join.
    weather_json = {'city': city, 'daily': r.json()['daily']}

    # One row per forecast day; 'city' is carried along as a metadata column.
    weather_df = pd.json_normalize(weather_json, record_path = ['daily'], meta = ['city'])

    # Flatten the nested 'weather' list using its first entry, so the result has
    # exactly one row per forecast day and lines up with weather_df.
    first_entries = [entries[0] if entries else {} for entries in weather_df['weather']]
    weather_flat = pd.json_normalize(first_entries).add_prefix('weather.')
    weather_flat.index = weather_df.index
    weather_df = pd.concat(
    [
        weather_df.drop(columns = ['weather']),
        weather_flat
    ],
    axis = 1)

    # Inner join on 'city' keeps only the geolocation row of the current city.
    city_df = geolocs_df.merge(weather_df, on = ['city'])

    city_dataframes.append(city_df)

city_dataframes[1]

In [None]:
# Stack the per-city frames. ignore_index=True gives the result a fresh 0..n-1
# index; otherwise every city's rows would reuse the same index labels.
cities_df = pd.concat(city_dataframes, axis = 0, ignore_index = True)
cities_df

### Selecting criteria and saving the CSV file

In [None]:
import numpy as np
# Aggregation applied to each column when grouping by city. The string 'mean'
# selects pandas' built-in groupby mean; passing the np.mean callable yields
# the same values but emits a FutureWarning on recent pandas versions.
criterias = {
    'feels_like.day': 'mean',
    'pop': 'mean'
}

In [None]:
from sklearn.preprocessing import StandardScaler

ideal_temperature = 30  # target 'feels_like' temperature, in the units requested from the API

# Per-city aggregates of the forecast columns listed in `criterias`.
cities_df_grouped = cities_df.groupby('city').agg(criterias)
# Absolute distance between the mean felt temperature and the ideal one, rounded to an integer.
cities_df_grouped['delta_with_ideal_temp'] = cities_df_grouped['feels_like.day'].apply(lambda temp : round(abs(temp - ideal_temperature)))
cities_df_grouped['pop_%'] = cities_df_grouped['pop']*100

# Select the two criteria by name rather than by position (iloc[:, 2:]), so the
# scaling keeps working if more aggregated columns are added to `criterias`.
criteria_columns = ['delta_with_ideal_temp', 'pop_%']

# Standardize both criteria so they contribute on a comparable scale.
sc = StandardScaler()
scaled_criterias = pd.DataFrame(
    sc.fit_transform(cities_df_grouped[criteria_columns]),
    index = cities_df_grouped.index,
    columns = ['delta_temp_scaled', 'pop_scaled'])
# Lower is better for both criteria, so their sum is a score to minimize.
scaled_criterias['score_to_minimize'] = scaled_criterias['delta_temp_scaled'] + scaled_criterias['pop_scaled']

cities_df_grouped = pd.concat([cities_df_grouped, scaled_criterias], axis = 1)
# Best city first; reset_index(drop=False) turns the 'city' index back into a column.
cities_df_grouped = cities_df_grouped.sort_values(['score_to_minimize'], ascending = True).reset_index(drop = False)
cities_df_grouped.insert(0, 'id', cities_df_grouped.index)
cities_df_grouped['rank'] = cities_df_grouped.index + 1
cities_df_grouped

In [None]:
# Write the ranked cities to 'weather_data.csv' in the working directory, without the DataFrame index.
cities_df_grouped.to_csv('weather_data.csv', index = False)