# Challenge
**Author:** Wenyue Zhu

Gathering the Weather Data for 5 Western European countries and comparing the precipitation levels of each

In [453]:
import json
import pandas as pd
import requests 
import csv
import numpy as np
from lets_plot import *
from lets_plot.mapping import *
LetsPlot.setup_html()
import numpy as np

# Data Collection

**1. Finding and extracting the latitude and longitude values for any number of country specified under the selected_countries and selected_cities variables**

In [454]:
selected_countries = ['FR', 'GB', 'DE', 'ES', 'CH', 'IT']
selected_cities    = ['Paris', 'London', 'Berlin', 'Madrid', 'Bern', 'Rome']

In [455]:
def get_lat_lon_multiple(country_codes, cities):
    filepath = '../data/world_cities.csv'
    world_cities = pd.read_csv(filepath)
    locations = {}

    for country_code, city in zip(country_codes, cities):

        city_data = world_cities[(world_cities['country'] == country_code) & 
                                 (world_cities['name'] == city)]

        latitude = city_data.iloc[0]['lat']
        longitude = city_data.iloc[0]['lng']
        
        locations[f"{city}"] = (latitude, longitude)
    
    return locations

locations = get_lat_lon_multiple(selected_countries, selected_cities)
print(locations)

{'Paris': (np.float64(48.85341), np.float64(2.3488)), 'London': (np.float64(51.50853), np.float64(-0.12574)), 'Berlin': (np.float64(52.52437), np.float64(13.41053)), 'Madrid': (np.float64(40.4165), np.float64(-3.70256)), 'Bern': (np.float64(46.94809), np.float64(7.44744)), 'Rome': (np.float64(41.89193), np.float64(12.51133))}


In [456]:
with open('../data/lat_lng.json', 'w') as file:
    json.dump(locations, file)

**2. Using the extracted lat, lng values to extract the rain data from the countries previously specified**

In [457]:
def get_forecast_data(latitude, longitude, start_date, end_date):

    base_forecast_url = "https://archive-api.open-meteo.com/v1/archive?"
    params_lat_long = "latitude=" + str(latitude) + "&longitude="  + str(longitude)
    params_others = f"&start_date={start_date}&end_date={end_date}&daily=rain_sum&timezone=auto"

    final_url = base_forecast_url + params_lat_long + params_others
        
    response = requests.get(final_url)

    forecast_data = response.json()
    forecast_temperatures = forecast_data['daily']
        
    return forecast_temperatures

In [458]:
total_rain = {}

for city, (lat, lon) in locations.items():
    year_rain = get_forecast_data(lat, lon, start_date='2023-01-01', end_date='2023-12-31')
    total_rain[city] = year_rain

print(total_rain)

{'Paris': {'time': ['2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04', '2023-01-05', '2023-01-06', '2023-01-07', '2023-01-08', '2023-01-09', '2023-01-10', '2023-01-11', '2023-01-12', '2023-01-13', '2023-01-14', '2023-01-15', '2023-01-16', '2023-01-17', '2023-01-18', '2023-01-19', '2023-01-20', '2023-01-21', '2023-01-22', '2023-01-23', '2023-01-24', '2023-01-25', '2023-01-26', '2023-01-27', '2023-01-28', '2023-01-29', '2023-01-30', '2023-01-31', '2023-02-01', '2023-02-02', '2023-02-03', '2023-02-04', '2023-02-05', '2023-02-06', '2023-02-07', '2023-02-08', '2023-02-09', '2023-02-10', '2023-02-11', '2023-02-12', '2023-02-13', '2023-02-14', '2023-02-15', '2023-02-16', '2023-02-17', '2023-02-18', '2023-02-19', '2023-02-20', '2023-02-21', '2023-02-22', '2023-02-23', '2023-02-24', '2023-02-25', '2023-02-26', '2023-02-27', '2023-02-28', '2023-03-01', '2023-03-02', '2023-03-03', '2023-03-04', '2023-03-05', '2023-03-06', '2023-03-07', '2023-03-08', '2023-03-09', '2023-03-10', '2023-03-11', 

In [459]:
with open('../data/historical_rain.json', 'w') as file:
    json.dump(total_rain,file)

# 2. Simple Data Analysis across five countries

In [460]:
with open('../data/historical_rain.json', 'r') as file:
    rain_data = json.load(file)

type(rain_data)

dict

In [461]:
df = pd.DataFrame()
for city in rain_data:
    df_city = pd.DataFrame(rain_data[city])
    df_city['city'] = city
    df = pd.concat([df, df_city])

print(df)

           time  rain_sum   city
0    2023-01-01       2.0  Paris
1    2023-01-02       7.4  Paris
2    2023-01-03       1.7  Paris
3    2023-01-04       2.7  Paris
4    2023-01-05       0.4  Paris
..          ...       ...    ...
360  2023-12-27       0.0   Rome
361  2023-12-28       0.0   Rome
362  2023-12-29       0.0   Rome
363  2023-12-30       0.1   Rome
364  2023-12-31       0.4   Rome

[2190 rows x 3 columns]


# Highest Rainfall in a single day in a year

In [462]:
max_rain = df['rain_sum'].max()
for index, row in df[df['rain_sum'] == max_rain].iterrows():
    date = row['time']
    place = row['city']
    print(f"Most rainfall is: {max_rain}, at {date}, in {place}")

Most rainfall is: 62.7, at 2023-09-03, in Madrid


# Share of rainfall in 2023 across the countries

In [463]:
rain_total = df['rain_sum'].sum()
print(rain_total)

5292.9


In [464]:
total_rain_dict = {}
for city in df['city'].unique():
    total_rain = df[df['city'] == city]['rain_sum'].sum()
    print(f'The total rain in {city} is {total_rain}')
    total_rain_dict[city] = total_rain
print(total_rain_dict)

The total rain in Paris is 963.8
The total rain in London is 769.5
The total rain in Berlin is 777.8000000000001
The total rain in Madrid is 516.4000000000001
The total rain in Bern is 1340.3
The total rain in Rome is 925.0999999999999
{'Paris': np.float64(963.8), 'London': np.float64(769.5), 'Berlin': np.float64(777.8000000000001), 'Madrid': np.float64(516.4000000000001), 'Bern': np.float64(1340.3), 'Rome': np.float64(925.0999999999999)}


In [465]:
df_total_rain_dict = pd.DataFrame(list(total_rain_dict.items()), columns=['city', 'Total Rain'])
df_total_rain_dict

Unnamed: 0,city,Total Rain
0,Paris,963.8
1,London,769.5
2,Berlin,777.8
3,Madrid,516.4
4,Bern,1340.3
5,Rome,925.1


In [466]:
tooltip_content = layer_tooltips().line('Total Rain|@{..count..} (@{..proppct..})')\
                                  .line('total|@{..sum..}')

In [467]:
ggplot(df_total_rain_dict) + \
    geom_pie(aes(fill=as_discrete('city', order_by='..count..'), weight='Total Rain'),
             tooltips=tooltip_content.format('..sum..', '.1f')) + \
    ggsize(600, 400)

In [468]:
ggplot(df_total_rain_dict) + \
    geom_pie(aes(fill=as_discrete('city', order_by='..count..'), weight='Total Rain'),
             tooltips=tooltip_content.format('..sum..', '.1f'),
             size=20, hole=.3,
             labels=layer_labels().line('@city').size(14)) + \
    ggsize(600, 400) + \
    theme_void() + \
    theme(legend_position='none')