In [10]:
import pandas as pd
import geopandas as gpd
import numpy as np
import requests
import pickle
import datetime as dt
import meteostat as met

In [2]:
from geopy.geocoders import Nominatim
# import urllib.parse

def get_coordinates(city_name):
    """Geocode a free-text place name with Nominatim.

    Parameters
    ----------
    city_name : str
        Query string handed to the geocoder.

    Returns
    -------
    tuple or None
        ``(latitude, longitude, altitude)`` taken from the geocoder's result,
        or ``None`` when the geocoder returns a falsy result.
    """
    match = Nominatim(user_agent="geoapiExercises").geocode(city_name)

    # Guard clause: the geocoder produced nothing usable for this query.
    if not match:
        return None

    return match.latitude, match.longitude, match.altitude

In [3]:
def get_city_coords(cities_df, geocode=None):
    """Geocode every search string in ``cities_df['string']``.

    Parameters
    ----------
    cities_df : pandas.DataFrame
        Frame with a ``'string'`` column holding one geocoder query per row.
    geocode : callable, optional
        Function mapping a query string to a ``(lat, lon, ...)`` sequence, or
        to a falsy value when nothing is found. Defaults to
        ``get_coordinates``.

    Returns
    -------
    tuple of (list, list)
        ``(lat, lon)``, each with exactly one entry per row of ``cities_df``.
        Rows that could not be geocoded hold ``np.nan`` in both lists.
    """
    if geocode is None:
        geocode = get_coordinates

    lat, lon = [], []
    for search_string in cities_df['string']:
        coords = geocode(search_string)
        if coords:
            lat.append(coords[0])
            lon.append(coords[1])
        else:
            # Pad BOTH lists so they stay aligned with the rows of the frame;
            # padding only one would shift every later value onto the wrong row.
            lat.append(np.nan)
            lon.append(np.nan)
    return lat, lon

In [4]:
import requests

def format_points(points):
    """Convert coordinate pairs into latitude/longitude dictionaries.

    Parameters
    ----------
    points : iterable
        Indexable items where element 0 is the latitude and element 1 is the
        longitude.

    Returns
    -------
    list of dict
        One ``{"latitude": ..., "longitude": ...}`` dictionary per input
        point, in input order.
    """
    return [
        {"latitude": pair[0], "longitude": pair[1]}
        for pair in points
    ]


def get_altitudes(points, timeout=60):
    """Look up the elevation of each point via the Open-Elevation API.

    Parameters
    ----------
    points : iterable
        Coordinate pairs accepted by ``format_points`` (latitude first,
        longitude second).
    timeout : float or tuple, optional
        Timeout passed to ``requests.post``. Defaults to 60.

    Returns
    -------
    list or None
        The ``'elevation'`` value of each entry in the response's
        ``'results'`` list (``None`` for entries without that key), or
        ``None`` when the service answers with a non-200 status code.

    Raises
    ------
    requests.exceptions.RequestException
        Propagated from ``requests.post`` (for example on a timeout or
        connection failure).
    """
    url = "https://api.open-elevation.com/api/v1/lookup"
    payload = {"locations": format_points(points)}

    # Without an explicit timeout, requests waits indefinitely on a stalled
    # server, which would hang the notebook.
    response = requests.post(url, json=payload, timeout=timeout)

    if response.status_code != 200:
        print(f"Failed to fetch altitudes. Status code: {response.status_code}")
        return None

    return [entry.get('elevation', None) for entry in response.json()['results']]


In [5]:
# Load the city list and name the index so it can serve as the city identifier.
cities = pd.read_csv('city_list.csv').rename_axis('city_id')

# Free-text query for the geocoder, built from the city and state columns.
cities['string'] = cities['Cidade'] + ', ' + cities['Estado'] + ', Brasil'

# Geocode each city, then fetch the elevation of the resulting coordinates.
cities['lat'], cities['lon'] = get_city_coords(cities)
cities['Altitude'] = get_altitudes(cities[['lat', 'lon']].values)

# Point geometry built from lon (x), lat (y) and altitude (z).
cities['geometry'] = gpd.points_from_xy(
    cities['lon'],
    cities['lat'],
    cities['Altitude'],
)

In [6]:
# Persist the enriched city table so later runs can reuse it.
with open('city_db.pkl', 'wb') as city_db_file:
    pickle.dump(cities, city_db_file)

In [19]:
# Time window for the hourly observations.
start = dt.datetime(2021, 1, 1)
end = dt.datetime(2023, 10, 31)

# Hourly weather data per city, keyed by the city's index label.
met_data = {}
for city_id, row in cities.iterrows():
    location = met.Point(row['lat'], row['lon'], row['Altitude'])
    # Search radius around the point (presumably metres; confirm against the
    # meteostat documentation).
    location.radius = 120000

    data = met.Hourly(location, start, end).fetch()
    met_data[city_id] = data



In [22]:
# Report, for every city, the percentage of rows with no 'prcp' value.
for city_id, data in met_data.items():
    null_pct_by_column = data.isnull().sum().mul(100).div(len(data))
    print('City_id: ', city_id, ' ', null_pct_by_column['prcp'])

City_id:  0   0.1331020852660025
City_id:  1   0.1331020852660025
City_id:  2   0.1331020852660025
City_id:  3   0.1331020852660025
City_id:  4   0.1371354817892147
City_id:  5   0.1331020852660025
City_id:  6   0.1371354817892147
City_id:  7   0.1371354817892147
City_id:  8   0.1371354817892147
City_id:  9   0.1331020852660025
City_id:  10   0.1371354817892147
City_id:  11   0.1331020852660025
City_id:  12   0.08066793046424393
City_id:  13   0.1331020852660025
City_id:  14   0.1331020852660025
City_id:  15   0.1371354817892147
City_id:  16   0.1331020852660025
City_id:  17   0.1331020852660025
City_id:  18   0.1331020852660025
City_id:  19   0.1331020852660025
City_id:  20   0.1371354817892147
City_id:  21   0.1331020852660025
City_id:  22   0.1331020852660025
City_id:  23   0.1371354817892147
City_id:  24   0.1371354817892147
City_id:  25   0.1371354817892147
City_id:  26   0.1331020852660025


In [23]:
# Persist the per-city hourly weather data fetched above.
with open('met_data.pkl', 'wb') as met_data_file:
    pickle.dump(met_data, met_data_file)

In [None]:
from ydata_profiling import ProfileReport

# Profile a single city's hourly data. The dataset is picked explicitly from
# `met_data` (the last city stored) rather than through a loop variable left
# over from a previous cell, so this cell does not depend on which cells ran
# before it or in what order.
profiled_city_id = list(met_data)[-1]
eda = ProfileReport(met_data[profiled_city_id])
display(eda)