In [1]:
import openmeteo_requests

import requests_cache
import pandas as pd
import numpy as np
from retry_requests import retry
from geopy.geocoders import Nominatim

In [2]:
# Open-Meteo API client: responses are cached and failed requests are retried.
CACHE_NAME = '.cache'
CACHE_EXPIRE_AFTER = -1  # requests-cache convention: -1 means cached responses never expire
MAX_RETRIES = 5
BACKOFF_FACTOR = 0.2

cache_session = requests_cache.CachedSession(CACHE_NAME, expire_after=CACHE_EXPIRE_AFTER)
retry_session = retry(cache_session, retries=MAX_RETRIES, backoff_factor=BACKOFF_FACTOR)
openmeteo = openmeteo_requests.Client(session=retry_session)

# Nominatim geocoder, used in later cells for forward and reverse lookups.
GEOCODER_USER_AGENT = "VldApp061085"
geolocator = Nominatim(user_agent=GEOCODER_USER_AGENT)

In [3]:
# Hourly archive request for ten locations, January through March 2024.
# Every weather variable needed later must appear in `hourly_variables`; its
# position in that list matters because the processing loop further down reads
# the variables back by index.
url = "https://archive-api.open-meteo.com/v1/archive"

# (latitude, longitude) pairs, one per requested location.
station_coordinates = [
    (35.6766, 139.6911),
    (35.4657, 139.6154),
    (34.6924, 135.512),
    (35.1845, 136.9515),
    (43.058, 141.4286),
    (33.5677, 130.3717),
    (38.2677, 140.8691),
    (34.9844, 135.7572),
    (35.8875, 139.6512),
    (34.4112, 132.4528),
]

hourly_variables = [
    "temperature_2m",
    "relative_humidity_2m",
    "dew_point_2m",
    "apparent_temperature",
    "precipitation",
    "weather_code",
    "wind_speed_10m",
    "wind_speed_100m",
    "wind_direction_10m",
    "wind_direction_100m",
    "is_day",
    "sunshine_duration",
]

params = dict(
    latitude=[lat for lat, lon in station_coordinates],
    longitude=[lon for lat, lon in station_coordinates],
    start_date="2024-01-01",
    end_date="2024-03-31",
    hourly=hourly_variables,
    timezone="Asia/Tokyo",
)
responses = openmeteo.weather_api(url, params=params)

# Geocoder check: one reverse lookup (coordinates -> address) ...
probe_point = "35.56883755634787, 139.64242960634186"
location = geolocator.reverse(probe_point, language='en')
print(location.raw['address'])

# ... and one forward lookup (name -> location), shown as the cell output.
cit = geolocator.geocode("Sendai")
cit

In [4]:
# Build one hourly DataFrame per API response and collect them in `df_list`.
#
# Column names are taken from params["hourly"] (the list sent with the request),
# so position i in that list is always paired with hourly.Variables(i). This
# replaces a hand-maintained block of per-variable index lookups that had to be
# kept in sync with the request manually.
df_list = []
hourly_variable_names = list(params["hourly"])

for response in responses:
    # Reverse-geocode the coordinates reported by the API to get a city label.
    location = geolocator.reverse(
        f"{response.Latitude()},{response.Longitude()}", language='en'
    )
    # reverse() returns None when no result is found; fall back to an empty
    # address so the city is simply left blank instead of crashing the loop.
    address = location.raw.get('address', {}) if location is not None else {}
    city = address.get('city', '')
    print(f"{city} done")

    hourly = response.Hourly()

    # Timestamps: [Time, TimeEnd) stepped by Interval seconds, kept in UTC.
    hourly_data = {"date": pd.date_range(
        start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
        end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
        freq=pd.Timedelta(seconds=hourly.Interval()),
        inclusive="left",
    )}
    for position, variable_name in enumerate(hourly_variable_names):
        hourly_data[variable_name] = hourly.Variables(position).ValuesAsNumpy()

    # Per-location metadata, repeated on every row.
    # `timezone` is stored exactly as returned by the API (bytes in the output
    # shown in this notebook); it is decoded in a later cell.
    hourly_dataframe = pd.DataFrame(data=hourly_data).assign(
        location_id=response.LocationId(),
        latitude=response.Latitude(),
        longitude=response.Longitude(),
        elevation=response.Elevation(),
        city=city,
        timezone=response.TimezoneAbbreviation(),
        UtcOffsetSeconds=response.UtcOffsetSeconds(),
    )
    df_list.append(hourly_dataframe)

Shibuya done
Yokohama done
Osaka done
Nagoya done
Sapporo done
Fukuoka done
Sendai done
Kyoto done
Saitama done
Hiroshima done


In [5]:
# Stack the per-location frames into one table.
# ignore_index=True gives the result a fresh 0..N-1 index; without it every
# location contributes its own 0-based labels, so index labels would repeat
# and label-based lookups (df.loc[...]) would be ambiguous.
df = pd.concat(df_list, ignore_index=True)

In [6]:
# Show the dtype of every column in the combined frame.
df.dtypes

date                    datetime64[ns, UTC]
temperature_2m                      float32
relative_humidity_2m                float32
dew_point_2m                        float32
apparent_temperature                float32
precipitation                       float32
weather_code                        float32
wind_speed_10m                      float32
wind_speed_100m                     float32
wind_direction_10m                  float32
wind_direction_100m                 float32
is_day                              float32
sunshine_duration                   float32
location_id                           int64
latitude                            float64
longitude                           float64
elevation                           float64
city                                 object
timezone                             object
UtcOffsetSeconds                      int64
dtype: object

In [7]:
# Preview the first rows of the combined frame.
df.head()

Unnamed: 0,date,temperature_2m,relative_humidity_2m,dew_point_2m,apparent_temperature,precipitation,weather_code,wind_speed_10m,wind_speed_100m,wind_direction_10m,wind_direction_100m,is_day,sunshine_duration,location_id,latitude,longitude,elevation,city,timezone,UtcOffsetSeconds
0,2023-12-31 15:00:00+00:00,10.17,64.489006,3.77,4.171356,0.0,1.0,31.072817,49.162083,316.877869,317.077179,0.0,0.0,0,35.676624,139.691116,37.0,Shibuya,b'JST',32400
1,2023-12-31 16:00:00+00:00,10.12,59.023479,2.47,2.560192,0.0,2.0,40.102448,59.829357,319.73349,320.371033,0.0,0.0,0,35.676624,139.691116,37.0,Shibuya,b'JST',32400
2,2023-12-31 17:00:00+00:00,10.07,57.558666,2.07,2.457815,0.0,2.0,39.972969,59.673187,316.094604,318.423981,0.0,0.0,0,35.676624,139.691116,37.0,Shibuya,b'JST',32400
3,2023-12-31 18:00:00+00:00,9.62,56.423988,1.37,1.923639,0.0,1.0,39.724167,58.307774,313.53125,316.250702,0.0,0.0,0,35.676624,139.691116,37.0,Shibuya,b'JST',32400
4,2023-12-31 19:00:00+00:00,8.87,55.422287,0.42,0.331301,0.0,0.0,44.436306,65.330826,319.600159,321.488098,0.0,0.0,0,35.676624,139.691116,37.0,Shibuya,b'JST',32400


In [8]:
# Final tidy frame:
#   * the original UTC timestamp is kept as `date_UTC`;
#   * `date` becomes the naive local wall-clock time, obtained by dropping the
#     UTC tz info and adding each row's own `UtcOffsetSeconds` (instead of a
#     hard-coded 32400 s, so rows with a different offset are handled too);
#   * `timezone` is decoded from bytes to text;
#   * `weather_code` and `is_day` are cast from float to integer.
df_final = (
    df
    .rename(columns={'date': 'date_UTC'})
    .assign(
        date=lambda x: (
            x['date_UTC'].dt.tz_localize(None)
            + pd.to_timedelta(x['UtcOffsetSeconds'], unit='s')
        ),
        timezone=lambda x: x['timezone'].str.decode('utf-8'),
        weather_code=lambda x: x['weather_code'].astype('int'),
        is_day=lambda x: x['is_day'].astype('int'),
    )
)

In [9]:
# Write a small random sample to CSV for inspection.
# random_state makes the sample identical on every run; min() keeps the call
# from raising when the frame holds fewer than 200 rows.
(
    df_final
    .sample(n=min(200, len(df_final)), random_state=42)
    .to_csv("test.csv", index=False)
)