# ETL desde API Historical Weather

Importar librerías

In [1]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
import openmeteo_requests
import requests_cache
import pandas as pd
from retry_requests import retry
from datetime import datetime, timedelta

Solicitud a la API

In [3]:
# Date range requested from the API (inclusive, ISO "YYYY-MM-DD" strings).
# Set the start date to the day BEFORE the first day actually required:
# there is a 4-hour offset with respect to GMT.
fecha_inicial = '2022-01-01'
fecha_final = '2024-02-29'

In [4]:
# Open-Meteo client on top of a cached HTTP session (cache name '.cache',
# expire_after = -1 so cached responses never expire), wrapped so that failed
# requests are retried up to 5 times with a 0.2 backoff factor.
cache_session = requests_cache.CachedSession('.cache', expire_after = -1)
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)

# Hourly variables to request. Their position in this list is the index used
# to read each series back from the response further down, so keep the order
# here and the unpacking below in sync.
hourly_variables = ["temperature_2m", "relative_humidity_2m", "rain", "snowfall"]

url = "https://archive-api.open-meteo.com/v1/archive"
params = {
    "latitude": 40.7143,
    "longitude": -74.006,
    "start_date": fecha_inicial,
    "end_date": fecha_final,
    "hourly": hourly_variables,
    "timezone": "America/New_York",
}
responses = openmeteo.weather_api(url, params=params)

# Only one location is requested, so only the first response is processed.
# Iterate over `responses` to support several locations or weather models.
response = responses[0]
print(
    f"Coordinates {response.Latitude()}°N {response.Longitude()}°E",
    f"Elevation {response.Elevation()} m asl",
    f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}",
    f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s",
    sep = "\n",
)

# Read each hourly series by position, in the same order as `hourly_variables`.
hourly = response.Hourly()
(
    hourly_temperature_2m,
    hourly_relative_humidity_2m,
    hourly_rain,
    hourly_snowfall,
) = [hourly.Variables(position).ValuesAsNumpy() for position in range(len(hourly_variables))]

# Rebuild the time axis from the response metadata: start/end are epoch
# seconds interpreted as UTC, spaced by the reported interval. The range is
# left-inclusive, so the end timestamp itself is not part of the index.
first_timestamp = pd.to_datetime(hourly.Time(), unit = "s", utc = True)
last_timestamp = pd.to_datetime(hourly.TimeEnd(), unit = "s", utc = True)
sampling_step = pd.Timedelta(seconds = hourly.Interval())

hourly_data = {
    "date": pd.date_range(
        start = first_timestamp,
        end = last_timestamp,
        freq = sampling_step,
        inclusive = "left",
    ),
    "temperature_2m": hourly_temperature_2m,
    "relative_humidity_2m": hourly_relative_humidity_2m,
    "rain": hourly_rain,
    "snowfall": hourly_snowfall,
}

hourly_dataframe = pd.DataFrame(data = hourly_data)
print(hourly_dataframe)

Coordinates 40.738136291503906°N -74.04254150390625°E
Elevation 51.0 m asl
Timezone b'America/New_York' b'EDT'
Timezone difference to GMT+0 -14400 s
                           date  temperature_2m  relative_humidity_2m  rain  \
0     2022-01-01 04:00:00+00:00          7.5225            100.000000   0.0   
1     2022-01-01 05:00:00+00:00          7.6725             99.659775   0.0   
2     2022-01-01 06:00:00+00:00          7.7725             99.660034   0.0   
3     2022-01-01 07:00:00+00:00          7.4725             99.659233   0.0   
4     2022-01-01 08:00:00+00:00          7.4225             99.659088   0.1   
...                         ...             ...                   ...   ...   
18955 2024-02-29 23:00:00+00:00          1.3225             36.604572   0.0   
18956 2024-03-01 00:00:00+00:00         -0.2775             45.395584   0.0   
18957 2024-03-01 01:00:00+00:00         -0.1775             45.968769   0.0   
18958 2024-03-01 02:00:00+00:00         -1.3275             5

In [5]:
# Summarise the hourly frame: row count, per-column non-null counts, dtypes and memory usage.
hourly_dataframe.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18960 entries, 0 to 18959
Data columns (total 5 columns):
 #   Column                Non-Null Count  Dtype              
---  ------                --------------  -----              
 0   date                  18960 non-null  datetime64[ns, UTC]
 1   temperature_2m        18960 non-null  float32            
 2   relative_humidity_2m  18960 non-null  float32            
 3   rain                  18960 non-null  float32            
 4   snowfall              18960 non-null  float32            
dtypes: datetime64[ns, UTC](1), float32(4)
memory usage: 444.5 KB


In [5]:
# Preview the first rows of the hourly frame.
# NOTE(review): the saved output of this cell shows 2009 timestamps, while the
# request above covers 2022-01-01 to 2024-02-29 — the output looks stale;
# restart the kernel and run all cells to refresh it.
hourly_dataframe.head()

Unnamed: 0,date,temperature_2m,relative_humidity_2m,rain,snowfall
0,2009-01-01 04:00:00+00:00,-7.5665,45.961708,0.0,0.0
1,2009-01-01 05:00:00+00:00,-7.6165,46.333775,0.0,0.0
2,2009-01-01 06:00:00+00:00,-7.8165,46.858982,0.0,0.0
3,2009-01-01 07:00:00+00:00,-7.7665,47.667458,0.0,0.0
4,2009-01-01 08:00:00+00:00,-8.1665,47.544762,0.0,0.0
