In [1]:
import os
import requests
from uber_time_estimation.configs.settings import ROOT_DIR
from tqdm.notebook import tqdm  # Use notebook-friendly tqdm

# --- Configuration -----------------------------------------------------------
# Base URL the monthly parquet files are fetched from, and where they are saved.
BASE_URL = "https://d37ci6vzurychx.cloudfront.net/trip-data/"
SAVE_DIR = ROOT_DIR / "data" / "nyc_tlc_yellow_taxi_data"

# Years and months to download (customize as needed)
years = range(2009, 2025)  # 2009 to 2024
months = range(1, 13)  # Jan to Dec

# (connect, read) timeouts in seconds, so a stalled connection cannot hang the loop.
REQUEST_TIMEOUT = (10, 60)
# Number of bytes requested per streamed chunk.
CHUNK_SIZE = 8192

# Create directory if it doesn't exist
os.makedirs(SAVE_DIR, exist_ok=True)


def download_file(session, file_url, save_path, desc):
    """Stream ``file_url`` to ``save_path`` and return the HTTP status code.

    The body is written to ``<save_path>.part`` and only renamed to
    ``save_path`` once the whole stream has been written. An interrupted or
    failed download therefore never leaves a truncated file at ``save_path``
    that a later run would mistake for a finished download.

    Args:
        session: ``requests.Session`` used to issue the GET request.
        file_url: Full URL of the file to fetch.
        save_path: Destination path of the finished file.
        desc: Label shown on the per-file progress bar.

    Returns:
        The response status code. Anything other than 200 means nothing was
        written to ``save_path``.

    Raises:
        requests.RequestException: On connection errors or timeouts.
        OSError: If the file cannot be written or renamed.
    """
    part_path = f"{save_path}.part"
    try:
        # The context manager releases the connection even on non-200 replies.
        with session.get(file_url, stream=True, timeout=REQUEST_TIMEOUT) as response:
            if response.status_code != 200:
                return response.status_code

            # Use the advertised size (when present) so the bar shows real progress.
            content_length = response.headers.get("Content-Length", "")
            total_bytes = int(content_length) if content_length.isdigit() else None

            with open(part_path, "wb") as f, tqdm(
                total=total_bytes,
                desc=desc,
                unit="B",
                unit_scale=True,
                unit_divisor=1024,
                leave=False,
                position=2,
            ) as progress:
                for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
                    f.write(chunk)
                    progress.update(len(chunk))

        # Publish the finished file in a single rename step.
        os.replace(part_path, save_path)
        return 200
    finally:
        # After a successful rename the .part file is gone; otherwise discard it.
        if os.path.exists(part_path):
            os.remove(part_path)


# Loop over each year and month with nested progress bars.
# One session is shared so connections to the host can be reused.
with requests.Session() as session:
    for year in tqdm(years, desc="Years", unit="year", position=0):
        for month in tqdm(months, desc="Months", unit="month", leave=False, position=1):
            file_name = f"yellow_tripdata_{year}-{month:02}.parquet"
            file_url = BASE_URL + file_name
            save_path = os.path.join(SAVE_DIR, file_name)

            # Skip if already downloaded
            if os.path.exists(save_path):
                print(f"Already downloaded: {file_name}")
                continue

            try:
                status = download_file(session, file_url, save_path, f"Downloading {file_name}")
            except Exception as e:
                # Best effort: report the failure and carry on with the next month.
                print(f"Error downloading {file_name}: {e}")
                continue

            if status == 404:
                print(f"File not found (maybe doesn't exist): {file_name}")
            elif status != 200:
                # Do not report throttling or server errors as a missing file.
                print(f"Unexpected HTTP status {status} for {file_name}")


Years:   0%|          | 0/16 [00:00<?, ?year/s]

Months:   0%|          | 0/12 [00:00<?, ?month/s]

Downloading yellow_tripdata_2009-01.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2009-02.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2009-03.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2009-04.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2009-05.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2009-06.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2009-07.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2009-08.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2009-09.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2009-10.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2009-11.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2009-12.parquet: 0KB [00:00, ?KB/s]

Months:   0%|          | 0/12 [00:00<?, ?month/s]

Downloading yellow_tripdata_2010-01.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2010-02.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2010-03.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2010-04.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2010-05.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2010-06.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2010-07.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2010-08.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2010-09.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2010-10.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2010-11.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2010-12.parquet: 0KB [00:00, ?KB/s]

Months:   0%|          | 0/12 [00:00<?, ?month/s]

Downloading yellow_tripdata_2011-01.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2011-02.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2011-03.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2011-04.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2011-05.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2011-06.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2011-07.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2011-08.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2011-09.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2011-10.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2011-11.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2011-12.parquet: 0KB [00:00, ?KB/s]

Months:   0%|          | 0/12 [00:00<?, ?month/s]

Downloading yellow_tripdata_2012-01.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2012-02.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2012-03.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2012-04.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2012-05.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2012-06.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2012-07.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2012-08.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2012-09.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2012-10.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2012-11.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2012-12.parquet: 0KB [00:00, ?KB/s]

Months:   0%|          | 0/12 [00:00<?, ?month/s]

Downloading yellow_tripdata_2013-01.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2013-02.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2013-03.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2013-04.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2013-05.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2013-06.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2013-07.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2013-08.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2013-09.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2013-10.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2013-11.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2013-12.parquet: 0KB [00:00, ?KB/s]

Months:   0%|          | 0/12 [00:00<?, ?month/s]

Downloading yellow_tripdata_2014-01.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2014-02.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2014-03.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2014-04.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2014-05.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2014-06.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2014-07.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2014-08.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2014-09.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2014-10.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2014-11.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2014-12.parquet: 0KB [00:00, ?KB/s]

Months:   0%|          | 0/12 [00:00<?, ?month/s]

Downloading yellow_tripdata_2015-01.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2015-02.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2015-03.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2015-04.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2015-05.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2015-06.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2015-07.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2015-08.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2015-09.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2015-10.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2015-11.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2015-12.parquet: 0KB [00:00, ?KB/s]

Months:   0%|          | 0/12 [00:00<?, ?month/s]

Downloading yellow_tripdata_2016-01.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2016-02.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2016-03.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2016-04.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2016-05.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2016-06.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2016-07.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2016-08.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2016-09.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2016-10.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2016-11.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2016-12.parquet: 0KB [00:00, ?KB/s]

Months:   0%|          | 0/12 [00:00<?, ?month/s]

Downloading yellow_tripdata_2017-01.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2017-02.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2017-03.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2017-04.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2017-05.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2017-06.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2017-07.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2017-08.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2017-09.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2017-10.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2017-11.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2017-12.parquet: 0KB [00:00, ?KB/s]

Months:   0%|          | 0/12 [00:00<?, ?month/s]

Downloading yellow_tripdata_2018-01.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2018-02.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2018-03.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2018-04.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2018-05.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2018-06.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2018-07.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2018-08.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2018-09.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2018-10.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2018-11.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2018-12.parquet: 0KB [00:00, ?KB/s]

Months:   0%|          | 0/12 [00:00<?, ?month/s]

Downloading yellow_tripdata_2019-01.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2019-02.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2019-03.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2019-04.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2019-05.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2019-06.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2019-07.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2019-08.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2019-09.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2019-10.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2019-11.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2019-12.parquet: 0KB [00:00, ?KB/s]

Months:   0%|          | 0/12 [00:00<?, ?month/s]

Downloading yellow_tripdata_2020-01.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2020-02.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2020-03.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2020-04.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2020-05.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2020-06.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2020-07.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2020-08.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2020-09.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2020-10.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2020-11.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2020-12.parquet: 0KB [00:00, ?KB/s]

Months:   0%|          | 0/12 [00:00<?, ?month/s]

Downloading yellow_tripdata_2021-01.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2021-02.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2021-03.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2021-04.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2021-05.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2021-06.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2021-07.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2021-08.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2021-09.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2021-10.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2021-11.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2021-12.parquet: 0KB [00:00, ?KB/s]

Months:   0%|          | 0/12 [00:00<?, ?month/s]

Downloading yellow_tripdata_2022-01.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2022-02.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2022-03.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2022-04.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2022-05.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2022-06.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2022-07.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2022-08.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2022-09.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2022-10.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2022-11.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2022-12.parquet: 0KB [00:00, ?KB/s]

Months:   0%|          | 0/12 [00:00<?, ?month/s]

Downloading yellow_tripdata_2023-01.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2023-02.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2023-03.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2023-04.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2023-05.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2023-06.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2023-07.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2023-08.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2023-09.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2023-10.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2023-11.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2023-12.parquet: 0KB [00:00, ?KB/s]

Months:   0%|          | 0/12 [00:00<?, ?month/s]

Downloading yellow_tripdata_2024-01.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2024-02.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2024-03.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2024-04.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2024-05.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2024-06.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2024-07.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2024-08.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2024-09.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2024-10.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2024-11.parquet: 0KB [00:00, ?KB/s]

Downloading yellow_tripdata_2024-12.parquet: 0KB [00:00, ?KB/s]

In [None]:
import openmeteo_requests

import pandas as pd
import requests_cache
from retry_requests import retry

# Setup the Open-Meteo API client with cache and retry on error.
# expire_after = -1 is passed to the cache session so cached responses are kept
# indefinitely (see the requests-cache documentation for the exact semantics).
cache_session = requests_cache.CachedSession('.cache', expire_after = -1)
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)

# Single source of truth for the hourly variables. The same list drives the
# request, the positional lookup in the response, and the DataFrame column
# names, so reordering or extending it cannot mislabel a column.
HOURLY_VARIABLES = [
    "rain",
    "snowfall",
    "snow_depth",
    "precipitation",
    "apparent_temperature",
    "relative_humidity_2m",
    "temperature_2m",
    "dew_point_2m",
]

url = "https://archive-api.open-meteo.com/v1/archive"
params = {
    "latitude": 40.7357,
    "longitude": -74.1724,
    "start_date": "2009-04-27",
    "end_date": "2025-05-11",
    "hourly": HOURLY_VARIABLES,
    "timezone": "America/New_York"
}
responses = openmeteo.weather_api(url, params=params)

# Process first location. Add a for-loop for multiple locations or weather models
response = responses[0]
print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
print(f"Elevation {response.Elevation()} m asl")
print(f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}")
print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

# Process hourly data. Variables come back positionally, in the order requested.
hourly = response.Hourly()
assert hourly.VariablesLength() == len(HOURLY_VARIABLES), (
    "response does not contain one series per requested hourly variable"
)
hourly_values = {
    name: hourly.Variables(index).ValuesAsNumpy()
    for index, name in enumerate(HOURLY_VARIABLES)
}

# Build the timestamp column from the response's start, end and step size.
# The end bound is excluded (inclusive = "left").
# NOTE(review): these timestamps are UTC-aware (utc = True) although the request
# asks for America/New_York. Confirm that downstream joins against local trip
# times convert consistently.
hourly_data = {"date": pd.date_range(
    start = pd.to_datetime(hourly.Time(), unit = "s", utc = True),
    end = pd.to_datetime(hourly.TimeEnd(), unit = "s", utc = True),
    freq = pd.Timedelta(seconds = hourly.Interval()),
    inclusive = "left"
)}
hourly_data.update(hourly_values)

hourly_dataframe = pd.DataFrame(data = hourly_data)
print(hourly_dataframe)
