# Data Extraction

In [1]:
import os
from pathlib import Path

import earthaccess as ea
import netCDF4 as nc
import numpy as np
import seaborn as sns
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import pandas as pd
import xarray as xr
from dotenv import load_dotenv, find_dotenv
from matplotlib.path import Path as PolygonPath
from src.services.utils import get_logger
from src.services.google import Google

logger = get_logger()
env_loaded = load_dotenv(find_dotenv())
if env_loaded:
    logger.info("Environment variables loaded successfully.")
else:
    logger.error("Failed to load environment variables.")

  from .autonotebook import tqdm as notebook_tqdm


[32m2025-10-04T18:28:28.829601-0500[0m | [1mINFO[0m | Environment variables loaded successfully.


## Get data from different sources

### Earth data login

In [2]:
EARTH_ACCESS_USERNAME = os.getenv("EARTH_ACCESS_USERNAME")
EARTH_ACCESS_PASSWORD = os.getenv("EARTH_ACCESS_PASSWORD")
auth = ea.login(EARTH_ACCESS_USERNAME, EARTH_ACCESS_PASSWORD)

In [1]:
short_name = "TEMPO_NO2_L3"  # collection name to search for in the EarthData
# short_name = "OMHCHOd"
version = "V03"
# version = "003"

date_start = "2025-01-01 00:00:00"
date_end = "2025-01-01 15:59:59"
polygon_coords = [
    (-120.0091050, 41.9727325),
    (-124.6045661, 41.8898826),
    (-120.4462801, 33.9044735),
    (-117.1073262, 32.6184122),
    (-114.2955756, 32.6554188),
    (-114.1637748, 34.3047333),
    (-114.7349117, 35.0995465),
    (-120.0948112, 39.0254518),
    (-120.0091050, 41.9727325),
]

In [2]:
from src.services.earth_data import EarthDataClient

eac = EarthDataClient()
df = eac.get_data(
    dataset_name=short_name,
    dataset_version=version,
    start_date=date_start,
    end_date=date_end,
    polygon=polygon_coords,
)
df.head()

  from .autonotebook import tqdm as notebook_tqdm


earthdata-client | [32m2025-10-04T18:50:15.398342-0500[0m | [1mINFO[0m | Found 2 granules.


QUEUEING TASKS | : 100%|██████████| 1/1 [00:00<00:00, 1733.18it/s]
PROCESSING TASKS | : 100%|██████████| 1/1 [00:16<00:00, 16.38s/it]
COLLECTING RESULTS | : 100%|██████████| 1/1 [00:00<00:00, 18893.26it/s]

earthdata-client | [32m2025-10-04T18:50:31.807162-0500[0m | [1mINFO[0m | Opening file: /var/folders/78/l07nwp291d17m9j1z3ylv1n40000gn/T/earthdata_oxnu76kf/TEMPO_NO2_L3_V03_20250101T144826Z_S005.nc



QUEUEING TASKS | : 100%|██████████| 1/1 [00:00<00:00, 3890.82it/s]
PROCESSING TASKS | : 100%|██████████| 1/1 [00:16<00:00, 16.46s/it]
COLLECTING RESULTS | : 100%|██████████| 1/1 [00:00<00:00, 16070.13it/s]

earthdata-client | [32m2025-10-04T18:50:53.606690-0500[0m | [1mINFO[0m | Opening file: /var/folders/78/l07nwp291d17m9j1z3ylv1n40000gn/T/earthdata_oxnu76kf/TEMPO_NO2_L3_V03_20250101T144826Z_S005.nc
earthdata-client | [32m2025-10-04T18:50:53.608700-0500[0m | [1mINFO[0m | Opening file: /var/folders/78/l07nwp291d17m9j1z3ylv1n40000gn/T/earthdata_oxnu76kf/TEMPO_NO2_L3_V03_20250101T154826Z_S006.nc





Unnamed: 0,latitude,longitude,time,weight
7217793,32.630001,-117.129997,2025-01-01 14:48:44.028173568,4.162722
7217794,32.630001,-117.110001,2025-01-01 14:48:44.028173568,4.162722
7217795,32.630001,-117.089996,2025-01-01 14:48:44.028173568,4.360855
7217796,32.630001,-117.07,2025-01-01 14:48:44.028173568,4.162722
7217797,32.630001,-117.050003,2025-01-01 14:48:44.028173568,4.162722


In [3]:
print(f"Total records retrieved: {len(df)}")
df.dropna(inplace=True)
print(f"Total records after dropping NaNs: {len(df)}")

Total records retrieved: 214348
Total records after dropping NaNs: 214348


In [4]:
df.isna().sum()

latitude     0
longitude    0
time         0
weight       0
dtype: int64

## Load data to NO2 historical data

In [1]:
import time
import concurrent.futures
from tqdm import tqdm
from src.etl.extract_load_no2 import extract_and_load_no2

short_name = "TEMPO_NO2_L3"  # collection name to search for in the EarthData
version = "V03"

polygon_coords = [
    (-120.0091050, 41.9727325),
    (-124.6045661, 41.8898826),
    (-120.4462801, 33.9044735),
    (-117.1073262, 32.6184122),
    (-114.2955756, 32.6554188),
    (-114.1637748, 34.3047333),
    (-114.7349117, 35.0995465),
    (-120.0948112, 39.0254518),
    (-120.0091050, 41.9727325),
]

def process_day(i):
    date_start = f"2024-04-{i:02d} 00:00:00"
    date_end = f"2024-04-{i:02d} 23:59:59"
    print(f"Processing data for {date_start} to {date_end}")
    try:
        extract_and_load_no2(
            dataset_name=short_name,
            dataset_version=version,
            start_date=date_start,
            end_date=date_end,
            polygon=polygon_coords,
        )
        return f"Success for {date_start}"
    except Exception as e:
        return f"Error for {date_start}: {e}"

start_run_time = time.time()

# Run in parallel using ThreadPoolExecutor
days = list(range(20, 32))
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:  # Adjust max_workers as needed
    futures = [executor.submit(process_day, i) for i in days]
    for future in tqdm(concurrent.futures.as_completed(futures), total=len(days)):
        result = future.result()
        print(result)

end_run_time = time.time()
elapsed_time = end_run_time - start_run_time
print(f"ETL process completed in {elapsed_time:.2f} seconds.")

  from .autonotebook import tqdm as notebook_tqdm


Processing data for 2024-04-20 00:00:00 to 2024-04-20 23:59:59Processing data for 2024-04-21 00:00:00 to 2024-04-21 23:59:59

Processing data for 2024-04-22 00:00:00 to 2024-04-22 23:59:59
Processing data for 2024-04-23 00:00:00 to 2024-04-23 23:59:59
Processing data for 2024-04-24 00:00:00 to 2024-04-24 23:59:59
Processing data for 2024-04-25 00:00:00 to 2024-04-25 23:59:59
Processing data for 2024-04-26 00:00:00 to 2024-04-26 23:59:59
Processing data for 2024-04-27 00:00:00 to 2024-04-27 23:59:59
Processing data for 2024-04-28 00:00:00 to 2024-04-28 23:59:59
Processing data for 2024-04-29 00:00:00 to 2024-04-29 23:59:59


  8%|▊         | 1/12 [00:05<00:58,  5.32s/it]

Processing data for 2024-04-30 00:00:00 to 2024-04-30 23:59:59Processing data for 2024-04-31 00:00:00 to 2024-04-31 23:59:59
Error for 2024-04-24 00:00:00: 'Google' object has no attribute 'bigquery'

Error for 2024-04-23 00:00:00: 'Google' object has no attribute 'bigquery'
Error for 2024-04-26 00:00:00: 'Google' object has no attribute 'bigquery'
Error for 2024-04-28 00:00:00: 'Google' object has no attribute 'bigquery'
Error for 2024-04-27 00:00:00: 'Google' object has no attribute 'bigquery'
src.etl.extract_load_no2 | [32m2025-10-05T11:00:57.978934-0500[0m | [1mINFO[0m | BigQuery client initialized (project=nasa-challenge-2025, location=US)
src.etl.extract_load_no2 | [32m2025-10-05T11:00:57.980221-0500[0m | [1mINFO[0m | Extracting data for TEMPO_NO2_L3 version V03 from 2024-04-29 00:00:00 to 2024-04-29 23:59:59
Error for 2024-04-22 00:00:00: 'Google' object has no attribute 'bigquery'
src.etl.extract_load_no2 | [32m2025-10-05T11:00:57.989639-0500[0m | [1mINFO[0m | Extra



src.etl.extract_load_no2 | [32m2025-10-05T11:00:58.315223-0500[0m | [1mINFO[0m | Extracting data for TEMPO_NO2_L3 version V03 from 2024-04-20 00:00:00 to 2024-04-20 23:59:59
src.etl.extract_load_no2 | [32m2025-10-05T11:00:58.342555-0500[0m | [1mINFO[0m | Extracting data for TEMPO_NO2_L3 version V03 from 2024-04-21 00:00:00 to 2024-04-21 23:59:59
src.etl.extract_load_no2 | [32m2025-10-05T11:00:58.632623-0500[0m | [1mINFO[0m | Extracting data for TEMPO_NO2_L3 version V03 from 2024-04-30 00:00:00 to 2024-04-30 23:59:59
src.etl.extract_load_no2 | [32m2025-10-05T11:00:59.297584-0500[0m | [1mINFO[0m | Found 14 granules.


QUEUEING TASKS | : 100%|██████████| 1/1 [00:00<00:00, 2066.16it/s]


src.etl.extract_load_no2 | [32m2025-10-05T11:00:59.310830-0500[0m | [1mINFO[0m | Found 14 granules.



QUEUEING TASKS | : 100%|██████████| 1/1 [00:00<00:00, 3533.53it/s]

[A

src.etl.extract_load_no2 | [32m2025-10-05T11:00:59.334917-0500[0m | [1mINFO[0m | Found 14 granules.




QUEUEING TASKS | : 100%|██████████| 1/1 [00:00<00:00, 4369.07it/s]


[A[A

src.etl.extract_load_no2 | [32m2025-10-05T11:00:59.360719-0500[0m | [1mINFO[0m | Found 14 granules.





QUEUEING TASKS | : 100%|██████████| 1/1 [00:00<00:00, 2178.86it/s]



[A[A[A

src.etl.extract_load_no2 | [32m2025-10-05T11:00:59.409342-0500[0m | [1mINFO[0m | Found 9 granules.






QUEUEING TASKS | : 100%|██████████| 1/1 [00:00<00:00, 3028.38it/s]




[A[A[A[A

src.etl.extract_load_no2 | [32m2025-10-05T11:00:59.664589-0500[0m | [1mINFO[0m | Extracting data for TEMPO_NO2_L3 version V03 from 2024-04-31 00:00:00 to 2024-04-31 23:59:59


 58%|█████▊    | 7/12 [00:07<00:04,  1.23it/s]

Error for 2024-04-31 00:00:00: day is out of range for month: 2024-04-31 00:00:00


PROCESSING TASKS | : 100%|██████████| 1/1 [00:25<00:00, 25.09s/it]
COLLECTING RESULTS | : 100%|██████████| 1/1 [00:00<00:00, 17476.27it/s]
QUEUEING TASKS | : 100%|██████████| 1/1 [00:00<00:00, 2723.57it/s]




PROCESSING TASKS | : 100%|██████████| 1/1 [00:25<00:00, 25.53s/it]




COLLECTING RESULTS | : 100%|██████████| 1/1 [00:00<00:00, 14665.40it/s]




QUEUEING TASKS | : 100%|██████████| 1/1 [00:00<00:00, 1960.87it/s]




[A[A[A[A

PROCESSING TASKS | : 100%|██████████| 1/1 [00:26<00:00, 26.31s/it]


COLLECTING RESULTS | : 100%|██████████| 1/1 [00:00<00:00, 7639.90it/s]


QUEUEING TASKS | : 100%|██████████| 1/1 [00:00<00:00, 1302.17it/s]


[A[A
PROCESSING TASKS | : 100%|██████████| 1/1 [00:28<00:00, 28.12s/it]

COLLECTING RESULTS | : 100%|██████████| 1/1 [00:00<00:00, 12671.61it/s]

QUEUEING TASKS | : 100%|██████████| 1/1 [00:00<00:00, 1789.38it/s]

[A


PROCESSING TASKS | : 100%|██████████| 1/1 [00:31<00:00, 31.12s/it]



COLLECTING RESULTS | : 100%|██████████| 1/1 [00:00<00:0

src.etl.extract_load_no2 | [32m2025-10-05T11:06:05.100720-0500[0m | [1mINFO[0m | Downloaded 9 files. Processing...






PROCESSING TASKS | : 100%|██████████| 1/1 [01:10<00:00, 70.33s/it]



COLLECTING RESULTS | : 100%|██████████| 1/1 [00:00<00:00, 26546.23it/s]



QUEUEING TASKS | : 100%|██████████| 1/1 [00:00<00:00, 2571.61it/s]



PROCESSING TASKS | : 100%|██████████| 1/1 [01:08<00:00, 68.91s/it]
COLLECTING RESULTS | : 100%|██████████| 1/1 [00:00<00:00, 3650.40it/s]
QUEUEING TASKS | : 100%|██████████| 1/1 [00:00<00:00, 1377.44it/s]


PROCESSING TASKS | : 100%|██████████| 1/1 [01:20<00:00, 80.22s/it]


COLLECTING RESULTS | : 100%|██████████| 1/1 [00:00<00:00, 28532.68it/s]


QUEUEING TASKS | : 100%|██████████| 1/1 [00:00<00:00, 2624.72it/s]


[A[A

src.etl.extract_load_no2 | [32m2025-10-05T11:06:53.657864-0500[0m | [1mINFO[0m | Extracted 964566 records from TEMPO_NO2_L3 version V03.
src.etl.extract_load_no2 | [32m2025-10-05T11:06:54.237049-0500[0m | [1mINFO[0m | Data cleaned. 964566 records remaining after cleaning.
src.etl.extract_load_no2 | [32m2025-10-05T11:06:54.237643-0500[0m | [1mINFO[0m | Uploading data to BigQuery...
src.etl.extract_load_no2 | [32m2025-10-05T11:06:55.625102-0500[0m | [1mINFO[0m | Dataset earth_data already exists (project=nasa-challenge-2025).
src.etl.extract_load_no2 | [32m2025-10-05T11:06:55.626951-0500[0m | [1mINFO[0m | BigQuery load configured to WRITE_APPEND and CREATE_IF_NEEDED
src.etl.extract_load_no2 | [32m2025-10-05T11:07:02.607087-0500[0m | [1mINFO[0m | Data uploaded successfully.
src.etl.extract_load_no2 | [32m2025-10-05T11:07:02.609251-0500[0m | [1mINFO[0m | Data successfully loaded into BigQuery.


 67%|██████▋   | 8/12 [06:09<04:17, 64.46s/it]

Success for 2024-04-30 00:00:00





PROCESSING TASKS | : 100%|██████████| 1/1 [00:32<00:00, 32.19s/it]



COLLECTING RESULTS | : 100%|██████████| 1/1 [00:00<00:00, 12157.40it/s]



QUEUEING TASKS | : 100%|██████████| 1/1 [00:00<00:00, 2165.36it/s]



[A[A[A
PROCESSING TASKS | : 100%|██████████| 1/1 [01:19<00:00, 79.56s/it]

COLLECTING RESULTS | : 100%|██████████| 1/1 [00:00<00:00, 27235.74it/s]

QUEUEING TASKS | : 100%|██████████| 1/1 [00:00<00:00, 4128.25it/s]

PROCESSING TASKS | : 100%|██████████| 1/1 [00:31<00:00, 31.25s/it]
COLLECTING RESULTS | : 100%|██████████| 1/1 [00:00<00:00, 16194.22it/s]
QUEUEING TASKS | : 100%|██████████| 1/1 [00:00<00:00, 2150.93it/s]


PROCESSING TASKS | : 100%|██████████| 1/1 [00:31<00:00, 31.19s/it]


COLLECTING RESULTS | : 100%|██████████| 1/1 [00:00<00:00, 28532.68it/s]


QUEUEING TASKS | : 100%|██████████| 1/1 [00:00<00:00, 3968.12it/s]


[A[A


PROCESSING TASKS | : 100%|██████████| 1/1 [00:23<00:00, 23.27s/it]



COLLECTING RESULTS | : 100%|██████████| 1/1 [00:00<00:00, 18893.

src.etl.extract_load_no2 | [32m2025-10-05T11:08:39.720419-0500[0m | [1mINFO[0m | Downloaded 14 files. Processing...






PROCESSING TASKS | : 100%|██████████| 1/1 [00:57<00:00, 57.37s/it]
COLLECTING RESULTS | : 100%|██████████| 1/1 [00:00<00:00, 22671.91it/s]
QUEUEING TASKS | : 100%|██████████| 1/1 [00:00<00:00, 1138.83it/s]


PROCESSING TASKS | : 100%|██████████| 1/1 [01:12<00:00, 72.16s/it]


COLLECTING RESULTS | : 100%|██████████| 1/1 [00:00<00:00, 23831.27it/s]

src.etl.extract_load_no2 | [32m2025-10-05T11:09:45.484081-0500[0m | [1mINFO[0m | Downloaded 14 files. Processing...





src.etl.extract_load_no2 | [32m2025-10-05T11:09:55.985989-0500[0m | [1mINFO[0m | Extracted 1500436 records from TEMPO_NO2_L3 version V03.
src.etl.extract_load_no2 | [32m2025-10-05T11:10:00.440367-0500[0m | [1mINFO[0m | Data cleaned. 1500436 records remaining after cleaning.
src.etl.extract_load_no2 | [32m2025-10-05T11:10:00.441487-0500[0m | [1mINFO[0m | Uploading data to BigQuery...
src.etl.extract_load_no2 | [32m2025-10-05T11:10:01.782596-0500[0m | [1mINFO[0m | Dataset earth_data already exists (project=nasa-challenge-2025).
src.etl.extract_load_no2 | [32m2025-10-05T11:10:04.030092-0500[0m | [1mINFO[0m | BigQuery load configured to WRITE_APPEND and CREATE_IF_NEEDED
src.etl.extract_load_no2 | [32m2025-10-05T11:10:16.927249-0500[0m | [1mINFO[0m | Data uploaded successfully.
src.etl.extract_load_no2 | [32m2025-10-05T11:10:16.928368-0500[0m | [1mINFO[0m | Data successfully loaded into BigQuery.


 75%|███████▌  | 9/12 [09:24<04:31, 90.52s/it]

Success for 2024-04-21 00:00:00



PROCESSING TASKS | : 100%|██████████| 1/1 [02:28<00:00, 148.62s/it]

COLLECTING RESULTS | : 100%|██████████| 1/1 [00:00<00:00, 1275.25it/s]

QUEUEING TASKS | : 100%|██████████| 1/1 [00:00<00:00, 266.37it/s]

[A

src.etl.extract_load_no2 | [32m2025-10-05T11:11:08.071038-0500[0m | [1mINFO[0m | Extracted 1500436 records from TEMPO_NO2_L3 version V03.
src.etl.extract_load_no2 | [32m2025-10-05T11:11:08.900638-0500[0m | [1mINFO[0m | Data cleaned. 1500436 records remaining after cleaning.
src.etl.extract_load_no2 | [32m2025-10-05T11:11:08.901171-0500[0m | [1mINFO[0m | Uploading data to BigQuery...
src.etl.extract_load_no2 | [32m2025-10-05T11:11:09.216956-0500[0m | [1mINFO[0m | Dataset earth_data already exists (project=nasa-challenge-2025).
src.etl.extract_load_no2 | [32m2025-10-05T11:11:09.219110-0500[0m | [1mINFO[0m | BigQuery load configured to WRITE_APPEND and CREATE_IF_NEEDED


PROCESSING TASKS | : 100%|██████████| 1/1 [01:51<00:00, 111.58s/it]
COLLECTING RESULTS | : 100%|██████████| 1/1 [00:00<00:00, 21399.51it/s]

src.etl.extract_load_no2 | [32m2025-10-05T11:11:09.380426-0500[0m | [1mINFO[0m | Downloaded 14 files. Processing...





src.etl.extract_load_no2 | [32m2025-10-05T11:11:17.519960-0500[0m | [1mINFO[0m | Data uploaded successfully.
src.etl.extract_load_no2 | [32m2025-10-05T11:11:17.520508-0500[0m | [1mINFO[0m | Data successfully loaded into BigQuery.


 83%|████████▎ | 10/12 [10:24<02:47, 83.85s/it]

Success for 2024-04-29 00:00:00



PROCESSING TASKS | : 100%|██████████| 1/1 [01:17<00:00, 77.56s/it]
COLLECTING RESULTS | : 100%|██████████| 1/1 [00:00<00:00, 27594.11it/s]
QUEUEING TASKS | : 100%|██████████| 1/1 [00:00<00:00, 1088.02it/s]


src.etl.extract_load_no2 | [32m2025-10-05T11:12:18.807236-0500[0m | [1mINFO[0m | Extracted 1500436 records from TEMPO_NO2_L3 version V03.
src.etl.extract_load_no2 | [32m2025-10-05T11:12:19.581745-0500[0m | [1mINFO[0m | Data cleaned. 1500436 records remaining after cleaning.
src.etl.extract_load_no2 | [32m2025-10-05T11:12:19.582174-0500[0m | [1mINFO[0m | Uploading data to BigQuery...
src.etl.extract_load_no2 | [32m2025-10-05T11:12:19.823110-0500[0m | [1mINFO[0m | Dataset earth_data already exists (project=nasa-challenge-2025).
src.etl.extract_load_no2 | [32m2025-10-05T11:12:19.824357-0500[0m | [1mINFO[0m | BigQuery load configured to WRITE_APPEND and CREATE_IF_NEEDED
src.etl.extract_load_no2 | [32m2025-10-05T11:12:26.074368-0500[0m | [1mINFO[0m | Data uploaded successfully.
src.etl.extract_load_no2 | [32m2025-10-05T11:12:26.075717-0500[0m | [1mINFO[0m | Data successfully loaded into BigQuery.


 92%|█████████▏| 11/12 [11:33<01:20, 80.16s/it]

Success for 2024-04-25 00:00:00


PROCESSING TASKS | : 100%|██████████| 1/1 [00:29<00:00, 29.84s/it]
COLLECTING RESULTS | : 100%|██████████| 1/1 [00:00<00:00, 11096.04it/s]

src.etl.extract_load_no2 | [32m2025-10-05T11:12:42.154952-0500[0m | [1mINFO[0m | Downloaded 14 files. Processing...





src.etl.extract_load_no2 | [32m2025-10-05T11:14:04.984290-0500[0m | [1mINFO[0m | Extracted 1500436 records from TEMPO_NO2_L3 version V03.
src.etl.extract_load_no2 | [32m2025-10-05T11:14:05.814221-0500[0m | [1mINFO[0m | Data cleaned. 1500436 records remaining after cleaning.
src.etl.extract_load_no2 | [32m2025-10-05T11:14:05.814934-0500[0m | [1mINFO[0m | Uploading data to BigQuery...
src.etl.extract_load_no2 | [32m2025-10-05T11:14:06.046719-0500[0m | [1mINFO[0m | Dataset earth_data already exists (project=nasa-challenge-2025).
src.etl.extract_load_no2 | [32m2025-10-05T11:14:06.049453-0500[0m | [1mINFO[0m | BigQuery load configured to WRITE_APPEND and CREATE_IF_NEEDED
src.etl.extract_load_no2 | [32m2025-10-05T11:14:12.910791-0500[0m | [1mINFO[0m | Data uploaded successfully.
src.etl.extract_load_no2 | [32m2025-10-05T11:14:12.911907-0500[0m | [1mINFO[0m | Data successfully loaded into BigQuery.


100%|██████████| 12/12 [13:20<00:00, 66.69s/it]

Success for 2024-04-20 00:00:00
ETL process completed in 800.32 seconds.





In [1]:
import time
from src.etl.extract_load_no2 import extract_and_load_no2

short_name = "TEMPO_NO2_L3"  # collection name to search for in the EarthData
# short_name = "OMHCHOd"
version = "V03"
# version = "003"

date_start = "2024-04-01 00:00:00"
date_end = "2024-04-01 23:59:59"
polygon_coords = [
    (-120.0091050, 41.9727325),
    (-124.6045661, 41.8898826),
    (-120.4462801, 33.9044735),
    (-117.1073262, 32.6184122),
    (-114.2955756, 32.6554188),
    (-114.1637748, 34.3047333),
    (-114.7349117, 35.0995465),
    (-120.0948112, 39.0254518),
    (-120.0091050, 41.9727325),
]

start_run_time = time.time()
for i in range(1, 32):
    date_start = f"2024-04-{i:02d} 00:00:00"
    date_end = f"2024-04-{i:02d} 23:59:59"
    print(f"Processing data for {date_start} to {date_end}")
    _ = extract_and_load_no2(
        dataset_name=short_name,
        dataset_version=version,
        start_date=date_start,
        end_date=date_end,
        polygon=polygon_coords,
    )
end_run_time = time.time()
elapsed_time = end_run_time - start_run_time
print(f"ETL process completed in {elapsed_time:.2f} seconds.")

  from .autonotebook import tqdm as notebook_tqdm


Processing data for 2024-04-01 00:00:00 to 2024-04-01 23:59:59
src.etl.extract_load_no2 | [32m2025-10-05T09:25:07.301841-0500[0m | [1mINFO[0m | BigQuery client initialized (project=nasa-challenge-2025, location=US)
src.etl.extract_load_no2 | [32m2025-10-05T09:25:07.302898-0500[0m | [1mINFO[0m | Extracting data for TEMPO_NO2_L3 version V03 from 2024-04-01 00:00:00 to 2024-04-01 23:59:59




src.etl.extract_load_no2 | [32m2025-10-05T09:25:08.391120-0500[0m | [1mINFO[0m | Found 13 granules.


QUEUEING TASKS | : 100%|██████████| 1/1 [00:00<00:00, 877.84it/s]
PROCESSING TASKS | : 100%|██████████| 1/1 [00:26<00:00, 26.55s/it]
COLLECTING RESULTS | : 100%|██████████| 1/1 [00:00<00:00, 39945.75it/s]
QUEUEING TASKS | : 100%|██████████| 1/1 [00:00<00:00, 1933.75it/s]
PROCESSING TASKS | : 100%|██████████| 1/1 [00:22<00:00, 22.86s/it]
COLLECTING RESULTS | : 100%|██████████| 1/1 [00:00<00:00, 22192.08it/s]
QUEUEING TASKS | : 100%|██████████| 1/1 [00:00<00:00, 1748.36it/s]
PROCESSING TASKS | : 100%|██████████| 1/1 [00:28<00:00, 28.32s/it]
COLLECTING RESULTS | : 100%|██████████| 1/1 [00:00<00:00, 16578.28it/s]
QUEUEING TASKS | : 100%|██████████| 1/1 [00:00<00:00, 1660.45it/s]
PROCESSING TASKS | : 100%|██████████| 1/1 [00:38<00:00, 38.82s/it]
COLLECTING RESULTS | : 100%|██████████| 1/1 [00:00<00:00, 24385.49it/s]
QUEUEING TASKS | : 100%|██████████| 1/1 [00:00<00:00, 4568.96it/s]
PROCESSING TASKS | : 100%|██████████| 1/1 [00:37<00:00, 37.82s/it]
COLLECTING RESULTS | : 100%|██████████| 1/1

KeyboardInterrupt: 

In [None]:
google = Google()
_ = google.bigquery.upload_data_from_dataframe(
    df,
    dataset="earth_data",
    table_id="no2_historical"
)

[32m2025-10-04T17:12:05.948804-0500[0m | [1mINFO[0m | BigQuery client initialized (project=%s, location=%s)




[32m2025-10-04T17:12:07.074801-0500[0m | [1mINFO[0m | Dataset %s already exists (project=%s).
[32m2025-10-04T17:12:11.427748-0500[0m | [1mINFO[0m | Data uploaded successfully.
