# Extracting precipitation data (MS Planetary Computer ERA5 / NASA POWER)

In [54]:
import warnings
warnings.filterwarnings("ignore")

import planetary_computer as pc
import xarray as xr
import pandas as pd
import numpy as np
from tqdm import tqdm
from scipy.spatial import cKDTree

import pystac_client
import planetary_computer as pc
import requests
import time

from odc.stac import stac_load

from datetime import date
from tqdm import tqdm
import os

tqdm.pandas()

## Dados Diários

In [62]:
def get_rain_accumulations_nasa(row):
    """Return rainfall accumulations from NASA POWER for one sample row.

    Requests the daily ``PRECTOTCORR`` series from the NASA POWER point API
    for the span starting 12 days before the sample date and ending on the
    sample date, then sums the trailing days of that series.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row passed by ``apply(axis=1)``)
        Must provide ``'Latitude'``, ``'Longitude'`` and ``'Sample Date'``.
        The date is parsed day-first.

    Returns
    -------
    pandas.Series
        Three entries, each a sum over the most recent daily values:

        * ``rain_3d``  - sample day plus the 3 preceding days (4 values)
        * ``rain_7d``  - sample day plus the 7 preceding days (8 values)
        * ``rain_12d`` - sample day plus the 12 preceding days (13 values)

        All entries are NaN when the request fails, the HTTP status is not
        200, or the response cannot be parsed. A single entry is NaN when its
        window contains a missing day or the API returned fewer days than the
        window needs.
    """
    # Number of trailing daily values that feed each accumulation.
    window_lengths = {'rain_3d': 4, 'rain_7d': 8, 'rain_12d': 13}

    def _all_missing():
        return pd.Series({name: np.nan for name in window_lengths})

    try:
        lat, lon = row['Latitude'], row['Longitude']
        # Named sample_date so the module-level `date` import is not shadowed.
        sample_date = pd.to_datetime(row['Sample Date'], dayfirst=True)

        # Let requests build and encode the query string (dates are YYYYMMDD).
        response = requests.get(
            "https://power.larc.nasa.gov/api/temporal/daily/point",
            params={
                'parameters': 'PRECTOTCORR',
                'community': 'AG',
                'longitude': lon,
                'latitude': lat,
                'start': (sample_date - pd.Timedelta(days=12)).strftime('%Y%m%d'),
                'end': sample_date.strftime('%Y%m%d'),
                'format': 'JSON',
            },
            timeout=10,
        )
        if response.status_code != 200:
            return _all_missing()

        data = response.json()
        precip_by_day = data['properties']['parameter']['PRECTOTCORR']

        # NASA POWER marks missing days with a fill value (-999 unless the
        # response header says otherwise). Summing it would produce large
        # negative "rainfall", so it is converted to NaN instead.
        fill_value = data.get('header', {}).get('fill_value', -999.0)

        # Keys are YYYYMMDD strings, so a lexical sort is chronological; this
        # guarantees the last element is the sample day regardless of the
        # order in which the JSON object was serialised.
        daily = [
            np.nan if (precip_by_day[day] is None or precip_by_day[day] == fill_value)
            else precip_by_day[day]
            for day in sorted(precip_by_day)
        ]

        # NaN propagates through sum(), so a window with a missing day is NaN.
        return pd.Series({
            name: sum(daily[-length:]) if len(daily) >= length else np.nan
            for name, length in window_lengths.items()
        })

    # Best-effort by design: one bad row must not abort a multi-hour batch.
    # Only network and data-shape failures are absorbed; programming errors
    # (e.g. a missing import) still surface instead of yielding all-NaN output.
    except (requests.RequestException, KeyError, ValueError, TypeError, AttributeError):
        return _all_missing()

## Downloading the data (dados mensais)

In [63]:
# Load the training table and keep its first 100 rows as a separate small frame.
Water_Quality_df = pd.read_csv('water_quality_training_dataset.csv')
Water_Quality_df_100 = Water_Quality_df.iloc[:100]

## Download Data (dados diários)

In [69]:
# Rows processed per block. Each finished block is appended to the CSV right
# away, so an interruption costs at most one block of requests.
chunk_size = 600
output_path = "../Datasets/chuva_acumulada_nasa_power.csv"

# Create the target directory up front so the first append cannot fail on it.
os.makedirs(os.path.dirname(output_path), exist_ok=True)

# Resume support: rows already present in the CSV are skipped. Without this,
# re-running the cell appends a second copy of every row, and the later
# index-aligned merge would silently use only the first len(df) rows.
rows_done = 0
if os.path.isfile(output_path) and os.path.getsize(output_path) > 0:
    rows_done = len(pd.read_csv(output_path))
if rows_done > len(Water_Quality_df):
    raise ValueError(
        f"{output_path} holds {rows_done} rows but the input has only "
        f"{len(Water_Quality_df)}; delete the stale file and re-run."
    )

print("üåßÔ∏è Iniciando extra√ß√£o de chuva acumulada (NASA POWER)...")
if rows_done:
    print(f"Resuming: {rows_done} rows already saved in {output_path}")

# Keeps the final `res.head()` valid even when every row was already saved.
res = pd.DataFrame(columns=['rain_3d', 'rain_7d', 'rain_12d'])

for i in range(rows_done, len(Water_Quality_df), chunk_size):
    subset = Water_Quality_df.iloc[i : i + chunk_size]
    print(f"üì¶ Bloco {i//chunk_size + 1}...")

    # One API request per row, with a progress bar.
    res = subset.progress_apply(get_rain_accumulations_nasa, axis=1)

    # Append the block; write the header only when the file has no content yet
    # (a zero-byte file counts as empty).
    has_content = os.path.isfile(output_path) and os.path.getsize(output_path) > 0
    res.to_csv(output_path, mode='a', index=False, header=not has_content)

    # Pause 1.1 s between blocks (not between individual requests) to ease
    # the load on the NASA POWER service.
    time.sleep(1.1)

print("‚úÖ Chuva acumulada extra√≠da com sucesso!")

res.head()

üåßÔ∏è Iniciando extra√ß√£o de chuva acumulada (NASA POWER)...
üì¶ Bloco 1...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 600/600 [16:36<00:00,  1.66s/it]


üì¶ Bloco 2...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 600/600 [16:37<00:00,  1.66s/it]


üì¶ Bloco 3...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 600/600 [16:36<00:00,  1.66s/it]


üì¶ Bloco 4...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 600/600 [15:58<00:00,  1.60s/it]


üì¶ Bloco 5...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 600/600 [16:18<00:00,  1.63s/it]


üì¶ Bloco 6...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 600/600 [16:21<00:00,  1.64s/it]


üì¶ Bloco 7...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 600/600 [16:13<00:00,  1.62s/it]


üì¶ Bloco 8...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 600/600 [16:17<00:00,  1.63s/it]


üì¶ Bloco 9...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 600/600 [16:32<00:00,  1.65s/it]


üì¶ Bloco 10...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 600/600 [16:55<00:00,  1.69s/it]


üì¶ Bloco 11...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 600/600 [16:20<00:00,  1.63s/it]


üì¶ Bloco 12...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 600/600 [16:19<00:00,  1.63s/it]


üì¶ Bloco 13...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 600/600 [16:41<00:00,  1.67s/it]


üì¶ Bloco 14...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 600/600 [16:47<00:00,  1.68s/it]


üì¶ Bloco 15...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 600/600 [16:23<00:00,  1.64s/it]


üì¶ Bloco 16...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 319/319 [08:26<00:00,  1.59s/it]


‚úÖ Chuva acumulada extra√≠da com sucesso!


Unnamed: 0,rain_3d,rain_7d,rain_12d
9000,5.19,5.47,7.03
9001,0.84,1.81,1.95
9002,5.18,8.42,8.43
9003,1.2,5.21,5.31
9004,4.18,4.55,4.64


In [70]:
# Read back the rainfall accumulations written by the extraction loop and preview them.
precip_acumulada_nasa = pd.read_csv(
    "../Datasets/chuva_acumulada_nasa_power.csv"
)
precip_acumulada_nasa.head(5)

Unnamed: 0,rain_3d,rain_7d,rain_12d
0,0.97,1.27,1.43
1,30.82,70.43,80.46
2,27.54,56.23,64.82
3,33.39,61.63,67.28
4,33.39,61.63,67.28


In [71]:

# The rainfall CSV was written without an index, so rows are matched to the
# training table purely by position. Fail loudly if the row counts differ
# (partial run, or duplicated appends) instead of producing silently
# truncated or NaN-padded features.
assert len(precip_acumulada_nasa) == len(Water_Quality_df), (
    f"rainfall rows ({len(precip_acumulada_nasa)}) != "
    f"training rows ({len(Water_Quality_df)})"
)

# Despite the variable name, these features come from NASA POWER.
era5_precip_features_training = Water_Quality_df[['Latitude', 'Longitude', 'Sample Date']].copy()

# Positional assignment (.to_numpy()) avoids any dependence on index labels.
era5_precip_features_training['rain_7d_nasa'] = precip_acumulada_nasa['rain_7d'].to_numpy()
era5_precip_features_training['rain_3d_nasa'] = precip_acumulada_nasa['rain_3d'].to_numpy()
era5_precip_features_training['rain_12d_nasa'] = precip_acumulada_nasa['rain_12d'].to_numpy()

era5_precip_features_training.to_csv("../Datasets/nasa_precip_features_training.csv", index=False)
print("‚úÖ Dataset final salvo com sucesso!")

era5_precip_features_training.head()

‚úÖ Dataset final salvo com sucesso!


Unnamed: 0,Latitude,Longitude,Sample Date,rain_7d_nasa,rain_3d_nasa,rain_12d_nasa
0,-28.760833,17.730278,02-01-2011,1.27,0.97,1.43
1,-26.861111,28.884722,03-01-2011,70.43,30.82,80.46
2,-26.45,28.085833,03-01-2011,56.23,27.54,64.82
3,-27.671111,27.236944,03-01-2011,61.63,33.39,67.28
4,-27.356667,27.286389,03-01-2011,61.63,33.39,67.28


## Getting validation Data

In [72]:
# Load the submission template; its rows are the locations and dates that need rainfall features.
Validation_df = pd.read_csv('submission_template.csv')
Validation_df.head(5)

Unnamed: 0,Latitude,Longitude,Sample Date,Total Alkalinity,Electrical Conductance,Dissolved Reactive Phosphorus
0,-32.043333,27.822778,01-09-2014,,,
1,-33.329167,26.0775,16-09-2015,,,
2,-32.991639,27.640028,07-05-2015,,,
3,-34.096389,24.439167,07-02-2012,,,
4,-32.000556,28.581667,01-10-2014,,,


In [73]:
output_path = "../Datasets/chuva_acumulada_nasa_power_validation.csv"

# Create the target directory up front so the first append cannot fail on it.
os.makedirs(os.path.dirname(output_path), exist_ok=True)

# Resume support: rows already present in the CSV are skipped. Without this,
# re-running the cell appends a second copy of every row, and the later
# index-aligned merge would silently use only the first len(df) rows.
rows_done = 0
if os.path.isfile(output_path) and os.path.getsize(output_path) > 0:
    rows_done = len(pd.read_csv(output_path))
if rows_done > len(Validation_df):
    raise ValueError(
        f"{output_path} holds {rows_done} rows but the input has only "
        f"{len(Validation_df)}; delete the stale file and re-run."
    )

print("üåßÔ∏è Iniciando extra√ß√£o de chuva acumulada para valida√ß√£o (NASA POWER)...")
if rows_done:
    print(f"Resuming: {rows_done} rows already saved in {output_path}")

# Keeps the final `res.head()` valid even when every row was already saved.
res = pd.DataFrame(columns=['rain_3d', 'rain_7d', 'rain_12d'])

# `chunk_size` is the block size defined in the training extraction cell.
for i in range(rows_done, len(Validation_df), chunk_size):
    subset = Validation_df.iloc[i : i + chunk_size]
    print(f"üì¶ Bloco {i//chunk_size + 1}...")

    # One API request per row, with a progress bar.
    res = subset.progress_apply(get_rain_accumulations_nasa, axis=1)

    # Append the block; write the header only when the file has no content yet
    # (a zero-byte file counts as empty).
    has_content = os.path.isfile(output_path) and os.path.getsize(output_path) > 0
    res.to_csv(output_path, mode='a', index=False, header=not has_content)

    # Pause 1.1 s between blocks (not between individual requests) to ease
    # the load on the NASA POWER service.
    time.sleep(1.1)

print("‚úÖ Chuva acumulada extra√≠da com sucesso!")

res.head()

üåßÔ∏è Iniciando extra√ß√£o de chuva acumulada para valida√ß√£o (NASA POWER)...
üì¶ Bloco 1...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [05:27<00:00,  1.64s/it]


‚úÖ Chuva acumulada extra√≠da com sucesso!


Unnamed: 0,rain_3d,rain_7d,rain_12d
0,1.16,1.76,6.44
1,2.54,4.49,5.69
2,2.23,2.29,2.37
3,5.03,66.84,67.0
4,68.61,73.63,74.0


In [74]:
# Read back the validation rainfall accumulations written by the extraction loop and preview them.
precip_acumulada_nasa_validation = pd.read_csv(
    "../Datasets/chuva_acumulada_nasa_power_validation.csv"
)
precip_acumulada_nasa_validation.head(5)

Unnamed: 0,rain_3d,rain_7d,rain_12d
0,1.16,1.76,6.44
1,2.54,4.49,5.69
2,2.23,2.29,2.37
3,5.03,66.84,67.0
4,68.61,73.63,74.0


In [75]:
# The rainfall CSV was written without an index, so rows are matched to the
# validation table purely by position. Fail loudly if the row counts differ
# (partial run, or duplicated appends) instead of producing silently
# truncated or NaN-padded features.
assert len(precip_acumulada_nasa_validation) == len(Validation_df), (
    f"rainfall rows ({len(precip_acumulada_nasa_validation)}) != "
    f"validation rows ({len(Validation_df)})"
)

# Despite the variable name, these features come from NASA POWER.
era5_precip_features_validation = Validation_df[['Latitude', 'Longitude', 'Sample Date']].copy()

# Positional assignment (.to_numpy()) avoids any dependence on index labels.
era5_precip_features_validation['rain_7d_nasa'] = precip_acumulada_nasa_validation['rain_7d'].to_numpy()
era5_precip_features_validation['rain_3d_nasa'] = precip_acumulada_nasa_validation['rain_3d'].to_numpy()
era5_precip_features_validation['rain_12d_nasa'] = precip_acumulada_nasa_validation['rain_12d'].to_numpy()
era5_precip_features_validation.to_csv("../Datasets/nasa_precip_features_validation.csv", index=False)
print("‚úÖ Dataset final salvo com sucesso!")

era5_precip_features_validation.head()

‚úÖ Dataset final salvo com sucesso!


Unnamed: 0,Latitude,Longitude,Sample Date,rain_7d_nasa,rain_3d_nasa,rain_12d_nasa
0,-32.043333,27.822778,01-09-2014,1.76,1.16,6.44
1,-33.329167,26.0775,16-09-2015,4.49,2.54,5.69
2,-32.991639,27.640028,07-05-2015,2.29,2.23,2.37
3,-34.096389,24.439167,07-02-2012,66.84,5.03,67.0
4,-32.000556,28.581667,01-10-2014,73.63,68.61,74.0
