## DATA FETCH FROM ERA5

### Time Period
Data diambil untuk rentang 20 tahun agar faktor-faktor musiman dan antartahunan seperti El Niño, dll. dapat dirata-ratakan.

### Data yang dibutuhkan untuk solar

- Surface Solar Radiation Downwards (SSRD)  
  Dipakai untuk menghitung besarnya energi surya yang sampai ke permukaan bumi.
- 2m Temperature (t2m)  
  Semakin tinggi temperatur, semakin rendah efisiensi panel surya.


### Data yang dibutuhkan untuk wind

- u-component of wind
- v-component of wind

Kedua komponen ini dipakai untuk menghitung besar kecepatan angin, $\sqrt{u^2 + v^2}$.

In [1]:
# Install the notebook's dependencies into the running kernel's environment.
# cdsapi provides the download client used in the fetch cell; cfgrib is the
# engine passed to xarray when opening the GRIB files.
# NOTE(review): versions are not pinned, so a fresh install may resolve to
# different releases than the ones this notebook was last run with.
%pip install cdsapi
%pip install pandas xarray cfgrib numpy

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [2]:
# Load IPython's autoreload extension and use mode 2, so that edits to imported
# modules are picked up without restarting the kernel.
# NOTE(review): no project-local modules are imported in the cells visible
# here - confirm this is still needed.
%load_ext autoreload
%autoreload 2

In [None]:
import cdsapi
import pandas as pd
import os
import glob
import xarray as xr
import numpy as np

In [9]:
# Params
#
# Everything a reader may want to tune lives in this cell.

# --- Data fetch ---
# Inclusive range of years to request from the CDS.
# NOTE(review): the introduction mentions a 20-year period, while this range
# covers two years - confirm whether this is a deliberate test setting.
START_YEAR = 2023
END_YEAR = 2024
# Calendar month (1-12) selected for the download.
MONTH = 1
# Pressure level of the wind fields, in hPa.
PRESSURE_LEVEL = 975
DATASET = "reanalysis-era5-pressure-levels"
# The folder name is derived from PRESSURE_LEVEL so that data for different
# levels can never end up mixed in the same directory.
OUTPUT_FOLDER = f"../data/wind_data_{PRESSURE_LEVEL}hpa/"

# --- GRIB to DataFrame ---
# The conversion step reads exactly what the fetch step wrote.
INPUT_FOLDER = OUTPUT_FOLDER


In [None]:
# Fetch ERA5 wind data from the CDS.
#
# One GRIB file is requested per year in [START_YEAR, END_YEAR]; each file
# holds hourly u/v wind components for the calendar month given by MONTH at
# PRESSURE_LEVEL. Files that already exist are skipped, so the cell can be
# re-run without downloading everything again.

# Ensure the output directory exists
os.makedirs(OUTPUT_FOLDER, exist_ok=True)

# Initialize the API client
client = cdsapi.Client()

# The month is taken from the MONTH parameter (zero-padded, e.g. 1 -> "01").
month = f"{int(MONTH):02d}"

# Loop through each year of the requested range (END_YEAR inclusive)
for year_number in range(START_YEAR, END_YEAR + 1):
    year = str(year_number)

    output_filename = f"wind_data_{year}_{month}.grib"
    output_file_path = os.path.join(OUTPUT_FOLDER, output_filename)
    # Download to a temporary name first: an interrupted transfer must not
    # leave a truncated .grib behind that the check below would then skip.
    partial_file_path = output_file_path + ".part"

    if os.path.exists(output_file_path):
        print(f"Skipping {output_filename}, file already exists.")
        continue

    print(f"--- Downloading data for {year}-{month} ---")

    try:
        request = {
            "product_type": "reanalysis",
            "format": "grib",
            "variable": [
                "u_component_of_wind",
                "v_component_of_wind"
            ],
            'pressure_level': PRESSURE_LEVEL,
            "year": year,
            "month": month,
            "day": [str(d).zfill(2) for d in range(1, 32)], # API handles non-existent days
            'time': [f'{h:02d}:00' for h in range(0, 24)], # All 24 hours
            "area": [6, 95, -11, 141], # North, West, South, East
        }

        # Retrieve and download the data, then publish it under its final name
        client.retrieve(DATASET, request, partial_file_path)
        os.replace(partial_file_path, output_file_path)

        print(f"Successfully downloaded to: {output_file_path}")

    except Exception as e:
        # Best effort: report the failure and continue with the next year.
        print(f"!!! FAILED to download data for {year}-{month}. Error: {e}")
        if os.path.exists(partial_file_path):
            os.remove(partial_file_path)

print("\n--- Automation complete. ---")

2025-07-29 07:06:56,741 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.


--- Downloading data for 2023-01 ---


2025-07-29 07:06:57,419 INFO Request ID is 359b0129-7b9c-4378-85b3-fa6d6da96ba7
2025-07-29 07:06:57,728 INFO status has been updated to accepted
2025-07-29 07:07:07,540 INFO status has been updated to successful


fbf3d81b3a25bb766336b5607d712de7.grib:   0%|          | 0.00/36.4M [00:00<?, ?B/s]

Successfully downloaded to: ../data/wind_data_975hpa/wind_data_2023_01.grib
--- Downloading data for 2024-01 ---


2025-07-29 07:07:44,909 INFO Request ID is 399a384b-d51d-4b82-913c-4bf61ce22a6e
2025-07-29 07:07:45,349 INFO status has been updated to accepted
2025-07-29 07:08:00,313 INFO status has been updated to successful


b4ce9f188872cea9e4eeff279a94c192.grib:   0%|          | 0.00/36.4M [00:00<?, ?B/s]

Successfully downloaded to: ../data/wind_data_975hpa/wind_data_2024_01.grib

--- Automation complete. ---


In [None]:

# Convert the downloaded GRIB files into one coarsened DataFrame.
#
# Each file is opened with the cfgrib engine, block-averaged on the
# latitude/longitude grid and flattened to a DataFrame; the per-file frames are
# then concatenated into `combined_df`, which gains a `wind_speed` column.
#
# Per the original note the source grid step is 0.25 degrees, so a factor of 2
# gives a 0.5-degree grid.
COARSEN_FACTOR = 2

# Find all .grib files
grib_files = sorted(glob.glob(os.path.join(INPUT_FOLDER, '*.grib')))

# Fail loudly instead of only printing: otherwise `combined_df` is never
# assigned and later cells either raise NameError or silently reuse a stale
# frame left in the kernel by an earlier run.
if not grib_files:
    raise FileNotFoundError(f"Error: No .grib files found in '{INPUT_FOLDER}'")

print(f"Found {len(grib_files)} GRIB files to process.")

all_coarse_dataframes = []
for file_path in grib_files:
    print(f"Processing and coarsening {os.path.basename(file_path)}...")
    try:
        with xr.open_dataset(file_path, engine='cfgrib') as ds:
            # boundary='trim' drops edge cells that do not fill a whole block.
            coarse_ds = ds.coarsen(
                latitude=COARSEN_FACTOR,
                longitude=COARSEN_FACTOR,
                boundary='trim',
            ).mean()

            all_coarse_dataframes.append(coarse_ds.to_dataframe())
    except Exception as e:
        # Best effort per file: report the problem and keep going.
        print(f"!!! Could not process {file_path}. Error: {e}")

if not all_coarse_dataframes:
    raise RuntimeError(
        f"None of the {len(grib_files)} GRIB files in '{INPUT_FOLDER}' could be processed."
    )

combined_df = pd.concat(all_coarse_dataframes).reset_index()

# NOTE(review): the speed is derived from the block-averaged u and v, which is
# not the same as averaging the per-cell speeds - confirm this is intended.
combined_df['wind_speed'] = np.sqrt(combined_df['u']**2 + combined_df['v']**2)

print("\n--- Processing Complete --- ✅")
print("Final Coarse DataFrame Head:")
print(combined_df.head())

        

In [None]:
# Print a summary of the combined frame (columns, dtypes, non-null counts)
# as a quick sanity check of the conversion step.
combined_df.info()