# Fire data preprocessing

## Defining functions for downloading, unpacking, processing and saving the fire data

For installation of cdsapi, follow https://cds.climate.copernicus.eu/api-how-to

### This function will download both pixel and grid data

In [1]:
import cdsapi

def data_download(year, month, region):
    """Download one month of burned-area data (pixel and grid) and unpack it.

    Two requests are sent to the CDS dataset ``satellite-fire-burned-area``:
    one for ``pixel_variables`` and one for ``grid_variables``. The archives
    are saved as ``../data/{region}_{year}_{MM}_pixel.tar.gz`` and
    ``../data/{region}_{year}_{MM}_grid.tar.gz``; afterwards
    ``unpack_data()`` is called.

    Args:
        year: Year to request; formatted into the request as a string.
        month: Month to request; zero-padded to two digits.
        region: Region name sent with the request and used in the archive
            file names.

    Returns:
        Whatever ``unpack_data()`` returns.
    """
    import os

    data_dir = '../data'
    # Make sure the download target exists so a fresh checkout does not fail
    # only after the request has already been processed.
    os.makedirs(data_dir, exist_ok=True)

    month_str = str(month).zfill(2)
    client = cdsapi.Client()  # one client serves both requests

    # Both products share the same request; only the variable group and the
    # file-name suffix differ. Pixel data is fetched first, then grid data.
    for kind in ('pixel', 'grid'):
        client.retrieve(
            'satellite-fire-burned-area',
            {
                'origin': 'esa_cci',
                'sensor': 'modis',
                'variable': f'{kind}_variables',
                'version': '5_1_1cds',
                'region': f'{region}',
                'year': f'{year}',
                'month': month_str,
                'nominal_day': '01',
                'format': 'tgz',
            },
            f'{data_dir}/{region}_{year}_{month_str}_{kind}.tar.gz')

    return unpack_data()


### this function unpacks data

In [2]:
import os

def unpack_data():
    """Extract every ``.tar.gz`` archive in ``../data`` and delete it afterwards.

    Each archive is extracted into ``../data``. An archive is removed only
    after it was extracted successfully; an archive that cannot be read is
    left in place and reported with a warning, so a failed extraction never
    costs the downloaded file.

    Returns:
        None
    """
    import glob
    import os
    import tarfile
    import warnings

    data_dir = '../data'

    # Find all .tar.gz files in the data directory
    tar_files = glob.glob(os.path.join(data_dir, '*.tar.gz'))

    for tar_file in tar_files:
        try:
            with tarfile.open(tar_file) as archive:
                # Use the safe 'data' extraction filter where the running
                # Python provides it; older versions lack the argument.
                if hasattr(tarfile, 'data_filter'):
                    archive.extractall(data_dir, filter='data')
                else:
                    archive.extractall(data_dir)
        except (tarfile.TarError, OSError, EOFError) as err:
            warnings.warn(f'could not unpack {tar_file}: {err}')
            continue

        # Only delete the archive once its contents are on disk.
        os.remove(tar_file)

## Preprocessing data and saving the output as fire.csv

### this is used to round lat and lon to shrink the data

In [20]:
import numpy as np

def round_to_nearest_025(x):
    """Snap ``x`` to the nearest multiple of 0.25.

    The input is scaled by 4, rounded with ``np.round`` and scaled back.
    """
    quarter_steps = np.round(x * 4)
    return quarter_steps / 4

 ### this is for preprocessing fire pixel data

In [21]:
import xarray as xr
import pandas as pd

def pixel_preprocessing(file_path):
    """Load a fire pixel NetCDF file and flatten it into a DataFrame.

    Steps, in order:
      1. round the ``lat``/``lon`` coordinates with ``round_to_nearest_025``;
      2. squeeze the ``time`` dimension and keep the label range
         ``lat=slice(52, 33)``, ``lon=slice(-10, 50)``;
      3. keep ``CL``, ``lat_bounds`` and ``lon_bounds`` and drop duplicated
         ``lat``/``lon`` labels;
      4. convert to a DataFrame, drop the bounds columns, and replace
         ``time`` with ``year`` and ``month`` columns.

    Args:
        file_path: Path of the NetCDF file to read.

    Returns:
        pandas.DataFrame without NaN rows. The caller in this file relies on
        the columns ``lat``, ``lon``, ``year``, ``month`` and ``CL``.
    """
    # The context manager closes the file handle once the data has been
    # materialised as a DataFrame.
    with xr.open_dataset(file_path, engine='netcdf4') as pixel_ds:
        pixel_ds['lat'] = xr.apply_ufunc(round_to_nearest_025, pixel_ds['lat'])
        pixel_ds['lon'] = xr.apply_ufunc(round_to_nearest_025, pixel_ds['lon'])

        filtered_data = pixel_ds.squeeze('time')
        filtered_data = filtered_data.sel(lat=slice(52, 33), lon=slice(-10, 50))
        filtered_data = filtered_data[['CL',
                                       'lat_bounds',
                                       'lon_bounds'
                                       ]]

        # Rounding can map several original coordinates onto the same label.
        filtered_data = filtered_data.drop_duplicates(dim=['lat', 'lon'])

        pixel_df = filtered_data.to_dataframe().reset_index()

    pixel_df = pixel_df.drop(['lat_bounds', 'lon_bounds', 'bounds'], axis=1)

    pixel_df['year'] = pixel_df['time'].dt.year
    pixel_df['month'] = pixel_df['time'].dt.month
    pixel_df = pixel_df.drop('time', axis=1).dropna()
    return pixel_df

 ### this is for preprocessing fire grid data

In [22]:
def fire_preprocessing(file_path):
    """Load a fire grid NetCDF file and flatten it into a DataFrame.

    The ``time`` dimension is squeezed, the label range
    ``lat=slice(52, 33)``, ``lon=slice(-10, 50)`` is selected, and only cells
    with ``fraction_of_burnable_area > 0`` are kept. After conversion to a
    DataFrame, ``time`` is replaced by ``year`` and ``month`` columns and the
    ``lat``, ``lon`` and ``bounds`` columns are dropped, so each row is
    identified by ``lat_bounds``/``lon_bounds``.

    Args:
        file_path: Path of the NetCDF file to read.

    Returns:
        pandas.DataFrame without duplicate or NaN rows, holding
        ``burned_area``, ``fraction_of_burnable_area``, ``lat_bounds``,
        ``lon_bounds``, ``year`` and ``month``.
    """
    # The context manager closes the file handle once the data has been
    # materialised as a DataFrame.
    with xr.open_dataset(file_path, engine='netcdf4') as ds_fire:
        filtered_data = ds_fire.squeeze('time')
        filtered_data = filtered_data.sel(lat=slice(52, 33), lon=slice(-10, 50))
        filtered_data = filtered_data[['burned_area',
                                       'fraction_of_burnable_area',
                                       'lat_bounds',
                                       'lon_bounds'
                                       ]]
        filtered_data = filtered_data.where(filtered_data['fraction_of_burnable_area'] > 0, drop=True)
        # Optional stricter filter, currently disabled:
        # filtered_data = filtered_data.where(filtered_data['burned_area'] > 1, drop = True)

        fire_df = filtered_data.to_dataframe().reset_index()

    fire_df['year'] = fire_df['time'].dt.year
    fire_df['month'] = fire_df['time'].dt.month
    fire_df = fire_df.drop(['lat', 'lon', 'time', 'bounds'], axis=1).drop_duplicates().dropna()

    return fire_df

## With this function you can preprocess both grid and pixel data at the same time and combine them into a single output CSV file

### here you can use the functions to download the data and unpack it

In [7]:
# Just select the date range and the region.
# Modifying the for loop should not be necessary.

start_year, end_year = 2001, 2019
start_month, end_month = 5, 8
region = 'europe'

# Download every selected month of every selected year (both bounds inclusive).
for year in range(start_year, end_year + 1):
    for month in range(start_month, end_month + 1):
        data_download(year, month, region)

2023-06-07 12:11:43,572 INFO Welcome to the CDS
2023-06-07 12:11:43,573 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/satellite-fire-burned-area
2023-06-07 12:11:43,651 INFO Request is queued
2023-06-07 12:11:44,674 INFO Request is running
2023-06-07 12:11:51,883 INFO Request is completed
2023-06-07 12:11:51,886 INFO Downloading https://download-0020.copernicus-climate.eu/cache-compute-0020/cache/data6/dataset-satellite-fire-burned-area-a1c68812-5ffc-42cd-bc50-0ea5f030712d.tar.gz to ../data/europe_2001_05_pixel.tar.gz (27M)
2023-06-07 12:11:57,146 INFO Download rate 5.1M/s   
2023-06-07 12:11:57,252 INFO Welcome to the CDS
2023-06-07 12:11:57,254 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/satellite-fire-burned-area
2023-06-07 12:11:57,329 INFO Request is queued
2023-06-07 12:11:58,376 INFO Request is running
2023-06-07 12:11:59,949 INFO Request is completed
2023-06-07 12:11:59,950 INFO Downloading https://download-0004-clone.

### This calls the preprocessing functions on all the .nc files in the ../data folder and saves the combined result in the same folder as fire2.csv

In [77]:
def fire_gird_and_pixel(data_dir):
    """Merge matching pixel and grid fire files and write them to ``fire2.csv``.

    Every ``.nc`` file in ``data_dir`` whose name contains ``FIRE`` is grouped
    by the first 8 characters of its name. A group is processed only when it
    holds exactly one pixel file (name contains ``AREA``) and one grid file;
    any other group is skipped. For each processed group the pixel frame is
    right-merged onto the grid frame and the result is written to
    ``<data_dir>/fire2.csv``: the first group overwrites the file and writes
    the header, later groups are appended.

    Args:
        data_dir: Directory that holds the ``.nc`` files and receives the CSV.

    Returns:
        None
    """
    column_order = ['lat_bounds',
                    'lon_bounds',
                    'year',
                    'month',
                    'fraction_of_burnable_area',
                    'burned_area',
                    'CL'
                    ]
    output_path = os.path.join(data_dir, 'fire2.csv')

    # Group files by name prefix so the correct files are processed together.
    # Sorting makes the processing order, and thus the CSV row order,
    # independent of the directory listing order.
    file_pairs = {}
    for file_name in sorted(os.listdir(data_dir)):
        if file_name.endswith('.nc') and 'FIRE' in file_name:
            file_pairs.setdefault(file_name[:8], []).append(file_name)

    first_iteration = True
    for file_pair in file_pairs.values():
        if len(file_pair) != 2:
            continue

        pixel_files = [name for name in file_pair if 'AREA' in name]
        grid_files = [name for name in file_pair if 'AREA' not in name]
        # Two files of the same kind are not a usable pair; skipping them
        # avoids merging against a frame left over from an earlier group.
        if len(pixel_files) != 1 or len(grid_files) != 1:
            continue

        df_pixel = pixel_preprocessing(os.path.join(data_dir, pixel_files[0]))
        df_grid = fire_preprocessing(os.path.join(data_dir, grid_files[0]))

        # Merge pixel and grid data, keeping every grid row.
        merged_df = df_pixel.merge(
            df_grid,
            how='right',
            left_on=['lat', 'lon', 'year', 'month'],
            right_on=['lat_bounds', 'lon_bounds', 'year', 'month'])

        # Drop the columns that are no longer needed, then reorder.
        merged_df = merged_df.drop(['lat', 'lon'], axis=1).drop_duplicates()
        merged_df = merged_df[column_order]

        # The first write replaces any existing file and emits the header.
        merged_df.to_csv(
            output_path,
            mode='w' if first_iteration else 'a',
            header=first_iteration,
            index=False)
        first_iteration = False

In [78]:
# Run the combined grid/pixel preprocessing on the ../data folder.
fire_gird_and_pixel('../data')