# Fire data preprocessing

## defining two functions for download and unpacking the data

for instalation of cdsapi follow https://cds.climate.copernicus.eu/api-how-to

In [9]:
import cdsapi

def data_download(year, month, region):

    c = cdsapi.Client()
    c.retrieve(
        'satellite-fire-burned-area',
        {
            'origin': 'esa_cci',
            'sensor': 'modis',
            'variable': 'grid_variables',
            'version': '5_1_1cds',
            'region': f'{region}',
            'year': f'{year}',
            'month': f'{str(month).zfill(2)}',
            'nominal_day': '01',
            'format': 'tgz',
        },
        f'../data/{region}_{year}_{str(month).zfill(2)}.tar.gz')
       
    return unpack_data()


In [10]:
def unpack_data():
    import os
    import glob
    data_dir = '../data'

# Find all .tar.gz files in the data directory
    tar_files = glob.glob(os.path.join(data_dir, '*.tar.gz'))

# Unpack and remove each .tar.gz file
    for tar_file in tar_files:
        os.system(f'tar -xf {tar_file} -C {data_dir}')
        os.remove(tar_file)

### here you can use the functions to download the data and unpack it

In [11]:
#just select date and region
#modifying forloop should not be necessary

start_year = 2001
end_year = 2019
start_month = 8
end_month = 8
region = 'europe'

for i in range(start_year, end_year + 1):
    for j in range(start_month + 1 , end_month + 2):
        data_download(i, j, region)

2023-06-06 09:32:37,503 INFO Welcome to the CDS
2023-06-06 09:32:37,504 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/satellite-fire-burned-area
2023-06-06 09:32:37,548 INFO Request is queued
2023-06-06 09:32:38,585 INFO Request is running
2023-06-06 09:32:40,141 INFO Request is completed
2023-06-06 09:32:40,143 INFO Downloading https://download-0018.copernicus-climate.eu/cache-compute-0018/cache/data6/dataset-satellite-fire-burned-area-1ae1e615-4ae5-408e-9bf4-de9773d05e3f.tar.gz to ../data/europe_2001_09.tar.gz (1.9M)
2023-06-06 09:32:40,814 INFO Download rate 2.9M/s   
2023-06-06 09:32:40,909 INFO Welcome to the CDS
2023-06-06 09:32:40,910 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/satellite-fire-burned-area
2023-06-06 09:32:40,996 INFO Request is queued
2023-06-06 09:32:42,017 INFO Request is running
2023-06-06 09:32:43,543 INFO Request is completed
2023-06-06 09:32:43,545 INFO Downloading https://download-0016.copernicus-

## Preprocessing data and saving the output as fire.csv

### function

In [12]:
import os
import xarray as xr
import pandas as pd

def fire_preprocessing(file_path):
    ds_fire = xr.open_dataset(file_path, engine='netcdf4')
    
    filtered_data = ds_fire.squeeze('time')
    filtered_data = filtered_data.sel(lat=slice(52, 33), lon=slice(-10, 30))
    filtered_data = filtered_data[['burned_area',
                                   'fraction_of_burnable_area'
                                   ]]
    filtered_data = filtered_data.where(filtered_data['fraction_of_burnable_area'] > 0, drop = True)
    
    fire_df = filtered_data.to_dataframe().reset_index()
    fire_df['year'] = fire_df['time'].dt.year
    fire_df['month'] = fire_df['time'].dt.month
    fire_df = fire_df.drop('time', axis= 1).drop_duplicates().dropna()
    
    return fire_df

### this will call the preprocessing function on all the .nc files in ../data folder, preprocess it and save the result in ../data folder in fire.csv file

In [14]:
data_dir = '../data'
dfs = []

for file_name in os.listdir(data_dir):
    if file_name.endswith('.nc') and 'FIRE' in file_name:
        file_path = os.path.join(data_dir, file_name)
        df = fire_preprocessing(file_path)
        dfs.append(df)

final_df = pd.concat(dfs, ignore_index=True)
final_df.to_csv(os.path.join(data_dir, 'fire.csv'), index=False)