# Climate Data Preprocessing

In [2]:
# extract climate data to put it in a pandas dataframe "data":
# lon: longitude > -10 to 30
# lat: latitude > 33 to 52
# year: year > 2001 to 2019
# month: 7 (the filter compares against `.dt.month`, which is 1-based, so 7 selects July; use 8 for August)
# tas: mean temperature
# tasmax: maximum temperature
# pr: total precipitation
# one row per year (2001 to 2019) and per location for the selected month > 19*41*20 = 15580 rows

import xarray as xr
import numpy as np
import pandas as pd

# Column order and dtypes of the final table `data`.
COLUMN_DTYPES = {
    'lon': 'float64',
    'lat': 'float64',
    'year': 'int64',
    'month': 'int64',
    'tas': 'float32',
    'tasmax': 'float32',
    'pr': 'float32',
}

YEARS = range(2001, 2020)     # 2001 to 2019 inclusive, one set of files per year
MONTH = 7                     # compared against `.dt.month`, which is 1-based
LAT_BOUNDS = (32.75, 52.25)   # inclusive bounds on the file's own `lat` values
LON_BOUNDS = (-10.25, 30.25)  # inclusive bounds on the file's own `lon` values


def _read_cru(variable, year):
    """Read one yearly CRU NetCDF file into a flat DataFrame.

    Parameters
    ----------
    variable : str
        File-name fragment identifying the variable, e.g. 'mean_temperature'.
    year : int
        Year inserted into the file name.

    Returns
    -------
    pandas.DataFrame
        The dataset converted with `to_dataframe()`, with its index levels
        turned into ordinary columns.
    """
    path = f'../data/climate/CRU_{variable}_mon_0.5x0.5_global_{year}_v4.03.nc'
    # The `with` block closes the file once the values have been copied into
    # pandas, instead of leaving three open handles per loop iteration.
    with xr.open_dataset(path) as dataset:
        return dataset.to_dataframe().reset_index()


def _regional_month_frame(year):
    """Build the aggregated regional table for `MONTH` of a single year.

    Rows inside the lat/lon bounding box are kept, coordinates are rounded to
    whole degrees, and rows sharing the same rounded location are aggregated
    (mean of `tas`, max of `tasmax`, mean of `pr`).

    Returns
    -------
    pandas.DataFrame
        Columns lon, lat, month, year, tas, tasmax, pr; sorted by the group
        keys (lon, lat, month, year) as produced by `groupby`.
    """
    temp_mean = _read_cru('mean_temperature', year)
    temp_max = _read_cru('maximum_temperature', year)
    pre_tot = _read_cru('total_precipitation', year)

    # The three variables are combined by row position.
    # NOTE(review): this assumes all three files share the same grid and row
    # order -- confirm against the data.
    frame = pd.concat(
        [temp_mean[['lon', 'lat', 'time', 'tas']], temp_max['tasmax'], pre_tot['pr']],
        axis=1,
    )
    frame['month'] = frame['time'].dt.month
    frame['year'] = frame['time'].dt.year

    in_region = (
        frame['lat'].between(*LAT_BOUNDS)
        & frame['lon'].between(*LON_BOUNDS)
        & (frame['month'] == MONTH)
    )
    # `.copy()` makes the filtered rows an independent frame, so the column
    # assignments below do not raise SettingWithCopyWarning.
    frame = frame.loc[in_region].drop(columns='time').copy()

    # Rounding to whole degrees makes neighbouring rows share one (lon, lat)
    # key, which the groupby below then aggregates.
    frame['lon'] = frame['lon'].round(decimals=0)
    frame['lat'] = frame['lat'].round(decimals=0)

    return (
        frame
        .groupby(['lon', 'lat', 'month', 'year'])
        .agg({'tas': 'mean', 'tasmax': 'max', 'pr': 'mean'})
        .reset_index()
    )


# Collect the per-year tables and concatenate once: this avoids re-copying the
# growing result on every iteration and avoids concatenating onto an empty
# seed frame. `ignore_index=True` yields the clean 0..n-1 index directly.
data = (
    pd.concat([_regional_month_frame(year) for year in YEARS], ignore_index=True)
    [list(COLUMN_DTYPES)]      # fixed column order: lon, lat, year, month, ...
    .astype(COLUMN_DTYPES)     # pin the declared dtypes explicitly
)

data

Unnamed: 0,lon,lat,year,month,tas,tasmax,pr
0,-10.0,33.0,2001,7,,,
1,-10.0,34.0,2001,7,,,
2,-10.0,35.0,2001,7,,,
3,-10.0,36.0,2001,7,,,
4,-10.0,37.0,2001,7,,,
...,...,...,...,...,...,...,...
15575,30.0,48.0,2019,7,20.799999,26.700001,35.599998
15576,30.0,49.0,2019,7,19.925001,26.300001,39.450001
15577,30.0,50.0,2019,7,19.799999,25.500000,54.950001
15578,30.0,51.0,2019,7,19.225000,24.600000,71.050003


In [3]:
# Persist the assembled climate table as CSV. No `index` argument is passed,
# so pandas' default applies and the row index is written as the first column.
climate_csv = '../data/climate.csv'
data.to_csv(climate_csv)