# Create the environment

In [6]:
# Mount Google Drive inside the Colab VM at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [7]:
# Make the project folder the working directory so the relative 'Data/...' paths used in this notebook resolve.
%cd /content/drive/My Drive/ESoWC

/content/drive/My Drive/ESoWC


In [8]:
import pandas as pd
import xarray as xr

import numpy as np
import pandas as pd
from sklearn import preprocessing
import seaborn as sns

In [9]:
# Input NetCDF files, given relative to the current working directory.
fn_land = 'Data/land_cover_data.nc'  # opened below as `land`
fn_weather = 'Data/05_2019_weather_and_CO_for_model.nc'  # opened below as `weather`
fn_conc = 'Data/totalcolConcentretations_featured.nc'  # opened below as `conc`
fn_traffic = 'Data/emissions_traffic_hourly_merged.nc'  # opened below as `traffic`

# Load datasets

## Land

In [10]:
# Open the land NetCDF file as an xarray Dataset and display it for inspection.
land = xr.open_dataset(fn_land)
land

In [11]:
# Reshape the land dataset onto (latitude, longitude, Days, Hours).
hours = np.arange(0, 24, 1)

# 'NO emissions' is dropped: per the original note it is already in the weather dataset.
# The remaining variables are broadcast along a new 24-value 'Hours' dimension.
land_hourly = land.drop_vars('NO emissions').expand_dims({'Hours': hours})

land_fixed = (
    land_hourly
    # Replace each timestamp by its day-of-month.
    .assign_coords(time=land_hourly.time.dt.day)
    # Use the same dimension names as the transposes applied to the other datasets.
    .rename({'time': 'Days', 'lon': 'longitude', 'lat': 'latitude'})
    .transpose('latitude', 'longitude', 'Days', 'Hours')
)
land_fixed

## Weather

In [12]:
# Open the weather NetCDF file as an xarray Dataset and display it for inspection.
weather = xr.open_dataset(fn_weather)
weather

In [13]:
# 'tcwv' is removed because it is too strongly correlated with 'tcw';
# the result is then put in (latitude, longitude, Days, Hours) dimension order.
weather_fixed = (
    weather
    .drop_vars('tcwv')
    .transpose('latitude', 'longitude', 'Days', 'Hours')
)
weather_fixed

## Concentrations

In [14]:
# Open the concentrations NetCDF file as an xarray Dataset and display it for inspection.
conc = xr.open_dataset(fn_conc)
conc

In [15]:
# Flatten the concentration dataset and re-key it by
# (latitude, longitude, Days, Hours) instead of by timestamp.
conc_long = conc.to_dataframe()
timestamps = conc_long.index.get_level_values("time")

dataframe = (
    conc_long
    # Day-of-month and hour-of-day taken from the 'time' index level.
    .assign(Days=timestamps.day, Hours=timestamps.hour)
    # Turn every index level into a column so 'time' can be discarded.
    .reset_index()
    .drop(["time"], axis=1)
    .set_index(['latitude', 'longitude', 'Days', 'Hours'])
)

dataframe.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,NO_tc,CO2_tc,CH4_tc,NO2_tc,CO_tc,O3_tc,NO_tc_add_trend,CO2_tc_add_trend,CH4_tc_add_trend,NO2_tc_add_trend,CO_tc_add_trend,O3_tc_add_trend
latitude,longitude,Days,Hours,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
43.0,4.0,1,1,,,,,,,,,,,,
43.0,4.0,1,2,4.242915e-07,412.077229,1872.138289,5e-06,0.001009,0.00792,1e-06,412.620326,1878.817495,3e-06,0.001065,0.007345
43.0,4.0,1,3,6.189903e-07,412.136477,1873.024578,4e-06,0.00101,0.007859,1e-06,412.588503,1878.369642,3e-06,0.001063,0.007345
43.0,4.0,1,4,8.136891e-07,412.195724,1873.910866,4e-06,0.001011,0.007797,1e-06,412.55668,1877.921788,3e-06,0.001062,0.007346
43.0,4.0,1,5,1.008388e-06,412.254971,1874.797155,3e-06,0.001012,0.007736,1e-06,412.524857,1877.473935,3e-06,0.001061,0.007346


In [16]:
# Convert the re-indexed frame back to an xarray Dataset; its index levels
# (latitude, longitude, Days, Hours) become the dimensions.
conc_fidex=dataframe.to_xarray()
conc_fidex

## Traffic

In [17]:
# Open the traffic NetCDF file as an xarray Dataset and display it for inspection.
traffic = xr.open_dataset(fn_traffic)
traffic

In [18]:
# Target grid: 0.25-step coordinates, latitude 43..51 and longitude 4..12
# (the arange stop values 51.25 / 12.25 are exclusive).
lat_bins = np.arange(43, 51.25, 0.25)
lon_bins = np.arange(4, 12.25, 0.25)
# Day labels 1..31 used for the new 'Days' dimension.
days = np.arange(1, 32, 1)

traffic_fixed = (
    traffic
    .drop_vars('emissions')
    # Coordinates are sorted before interpolating.
    .sortby(['latitude', 'longitude', 'hour'])
    # Linear interpolation onto the target grid.
    .interp(latitude=lat_bins, longitude=lon_bins, method="linear")
    # Broadcast the data along the new 'Days' dimension.
    .expand_dims({'Days': days})
    .rename({'hour': 'Hours'})
    .transpose('latitude', 'longitude', 'Days', 'Hours')
)
traffic_fixed

# Merge

In [19]:
# Combine the four prepared datasets into one, starting from the weather data
# and merging land, concentrations and traffic in that order.
# NOTE(review): the merge aligns on the shared coordinates; confirm all four
# inputs use identical latitude/longitude/Days/Hours values.
tot_dataset = (
    weather_fixed
    .merge(land_fixed)
    .merge(conc_fidex)
    .merge(traffic_fixed)
)

tot_dataset

In [20]:
# Write the merged dataset to disk as a NetCDF4 file; mode 'w' overwrites an existing file.
tot_dataset.to_netcdf(
    'Data/dataset_complete_for_model_CO.nc',
    mode='w',
    format='NETCDF4',
)

# Check

In [21]:
# Number of missing values per variable in the weather dataset.
weather_fixed.to_dataframe().isna().sum()

EMISSIONS_2019       26136
u10                   2178
v10                   2178
hcc                   2178
lcc                   2178
tcc                   2178
cvl                   2178
cvh                   2178
sp                    2178
tmp                   2178
sp_hum                2178
rel_hum               2178
tcw                   2178
tot_wind              2178
tmp_shift_8           2186
tot_wind_shift_12     2190
rel_hum_shift_8       2186
dtype: int64

In [22]:
# Number of missing values per variable in the land dataset.
land_fixed.to_dataframe().isna().sum()

height    23760
built     23760
dtype: int64

In [23]:
# Number of missing values per variable in the concentrations dataset.
conc_fidex.to_dataframe().isna().sum()

NO_tc               13073
CO2_tc              13073
CH4_tc              13073
NO2_tc              13073
CO_tc               13073
O3_tc               13073
NO_tc_add_trend     13073
CO2_tc_add_trend    13073
CH4_tc_add_trend    13073
NO2_tc_add_trend    13073
CO_tc_add_trend     13073
O3_tc_add_trend     13073
dtype: int64

In [24]:
# Number of missing values per variable in the traffic dataset.
traffic_fixed.to_dataframe().isna().sum()

traffic    382416
dtype: int64