# Testing rescaling function
The function we are creating will be useful in order to convert the resolution of a latitude-longitude grid. 

Input parameters : 



*   data : xarray object containing the data to interpolate, with "latitude" and "longitude" among its index levels
*   target_res : target resolution (can be greater or lower than the original)
*   method : string, "linear" or "nearest" (default)

Return :


*   xarray object with a new index (computed from the target resolution and the starting values of the original indexes) and interpolated values of the variables (depending on the method you choose)









## Install dependencies

In [1]:
# Install library to read netcds files
!pip install netcdf4



The next cell mounts your Google Drive folder in the Colab notebook: click on the link that appears under the cell, log in, and copy the string into the cell's input field to connect.

In [2]:
# Mount Google Drive at /content/drive (only available inside Google Colab).
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Change the current directory to a folder called "ESoWC"; make sure you have created it in your Drive beforehand.
(It should be created automatically, but you never know :) )

In [3]:
# Make the ESoWC Drive folder the working directory so the relative data path used below resolves.
%cd /content/drive/My Drive/ESoWC

/content/drive/My Drive/ESoWC


## Function: scale grid

In order to test the function we select a small portion of the original CAMS concentration measurements and analyze the interpolation results on this small frame.
For easy visualization we apply the function to the xarray object, but visualize the pandas dataframe.

In [4]:
import xarray as xr

# Path of the CAMS analysis file, relative to the current working directory.
filename = 'Data/CAMS_analysis_MAY_2019_00_24.nc'
# Open the netCDF file with xarray.
da = xr.open_dataset(filename)
# Display the dataset summary.
da

In [5]:
# Keep only the points with latitude <= 44.65 (drop=True discards what falls outside the selection).
sel = da.where(da.latitude <=44.65  , drop = True)
# From that subset, keep only the points with longitude <= 8.65.
reduced = sel.where(sel.longitude <= 8.65, drop = True)
# Display the reduced dataset.
reduced

In [6]:
# Convert the reduced xarray selection into a pandas DataFrame for easier inspection.
df = reduced.to_dataframe()
# Display the dataframe.
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,co_conc,no2_conc,no_conc,o3_conc,pm10_conc,pm2p5_conc
latitude,level,longitude,time,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
44.650002,0.0,8.55,0 days 00:00:00,166.303879,6.610581,0.006509,97.867714,12.873794,9.136590
44.650002,0.0,8.55,0 days 01:00:00,163.358307,5.797691,0.005642,100.380684,15.227795,9.639406
44.650002,0.0,8.55,0 days 02:00:00,157.803543,5.202508,0.005109,96.185463,15.121656,9.960825
44.650002,0.0,8.55,0 days 03:00:00,156.223511,4.489035,0.005983,91.598442,14.549563,9.901752
44.650002,0.0,8.55,0 days 04:00:00,155.652863,4.132488,0.007987,89.102844,14.079832,9.694771
...,...,...,...,...,...,...,...,...,...
44.549999,0.0,8.65,30 days 19:00:00,153.359818,7.646339,0.130058,113.437927,14.121273,13.699776
44.549999,0.0,8.65,30 days 20:00:00,165.859299,8.432046,0.107320,109.783920,14.817377,14.375214
44.549999,0.0,8.65,30 days 21:00:00,176.851822,10.418944,0.034667,99.340698,15.254338,14.789965
44.549999,0.0,8.65,30 days 22:00:00,181.507019,12.880733,0.011710,84.259216,14.823131,14.345167


In [7]:
import numpy as np


# Break the "time" timedelta level into whole days and hour-of-day, round the
# coordinates to 2 decimals, and rebuild the index from those four values.
elapsed = df.index.get_level_values("time")
df = (
    df.assign(
        Days=elapsed.days,
        Hours=elapsed.seconds // 3600,
        lat=np.around(df.index.get_level_values("latitude"), 2),
        long=np.around(df.index.get_level_values("longitude"), 2),
    )
    # Replacing the index (no append) discards the old "latitude", "level",
    # "longitude" and "time" levels.
    .set_index(['lat', 'long', 'Days', 'Hours'])
    .rename_axis(index=['latitude', 'longitude', 'Days', 'Hours'])
)

In [8]:
# Preview the first rows of the re-indexed dataframe.
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,co_conc,no2_conc,no_conc,o3_conc,pm10_conc,pm2p5_conc
latitude,longitude,Days,Hours,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
44.65,8.55,0,0,166.303879,6.610581,0.006509,97.867714,12.873794,9.13659
44.65,8.55,0,1,163.358307,5.797691,0.005642,100.380684,15.227795,9.639406
44.65,8.55,0,2,157.803543,5.202508,0.005109,96.185463,15.121656,9.960825
44.65,8.55,0,3,156.223511,4.489035,0.005983,91.598442,14.549563,9.901752
44.65,8.55,0,4,155.652863,4.132488,0.007987,89.102844,14.079832,9.694771


In [9]:
# Keep only the rows belonging to the first day (Days == 0).
is_first_day = df.index.get_level_values('Days') == 0
df_selection = df.loc[is_first_day]
df_selection

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,co_conc,no2_conc,no_conc,o3_conc,pm10_conc,pm2p5_conc
latitude,longitude,Days,Hours,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
44.65,8.55,0,0,166.303879,6.610581,0.006509,97.867714,12.873794,9.136590
44.65,8.55,0,1,163.358307,5.797691,0.005642,100.380684,15.227795,9.639406
44.65,8.55,0,2,157.803543,5.202508,0.005109,96.185463,15.121656,9.960825
44.65,8.55,0,3,156.223511,4.489035,0.005983,91.598442,14.549563,9.901752
44.65,8.55,0,4,155.652863,4.132488,0.007987,89.102844,14.079832,9.694771
...,...,...,...,...,...,...,...,...,...
44.55,8.65,0,19,163.655258,5.394433,0.102773,113.270790,13.648726,12.442271
44.55,8.65,0,20,160.770355,7.640146,0.091859,110.265816,13.724623,11.301576
44.55,8.65,0,21,161.823700,7.357179,0.022281,110.038719,14.640473,10.683568
44.55,8.65,0,22,165.873016,6.643798,0.018565,111.396194,14.403239,10.215868


In [10]:
# Restrict the first-day selection to hours 0 through 4.
hour_of_day = df_selection.index.get_level_values('Hours')
df_select_hourly = df_selection.loc[hour_of_day <= 4]
df_select_hourly

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,co_conc,no2_conc,no_conc,o3_conc,pm10_conc,pm2p5_conc
latitude,longitude,Days,Hours,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
44.65,8.55,0,0,166.303879,6.610581,0.006509,97.867714,12.873794,9.13659
44.65,8.55,0,1,163.358307,5.797691,0.005642,100.380684,15.227795,9.639406
44.65,8.55,0,2,157.803543,5.202508,0.005109,96.185463,15.121656,9.960825
44.65,8.55,0,3,156.223511,4.489035,0.005983,91.598442,14.549563,9.901752
44.65,8.55,0,4,155.652863,4.132488,0.007987,89.102844,14.079832,9.694771
44.65,8.65,0,0,174.830841,9.064035,0.007518,98.7062,14.357748,9.443238
44.65,8.65,0,1,170.862778,7.773185,0.006558,102.320412,15.259172,9.844498
44.65,8.65,0,2,162.089539,6.700667,0.00523,99.626617,14.444993,9.068241
44.65,8.65,0,3,158.35025,5.61587,0.006352,97.751244,14.040246,10.07052
44.65,8.65,0,4,156.923645,5.353683,0.012449,95.213631,13.807581,8.90533


In [11]:
# Convert the small selection back to an xarray object, the input type used for rescale_grid below.
# NOTE: this rebinds `da`, which earlier held the dataset opened from the .nc file.
da = df_select_hourly.to_xarray()
# Display the resulting object.
da

In [16]:
import numpy as np
import math 
#from rescaling import rescale_grid
# Slack, in units of grid steps, added before flooring the step count so that
# floating-point noise (e.g. 0.7 / 0.1 -> 6.999...) does not drop the last point.
_STEP_TOLERANCE = 1e-6


def _rescaled_axis(coords, target_res):
  """Return a regular axis of step ``target_res`` covering the span of ``coords``.

  The axis starts at the smallest value of ``coords`` rounded to 2 decimals and
  contains as many whole steps as fit between the smallest and largest value.
  Every returned value is rounded to 2 decimals.
  """
  # Work on a float64 array and only read from it: ``.values`` may hand back the
  # array backing the coordinate, so an in-place sort could reorder the caller's data.
  coords = np.asarray(coords, dtype=float)
  lowest = float(coords.min())
  highest = float(coords.max())
  n_steps = int(np.floor((highest - lowest) / target_res + _STEP_TOLERANCE))
  return np.around(np.arange(n_steps + 1) * target_res + round(lowest, 2), decimals=2)


def rescale_grid(data, target_res, method = 'nearest'):
  """Interpolate ``data`` onto a regular latitude/longitude grid of step ``target_res``.

  The new latitude and longitude axes start at the smallest original value
  (rounded to 2 decimals) and advance by ``target_res`` while they stay within
  the original extent. The values on the new grid come from ``data.interp``.

  Parameters
  ----------
  data : object exposing ``variables``, ``latitude``, ``longitude`` and ``interp``
      In this notebook, the xarray object produced by ``DataFrame.to_xarray()``.
      It is not modified.
  target_res : float
      Step of the new grid, in the same units as the coordinates. Must be > 0.
      It may be larger or smaller than the original step.
  method : str, default 'nearest'
      Interpolation method, passed unchanged to ``data.interp``.

  Returns
  -------
  The result of ``data.interp`` evaluated on the new latitude/longitude axes.

  Raises
  ------
  AssertionError
      If ``data`` has no 'latitude' or no 'longitude' variable.
  ValueError
      If ``target_res`` is not a positive number.
  """
  assert ('latitude' in data.variables),"latitude column missing (name must be 'latitude')"
  assert ('longitude' in data.variables),"longitude column missing (name must be 'longitude')"
  if not target_res > 0:
    raise ValueError("target_res must be a positive number, got %r" % (target_res,))

  new_lat_values = _rescaled_axis(data.latitude.values, target_res)
  new_long_values = _rescaled_axis(data.longitude.values, target_res)

  return data.interp(latitude = new_lat_values, longitude = new_long_values, method = method)

# Refine the selected grid to a 0.05 step using linear interpolation.
da_rescale = rescale_grid(da, target_res=0.05, method='linear')

In [17]:
# Flatten the interpolated grid into a dataframe indexed by
# (latitude, longitude, Days, Hours) so it can be inspected as a table.
df = da_rescale.to_dataframe().reorder_levels(['latitude','longitude','Days', 'Hours'])

df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,co_conc,no2_conc,no_conc,o3_conc,pm10_conc,pm2p5_conc
latitude,longitude,Days,Hours,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
44.55,8.55,0,0,161.228439,6.017477,0.00297,107.051453,13.415171,8.935563
44.55,8.6,0,0,165.389038,7.113887,0.003695,107.473862,14.447163,9.180419
44.55,8.65,0,0,169.549637,8.210297,0.00442,107.896271,15.479155,9.425276
44.6,8.55,0,0,163.766159,6.314029,0.004739,102.459583,13.144482,9.036077
44.6,8.6,0,0,167.978199,7.475597,0.005354,102.880409,14.031467,9.235167
44.6,8.65,0,0,172.190239,8.637166,0.005969,103.301235,14.918451,9.434257
44.65,8.55,0,0,166.303879,6.610581,0.006509,97.867714,12.873794,9.13659
44.65,8.6,0,0,170.56736,7.837308,0.007013,98.286957,13.615771,9.289914
44.65,8.65,0,0,174.830841,9.064035,0.007518,98.7062,14.357748,9.443238
44.55,8.55,0,1,155.539841,5.553236,0.002181,106.760406,15.926343,9.566974
