# Interpolation of ice-bed training data using the scipy.interpolate.RegularGridInterpolator method

2D Interpolation of all 5 variables.

Src: https://docs.scipy.org/doc/scipy/reference/generated/scipy.interpolate.RegularGridInterpolator.html#scipy.interpolate.RegularGridInterpolator

In [None]:
# Mount Google Drive at /content/drive (Colab only); the dataset paths used
# in the following cells all live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import h5py
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Location of the HDF5 dataset on the mounted Drive.
h5_file_location = '/content/drive/MyDrive/Ice-bed_dataset/hackathon.h5'
# NOTE: despite its name, `df` is an h5py.File handle opened read-only, not a
# DataFrame. It is not closed here; the next cell reads datasets from it.
df = h5py.File(h5_file_location, 'r')

In [None]:
def _load_frame(h5_file, name):
    """Return the HDF5 dataset ``name`` from ``h5_file`` as a DataFrame.

    Item access is used instead of ``.get()`` so that a missing or misspelled
    dataset name raises ``KeyError`` immediately. ``.get()`` returns ``None``
    for an unknown name, and ``pd.DataFrame(None)`` would silently produce an
    empty frame that only fails much later.
    """
    return pd.DataFrame(h5_file[name])


# Track datasets (training and testing).
track_bed_training = _load_frame(df, 'track_bed_training')
track_bed_testing = _load_frame(df, 'track_bed_testing')

# Surface coordinate datasets.
surf_x = _load_frame(df, 'surf_x')
surf_y = _load_frame(df, 'surf_y')

# The five surface variables that are interpolated later.
surf_SMB = _load_frame(df, 'surf_SMB')
surf_dhdt = _load_frame(df, 'surf_dhdt')
surf_elv = _load_frame(df, 'surf_elv')
surf_vx = _load_frame(df, 'surf_vx')
surf_vy = _load_frame(df, 'surf_vy')

In [None]:
# Transpose the two track tables, then take plain numpy views of everything
# that is used numerically below.
track_bed_training_T = track_bed_training.transpose()
track_bed_testing_T = track_bed_testing.transpose()

track_bed_training_T_arr = track_bed_training_T.to_numpy()
track_bed_testing_T_arr = track_bed_testing_T.to_numpy()

# The surface variables are converted as-is (no transpose).
surf_SMB_arr, surf_dhdt_arr, surf_elv_arr, surf_vx_arr, surf_vy_arr = (
    frame.to_numpy()
    for frame in (surf_SMB, surf_dhdt, surf_elv, surf_vx, surf_vy)
)

In [None]:
# Inspect the column order of the 1201x1201 test dataset.
data_1201 = pd.read_csv('/content/drive/MyDrive/Ice-bed_dataset/df_1201.csv')
data_1201.columns.tolist()

['Unnamed: 0',
 'surf_x',
 'surf_y',
 'surf_vx',
 'surf_vy',
 'surf_dhdt',
 'surf_SMB',
 'surf_elv']

In [None]:
# Stack the five surface variables into one array, one channel per variable.
# Channel order: 'surf_vx', 'surf_vy', 'surf_dhdt', 'surf_SMB', 'surf_elv'
#
# np.stack takes the grid size from the inputs instead of a hard-coded
# 1201x1201, and raises ValueError if the variables do not share one shape.
data_all = np.stack(
    [surf_vx_arr, surf_vy_arr, surf_dhdt_arr, surf_SMB_arr, surf_elv_arr],
    axis=0,
).astype(np.float64)  # float64 regardless of the inputs' precision

# Move the channel axis last: (5, rows, cols) -> (rows, cols, 5)
data_all_T = data_all.transpose(1, 2, 0)

data_all_T.shape

(1201, 1201, 5)

In [None]:
# Sanity check: channel 0 of the stacked array should hold the same value as
# surf_vx at the same grid cell.
data_all_T[1,1,0], surf_vx_arr[1,1]

(-117.95758819580078, -117.95759)

In [None]:
# Output table for the training records: 8 columns per row. The first three
# are copied from the training array; the remaining five start at zero and
# are filled with the interpolated variables later.
track_bed_training_T_interpotale = np.zeros((len(track_bed_training_T_arr), 8))
track_bed_training_T_interpotale[:, :3] = track_bed_training_T_arr

In [None]:
# Sanity check: the first three columns of output row 0 should equal the first
# training record; the five interpolated columns are still zero at this point.
track_bed_training_T_interpotale[0,:], track_bed_training_T.iloc[0], track_bed_training_T_arr[0,:]

(array([-2.06037406e+05, -1.82738400e+06,  5.38235168e+02,  0.00000000e+00,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00]),
 0   -2.060374e+05
 1   -1.827384e+06
 2    5.382352e+02
 Name: 0, dtype: float64,
 array([-2.06037406e+05, -1.82738400e+06,  5.38235168e+02]))

In [None]:
# interpolation of training data using surf_x(0) and surf_y(1)

from scipy.interpolate import RegularGridInterpolator
import numpy as np
import matplotlib.pyplot as plt

# Grid axes for the interpolator: column 0 of surf_x and row 0 of surf_y.
# NOTE(review): assumes surf_x varies only along rows and surf_y only along
# columns, so these two 1-D slices describe the whole grid -- confirm.
x = surf_x[0]
y = surf_y.iloc[0]

# bounds_error=False together with fill_value=None makes the interpolator
# extrapolate for query points outside the grid instead of raising.
interp = RegularGridInterpolator((x, y), data_all_T, bounds_error=False, fill_value=None)

# Evaluate every track point in one vectorized call: an (N, 2) array of
# (x, y) queries yields an (N, 5) array of interpolated variables. This avoids
# a Python-level loop over every row (and printing each row index), which is
# very slow for hundreds of thousands of points and floods the cell output.
int_data = interp(track_bed_training_T_arr[:, :2])
track_bed_training_T_interpotale[:, 3:] = int_data

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
391734
391735
391736
391737
391738
391739
391740
391741
391742
391743
391744
391745
391746
391747
391748
391749
391750
391751
391752
391753
391754
391755
391756
391757
391758
391759
391760
391761
391762
391763
391764
391765
391766
391767
391768
391769
391770
391771
391772
391773
391774
391775
391776
391777
391778
391779
391780
391781
391782
391783
391784
391785
391786
391787
391788
391789
391790
391791
391792
391793
391794
391795
391796
391797
391798
391799
391800
391801
391802
391803
391804
391805
391806
391807
391808
391809
391810
391811
391812
391813
391814
391815
391816
391817
391818
391819
391820
391821
391822
391823
391824
391825
391826
391827
391828
391829
391830
391831
391832
391833
391834
391835
391836
391837
391838
391839
391840
391841
391842
391843
391844
391845
391846
391847
391848
391849
391850
391851
391852
391853
391854
391855
391856
391857
391858
391859
391860
391861
391862
391863
391864
391865
391866
3918

In [None]:
# Wrap the filled numpy table in a labelled DataFrame: the coordinates and
# target come first, followed by the five interpolated surface variables.
column_values = [
    'surf_x',
    'surf_y',
    'track_bed_target',
    'surf_vx',
    'surf_vy',
    'surf_dhdt',
    'surf_SMB',
    'surf_elv',
]
track_bed_training_T_interpotale_df = pd.DataFrame(
    track_bed_training_T_interpotale,
    columns=column_values,
)
track_bed_training_T_interpotale_df

Unnamed: 0,surf_x,surf_y,track_bed_target,surf_vx,surf_vy,surf_dhdt,surf_SMB,surf_elv
0,-206037.406250,-1827384.000,538.235168,-102.379379,2.455286,-0.197761,0.300519,1873.835128
1,-205981.171875,-1827518.625,550.491638,-101.973237,2.884405,-0.197749,0.300364,1874.221764
2,-205953.109375,-1827586.125,563.800598,-101.784982,2.952367,-0.197750,0.300285,1874.520200
3,-205897.078125,-1827720.750,560.530823,-101.493970,2.288839,-0.197765,0.300125,1874.620712
4,-205841.328125,-1827855.625,545.708252,-101.271730,1.235018,-0.197797,0.299963,1875.128108
...,...,...,...,...,...,...,...,...
396729,-278061.656250,-1720043.625,240.890564,-105.011937,-4.932849,-0.403757,0.144332,1347.292140
396730,-278064.468750,-1720023.125,240.956131,-105.014680,-4.937994,-0.403847,0.144088,1347.204583
396731,-278067.125000,-1720001.375,233.992737,-105.020867,-4.940966,-0.403939,0.143831,1347.069799
396732,-278069.656250,-1719980.875,227.039673,-105.026963,-4.945610,-0.404026,0.143588,1346.941386


## Writing interpolated data in CSV format

In [None]:
# Persist the interpolated training table to Drive.
# NOTE(review): the DataFrame index is written as well (index=False is not
# passed), so it comes back as an extra 'Unnamed: 0' column when the file is
# read again. Left unchanged in case other consumers expect that column.
track_bed_training_T_interpotale_df.to_csv('/content/drive/MyDrive/Ice-bed_dataset/track_bed_training_interpotale.csv')

## Reading interpolated data from CSV file

In [None]:
# Reload the interpolated training table from Drive. No index_col is given,
# so the index column stored in the file is loaded as 'Unnamed: 0'.
training_data = pd.read_csv('/content/drive/MyDrive/Ice-bed_dataset/track_bed_training_interpotale.csv')

In [None]:
# Display the reloaded table.
training_data

Unnamed: 0.1,Unnamed: 0,surf_x,surf_y,track_bed_target,surf_vx,surf_vy,surf_dhdt,surf_SMB,surf_elv
0,0,-206037.406250,-1827384.000,538.235168,-102.379379,2.455286,-0.197761,0.300519,1873.835128
1,1,-205981.171875,-1827518.625,550.491638,-101.973237,2.884405,-0.197749,0.300364,1874.221764
2,2,-205953.109375,-1827586.125,563.800598,-101.784982,2.952367,-0.197750,0.300285,1874.520200
3,3,-205897.078125,-1827720.750,560.530823,-101.493970,2.288839,-0.197765,0.300125,1874.620712
4,4,-205841.328125,-1827855.625,545.708252,-101.271730,1.235018,-0.197797,0.299963,1875.128108
...,...,...,...,...,...,...,...,...,...
396729,396729,-278061.656250,-1720043.625,240.890564,-105.011937,-4.932849,-0.403757,0.144332,1347.292140
396730,396730,-278064.468750,-1720023.125,240.956131,-105.014680,-4.937994,-0.403847,0.144088,1347.204583
396731,396731,-278067.125000,-1720001.375,233.992737,-105.020867,-4.940966,-0.403939,0.143831,1347.069799
396732,396732,-278069.656250,-1719980.875,227.039673,-105.026963,-4.945610,-0.404026,0.143588,1346.941386


In [None]:
# Count the distinct surf_x values in the reloaded table.
training_data['surf_x'].nunique()

360677

In [None]:
# Rows whose surf_x value already occurred earlier in the table, ordered by
# surf_x (ascending).
training_data[training_data['surf_x'].duplicated()].sort_values('surf_x')

Unnamed: 0.1,Unnamed: 0,surf_x,surf_y,track_bed_target,surf_vx,surf_vy,surf_dhdt,surf_SMB,surf_elv
389508,389508,-278074.375000,-1720835.500,287.025116,-104.518996,-2.757143,-0.401091,0.149106,1346.816768
256092,256092,-278074.312500,-1868325.750,535.870483,-43.602915,5.647541,-1.130346,-1.083477,999.030093
333386,333386,-278073.375000,-1850596.625,498.840942,-223.043011,-65.312867,-0.996279,-0.873431,943.652616
396733,396733,-278072.343750,-1719959.250,227.064072,-105.033698,-4.952594,-0.404118,0.143332,1346.804583
357627,357627,-278072.343750,-1868152.625,508.006958,-44.622644,6.493073,-1.130818,-1.099425,995.045012
...,...,...,...,...,...,...,...,...,...
12014,12014,-98415.562500,-1717305.625,165.338776,-31.252689,6.878477,0.000569,0.365125,2501.668860
36541,36541,-98364.687500,-1744099.750,174.525543,-32.575964,6.819740,0.015615,0.368284,2498.585608
36551,36551,-98332.882812,-1744094.000,187.018265,-32.569097,6.807246,0.015626,0.368265,2498.738240
36568,36568,-98278.796875,-1744084.125,188.417725,-32.557442,6.785271,0.015644,0.368232,2498.997793
