# DataFrame Interpolation and Merging Examples

Reference: [https://stackabuse.com/how-to-merge-dataframes-in-pandas/](https://stackabuse.com/how-to-merge-dataframes-in-pandas/)

In [2]:
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

import w2

In [3]:
# Two linear signals sampled on different, only partly overlapping grids.
t1 = np.array([0, 0.5, 1.0, 1.5, 2.0])
t2 = np.array([0, 0.34, 1.01, 1.4, 1.6, 1.7, 2.01])
y1 = pd.Series(2 * t1, index=t1)
y2 = pd.Series(3 * t2, index=t2)

# Building the frame from both Series aligns them on a shared index, so each
# column holds NaN at the labels that only the other signal was sampled at.
df = pd.DataFrame({'y1': y1, 'y2': y2})
display(df)

# Experiments
# 1) Fill the gaps using the numeric index values as the x-coordinate.
df_interp = df.interpolate(method='index')
display(df_interp)

# 2) Keep only the rows of a chosen grid after filling. Reindexing onto labels
#    that are not present in the filled frame (the quarter points) yields NaN
#    rows, because interpolation only fills labels that already exist.
quarter_points = np.array([0.25, 0.75, 1.25, 1.75])
for probe_index in (t2, quarter_points):
    df_interp = df.interpolate(method='index').reindex(probe_index)
    display(df_interp)

# This is the desired solution: both signals evaluated on the t1 grid.
df_interp = df.interpolate(method='index').reindex(t1)
display(df_interp)

Unnamed: 0,y1,y2
0.0,0.0,0.0
0.34,,1.02
0.5,1.0,
1.0,2.0,
1.01,,3.03
1.4,,4.2
1.5,3.0,
1.6,,4.8
1.7,,5.1
2.0,4.0,


Unnamed: 0,y1,y2
0.0,0.0,0.0
0.34,0.68,1.02
0.5,1.0,1.5
1.0,2.0,3.0
1.01,2.02,3.03
1.4,2.8,4.2
1.5,3.0,4.5
1.6,3.2,4.8
1.7,3.4,5.1
2.0,4.0,6.0


Unnamed: 0,y1,y2
0.0,0.0,0.0
0.34,0.68,1.02
1.01,2.02,3.03
1.4,2.8,4.2
1.6,3.2,4.8
1.7,3.4,5.1
2.01,4.0,6.03


Unnamed: 0,y1,y2
0.25,,
0.75,,
1.25,,
1.75,,


Unnamed: 0,y1,y2
0.0,0.0,0.0
0.5,1.0,1.5
1.0,2.0,3.0
1.5,3.0,4.5
2.0,4.0,6.0


In [4]:
# Load both tables from the same SQLite file through the project's w2 helper.
db_file = 'w2_data.db'
met_wb1 = w2.sql_query(db_file, 'select * from MET_WB1')
ctr_tr2 = w2.sql_query(db_file, 'select * from CTR_TR2')

# Show the two frames one after the other.
display(met_wb1, ctr_tr2)

Unnamed: 0_level_0,Air Temperature,Dew Point Temperature,Wind Speed,Wind Direction,Cloudiness,Solar Radiation
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017-01-01 00:00:00,1.0,-4.5,4.78,4.71,8.7,0
2017-01-01 01:00:00,1.0,-4.5,4.78,4.71,8.7,0
2017-01-01 02:00:00,0.4,-5.1,3.96,4.69,1.1,0
2017-01-01 03:00:00,-0.7,-5.6,3.38,4.46,0.0,0
2017-01-01 04:00:00,-1.7,-6.1,2.10,3.96,0.2,0
...,...,...,...,...,...,...
2017-12-31 20:00:00,-13.9,-15.6,2.30,4.54,1.2,0
2017-12-31 21:00:00,-14.0,-16.3,2.68,4.49,1.6,0
2017-12-31 22:00:00,-14.6,-17.3,2.68,4.21,3.8,0
2017-12-31 23:00:00,-15.5,-17.8,2.68,4.34,3.4,0


Unnamed: 0_level_0,TDS,ISS,OP,NH3,NOx,LDOM,RDOM,LPOM,RPOM,DO
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2017-01-01 00:00:00,988.0,5.8,0.784,0.140,31.500,0.00,0.00,3.60,0.90,7.10
2017-01-01 12:00:00,988.0,5.8,0.784,0.140,31.500,0.00,0.00,3.60,0.90,7.10
2017-01-02 12:00:00,1000.0,5.4,0.822,0.145,31.000,0.00,0.00,3.55,0.89,7.03
2017-01-03 12:00:00,1012.0,5.1,0.860,0.150,30.500,0.00,0.00,3.50,0.88,6.96
2017-01-04 12:00:00,999.2,5.1,0.901,0.190,30.000,0.00,0.00,3.10,0.78,6.86
...,...,...,...,...,...,...,...,...,...,...
2017-12-28 12:00:00,1253.7,1.2,0.100,0.058,37.219,5.41,62.22,3.84,0.96,6.13
2017-12-29 12:00:00,1253.3,1.4,0.100,0.067,36.938,5.26,60.44,4.48,1.12,6.12
2017-12-30 12:00:00,1253.0,1.6,0.100,0.075,36.656,5.10,58.66,5.12,1.28,6.74
2017-12-31 12:00:00,1252.7,1.8,0.100,0.083,36.375,4.95,56.88,5.76,1.44,7.36


In [None]:
# Outer join on the shared Date index: keeps every timestamp from either table.
# indicator=True adds a `_merge` column recording which side each row came from.
merged1 = met_wb1.merge(ctr_tr2, left_index=True, right_index=True, how='outer', indicator=True)

# Default (inner) join: only timestamps present in both tables.
merged2 = ctr_tr2.merge(met_wb1, left_index=True, right_index=True)

# Function-style spellings of the same joins, kept for reference:
# merged3 = pd.merge(met_wb1, ctr_tr2, how='inner', left_index=True, right_index=True)
# merged4 = pd.merge(met_wb1, ctr_tr2, how='outer', left_index=True, right_index=True)

# Lift pandas' display truncation so the whole merged frame is rendered.
# NOTE: these options are global and stay in effect for every later cell.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
# None (not -1) means "no column-width limit"; the -1 sentinel has been
# deprecated since pandas 1.0.
pd.set_option('display.max_colwidth', None)
display(merged1)
# display(merged2)

In [27]:
# Preview the merged frame with gaps filled by time-weighted interpolation.
# The result is only displayed; merged1 itself is left unchanged.
merged1.interpolate(method='time')
# To inspect the available methods: help(merged1.interpolate)

Help on method interpolate in module pandas.core.frame:

interpolate(method: 'str' = 'linear', axis: 'Axis' = 0, limit: 'int | None' = None, inplace: 'bool' = False, limit_direction: 'str | None' = None, limit_area: 'str | None' = None, downcast: 'str | None' = None, **kwargs) -> 'DataFrame | None' method of pandas.core.frame.DataFrame instance
    Fill NaN values using an interpolation method.
    
    Please note that only ``method='linear'`` is supported for
    DataFrame/Series with a MultiIndex.
    
    Parameters
    ----------
    method : str, default 'linear'
        Interpolation technique to use. One of:
    
        * 'linear': Ignore the index and treat the values as equally
          spaced. This is the only method supported on MultiIndexes.
        * 'time': Works on daily and higher resolution data to interpolate
          given length of interval.
        * 'index', 'values': use the actual numerical values of the index.
        * 'pad': Fill in NaNs using existing va

In [None]:
# 'Air Temperature' against the frame's index, with points coloured by the
# 'DO' column using matplotlib's jet colormap.
# Requires `seaborn as sns` and `matplotlib as mpl` from the imports cell.
# NOTE(review): relplot treats `size` as a semantic grouping variable, not as a
# marker or figure size; confirm whether `height=` or `s=` was intended.
sns.relplot(
    data=merged1,
    x=merged1.index,
    y='Air Temperature',
    hue='DO',
    aspect=2,
    palette=mpl.cm.jet,
    alpha=0.5,
    size=2,
)
# Optional alternative view:
# merged1.loc[:, ['LDOM', 'LPOM']].plot()

In [None]:
# Scatter of 'DO' against 'Air Temperature'.
# Uses the explicit Axes interface so that `ax` really is an Axes object
# (plt.scatter returns a PathCollection, not an Axes).
fig, ax = plt.subplots()
ax.scatter(merged1['Air Temperature'], merged1['DO'])
ax.set_xlabel('Air Temperature')
ax.set_ylabel('DO')
# Fixed y-range so this figure is directly comparable with the POM scatter.
ax.set_ylim(0, 14);

In [None]:
# Derived column: sum of the 'RPOM' and 'LPOM' columns. The assignment is
# idempotent, so re-running this cell leaves merged1 in the same state.
merged1['POM'] = merged1['RPOM'] + merged1['LPOM']

# Scatter of 'DO' against the derived 'POM' column, using the explicit Axes
# interface so that `ax` really is an Axes object.
fig, ax = plt.subplots()
ax.scatter(merged1['POM'], merged1['DO'])
ax.set_xlabel('POM')
ax.set_ylabel('DO')
# Same fixed y-range as the Air Temperature scatter, for comparison.
ax.set_ylim(0, 14);