In [6]:
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.api import OLS
import pandas as pd
import seaborn as sns
import sklearn.neighbors as sn
from sklearn.linear_model import LinearRegression
from scipy.optimize import curve_fit
from scipy.signal import savgol_filter
import scipy.signal as signal
import os
import glob
import tempfile
import dask.dataframe as dd

In [60]:
df_lookup = pd.read_excel('../data/Calib_samples.xlsx')
print(df_lookup)

    Core  top  bottom
0   C1-1   51      52
1   C1-2   57      58
2   B1-2   30      31
3   B1-2   40      41
4   B1-2   50      51
5   B1-2   80      81
6   B1-2   90      91
7   A2-2   70      71
8   A2-2   90      91
9   A2-3   30      31
10  A2-3   40      41
11  A2-3   90      91


In [61]:
# Get data file names
path = r'../data/HSI'
all_files = glob.glob(os.path.join(path, "*.csv")) 

df_from_each_file = (pd.read_csv(f) for f in all_files)

print(all_files)

['../data/HSI/C1_2_index_projection.csv', '../data/HSI/A2_2_index_projection.csv', '../data/HSI/C1_1_index_projection.csv', '../data/HSI/B1_2_index_projection.csv', '../data/HSI/A2_3_index_projection.csv']


In [63]:
filepaths = [f for f in os.listdir(".") if f.endswith('.csv')]
print(filepaths)

[]


In [64]:
# assign path
path, dirs, files = next(os.walk("../data/HSI/"))
file_count = len(files)

In [65]:
# create empty list
dataframes_list = []
 
# append datasets to the list 
for i in range(file_count):
    temp_df = pd.read_csv("../data/HSI/"+files[i])
    dataframes_list.append(temp_df)
     
# display datasets
for dataset in dataframes_list:
    display(dataset)

Unnamed: 0,depth,RMean,RABD620,RABD670,RABD715,RABD845,RABD830,depth [mm]
0,0,,,,,,,0.0000
1,1,,,,,,,0.0336
2,2,,,,,,,0.0672
3,3,,,,,,,0.1008
4,4,,,,,,,0.1344
...,...,...,...,...,...,...,...,...
17178,17178,,,,,,,577.1808
17179,17179,,,,,,,577.2144
17180,17180,,,,,,,577.2480
17181,17181,,,,,,,577.2816


Unnamed: 0,depth,RMean,RABD620,RABD670,RABD715,RABD845,RABD830,depth [mm]
0,0,,,,,,,0.0000
1,1,,,,,,,0.0329
2,2,,,,,,,0.0658
3,3,,,,,,,0.0987
4,4,,,,,,,0.1316
...,...,...,...,...,...,...,...,...
30152,30152,,,,,,,992.0008
30153,30153,,,,,,,992.0337
30154,30154,,,,,,,992.0666
30155,30155,,,,,,,992.0995


Unnamed: 0,depth,RMean,RABD620,RABD670,RABD715,RABD845,RABD830,depth [mm]
0,0,0.078438,0.996081,0.994287,1.002980,0.995604,0.995494,0.0000
1,1,0.081445,0.996956,0.993991,1.001620,0.996861,0.995228,0.0357
2,2,0.084109,0.997630,0.993967,1.000540,0.997826,0.994872,0.0714
3,3,0.086449,0.998129,0.994180,0.999737,0.998526,0.994439,0.1071
4,4,0.088490,0.998475,0.994597,0.999168,0.998986,0.993946,0.1428
...,...,...,...,...,...,...,...,...
15115,15115,0.108387,1.003070,1.000070,1.003560,0.997449,0.993345,539.6055
15116,15116,0.108890,1.003180,1.000560,1.003020,0.997133,0.992640,539.6412
15117,15117,0.109508,1.003230,1.001140,1.002250,0.996847,0.991709,539.6769
15118,15118,0.110250,1.003200,1.001830,1.001250,0.996600,0.990526,539.7126


Unnamed: 0,depth,RMean,RABD620,RABD670,RABD715,RABD845,RABD830,depth [mm]
0,0,,,,,,,0.000
1,1,,,,,,,0.031
2,2,,,,,,,0.062
3,3,,,,,,,0.093
4,4,,,,,,,0.124
...,...,...,...,...,...,...,...,...
32233,32233,0.049254,1.00247,0.994556,1.00460,0.997220,0.998783,999.223
32234,32234,0.048431,1.00353,0.993292,1.00380,0.997895,0.998592,999.254
32235,32235,0.048660,1.00487,0.991905,1.00281,0.998748,0.998212,999.285
32236,32236,0.050067,1.00653,0.990390,1.00161,0.999793,0.997623,999.316


Unnamed: 0,depth,RMean,RABD620,RABD670,RABD715,RABD845,RABD830,depth [mm]
0,0,,,,,,,0.0000
1,1,,,,,,,0.0309
2,2,,,,,,,0.0618
3,3,,,,,,,0.0927
4,4,,,,,,,0.1236
...,...,...,...,...,...,...,...,...
30759,30759,,,,,,,950.4531
30760,30760,,,,,,,950.4840
30761,30761,,,,,,,950.5149
30762,30762,,,,,,,950.5458


In [66]:
print(files)

['C1_2_index_projection.csv', 'A2_2_index_projection.csv', 'C1_1_index_projection.csv', 'B1_2_index_projection.csv', 'A2_3_index_projection.csv']


In [68]:
C1_2 = dataframes_list[0]
A2_2 = dataframes_list[1]
C1_1 = dataframes_list[2]
B1_2 = dataframes_list[3]
A2_3 = dataframes_list[4]

In [70]:
alldfs = [var for var in dir() if isinstance(eval(var), pd.core.frame.DataFrame)]
print(alldfs)

       depth  RMean  RABD620  RABD670  RABD715  RABD845  RABD830  depth [mm]
0          0    NaN      NaN      NaN      NaN      NaN      NaN      0.0000
1          1    NaN      NaN      NaN      NaN      NaN      NaN      0.0336
2          2    NaN      NaN      NaN      NaN      NaN      NaN      0.0672
3          3    NaN      NaN      NaN      NaN      NaN      NaN      0.1008
4          4    NaN      NaN      NaN      NaN      NaN      NaN      0.1344
...      ...    ...      ...      ...      ...      ...      ...         ...
17178  17178    NaN      NaN      NaN      NaN      NaN      NaN    577.1808
17179  17179    NaN      NaN      NaN      NaN      NaN      NaN    577.2144
17180  17180    NaN      NaN      NaN      NaN      NaN      NaN    577.2480
17181  17181    NaN      NaN      NaN      NaN      NaN      NaN    577.2816
17182  17182    NaN      NaN      NaN      NaN      NaN      NaN    577.3152

[17183 rows x 8 columns]


In [92]:
filtered_C1_1_0 = C1_1[(C1_1['depth [mm]'] >= 51) & (C1_1['depth [mm]'] <= 52)]
filtered_C1_2_1 = C1_2[(C1_2['depth [mm]'] >= 57) & (C1_2['depth [mm]'] <= 58)]
filtered_B1_2_2 = B1_2[(B1_2['depth [mm]'] >= 30) & (B1_2['depth [mm]'] <= 31)]
filtered_B1_2_3 = B1_2[(B1_2['depth [mm]'] >= 40) & (B1_2['depth [mm]'] <= 41)]
filtered_B1_2_4 = B1_2[(B1_2['depth [mm]'] >= 50) & (B1_2['depth [mm]'] <= 51)]
filtered_B1_2_5 = B1_2[(B1_2['depth [mm]'] >= 80) & (B1_2['depth [mm]'] <= 81)]
filtered_B1_2_6 = B1_2[(B1_2['depth [mm]'] >= 90) & (B1_2['depth [mm]'] <= 91)]
filtered_A2_2_7 = A2_2[(A2_2['depth [mm]'] >= 70) & (A2_2['depth [mm]'] <= 71)]
filtered_A2_2_8 = A2_2[(A2_2['depth [mm]'] >= 90) & (A2_2['depth [mm]'] <= 91)]
filtered_A2_3_9 = A2_3[(A2_3['depth [mm]'] >= 30) & (A2_3['depth [mm]'] <= 31)]
filtered_A2_3_10 = A2_3[(A2_3['depth [mm]'] >= 40) & (A2_3['depth [mm]'] <= 41)]
filtered_A2_3_11 = A2_3[(A2_3['depth [mm]'] >= 90) & (A2_3['depth [mm]'] <= 91)]

In [93]:
print(filtered_C1_1_0)

      depth     RMean   RABD620   RABD670   RABD715   RABD845   RABD830  \
1429   1429  0.119656  0.996595  0.999502  1.000480  1.000700  0.993074   
1430   1430  0.119803  0.996755  0.999404  1.000290  1.001100  0.993008   
1431   1431  0.120026  0.997265  0.999317  1.000230  1.001300  0.992736   
1432   1432  0.120307  0.997376  0.999261  0.999794  1.000780  0.992800   
1433   1433  0.120554  0.997511  0.998509  0.999338  1.000700  0.992978   
1434   1434  0.120700  0.997799  0.998146  0.999004  1.000630  0.993203   
1435   1435  0.120742  0.998571  0.997990  0.998640  1.000010  0.993941   
1436   1436  0.120677  0.998586  0.997944  0.998530  1.000050  0.994207   
1437   1437  0.120665  0.999252  0.998510  0.998061  0.999943  0.994200   
1438   1438  0.120582  0.999146  0.998615  0.997465  0.999495  0.994226   
1439   1439  0.120368  0.999178  0.999008  0.997245  0.999148  0.994003   
1440   1440  0.120120  0.998715  0.998779  0.997134  0.999130  0.994036   
1441   1441  0.119849  0.

In [94]:
df_lookup.loc[0, 'RABD670'] = np.average(filtered_C1_1_0['RABD670'])
df_lookup.loc[1, 'RABD670'] = np.average(filtered_C1_2_1['RABD670'])
df_lookup.loc[2, 'RABD670'] = np.average(filtered_B1_2_2['RABD670'])
df_lookup.loc[3, 'RABD670'] = np.average(filtered_B1_2_3['RABD670'])
df_lookup.loc[4, 'RABD670'] = np.average(filtered_B1_2_4['RABD670'])
df_lookup.loc[5, 'RABD670'] = np.average(filtered_B1_2_5['RABD670'])
df_lookup.loc[6, 'RABD670'] = np.average(filtered_B1_2_6['RABD670'])
df_lookup.loc[7, 'RABD670'] = np.average(filtered_A2_2_7['RABD670'])
df_lookup.loc[8, 'RABD670'] = np.average(filtered_A2_2_8['RABD670'])
df_lookup.loc[9, 'RABD670'] = np.average(filtered_A2_3_9['RABD670'])
df_lookup.loc[10, 'RABD670'] = np.average(filtered_A2_3_10['RABD670'])
df_lookup.loc[11, 'RABD670'] = np.average(filtered_A2_3_11['RABD670'])

In [95]:
print(df_lookup)

    Core  top  bottom   RABD670
0   C1-1   51      52  1.000419
1   C1-2   57      58  1.003117
2   B1-2   30      31  0.996180
3   B1-2   40      41  0.997254
4   B1-2   50      51  0.998044
5   B1-2   80      81  0.996421
6   B1-2   90      91  1.000284
7   A2-2   70      71  1.005741
8   A2-2   90      91  1.007993
9   A2-3   30      31  1.016016
10  A2-3   40      41  1.020870
11  A2-3   90      91  1.021412


In [96]:
df_lookup.loc[0, 'RABD845'] = np.average(filtered_C1_1_0['RABD845'])
df_lookup.loc[1, 'RABD845'] = np.average(filtered_C1_2_1['RABD845'])
df_lookup.loc[2, 'RABD845'] = np.average(filtered_B1_2_2['RABD845'])
df_lookup.loc[3, 'RABD845'] = np.average(filtered_B1_2_3['RABD845'])
df_lookup.loc[4, 'RABD845'] = np.average(filtered_B1_2_4['RABD845'])
df_lookup.loc[5, 'RABD845'] = np.average(filtered_B1_2_5['RABD845'])
df_lookup.loc[6, 'RABD845'] = np.average(filtered_B1_2_6['RABD845'])
df_lookup.loc[7, 'RABD845'] = np.average(filtered_A2_2_7['RABD845'])
df_lookup.loc[8, 'RABD845'] = np.average(filtered_A2_2_8['RABD845'])
df_lookup.loc[9, 'RABD845'] = np.average(filtered_A2_3_9['RABD845'])
df_lookup.loc[10, 'RABD845'] = np.average(filtered_A2_3_10['RABD845'])
df_lookup.loc[11, 'RABD845'] = np.average(filtered_A2_3_11['RABD845'])

In [97]:
print(df_lookup)

    Core  top  bottom   RABD670   RABD845
0   C1-1   51      52  1.000419  0.999686
1   C1-2   57      58  1.003117  0.997233
2   B1-2   30      31  0.996180  1.001112
3   B1-2   40      41  0.997254  0.996999
4   B1-2   50      51  0.998044  1.000544
5   B1-2   80      81  0.996421  1.000675
6   B1-2   90      91  1.000284  0.999368
7   A2-2   70      71  1.005741  0.999242
8   A2-2   90      91  1.007993  1.001864
9   A2-3   30      31  1.016016  1.001615
10  A2-3   40      41  1.020870  1.000079
11  A2-3   90      91  1.021412  1.000161


In [100]:
df_lookup.to_excel("../data/Calib_RABDs.xlsx")