In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import tabula

In [7]:
def create_median_df(table_list, col_names):
    '''Collect the row-wise median of every table into a single dataframe.

    Args:
    - table_list: sequence of pandas dataframes
    - col_names: sequence of str labels, paired with table_list by position

    Returns a dataframe with one 'median_<label>' column per (table, label)
    pair; pairing stops at the shorter of the two sequences.
    '''
    medians = pd.DataFrame()
    for frame, label in zip(table_list, col_names):
        # axis=1 -> one median value per row of the table
        row_medians = frame.median(axis=1)
        medians['median_' + label] = row_medians
    return medians

In [8]:
def plot_corr(data):
    '''
    Draw the pairwise column correlations of a dataframe as a square heatmap.

    Args:
    - data: pd dataframe

    Nothing is returned; the heatmap is drawn on a newly created 5x5 figure.
    '''
    correlations = data.corr()
    # sns.set is called outside the style context below.
    sns.set(font_scale=1.2)

    with sns.axes_style("white"):
        plt.subplots(figsize=(5, 5))
        # No ax is passed, so the heatmap goes onto the current axes,
        # i.e. the one created on the line above.
        sns.heatmap(correlations, square=True, cmap='RdBu_r', center=0)


In [91]:
# Path of the UT measurements report (PDF): the parent of the current working
# directory joined with a relative '../Data/Raw_data/' segment.
UT_pdf = os.path.join(
    os.path.dirname(os.getcwd()),
    '../Data/Raw_data/UT Measurements_Report.pdf',
)

In [336]:
# Read the tables found on the listed report pages with tabula's lattice mode.
tables = tabula.read_pdf(
    UT_pdf,
    pages=[8, 29, 34, 54, 55, 60],
    lattice=True,
)
# Discard the entries at positions 6 and 7; anything after them shifts down,
# so the old tables[8] becomes tables[6].
del tables[6:8]

In [295]:
# TODO: p7_table by hand

In [377]:
# Transpose -> reset_index(drop=True) -> transpose back replaces the column
# labels of tables[0] with 0, 1, ... so the columns can be named by position.
table_0 = (
    tables[0]
    .T.reset_index(drop=True).T
    .rename(columns={0: 'Sample', 1: 'backscatter_rms'})
)
table_0.to_csv(
    os.path.join(
        os.path.dirname(os.getcwd()),
        '../Data/Intermediate_data/' + 'PIPE_ut_backscatter.csv',
    )
)

In [242]:
# Label the 'Unnamed: 0' column of tables[1] as 'Sample' (in place), then
# export the table as the TUBE attenuation CSV.
tables[1].rename({'Unnamed: 0': 'Sample'}, axis='columns', inplace=True)
tables[1].to_csv(
    os.path.join(
        os.path.dirname(os.getcwd()),
        '../Data/Intermediate_data/' + 'TUBE_ut_attenuation.csv',
    )
)

In [251]:
# Label the 'Unnamed: 0' column of tables[2] as 'Sample' (in place), then
# export the table as the PIPE attenuation CSV.
tables[2].rename({'Unnamed: 0': 'Sample'}, axis='columns', inplace=True)
tables[2].to_csv(
    os.path.join(
        os.path.dirname(os.getcwd()),
        '../Data/Intermediate_data/' + 'PIPE_ut_attenuation.csv',
    )
)

In [337]:
# Clean tables[3] in place and export it as the TUBE absorption CSV.
# NOTE(review): every step mutates tables[3], so this cell cannot be re-run
# without reloading `tables` first (the drop of row 0 raises once it is gone).
tables[3].rename(
    {
        'Value of internal friction Q-1': 'Absorption_avg',
        'Unnamed: 0': 'Absorption_std',
    },
    axis='columns',
    inplace=True,
)
# Remove the row labelled 0 before the numeric conversion below.
tables[3].drop(index=0, inplace=True)
tables[3][['Absorption_avg', 'Absorption_std']] = (
    tables[3][['Absorption_avg', 'Absorption_std']].astype('float')
)
# The two columns use different divisors (1000 vs 10000).
# NOTE(review): presumably these undo scale factors printed in the report - confirm.
tables[3]['Absorption_avg'] = tables[3]['Absorption_avg'].div(1000)
tables[3]['Absorption_std'] = tables[3]['Absorption_std'].div(10000)
tables[3].to_csv(
    os.path.join(
        os.path.dirname(os.getcwd()),
        '../Data/Intermediate_data/' + 'TUBE_ut_absorption.csv',
    )
)

In [346]:
# Build the TUBE "blind" absorption table from tables[4] and tables[5] and
# export it as CSV.
#
# tables[5] is transposed, index-reset with drop=False and transposed back:
# this moves its column labels into an extra row (labelled 'index' by default)
# and replaces its column labels with 0, 1, ...
# NOTE(review): presumably tables[5] is a continuation page whose first data
# row was parsed as the header - confirm against the PDF.
table_4 = pd.concat([tables[4], tables[5].T.reset_index(drop=False).T ])
# Only columns labelled 0, 1 and 2 are affected by this rename.
# NOTE(review): this assumes tables[4] also carries integer column labels at
# this point; otherwise concat yields extra columns and the positional slice
# below would not pick the renamed ones - TODO confirm.
table_4.rename(columns = {0:'Sample',
                          1:'Absorption_avg',
                           2:'Absorption_std'}, inplace=True)
# Keep the first three columns by position.
table_4 = table_4.iloc[:, 0:3]
# concat was called without ignore_index, so row labels of both inputs are
# kept; drop(0) removes every row labelled 0.
# NOTE(review): if both inputs have a row labelled 0, both rows are removed
# here - confirm no data row from tables[5] is lost.
table_4.drop(0, inplace=True)
table_4.reset_index(drop=True, inplace=True)
table_4[['Absorption_avg', 'Absorption_std']] = table_4[['Absorption_avg', 'Absorption_std']].astype('float')
# The two columns use different divisors (1000 vs 10000).
# NOTE(review): presumably these undo scale factors printed in the report - confirm.
table_4['Absorption_avg'] = table_4['Absorption_avg']/1000
table_4['Absorption_std'] = table_4['Absorption_std']/10000
table_4.to_csv(os.path.join(os.path.dirname(os.getcwd()), '../Data/Intermediate_data/'+'TUBE_blind_ut_absorption.csv'))

In [371]:
# Clean tables[6] in place and export it as the PIPE absorption CSV.
# NOTE(review): every step mutates tables[6], so this cell cannot be re-run
# without reloading `tables` first (the drop of row 0 raises once it is gone).
tables[6].rename(
    {
        'Value of internal friction Q-1': 'Absorption_avg',
        'Unnamed: 0': 'Absorption_std',
    },
    axis='columns',
    inplace=True,
)
# Remove the row labelled 0 before the numeric conversion below.
tables[6].drop(index=0, inplace=True)
tables[6][['Absorption_avg', 'Absorption_std']] = (
    tables[6][['Absorption_avg', 'Absorption_std']].astype('float')
)
# The two columns use different divisors (1000 vs 10000).
# NOTE(review): presumably these undo scale factors printed in the report - confirm.
tables[6]['Absorption_avg'] = tables[6]['Absorption_avg'].div(1000)
tables[6]['Absorption_std'] = tables[6]['Absorption_std'].div(10000)
tables[6].to_csv(
    os.path.join(
        os.path.dirname(os.getcwd()),
        '../Data/Intermediate_data/' + 'PIPE_ut_absorption.csv',
    )
)