In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import os, glob

In [2]:
def plot_corr(data, figsize=(15,15)):
    '''
    Plot the lower triangle of a dataframe's correlation matrix as an
    annotated heatmap.

    Args:
    - data: pd dataframe; pairwise correlations are taken from ``data.corr()``
    - figsize: (width, height) tuple forwarded to ``plt.subplots``

    Returns:
    - None; the figure is drawn on a newly created matplotlib figure.

    Note: ``sns.set(font_scale=1.2)`` changes seaborn's global settings, so
    it also affects plots drawn after this call.
    '''
    corr = data.corr()
    sns.set(font_scale=1.2)
    # Mask the upper triangle (diagonal included) so each pair is shown once.
    mask = np.triu(np.ones_like(corr, dtype=bool))
    with sns.axes_style("white"):
        _, ax = plt.subplots(figsize=figsize)
        # Bind the heatmap to the axes created above explicitly instead of
        # relying on matplotlib's implicit "current axes" state.
        sns.heatmap(corr, mask=mask, square=True, cmap='RdBu_r', center=0, annot=True,
                    annot_kws={'fontsize': 8}, ax=ax)

In [3]:
# Make the extracted-data folder the working directory so the loading cells
# below can refer to the CSV files by bare name.
path = os.path.join(
    os.path.dirname(os.getcwd()),
    '../Data/Extracted_data/',
)
os.chdir(path)

### Fracture toughness

In [4]:
# Fracture-toughness table indexed by ID: drop the columns that are not
# carried into the merge and shorten the KJIC column name.
FT = (
    pd.read_csv('fracture_toughness.csv')
    .set_index('ID')
    .drop(columns=[
        'Type',
        'Percent_cold_work',
        'KIC_stress_intensity_max_ksi_x_in1_2',
        'KIC_stress_intensity_range_ksi_x_in1_2',
        'JQ_lb-in_in2',
    ])
    .rename(columns={'KJIC_ksi_x_in1_2': 'KJIC'})
)

### TEP and magnetic susceptibility

In [5]:
# TEP / magnetic-susceptibility summary indexed by ID, with the descriptive
# columns removed and the magnetic-susceptibility names abbreviated to MS_*.
TEP = (
    pd.read_csv('TEP_magnetic_susceptibility_summary.csv')
    .set_index('ID')
    .drop(columns=['material', 'cold_work_percent'])
    .rename(columns={
        'magnetic_susceptibility_average': 'MS_Avg',
        'magnetic_susceptibility_neg_error': 'MS_neg_error',
        'magnetic_susceptibility_pos_error': 'MS_pos_error',
    })
)

### Nonlinear

In [6]:
# Nonlinear UT table: discard the columns not used in the merge, then average
# the remaining columns per ID and tag them with an '_avg' suffix.
UT_NL = (
    pd.read_csv('UT_nonlinear.csv')
    .drop(columns=['material', 'cold_work_percent', 'A2/A1^2', 'Thickness (mm)', 'R^2'])
    .set_index('ID')
    .groupby('ID')
    .mean()
    .add_suffix('_avg')
)

### Internal Friction 2.25MHz

In [38]:
# Read every 2.25 MHz internal-friction export. sorted() keeps the row order
# independent of the order in which the file system lists the files.
IF_225_dfs = [pd.read_csv(file) for file in sorted(glob.glob('*friction*25MHz_up.csv'))]

UT_IF_225 = pd.concat(IF_225_dfs, axis=0, ignore_index=False)

# Rewrite 'Sample ID' into the dash-separated form ending in '-1' that the
# final join uses as its key.
# regex=True must be explicit: pandas >= 2.0 treats the pattern as a literal
# string by default, which would leave '%', 'CW' and ',' in place and make
# the join find no matching IDs.
UT_IF_225['Sample ID'] = (
    UT_IF_225['Sample ID']
    .str.replace(r'%|CW|,', '', regex=True)
    .str.strip()
    .str.replace(' - ', '-', regex=False)
    .str.replace('  ', ' ', regex=False)
    .str.replace(' ', '-', regex=False)
    # NOTE(review): strip('-1') removes ANY leading/trailing '-' or '1'
    # characters, not only a literal "-1" suffix -- confirm no ID
    # legitimately starts or ends with '1'.
    .str.strip('-1')
    + '-1'
)

# Index by ID, tag every column with the frequency, then remove '%' signs
# from string cells so the whole table can be cast to float.
UT_IF_225 = (
    UT_IF_225
    .drop(columns=['% CW'])
    .rename(columns={
        'Sample ID': 'ID',
        '% change from 0% CW': '%C_IF',
        'Standard Error': 'SE_IF',
        'Standard Error (%)': 'SE_%_IF',
        'Q-1': 'IF_amp',
    })
    .set_index('ID')
    .add_suffix('_2.25MHz')
    .replace(regex={'%': ''})
    .astype('float')
)

### Internal Friction 3.5MHz

In [39]:
# Read every 3.5 MHz internal-friction export. sorted() keeps the row order
# independent of the order in which the file system lists the files.
IF_35_dfs = [pd.read_csv(file) for file in sorted(glob.glob('*friction*_5MHz_up.csv'))]

UT_IF_35 = pd.concat(IF_35_dfs, axis=0, ignore_index=False)

# Rewrite 'Sample ID' into the dash-separated form ending in '-1' that the
# final join uses as its key.
# regex=True must be explicit: pandas >= 2.0 treats the pattern as a literal
# string by default, which would leave '%', 'CW' and ',' in place and make
# the join find no matching IDs.
UT_IF_35['Sample ID'] = (
    UT_IF_35['Sample ID']
    .str.replace(r'%|CW|,', '', regex=True)
    .str.strip()
    .str.replace(' - ', '-', regex=False)
    .str.replace('  ', ' ', regex=False)
    .str.replace(' ', '-', regex=False)
    # NOTE(review): strip('-1') removes ANY leading/trailing '-' or '1'
    # characters, not only a literal "-1" suffix -- confirm no ID
    # legitimately starts or ends with '1'.
    .str.strip('-1')
    + '-1'
)

# Index by ID, tag every column with the frequency, then remove '%' signs
# from string cells so the whole table can be cast to float.
UT_IF_35 = (
    UT_IF_35
    .drop(columns=['% CW'])
    .rename(columns={
        'Sample ID': 'ID',
        '% change from 0% CW': '%C_IF',
        'Standard Error': 'SE_IF',
        'Standard Error (%)': 'SE_%_IF',
        'Q-1': 'IF_amp',
    })
    .set_index('ID')
    .add_suffix('_3.5MHz')
    .replace(regex={'%': ''})
    .astype('float')
)

### Backscattering

In [40]:
UT_BS = pd.read_csv('UT_backscattering_up.csv')

# Rewrite 'Sample ID' into the dash-separated form ending in '-1' that the
# final join uses as its key.
# regex=True must be explicit: pandas >= 2.0 treats the pattern as a literal
# string by default, which would leave '%', 'CW' and ',' in place and make
# the join find no matching IDs.
UT_BS['Sample ID'] = (
    UT_BS['Sample ID']
    .str.replace(r'%|CW|,', '', regex=True)
    .str.strip()
    .str.replace(' - ', '-', regex=False)
    .str.replace('  ', ' ', regex=False)
    .str.replace(' ', '-', regex=False)
    # NOTE(review): strip('-1') removes ANY leading/trailing '-' or '1'
    # characters, not only a literal "-1" suffix -- confirm no ID
    # legitimately starts or ends with '1'.
    .str.strip('-1')
    + '-1'
)

# Index by ID, then remove '%' signs from string cells so the whole table can
# be cast to float.
UT_BS = (
    UT_BS
    .rename(columns={
        'Sample ID': 'ID',
        '% change from 0% CW': '%C_BS',
        'Backscattering Amplitude': 'BS_amp',
    })
    .set_index('ID')
    .replace(regex={'%': ''})
    .astype('float')
)

### Merge all data

In [41]:
# Left-join every measurement table onto the fracture-toughness rows by ID,
# so each FT row is kept even when no other measurement matches it.
merged = FT.join(
    [TEP, UT_NL, UT_IF_225, UT_IF_35, UT_BS],
    how='left',
)

In [42]:
# The output path is built from the current working directory, so it depends
# on the os.chdir() call made earlier in the notebook.
merged.to_csv(
    os.path.join(
        os.path.dirname(os.getcwd()),
        '../Data/Merged_data/MERGE_FT_TEP_UT_on_ID.csv',
    )
)

In [43]:
# Show the merged table as the cell output; rows with no matching
# measurement in the joined tables appear as NaN.
merged

Unnamed: 0_level_0,KJIC,MS_Avg,MS_neg_error,MS_pos_error,TEP_average,TEP_error,Beta_avg,IF_amp_2.25MHz,%C_IF_2.25MHz,SE_IF_2.25MHz,SE_%_IF_2.25MHz,IF_amp_3.5MHz,%C_IF_3.5MHz,SE_IF_3.5MHz,SE_%_IF_3.5MHz,BS_amp,%C_BS
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
304-0-1,147.2,,,,,,0.002743,0.46,0.0,0.013,2.9,0.4,0.0,0.005,1.3,11.10,0.0
304-0-2,153.6,0.12,0.035,0.086,-1.547,0.049,,,,,,,,,,,
304-0-3,148.7,,,,,,,,,,,,,,,,
304-0-4,181.2,,,,,,,,,,,,,,,,
304-0-5,179.3,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
A286-80-1,51.0,,,,,,0.002187,1.20,501.0,0.025,2.0,1.5,759.0,0.022,1.5,3.42,-64.0
A286-80-2,50.5,0.01,0.002,0.002,-1.466,0.047,,,,,,,,,,,
A286-80-3,53.1,,,,,,,,,,,,,,,,
A286-80-4,52.4,,,,,,,,,,,,,,,,
