Merge all data

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import os, glob

In [None]:
# Switch the working directory to the extracted-data folder so that the
# reads below can refer to their input files by bare file name.
# The folder is resolved relative to the parent of the current directory.
parent_dir = os.path.dirname(os.getcwd())
path = os.path.join(parent_dir, '../Data/Extracted_data/')
os.chdir(path)

### MBN

In [None]:
# Load the MBN table and average all rows that share a 'Sample' label.
MBN = pd.read_csv('MBN.csv', index_col='Sample').groupby("Sample").mean()

# Divide each feature by its matching '<feature>_ref' column.
mbn_features = ['peak', 'fwhm', 'diff_two_peaks', 'amp', 'pos', 'energy']
for mbn_feature in mbn_features:
    MBN[mbn_feature + '_ratio'] = MBN[mbn_feature] / MBN[mbn_feature + '_ref']

# Drop the raw inputs (bookkeeping columns plus every feature/reference pair).
mbn_raw_columns = ['location', 'measure']
for mbn_feature in mbn_features:
    mbn_raw_columns.extend([mbn_feature, mbn_feature + '_ref'])
MBN = MBN.drop(columns=mbn_raw_columns)

# Only these four ratios are kept for the merge.
mbn_kept_columns = ['diff_two_peaks_ratio', 'amp_ratio',
                    'pos_ratio', 'energy_ratio']
MBN = MBN[mbn_kept_columns]
print(MBN.shape)


### NLE

In [None]:
# Average the S10/S11/S12 columns of every row into a single 'NLE_avg' value.
NLE = pd.read_csv('NLE.csv')
NLE['NLE_avg'] = NLE[['S10', 'S11', 'S12']].mean(axis=1)

# Reshape to one row per sample and one 'NLE_avg_<freq>' column per 'freq' value.
NLE = NLE.pivot_table(index='Sample', columns='freq', values='NLE_avg').add_prefix('NLE_avg_')

# Build the ratio columns: first against the 17 column, then against the 119 column.
nle_ratio_plan = (
    ('17', ('51', '85', '119')),
    ('119', ('17', '51', '85')),
)
for nle_ref, nle_numerators in nle_ratio_plan:
    for nle_freq in nle_numerators:
        NLE['NLE_ratio_' + nle_freq + '_' + nle_ref] = (
            NLE['NLE_avg_' + nle_freq] / NLE['NLE_avg_' + nle_ref]
        )

# Keep only the three ratios that go into the merged table.
nle_kept_columns = ['NLE_ratio_51_17', 'NLE_ratio_85_17', 'NLE_ratio_51_119']
NLE = NLE[nle_kept_columns]
print(NLE.shape)


### Electric

In [None]:
# Load the electrical measurements, indexed by the 'ID' column.
electric = pd.read_csv('Electric_RP.csv', index_col='ID')
# Remove the 'Unnamed: 0' column before selecting the features of interest.
electric = electric.drop(columns='Unnamed: 0')
electric_kept_columns = ['Avg_RP', 'ACEE_30_Avg_58', 'ACEE_500_Avg_58', 'ACPD_Avg_58']
electric = electric[electric_kept_columns]
print(electric.shape)


### NLO

In [None]:
# Load the NLO results, discard the columns that are not needed, rename
# 'Average' to 'NLO_avg', and average the rows that share a 'Sample' label.
NLO = (
    pd.read_csv('NLO_result.csv', index_col='Sample')
    .drop(columns=['Stress', 'St Dev', 'Confidence'])
    .rename(columns={'Average': 'NLO_avg'})
    .groupby('Sample')
    .mean()
)
print(NLO.shape)


### Fatigue

In [None]:
# Spreadsheet header -> short column name used in the rest of the notebook.
# (The leading space in ' Specimen ID' is part of the header being matched.)
fatigue_column_names = {
    ' Specimen ID': 'ID',
    'Cycles at NDE Test': 'NDE_cycle',
    'Cycles at Failure': 'failure_cycle',
    'Current Unfailed Cycles': 'unfailed_cycle',
}
fatigue = pd.read_excel('FatigueSampleInventory.xlsx',
                        usecols=list(fatigue_column_names))
fatigue = fatigue.rename(columns=fatigue_column_names).set_index('ID')

# Fraction of the failure cycle count reached at the NDE test.
fatigue['F_Fraction'] = fatigue['NDE_cycle'] / fatigue['failure_cycle']
# True where a failure cycle count is recorded.
fatigue['Observed'] = fatigue['failure_cycle'].notnull()

# Cycles between the NDE test and failure; where no failure cycle count is
# recorded, fall back to the cycles between the NDE test and the current
# unfailed count.
cycles_to_failure = fatigue['failure_cycle'] - fatigue['NDE_cycle']
cycles_still_unfailed = fatigue['unfailed_cycle'] - fatigue['NDE_cycle']
fatigue['F_Time'] = cycles_to_failure.fillna(cycles_still_unfailed)
print(fatigue.shape)


Merge all data

In [None]:
# Left-join every feature table onto the fatigue table by index, so each
# fatigue row is kept whether or not the other tables have a match for it.
feature_tables = [MBN, NLE, NLO, electric]
merged = fatigue.join(feature_tables, how='left')

In [None]:
# Write the merged table to the Merged_data folder, located relative to the
# parent of the current working directory.
merged_csv_path = os.path.join(os.path.dirname(os.getcwd()),
                               '../Data/Merged_data/MERGE_on_sample.csv')
merged.to_csv(merged_csv_path)

In [None]:
# Evaluate the (rows, columns) shape of the merged table; as the last
# expression of the cell it is shown as the cell's output.
merged.shape

In [None]:
# Columns exported for the survival analysis.
survival_columns = [
    'Observed', 'F_Time', 'NDE_cycle',
    'diff_two_peaks_ratio', 'amp_ratio', 'pos_ratio', 'energy_ratio',
    'NLE_ratio_51_17', 'NLE_ratio_85_17', 'NLE_ratio_51_119',
    'NLO_avg',
    'Avg_RP', 'ACEE_30_Avg_58', 'ACEE_500_Avg_58', 'ACPD_Avg_58',
]
# Specimens removed from the survival table by label.
excluded_specimens = ['6C', '31C', '29C', '36C']

# Keep only rows with a known F_Time, restrict to the exported columns,
# then remove the excluded specimens.
survival = merged.dropna(how='any', subset=['F_Time'])[survival_columns]
survival = survival.drop(excluded_specimens)

survival_csv_path = os.path.join(os.path.dirname(os.getcwd()),
                                 '../Data/Merged_data/Survival_df.csv')
survival.to_csv(survival_csv_path)
survival.shape

In [None]:
# Build the "left-truncated" table from the rows of `merged` that have no
# F_Time. An explicit copy of the filtered rows is taken so that the column
# assignments below modify an independent frame (no SettingWithCopyWarning)
# and never touch `merged`.
survival_left_truncated = merged.loc[merged['F_Time'].isna()].copy()

# The unfailed cycle count is set equal to the NDE cycle count, so F_Time
# evaluates to 0 wherever NDE_cycle is present (and stays NaN where it is not).
# NOTE(review): confirm that a zero F_Time is the intended value for these rows.
survival_left_truncated['unfailed_cycle'] = survival_left_truncated['NDE_cycle']
survival_left_truncated['F_Time'] = (
    survival_left_truncated['NDE_cycle'] - survival_left_truncated['unfailed_cycle']
)

# Same column layout as the Survival_df.csv export.
survival_left_truncated = survival_left_truncated[['Observed', 'F_Time', 'NDE_cycle', 'diff_two_peaks_ratio', 'amp_ratio', 'pos_ratio',
       'energy_ratio', 'NLE_ratio_51_17', 'NLE_ratio_85_17','NLE_ratio_51_119',
       'NLO_avg', 'Avg_RP', 'ACEE_30_Avg_58',
       'ACEE_500_Avg_58', 'ACPD_Avg_58']]

survival_left_truncated.to_csv(os.path.join(os.path.dirname(os.getcwd()), '../Data/Merged_data/Survival_df_left_trunc.csv'))
# Report the shape of the table just written (the original displayed the
# shape of `survival`, a different frame).
survival_left_truncated.shape