In [22]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import os, glob

In [23]:
# Switch the working directory to the extracted-data folder so the loaders
# below can open their input files by bare file name. Later cells also derive
# their output locations from the working directory set here.
cwd_parent = os.path.dirname(os.getcwd())
path = os.path.join(cwd_parent, '../Data/Extracted_data/')
os.chdir(path)

### MBN

In [24]:
# Load the MBN table and collapse repeated rows into one mean row per Sample.
MBN = pd.read_csv('MBN.csv', index_col='Sample').groupby("Sample").mean()

# Express every feature relative to its '<feature>_ref' column. The ratios are
# taken on the per-sample means computed above.
mbn_features = ['peak', 'fwhm', 'diff_two_peaks', 'amp', 'pos', 'energy']
for feature in mbn_features:
    MBN[feature + '_ratio'] = MBN[feature] / MBN[feature + '_ref']

# Remove the bookkeeping columns and every raw/reference pair.
mbn_raw_columns = ['location', 'measure']
for feature in mbn_features:
    mbn_raw_columns.extend([feature, feature + '_ref'])
MBN = MBN.drop(columns=mbn_raw_columns)

# Only these four ratios are carried forward; 'peak_ratio' and 'fwhm_ratio'
# are computed above but not kept.
mbn_kept_ratios = ['diff_two_peaks_ratio', 'amp_ratio', 'pos_ratio', 'energy_ratio']
MBN = MBN[mbn_kept_ratios]
print(MBN.shape)
MBN.head()

(36, 4)


Unnamed: 0_level_0,diff_two_peaks_ratio,amp_ratio,pos_ratio,energy_ratio
Sample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
10C,0.230009,1.257148,0.99271,0.457476
13C,1.163056,1.237837,1.002948,1.10972
14C,1.677959,0.939024,0.966947,1.755207
16C,0.684264,0.927139,0.987522,0.910617
19C,0.763296,1.128928,1.002407,1.126849


### NLE

In [25]:
# Load the NLE readings and average the three columns S10, S11 and S12 row-wise.
nle_raw = pd.read_csv('NLE.csv')
nle_raw['NLE_avg'] = nle_raw[['S10', 'S11', 'S12']].mean(axis=1)

# Reshape to one row per Sample and one 'NLE_avg_<freq>' column per value of 'freq'.
NLE = nle_raw.pivot_table(index='Sample', columns='freq', values='NLE_avg').add_prefix('NLE_avg_')

# Ratios of each averaged column to the 'NLE_avg_17' column ...
for numerator in ('51', '85', '119'):
    NLE['NLE_ratio_' + numerator + '_17'] = NLE['NLE_avg_' + numerator] / NLE['NLE_avg_17']
# ... and to the 'NLE_avg_119' column.
for numerator in ('17', '51', '85'):
    NLE['NLE_ratio_' + numerator + '_119'] = NLE['NLE_avg_' + numerator] / NLE['NLE_avg_119']

# Only the two ratios against 'NLE_avg_17' listed here are carried forward.
NLE = NLE[['NLE_ratio_51_17', 'NLE_ratio_85_17']]
print(NLE.shape)
NLE.head()

(36, 2)


freq,NLE_ratio_51_17,NLE_ratio_85_17
Sample,Unnamed: 1_level_1,Unnamed: 2_level_1
10C,0.918086,1.125854
13C,0.919655,1.101159
14C,0.90312,1.079935
16C,0.90015,1.053618
19C,0.978877,1.235508


### Electric

In [26]:
# Load the electrical measurements indexed by specimen ID, discard the
# 'Unnamed: 0' column, and keep the four columns used downstream.
electric_columns = ['Avg_RP', 'ACEE_30_Avg_58', 'ACEE_500_Avg_58', 'ACPD_Avg_58']
electric = pd.read_csv('Electric_RP.csv', index_col='ID').drop(columns='Unnamed: 0')
electric = electric[electric_columns]
print(electric.shape)
electric.head()

(36, 4)


Unnamed: 0_level_0,Avg_RP,ACEE_30_Avg_58,ACEE_500_Avg_58,ACPD_Avg_58
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
41C,190.979294,-0.13764,-2.86603,4.804749
43C,184.930215,-0.46204,-2.254125,3.869428
44C,187.204261,-0.596385,-2.7885,3.370729
45C,189.010633,-0.64571,-3.20222,4.186639
7C,179.123964,-1.13596,-1.718115,3.438451


### NLO

In [27]:
# Load the NLO results, keep only the 'Average' column (renamed 'NLO_avg'),
# and collapse repeated rows into one mean value per Sample.
NLO = (
    pd.read_csv('NLO_result.csv', index_col='Sample')
    .drop(columns=['Stress', 'St Dev', 'Confidence'])
    .rename(columns={'Average': 'NLO_avg'})
    .groupby('Sample')
    .mean()
)
print(NLO.shape)
NLO.head()


(36, 1)


Unnamed: 0_level_0,NLO_avg
Sample,Unnamed: 1_level_1
10C,4.2407
13C,9.687991
14C,4.58822
16C,3.755165
19C,3.798736


### Fatigue

In [28]:
# Spreadsheet column -> short name used in the rest of the notebook.
# (The leading space in ' Specimen ID' is part of the spreadsheet header.)
fatigue_column_names = {
    ' Specimen ID': 'ID',
    'Cycles at NDE Test': 'NDE_cycle',
    'Cycles at Failure': 'failure_cycle',
    'Current Unfailed Cycles': 'unfailed_cycle',
}
fatigue = (
    pd.read_excel('FatigueSampleInventory.xlsx', usecols=list(fatigue_column_names))
    .rename(columns=fatigue_column_names)
    .set_index('ID')
)

# Fraction of the failure cycle count reached at the NDE test (NaN when no failure is recorded).
fatigue['F_Fraction'] = fatigue['NDE_cycle'] / fatigue['failure_cycle']
# True when a failure cycle count is recorded for the specimen.
fatigue['Observed'] = fatigue['failure_cycle'].notnull()

# F_Time is the cycle count from the NDE test to failure; when no failure is
# recorded it falls back to the count from the NDE test to 'unfailed_cycle'.
# It stays NaN when neither value is available.
cycles_to_failure = fatigue['failure_cycle'] - fatigue['NDE_cycle']
cycles_to_unfailed = fatigue['unfailed_cycle'] - fatigue['NDE_cycle']
fatigue['F_Time'] = cycles_to_failure.fillna(cycles_to_unfailed)
print(fatigue.shape)
fatigue.head()


(36, 6)


Unnamed: 0_level_0,NDE_cycle,failure_cycle,unfailed_cycle,F_Fraction,Observed,F_Time
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
41C,0,2154137.0,,0.0,True,2154137.0
43C,0,,,,False,
44C,0,1318034.0,,0.0,True,1318034.0
45C,0,,3245267.0,,False,3245267.0
7C,150000,,,,False,


### Merge all data

In [29]:
# Left-join every feature table onto the fatigue records by index label, so
# each fatigue specimen keeps its row even when a feature table has no entry for it.
feature_tables = [MBN, NLE, NLO, electric]
merged = fatigue.join(feature_tables, how='left')

In [30]:
# Write the merged table to the Merged_data folder, located relative to the
# parent of the current working directory.
merged_csv_path = os.path.join(os.path.dirname(os.getcwd()), '../Data/Merged_data/MERGE_on_sample.csv')
merged.to_csv(merged_csv_path)

In [31]:
# Display the (rows, columns) dimensions of the merged table as a quick check.
merged.shape

(36, 17)

In [32]:
# Columns kept for the survival table: event flag, duration, then the features.
survival_columns = [
    'Observed', 'F_Time', 'NDE_cycle',
    'diff_two_peaks_ratio', 'amp_ratio', 'pos_ratio', 'energy_ratio',
    'NLE_ratio_51_17', 'NLE_ratio_85_17',
    'NLO_avg', 'Avg_RP', 'ACEE_30_Avg_58', 'ACEE_500_Avg_58', 'ACPD_Avg_58',
]

# Keep only the specimens whose F_Time is known.
survival = merged.dropna(how='any', subset=['F_Time'])[survival_columns]

# NOTE(review): these four specimens are removed by ID with no stated reason —
# confirm why they are excluded and record it here.
survival = survival.drop(['6C', '31C', '29C', '36C'])

survival_csv_path = os.path.join(os.path.dirname(os.getcwd()), '../Data/Merged_data/Survival_df.csv')
survival.to_csv(survival_csv_path)
survival.shape

(16, 14)

In [33]:
# Display the survival table that was written to Survival_df.csv.
survival

Unnamed: 0_level_0,Observed,F_Time,NDE_cycle,diff_two_peaks_ratio,amp_ratio,pos_ratio,energy_ratio,NLE_ratio_51_17,NLE_ratio_85_17,NLO_avg,Avg_RP,ACEE_30_Avg_58,ACEE_500_Avg_58,ACPD_Avg_58
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
41C,True,2154137.0,0,2.104871,1.185236,1.002988,1.559453,0.893268,1.107898,5.576476,190.979294,-0.13764,-2.86603,4.804749
44C,True,1318034.0,0,2.280959,1.365329,0.998446,1.504218,0.888503,1.140535,4.619563,187.204261,-0.596385,-2.7885,3.370729
45C,False,3245267.0,0,1.245687,0.80997,0.989351,1.282046,0.896534,1.137561,3.458337,189.010633,-0.64571,-3.20222,4.186639
19C,True,464392.0,450000,0.763296,1.128928,1.002407,1.126849,0.978877,1.235508,3.798736,171.925123,-1.065155,-1.57074,3.226957
32C,True,306142.0,450000,0.610771,1.137713,0.994283,0.844962,0.844824,1.091099,4.296261,169.717689,-0.61719,-1.312415,2.907922
23C,False,2255225.0,600000,0.39758,0.919473,0.999861,0.846653,0.918699,1.114762,5.621833,173.403495,-0.83111,-1.19454,3.061743
25C,False,1444844.0,600000,0.821445,1.178319,1.003158,1.141313,0.881052,1.083718,3.061693,172.727399,-0.912065,-0.927155,3.284087
21C,True,200007.0,600000,0.705402,1.151889,0.984968,0.853808,0.929446,1.119635,4.626314,174.003954,-0.8849,-1.578795,2.764862
27C,True,297627.0,750000,0.144147,0.777585,1.002966,0.394731,0.912962,1.112708,5.020607,173.688949,-1.276355,-1.60215,2.746634
26C,False,2829121.0,750000,0.453026,1.052153,1.003191,0.868,0.890721,1.091059,4.831161,169.891439,-0.6878,-1.269005,3.820881


In [20]:
# Build the companion table for the specimens that have no F_Time in `merged`.
#
# Fixes relative to the previous version of this cell:
#   * the filtered rows are copied explicitly and columns are assigned with
#     bracket syntax, instead of attribute assignment on a slice of `merged`
#     (chained assignment, which triggers SettingWithCopyWarning);
#   * the final expression reports the shape of `survival_left_truncated`
#     rather than of the unrelated `survival` frame.
left_truncated_columns = [
    'Observed', 'F_Time', 'NDE_cycle',
    'diff_two_peaks_ratio', 'amp_ratio', 'pos_ratio', 'energy_ratio',
    'NLE_ratio_51_17', 'NLE_ratio_85_17',
    'NLO_avg', 'Avg_RP', 'ACEE_30_Avg_58', 'ACEE_500_Avg_58', 'ACPD_Avg_58',
]

# Independent copy of the rows with missing F_Time; `merged` is not modified.
survival_left_truncated = merged[merged['F_Time'].isna()].copy()

# Use the NDE test cycle count as the specimen's unfailed cycle count.
survival_left_truncated['unfailed_cycle'] = survival_left_truncated['NDE_cycle']
# Same 'unfailed_cycle - NDE_cycle' form as the fallback in the fatigue cell;
# with the substitution above, F_Time is 0 for every row.
survival_left_truncated['F_Time'] = (
    survival_left_truncated['unfailed_cycle'] - survival_left_truncated['NDE_cycle']
)
survival_left_truncated = survival_left_truncated[left_truncated_columns]

left_truncated_csv_path = os.path.join(
    os.path.dirname(os.getcwd()), '../Data/Merged_data/Survival_df_left_trunc.csv'
)
survival_left_truncated.to_csv(left_truncated_csv_path)
survival_left_truncated.shape

(16, 14)

In [21]:
# Display the table that was written to Survival_df_left_trunc.csv.
survival_left_truncated

Unnamed: 0_level_0,Observed,F_Time,NDE_cycle,diff_two_peaks_ratio,amp_ratio,pos_ratio,energy_ratio,NLE_ratio_51_17,NLE_ratio_85_17,NLO_avg,Avg_RP,ACEE_30_Avg_58,ACEE_500_Avg_58,ACPD_Avg_58
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
43C,False,0,0,1.39662,0.996217,1.00571,1.393382,0.878759,1.115332,3.422903,184.930215,-0.46204,-2.254125,3.869428
7C,False,0,150000,0.385614,1.3056,1.002897,1.109924,0.936855,1.139517,4.720874,179.123964,-1.13596,-1.718115,3.438451
13C,False,0,150000,1.163056,1.237837,1.002948,1.10972,0.919655,1.101159,9.687991,170.130491,-0.660875,-2.167395,3.725514
8C,False,0,150000,0.820318,1.319565,0.996523,0.697468,0.933717,1.135902,3.477006,169.866095,-0.95184,-1.502545,3.529319
35C,False,0,150000,0.34677,1.188801,1.002494,0.688566,0.967767,1.163921,3.760074,171.321012,-0.86743,-1.350065,3.05065
24C,False,0,300000,0.588694,1.061223,1.005517,1.030618,0.844482,1.030615,5.064456,174.454983,-0.816755,-1.836935,4.259982
34C,False,0,300000,0.589633,0.999639,1.002006,0.821495,0.780283,0.999564,4.849455,170.036299,-1.01434,-1.521755,3.57996
9C,False,0,300000,0.771186,0.968763,0.98974,0.716189,0.904619,1.086779,5.642206,173.081727,-0.917735,-1.67576,3.958266
10C,False,0,300000,0.230009,1.257148,0.99271,0.457476,0.918086,1.125854,4.2407,171.622934,-0.79764,-1.795,2.579638
30C,False,0,450000,0.388101,1.06992,1.004996,0.932989,0.924855,1.195796,3.513676,174.421961,-0.96813,-1.24822,3.305279
