In [1]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np
from utils import *
import matplotlib.pyplot as plt
import matplotlib

data_path = '../data/FACT_raw.xlsx'

# Read only the worksheet that is used. Passing sheet_name=None would parse every
# sheet of the workbook into a dict just to pick 'Sheet1' out of it.
df_all = pd.read_excel(data_path, engine='openpyxl', sheet_name='Sheet1')

# Discard the row labelled 0 (presumably a units/sub-header row - TODO confirm)
# and renumber the remaining rows from 0. Later cells use row positions as
# index labels, so the index must be the default 0..n-1 range.
df_all = df_all.drop(index=0).reset_index(drop=True)

In [2]:
# Maps raw column headers of the FACT spreadsheet to the descriptive feature names
# used in the rest of the notebook. Columns without an (uncommented) entry here are
# the ones reported as "Deleted features" by the absence-ratio cells. Commented-out
# headers are kept in the listing together with the reason they were excluded.
# Note: the keys 0 and 90 are integers, not strings.
name_mapping = {
    # 'optiDAT nr.', ##### Not useful
    # 'Optimat/FACT name', ##### Not useful
    # 'data delivered under name', ##### Not useful
    # 'from plate',##### empty
    # 'tested at laboratory', ##### Not useful
    'laminate': 'Lay-up',
    # 'cut angle': 'Cut Angle', ##### empty
    # 'chop', ##### What is it?
    0: 'Percentage of Fibre in 0-deg Direction',
    '+/-45': 'Percentage of Fibre in 45-deg Direction',
    90: 'Percentage of Fibre in 90-deg Direction',
    'other direction': 'Percentage of Fibre in Other Direction',
    # 'Resin', ##### Not useful
    # 'Fibre', ##### Not useful
    'FVF': 'Fibre Volumn Fraction',
    'porosity': 'Porosity', ##### Too much absence
    'Barcol hardness': 'Barcol Hardness',
    # 'Manufactured by/Quality Assurance Specification', ##### Not useful
    # 'production method',##### Not useful
    # 'Geometry', ##### Not useful
    # 'material ', ##### Not useful
    'thickness (old)': 'Thickness',
    # 'average thickness (new)', ##### empty
    'maximum width': 'Maximum Width',
    'minimum width': 'Minimum Width',
    # 'average width (new)', ##### empty
    'area': 'Area',
    'length': 'Length',
    'load length': 'Load Length',
    'radius of waist': 'Radius of Waist',
    # 'specimen standard', ##### empty
    # 'prog', ##### Not useful
    # 'start test', ##### Not useful
    # 'end of test', ##### Not useful
    # 'test type', ##### As filter
    'R-value': 'Minimum/Maximum Stress',
    # 'F_max',##### empty
    # 'Fmax during fatigue', ##### empty
    # 'Fmax during static test', ##### empty
    # 'deviation of Fmax>2% w.r.t. test frame settings during fatigue?', ##### empty
    # 'F_max 90° direction', ##### empty
    'e_max': 'Maximum Strain',
    # 'e_max 90°', ##### empty
    # 'e_max 45°', ##### empty
    's_max': 'Maximum Stress',
    # 'normalized s_max', ##### Not useful
    # 'shear strain (e12)', ##### empty
    # 'shear strength', ##### empty
    # 'Shear Modulus (G12)', ##### empty
    'No. of cycles to failure': 'Cycles to Failure',
    # 'level', ##### empty
    # 'Unnamed: 51', ##### empty
    # 'no. of cycles', ##### empty
    # 'Unnamed: 53', ##### empty
    # 'No. of spectrum passes to failure', ##### Too much absence, how to use it?
    # 'failure mode', ##### Too much absence
    # 'runout?',
    # 'R-value.1', ##### empty
    # 'F_max.1', ##### empty
    # 'e_max.1', ##### empty
    # 's_max.1', ##### empty
    # 'No. of cycles in block 2', ##### empty
    'UTS': 'Static Maximum Tensile Stress',
    'UCS': 'Static Maximum Compressive Stress',
    'e_UTS': 'Static Maximum Tensile Strain',
    'e_UCS': 'Static Maximum Compressive Strain',
    # 'UTS (mat. spec.)', ##### What is it?
    # 'UCS (mat. spec.)', ##### What is it?
    # 'corr. UTS', ##### What is it?
    # 'corr. UCS', ##### What is it?
    # 'RTS', ##### empty
    # 'RCS', ##### empty
    # 'e_RTS', ##### empty
    # 'e_RCS', ##### empty
    # 'LRU': 'Stress Rate',
    'SRU': 'Strain Rate',
    # 'SRU.1', ##### empty
    # 'SRU speed deviation ? 2% during static test?', ##### empty
    # 'LRF', ##### What is it?
    # 'SRF', ##### What is it?
    # 'SRF.1', ##### What is it?
    # 'wave', ##### Not useful
    'fconstant': 'Frequency',
    # 'test time', ##### Not useful
    # 'fconstant block 2', ##### empty
    'Eit': 'Static Elastic Modulus',
    'Eic': 'Static Compressive Modulus',
    # 'Eft',
    # 'Efc',
    # 'ILSS', ##### empty
    # 's_flex', ##### empty
    # 'test machine', ##### empty
    # 'control', ##### All load control
    # 'grip', ##### Not useful
    # 'ABG', ##### What is it?
    'Temperature': 'Temperature',
    # 'Temperature control?',
    # 'Preconditioned?', ##### empty
    # 'environment', ##### All "d", What is it?
    'RH': 'Relative Humidity',
    # 'Test condition', ##### empty
    # 'ref.', ##### Not useful
    # 'NB', ##### What is it?
    # 'RB', ##### What is it?
    # 's_res', ##### empty
    # 'N10sr', ##### Too much absence
    # 'specimen shape', ##### Not useful
    # 'Remarks', ##### empty
    # 'Bending', ##### empty
    # 'buckling', ##### empty
    # 'temperature failure or temperature above 35 °C', ##### empty
    # 'tab failure', ##### empty
    # 'delaminated', ##### empty
    # 'incomplete measurement data available',  ##### empty
    # 'Strain calculated using E', ##### empty
    # 'Premature failure in RST', ##### empty
    # "Poissons's ratio", ##### empty
    # 'Strain measurement equipment', ##### empty
    # 'Strain measurement equipment (2)', ##### empty
    # 'Grip pressure' ##### empty
}

In [3]:
# Raw columns that have no entry in name_mapping are left out of this overview.
col_to_del = [column for column in df_all.columns if column not in name_mapping]
print('Deleted features', col_to_del)

# Rename the mapped columns, then discard the unmapped ones.
df_tmp = replace_column_name(df_all, name_mapping).drop(columns=col_to_del)
df_presence = calculate_absence_ratio(df_tmp)

# Plot appearance: plain-text rendering (no LaTeX), 12 pt font, "deep" palette.
matplotlib.rc("text", usetex=False)
plt.rcParams["font.size"] = 12
clr = sns.color_palette("deep")

# Draw the horizontal absence-ratio chart on a single axes and save it to disk.
fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(111)
plot_absence_ratio(
    ax,
    df_presence,
    orient='h',
    palette=clr,
    linewidth=1,
    edgecolor=[0, 0, 0],
)
fig.tight_layout()
fig.savefig('../output/FACT/absence_ratio_initial.png', dpi=600)
plt.close(fig)

Deleted features ['optiDAT nr.', 'Optimat/FACT name', 'data delivered under name', 'from plate', 'tested at laboratory', 'cut angle', 'chop', 'Resin', 'Fibre', 'Manufactured by/Quality Assurance Specification', 'production method', 'Geometry', 'material ', 'average thickness (new)', 'average width (new)', 'specimen standard', 'prog', 'start test', 'end of test', 'test type', 'F_max', 'Fmax during fatigue', 'Fmax during static test', 'deviation of Fmax>2% w.r.t. test frame settings during fatigue?', 'F_max 90° direction', 'e_max 90°', 'e_max 45°', 'normalized s_max', 'shear strain (e12)', 'shear strength', 'Shear Modulus (G12)', 'level', 'Unnamed: 51', 'no. of cycles', 'Unnamed: 53', 'No. of spectrum passes to failure', 'failure mode', 'runout?', 'R-value.1', 'F_max.1', 'e_max.1', 's_max.1', 'No. of cycles in block 2', 'UTS (mat. spec.)', 'UCS (mat. spec.)', 'corr. UTS', 'corr. UCS', 'RTS', 'RCS', 'e_RTS', 'e_RCS', 'LRU', 'SRU.1', 'SRU speed deviation ? 2% during static test?', 'LRF', '

In [4]:
def fill_na(x, n):
    """Return ``n`` when ``x`` is a missing value, otherwise return ``x`` unchanged.

    Missingness is tested with ``pd.isna`` rather than ``np.isnan`` so that
    non-numeric scalars (e.g. strings found in object-dtype spreadsheet columns)
    pass through untouched instead of raising ``TypeError``; ``None`` and ``NaT``
    are treated as missing as well.

    Parameters
    ----------
    x : scalar
        The value to inspect.
    n : object
        The replacement returned when ``x`` is missing.

    Returns
    -------
    object
        ``n`` if ``x`` is missing, else ``x``.
    """
    return n if pd.isna(x) else x


def modify_col(df, column_name, func, **kargs):
    """Apply ``func`` to every value of one column and write the results back into ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that is modified in place.
    column_name : hashable
        Label of the column to transform. If ``df`` has no such column the call
        does nothing.
    func : callable
        Called as ``func(value, **kargs)`` for each value of the column.
    **kargs
        Extra keyword arguments forwarded to ``func``.

    Returns
    -------
    None
    """
    if column_name not in list(df.columns):
        return
    transformed = []
    for value in df[column_name]:
        transformed.append(func(value, **kargs))
    df.loc[:, column_name] = transformed

# A missing fibre percentage in any of the four direction columns is replaced by 0.
# The 0 and 90 column labels are integers; the other two are strings.
for direction_column in (0, '+/-45', 90, 'other direction'):
    modify_col(df_all, direction_column, fill_na, n=0)

In [5]:
# Add material code: the text of the 'material ' column (note the trailing space
# in the header) immediately followed by the text of the 'laminate' column.
material_code = (df_all['material '].map(str) + df_all['laminate'].map(str)).tolist()
df_all['Material_Code'] = material_code

In [6]:
# If R>1, max stress must be negative.
# Select the affected rows with a boolean mask rather than enumerated positions:
# the mask selects the right rows whatever the index labels are, and it also
# works when no row matches. String entries in 'R-value' are skipped.
is_R_above_one = df_all['R-value'].map(
    lambda r: not isinstance(r, str) and r > 1
).astype(bool)
# Row positions of the affected tests, kept under the original name.
where_R1 = np.flatnonzero(is_R_above_one.to_numpy())
df_all.loc[is_R_above_one, 's_max'] = -np.abs(df_all.loc[is_R_above_one, 's_max'])

In [7]:
# Calculate the minimum stress of each test as R-value * maximum stress
# ('R-value' is the column that name_mapping labels 'Minimum/Maximum Stress').
# NOTE(review): 'R-value' may contain strings (the R>1 cell guards against them);
# confirm how such rows are expected to behave in this product.
df_all['Minimum Stress'] = df_all['R-value'] * df_all['s_max']

In [8]:
# Keep only the rows whose 'test type' is 'CA', renumber them from 0 and switch
# to the descriptive column names defined in name_mapping.
is_ca_test = df_all['test type'] == 'CA'
df_fatigue = replace_column_name(
    df_all.loc[is_ca_test, :].copy().reset_index(drop=True),
    name_mapping,
)

In [9]:
# Derive four per-test stress descriptors: the cycle stress of largest magnitude,
# the peak-to-peak stress, and both of them divided by a static strength.
# All four columns start as NaN and are filled row by row below.
df_fatigue['Absolute Maximum Stress'] = np.nan
df_fatigue['Absolute Peak-to-peak Stress'] = np.nan
df_fatigue['Relative Maximum Stress'] = np.nan
df_fatigue['Relative Peak-to-peak Stress'] = np.nan

for idx in range(df_fatigue.values.shape[0]):
    # s[0] is the maximum stress of the cycle, s[1] the minimum stress.
    s = np.array([df_fatigue.loc[idx, 'Maximum Stress'],df_fatigue.loc[idx, 'Minimum Stress']])
    # Position (0 or 1) of the entry with the largest magnitude.
    which_max_stress = np.where(np.abs(s) == np.max(np.abs(s)))[0]
    if len(which_max_stress) == 0:
        # Nothing matched because a NaN is present (NaN never compares equal):
        # use s[1] unless s[1] itself is NaN, in which case use s[0].
        which_max_stress = 1 - int(np.isnan(s[1])) # when nan appears in s
    else:
        which_max_stress = which_max_stress[0]

    # Reference strength: the static tensile value when the governing stress is
    # positive, the static compressive value otherwise.
    relative_to = np.abs(df_fatigue.loc[idx,'Static Maximum Tensile Stress']) if s[which_max_stress] > 0 else np.abs(df_fatigue.loc[idx,'Static Maximum Compressive Stress'])
    # Fallback to the compressive value when the reference is missing, the governing
    # stress is positive and s[0] + s[1] is below 1e-5.
    # NOTE(review): the sum is not wrapped in abs(); presumably this targets fully
    # reversed loading (s[0] close to -s[1]) - confirm.
    if np.isnan(relative_to) and s[0] + s[1] < 1e-5 and s[which_max_stress] > 0:
        relative_to = np.abs(df_fatigue.loc[idx,'Static Maximum Compressive Stress'])

    # Stored with its sign (no np.abs is applied here), despite the column name.
    df_fatigue.loc[idx,'Absolute Maximum Stress'] = s[which_max_stress]
    p2p = np.abs(s[0]-s[1])
    if np.isnan(p2p):
        # One of the two stresses is NaN: use the magnitude of s[1], or of s[0]
        # when s[1] is the NaN one.
        p2p = np.abs(s[1 - int(np.isnan(s[1]))])
    df_fatigue.loc[idx,'Absolute Peak-to-peak Stress'] = p2p

    # Accept the static reference only if the governing stress is at most 1.1 times it.
    if np.abs(s[which_max_stress]/relative_to) <= 1.1: # otherwise static data is not correct
        df_fatigue.loc[idx,'Relative Maximum Stress'] = np.abs(s[which_max_stress]/relative_to)
        df_fatigue.loc[idx,'Relative Peak-to-peak Stress'] = np.abs(p2p/relative_to)
    else:
        # NOTE(review): this branch is also taken when relative_to is NaN (the
        # comparison above is then False), and it clears BOTH static strengths,
        # including one that may be valid - confirm this is intended.
        df_fatigue.loc[idx,'Static Maximum Tensile Stress'] = np.nan
        df_fatigue.loc[idx,'Static Maximum Compressive Stress'] = np.nan

In [10]:
# Remove runout tests: keep every row whose 'runout?' entry is not 'y' (rows with
# a missing entry are kept), then renumber the rows from 0.
is_not_runout = df_fatigue['runout?'] != 'y'
df_fatigue = df_fatigue.loc[is_not_runout, :].copy().reset_index(drop=True)

In [11]:
if 'Lay-up' in df_fatigue.columns:
    # Decode each distinct lay-up code exactly once and reuse the result for every
    # row that shares it. Previously code2seq_dict was filled but never read, and
    # code2seq was called again for each row.
    code2seq_dict = {}   # lay-up code -> result of code2seq(code)
    sequence_text = {}   # lay-up code -> '/'-joined text of that result
    layups = df_fatigue['Lay-up'].values
    seq = []
    for layer in layups:
        if layer not in code2seq_dict:
            decoded = code2seq(layer)
            code2seq_dict[layer] = decoded
            sequence_text[layer] = '/'.join([str(x) for x in decoded])
        seq.append(sequence_text[layer])
    df_fatigue['Sequence'] = seq

In [12]:
# Write the processed fatigue table to disk, without the row index.
df_fatigue.to_excel(
    '../data/FACT_fatigue.xlsx',
    index=False,
    engine='openpyxl',
)

In [13]:
# Merge repeated fatigue tests: within each Material_Code, rows whose maximum
# stress, minimum stress and frequency coincide (after per-material min-max
# scaling) are treated as one group and replaced by a single averaged row.
measure_features = ['Maximum Stress', 'Minimum Stress', 'Frequency']
from sklearn.preprocessing import MinMaxScaler
fatigue_mat_lay = df_fatigue['Material_Code'].copy()
df_avg_fatigue = pd.DataFrame(columns = df_fatigue.columns)
# NOTE(review): tqdm is not imported in the visible cells; presumably it comes in
# through `from utils import *` - confirm.
bar = tqdm(total=len(list(set(fatigue_mat_lay))))
for material in list(set(fatigue_mat_lay)):
    # Row positions of this material; used as index labels below, which relies on
    # df_fatigue having been re-indexed from 0 in an earlier cell.
    where_material = np.where(fatigue_mat_lay == material)[0]
    df_material = df_fatigue.loc[where_material, measure_features].copy()
    # Scale the three features within this material.
    scaler = MinMaxScaler()
    df_material.loc[:,:] = scaler.fit_transform(df_material)
    # Symmetric matrix of pairwise mean squared differences between rows;
    # only the upper triangle is computed and mirrored.
    mse_matrix = np.zeros((len(where_material), len(where_material)))
    for i_idx, i in enumerate(where_material):
        for j_idx, j in enumerate(where_material):
            if j_idx < i_idx:
                continue
            val_i = df_material.loc[i,:]
            val_j = df_material.loc[j,:]
            # NOTE(review): val_i/val_j are pandas Series, so np.mean presumably
            # defers to Series.mean and skips NaN features - confirm.
            mse_val = np.mean((val_i - val_j)**2)
            mse_matrix[i_idx, j_idx] = mse_matrix[j_idx, i_idx] = mse_val

    # For every row, collect the rows closer than 1e-5 to it; identical groups
    # are stored only once.
    all_correlation = []
    for i_idx, i in enumerate(where_material):
        where_correlated = list(where_material[np.where(mse_matrix[i_idx,:]<1e-5)[0]])
        if where_correlated not in all_correlation:
            all_correlation.append(where_correlated)
    for cor in all_correlation:
        if len(cor)>1:
            # Use the first row of the group as a template and overwrite the
            # columns for which mean() produced a value.
            # NOTE(review): df_fatigue holds text columns (e.g. 'Lay-up');
            # on pandas >= 2.0 DataFrame.mean() no longer drops such columns
            # silently and may raise - confirm the pandas version in use.
            df_avg = df_fatigue.loc[[cor[0]], :].copy()
            mean_values = df_fatigue.loc[cor, :].mean()
            df_avg[mean_values.index] = mean_values.values
            # NOTE(review): concatenating inside the loop re-copies the growing
            # frame on every iteration.
            df_avg_fatigue = pd.concat([df_avg_fatigue, df_avg], ignore_index=True, axis=0)
        elif len(cor) == 1:
            df_avg_fatigue = pd.concat([df_avg_fatigue, df_fatigue.loc[[cor[0]], :]], ignore_index=True, axis=0)
        else:
            # Empty group: the row did not even match itself, so it is dropped.
            pass # Min Stress, Max Stress or frequency is not recorded
    bar.update(1)

df_avg_fatigue.reset_index(drop=True, inplace=True)

df_avg_fatigue.to_excel('../data/FACT_avg_fatigue.xlsx', engine='openpyxl', index=False)

  0%|          | 0/34 [00:00<?, ?it/s]

  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)


In [14]:
# Columns of the fatigue table that do not carry one of the descriptive names
# from name_mapping are left out of this overview.
mapped_feature_names = set(name_mapping.values())
col_to_del = [column for column in df_fatigue.columns if column not in mapped_feature_names]
print('Deleted features', col_to_del)

# Work on a reduced copy so that df_fatigue itself keeps every column.
df_tmp = df_fatigue.drop(columns=col_to_del)
df_presence = calculate_absence_ratio(df_tmp)

# Plot appearance: small font for the compact figure, "deep" palette.
plt.rcParams["font.size"] = 5
clr = sns.color_palette("deep")

# Draw the horizontal absence-ratio chart on a single axes and save it to disk.
fig = plt.figure(figsize=(5, 4), dpi=300)
ax = fig.add_subplot(111)
plot_absence_ratio(
    ax,
    df_presence,
    orient='h',
    palette=clr,
    linewidth=1,
    edgecolor=[0, 0, 0],
)
fig.tight_layout()
fig.savefig('../output/FACT/absence_ratio.png')
plt.close(fig)

Deleted features ['optiDAT nr.', 'Optimat/FACT name', 'data delivered under name', 'from plate', 'tested at laboratory', 'cut angle', 'chop', 'Resin', 'Fibre', 'Manufactured by/Quality Assurance Specification', 'production method', 'Geometry', 'material ', 'average thickness (new)', 'average width (new)', 'specimen standard', 'prog', 'start test', 'end of test', 'test type', 'F_max', 'Fmax during fatigue', 'Fmax during static test', 'deviation of Fmax>2% w.r.t. test frame settings during fatigue?', 'F_max 90° direction', 'e_max 90°', 'e_max 45°', 'normalized s_max', 'shear strain (e12)', 'shear strength', 'Shear Modulus (G12)', 'level', 'Unnamed: 51', 'no. of cycles', 'Unnamed: 53', 'No. of spectrum passes to failure', 'failure mode', 'runout?', 'R-value.1', 'F_max.1', 'e_max.1', 's_max.1', 'No. of cycles in block 2', 'UTS (mat. spec.)', 'UCS (mat. spec.)', 'corr. UTS', 'corr. UCS', 'RTS', 'RCS', 'e_RTS', 'e_RCS', 'LRU', 'SRU.1', 'SRU speed deviation ? 2% during static test?', 'LRF', '

In [15]:
# Show the final fatigue table; the notebook renders a truncated preview
# (first and last rows, with the middle columns elided).
df_fatigue

Unnamed: 0,optiDAT nr.,Optimat/FACT name,data delivered under name,from plate,tested at laboratory,Lay-up,cut angle,chop,Percentage of Fibre in 0-deg Direction,Percentage of Fibre in 45-deg Direction,...,Strain measurement equipment,Strain measurement equipment (2),Grip pressure,Material_Code,Minimum Stress,Absolute Maximum Stress,Absolute Peak-to-peak Stress,Relative Maximum Stress,Relative Peak-to-peak Stress,Sequence
0,19001,9001,FACT,,TUD,"[[+45(240),-45(240)SB]8,[0(495),90(5)]7]s",,,7,8,...,,,,"GP[[+45(240),-45(240)SB]8,[0(495),90(5)]7]s",9.11,91.1000,81.9900,0.212850,0.191565,45/-45/45/-45/45/-45/45/-45/45/-45/45/-45/45/-...
1,19002,9002,FACT,,TUD,"[[+45(240),-45(240)SB]8,[0(495),90(5)]7]s",,,7,8,...,,,,"GP[[+45(240),-45(240)SB]8,[0(495),90(5)]7]s",9.11,91.1000,81.9900,0.212850,0.191565,45/-45/45/-45/45/-45/45/-45/45/-45/45/-45/45/-...
2,11334,1334,FACT,,TUD,"[[+45(240),-45(240)SB]8,[0(495),90(5)]7]s",,,7,8,...,,,,"GP[[+45(240),-45(240)SB]8,[0(495),90(5)]7]s",9.11,91.1000,81.9900,0.212850,0.191565,45/-45/45/-45/45/-45/45/-45/45/-45/45/-45/45/-...
3,11333,1333,FACT,,TUD,"[[+45(240),-45(240)SB]8,[0(495),90(5)]7]s",,,7,8,...,,,,"GP[[+45(240),-45(240)SB]8,[0(495),90(5)]7]s",9.111,91.1100,81.9990,0.212874,0.191586,45/-45/45/-45/45/-45/45/-45/45/-45/45/-45/45/-...
4,19005,9005,FACT,,TUD,"[[+45(240),-45(240)SB]8,[0(495),90(5)]7]s",,,7,8,...,,,,"GP[[+45(240),-45(240)SB]8,[0(495),90(5)]7]s",-65,65.0000,130.0000,0.151869,0.303738,45/-45/45/-45/45/-45/45/-45/45/-45/45/-45/45/-...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
880,10604,604,FACT,,DNV,"[[0(400),90(400)WR],[(100CSM)]SB]5",,5,5,0,...,,,,"GV[[0(400),90(400)WR],[(100CSM)]SB]5",-39,39.0000,78.0000,,,0/90/0/0/90/0/0/90/0/0/90/0/0/90/0
881,10605,605,FACT,,DNV,"[[0(400),90(400)WR],[(100CSM)]SB]5",,5,5,0,...,,,,"GV[[0(400),90(400)WR],[(100CSM)]SB]5",-39,39.0000,78.0000,,,0/90/0/0/90/0/0/90/0/0/90/0/0/90/0
882,10606,606,FACT,,DNV,"[[0(400),90(400)WR],[(100CSM)]SB]5",,5,5,0,...,,,,"GV[[0(400),90(400)WR],[(100CSM)]SB]5",-78,78.0000,156.0000,,,0/90/0/0/90/0/0/90/0/0/90/0/0/90/0
883,10620,620,FACT,,DNV,"[[0(400),90(400)WR],[(100CSM)]SB]5",,5,5,0,...,,,,"GP[[0(400),90(400)WR],[(100CSM)]SB]5",-58.4666,58.4666,116.9332,,,0/90/0/0/90/0/0/90/0/0/90/0/0/90/0
