## Initialize

In [1]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np
from utils import *
import matplotlib.pyplot as plt
import matplotlib

# Location of the combined workbook, relative to the notebook.
data_path = '../data/Upwind_combine.xlsx'

# Read only 'Sheet1'. Passing sheet_name=None would parse every sheet of the
# workbook into a dict just to pick this one entry out of it.
df_all = pd.read_excel(data_path, engine='openpyxl', sheet_name='Sheet1')

In [2]:
# Mapping from column names as they appear in the data frame (keys) to the
# feature names used after renaming (values). It is passed to `plot_absence`
# and `replace_column_name` further down.
# Commented-out entries are columns that are deliberately left out; the
# trailing '#####' note on each records the reason.
# The entries below the "Added features" banner refer to columns created in
# this notebook rather than columns read from the workbook.
name_mapping = {
    # 'Table', ##### Not useful
    'OptiDAT nr.': 'OptiDat Test Number',
    # 'identification', ##### Not useful
    # 'Property Plate', ##### Used to generate Material_Code
    # 'laboratory', ##### Not useful
    # 'angle': 'Load Angle',
    # 'geometry', ##### Used to generate Material_Code
    # 'material', ##### Not useful
    'average thicknessmiddle': 'Thickness',
    'widthmiddle': 'Width',
    'area[mm2]': 'Area',
    'length total[mm]': 'Length',
    'length gaugeaverage': 'Gauge Length',
    # 'workpackageI', ##### Not useful
    # 'start date', ##### Not useful
    # 'end date', ##### Not useful
    # 'test type', ##### Identify type
    'R-value': 'R-value',
    # 'load (max)[kN]': 'Maximum Load', ##### Using Maximum Stress directly
    # 'average_emax': 'Average Strain',
    # 'poissonaverage': 'Average Poisson Ratio',
    'σmax[MPa]': 'Maximum Stress',
    # 'G[Gpa]': 'Shear Modulus',
    'cycles to failurefatigue': 'Cycles to Failure',
    # 'runout', ##### Not useful
    # 'loading rate[mm/min]': 'Displacement Rate',
    'loading rate[Hz]': 'Frequency',
    # 'E_avg[GPa]': 'Modulus (Tensile or Compressive)', ##### Fill Static Elastic/Compressive Modulus.
    # 'machine', ##### Not useful
    # 'control mode', ##### Not useful
    # 'special fixture', ##### Not useful
    # 'tabs remarks', ##### Not useful
    'T02 max[ºC]': 'Temperature',
    # 'Environment', ##### Most of them are "Ambient"
    # 'reference document(s)', ##### Not useful
    # 'remarks', ##### Not useful
    # 'invalid', ##### Not useful
    # 'strain gaugesmiddle', ##### Not useful
    # 'grip pressure[MPa]', ##### How to use it?
    # 'number of layers': 'Number of Layers',
    'tab thickness': 'Tab Thickness',
    # 'Plate-Fibre Type',
    # 'Plate-Mould', ##### Not useful
    # 'Plate-Date of Arrival', ##### Not useful
    # 'Plate-Glass Density', ##### To calculate Volume Fraction
    # 'Plate-Laminate-Width': 'Laminate Width', ##### Use Width directly.
    # 'Plate-Fibre Weight': 'Fibre Weight', ##### Use Fibre Weight Fraction
    # 'Plate-Project', ##### Not useful
    # 'Plate-Laminate-Thickness Adjustment Mould': 'Thickness Adjustment Mould', ##### not useful
    # 'Plate-Injection Date', ##### Not useful
    # 'Plate-Resin System-Mix Ratio', ##### Most of them are the same
    # 'Plate-Material', ##### Not useful
    # 'Plate-Weaver', ##### Not useful
    # 'Plate-Laminate-Length': 'Laminate Length', misleading
    # 'Plate-Remarks', ##### Not useful
    # 'Plate-Resin Density(liquid)', ##### To calculate Volume Fraction
    # 'Plate-Fibre Volume Fraction', ##### Incomplete, recalculate
    # 'Plate-Vacuum-Curing', ##### Most of them are the same
    # 'Plate-Layers', ##### Replicated
    # 'Plate-article nr.', ##### Not useful
    # 'Plate-Prepared Resin Mixture-Resin': 'Resin Mixture Resin Weight', ##### Not useful
    # 'Plate-Post-cure', ##### How to use it?
    # 'Plate-Resin System-Resin', ##### Not useful
    # 'Plate-Vacuum-Injection': 'Vacuum Injection', ##### Not useful
    # 'Plate-Prepared Resin Mixture-Hardener': 'Resin Mixture Hardener Weight', ##### Not useful
    # 'Plate-Lay-up': 'Lay-up',
    # 'Plate-Curing Tabs Glue ', ##### How to use it?
    # 'Plate-Fibre Mass', ##### To calculate Volume Fraction
    'Plate-Fibre Weight Percentage': 'Fibre Weight Fraction',
    # 'Plate-Delta Cp', ##### Not useful
    # 'Plate-Fibre Weight Fraction', ##### Too many absence
    # 'Plate-Tg', ##### Too many absence
    # 'Plate-Prepared Resin Mixture-Mix': 'Resin Mixture Weight', ##### Not useful
    # 'Plate-Void Content', ##### Too many absence
    # 'Plate-Prepared Resin Mixture-Ratio': 'Resin Mixture Hardener Fraction', ##### Not useful
    # 'Plate-Resin System-Hardener', ##### Not useful
    # 'Plate-Material Density', ##### Too many absence
    # 'Plate-Resin Density(cured)', ##### Data seems not compatible with Resin Density(liquid)
    # 'Plate-Operater' ##### Not useful
    ################### Added features ###################
    # 'Maximum Tensile Stress': 'Maximum Tensile Stress',
    # 'Maximum Compressive Stress': 'Maximum Compressive Stress',
    'Static Maximum Tensile Stress': 'Static Maximum Tensile Stress',
    'Static Maximum Compressive Stress': 'Static Maximum Compressive Stress',
    'Minimum Stress': 'Minimum Stress',
    'Static Elastic Modulus': 'Static Elastic Modulus',
    'Static Compressive Modulus': 'Static Compressive Modulus',
}

## Plot initial absence ratio

In [3]:
# Absence-ratio figure for the untouched data, before any cleaning below.
initial_absence_png = '../data/Upwind_absence_ratio_initial.png'
plot_absence(
    df_all,
    name_mapping,
    initial_absence_png,
    fontsize=12,
)

In [4]:
# 'widthmiddle' is passed through cal_fraction twice, once per separator
# spelling (' / ' first, then '/').
for separator in (' / ', '/'):
    modify_col(df_all, 'widthmiddle', cal_fraction, s=separator)

# Columns handed to remove_strs, processed in the listed order.
columns_with_strs = (
    'T02 max[ºC]',
    'Plate-Fibre Weight Percentage',
    'Plate-Glass Density',
    'Plate-Laminate-Width',
    'Plate-Fibre Weight',
    'Plate-Laminate-Thickness Adjustment Mould',
    'Plate-Laminate-Length',
    'Plate-Fibre Mass',
)
for column in columns_with_strs:
    modify_col(df_all, column, remove_strs)

In [5]:
# Material code = str('Property Plate') immediately followed by str('geometry'),
# built row by row in the frame's row order.
material_code = [
    str(plate) + str(geometry)
    for plate, geometry in zip(df_all['Property Plate'], df_all['geometry'])
]
df_all['Material_Code'] = material_code

In [6]:
# Split the signed maximum stress into separate tensile / compressive columns
# and route the measured modulus ('E_avg[GPa]') to the matching modulus column.
# Rows whose stress is not < 0 (including missing values) take the tensile branch.
# NOTE(review): this runs before the R>1 sign correction that the notebook
# applies to 'σmax[MPa]' afterwards — confirm that ordering is intended.
for new_column in ('Maximum Tensile Stress', 'Maximum Compressive Stress',
                   'Elastic Modulus', 'Compressive Modulus'):
    df_all[new_column] = np.nan

for row in range(len(df_all)):
    stress = df_all.loc[row, 'σmax[MPa]']
    if stress < 0:
        stress_column, modulus_column = 'Maximum Compressive Stress', 'Compressive Modulus'
    else:
        stress_column, modulus_column = 'Maximum Tensile Stress', 'Elastic Modulus'
    df_all.loc[row, stress_column] = stress
    df_all.loc[row, modulus_column] = df_all.loc[row, 'E_avg[GPa]']

In [7]:
# When R > 1 the maximum stress must be negative: force the sign on those rows.
# R-values stored as strings are skipped.
r_above_one = [not isinstance(r, str) and r > 1 for r in df_all['R-value']]
where_R1 = np.array([position for position, flag in enumerate(r_above_one) if flag])
stress_R1 = df_all.loc[where_R1, 'σmax[MPa]']
df_all.loc[where_R1, 'σmax[MPa]'] = -np.abs(stress_R1)

In [8]:
# Minimum stress is the element-wise product of the R-value and the maximum stress.
r_value = df_all['R-value']
max_stress = df_all['σmax[MPa]']
df_all['Minimum Stress'] = r_value * max_stress

In [9]:
# Keep only tests run in the 'Ambient' environment (drops temperature- or
# humidity-controlled tests) and renumber the rows from 0.
is_ambient = df_all['Environment'] == 'Ambient'
df_all = df_all[is_ambient].reset_index(drop=True)

## Extract static and fatigue experiments separately

In [10]:
# Derive a '/'-joined 'Sequence' string from each row's lay-up code.
# The lookup table is filled lazily and reused, so code2seq runs once per
# distinct lay-up instead of once per distinct lay-up plus once per row.
# NOTE(review): assumes code2seq returns the same re-iterable sequence for the
# same input — confirm against utils.
if 'Plate-Lay-up' in df_all.columns:
    layups = df_all['Plate-Lay-up'].values
    code2seq_dict = {}
    seq = []
    for layer in layups:
        if layer not in code2seq_dict:
            code2seq_dict[layer] = code2seq(layer)
        seq.append('/'.join(str(x) for x in code2seq_dict[layer]))
    df_all['Sequence'] = seq

In [11]:
# Row positions of static tests ('STT' or 'STC') and of constant-amplitude
# fatigue tests ('CA'), both in ascending order.
test_type = df_all['test type']
static_indexes = np.flatnonzero(test_type.isin(['STT', 'STC']).to_numpy())
non_static_indexes = np.flatnonzero((test_type == 'CA').to_numpy())

# Work from a copy so df_all itself is left untouched by the split.
df_tmp = df_all.copy()

df_static = df_tmp.loc[static_indexes].reset_index(drop=True)
df_fatigue = df_tmp.loc[non_static_indexes].reset_index(drop=True)

# Rename columns according to name_mapping.
df_fatigue = replace_column_name(df_fatigue, name_mapping)
df_static = replace_column_name(df_static, name_mapping)

In [12]:
# Average the static test results per material and use them to fill the
# missing 'Static ...' columns of the fatigue tests of the same material.
static_mat_lay = df_static['Material_Code'].values
static_features = ['Maximum Tensile Stress', 'Maximum Compressive Stress', 'Elastic Modulus', 'Compressive Modulus']


def _mean_of_present(values):
    """Return the mean of the non-missing entries of ``values``.

    String entries are treated as missing. Missing entries are skipped
    explicitly; the previous truthiness test (``np.where(column)``) kept NaN
    and dropped genuine zeros instead. Returns NaN when nothing is present.
    """
    numeric = pd.Series(
        [np.nan if isinstance(value, str) else value for value in values],
        dtype=float,
    )
    return numeric.mean()  # skipna=True by default


# material code -> one-row DataFrame holding the mean of each static feature.
static_properties = {}
for material in set(static_mat_lay):
    material_rows = df_static.loc[static_mat_lay == material, static_features]
    static_properties[material] = pd.DataFrame(
        {feature: _mean_of_present(material_rows[feature]) for feature in static_features},
        index=[0],
    )

########### modify for another dataset ##############
fatigue_static_features = ['Static '+x if 'Static' not in x else x for x in static_features]
#####################################################
fatigue_mat_lay = df_fatigue['Material_Code'].values

# Make sure every target column exists before filling it.
for feature in fatigue_static_features:
    if feature not in df_fatigue.columns:
        df_fatigue[feature] = np.nan

for material, static_property in static_properties.items():
    in_material = fatigue_mat_lay == material
    if not in_material.any():
        continue
    for feature, name in zip(static_features, fatigue_static_features):
        # Only fill cells that are still missing; existing values are kept.
        to_assign = in_material & df_fatigue[name].isna().to_numpy()
        df_fatigue.loc[to_assign, name] = static_property[feature].values[0]


In [13]:
# Where a fatigue test has its own 'E_avg[GPa]' value, that value replaces
# both static modulus columns for the row.
where_E_avg_notna = np.flatnonzero(df_fatigue['E_avg[GPa]'].notna().to_numpy())
measured_modulus = df_fatigue.loc[where_E_avg_notna, 'E_avg[GPa]']
for modulus_column in ('Static Elastic Modulus', 'Static Compressive Modulus'):
    df_fatigue.loc[where_E_avg_notna, modulus_column] = measured_modulus

In [14]:
# Drop runout tests (rows whose 'runout' is 'y') and renumber the rows from 0.
not_runout = df_fatigue['runout'] != 'y'
df_fatigue = df_fatigue[not_runout].reset_index(drop=True)

In [15]:
# Base-10 logarithm of the tensile stress of each static test.
tensile_stress = df_static['Maximum Tensile Stress'].values.astype(float)
df_static['log(Static Maximum Tensile Stress)'] = np.log10(tensile_stress)

In [16]:
# Base-10 logarithm of the cycles to failure of each fatigue test.
cycles_to_failure = df_fatigue['Cycles to Failure'].values.astype(float)
df_fatigue['log(Cycles to Failure)'] = np.log10(cycles_to_failure)

In [17]:
# Write the per-test fatigue and static tables (fatigue first), without the index.
per_test_exports = (
    (df_fatigue, '../data/Upwind_fatigue.xlsx'),
    (df_static, '../data/Upwind_static.xlsx'),
)
for frame, out_path in per_test_exports:
    frame.to_excel(out_path, engine='openpyxl', index=False)

In [18]:
# Apply the column renaming to the static table, then export the results of
# `averaging` for the fatigue and static tables (fatigue first).
df_static = replace_column_name(df_static, name_mapping)

fatigue_avg = averaging(
    df_fatigue,
    measure_features=['Maximum Stress', 'Minimum Stress', 'Frequency'],
)
fatigue_avg.to_excel('../data/Upwind_avg_fatigue.xlsx', engine='openpyxl', index=False)

static_avg = averaging(df_static, measure_features=['loading rate[mm/min]'])
static_avg.to_excel('../data/Upwind_avg_static.xlsx', engine='openpyxl', index=False)

  0%|          | 0/68 [00:00<?, ?it/s]

  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)


  0%|          | 0/56 [00:00<?, ?it/s]

  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)


In [19]:
# Absence-ratio figure for the processed fatigue table.
fatigue_absence_png = '../data/Upwind_absence_ratio.png'
plot_absence(
    df_fatigue,
    name_mapping,
    fatigue_absence_png,
    fontsize=5,
)

In [20]:
# Absence-ratio figure for the processed static table.
static_absence_png = '../data/Upwind_static_absence_ratio.png'
plot_absence(
    df_static,
    name_mapping,
    static_absence_png,
    fontsize=5,
)