In [28]:
import pandas as pd
import numpy as np


# Each archetype header encodes several design parameters separated by underscores.
# The function below parses such a header and extracts the relevant fields.
def parse_header(header):
    """Decode an underscore-separated archetype header into its design fields.

    The header is split on ``'_'`` and the fields are read by position:
    token 0 is the architectural archetype, token 1 the number of stories,
    token 4 the soil class, token 6 the seismic zone and token 8 the
    connection system.  The story count and the seismic zone are converted
    to ``int``; the other fields are returned as strings.

    Returns a dict keyed by field name, in the order listed above.
    """
    tokens = header.split('_')

    # (output key, token position, converter or None) -- processed in order.
    field_layout = (
        ("architectural_archetype", 0, None),
        ("stories", 1, int),
        ("soil_class", 4, None),
        ("seismic_zone", 6, int),
        ("connection_system", 8, None),
    )

    parsed = {}
    for key, position, convert in field_layout:
        token = tokens[position]
        parsed[key] = convert(token) if convert else token
    return parsed

# Function to fill values based on the last non-NaN value in the key column
def fill_values_based_on_key(data, key_column_index, value_row_index, finishing_row_informationB):
    """Forward-fill one column of ``data`` in place over a range of rows.

    Rows ``value_row_index`` up to, but not including,
    ``finishing_row_informationB`` are visited in order.  Every missing cell
    in column ``key_column_index`` is overwritten with the most recent
    non-missing value met earlier in that range (``None`` when there has been
    none yet).  Cells are addressed by position; nothing is returned.
    """
    carried = None
    for row in range(value_row_index, finishing_row_informationB):
        cell = data.iat[row, key_column_index]
        if pd.isna(cell):
            # Gap: repeat the last value seen above it.
            data.iat[row, key_column_index] = carried
        else:
            # Real value: remember it for the gaps that follow.
            carried = cell


# The archetype IDs were added in arbitrary order, so each one has to be looked up in the performance file by its header.
def find_performance_using_header(data_D, header):
    """Return the performance values stored on the row that matches ``header``.

    The first row of ``data_D`` whose cell in column 2 equals ``header`` is
    selected, and thirteen cells of that row are returned, taken at fixed
    offsets to the right of column 2.

    Parameters
    ----------
    data_D : pandas.DataFrame
        Performance table; cells are addressed by position only, so the
        frame's index labels do not matter.
    header : str
        Archetype header to look for in column 2.

    Returns
    -------
    list
        The thirteen selected cells, in column order.

    Raises
    ------
    IndexError
        If no row of ``data_D`` carries ``header`` in column 2.
    """
    starting_column_D = 2
    # Offsets, relative to the header column, of the cells to extract.
    relevant_columns = [1, 2, 3, 4, 6, 7, 8, 10, 11, 13, 14, 16, 17]

    # Work with positions rather than index labels: the row is read back with
    # .iloc, which would pick the wrong row (or fail) on a frame whose index
    # is not the default 0..n-1.
    matches = (data_D.iloc[:, starting_column_D] == header).to_numpy(dtype=bool, na_value=False)
    if not matches.any():
        raise IndexError(
            f"header {header!r} not found in column {starting_column_D} of the performance table"
        )
    row_position = int(matches.argmax())  # first matching row

    relevant_columns_D = [col + starting_column_D for col in relevant_columns]
    return data_D.iloc[row_position, relevant_columns_D].tolist()


def prepare_data_to_csv3(file_path, sheet_name, data_D):
    """Flatten one sheet of a design workbook into one row per building.

    The sheet is read twice: once with its first row as header (to count the
    buildings and read their archetype headers) and once without a header (so
    every cell can be addressed by position).  For each building the returned
    row holds, in this order:

    * the fields decoded from its archetype header by ``parse_header``;
    * eight values per row of the wall table (three columns shared by all
      buildings followed by the building's own five columns), named
      ``<quantity>_<column 3>_<column 4>_<column 5>``;
    * one ``D+0.25L k`` and one ``Story Area k`` column per story;
    * ``Tx(s)`` and ``Ty(s)``;
    * the thirteen performance values found in ``data_D`` for its header.

    Parameters
    ----------
    file_path : str
        Path of the Excel workbook.
    sheet_name : int or str
        Sheet to read, passed straight to ``pd.read_excel``.
    data_D : pandas.DataFrame
        Performance table searched by ``find_performance_using_header``.

    Returns
    -------
    pandas.DataFrame
        One row per building.
    """
    # Sheet layout, expressed as positions in the header-less read.
    starting_row_informationB = 14  # first row of the wall table (normally the same for all files)
    size_columns_informationA = 5   # columns occupied by each building (normally the same for all files)
    row_Tx_Ty_values = 12           # row holding Tx and Ty
    story_index = 3                 # column holding the story
    direction_index = 4             # column holding the direction

    # Reading with a header makes the buildings easy to count: every column
    # whose name is not an auto-generated 'Unnamed: k' is one building.
    data = pd.read_excel(file_path, sheet_name=sheet_name)
    nbr_building = len([col for col in data.columns if 'Unnamed' not in str(col)])

    # ---- Type A: design parameters decoded from each building's header ----
    columns = ["architectural_archetype", "stories", "soil_class", "seismic_zone", "connection_system"]
    new_table = []
    for i in range(1, nbr_building + 1):
        header = data[i][1]
        parsed = parse_header(header)
        # Keep the raw header too: it is the lookup key into data_D below.
        new_table.append([parsed[col] for col in columns] + [header])
    df = pd.DataFrame(new_table, columns=columns + ["header"])

    # ---- Type B: wall table, addressed by position ----
    data2 = pd.read_excel(file_path, sheet_name=sheet_name, header=None)

    # Last row that has at least one non-NaN value.  `data` lost one row to
    # its header, so its last index + 1 is that row's position in `data2`.
    finishing_row_informationB = data.dropna(how='all').index[-1] + 1

    # Exception: './Design_C_ATS.xlsx' carries additional information below
    # the table, only in this file, that is not needed.
    if file_path == './Design_C_ATS.xlsx' and sheet_name == 0:
        finishing_row_informationB = 284

    # Rows of the wall table, last row included.
    wall_rows = range(starting_row_informationB, finishing_row_informationB + 1)

    # Fill the blank story / direction cells with the value above them.  The
    # helper's end row is exclusive, so `wall_rows.stop` is passed to fill
    # exactly the rows extracted below; stopping one row earlier would leave
    # the last row's labels unfilled and let 'nan' leak into the column names.
    fill_values_based_on_key(data2, story_index, wall_rows.start, wall_rows.stop)
    fill_values_based_on_key(data2, direction_index, wall_rows.start, wall_rows.stop)

    d_all = []
    columns_all = []
    for i in range(nbr_building):
        # First of the five columns that belong to building i.
        first_col = 9 + size_columns_informationA * i
        d_all_bis = []
        for j in wall_rows:
            # Three columns shared by every building ...
            d_all_bis += [data2.iat[j, 6], data2.iat[j, 7], data2.iat[j, 8]]
            # ... followed by this building's own columns.
            d_all_bis += [data2.iat[j, first_col + k] for k in range(size_columns_informationA)]
            if i == 0:
                # Column names are the same for every building: build them once.
                name_plus = ('_' + str(data2.iat[j, story_index])
                             + '_' + str(data2.iat[j, direction_index])
                             + '_' + str(data2.iat[j, 5]))
                columns_all += ['L cm' + name_plus,
                                'xi cm' + name_plus,
                                'yi cm' + name_plus,
                                "Nail spacing [cm]" + name_plus,
                                "Number sheathing panels" + name_plus,
                                "Number end studs" + name_plus,
                                "Total number studs" + name_plus,
                                "HoldDown Model / ATS " + name_plus
                               ]
        d_all.append(d_all_bis)

    df_all = pd.DataFrame(d_all, columns=columns_all)

    # ---- Type D: performance values (for the second prediction) ----
    columns_D = ['Ωx', 'Ωy', 'µx', 'µy', 'CMR', 'SSF', 'ACMR', 'IO-ln θ','IO-β',
           'LS-ln θ','LS-β', 'CP-ln θ','CP-β']
    all_rows_data = [find_performance_using_header(data_D, header_value)
                     for header_value in df['header']]
    df_D = pd.DataFrame(all_rows_data, columns=columns_D)

    # ---- Per-story values and periods ----
    # The first two unique entries of the story column are skipped; per the
    # original author these are the NaN / non-story values -- TODO confirm
    # this holds for every sheet.
    unique_values = data2.iloc[:, story_index].unique()[2:]
    d_plus_quarter_l_values = np.zeros((nbr_building, len(unique_values)))
    story_area_values = np.zeros((nbr_building, len(unique_values)))
    Tx_values = []
    Ty_values = []

    for i in range(nbr_building):
        building_offset = size_columns_informationA * i
        for j, value in enumerate(unique_values):
            # The per-story values of story s are read from row 4 + s.
            story = int(value)
            d_plus_quarter_l_values[i, j] = data2.iat[4 + story, 11 + building_offset]
            story_area_values[i, j] = data2.iat[4 + story, 13 + building_offset]

        Tx_values.append(data2.iat[row_Tx_Ty_values, 9 + building_offset])
        Ty_values.append(data2.iat[row_Tx_Ty_values, 10 + building_offset])

    story_numbers = range(1, len(unique_values) + 1)
    df_d_plus_quarter_l = pd.DataFrame(d_plus_quarter_l_values,
                                       columns=[f'D+0.25L {k}' for k in story_numbers])
    df_story_area = pd.DataFrame(story_area_values,
                                 columns=[f'Story Area {k}' for k in story_numbers])
    df_Tx = pd.DataFrame({'Tx(s)': Tx_values})
    df_Ty = pd.DataFrame({'Ty(s)': Ty_values})

    # The raw header was only needed as a lookup key.
    df = df.drop('header', axis=1)

    # All pieces have one row per building on the same default index, so a
    # horizontal concatenation lines them up building by building.
    resultFinal = pd.concat(
        [df, df_all, df_d_plus_quarter_l, df_story_area, df_Tx, df_Ty, df_D],
        axis=1,
    )

    return resultFinal


# Design workbooks to process; every sheet of every workbook is flattened.
files = ['./Design_P_ATS.xlsx', './Design_P_HD.xlsx',
         './Design_D_ATS.xlsx', './Design_D_HD.xlsx',
         './Design_C_ATS.xlsx', './Design_C_HD.xlsx',
         './Design_Q_ATS.xlsx', './Design_Q_HD.xlsx']
# Performance table, read without a header so its cells are addressed by position.
data_D = pd.read_excel('./PerformanceResults.xlsx', header=None)

# One [file_path, sheet_index] pair per sheet of every workbook.
Files_informations = []
for file in files:
    # The context manager closes the workbook handle once the sheets are listed.
    with pd.ExcelFile(file) as xls:
        sheet_names = xls.sheet_names
    for sheet_index in range(len(sheet_names)):
        Files_informations.append([file, sheet_index])

# One flattened DataFrame per sheet.
resultsFinal = [
    prepare_data_to_csv3(file_path, sheet_index, data_D)
    for file_path, sheet_index in Files_informations
]

# Stack all sheets in a single concatenation (instead of growing the frame
# one sheet at a time) and renumber the rows from 0.
merged_df = pd.concat(resultsFinal, axis=0, ignore_index=True)

columns_D = ['Ωx', 'Ωy', 'µx', 'µy', 'CMR', 'SSF', 'ACMR', 'IO-ln θ','IO-β',
           'LS-ln θ','LS-β', 'CP-ln θ','CP-β']
# Every column except 'Tx(s)', 'Ty(s)' and the performance columns
other_columns = [col for col in merged_df.columns if col not in ['Tx(s)', 'Ty(s)'] + columns_D]

# Reorder so that 'Tx(s)', 'Ty(s)' and then the performance columns come last
new_order = other_columns + ['Tx(s)', 'Ty(s)'] + columns_D
merged_df = merged_df[new_order]

prepared_file_path = 'data_D.csv'
merged_df.to_csv(prepared_file_path, index=False)

In [29]:
# Display the merged table; pandas abbreviates long/wide frames with '...' when rendering.
merged_df

Unnamed: 0,architectural_archetype,stories,soil_class,seismic_zone,connection_system,L cm_1_X_1.1,xi cm_1_X_1.1,yi cm_1_X_1.1,Nail spacing [cm]_1_X_1.1,Number sheathing panels_1_X_1.1,...,µy,CMR,SSF,ACMR,IO-ln θ,IO-β,LS-ln θ,LS-β,CP-ln θ,CP-β
0,P,5,C,3,ATS,270.0,439.0,11.0,5.0,2.0,...,2.82,2.03,1.23,3.00,-0.078,0.379,0.533,0.453,0.887,0.487
1,P,5,B,3,ATS,270.0,439.0,11.0,5.0,1.0,...,4.23,1.78,1.30,2.77,-0.698,0.250,-0.064,0.270,0.274,0.288
2,P,5,A,3,ATS,270.0,439.0,11.0,5.0,2.0,...,4.32,3.02,1.32,4.77,-0.664,0.262,-0.025,0.275,0.311,0.297
3,P,5,A,3,ATS,270.0,439.0,11.0,5.0,1.0,...,4.16,2.53,1.37,4.14,-1.016,0.352,-0.397,0.346,-0.046,0.363
4,P,5,D,1,ATS,270.0,439.0,11.0,5.0,2.0,...,4.35,2.30,1.13,3.13,-0.370,0.318,0.290,0.340,0.647,0.369
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,Q,5,C,1,HD,120.0,222.0,11.0,15.0,1.0,...,4.27,1.74,1.26,2.63,-1.389,0.370,-0.808,0.362,-0.487,0.379
196,Q,5,B,1,HD,120.0,222.0,11.0,5.0,2.0,...,4.77,3.19,1.27,4.88,-1.248,0.317,-0.673,0.324,-0.363,0.358
197,Q,5,B,1,HD,120.0,222.0,11.0,15.0,1.0,...,4.28,2.85,1.26,4.31,-1.433,0.401,-0.838,0.396,-0.525,0.389
198,Q,5,A,1,HD,120.0,222.0,11.0,5.0,2.0,...,4.26,3.89,1.26,5.87,-1.371,0.356,-0.793,0.342,-0.473,0.353
