In [223]:
import pandas as pd
import numpy as np

# Load the Excel workbook with pandas' default header handling: the first sheet row becomes the
# column labels, so the model numbers found in that row (1, ..., 12) label the first column of each model.
# That makes the type A information (the model ID strings) easy to reach as data[<model number>].
file_path = './Design_P_ATS.xlsx'
data = pd.read_excel(file_path)

In [224]:
# Preview the first rows to see how the sheet is laid out once its first row has been used as header.
data.head()

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,1,...,11,Unnamed: 60,Unnamed: 61,Unnamed: 62,Unnamed: 63,12,Unnamed: 65,Unnamed: 66,Unnamed: 67,Unnamed: 68
0,,,,,,,,,,,...,,,,,,,,,,
1,,Interstory height,,,,,,,,P_5_004_6-5_C_II_3_MGP10_ATS,...,P_5_002_5-5_A_II_1_MGP10_ATS,,,,,P_5_004_6-5_A_II_1_MGP10_ATS,,,,
2,,h [cm],40.36,,,,,,,Weight per story [kgf],...,Weight per story [kgf],,,Cmin\nCmax\nQx [tonf]\nQy [tonf],Story area [cm2],Weight per story [kgf],,,Cmin\nCmax\nQx [tonf]\nQy [tonf],Story area [cm2]
3,,,,,,,,,Story,D,...,D,L,D+0.25L,,,D,L,D+0.25L,,
4,,,,,,,,,1,137713.086561,...,134142.060424,98375.19172,158735.858354,0.03,4918759.586,133547.500845,98375.19172,158141.298775,0.03,4918759.586


In [225]:
# Given the explanation of the header, we will define a function to parse it and extract the relevant information.

def parse_header(header):
    """
    Decode an underscore-separated model ID such as "C_3_002_5-5_D_II_3_MGP10_HD".

    Field positions (0-based) after splitting on '_':
        0 architectural archetype, 1 number of stories, 2 drift allowance,
        3 R factor, 4 soil class, 5 occupancy category, 6 seismic zone,
        7 timber quality, 8 connection system.
    Fields 2, 3, 5 and 7 are ignored for the project and are not returned.

    :param header: The model ID string.
    :return: dict with keys "architectural_archetype", "stories" (int), "soil_class",
             "seismic_zone" (int) and "connection_system".
    :raises IndexError: if the header has fewer fields than the positions read here.
    :raises ValueError: if the stories or seismic zone field is not an integer.
    """
    fields = header.split('_')

    # Only the fields used by the project are picked; the two numeric ones are converted to int.
    return {
        "architectural_archetype": fields[0],
        "stories": int(fields[1]),
        "soil_class": fields[4],
        "seismic_zone": int(fields[6]),
        "connection_system": fields[8],
    }

# Example usage: a sample model ID in the same nine-field format.
header = "C_3_002_5-5_D_II_3_MGP10_HD"
parsed_info = parse_header(header)

parsed_info # Last expression of the cell, so the notebook displays the parsed dictionary



{'architectural_archetype': 'C',
 'stories': 3,
 'soil_class': 'D',
 'seismic_zone': 3,
 'connection_system': 'HD'}

In [226]:
# Extract Type A information

# The model IDs sit in row 1 of `data`, under the integer column labels 1..12 (one label per model).
parsed_data = [parse_header(data[model_number][1]) for model_number in range(1, 13)]

# Type A features kept in the table (the connection system is parsed but not kept).
columns = ["architectural_archetype", "stories", "soil_class", "seismic_zone"]

# One row per model, values ordered like `columns`.
new_table = [[model_info[name] for name in columns] for model_info in parsed_data]

# Tabulate the parsed data and show it.
df = pd.DataFrame(new_table, columns=columns)
print(df)

   architectural_archetype  stories soil_class  seismic_zone
0                        P        5          C             3
1                        P        5          B             3
2                        P        5          A             3
3                        P        5          A             3
4                        P        5          D             1
5                        P        5          D             1
6                        P        5          C             1
7                        P        5          C             1
8                        P        5          B             1
9                        P        5          B             1
10                       P        5          A             1
11                       P        5          A             1


In [227]:
# Read the same workbook again, this time without using the first sheet row as a header:
# every sheet row stays in the data, and rows and columns are labelled 0, 1, 2, ...
# Compared with `data`, each sheet row therefore sits one position lower (the model IDs are in row 2 here).
data2 = pd.read_excel(file_path, header=None)
data2.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,59,60,61,62,63,64,65,66,67,68
0,,,,,,,,,,1,...,11,,,,,12,,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,,Interstory height,,,,,,,,P_5_004_6-5_C_II_3_MGP10_ATS,...,P_5_002_5-5_A_II_1_MGP10_ATS,,,,,P_5_004_6-5_A_II_1_MGP10_ATS,,,,
3,,h [cm],40.36,,,,,,,Weight per story [kgf],...,Weight per story [kgf],,,Cmin\nCmax\nQx [tonf]\nQy [tonf],Story area [cm2],Weight per story [kgf],,,Cmin\nCmax\nQx [tonf]\nQy [tonf],Story area [cm2]
4,,,,,,,,,Story,D,...,D,L,D+0.25L,,,D,L,D+0.25L,,


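Attention
In pandas:

`df[i]` selects the column whose label is `i`.

`df[i][j]` then selects the element of that column whose row label is `j`. With the default integer index (0, 1, 2, ...), this is also the j-th row, counting from 0.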

In [228]:
# Issue: when a value in the Excel sheet spans several rows (e.g. a merged cell), only the first of those rows holds the value after reading; the remaining rows are NaN, so we fill them from the value above.

def fill_values_based_on_key(data, key_column_index, value_column_index, end_row=304):
    """
    Forward-fill the NaN cells of one DataFrame column, in place.

    Rows ``value_column_index`` .. ``end_row - 1`` of the column at position
    ``key_column_index`` are scanned top to bottom, and every NaN cell is replaced by the
    closest non-NaN value found above it inside the scanned range. NaN cells that come
    before the first valid value are left untouched.

    :param data: pandas DataFrame; it is accessed positionally through ``.iat``.
    :param key_column_index: Positional index of the column to fill.
    :param value_column_index: Despite its name, the positional index of the first ROW to
        process (the name is kept so existing calls keep working).
    :param end_row: Exclusive positional index of the last row to process. Defaults to 304,
        the bound that used to be hard-coded, and is clamped to ``len(data)`` so shorter
        frames no longer raise IndexError.
    :return: None; the operation modifies ``data`` in place.
    """
    stop_row = min(end_row, len(data))
    last_valid_key = None
    for row in range(value_column_index, stop_row):
        cell = data.iat[row, key_column_index]
        if pd.notna(cell):
            # Remember the most recent real value to propagate downwards.
            last_valid_key = cell
        elif last_valid_key is not None:
            # Only fill once a value has been seen; never overwrite NaN with None.
            data.iat[row, key_column_index] = last_valid_key

# data2 is the header-less DataFrame read above (not a list of lists).
# Fill the NaN gaps of columns 3 and 4 starting at row 14; row 13 holds their titles, "Story" and "Direction".
fill_values_based_on_key(data2, 3, 14)
fill_values_based_on_key(data2, 4, 14)
data2[3][206]  # spot check: show the filled value of column 3 at row 206

4

Create the new pandas table

In [149]:
# Show the wall table in columns 3-7: row 13 carries the column titles, rows 14-303 the walls.
print(data2.iloc[13:304,3:8])

         3          4     5       6        7
13   Story  Direction  Wall  L [cm]  xi [cm]
14       1          X   1.1     270    439.0
15       1          X   1.2     270     1503
16       1          X   2.1     149      886
17       1          X   2.2     149     1057
..     ...        ...   ...     ...      ...
299      5          Y   J.1     284   1769.0
300      5          Y   J.2     276   1769.0
301      5          Y   J.3     422   1769.0
302      5          Y   J.4     276   1769.0
303      5          Y   J.5     284   1769.0

[291 rows x 5 columns]


In [244]:
# Number of wall rows taken from data2 (rows 14..303).
repetitions = 304 - 13 - 1

# Type A information: repeat every model row of `df` once per wall row.
repeated_df = pd.DataFrame(np.repeat(df.values, repetitions, axis=0), columns=df.columns)

# Type B information (part 1): the same wall slice (columns 3-7) is taken twelve times, once per model,
# with its columns renamed to the titles shown in row 13.
wall_labels = ["Story", "Direction", "Wall", "L [cm]", "xi [cm]"]
df1 = [
    data2.iloc[14:304, 3:8].rename(columns=dict(zip(data2.columns[3:8], wall_labels)))
    for _ in range(12)
]

# Type C information: for model i the slice starts at column 9 + 5 * i and its first four columns are kept.
design_labels = [
    "Nail spacing [cm]",
    "Number sheathing panels",
    "Number end studs",
    "Total number studs",
]
dfs = [
    data2.iloc[14:304, 9 + 5 * i:13 + 5 * i].rename(
        columns=dict(zip(data2.columns[9 + 5 * i:13 + 5 * i], design_labels))
    )
    for i in range(12)
]

# Stack the per-model pieces vertically; the column names line up after the renames above.
result2 = pd.concat(df1, ignore_index=True)
result3 = pd.concat(dfs, ignore_index=True)


In [250]:
# Preview the stacked wall table. NOTE(review): the saved output lists 'D+0.25L' and 'Story Area',
# columns that are only added to result2 by a later cell, so this preview was produced after that cell ran.
result2.head()

Unnamed: 0,Story,Direction,Wall,L [cm],xi [cm],D+0.25L,Story Area
0,1,X,1.1,270,439.0,162306.884491,4918759.586
1,1,X,1.2,270,1503.0,162306.884491,4918759.586
2,1,X,2.1,149,886.0,162306.884491,4918759.586
3,1,X,2.2,149,1057.0,162306.884491,4918759.586
4,1,X,3.1,544,272.0,162306.884491,4918759.586


In [257]:
# Additional type B information (part 2): D+0.25L and story area, looked up by the story of each wall row.
# Additional type C information (part 2): the Tx and Ty values of each model.
rows_per_model = 304 - 14

# Story number of every wall row, read from the first column of result2 (first model's rows).
wall_stories = [int(result2.iat[j, 0]) for j in range(0, rows_per_model)]

# Model-major order (all wall rows of model 0, then model 1, ...), matching the row order of result2/result3.
# The looked-up row is 4 + story; the column offsets inside model i's slice are +2 and +4.
d_plus_quarter_l_values = [
    data2.iat[4 + story, 11 + 5 * i] for i in range(0, 12) for story in wall_stories
]
story_area_values = [
    data2.iat[4 + story, 13 + 5 * i] for i in range(0, 12) for story in wall_stories
]

# Tx and Ty are read from row 12 of the model's first two columns and repeated for every wall row.
Tx_values = [data2.iat[12, 9 + 5 * i] for i in range(0, 12) for _ in range(rows_per_model)]
Ty_values = [data2.iat[12, 10 + 5 * i] for i in range(0, 12) for _ in range(rows_per_model)]

# Add the new columns to result2 and result3
result2['D+0.25L'] = d_plus_quarter_l_values
result2['Story Area'] = story_area_values
result3['Tx(s)'] = Tx_values
result3['Ty(s)'] = Ty_values

In [258]:
prepared_file_path = 'prepared_data_all.csv'

# Put the three frames side by side. They all carry a default 0..n-1 index
# (result2 and result3 were concatenated with ignore_index=True), so rows pair up by position.
resultFinal = pd.concat([repeated_df, result2, result3], axis="columns")

# Write the combined table without the index column.
resultFinal.to_csv(prepared_file_path, index=False)

In [261]:
# Second table: only the type A information is used to predict Tx and Ty, so one row per model is enough.
# Tx and Ty are read from row 12 of the first two columns of each model's slice.
Tx_values = [data2.iat[12, 9 + 5 * i] for i in range(0, 12)]
Ty_values = [data2.iat[12, 10 + 5 * i] for i in range(0, 12)]

# NOTE: the columns are added to `df` itself, so re-running the earlier cell that builds
# repeated_df from df.values after this one would also repeat these two columns.
df['Tx(s)'] = Tx_values
df['Ty(s)'] = Ty_values

prepared_file_path = 'prepared_data_C_part2.csv'
df.to_csv(prepared_file_path, index=False)