In [2]:
import pandas as pd
import numpy as np
import glob
import os
import re

In [3]:
# File-system locations: directory of distance CSVs, the timecard CSV,
# and a path for the combined table
input_directory = "../data/distance_data/"
timecard_path = "../data/timecards/combined_timecard_3seconds.csv"
output_file = "../data/combined_distance_table.csv"

# Read the timecard and attach a lower-cased copy of "Label" so labels can
# be compared without regard to case
timecard_df = pd.read_csv(timecard_path).assign(
    **{"Lowercase Label": lambda frame: frame["Label"].str.lower()}
)

In [4]:

# Accumulators filled by the matching loop in this cell:
#   rows         -> one dict per distance value that found a timecard label
#   dropped_rows -> one (bridge, respondent, column) tuple per value that did not
rows, dropped_rows = [], []
def getBridgeName(file_path):
    """Map a distance-file path to its bridge name.

    The path is searched for the markers "Easy 1", "Easy 2", "Hard 1" and
    "Hard 2", in that order; the first marker found anywhere in the path
    decides the result. The search is case-sensitive.

    Args:
        file_path: Path string to inspect.

    Returns:
        "Bridge 1" through "Bridge 4" for the matching marker, or
        "Unknown Bridge" when none of the markers occurs.
    """
    # Ordered pairs: earlier entries take precedence when several markers occur
    marker_to_bridge = (
        ("Easy 1", "Bridge 1"),
        ("Easy 2", "Bridge 2"),
        ("Hard 1", "Bridge 3"),
        ("Hard 2", "Bridge 4"),
    )
    return next(
        (bridge_name for marker, bridge_name in marker_to_bridge if marker in file_path),
        "Unknown Bridge",
    )
    
# Build one long-format record per (bridge, respondent, crack) distance value,
# labelled with the matching label from the timecard.
# Files are visited in sorted order because glob.glob returns paths in
# arbitrary order; sorting keeps the row order of the result reproducible.
for file_path in sorted(glob.glob(os.path.join(input_directory, "*.csv"))):
    # keep_default_na=False leaves empty cells as "" so they can be skipped below
    distance_df = pd.read_csv(file_path, keep_default_na=False)
    bridge = getBridgeName(file_path)  # Derive bridge name from file name

    # Only columns whose name starts with "crack" are treated as distance values
    crack_columns = [col for col in distance_df.columns if col.startswith("crack")]

    for _, row in distance_df.iterrows():
        respondent = int(row["Respondent Name"])

        # The bridge/respondent part of the lookup is identical for every crack
        # column of this row, so narrow the timecard once per row instead of
        # once per cell
        respondent_timecard = timecard_df[
            (timecard_df["Study Name"] == bridge) &
            (timecard_df["Respondent"] == respondent)
        ]

        for col in crack_columns:
            if row[col] == "":
                continue  # Empty cell: nothing recorded for this crack
            try:
                distance = float(row[col])
            except ValueError:
                continue  # Skip values that are not numeric

            # Whole-word match of the lower-cased column name inside the
            # lower-cased timecard label (so "crack 1" does not hit "crack 10")
            crack_name = col.lower()
            matched_row = respondent_timecard[
                respondent_timecard["Lowercase Label"].str.contains(
                    rf'\b{re.escape(crack_name)}\b', na=False, regex=True
                )
            ]

            if matched_row.empty:
                # No timecard entry for this bridge/respondent/crack: record and skip
                dropped_rows.append((bridge, respondent, col))
                continue

            rows.append({
                "Study Name": bridge,
                "Respondent Name": respondent,
                # Original-case label of the first matching timecard entry
                "Label": matched_row["Label"].values[0],
                "Distance": distance
            })

# Create a new DataFrame from the transformed data; naming the columns
# explicitly keeps them present even when no row was matched
df = pd.DataFrame(rows, columns=["Study Name", "Respondent Name", "Label", "Distance"])
print(dropped_rows)
print(len(dropped_rows))
        

Crack 1 Hit
[('Bridge 3', 20002, 'crack 4'), ('Bridge 3', 20002, 'crack 18'), ('Bridge 3', 20009, 'crack 12'), ('Bridge 3', 20016, 'crack 10'), ('Bridge 3', 20023, 'crack 18'), ('Bridge 3', 20026, 'crack 10'), ('Bridge 3', 20027, 'crack 10'), ('Bridge 3', 20027, 'crack 16'), ('Bridge 3', 20032, 'crack 10'), ('Bridge 3', 20036, 'crack 18'), ('Bridge 3', 20036, 'crack 20'), ('Bridge 3', 20037, 'crack 18'), ('Bridge 3', 20044, 'crack 5'), ('Bridge 4', 20004, 'crack 14'), ('Bridge 4', 20006, 'crack 16'), ('Bridge 4', 20009, 'crack 15'), ('Bridge 4', 20009, 'crack 16'), ('Bridge 4', 20010, 'crack 14'), ('Bridge 4', 20010, 'crack 15'), ('Bridge 4', 20015, 'crack 16'), ('Bridge 4', 20016, 'crack 10'), ('Bridge 4', 20017, 'crack 11'), ('Bridge 4', 20017, 'crack 15'), ('Bridge 4', 20017, 'crack 18'), ('Bridge 4', 20021, 'crack 13'), ('Bridge 4', 20021, 'crack 17'), ('Bridge 4', 20022, 'crack 14'), ('Bridge 4', 20023, 'crack 13'), ('Bridge 4', 20023, 'crack 14'), ('Bridge 4', 20024, 'crack 12'),

In [14]:
# Crack numbers to omit from the final table, keyed by "Study Name"
OMITTED_CRACKS = {
    "Bridge 1": [3],
    "Bridge 2": [3, 10, 14, 15, 19],
    "Bridge 3": [4, 5, 17, 20],
    "Bridge 4": [4, 15, 16],
}

# One boolean mask per (study, crack) pair. The \b word boundaries make
# "Crack 3" match a label containing "Crack 3" but not "Crack 30"; a plain
# substring test would match both. na=False treats a missing label as
# "no match" so the mask stays strictly boolean.
omit_conditions = [
    (df["Study Name"] == study_name)
    & df["Label"].str.contains(rf"\bCrack {crack_number}\b", regex=True, na=False)
    for study_name, crack_numbers in OMITTED_CRACKS.items()
    for crack_number in crack_numbers
]

# Combine all conditions with OR, starting from an all-False mask so the
# individual masks in omit_conditions are left unmodified
combined_condition = pd.Series(False, index=df.index, dtype=bool)
for condition in omit_conditions:
    combined_condition = combined_condition | condition

# Omit rows based on combined conditions
df_filtered = df[~combined_condition]

df_filtered.to_csv("../data/finalized_data/combined_distance_table.csv", index=False)