In [59]:
import pandas as pd
import re

# Restructure a wide export: every `block_size` consecutive cells of the data
# row become one output row whose columns are `target_columns`.

# Input / output locations.
file_path = 'C:\\Users\\q\\Downloads\\WF data3.csv'  # Adjust based on your local path
output_path = 'C:/Users/q/Downloads/restructured_file_with_final_lead_leg_mag_x.csv'

# Number of consecutive source cells folded into one output row.
# NOTE(review): target_columns below has 30 entries while a block is 17 cells
# wide, so a single output row can never be completely filled -- confirm that
# 17 is the real per-file width of the export.
block_size = 17

# Define the target columns, including the correct number of columns for each variable with _X, _Y, _Z
# Lead_Leg_GRF_Mag@Max_Shoulder_Rot has only _X and no _Y, _Z
target_columns = [
    "Lead_Leg_GRF_mag_max_X", "Lead_Leg_GRF_mag_max_Y", "Lead_Leg_GRF_mag_max_Z",
    "Lead_Leg_GRF_max_X", "Lead_Leg_GRF_max_Y", "Lead_Leg_GRF_max_Z",
    "Lead_Leg_GRF_min_X", "Lead_Leg_GRF_min_Y", "Lead_Leg_GRF_min_Z",
    "Lead_Leg_GRF_mag_Midpoint_FS_Release_X", "Lead_Leg_GRF_mag_Midpoint_FS_Release_Y", "Lead_Leg_GRF_mag_Midpoint_FS_Release_Z",
    "Lead_Leg_GRF@Midpoint_FS_Release_X", "Lead_Leg_GRF@Midpoint_FS_Release_Y", "Lead_Leg_GRF@Midpoint_FS_Release_Z",
    "Lead_Leg_GRF@Max_Shoulder_Rot_X", "Lead_Leg_GRF@Max_Shoulder_Rot_Y", "Lead_Leg_GRF@Max_Shoulder_Rot_Z",
    "Lead_Leg_GRF_Mag@Max_Shoulder_Rot_X",  # Only _X for this column
    "Trunk_COG@Footstrike_X", "Trunk_COG@Footstrike_Z", "Trunk_COG@Footstrike_Y",
    "Trunk_COG@Release_X", "Trunk_COG@Release_Z", "Trunk_COG@Release_Y",
    "Trunk_COG_Translation_X", "Trunk_COG_Translation_Z", "Trunk_COG_Translation_Y",
    "HEIGHT", "MASS"
]

# Core name of the one variable that is stored with a single (_X) column.
_MAG_AT_MAX_SHOULDER_ROT = "Lead_Leg_GRF_Mag@Max_Shoulder_Rot"


def _labeled_target_index(core_name, subset_label, target_columns):
    """Return the position in `target_columns` that one source cell maps to.

    Parameters
    ----------
    core_name : str
        Variable name of the cell (row 2 with the numeric suffix removed).
    subset_label : object
        Row-5 label of the cell; anything that is not a string (e.g. the NaN
        pandas produces for a blank cell) is treated as "no label".
    target_columns : list of str
        Output column names, scanned in order; the first match wins.

    Returns
    -------
    int or None
        Index of the matching target column, or None when nothing matches.
    """
    # The magnitude at max shoulder rotation is routed by core name alone.
    # This has to be tested first: its target name also contains
    # "Lead_Leg_GRF", so inside the generic scan below it would be captured by
    # the component branch and never reach a dedicated check.
    if _MAG_AT_MAX_SHOULDER_ROT in core_name:
        single_column = _MAG_AT_MAX_SHOULDER_ROT + "_X"
        for position, target_col in enumerate(target_columns):
            if single_column in target_col:
                return position

    has_label = isinstance(subset_label, str)
    for position, target_col in enumerate(target_columns):
        if "Trunk_COG" in target_col or "Lead_Leg_GRF" in target_col:
            # Component columns need both the core name and the row-5 label.
            # The isinstance guard avoids `float in str` on blank labels.
            if has_label and core_name in target_col and subset_label in target_col:
                return position
        elif target_col in core_name:
            # Columns without components (HEIGHT, MASS) match on name only.
            return position
    return None


def restructure_labeled_blocks(filenames, core_names, subset_labels, values,
                               target_columns, block_size=17):
    """Fold a flat row of cells into one output row per block.

    Parameters
    ----------
    filenames, core_names, subset_labels, values : sequence
        Parallel sequences (lists or pandas Series) describing each source
        cell: its row-1 identifier, cleaned row-2 name, row-5 label and
        row-6 value.
    target_columns : sequence of str
        Output column names (after the leading 'Filename' column).
    block_size : int, default 17
        Number of consecutive cells that form one output row.

    Returns
    -------
    pandas.DataFrame
        Columns are ['Filename'] + target_columns; one row per block. Cells
        that match no target are dropped, targets that receive no cell stay
        None, and when two cells of a block map to the same target the later
        one wins. The Filename is the identifier of the block's first cell.

    Raises
    ------
    ValueError
        If `block_size` is smaller than 1.
    """
    if block_size < 1:
        raise ValueError("block_size must be a positive integer")

    filenames = list(filenames)
    core_names = list(core_names)
    subset_labels = list(subset_labels)
    values = list(values)
    target_columns = list(target_columns)

    # Cells can only be matched while a name and a label exist for them.
    matchable = min(len(values), len(core_names), len(subset_labels))

    records = []
    for block_start in range(0, len(values), block_size):
        matched = [None] * len(target_columns)
        block_end = min(block_start + block_size, matchable)
        # Visit only this block's cells instead of re-scanning every column.
        for cell in range(block_start, block_end):
            position = _labeled_target_index(core_names[cell], subset_labels[cell], target_columns)
            if position is not None:
                matched[position] = values[cell]
        filename = filenames[block_start] if block_start < len(filenames) else None
        records.append([filename] + matched)

    # Build the frame once (object dtype keeps the pasted values untouched)
    # rather than concatenating a one-row frame per block.
    return pd.DataFrame(records, columns=['Filename'] + target_columns, dtype=object)


# In a notebook kernel __name__ is "__main__", so this part runs exactly as a
# plain cell would; the guard only lets the helper above be imported and
# tested without touching the filesystem.
if __name__ == "__main__":
    # Load the CSV file
    df = pd.read_csv(file_path, header=None)

    # Row 1: filenames or identifiers (skip the first column, A)
    filenames_row = df.iloc[0, 1:]

    # Row 2: core labels, row 5: subset labels like "X", "Y", "Z"
    variable_names_row = df.iloc[1, 1:]
    subset_labels_row = df.iloc[4, 1:]

    # Remove the numeric "_<digits>_<digits>" part from the variable names in row 2
    core_variable_names = variable_names_row.apply(lambda x: re.sub(r'_\d+_\d+', '', str(x)))

    # Row 6: the data to paste
    data_row = df.iloc[5, 1:]

    df_restructured = restructure_labeled_blocks(
        filenames_row, core_variable_names, subset_labels_row, data_row,
        target_columns, block_size,
    )

    # Save the restructured file to the specified path
    df_restructured.to_csv(output_path, index=False)

    # Output the file path for verification
    print(f"Restructured file saved to: {output_path}")


Restructured file saved to: C:/Users/q/Downloads/restructured_file_with_final_lead_leg_mag_x.csv


In [13]:
# Restructure a wide export: every `block_size` consecutive cells of the data
# row become one output row whose columns are `target_columns`.
# `pd` and `re` are expected to come from the notebook's import cell.

# Input / output locations.
file_path = 'C:\\Users\\q\\Downloads\\WF data2.csv'  # Adjust based on your local path
output_path = 'C:/Users/q/Downloads/restructured_file_fixed_columns_correct_final.csv'

# Number of consecutive source cells folded into one output row.
# NOTE(review): target_columns below has 20 entries while a block is 17 cells
# wide -- confirm that 17 is the real per-file width of the export.
block_size = 17

# Define the target columns, including both core names and subset labels for "Trunk_COG" like "X", "Y", and "Z".
# A name that is listed several times receives successive occurrences of that
# variable within a block (first listed column <- first occurrence, and so on).
target_columns = [
    "Lead_Leg_GRF_mag_max", "Lead_Leg_GRF_max", "Lead_Leg_GRF_min",
    "Lead_Leg_GRF_mag_Midpoint_FS_Release", "Lead_Leg_GRF@Midpoint_FS_Release", "Lead_Leg_GRF@Midpoint_FS_Release",
    "Lead_Leg_GRF@Max_Shoulder_Rot", "Lead_Leg_GRF@Max_Shoulder_Rot", "Lead_Leg_GRF@Max_Shoulder_Rot",
    "Trunk_COG@Footstrike_X", "Trunk_COG@Footstrike_Z", "Trunk_COG@Footstrike_Y",
    "Trunk_COG@Release_X", "Trunk_COG@Release_Z", "Trunk_COG@Release_Y",
    "Trunk_COG_Translation_X", "Trunk_COG_Translation_Z", "Trunk_COG_Translation_Y",
    "HEIGHT", "MASS"
]


def _core_target_index(core_name, subset_label, target_columns):
    """Return the first position in `target_columns` that a source cell matches.

    "Trunk_COG" targets require both the core name and the row-5 label to be
    contained in the target name; every other target matches when the target
    name is contained in the core name. A label that is not a string (e.g. the
    NaN of a blank cell) never matches a "Trunk_COG" target. Returns None when
    nothing matches.
    """
    has_label = isinstance(subset_label, str)
    for position, target_col in enumerate(target_columns):
        if "Trunk_COG" in target_col:
            # The isinstance guard avoids `float in str` on blank labels.
            if has_label and core_name in target_col and subset_label in target_col:
                return position
        elif target_col in core_name:
            return position
    return None


def restructure_core_blocks(filenames, core_names, subset_labels, values,
                            target_columns, block_size=17):
    """Fold a flat row of cells into one output row per block.

    Parameters
    ----------
    filenames, core_names, subset_labels, values : sequence
        Parallel sequences (lists or pandas Series) describing each source
        cell: its row-1 identifier, cleaned row-2 name, row-5 label and
        row-6 value.
    target_columns : sequence of str
        Output column names (after the leading 'Filename' column); names may
        repeat.
    block_size : int, default 17
        Number of consecutive cells that form one output row.

    Returns
    -------
    pandas.DataFrame
        Columns are ['Filename'] + target_columns; one row per block. Within
        a block, the n-th cell that matches a given target name is stored in
        the n-th column carrying that name; once every column of that name is
        used, further cells overwrite the last of them (so a name listed once
        keeps the last matching value). Unmatched targets stay None. The
        Filename is the identifier of the block's first cell.

    Raises
    ------
    ValueError
        If `block_size` is smaller than 1.
    """
    if block_size < 1:
        raise ValueError("block_size must be a positive integer")

    filenames = list(filenames)
    core_names = list(core_names)
    subset_labels = list(subset_labels)
    values = list(values)
    target_columns = list(target_columns)

    # All positions sharing one column name, in listing order.
    slots_by_name = {}
    for position, target_col in enumerate(target_columns):
        slots_by_name.setdefault(target_col, []).append(position)

    # Cells can only be matched while a name and a label exist for them.
    matchable = min(len(values), len(core_names), len(subset_labels))

    records = []
    for block_start in range(0, len(values), block_size):
        matched = [None] * len(target_columns)
        seen = {}  # column name -> cells already stored under it in this block
        block_end = min(block_start + block_size, matchable)
        # Visit only this block's cells instead of re-scanning every column.
        for cell in range(block_start, block_end):
            first = _core_target_index(core_names[cell], subset_labels[cell], target_columns)
            if first is None:
                continue
            name = target_columns[first]
            slots = slots_by_name[name]
            # Stopping at the first match would leave repeated names empty;
            # advance through the repeated columns instead.
            slot = slots[min(seen.get(name, 0), len(slots) - 1)]
            matched[slot] = values[cell]
            seen[name] = seen.get(name, 0) + 1
        filename = filenames[block_start] if block_start < len(filenames) else None
        records.append([filename] + matched)

    # Build the frame once (object dtype keeps the pasted values untouched)
    # rather than concatenating a one-row frame per block.
    return pd.DataFrame(records, columns=['Filename'] + target_columns, dtype=object)


# In a notebook kernel __name__ is "__main__", so this part runs exactly as a
# plain cell would; the guard only lets the helper above be imported and
# tested without touching the filesystem.
if __name__ == "__main__":
    # Load the CSV file
    df = pd.read_csv(file_path, header=None)

    # Row 1: filenames or identifiers (skip the first column, A)
    filenames_row = df.iloc[0, 1:]

    # Row 2: core labels, row 5: subset labels "X", "Y", "Z"
    variable_names_row = df.iloc[1, 1:]
    subset_labels_row = df.iloc[4, 1:]
    core_variable_names = variable_names_row.apply(lambda x: re.sub(r'_\d+_\d+', '', str(x)))

    # Row 6: the data to paste
    data_row = df.iloc[5, 1:]

    df_restructured = restructure_core_blocks(
        filenames_row, core_variable_names, subset_labels_row, data_row,
        target_columns, block_size,
    )

    # Save the restructured file to the specified path
    df_restructured.to_csv(output_path, index=False)

    # Output the file path for verification
    print(f"Restructured file saved to: {output_path}")


Raw content of row 1:
1        Bryant, Luke 08-24\Baseball Right-handed\Repor...
2        Bryant, Luke 08-24\Baseball Right-handed\Repor...
3        Bryant, Luke 08-24\Baseball Right-handed\Repor...
4        Bryant, Luke 08-24\Baseball Right-handed\Repor...
5        Bryant, Luke 08-24\Baseball Right-handed\Repor...
                               ...                        
13036    Tanner Myatt\Baseball Right-handed\Report_TM.c...
13037    Tanner Myatt\Baseball Right-handed\Report_TM_e...
13038    Tanner Myatt\Baseball Right-handed\Report_TM_e...
13039    Webb, Chase\Baseball Right-handed\Report_CW.cm...
13040    Webb, Chase\Baseball Right-handed\Report_CW.cm...
Name: 0, Length: 13040, dtype: object
Columns identified as Fastball:
1        Bryant, Luke 08-24\Baseball Right-handed\Repor...
2        Bryant, Luke 08-24\Baseball Right-handed\Repor...
3        Bryant, Luke 08-24\Baseball Right-handed\Repor...
4        Bryant, Luke 08-24\Baseball Right-handed\Repor...
5        Bryant, Luke 0