In [1]:
import pandas as pd
import numpy as np
from functools import reduce
import re
import logging

In [22]:

def pivot_multi_value_columns(df, pivot_columns, delimiter=','):
    """
    Expand delimiter-separated cells into one output row per value position.

    Every cell of the listed columns is converted to text, split on
    ``delimiter`` and stripped of surrounding whitespace. For each position
    ``i`` the whole input frame is repeated once, with every pivot column
    replaced by its ``i``-th entry. The blocks are stacked in position order:
    all rows for position 1, then all rows for position 2, and so on.

    The input frame is never modified.

    Parameters:
      df: DataFrame containing the data.
      pivot_columns: iterable of column names to be pivoted. Repeated names
        are processed once.
      delimiter: delimiter used in the cells (default is ',').

    Returns:
      A new DataFrame with ``len(df) * n`` rows and a fresh 0..N-1 index, where
      ``n`` is the largest number of entries found in any pivoted cell. Cells
      with fewer than ``n`` entries are padded with None for the missing
      positions. An added 'Engine_start_count' column holds the 1-based
      position. An input with no rows yields an empty frame that still has
      the 'Engine_start_count' column.

    Raises:
      ValueError: if ``pivot_columns`` is empty.
      KeyError: if a name in ``pivot_columns`` is not a column of ``df``.
    """
    # De-duplicate while keeping order so a repeated name is not handled twice.
    columns = list(dict.fromkeys(pivot_columns))
    if not columns:
        raise ValueError("pivot_columns must name at least one column")

    # Split every cell into a list of stripped entries. The lists are kept in a
    # side structure instead of being written back into df, so the caller's
    # frame is left exactly as it was passed in.
    # NOTE(review): astype(str) stringifies missing values as well (NaN usually
    # becomes the text 'nan'); confirm that is acceptable for the input data.
    split_cells = {
        col: [
            [entry.strip() for entry in cell.split(delimiter)]
            for cell in df[col].astype(str)
        ]
        for col in columns
    }

    # Use the longest cell found anywhere rather than only the first row of the
    # first column; otherwise entries beyond that length are silently dropped.
    n = max(
        (len(parts) for cells in split_cells.values() for parts in cells),
        default=0,
    )

    if n == 0:
        # Only reachable when df has no rows: str.split always yields at least
        # one entry. pd.concat cannot be called with an empty list, so build the
        # empty result directly.
        df_empty = df.copy()
        df_empty['Engine_start_count'] = pd.Series(index=df_empty.index, dtype='int64')
        return df_empty.reset_index(drop=True)

    # Build one frame per position; each is a copy of df with the pivot columns
    # narrowed to that position's entry.
    pivoted_dfs = []
    for i in range(n):
        df_temp = df.copy()
        for col, cells in split_cells.items():
            # Pad with None where this row has fewer than i + 1 entries.
            df_temp[col] = [parts[i] if i < len(parts) else None for parts in cells]
        # Record the engine run event count (starting at 1).
        df_temp['Engine_start_count'] = i + 1
        pivoted_dfs.append(df_temp)

    # Concatenate all the per-position frames into one long DataFrame.
    return pd.concat(pivoted_dfs, ignore_index=True)

# Example usage: load the flights table and expand its multi-value columns.
# Names of the columns expected to hold delimiter-separated values in each
# cell; the list is passed to pivot_multi_value_columns() below.
pivot_columns = [
    'ENL_Engine_Run_Start_Times',
    'MOP_Engine_Run_Start_Times',
    'RPM_Engine_Run_Start_Times',
    'ENL_Engine_Run_Altitudes_MSL', 
    'MOP_Engine_Run_Altitudes_MSL',
    'RPM_Engine_Run_Altitudes_MSL',
    'ENL_Engine_Run_Altitudes_AGL',
    'MOP_Engine_Run_Altitudes_AGL',
    'RPM_Engine_Run_Altitudes_AGL',
    'ENL_Noise_Registration_Times',
    'MOP_Noise_Registration_Times',
    'RPM_Noise_Registration_Times',
    'ENL_Noise_Registration_Altitudes_AGL',
    'MOP_Noise_Registration_Altitudes_AGL',
    'RPM_Noise_Registration_Altitudes_AGL',
    'engine_run_agls',
    'altitude_rate_pre_event_window (ft/s)', 
    'distance_traveled_during_window (miles)',
    'is_circling_during_pre_event_window',
    'terrain_elev_at_engine_run (ft)',
    'terrain_label_at_engine_run',
    'Engine_Start_Above_1000ft',
    'engine_run_times (s)',
    'height_gain_loss (ft)',
    'avg_speed_while_engine_running (knots)',
    'min_speed_while_engine_running (knots)',
    'max_speed_while_engine_running (knots)',
    'All_Min_TAS_knots',
    'All_Max_TAS_knots',
    'All_Avg_TAS_knots',
    'All_Post_Run_Min_TAS_knots',
    'All_Post_Run_Max_TAS_knots',
    'All_Post_Run_Avg_TAS_knots',
    'Circling_Segment_Found',
    'Min_Speed_While_Engine_Running_knots',
    'Max_Speed_While_Engine_Running_knots',
    'Avg_Speed_While_Engine_Running_knots',
    'max_noise_ENL',
    'max_noise_MOP',
    'max_noise_RPM'				    
]

# Load the input table. The file is parsed as tab-separated even though its
# name ends in .csv; the relative path is resolved against the current
# working directory.
df = pd.read_csv("transformed data 600 500 50/updated_flights_final.csv", delimiter='\t')

# Run the pivot function and print the first five rows as a quick check.
df_pivoted = pivot_multi_value_columns(df, pivot_columns)
print(df_pivoted.head())

   index Date (MM/DD/YYYY)                       File                  Gtype  \
0     13        08/18/2020  2020-08-18-CNI-20Y-01.IGC  ASW 27-18 E (ASG-29E)   
1     15        06/27/2024               46RGJUM1.IGC              VENTUS 3F   
2     24        06/29/2018  2018-06-29-NKL-10Q-01.IGC  ASW 27-18 E (ASG-29E)   
3     25        06/09/2018  2018-06-09-CNI-20E-01.IGC  ASW 27-18 E (ASG-29E)   
4     28        06/04/2023  2023-06-04-LXV-NBR-01.IGC              VENTUS 3F   

  Flight Time  Start Time  End Time Landing  Start_Alt (ft MSL)  \
0     3:37:01      164459    202200    HOME                 652   
1     3:42:29      193703    231932    HOME                3713   
2     3:18:46      183835    215721    HOME                 301   
3     2:39:52      193132    221124    HOME                1578   
4     4:13:53      170746    212139    HOME                 301   

  Max Altitude (ft MSL/ft AGL)  ...  All_Post_Run_Max_TAS_knots  \
0                   7208[5847]  ...              

In [23]:
# Save the pivoted frame as a comma-separated file in the current working directory.
# NOTE(review): to_csv writes the row index as an extra unnamed first column by
# default; pass index=False if downstream readers do not need it — confirm.
df_pivoted.to_csv("df_pivoted.csv")