In [1]:
import pandas as pd
import numpy as np


In [2]:

# Column order of the frame returned by load_prometheus_parquet. Kept explicit
# so that an empty result still exposes the same schema as a populated one.
_RESULT_COLUMNS = (
    'initial_state',
    'final_states',
    'interaction',
    'adjusted_initial_position_x',
    'adjusted_initial_position_y',
    'adjusted_initial_position_z',
)


def _collect_event_rows(prometheus_set, primary_set):
    """Build one record per index label of prometheus_set that also exists in primary_set."""
    rows = []
    for idx in prometheus_set.index:
        if idx not in primary_set.index:
            # No primary record for this event, so it is left out of the result.
            continue

        # `position` is only indexed positionally here; it may be a list or a
        # numpy array depending on how the parquet file was written and read.
        # Elements 0, 1 and 2 are stored as x, y and z; any further elements
        # are ignored.
        position = primary_set.loc[idx, 'position']

        rows.append({
            'initial_state': primary_set.loc[idx, 'pdg_code'],
            'final_states': prometheus_set.loc[idx, 'pdg_code'],
            'interaction': primary_set.loc[idx, 'interaction'],
            'adjusted_initial_position_x': position[0],
            'adjusted_initial_position_y': position[1],
            'adjusted_initial_position_z': position[2],
        })
    return rows


def load_prometheus_parquet(
    prometheus_path: str = 'genie_events_for_prometheus.parquet',
    primary_path: str = 'genie_events_primary.parquet',
) -> pd.DataFrame:
    """
    Load the prometheus and primary parquet files and combine them per event.

    Events are matched on the DataFrame index: every index label of the
    prometheus frame that is also present in the primary frame yields one
    output row.

    Parameters
    ----------
    prometheus_path : str
        Parquet file whose ``pdg_code`` column becomes ``final_states``.
    primary_path : str
        Parquet file providing ``pdg_code`` (``initial_state``),
        ``interaction`` and ``position`` (split into x/y/z columns).

    Returns
    -------
    pandas.DataFrame
        Columns ``initial_state``, ``final_states``, ``interaction`` and
        ``adjusted_initial_position_{x,y,z}``. The columns are present even
        when no event matched, in which case the frame has zero rows.

    Raises
    ------
    KeyError
        If a matching event exists but one of the required columns is missing.
    """
    prometheus_set = pd.read_parquet(prometheus_path)
    primary_set = pd.read_parquet(primary_path)

    print(f"Loaded prometheus_set with shape: {prometheus_set.shape}")
    print(f"Loaded primary_set with shape: {primary_set.shape}")

    # Diagnostic peek at the first position entry. Guarded on row count as
    # well as column presence, because .iloc[0] raises IndexError on an
    # empty frame.
    if 'position' in primary_set.columns and len(primary_set) > 0:
        first_position = primary_set['position'].iloc[0]
        print(f"Type of position data: {type(first_position)}")
        print(f"Sample position: {first_position}")

    results = _collect_event_rows(prometheus_set, primary_set)

    # Passing the columns explicitly keeps the schema stable when `results`
    # is empty; for non-empty results the order equals the dict key order.
    results_df = pd.DataFrame(results, columns=list(_RESULT_COLUMNS))
    print(f"Processed {len(results)} events")

    return results_df

# Demo: build the combined event table and preview it when run directly
if __name__ == "__main__":
    positions_df = load_prometheus_parquet()

    # One print call: blank line, heading, then the first rows of the table
    print("\nSample of extracted position data:", positions_df.head(), sep="\n")
    

Loaded prometheus_set with shape: (100, 14)
Loaded primary_set with shape: (100, 14)
Type of position data: <class 'numpy.ndarray'>
Sample position: [  48.91165602  313.50011218 -380.03670563    0.        ]
Processed 100 events

Sample of extracted position data:
   initial_state                                       final_states  \
0             14    [22, 13, 211, 111, -211, 2212, 2212, 2212, 111]   
1             14     [22, 14, 211, 211, -211, -211, 111, 2112, 111]   
2             14  [22, 14, 111, 2112, 211, 2212, 211, -211, -211...   
3             14                [22, 13, 211, 111, 2112, 111, 2212]   
4             14  [14, 2212, 2212, 2212, 2212, 2112, 2112, -211,...   

  interaction  adjusted_initial_position_x  adjusted_initial_position_y  \
0          CC                    48.911656                   313.500112   
1          NC                  -135.498767                   388.107182   
2          NC                  -390.709797                   193.798250   
3        