In [8]:
import pandas as pd
import json

# Read the point table and give every row a 1-based identifier in the leading column
df_data = pd.read_csv('3d_data.csv')
df_data.insert(loc=0, column='point_id', value=range(1, len(df_data) + 1))

# Lookup table holding the GRADE of every point
grade_by_point = df_data[['point_id', 'GRADE']]

# Read the pairs and attach the GRADE of both ends of each pair.
# Renaming the lookup columns before each merge makes the join key and the
# value column arrive under their final names, so no suffixes or drops are needed.
df_pairs = (
    pd.read_json('seq_pairs.json')
    # GRADE of the pair's own point
    .merge(
        grade_by_point.rename(columns={'GRADE': 'point_id_value'}),
        on='point_id',
        how='left',
    )
    # GRADE of the point it is paired with
    .merge(
        grade_by_point.rename(columns={
            'point_id': 'paired_point_id',
            'GRADE': 'paired_point_id_value',
        }),
        on='paired_point_id',
        how='left',
    )
)

# Show the enriched pairs table
display(df_pairs)

Unnamed: 0,point_id,dim_id,n,paired_point_id,point_id_value,paired_point_id_value
0,1,0,1,2,0.2900,0.2800
1,1,0,2,3,0.2900,0.0563
2,1,0,3,4,0.2900,0.0464
3,1,0,4,5,0.2900,0.1872
4,1,0,5,6,0.2900,0.2910
...,...,...,...,...,...,...
1205,118,0,2,120,0.2440,0.2727
1206,118,0,3,121,0.2440,0.2800
1207,119,0,1,120,0.2750,0.2727
1208,119,0,2,121,0.2750,0.2800


In [33]:
import pandas as pd
import itertools
import numpy as np

ndir = [0, 1]
nlag_dir = [10, 10]

# One lag column per direction, named by position in `ndir` (the code below
# refers to dir_0 / dir_1 positionally)
columns = [f"dir_{i}_nlag" for i in range(len(ndir))]

# Every combination of lag counts (1..nlag) across the directions
combinations = list(itertools.product(*[range(1, nlag + 1) for nlag in nlag_dir]))
df_generated = pd.DataFrame(combinations, columns=columns)

# Pairs along each direction, with the lag and pair columns tagged by direction.
# Rows without a point_id are dropped up front so they can never take part in the join.
dir_0_columns = ['dir_0_nlag', 'point_id', 'point_id_value',
                 'paired_point_id_dir_0', 'paired_point_id_value_dir_0']
pairs_dir_0 = (
    df_pairs[df_pairs['dim_id'] == ndir[0]]
    .dropna(subset=['point_id'])
    .rename(columns={
        'n': 'dir_0_nlag',
        'paired_point_id': 'paired_point_id_dir_0',
        'paired_point_id_value': 'paired_point_id_value_dir_0',
    })
)[dir_0_columns]

dir_1_columns = ['dir_1_nlag', 'point_id',
                 'paired_point_id_dir_1', 'paired_point_id_value_dir_1']
pairs_dir_1 = (
    df_pairs[df_pairs['dim_id'] == ndir[1]]
    .dropna(subset=['point_id'])
    .rename(columns={
        'n': 'dir_1_nlag',
        'paired_point_id': 'paired_point_id_dir_1',
        'paired_point_id_value': 'paired_point_id_value_dir_1',
    })
)[dir_1_columns]

# Join on the lag AND the shared point_id in one step. Joining on the lag alone
# would first build every (dir-0 pair, dir-1 pair) combination for each lag
# combination and only afterwards throw away the rows whose point_ids differ.
result = (
    df_generated
    .merge(pairs_dir_0, on='dir_0_nlag', how='inner')
    .merge(pairs_dir_1, on=['dir_1_nlag', 'point_id'], how='inner')
)

# Select final columns
result = result[['dir_0_nlag', 'dir_1_nlag', 'point_id_value',
                 'paired_point_id_dir_0', 'paired_point_id_value_dir_0',
                 'paired_point_id_dir_1', 'paired_point_id_value_dir_1']]

# Add new column 'E': product of the point's value and both paired values.
# `assign` returns a new frame instead of writing into a selection.
result = result.assign(
    E=result['point_id_value']
    * result['paired_point_id_value_dir_0']
    * result['paired_point_id_value_dir_1']
)

# Group by dir_0_nlag and dir_1_nlag and average E
# (lag combinations without any matching rows do not appear in the output)
final_result = (
    result.groupby(['dir_0_nlag', 'dir_1_nlag'])['E']
    .mean()
    .reset_index()
    .rename(columns={'E': 'average_E'})
)

display(final_result)




Unnamed: 0,dir_0_nlag,dir_1_nlag,average_E
0,1,1,0.013761
1,1,2,0.013853
2,1,3,0.013555
3,1,4,0.014410
4,1,5,0.015270
...,...,...,...
95,10,6,0.013061
96,10,7,0.012747
97,10,8,0.012311
98,10,9,0.017818


In [37]:
def compute_moments_from_pairs(df_pairs, ndir, nlag_dir):
    """Average the product of a point's value and its paired values per lag combination.

    For every combination of lags ``(n_0, ..., n_k)`` with ``1 <= n_i <= nlag_dir[i]``,
    the rows of ``df_pairs`` with ``dim_id == ndir[i]`` and ``n == n_i`` are matched
    across directions on equal ``point_id``. For each match
    ``E = point_id_value * paired_point_id_value_dir_0 * ... * paired_point_id_value_dir_k``
    and the function returns the mean of ``E`` per lag combination.

    Parameters
    ----------
    df_pairs : pandas.DataFrame
        Must contain the columns ``point_id``, ``dim_id``, ``n``,
        ``point_id_value`` and ``paired_point_id_value``. It is not modified.
    ndir : sequence
        ``dim_id`` values to combine; position ``i`` in this sequence defines
        the output column ``dir_{i}_nlag``.
    nlag_dir : sequence of int
        Maximum lag to consider for each entry of ``ndir`` (same length).

    Returns
    -------
    pandas.DataFrame
        Columns ``dir_0_nlag``, ..., ``dir_{k}_nlag`` and ``average_E``, sorted
        by the lag columns. Lag combinations without any matching rows are
        absent from the result.

    Raises
    ------
    ValueError
        If ``ndir`` and ``nlag_dir`` differ in length or are empty.
    """
    if len(ndir) != len(nlag_dir):
        raise ValueError(
            "ndir and nlag_dir must have the same length, "
            f"got {len(ndir)} and {len(nlag_dir)}"
        )
    if len(ndir) == 0:
        raise ValueError("ndir must contain at least one direction")

    lag_columns = [f'dir_{i}_nlag' for i in range(len(ndir))]
    value_columns = [f'paired_point_id_value_dir_{i}' for i in range(len(ndir))]

    # Build one slim table per direction and join them on point_id.
    # Joining on the lag alone would materialise every combination of pairs
    # across directions before discarding those whose point_ids differ, which
    # grows multiplicatively with each extra direction.
    joined = None
    for i, (dim, nlag) in enumerate(zip(ndir, nlag_dir)):
        in_scope = (
            (df_pairs['dim_id'] == dim)
            & df_pairs['n'].isin(range(1, nlag + 1))
            # rows without a point_id can never match across directions
            & df_pairs['point_id'].notna()
        )
        keep = ['n', 'point_id', 'paired_point_id_value']
        if i == 0:
            # the point's own value is taken from the first direction only
            keep.append('point_id_value')
        pairs_i = (
            df_pairs.loc[in_scope, keep]
            .rename(columns={'n': lag_columns[i], 'paired_point_id_value': value_columns[i]})
            # every surviving lag is one of the integers 1..nlag, so the cast is exact
            .astype({lag_columns[i]: 'int64'})
        )
        if joined is None:
            joined = pairs_i
        else:
            joined = joined.merge(pairs_i, on='point_id', how='inner')

    # E = point value times the paired value of every direction, in direction order
    moment = joined['point_id_value']
    for col in value_columns:
        moment = moment * joined[col]

    # Average E per lag combination
    final_result = (
        joined.assign(E=moment)
        .groupby(lag_columns)['E']
        .mean()
        .reset_index()
        .rename(columns={'E': 'average_E'})
    )

    return final_result

# Demo run: two directions (dim_id 0 and 1) with ten lags along each.
# Append an entry to both lists to include a further direction.
ndir = [0, 1]
nlag_dir = [10, 10]

final_result = compute_moments_from_pairs(df_pairs, ndir=ndir, nlag_dir=nlag_dir)
display(final_result)


Unnamed: 0,dir_0_nlag,dir_1_nlag,average_E
0,1,1,0.013761
1,1,2,0.013853
2,1,3,0.013555
3,1,4,0.014410
4,1,5,0.015270
...,...,...,...
95,10,6,0.013061
96,10,7,0.012747
97,10,8,0.012311
98,10,9,0.017818
