In [1]:
import numpy as np
from utils import classify_particles

# Parameters
# Defined before loading because the coordinate shift below is derived from halfbox.
halfbox = 5.0
r0 = 1.2

# Load configurations
# NOTE(review): this is an absolute, machine-specific path; point it at a
# configurable data directory before sharing the notebook.
path = "/home/n2401517d/my_workspace/flow_state/NF/data/samples_N3_rho_0.03.npz"
configs = np.load(path)

# Take the first array stored in the archive and translate both coordinates by
# halfbox. The shift used to be a hard-coded [5, 5] that silently duplicated
# halfbox; deriving it from the parameter keeps the two in sync.
# NOTE(review): presumably this maps coordinates from [-halfbox, halfbox] onto
# [0, 2*halfbox] as expected by classify_particles -- confirm against utils.
positions = configs[configs.files[0]] + np.array([halfbox, halfbox])



In [2]:
# Inspect the loaded coordinates; NumPy abbreviates the middle of large arrays with "..."
print(positions)

[[[3.24530547 5.24108865]
  [2.164995   5.82995647]
  [2.26276744 4.66338164]]

 [[8.04196458 5.1655643 ]
  [6.99806139 5.23889824]
  [7.86657261 4.05325671]]

 [[1.80590794 4.54628117]
  [2.27871521 5.56427092]
  [3.28931677 4.93745371]]

 ...

 [[7.13229903 6.02077595]
  [7.87713372 5.11631674]
  [6.61368227 4.90497586]]

 [[3.35469334 4.89446163]
  [2.13362089 4.91720804]
  [2.60212444 5.96824768]]

 [[2.58990954 4.03014456]
  [3.48955866 4.60527585]
  [2.03554039 5.02451231]]]


In [3]:
# Classify particles: produce one label per particle for every configuration.
# NOTE(review): classify_particles comes from the local utils module, so how it
# uses halfbox and r0 is not visible here; the printed results further down show
# the labels 'A', 'B' and 'Outside'.
classifications = classify_particles(positions, halfbox, r0)

In [4]:
# Show the labels: one row per configuration, one entry per particle
print(classifications)

[['A' 'A' 'A']
 ['B' 'B' 'B']
 ['A' 'A' 'A']
 ...
 ['B' 'B' 'B']
 ['A' 'A' 'A']
 ['A' 'A' 'A']]


In [5]:
# Tally how often each ordered three-particle label pattern occurs
total_samples = len(classifications)

# One bucket per ordered A/B pattern, plus a catch-all for anything else
# (e.g. rows that contain a label other than 'A' or 'B').
counts = dict.fromkeys(
    ['AAA', 'BBB', 'AAB', 'ABA', 'BAA', 'ABB', 'BAB', 'BBA', 'other'], 0
)

for config in classifications:
    # Concatenate the per-particle labels so the row can be used as a dict key
    config_str = ''.join(config)
    bucket = config_str if config_str in counts else 'other'
    counts[bucket] += 1

# Report the raw tallies with their share of all samples
print(f"Total number of samples: {total_samples}")
print("\nConfiguration counts:")
for config_type, count in counts.items():
    percentage = (count / total_samples) * 100
    print(f"{config_type}: {count} ({percentage:.2f}%)")

# Collapse the ordered patterns into composition classes (order-insensitive)
print("\nGrouped statistics:")
three_a = counts['AAA']
three_b = counts['BBB']
two_a_one_b = sum(counts[pattern] for pattern in ('AAB', 'ABA', 'BAA'))
one_a_two_b = sum(counts[pattern] for pattern in ('ABB', 'BAB', 'BBA'))
other = counts['other']

print(f"3A (AAA): {three_a} ({(three_a/total_samples)*100:.2f}%)")
print(f"3B (BBB): {three_b} ({(three_b/total_samples)*100:.2f}%)")
print(f"2A1B (AAB+ABA+BAA): {two_a_one_b} ({(two_a_one_b/total_samples)*100:.2f}%)")
print(f"1A2B (ABB+BAB+BBA): {one_a_two_b} ({(one_a_two_b/total_samples)*100:.2f}%)")
print(f"Other: {other} ({(other/total_samples)*100:.2f}%)")

Total number of samples: 204800

Configuration counts:
AAA: 102342 (49.97%)
BBB: 102338 (49.97%)
AAB: 0 (0.00%)
ABA: 0 (0.00%)
BAA: 0 (0.00%)
ABB: 0 (0.00%)
BAB: 0 (0.00%)
BBA: 0 (0.00%)
other: 120 (0.06%)

Grouped statistics:
3A (AAA): 102342 (49.97%)
3B (BBB): 102338 (49.97%)
2A1B (AAB+ABA+BAA): 0 (0.00%)
1A2B (ABB+BAB+BBA): 0 (0.00%)
Other: 120 (0.06%)


In [10]:
# List every configuration whose joined label string is not one of the eight
# ordered A/B patterns, together with its sample index.
print("\nConfigurations classified as 'other':")
known_patterns = ('AAA', 'BBB', 'AAB', 'ABA', 'BAA', 'ABB', 'BAB', 'BBA')
for i, config in enumerate(classifications):
    config_str = ''.join(config)
    if config_str in known_patterns:
        continue  # a regular A/B pattern, nothing to report
    print(f"Configuration {i}: {config_str}")



Configurations classified as 'other':
Configuration 3806: BBOutside
Configuration 5139: AAOutside
Configuration 5712: BBOutside
Configuration 7256: OutsideBB
Configuration 8101: BBOutside
Configuration 8365: OutsideBB
Configuration 9063: OutsideBB
Configuration 9509: OutsideBB
Configuration 12268: BBOutside
Configuration 14543: AOutsideA
Configuration 16937: BBOutside
Configuration 17282: BBOutside
Configuration 19596: BBOutside
Configuration 19627: AOutsideA
Configuration 20149: BBOutside
Configuration 24921: OutsideAA
Configuration 28100: AOutsideA
Configuration 30205: BBOutside
Configuration 30731: OutsideBB
Configuration 32847: BBOutside
Configuration 33263: OutsideAA
Configuration 33292: AOutsideA
Configuration 37272: OutsideAA
Configuration 37779: OutsideAA
Configuration 38052: BBOutside
Configuration 40130: OutsideBB
Configuration 41221: AOutsideA
Configuration 42506: BBOutside
Configuration 44790: BBOutside
Configuration 46984: AOutsideA
Configuration 51980: OutsideAA
Configur

In [None]:
# Collapse exactly repeated configurations and count how often each one occurs.
# np.unique with axis=0 compares whole configurations and, with
# return_counts=True, reports the multiplicity of every unique one. This
# replaces a nested Python loop over (samples x unique configurations) that
# matched with np.allclose and started every tally at 2 instead of 1.
unique_data, multiplicities = np.unique(positions, axis=0, return_counts=True)
n_unique = len(unique_data)

# Get classifications for unique configurations
unique_classifications = classify_particles(unique_data, halfbox, r0)

# Index into unique_data -> number of samples equal to that configuration
duplicate_counts = {j: int(c) for j, c in enumerate(multiplicities)}

# Calculate effective sample size using Kish's formula, (sum w)^2 / sum(w^2),
# with the multiplicities as weights. Floats avoid integer overflow in the
# squares; an empty sample set is reported as ESS 0 rather than 0/0.
weights = multiplicities.astype(np.float64)
ess = np.sum(weights)**2 / np.sum(weights**2) if weights.size else 0.0

# Print statistics
print(f"Total number of samples: {len(positions)}")
print(f"Number of unique configurations: {n_unique}")
print(f"Effective Sample Size (ESS): {ess:.2f}")

# Print distribution of duplicates: how many unique configurations occur
# exactly `count` times in the sample set.
print("\nDuplicate distribution:")
occurrence_values, occurrence_freqs = np.unique(multiplicities, return_counts=True)
duplicate_dist = dict(zip(occurrence_values.tolist(), occurrence_freqs.tolist()))

for count, freq in sorted(duplicate_dist.items()):
    print(f"Configurations appearing {count} times: {freq}")

# Print unique configuration types
print("\nUnique configuration types:")
unique_types = {}
for config in unique_classifications:
    config_type = ''.join(sorted(config))  # Sort to group AAB and ABA together
    unique_types[config_type] = unique_types.get(config_type, 0) + 1

for config_type, count in sorted(unique_types.items()):
    print(f"Type {config_type}: {count} unique configurations")