In [3]:
import json
import pandas as pd

# Load the flag_analysis JSON data.
# JSON text is UTF-8 by specification, so the encoding is passed explicitly;
# without it open() falls back to the platform locale encoding and the same
# file can decode differently (or fail) from one machine to another.
with open('flag_analysis.json', 'r', encoding='utf-8') as f:
    flag_analysis_data = json.load(f)

# Convert the JSON data to a DataFrame
df = pd.DataFrame(flag_analysis_data)

# Count the occurrences of each flag_gap value; sort_index() orders the result
# by flag value instead of value_counts()' default most-frequent-first order.
flag_counts = df['flag_gap'].value_counts().sort_index()

# Extract gap values from the list of dictionaries
def extract_gaps(gaps_list):
    """Flatten per-row gap entries into a single list of float gap values.

    Parameters
    ----------
    gaps_list : iterable
        One item per row. Each item is an iterable of dicts; the first key of
        every dict is converted with ``float()`` and collected. Items that are
        missing (``None`` or a float NaN, which is what pandas stores when a
        record has no such column) are skipped instead of raising.

    Returns
    -------
    list of float
        All converted gap values, in the order they were encountered.

    Raises
    ------
    IndexError
        If one of the dicts has no keys.
    ValueError
        If a first key cannot be converted by ``float()``.
    """
    values = []
    for gaps in gaps_list:
        # `gaps != gaps` is only true for NaN; this avoids needing `math`.
        if gaps is None or (isinstance(gaps, float) and gaps != gaps):
            continue
        for gap in gaps:
            values.append(float(list(gap.keys())[0]))
    return values

# Collect every gap value belonging to each flag_gap group (one list per flag).
gap_disparity = df.groupby('flag_gap')['gaps'].apply(extract_gaps)

# Calculate disparity metrics for each flag.
# The table is assembled column by column: packing all four values into one
# pd.Series per row forces a common dtype, which turned gap_count into a float
# (it printed as 5.0, 85.0, ...). Mapping `len` on its own keeps it an integer.
gap_disparity_metrics = pd.DataFrame({
    'min_gap': gap_disparity.map(
        lambda gap_list: min(gap_list) if gap_list else float('nan')),
    'max_gap': gap_disparity.map(
        lambda gap_list: max(gap_list) if gap_list else float('nan')),
    'average_gap': gap_disparity.map(
        lambda gap_list: sum(gap_list) / len(gap_list) if gap_list else float('nan')),
    'gap_count': gap_disparity.map(len),
})

# Display the results
print("Flag Counts:")
print(flag_counts)

print("\nGap Disparity Metrics:")
print(gap_disparity_metrics)


Flag Counts:
flag_gap
0     1
1    17
2     4
3     1
Name: count, dtype: int64

Gap Disparity Metrics:
           min_gap   max_gap  average_gap  gap_count
flag_gap                                            
0         0.066603  0.146600     0.100436        5.0
1         0.032059  0.508930     0.123691       85.0
2         0.048287  0.227270     0.127374       20.0
3         0.031242  0.347668     0.158703        5.0


In [6]:
import json
import pandas as pd

# Load the flag_analysis JSON data.
# JSON text is UTF-8 by specification, so the encoding is passed explicitly;
# without it open() falls back to the platform locale encoding and the same
# file can decode differently (or fail) from one machine to another.
with open('flag_analysis.json', 'r', encoding='utf-8') as f:
    flag_analysis_data = json.load(f)

# Convert the JSON data to a DataFrame
df = pd.DataFrame(flag_analysis_data)

# Keep only the rows whose flag_gap is 1 or 2; every other flag value
# (0, 3, 4, 5, ...) is dropped.
df_filtered = df[df['flag_gap'].isin([1, 2])]

# Extract gap differences from the list of dictionaries
def extract_gap_differences(gaps_list):
    """Compute, per row, the difference between its first two gap values.

    Parameters
    ----------
    gaps_list : iterable
        One item per row. Each item is a sequence of dicts whose first key is
        a gap value convertible with ``float()``. Items that are missing
        (``None`` or a float NaN, which is what pandas stores when a record
        has no such column) are skipped instead of raising.

    Returns
    -------
    list of float
        ``first - second`` for every row that has at least two entries; rows
        with fewer entries contribute nothing.

    Notes
    -----
    Only positions 0 and 1 are read. Treating them as the "first and second
    biggest" gaps assumes each row is already sorted largest-first; that
    ordering is not checked here (TODO confirm against the data producer).
    """
    differences = []
    for gaps in gaps_list:
        # `gaps != gaps` is only true for NaN; this avoids needing `math`.
        if gaps is None or (isinstance(gaps, float) and gaps != gaps):
            continue
        if len(gaps) >= 2:
            first_gap = float(list(gaps[0].keys())[0])
            second_gap = float(list(gaps[1].keys())[0])
            differences.append(first_gap - second_gap)
    return differences

def _difference_stats(diff_list):
    """Summarise one flag's gap differences as a labelled Series (NaN when empty)."""
    if not diff_list:
        nan = float('nan')
        return pd.Series({
            'min_gap_diff': nan,
            'max_gap_diff': nan,
            'average_gap_diff': nan,
        })
    return pd.Series({
        'min_gap_diff': min(diff_list),
        'max_gap_diff': max(diff_list),
        'average_gap_diff': sum(diff_list) / len(diff_list),
    })


# One list of first-minus-second gap differences per remaining flag_gap value.
gaps_by_flag = df_filtered.groupby('flag_gap')['gaps']
gap_differences = gaps_by_flag.apply(extract_gap_differences)

# Expand each list into its min / max / mean columns.
gap_difference_metrics = gap_differences.apply(_difference_stats)

# Report the per-flag row counts, then the difference metrics.
print("Flag Counts:")
print(df_filtered['flag_gap'].value_counts().sort_index())

print("\nGap Difference Metrics:")
print(gap_difference_metrics)


Flag Counts:
flag_gap
1    17
2     4
Name: count, dtype: int64

Gap Difference Metrics:
          min_gap_diff  max_gap_diff  average_gap_diff
flag_gap                                              
1             0.000904      0.432834          0.181761
2             0.003481      0.049963          0.020721
