# Disentangle Finalrun CSV Data
## Display each part of the finalrun.csv file

In [1]:
# Original approach: read finalrun.csv as raw text lines (no CSV parsing yet)
finalrun = []
with open("finalrun.csv") as whole:
    # A text file object iterates line by line, so extend() collects each line
    finalrun.extend(whole)

In [2]:
# Count the raw lines collected in `finalrun` (every line of the file, including the first)
len(finalrun)

12

In [3]:
# Inspect the raw, unparsed text of the second line (list index 1)
finalrun[1]

'2025-11-04 11:11:53.060431,commoncrawlglove,"{\'male_pairs\': {}, \'female_pairs\': {}, \'names_hispanic\': {}, \'names_white\': {}, \'names_asian\': {}, \'personalitytraits_original\': {}, \'occupations1950\': {}, \'occupations1950_professional\': {}}","[[np.float64(1.2756084816206137)], [np.float64(0.13471537382542242)], [np.float64(0.0)], [np.float64(0.9292690589317413)], [np.float64(0.9292690589317413)], [np.float64(0.9999999999999999)], [np.float64(0.36703592988346856)], [np.float64(0.36703592988346856)]]","[[np.float64(1.337287862711474)], [np.float64(0.09457492192166313)], [np.float64(0.28084634758664523)], [np.float64(0.9715953797827879)], [np.float64(0.9705312335405207)], [np.float64(0.7057281586506767)], [np.float64(0.25767210842734134)], [np.float64(0.25902759095529915)]]","[[np.float64(1.3675239583419527)], [np.float64(0.06045523128415452)], [np.float64(0.40701112833136754)], [np.float64(1.004969087090936)], [np.float64(1.0137612864798833)], [np.float64(0.42268166344838026

## Parse the CSV properly
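The file is comma-separated, but several fields are quoted and contain commas themselves (see the raw line above), so let's parse it with Python's `csv` module instead of handling the lines by hand.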

In [None]:
import csv

# Parse the file with the csv module so that quoted fields containing commas
# stay in one piece.
# newline='' is what the csv module documentation asks for when opening the
# file: the reader then handles line endings itself, so newlines embedded in
# quoted fields are interpreted correctly.
with open('finalrun.csv', 'r', newline='') as f:
    reader = csv.reader(f)
    data = list(reader)

# The first parsed row holds the column names; everything after it is data
header = data[0]
rows = data[1:]

print(f"Number of columns: {len(header)}")
print(f"Number of data rows: {len(rows)}")

## Display Column Names

In [None]:
# List every column name together with its 1-based position
for position, column_name in enumerate(header, start=1):
    print(f"{position:2d}. {column_name}")

## Organize Data by Category

In [None]:
# Organize columns into categories.
# Column names that belong to 'Basic Info' (exact matches).
basic_info_columns = ('datetime', 'label', 'counts_all', 'variance_over_time')

# Ordered (category, predicate) rules. They are checked top to bottom and the
# first match wins, so e.g. a 'female_pairs_...' column is filed under
# 'Female Pairs' even if its name also contains a later rule's keyword.
category_rules = [
    ('Basic Info', lambda col: col in basic_info_columns),
    ('Female Pairs', lambda col: col.startswith('female_pairs_')),
    ('Male Pairs', lambda col: col.startswith('male_pairs_')),
    ('Asian Names', lambda col: col.startswith('names_asian_')),
    ('Hispanic Names', lambda col: col.startswith('names_hispanic_')),
    ('White Names', lambda col: col.startswith('names_white_')),
    ('Group Distances', lambda col: 'indiv_distances_group' in col),
    ('Neutral Distances', lambda col: 'indiv_distances_neutral' in col),
    ('Occupations', lambda col: 'occupations' in col),
    ('Personality Traits', lambda col: 'personalitytraits' in col),
]

# One (initially empty) list of column names per category, in rule order
categories = {cat_name: [] for cat_name, _ in category_rules}

# Columns that match no rule are collected here instead of vanishing silently
uncategorized = []

# Categorize columns
for col in header:
    for cat_name, matches in category_rules:
        if matches(col):
            categories[cat_name].append(col)
            break
    else:
        # No rule matched this column
        uncategorized.append(col)

# Display category summary
for cat_name, cols in categories.items():
    print(f"{cat_name}: {len(cols)} columns")

# Make dropped columns visible so the per-category displays are not mistaken
# for the complete file
if uncategorized:
    print(f"Uncategorized: {len(uncategorized)} columns ({', '.join(uncategorized)})")

## Display Each Category

In [None]:
# Show the 'Basic Info' fields of every data row
print("BASIC INFO")
print("=" * 80)

# Positions of the 'Basic Info' columns within each parsed row
basic_indices = list(map(header.index, categories['Basic Info']))

for row_number, record in enumerate(rows, start=1):
    print(f"\nRow {row_number}:")
    for position in basic_indices:
        print(f"  {header[position]}: {record[position]}")

In [None]:
# Function to display any category
def display_category(category_name):
    """Print every data row's values for the columns of one category.

    Reads the notebook-level ``categories``, ``header`` and ``rows`` objects.

    Args:
        category_name: Key into ``categories``; the associated value is the
            list of column names to show.

    Raises:
        KeyError: If ``category_name`` is not a key of ``categories``.
    """
    rule = '=' * 80
    print(f"\n{rule}")
    print(f"{category_name.upper()}")
    print(rule)

    cols = categories[category_name]
    if not cols:
        print("No columns in this category")
        return

    indices = [header.index(col) for col in cols]
    # Resolve the 'label' column once instead of once per row. When the file
    # has no such column, fall back to an unlabeled row heading rather than
    # failing on the first row.
    label_idx = header.index('label') if 'label' in header else None

    for row_idx, row in enumerate(rows, 1):
        if label_idx is None:
            print(f"\nRow {row_idx}:")
        else:
            print(f"\nRow {row_idx} ({row[label_idx]}):")
        for idx in indices:
            print(f"  {header[idx]}: {row[idx]}")

# Example: print the 'Female Pairs' columns for every data row
display_category('Female Pairs')

In [None]:
# Walk through the categories in their definition order and print each one
for category_name in categories:
    display_category(category_name)

## Alternative: Use Pandas for Better Display

In [None]:
import pandas as pd

# Load the same file with pandas into a DataFrame
df = pd.read_csv('finalrun.csv')

# Report the (rows, columns) shape, then preview the first rows as the cell's output
print(f"Shape: {df.shape}")
df.head()

In [None]:
# Display each category with pandas
def display_category_df(category_name):
    """Show one category's columns as a DataFrame selection.

    Looks up the column names for ``category_name`` in the notebook-level
    ``categories`` mapping. If the list is non-empty, prints an upper-cased
    title with a rule underneath and hands ``df[columns]`` to ``display``.
    Categories without columns produce no output at all.
    """
    selected = categories[category_name]
    if not selected:
        return

    title = category_name.upper()
    print(f"\n{title}")
    print("=" * 80)
    display(df[selected])

# Render every category, in definition order, as a DataFrame selection
for category_name in categories:
    display_category_df(category_name)