In [None]:
from google.colab import drive
import os
import csv
import collections
import numpy as np
import matplotlib.pyplot as plt
import random
import seaborn as sns
import pandas as pd

In [None]:
# Maximum number of data points kept per condition line: subjects with a longer
# line are dropped in the loading cells and shorter lines are padded with -1.
max_dim = 10
# Small numeric constant; it is not referenced by any of the cells visible here.
# NOTE(review): confirm whether it is still needed.
epsilon = 0.0001

In [None]:
# Mount Google Drive into the Colab runtime.
drive.mount('/content/drive')
# Placeholder path: replace it with the location of the AI_4_ATD folder.
# Every relative path used by the later cells is resolved from this directory.
%cd /write/your/directory/to/AI_4_ATD

# Prepare Hall Data

## Load Real Data

In [None]:
# Folder holding one CSV per Hall subject, relative to the working directory.
# The same folder is used for listing and for opening the files (the original
# listed 'Hall_Data' but opened '../Hall_Data/', which only works if both exist).
hall_dir = 'Hall_Data'
# Problematic file that was missing session labels; it is always skipped
excluded_file = '5. Ahearn_Nicki.csv'

# List the files in the Hall folder, leaving out the excluded one
# (filtering instead of list.remove() so a missing file does not raise)
file_names = [f_name for f_name in os.listdir(hall_dir) if f_name != excluded_file]

# Define the main experimental conditions/lines for analysis
lines = ['control', 'tangible', 'demand', 'attention']

# Lookup dictionary that normalizes alternative header names to the standard 'lines'
lookup = {'play': 'control',
          'free play': 'control',
          'toy play': 'control',
          'att': 'attention',
          'attn': 'attention',
          'escape': 'demand',
          'tangibles': 'tangible',}

# Processed Hall graph data, keyed by the integer subject ID taken from the file name
hall_graph_data = {}
for f_name in file_names:
  # Skip non-CSV files entirely so that the header map and graph of a previous
  # file are never re-used for (or stored under) an unrelated file name
  if not f_name.endswith('.csv'):
    continue

  # One empty list per condition for the current subject
  graph = {line: [] for line in lines}
  # Maps a normalized condition name to its column index in this file
  col = {}

  # The context manager closes the file even if a row fails to parse
  with open(os.path.join(hall_dir, f_name), newline='', encoding='latin-1') as file:
    csv_reader = csv.reader(file)
    for row_idx, row in enumerate(csv_reader):
      if row_idx == 0:
        # Header row: record the column of every recognised condition
        for i, header in enumerate(row):
          key = header.lower().strip()
          key = lookup.get(key, key)
          if key in lines:
            col[key] = i
          # any other header is irrelevant and ignored
        continue

      # Data row: append the float value of every non-empty condition cell
      for line in lines:
        # Condition absent from this file, or the row is shorter than the header
        if line not in col or col[line] >= len(row):
          continue
        cell = row[col[line]]
        if cell.strip() != '':
          graph[line].append(float(cell))

  # Keep the subject only if 'control' and at least one other condition exist
  if 'control' in col and len(col) >= 2:
    # The subject ID is the integer before the first '.' of the file name
    hall_graph_data[int(f_name.split('.')[0])] = graph
  elif 'control' not in col:
    print('Missing Control/Test Condition')

# Identify subjects to be removed if any of their data lines exceed the maximum dimension
del_subs = set()
for sub in hall_graph_data:
  if any(len(values) > max_dim for values in hall_graph_data[sub].values()):
    del_subs.add(sub)

# Remove identified subjects from the dataset
for sub in del_subs:
  del hall_graph_data[sub]

# Pad data lines with -1 up to 'max_dim' so every line has the same length
for sub in hall_graph_data:
  for line in hall_graph_data[sub]:
    cur_len = len(hall_graph_data[sub][line])
    hall_graph_data[sub][line] = np.append(hall_graph_data[sub][line], [-1]*(max_dim-cur_len))


## IRA

### CDC Analysis

In [None]:
# CDC labels per Hall subject: one 0/1 entry for every non-control line
CDC = {}
all_subs = set(hall_graph_data.keys())
for sub in all_subs:
  # Control data without the -1 padding
  control = [v for v in hall_graph_data[sub]['control'] if v != -1]
  # Sample mean and sample standard deviation (ddof=1) of the control line
  mean = np.mean(control)
  sd = np.std(control, ddof=1)
  # Upper limit is mean + sd; lower limit is mean - sd, floored at 0
  UC = mean + sd
  LC = (mean - sd).clip(min=0)

  CDC_lbl = []
  for line in hall_graph_data[sub]:
    if line == 'control':
      continue
    # Data of this condition without the -1 padding
    line_data = [v for v in hall_graph_data[sub][line] if v != -1]
    # A point counts as "above" first; only otherwise can it count as "below"
    above = sum(1 for point in line_data if point > UC)
    below = sum(1 for point in line_data if not point > UC and point < LC)
    # Label 1 when (above - below) is at least half of the points; an empty line is 0
    if len(line_data) != 0 and (above - below) / len(line_data) >= 0.5:
      CDC_lbl.append(1)
    else:
      CDC_lbl.append(0)
  # Store the CDC labels for the current subject
  CDC[sub] = CDC_lbl

### Load Ratings

In [None]:
def lists_identical(lists):
    """Return True when every list in ``lists`` equals the first one.

    Args:
        lists: a sequence of lists to compare with ``==``.

    Returns:
        True if all entries are equal to the first entry, False otherwise.
        An empty sequence has nothing that disagrees, so it returns True
        (the previous version raised IndexError in that case).
    """
    # Guard the empty case before indexing the reference list
    if len(lists) == 0:
        return True
    first_list = lists[0]
    return all(lst == first_list for lst in lists[1:])

In [None]:
# Ratings of the Hall graphs: h_data[rater_name][subject] -> list of three integer ratings
h_data = {}

h_all_subs = set()  # every subject seen in any rating file
h_err_subs = set()  # subjects flagged with an error by at least one rater


files = os.listdir('Ratings')
print('loaded: ', end='')
# Loop through rating files to load Hall data ratings
for f_name in files:
  # Only process files starting with 'rh-'
  if not f_name.startswith('rh-'):
    continue
  # Rater name = file name without the 'rh-' prefix and the 4-character extension
  name = f_name[3:-4]
  h_data[name] = {}
  print(f_name, end=', ')
  # Context manager so the handle is closed even if a row fails to parse
  with open('Ratings/'+f_name) as file:
    csv_reader = csv.reader(file)
    next(csv_reader) # Skip header row 1
    next(csv_reader) # Skip header row 2
    for row in csv_reader:
      # Column 0 holds the subject ID
      sub = int(row[0])
      h_all_subs.add(sub)
      # Column 7 is the error flag; '1' marks a reported error
      if row[7] != '1':
        # Columns 1-3 hold the three ratings
        h_data[name][sub] = [int(row[1]), int(row[2]), int(row[3])]
      else:
        h_err_subs.add(sub)

# Determine valid subjects by removing subjects with errors from all subjects
h_val_subs = h_all_subs - h_err_subs

h_agr_subs = set()
# Identify subjects where all raters (for Hall data) completely agree
for sub in h_val_subs:
  # Ratings of this subject from every loaded rater
  temp = [h_data[key][sub] for key in h_data]
  if lists_identical(temp):
    h_agr_subs.add(sub)

print()
# Print summary statistics for Hall data subjects
print('# of all_subs:', len(h_all_subs)) # total subjects
print('# of err_subs:', len(h_err_subs)) # subjects with reported errors
print('# of val_subs:', len(h_val_subs)) # total subjects without reported errors
print('# of agr_subs:', len(h_agr_subs)) # total subjects with agreement between 3 raters

### Remove Subjects With Reported Errors

In [None]:
# Drop every error-flagged subject from each rater's Hall ratings.
# dict.pop with a default ignores subjects a rater never stored, without the
# bare 'except' that would also hide unrelated errors.
for key in h_data:
  for sub in h_err_subs:
    h_data[key].pop(sub, None)

### Save Disagreements

In [None]:
def intersection(list1, list2):
  """Return the set of subjects whose ratings are equal in both mappings.

  Args:
    list1: mapping of subject -> ratings.
    list2: mapping of subject -> ratings; must have exactly the same keys.

  Returns:
    A set with every key for which list1[key] == list2[key].

  Raises:
    AssertionError: if the two mappings do not share the same keys.
  """
  assert list1.keys() == list2.keys()
  return {sub for sub in list1 if list1[sub] == list2[sub]}

In [None]:
# Subjects on which each pair of raters/methods does NOT fully agree
K_C = h_val_subs - intersection(h_data['katie_VA'], h_data['CDC'])
N_C = h_val_subs - intersection(h_data['neely_VA'], h_data['CDC'])
K_N = h_val_subs - intersection(h_data['katie_VA'], h_data['neely_VA'])

# Union of all pairwise disagreements, sorted for consistent output
dis = sorted(K_C | N_C | K_N)

# Write one row per subject: ID, then the CDC, katie_VA and neely_VA ratings.
# The context manager closes the file even if a row cannot be written.
with open('Ratings/hall_differences.csv', 'w', newline='') as file:
  writer = csv.writer(file)
  for sub in dis:
    writer.writerow([sub] + h_data['CDC'][sub] + h_data['katie_VA'][sub] + h_data['neely_VA'][sub])

## Load Ground Truth

In [None]:
f_name = 'rh-Ground_Truth.csv'
# Key under which the labels are stored: file name without 'rh-' and the extension
name = f_name[3:-4]
h_data[name] = {}
print('loaded: ', end='')
print(f_name, end=', ')
# Context manager so the handle is always closed
with open('Ground_Truth/'+f_name) as file:
  csv_reader = csv.reader(file)
  # The first two rows are skipped, as in the rating files
  next(csv_reader)
  next(csv_reader)
  # Process each row to extract ground truth ratings
  for row in csv_reader:
    # Column 7 is the error flag; only rows without a reported error are kept
    if row[7] != '1':
      sub = int(row[0])
      h_data[name][sub] = [int(row[1]), int(row[2]), int(row[3])]

# Also drop subjects that any rater flagged as erroneous, if they are present
for sub in h_err_subs:
  h_data['Ground_Truth'].pop(sub, None)

# Prepare Friedel Data

## Load Real Data

In [None]:
# Load both Friedel data files into one combined list of rows

all_subs = set()
raw_data = []

# Both files are read the same way: skip the header, collect subject IDs and rows.
# The context manager closes each file once it has been read.
for path in ('Friedel_Data/fa_data_1_mod.csv', 'Friedel_Data/fa_data_2_mod.csv'):
  with open(path, newline='', encoding='latin-1') as file:
    csv_reader = csv.reader(file)
    next(csv_reader, None)  # skip the header row (no error on an empty file)
    for row in csv_reader:
      # Column 3 holds the subject ID
      all_subs.add(int(row[3]))
      raw_data.append(row)

# Define the lines/conditions for the graphs
lines = ['control', 'tangible', 'demand', 'attention']

# graph_data[subject][condition] -> list of values, initially empty
graph_data = {sub: {line: [] for line in lines} for sub in all_subs}

# Populate graph_data: column 1 is the condition name, column 2 the value
for row in raw_data:
  sub = int(row[3])
  line = row[1].lower()
  val = float(row[2])
  # Rows of any other condition are ignored
  if line in lines:
    graph_data[sub][line].append(val)

# Pad every line with -1 up to max_dim; a subject with any longer line is dropped
del_subs = set()
for sub in all_subs:
  for line in graph_data[sub]:
    cur_len = len(graph_data[sub][line])
    if cur_len <= max_dim:
      graph_data[sub][line] = np.append(np.array(graph_data[sub][line]), [-1]*(max_dim - cur_len))
    else:
      del_subs.add(sub)
      break

# Remove subjects marked for deletion
for sub in del_subs:
  del graph_data[sub]
  print('removed:',sub)

# Sort the graph_data dictionary by subject ID (dicts keep insertion order)
graph_data = dict(sorted(graph_data.items()))

## IRA

### CDC Analysis

In [None]:
# CDC labels per Friedel subject: one 0/1 entry for every non-control line
CDC = {}
all_subs = set(graph_data.keys())
for sub in all_subs:
  subject_graph = graph_data[sub]
  # Control data without the -1 padding
  control = [v for v in subject_graph['control'] if v != -1]
  # Sample mean and sample standard deviation (ddof=1) of the control line
  mean = np.mean(control)
  sd = np.std(control, ddof=1)
  # Upper limit is mean + sd; lower limit is mean - sd, floored at 0
  UC = mean + sd
  LC = (mean - sd).clip(min=0)

  CDC_lbl = []
  for line in subject_graph:
    if line == 'control':
      continue
    # Data of this condition without the -1 padding
    line_data = [v for v in subject_graph[line] if v != -1]
    above = 0
    below = 0
    for point in line_data:
      # "above" takes precedence; "below" is only checked otherwise
      if point > UC:
        above += 1
      elif point < LC:
        below += 1
    # Label 1 when (above - below) is at least half of the points; an empty line is 0
    is_different = len(line_data) != 0 and (above - below) / len(line_data) >= 0.5
    CDC_lbl.append(1 if is_different else 0)
  # Store the determined CDC labels for the subject
  CDC[sub] = CDC_lbl

### Load Ratings

In [None]:
# Ratings of the Friedel graphs: r_data[rater_name][subject] -> list of three integer ratings
r_data = {}

r_all_subs = set()  # every subject seen in any rating file
r_err_subs = set()  # subjects flagged with an error by at least one rater


files = os.listdir('Ratings')
print('loaded: ', end='')
# Loop through rating files to load Friedel data ratings
for f_name in files:
  # Only process files starting with 'r-'
  if not f_name.startswith('r-'):
    continue
  # Rater name = file name without the 'r-' prefix and the 4-character extension
  name = f_name[2:-4]
  r_data[name] = {}
  print(f_name, end=', ')
  # Context manager so the handle is closed even if a row fails to parse
  with open('Ratings/'+f_name) as file:
    csv_reader = csv.reader(file)
    # The first two rows are skipped before reading ratings
    next(csv_reader)
    next(csv_reader)
    for row in csv_reader:
      # Column 1 holds the subject ID
      sub = int(row[1])
      r_all_subs.add(sub)
      # Column 8 is the error flag; '1' marks a reported error
      if row[8] != '1':
        # Columns 2-4 hold the three ratings
        r_data[name][sub] = [int(row[2]), int(row[3]), int(row[4])]
      else:
        r_err_subs.add(sub)

# Determine valid subjects by removing subjects with errors from all subjects
r_val_subs = r_all_subs - r_err_subs

r_agr_subs = set()
# Identify subjects where all raters (for Friedel data) completely agree
for sub in r_val_subs:
  # Ratings of this subject from every loaded rater
  temp = [r_data[key][sub] for key in r_data]
  if lists_identical(temp):
    r_agr_subs.add(sub)

print()
# Print summary statistics for Friedel data subjects
print('# of all_subs:', len(r_all_subs)) # total subjects
print('# of err_subs:', len(r_err_subs)) # subjects with reported errors
print('# of val_subs:', len(r_val_subs)) # total subjects without reported errors
print('# of agr_subs:', len(r_agr_subs)) # total subjects with agreement between 3 raters

### Remove Subjects With Reported Errors

In [None]:
# Drop every error-flagged subject from each rater's Friedel ratings.
# dict.pop with a default ignores subjects a rater never stored, without the
# bare 'except' that would also hide unrelated errors.
for key in r_data:
  for sub in r_err_subs:
    r_data[key].pop(sub, None)

### Save Disagreements

In [None]:
# NOTE(review): this repeats the 'intersection' definition from the Hall
# "Save Disagreements" section; once this cell runs, this version is the one in effect.
def intersection(list1, list2):
  """Collect the subjects that have identical ratings in both mappings.

  Args:
    list1: mapping of subject -> ratings.
    list2: mapping of subject -> ratings; must have exactly the same keys.

  Returns:
    A set with every key for which list1[key] == list2[key].

  Raises:
    AssertionError: if the two mappings do not share the same keys.
  """
  assert list1.keys() == list2.keys()
  matching = set()
  for sub, rating in list1.items():
    if rating == list2[sub]:
      matching.add(sub)
  return matching

In [None]:
# Determine and save disagreements between raters for the Friedel data.
# This cell must use the Friedel ratings (r_data / r_val_subs); the previous
# version re-used the Hall variables and therefore wrote the Hall
# disagreements into friedel_differences.csv.
K_C = r_val_subs - intersection(r_data['katie_VA'], r_data['CDC'])
N_C = r_val_subs - intersection(r_data['neely_VA'], r_data['CDC'])
K_N = r_val_subs - intersection(r_data['katie_VA'], r_data['neely_VA'])

# Union of all pairwise disagreements, sorted for consistent output
dis = sorted(K_C | N_C | K_N)

# Write one row per subject: ID, then the CDC, katie_VA and neely_VA ratings.
# The context manager closes the file even if a row cannot be written.
with open('Ratings/friedel_differences.csv', 'w', newline='') as file:
  writer = csv.writer(file)
  for sub in dis:
    writer.writerow([sub] + r_data['CDC'][sub] + r_data['katie_VA'][sub] + r_data['neely_VA'][sub])

## Load Ground Truth

In [None]:
f_name = 'r-Ground_Truth.csv'
# Key under which the labels are stored: file name without 'r-' and the extension
name = f_name[2:-4]
r_data[name] = {}
print('loaded: ', end='')
print(f_name, end=', ')
# Context manager so the handle is always closed
with open('Ground_Truth/'+f_name) as file:
  csv_reader = csv.reader(file)
  # The first two rows are skipped, as in the rating files
  next(csv_reader)
  next(csv_reader)
  # Process each row to extract ground truth ratings for Friedel data
  for row in csv_reader:
    # Column 8 is the error flag; only rows without a reported error are kept
    if row[8] != '1':
      sub = int(row[1])
      r_data[name][sub] = [int(row[2]), int(row[3]), int(row[4])]

# Also drop subjects that any rater flagged as erroneous, if they are present
for sub in r_err_subs:
  r_data['Ground_Truth'].pop(sub, None)

# Merge Datasets and GT Labels



In [None]:
# From here on the valid-subject collections are sorted lists
r_val_subs = sorted(r_val_subs)
h_val_subs = sorted(h_val_subs)

# All valid subjects as [original_id, dataset_tag]: Friedel ('r') first, then Hall ('h')
subs = [[sub, 'r'] for sub in r_val_subs] + [[sub, 'h'] for sub in h_val_subs]

# New sequential subject ID -> [original_id, dataset_tag]
sub_lookup = dict(enumerate(subs))

# Source of the graphs for each dataset tag
graphs_by_tag = {'r': graph_data, 'h': hall_graph_data}

combined_graph_data = {}
# Copy every subject's graph from its source dataset under the new ID.
# NOTE(review): a valid subject that was dropped from the graph data (line longer
# than max_dim) would raise KeyError here - confirm the rating files exclude them.
for new_sub, (orig_sub, tag) in sub_lookup.items():
  source = graphs_by_tag.get(tag)
  if source is None:
    print('error')
  else:
    combined_graph_data[new_sub] = source[orig_sub]

combined_ratings = {}
# For every rater key of the Friedel ratings, merge the Friedel and Hall ratings under the new IDs
for key in r_data:
  ratings_by_tag = {'r': r_data[key], 'h': h_data[key]}
  merged = {}
  combined_ratings[key] = merged
  for new_sub, (orig_sub, tag) in sub_lookup.items():
    source = ratings_by_tag.get(tag)
    if source is None:
      print('error')
    else:
      merged[new_sub] = source[orig_sub]

# Ground-truth labels of the merged dataset
combined_labels = combined_ratings['Ground_Truth']

In [None]:
# One entry per subject; each entry is the list of that subject's lines in stored order
dataset = [list(subject_graph.values()) for subject_graph in combined_graph_data.values()]
# Ground-truth label lists in the same subject order
labels = list(combined_labels.values())

# Convert the dataset and labels to numpy arrays
dataset = np.array(dataset)
labels = np.array(labels)

# Save the processed dataset and labels to .npy files
np.save('Datasets/real_dataset.npy', dataset)
np.save('Datasets/real_labels.npy', labels)

# Determine Effect Sizes

Effect sizes are calculated for each condition. If the control condition of a graph has zero standard deviation, its conditions are marked with -2. If a condition is a placeholder (i.e., it contains no data and exists only to fill the 3x10 array), it is marked with -1. If a condition qualifies for both -1 and -2, it is marked with -1.

In [None]:
def calc_effect_sizes(dataset):
  """Compute an effect size for every non-control line of every graph.

  The effect size of a line is |mean(line) - mean(control)| / std(control),
  where std is NumPy's default (population) standard deviation and values
  equal to -1 are treated as padding and ignored.

  Args:
    dataset: iterable of graphs; graph[0] is the control line and graph[1:]
      are the remaining lines.

  Returns:
    A list with one list per graph, holding one entry per non-control line:
    -1 if the line has no data, -2 if the control standard deviation is zero,
    otherwise the effect size. The "no data" check comes first, so an empty
    line is -1 even when the control standard deviation is zero.

  Raises:
    AssertionError: if a control line has no data or an effect size is NaN.
  """
  all_sizes = []

  for graph in dataset:
    # Control line without padding; it must contain at least one point
    baseline = [v for v in graph[0] if v != -1]
    assert len(baseline) != 0
    c_mean = np.mean(baseline)
    c_std = np.std(baseline)

    sizes = []
    for raw_line in graph[1:]:
      points = [v for v in raw_line if v != -1]
      # Placeholder line: nothing but padding
      if len(points) == 0:
        sizes.append(-1)
        continue
      # Flat control line: the ratio is undefined
      if c_std == 0:
        sizes.append(-2)
        continue
      sizes.append(np.absolute((np.mean(points) - c_mean) / c_std))

    # No NaN may be produced for any line of this graph
    assert not np.any(np.isnan(sizes))
    all_sizes.append(sizes)

  return all_sizes

In [None]:
# Effect sizes of every non-control line of the merged real dataset
real_effect_sizes = calc_effect_sizes(dataset)
# Persist them next to the dataset and label arrays
np.save('Datasets/real_effect_sizes.npy', real_effect_sizes)

# Interrater Agreement Matrices


In [None]:
rater_names = list(combined_ratings.keys())
n = len(rater_names)
# Square matrix of agreement fractions, one row/column per rater or method
agr_mat = [[0] * n for _ in range(n)]
for i, x_key in enumerate(combined_ratings):
  x_ratings = combined_ratings[x_key]
  for j, y_key in enumerate(combined_ratings):
    # Subjects on which both raters gave exactly the same ratings
    matching = intersection(x_ratings, combined_ratings[y_key])
    agr_mat[i][j] = len(matching) / len(x_ratings)

# Convert the agreement matrix to a Pandas DataFrame for easier visualization
agr_df = pd.DataFrame(agr_mat, columns=rater_names, index=rater_names)

# Hide the upper triangle (diagonal included): it only repeats the lower one
mask = np.triu(np.ones_like(agr_df, dtype=bool))

# Heatmap of the share of graphs on which two raters agree completely
plt.figure(figsize=(8, 6))
sns.heatmap(agr_df, annot=True, cmap='rocket', fmt=".2f", mask=mask)
# n in the title is the number of graphs rated by the last rater of the loop
n_graphs = len(combined_ratings[x_key])
plt.title('Inter Rater Agreement Matrix (n='+str(n_graphs)+')\n(Complete Agreement on Graph)')
plt.xlabel('Rater and Method')
plt.ylabel('Rater and Method')
plt.show()

In [None]:
# Boolean array with one row per graph and one column per non-control line.
# Despite its name, an entry is True where the line HAS data (at least one
# value other than the -1 placeholder) and False where it is only padding.
missing_condition = np.array(
    [[any(value != -1 for value in line) for line in graph[1:]] for graph in dataset],
    dtype=bool,
)

In [None]:
# Per rater: array of rating lists, one row per subject in stored order
flattened_ratings = {
    key: np.array(list(ratings.values())) for key, ratings in combined_ratings.items()
}

rater_names = list(flattened_ratings.keys())
n = len(rater_names)
agr_mat = [[0] * n for _ in range(n)]
for i, x_key in enumerate(flattened_ratings):
  # Ratings of the first rater restricted to conditions that have data
  x_present = flattened_ratings[x_key][missing_condition]
  for j, y_key in enumerate(flattened_ratings):
    y_present = flattened_ratings[y_key][missing_condition]
    # Share of individual conditions on which both raters gave the same rating
    agreement = np.mean(x_present == y_present)
    agr_mat[i][j] = agreement

# Convert the agreement matrix to a Pandas DataFrame for easier visualization
agr_df = pd.DataFrame(agr_mat, columns=rater_names, index=rater_names)

# Hide the upper triangle (diagonal included): it only repeats the lower one
mask = np.triu(np.ones_like(agr_df, dtype=bool))

# Heatmap of the interrater agreement on individual conditions
plt.figure(figsize=(8, 6))
sns.heatmap(agr_df, annot=True, cmap='rocket', fmt=".2f", mask=mask)
# n in the title is the number of graphs rated by the last rater of the loop
n_graphs = len(combined_ratings[x_key])
plt.title('Inter Rater Agreement Matrix (n='+str(n_graphs)+')\n(Agreement on Individual Conditions)')
plt.xlabel('Rater and Method')
plt.ylabel('Rater and Method')
plt.show()