# Exploratory Data Analysis

In [1]:
%load_ext autoreload
%autoreload 2
from data_loader import load_participants_info, load_event_descriptions, load_behavioral_data
import pandas as pd

In [2]:
DATASET_PATH = "data"
participants_df = load_participants_info(DATASET_PATH)
load_event_descriptions(DATASET_PATH)

# Participants 1..10, formatted as zero-padded two-digit IDs ("01" ... "10").
# `:02d` is required here: naively prefixing "0" would turn participant 10
# into "010" instead of "10".
participant_ids = [f"{idx:02d}" for idx in range(1, 11)]

# Load each participant's behavioural table, then concatenate once at the end.
# Concatenating inside the loop would re-copy the accumulated frame on every
# iteration. Rows are ordered by participant ID and re-indexed from 0.
behavior_frames = [
    load_behavioral_data(DATASET_PATH, participant_id)
    for participant_id in participant_ids
]
df_behavior = pd.concat(behavior_frames, ignore_index=True)

In [None]:
# Preview the first five rows of the combined behavioural table.
df_behavior.head(n=5)

In [None]:
# Frequency of each distinct MemoryStrength value across all loaded rows
# (value_counts leaves out missing values by default).
df_behavior.loc[:, 'MemoryStrength'].value_counts()

In [None]:
# Build the frame used by the pupil plots in a single chained expression:
#   1. keep only the condition, timing and pupil columns,
#   2. drop every row that has a missing value in any of those columns,
#   3. store Condition as a categorical.
# The cast is done inside the chain instead of by assigning a column on the
# dropna() result, which avoids pandas' SettingWithCopyWarning and keeps the
# cell idempotent (it always derives df_behavior_clean from df_behavior).
df_behavior_clean = (
    df_behavior[['Condition', 'onset', 'duration', 'PreEvent_PupilMax', 'Event_PupilDilation']]
    .dropna()
    .astype({'Condition': 'category'})
)
df_behavior_clean

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Figure: two side-by-side boxplots of the pupil measures, split by Condition.
fig, (ax_baseline, ax_dilation) = plt.subplots(1, 2, figsize=(12, 6))

# Left panel: PreEvent_PupilMax per Condition
sns.boxplot(data=df_behavior_clean, x='Condition', y='PreEvent_PupilMax', ax=ax_baseline)
ax_baseline.set_title("Baseline Pupil Size (PreEvent_PupilMax) by Condition")
ax_baseline.set_xlabel("Condition")
ax_baseline.set_ylabel("PreEvent_PupilMax")

# Right panel: Event_PupilDilation per Condition
sns.boxplot(data=df_behavior_clean, x='Condition', y='Event_PupilDilation', ax=ax_dilation)
ax_dilation.set_title("Pupil Dilation Response by Condition")
ax_dilation.set_xlabel("Condition")
ax_dilation.set_ylabel("Event_PupilDilation")

fig.tight_layout()
plt.show()

# Figure: PreEvent_PupilMax against Event_PupilDilation, one point per row,
# coloured by Condition.
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(
    data=df_behavior_clean,
    x='PreEvent_PupilMax',
    y='Event_PupilDilation',
    hue='Condition',
    alpha=0.7,  # semi-transparent markers so overlapping points remain visible
    ax=ax,
)
ax.set_title("Baseline Pupil Size vs. Event Pupil Dilation")
ax.set_xlabel("PreEvent_PupilMax (Baseline Pupil Size)")
ax.set_ylabel("Event_PupilDilation (Task-Induced Dilation)")
ax.legend(title="Condition")
plt.show()

# Figure: annotated heatmap of the pairwise correlations between the timing
# columns (onset, duration) and the two pupil measures.
corr_matrix = df_behavior_clean[['onset', 'duration', 'PreEvent_PupilMax', 'Event_PupilDilation']].corr()

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(corr_matrix, annot=True, cmap="coolwarm", fmt=".2f", ax=ax)
ax.set_title("Correlation Between Pupil Metrics and Timing")
plt.show()


In [None]:
# Store Condition as a categorical on the shared frame. This modifies
# df_behavior itself, so cells that run afterwards see the categorical column.
df_behavior['Condition'] = df_behavior['Condition'].astype('category')

# 2x2 overview figure; panels are filled in row-major order.
fig, axes = plt.subplots(2, 2, figsize=(12, 6))
ax_hist, ax_box, ax_trend, ax_memory = axes.flat

# Panel 1 (top-left): histogram with KDE of Event_PupilDilation, pooled over
# all loaded rows; missing values are dropped first.
sns.histplot(df_behavior['Event_PupilDilation'].dropna(), bins=30, kde=True, ax=ax_hist)
ax_hist.set_title("Distribution of Event Pupil Dilation")
ax_hist.set_xlabel("Event Pupil Dilation (arbitrary units)")
ax_hist.set_ylabel("Frequency")

# Panel 2 (top-right): Event_PupilDilation per Condition
sns.boxplot(data=df_behavior, x='Condition', y='Event_PupilDilation', ax=ax_box)
ax_box.set_title("Pupil Dilation by Condition (Threat vs. Neutral)")
ax_box.set_xlabel("Condition")
ax_box.set_ylabel("Event Pupil Dilation")

# Panel 3 (bottom-left): Event_PupilDilation against TrialNumber, one line per
# Condition, with a standard-deviation band.
# NOTE(review): seaborn >= 0.12 deprecates `ci` in favour of errorbar="sd";
# switch once the minimum seaborn version of this project is confirmed.
sns.lineplot(data=df_behavior, x='TrialNumber', y='Event_PupilDilation', hue='Condition', ci="sd", ax=ax_trend)
ax_trend.set_title("Pupil Dilation Over Trials")
ax_trend.set_xlabel("Trial Number")
ax_trend.set_ylabel("Event Pupil Dilation")

# Panel 4 (bottom-right): SceneRem (labelled as scene memory accuracy) against
# Event_PupilDilation, coloured by Condition.
sns.scatterplot(data=df_behavior, x='Event_PupilDilation', y='SceneRem', hue='Condition', alpha=0.7, ax=ax_memory)
ax_memory.set_title("Pupil Dilation vs. Scene Memory Accuracy")
ax_memory.set_xlabel("Event Pupil Dilation")
ax_memory.set_ylabel("Scene Memory Accuracy")

fig.tight_layout()
plt.show()