# XAI Layers Master Thesis - Interactive Analysis

This notebook allows you to explore your data interactively after running the extraction script.

## Setup

First, make sure you've run `extract_and_analyze.py` to generate the CSV files; this notebook loads `output/xai_layers_analysis_ready.csv`.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

# Plot styling shared by every figure in this notebook
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (12, 6)

# Load data
# The CSV is generated by extract_and_analyze.py. Check for it up front so a
# missing file produces an actionable message instead of a bare pandas traceback.
DATA_PATH = Path('output') / 'xai_layers_analysis_ready.csv'
if not DATA_PATH.is_file():
    raise FileNotFoundError(
        f"{DATA_PATH} not found - run extract_and_analyze.py first to generate it."
    )
df = pd.read_csv(DATA_PATH)

# NOTE(review): the message treats each distinct session_id as one participant - confirm
print(f"Loaded {len(df)} observations from {df['session_id'].nunique()} participants")
print(f"\nColumns: {list(df.columns)}")

## 1. Data Overview

In [None]:
# Quick look at the frame: its dimensions, then a preview of the leading rows.
# The preview stays as the last expression so the notebook renders it as a table.
print("Dataset Shape:", df.shape)
print("\nFirst few rows:")
df.head(n=5)

In [None]:
# Descriptive statistics for the rating columns and the time-spent column
summary_cols = [
    'understanding_rating',
    'communicability_rating',
    'cognitive_load_rating',
    'time_spent_seconds',
]
df.loc[:, summary_cols].describe()

## 2. Visualizations

In [None]:
# One box-plot panel per rating metric, each grouped by interface
metrics = ['understanding_rating', 'communicability_rating', 'cognitive_load_rating']
titles = ['Understanding', 'Communicability', 'Cognitive Load']

fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(15, 5))

for idx, panel in enumerate(axes):
    df.boxplot(column=metrics[idx], by='interface_id', ax=panel)
    # Set after plotting so these labels are the ones left on the panel
    panel.set(title=titles[idx], xlabel='Interface', ylabel='Rating (1-5)')

# Figure-level heading, nudged above the panels
fig.suptitle('Rating Distributions by Interface', y=1.02)
fig.tight_layout()
plt.show()

In [None]:
# Average each rating per interface and draw the result as grouped bars
rating_cols = ['understanding_rating', 'communicability_rating', 'cognitive_load_rating']
means = df.groupby('interface_id')[rating_cols].mean()

ax = means.plot.bar(figsize=(10, 6))
ax.set_title('Mean Ratings by Interface')
ax.set_xlabel('Interface')
ax.set_ylabel('Mean Rating (1-5)')
ax.legend(title='Metric')
plt.setp(ax.get_xticklabels(), rotation=45)
ax.get_figure().tight_layout()
plt.show()

In [None]:
# Time spent by interface
# With `by=` and no `ax=`, DataFrame.boxplot opens its own figure, so a figure
# made beforehand with plt.figure() would stay empty and its size would be ignored.
# Create the axes explicitly and hand them to pandas instead.
fig, ax = plt.subplots(figsize=(10, 6))
df.boxplot(column='time_spent_seconds', by='interface_id', ax=ax)
ax.set_title('Time Spent by Interface')
ax.set_xlabel('Interface')
ax.set_ylabel('Time (seconds)')
fig.suptitle('')  # clear the super-title that the grouped boxplot adds
fig.tight_layout()
plt.show()

## 3. Layer Preferences

In [None]:
# Distinct sessions per chosen layer, one column per preference question
preference_sources = {
    'Most Helpful': 'most_helpful_layer',
    'Most Trusted': 'most_trusted_layer',
    'Best for Customer': 'best_for_customer',
}
preferences = pd.DataFrame({
    label: df.groupby(source)['session_id'].nunique()
    for label, source in preference_sources.items()
}).fillna(0)  # a layer absent from one question's answers gets 0 there

ax = preferences.plot.bar(figsize=(10, 6))
ax.set_title('Layer Preferences')
ax.set_xlabel('Layer')
ax.set_ylabel('Count')
ax.legend(title='Preference Type')
plt.setp(ax.get_xticklabels(), rotation=0)
ax.get_figure().tight_layout()
plt.show()

print("\nPreference Counts:")
print(preferences)

## 4. Comparison by Decision Outcome

In [None]:
# Mean of each rating for every interface / decision-outcome combination
outcome_metrics = ['understanding_rating', 'communicability_rating', 'cognitive_load_rating']
by_outcome = (
    df.groupby(['interface_id', 'decision_outcome'])[outcome_metrics]
    .mean()
)

ax = by_outcome.plot.bar(figsize=(12, 6))
ax.set(
    title='Mean Ratings by Interface and Decision Outcome',
    xlabel='Interface × Decision',
    ylabel='Mean Rating',
)
ax.legend(title='Metric')
plt.setp(ax.get_xticklabels(), rotation=45)
ax.get_figure().tight_layout()
plt.show()

## 5. Comparison by Role Group

In [None]:
# Mean of each rating for every interface / role-group combination
role_metrics = ['understanding_rating', 'communicability_rating', 'cognitive_load_rating']
role_groups = df.groupby(['interface_id', 'role_group'])
by_role = role_groups[role_metrics].mean()

ax = by_role.plot.bar(figsize=(12, 6))
ax.set_title('Mean Ratings by Interface and Role Group')
ax.set_xlabel('Interface × Role')
ax.set_ylabel('Mean Rating')
ax.legend(title='Metric')
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(45)
ax.get_figure().tight_layout()
plt.show()

## 6. Correlation Analysis

In [None]:
# Pairwise correlations across the selected columns, drawn as an annotated heatmap
# NOTE(review): assumes all six columns are numeric - confirm against the CSV
corr_cols = [
    'understanding_rating',
    'communicability_rating',
    'cognitive_load_rating',
    'time_spent_seconds',
    'overall_intuitiveness',
    'ai_usefulness',
]
corr = df[corr_cols].corr()

fig, ax = plt.subplots(figsize=(10, 8))
heatmap_options = dict(
    annot=True,
    cmap='coolwarm',
    center=0,
    square=True,
    linewidths=1,
    cbar_kws={"shrink": 0.8},
)
sns.heatmap(corr, ax=ax, **heatmap_options)
ax.set_title('Correlation Matrix')
fig.tight_layout()
plt.show()

## 7. Custom Analysis

Add your own analysis cells below:

In [None]:
# Your custom analysis here
