In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Load data: one DataFrame per exported CSV (participants, responses, LLM)
participants_df, responses_df, llm_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/content/participants_66dd0f5d2d96edbedcf4a80b.csv',
        '/content/responses_66dd0f5d2d96edbedcf4a80b.csv',
        '/content/llm_66dd0f5d2d96edbedcf4a80b.csv',
    )
)

In [None]:
# Define markers and colors (one entry per condition, indexed in plotting order)
markers = ['o', 's', 'D']  # Circle, Square, Diamond
colors = ['#7F7F7F', '#e57a77', '#3D65A5']  # Gray, Red, Blue

# Convert string responses to numeric values
def convert_ai_feelings(response):
    """Map an AI-feelings survey answer onto an ordinal 1-3 score.

    Returns 1 for 'More concerned than excited', 2 for 'Equally excited and
    concerned' and 3 for 'More excited than concerned'. Any other value
    (including missing answers) yields ``np.nan``.
    """
    answer_scores = (
        ('More concerned than excited', 1),
        ('Equally excited and concerned', 2),
        ('More excited than concerned', 3),
    )
    for answer, score in answer_scores:
        if response == answer:
            return score
    return np.nan

# Score both AI-feelings questions (before / after) on the numeric scale
for score_column, question in (
    ('ai_feelings_before', 'Increased use of AI computer programs in daily life makes you feel (before)'),
    ('ai_feelings_after', 'Increased use of AI computer programs in daily life makes you feel (after)'),
):
    participants_df[score_column] = participants_df[question].apply(convert_ai_feelings)

# Raw condition code -> display name used on the figures
conditions = dict(Absent='No LLM Response', Generate='List of Ideas', Coach='List of Strategies')

# Helper: mean and SEM of one column within one condition
def calculate_stats(data, column, condition):
    """Return ``(mean, sem)`` of ``column`` for the rows of one condition.

    Args:
        data: DataFrame with a 'condition' column.
        column: Name of the column to summarise.
        condition: Value of 'condition' selecting the rows to include.

    Returns:
        Tuple ``(mean, sem)`` computed with ``Series.mean`` and ``Series.sem``.
    """
    in_condition = data['condition'] == condition
    values = data.loc[in_condition, column]
    return values.mean(), values.sem()

# Per-condition (mean, SEM) for every before/after measure, keyed by display name.
# Raw strings keep the literal backslash in the creativity column names without
# relying on the invalid '\%' escape sequence (a SyntaxWarning on Python 3.12+).
# NOTE(review): confirm the CSV header really contains '\%' and not a bare '%'.
measure_columns = {
    'creative_before': r'I am more creative than \% of humans (before)',
    'creative_after': r'I am more creative than \% of humans (after)',
    'ai_before': 'ai_feelings_before',
    'ai_after': 'ai_feelings_after',
}

# Each measure is computed exactly once per condition (the AI-feelings entries
# used to be recomputed by a second, duplicate loop).
stats = {
    renamed_condition: {
        measure: calculate_stats(participants_df, column, condition)
        for measure, column in measure_columns.items()
    }
    for condition, renamed_condition in conditions.items()
}

In [None]:
# Figure 1: Difference in Creativity Score

fig1, ax1 = plt.subplots(figsize=(7, 7))

# Per-participant change in "I am more creative than % of humans" (after - before).
# Both answers sit on the same participants_df row, so the error bar is the SEM
# of the paired differences; adding the two marginal SEMs in quadrature would
# treat the before/after answers as independent samples.
# NOTE(review): the column names keep the literal '\%' used elsewhere in this
# notebook -- confirm it matches the CSV header.
creativity_change = (
    participants_df[r'I am more creative than \% of humans (after)']
    - participants_df[r'I am more creative than \% of humans (before)']
)
creativity_change_stats = (
    creativity_change.groupby(participants_df['condition'])
    .agg(['mean', 'sem'])
    .reindex(list(conditions))  # raw condition codes; missing ones become NaN rows
)

for i, (condition, renamed_condition) in enumerate(conditions.items()):
    diff_mean = creativity_change_stats.loc[condition, 'mean']
    diff_sem = creativity_change_stats.loc[condition, 'sem']
    ax1.errorbar([renamed_condition], [diff_mean], yerr=[diff_sem],
                 capsize=8, marker=markers[i], linestyle='', markersize=12, color=colors[i], linewidth=3)

ax1.set_title('Difference in Self-Assessment of Creativity (After - Before)')
ax1.set_xlabel('Condition', fontsize=14)
ax1.set_ylabel('Difference in Creativity Score', fontsize=14)
ax1.axhline(0, color='gray', linewidth=0.5)  # Reference line: no change

# Keep all spines (thin) and remove gridlines
ax1.grid(False)
for spine in ax1.spines.values():
    spine.set_linewidth(0.5)

plt.tight_layout()
plt.show()

In [None]:
# Figure 2: Difference in Feelings About AI Usage (After - Before)

fig2, ax2 = plt.subplots(figsize=(7, 7))

# Per-participant change in the AI-feelings score (after - before). Both scores
# sit on the same participants_df row, so the error bar is the SEM of the paired
# differences; adding the two marginal SEMs in quadrature would treat the
# before/after answers as independent samples.
ai_feelings_change = participants_df['ai_feelings_after'] - participants_df['ai_feelings_before']
ai_feelings_change_stats = (
    ai_feelings_change.groupby(participants_df['condition'])
    .agg(['mean', 'sem'])
    .reindex(list(conditions))  # raw condition codes; missing ones become NaN rows
)

for i, (condition, renamed_condition) in enumerate(conditions.items()):
    diff_mean = ai_feelings_change_stats.loc[condition, 'mean']
    diff_sem = ai_feelings_change_stats.loc[condition, 'sem']
    ax2.errorbar([renamed_condition], [diff_mean], yerr=[diff_sem],
                 capsize=10, marker=markers[i], linestyle='', markersize=12, color=colors[i], linewidth=3)

ax2.set_title('Change (Δ) in Feelings About AI Usage (After - Before)')
ax2.set_xlabel('Condition')
ax2.set_ylabel('Change in AI Feeling Score')
ax2.axhline(0, color='gray', linewidth=0.5)  # Reference line: no change

# Keep all spines (thin) and remove gridlines
ax2.grid(False)
for spine in ax2.spines.values():
    spine.set_linewidth(0.5)

plt.tight_layout()
plt.show()

In [None]:
# Chi Squared, p-value, Degrees of Freedom, Expected Frequencies
# Goodness-of-fit test: are participants spread evenly across the conditions?

from scipy.stats import chi2_contingency, chisquare

# Count the number of participants by condition
counts = participants_df['condition'].value_counts().reset_index()
counts.columns = ['condition', 'count']

# Display the counts in a table format
print(counts)

# Observed counts as a single-row table (shape kept for backward compatibility)
contingency_table = counts['count'].values.reshape(1, -1)

# chi2_contingency tests independence between rows and columns; with a single
# row its expected frequencies equal the observed ones, so it always reports
# chi2 = 0, dof = 0 and p = 1. The one-way chi-square test against equal
# expected counts is the test that can actually detect an unbalanced split.
observed = contingency_table.ravel()
chi2, p = chisquare(observed)
dof = len(observed) - 1
expected = np.full(contingency_table.shape, observed.mean())

# Output the chi-square test result
print(f"Chi-square statistic: {chi2}")
print(f"p-value: {p}")
print(f"Degrees of freedom: {dof}")
print("Expected frequencies:")
print(expected)

In [None]:
# Figure 3: How difficult was it to come up with uses for the last object?

# Raw condition code -> display name
condition_mapping = dict(Absent='No LLM Response', Generate='List of Ideas', Coach='List of Strategies')
participants_df['condition_renamed'] = participants_df['condition'].map(condition_mapping)

# Likert answers scored 1 ('Very easy') through 5 ('Very difficult')
difficulty_mapping = dict(zip(
    ['Very easy', 'Somewhat easy', 'Neither easy nor difficult', 'Somewhat difficult', 'Very difficult'],
    range(1, 6),
))
participants_df['difficulty'] = (
    participants_df['How difficult was it to come up with uses for the last object?']
    .map(difficulty_mapping)
)

# Mean and SEM of difficulty per condition, sorted into plotting order
order = ['No LLM Response', 'List of Ideas', 'List of Strategies']
difficulty_stats = (
    participants_df.groupby('condition_renamed')['difficulty']
    .agg(['mean', 'sem'])
    .reset_index()
)
difficulty_stats['condition_renamed'] = pd.Categorical(
    difficulty_stats['condition_renamed'], categories=order, ordered=True
)
difficulty_stats = difficulty_stats.sort_values('condition_renamed')

# Color scheme and markers, one per condition
colors = ['#7F7F7F', '#e57a77', '#3D65A5']  # Gray, Red, Blue
markers = ['o', 's', 'D']  # Circle, Square, Diamond

# One mean +- SEM point per condition
fig, ax = plt.subplots(figsize=(7, 7))

summary_rows = zip(difficulty_stats['condition_renamed'], difficulty_stats['mean'], difficulty_stats['sem'])
for i, (condition, mean_difficulty, sem_difficulty) in enumerate(summary_rows):
    ax.errorbar([condition], [mean_difficulty], yerr=[sem_difficulty],
                fmt=markers[i], capsize=10, markersize=12, color=colors[i], linewidth=3)

ax.set_title('Difficulty in Coming Up with Uses for the Last Object')
ax.set_xlabel('Condition', fontsize=14)
ax.set_ylabel('Difficulty Score', fontsize=14)

# No gridlines; all four spines kept but thin
ax.grid(False)
for spine in ax.spines.values():
    spine.set_linewidth(0.5)

plt.tight_layout()
plt.show()

In [None]:
# Figure 4: Originality Scores by Condition and Phase

# Raw condition code -> display name
condition_mapping = dict(Absent='No LLM Response', Generate='List of Ideas', Coach='List of Strategies')
responses_df['condition_renamed'] = responses_df['condition'].map(condition_mapping)

# Mean and SEM of originality over all rows of llm_df (no condition split)
llm_originality = llm_df['originality']
llm_mean, llm_sem = llm_originality.mean(), llm_originality.sem()

# Mean and SEM of originality per (condition, phase), sorted into plotting order
order = ['No LLM Response', 'List of Ideas', 'List of Strategies']
originality_stats_phase = (
    responses_df.groupby(['condition_renamed', 'phase'])['originality']
    .agg(['mean', 'sem'])
    .reset_index()
)
originality_stats_phase['condition_renamed'] = pd.Categorical(
    originality_stats_phase['condition_renamed'], categories=order, ordered=True
)
originality_stats_phase = originality_stats_phase.sort_values(['condition_renamed', 'phase'])

# Per-condition styling: color, marker, line style and horizontal offset
colors = ['#7F7F7F', '#E57A77', '#3D65A5']  # Gray, Red, Blue
markers = ['o', 's', 'D']  # Circle, Square, Diamond
linestyles = ['-', '--', ':']
offsets = [-0.1, 0, 0.1]

# Phases go on the x-axis (first phase at x=1, second at x=2)
fig, ax = plt.subplots(figsize=(7, 7))

# Disabled: shaded band for mean +- SEM of the LLM originality
#ax.fill_between([0.8, 2.2], llm_mean - llm_sem, llm_mean + llm_sem, color='gray', alpha=0.3, label='LLM Originality ± SEM')

# One connected pair of error-bar points per condition, nudged sideways so they do not overlap
for condition, offset, marker, linestyle, color in zip(order, offsets, markers, linestyles, colors):
    condition_data = originality_stats_phase[originality_stats_phase['condition_renamed'] == condition]
    x_positions = np.array([1, 2]) + offset
    ax.errorbar(x_positions, condition_data['mean'], yerr=condition_data['sem'],
                fmt=marker, capsize=10, markersize=12, linestyle=linestyle,
                label=condition, color=color, linewidth=3)

# Titles and axis labels
ax.set_title('Originality Scores by Condition and Phase')
ax.set_xlabel('Phase', fontsize=20)
ax.set_ylabel('Originality Score', fontsize=20)

# Ticks sit on the un-offset phase positions; tick labels are enlarged
ax.set_xticks([1, 2])
ax.set_xticklabels(['Exposure', 'Test'])
ax.tick_params(axis='x', labelsize=16)
ax.tick_params(axis='y', labelsize=16)

# Legend
ax.legend(title='Condition', loc='lower left', fontsize=14, title_fontsize=16)

# No gridlines; all four spines kept but thin
ax.grid(False)
for spine in ax.spines.values():
    spine.set_linewidth(0.5)

plt.tight_layout()
plt.show()

In [None]:
!pip install scikit_posthocs

In [None]:
from scipy.stats import kruskal
from scikit_posthocs import posthoc_dunn

In [None]:
# Rename conditions
condition_mapping = {'Absent': 'No LLM Response', 'Generate': 'List of Ideas', 'Coach': 'List of Strategies'}
phase_mapping = {'Practice': 'Exposure', 'Test': 'Test'}
responses_df['condition_renamed'] = responses_df['condition'].map(condition_mapping)

# Relabel the phase column in place. Labels that are not keys of phase_mapping
# are passed through unchanged, so running this cell a second time (when the
# column already holds 'Exposure') no longer turns those rows into NaN.
responses_df['phase'] = responses_df['phase'].map(lambda label: phase_mapping.get(label, label))

# Separate data by phase (Exposure and Test)
exposure_data = responses_df[responses_df['phase'] == 'Exposure']
test_data = responses_df[responses_df['phase'] == 'Test']

In [None]:
# Kruskal-Wallis test for Exposure phase: originality compared across the three conditions
exposure_kruskal = kruskal(*[
    exposure_data.loc[exposure_data['condition_renamed'] == label, 'originality']
    for label in ('No LLM Response', 'List of Ideas', 'List of Strategies')
])

print(f"Kruskal-Wallis test result for Exposure phase: H-statistic = {exposure_kruskal.statistic}, p-value = {exposure_kruskal.pvalue}")

# Kruskal-Wallis test for Test phase: same comparison on the Test rows
test_kruskal = kruskal(*[
    test_data.loc[test_data['condition_renamed'] == label, 'originality']
    for label in ('No LLM Response', 'List of Ideas', 'List of Strategies')
])

print(f"Kruskal-Wallis test result for Test phase: H-statistic = {test_kruskal.statistic}, p-value = {test_kruskal.pvalue}")


In [None]:
# Dunn's pairwise comparisons on originality between conditions, Bonferroni-adjusted
originality_dunn_options = dict(val_col='originality', group_col='condition_renamed', p_adjust='bonferroni')

# Exposure phase
exposure_dunn = posthoc_dunn(exposure_data, **originality_dunn_options)
print("Dunn's post-hoc test for Exposure phase:")
print(exposure_dunn)

# Test phase
test_dunn = posthoc_dunn(test_data, **originality_dunn_options)
print("Dunn's post-hoc test for Test phase:")
print(test_dunn)

In [None]:
# Display the mean and SEM of llm_df['originality'] computed in the Figure 4 cell
llm_mean, llm_sem

In [None]:
# Figure 5: Average Number of Ideas by Condition and Phase

# Raw condition code -> display name
condition_mapping = dict(Absent='No LLM Response', Generate='List of Ideas', Coach='List of Strategies')
responses_df['condition_renamed'] = responses_df['condition'].map(condition_mapping)

# Practice fluency: responses per participant over rounds 1-3, divided by 3 to
# give an average per round
in_practice_rounds = responses_df['item_order'].isin([1, 2, 3])
fluency_practice = (
    responses_df[in_practice_rounds]
    .groupby(['condition_renamed', 'worker_id'])
    .size()
    .reset_index(name='fluency')
)
fluency_practice['phase'] = 'Practice'
fluency_practice['fluency'] = fluency_practice['fluency'] / 3

# Test fluency: responses per participant in round 4
in_test_round = responses_df['item_order'] == 4
fluency_test = (
    responses_df[in_test_round]
    .groupby(['condition_renamed', 'worker_id'])
    .size()
    .reset_index(name='fluency')
)
fluency_test['phase'] = 'Test'

# Stack both phases into one participant-level table
fluency_combined = pd.concat([fluency_practice, fluency_test])

# Mean and SEM of fluency per (condition, phase), sorted into plotting order
order = ['No LLM Response', 'List of Ideas', 'List of Strategies']
fluency_stats_phase = (
    fluency_combined.groupby(['condition_renamed', 'phase'])['fluency']
    .agg(['mean', 'sem'])
    .reset_index()
)
fluency_stats_phase['condition_renamed'] = pd.Categorical(
    fluency_stats_phase['condition_renamed'], categories=order, ordered=True
)
fluency_stats_phase = fluency_stats_phase.sort_values(['condition_renamed', 'phase'])

# Per-condition styling: color, marker, line style and horizontal offset
colors = ['#7F7F7F', '#e57a77', '#3D65A5']  # Gray, Red, Blue
markers = ['o', 's', 'D']  # Circle, Square, Diamond
linestyles = ['-', '--', ':']
offsets = [-0.1, 0, 0.1]

fig, ax = plt.subplots(figsize=(7, 7))

# One connected pair of error-bar points per condition, nudged sideways so they do not overlap
for condition, offset, marker, linestyle, color in zip(order, offsets, markers, linestyles, colors):
    condition_data = fluency_stats_phase[fluency_stats_phase['condition_renamed'] == condition]
    x_positions = np.array([1, 2]) + offset
    ax.errorbar(x_positions, condition_data['mean'], yerr=condition_data['sem'],
                fmt=marker, capsize=10, markersize=12, linestyle=linestyle,
                label=condition, color=color, linewidth=3)

# Titles and axis labels
ax.set_title('Average Number of Ideas by Condition and Phase')
ax.set_xlabel('Phase', fontsize=20)
ax.set_ylabel('Average Number of Ideas', fontsize=20)

# Ticks sit on the un-offset phase positions; tick labels are enlarged
ax.set_xticks([1, 2])
ax.set_xticklabels(['Exposure', 'Test'])
ax.tick_params(axis='x', labelsize=16)
ax.tick_params(axis='y', labelsize=16)

# Legend intentionally disabled for this figure
# ax.legend(title='Condition', loc='upper left')

# No gridlines; all four spines kept but thin
ax.grid(False)
for spine in ax.spines.values():
    spine.set_linewidth(0.5)

plt.tight_layout()
plt.show()

In [None]:
# Split the participant-level fluency table by phase ('Practice' is reported as Exposure)
fluency_exposure = fluency_combined.loc[fluency_combined['phase'] == 'Practice']
fluency_test = fluency_combined.loc[fluency_combined['phase'] == 'Test']

# Kruskal-Wallis test for Exposure (Practice) phase
fluency_kruskal_exposure = kruskal(*[
    fluency_exposure.loc[fluency_exposure['condition_renamed'] == label, 'fluency']
    for label in ('No LLM Response', 'List of Ideas', 'List of Strategies')
])

print(f"Kruskal-Wallis test result for Exposure phase: H-statistic = {fluency_kruskal_exposure.statistic}, p-value = {fluency_kruskal_exposure.pvalue}")

# Kruskal-Wallis test for Test phase
fluency_kruskal_test = kruskal(*[
    fluency_test.loc[fluency_test['condition_renamed'] == label, 'fluency']
    for label in ('No LLM Response', 'List of Ideas', 'List of Strategies')
])

print(f"Kruskal-Wallis test result for Test phase: H-statistic = {fluency_kruskal_test.statistic}, p-value = {fluency_kruskal_test.pvalue}")

# Dunn's pairwise comparisons between conditions, Bonferroni-adjusted
fluency_dunn_options = dict(val_col='fluency', group_col='condition_renamed', p_adjust='bonferroni')

# Exposure phase
fluency_dunn_exposure = posthoc_dunn(fluency_exposure, **fluency_dunn_options)
print("Dunn's post-hoc test for Exposure phase:")
print(fluency_dunn_exposure)

# Test phase
fluency_dunn_test = posthoc_dunn(fluency_test, **fluency_dunn_options)
print("Dunn's post-hoc test for Test phase:")
print(fluency_dunn_test)


In [None]:
# Figure 6: Creative Flexibility

# Rename conditions
condition_mapping = {'Absent': 'No LLM Response', 'Generate': 'List of Ideas', 'Coach': 'List of Strategies'}
responses_df['condition_renamed'] = responses_df['condition'].map(condition_mapping)
participants_df['condition_renamed'] = participants_df['condition'].map(condition_mapping)

# Creative flexibility is the 'diversity' column multiplied by 100.
# It is derived from the untouched source column instead of renaming and then
# scaling in place: the old rename-then-multiply sequence multiplied the values
# by 100 again every time the cell was re-run.
# NOTE(review): the x100 presumably maps a 0-1 measure onto 0-100 -- confirm the
# range of 'diversity'.
participants_df['creative_flexibility'] = participants_df['diversity'] * 100

# Calculate means and SEM for creative flexibility by condition
creative_flexibility_stats = participants_df.groupby('condition_renamed')['creative_flexibility'].agg(['mean', 'sem']).reset_index()

# Sort the conditions
order = ['No LLM Response', 'List of Ideas', 'List of Strategies']
creative_flexibility_stats['condition_renamed'] = pd.Categorical(creative_flexibility_stats['condition_renamed'], categories=order, ordered=True)
creative_flexibility_stats = creative_flexibility_stats.sort_values('condition_renamed')

# Define color scheme and markers
colors = ['#7F7F7F', '#e57a77', '#3D65A5']  # Gray, Red, Blue
markers = ['o', 's', 'D']  # Circle, Square, Diamond
offsets = [-0.1, 0, 0.1]  # Offsets for each condition

# Create the plot for creative flexibility
fig, ax = plt.subplots(figsize=(7, 7))

# Plot one mean +- SEM point per condition, with its horizontal offset applied
for i, condition in enumerate(order):
    x_position = i + 1 + offsets[i]
    condition_row = creative_flexibility_stats['condition_renamed'] == condition
    ax.errorbar(x_position, creative_flexibility_stats.loc[condition_row, 'mean'].values[0],
                yerr=creative_flexibility_stats.loc[condition_row, 'sem'].values[0],
                fmt=markers[i], capsize=10, markersize=12, color=colors[i], linewidth=3)

# Set labels (title intentionally disabled)
# ax.set_title('Creative Flexibility Across Conditions')
ax.set_xlabel('Condition', fontsize=20)
ax.set_ylabel('Creative Flexibility', fontsize=20)

# Enlarge x and y tick labels
ax.tick_params(axis='x', labelsize=16)
ax.tick_params(axis='y', labelsize=16)

# Set x-ticks to the condition names
ax.set_xticks(range(1, len(order) + 1))
ax.set_xticklabels(order)


# Keep all spines and remove gridlines
ax.grid(False)  # Remove gridlines
for spine in ax.spines.values():
    spine.set_linewidth(0.5)  # Keep all spines with minimal thickness

plt.tight_layout()
plt.show()

In [None]:
# Raw condition code -> display name
condition_mapping = dict(Absent='No LLM Response', Generate='List of Ideas', Coach='List of Strategies')
responses_df['condition_renamed'] = responses_df['condition'].map(condition_mapping)

# Keep only participants that have a creative-flexibility value, then relabel their condition
participants_df = participants_df.dropna(subset=['creative_flexibility'])
participants_df['condition_renamed'] = participants_df['condition'].map(condition_mapping)

# Kruskal-Wallis test for Creative Flexibility across the three conditions
kruskal_flexibility = kruskal(*[
    participants_df.loc[participants_df['condition_renamed'] == label, 'creative_flexibility']
    for label in ('No LLM Response', 'List of Ideas', 'List of Strategies')
])

print(f"Kruskal-Wallis test result: H-statistic = {kruskal_flexibility.statistic}, p-value = {kruskal_flexibility.pvalue}")

# Dunn's pairwise comparisons between conditions, Bonferroni-adjusted
flexibility_dunn_options = dict(val_col='creative_flexibility', group_col='condition_renamed', p_adjust='bonferroni')
dunn_flexibility = posthoc_dunn(participants_df, **flexibility_dunn_options)
print("Dunn's post-hoc test for Creative Flexibility:")
print(dunn_flexibility)

In [None]:
!pip install sentence-transformers

In [None]:
from sentence_transformers import SentenceTransformer
from scipy.spatial.distance import cosine

In [None]:
# Load the SentenceTransformer model; its encode() output is what the
# cosine-distance diversity measures in the following cells are computed on
model = SentenceTransformer('sentence-transformers/bert-base-nli-max-tokens')

In [None]:
# Raw condition code -> display name, applied to the response-level table
condition_mapping = dict(Absent='No LLM Response', Generate='List of Ideas', Coach='List of Strategies')
responses_df['condition_renamed'] = responses_df['condition'].map(condition_mapping)

# Function to calculate pairwise cosine distances and return median, max, and min
def calculate_cosine_distance_stats(responses):
    """Summarise the pairwise cosine distances between encoded responses.

    Args:
        responses: Sequence of response texts passed to ``model.encode``.

    Returns:
        Tuple ``(median, max, min)`` of the distances over every unordered pair
        of responses, each multiplied by 100. With fewer than two responses
        there are no pairs and ``(nan, nan, nan)`` is returned.
    """
    if len(responses) < 2:
        return np.nan, np.nan, np.nan
    vectors = model.encode(responses)
    n_vectors = len(vectors)
    pairwise = [
        cosine(vectors[first], vectors[second])
        for first in range(n_vectors)
        for second in range(first + 1, n_vectors)
    ]
    # Multiply each summary statistic by 100
    return tuple(summarise(pairwise) * 100 for summarise in (np.median, np.max, np.min))

In [None]:
from matplotlib.lines import Line2D

In [None]:
# Figure 7: Individual Diversity of Ideas by Condition and Phase

# One record per (worker, condition, item round) holding the median pairwise
# cosine distance between that worker's responses; rounds 1-3 are 'Practice',
# anything else is 'Test'
diversity_results = [
    {
        'worker_id': worker_id,
        'condition': condition,
        'phase': 'Practice' if item_order in [1, 2, 3] else 'Test',
        'item_order': item_order,
        'median_dist': calculate_cosine_distance_stats(group['response'].tolist())[0],
    }
    for (worker_id, condition, item_order), group
    in responses_df.groupby(['worker_id', 'condition', 'item_order'])
]

diversity_stats = pd.DataFrame(diversity_results)

# Attach the display name of each condition
diversity_stats['condition_renamed'] = diversity_stats['condition'].map(condition_mapping)

# Drop rounds for which no distance could be computed
diversity_stats = diversity_stats.dropna(subset=['median_dist'])

# Practice: average each participant's per-round value into a single row
practice_rounds = diversity_stats[diversity_stats['phase'] == 'Practice']
diversity_stats_practice = (
    practice_rounds.groupby(['worker_id', 'condition_renamed'])['median_dist']
    .mean()
    .reset_index()
)
diversity_stats_practice['phase'] = 'Practice'

# Test: rows are used as they are, then stacked under the practice averages
diversity_stats_test = diversity_stats[diversity_stats['phase'] == 'Test']
diversity_combined = pd.concat([diversity_stats_practice, diversity_stats_test])

# Mean and SEM per (condition, phase), with flat column names
diversity_stats_summary = (
    diversity_combined.groupby(['condition_renamed', 'phase'])
    .agg(mean_median_dist=('median_dist', 'mean'), sem_median_dist=('median_dist', 'sem'))
    .reset_index()
)

# Sort into plotting order
order = ['No LLM Response', 'List of Ideas', 'List of Strategies']
diversity_stats_summary['condition_renamed'] = pd.Categorical(
    diversity_stats_summary['condition_renamed'], categories=order, ordered=True
)
diversity_stats_summary = diversity_stats_summary.sort_values(['condition_renamed', 'phase'])

# Create the plot for diversity by phase
fig, ax = plt.subplots(figsize=(7, 7))

# Define markers, linestyles, and offsets
markers = ['o', 's', 'D']
linestyles = ['-']  # Solid for Median
colors = ['#7F7F7F', '#e57a77', '#3D65A5']  # Gray, Red, Blue (consistent with previous plots)
offsets = [-0.1, 0, 0.1]  # Offsets for each condition

# Plot Median Pairwise Cosine Distance (Mean ± SEM)
for i, condition in enumerate(order):
    condition_data = diversity_stats_summary[diversity_stats_summary['condition_renamed'] == condition]
    x_positions = np.array([1 + offsets[i], 2 + offsets[i]])  # Offset the x positions
    ax.errorbar(x_positions, condition_data['mean_median_dist'], yerr=condition_data['sem_median_dist'],
                fmt=markers[i], capsize=10, markersize=12, linestyle=linestyles[0], label=f'{condition} (Median)', color=colors[i], linewidth=3)

# Set x-ticks to align with 'Exposure' and 'Test' phases
ax.set_xticks([1, 2])
ax.set_xticklabels(['Exposure', 'Test'])

# Set title and labels
ax.set_title('Individual Diversity of Ideas by Condition and Phase (Median Only)')
ax.set_xlabel('Phase', fontsize=20)
ax.set_ylabel('Pairwise Cosine Distance', fontsize=20)

# Enlarge x and y tick labels
ax.tick_params(axis='x', labelsize=16)
ax.tick_params(axis='y', labelsize=16)

# Legend handles are built from the same markers and colors as the plotted
# series, so they cannot drift apart (the 'List of Strategies' handle used to be
# hard-coded to '#E5C454' while the series itself is drawn in '#3D65A5').
legend_elements = [
    Line2D([0], [0], marker=marker, color=color, label=f'{condition} (Median)',
           linestyle=linestyles[0], markersize=12)
    for condition, marker, color in zip(order, markers, colors)
]
# Legend intentionally disabled; enable the next line to draw it outside the axes
# ax.legend(handles=legend_elements, title='Condition (Median)', loc='upper left', bbox_to_anchor=(1, 1))

# Keep all spines and remove gridlines
ax.grid(False)  # Remove gridlines
for spine in ax.spines.values():
    spine.set_linewidth(0.5)  # Keep all spines with minimal thickness

plt.tight_layout()
plt.show()

In [None]:
# Kruskal-Wallis and Dunn's Test

# Split the participant-level diversity table by phase
diversity_practice = diversity_combined.loc[diversity_combined['phase'] == 'Practice']
diversity_test = diversity_combined.loc[diversity_combined['phase'] == 'Test']

# Kruskal-Wallis test for Practice phase: median distance compared across the three conditions
kruskal_practice = kruskal(*[
    diversity_practice.loc[diversity_practice['condition_renamed'] == label, 'median_dist']
    for label in ('No LLM Response', 'List of Ideas', 'List of Strategies')
])
print(f"Kruskal-Wallis test result for Practice phase: H-statistic = {kruskal_practice.statistic}, p-value = {kruskal_practice.pvalue}")

# Kruskal-Wallis test for Test phase
kruskal_test = kruskal(*[
    diversity_test.loc[diversity_test['condition_renamed'] == label, 'median_dist']
    for label in ('No LLM Response', 'List of Ideas', 'List of Strategies')
])
print(f"Kruskal-Wallis test result for Test phase: H-statistic = {kruskal_test.statistic}, p-value = {kruskal_test.pvalue}")

# Dunn's pairwise comparisons between conditions, Bonferroni-adjusted
diversity_dunn_options = dict(val_col='median_dist', group_col='condition_renamed', p_adjust='bonferroni')

# Practice phase
dunn_practice = posthoc_dunn(diversity_practice, **diversity_dunn_options)
print("Dunn's post-hoc test for Practice phase:")
print(dunn_practice)

# Test phase
dunn_test = posthoc_dunn(diversity_test, **diversity_dunn_options)
print("Dunn's post-hoc test for Test phase:")
print(dunn_test)

In [None]:
import random

In [None]:
# Monte Carlo Simulations

# Raw condition code -> display name, applied to the response-level table
condition_mapping = dict(Absent='No LLM Response', Generate='List of Ideas', Coach='List of Strategies')
responses_df['condition_renamed'] = responses_df['condition'].map(condition_mapping)

# Function to calculate pairwise cosine distances for a set of ideas
def calculate_pairwise_distances(responses):
    """Return the cosine distance for every unordered pair of encoded responses.

    Args:
        responses: Sequence of response texts passed to ``model.encode``.

    Returns:
        List of distances ordered by (first index, second index); empty when
        fewer than two embeddings come back.
    """
    vectors = model.encode(responses)
    n_vectors = len(vectors)
    return [
        cosine(vectors[first], vectors[second])
        for first in range(n_vectors)
        for second in range(first + 1, n_vectors)
    ]

# Function to generate Monte Carlo samples and calculate diversity measures
def monte_carlo_diversity(responses_df, n_samples=7, seed=None):
    """Draw one random item, sample its responses, and summarise their diversity.

    Args:
        responses_df: DataFrame with 'item_name' and 'response' columns.
        n_samples: Maximum number of responses to sample for the chosen item.
        seed: Optional seed. When given, the global ``random`` generator is
            re-seeded with it before sampling.

    Returns:
        Tuple ``(mean, median, max, min)`` of the pairwise cosine distances
        between the sampled responses, each multiplied by 100. All four values
        are NaN when the frame has no items or the sample yields no pair.
    """
    no_result = (np.nan, np.nan, np.nan, np.nan)

    if seed is not None:
        random.seed(seed)

    # De-duplicate in order of first appearance. Going through set() gave a
    # hash-dependent order (string hashing is randomised per interpreter), so
    # the same seed could select a different item after a kernel restart.
    unique_items = list(dict.fromkeys(responses_df['item_name']))
    if not unique_items:
        # Nothing to sample from; random.choice would raise on an empty list.
        return no_result

    # Sample a specific "item_name" first, then up to n_samples ideas for that item
    sampled_item = random.choice(unique_items)
    item_responses = responses_df[responses_df['item_name'] == sampled_item]['response'].tolist()
    sampled_responses = random.sample(item_responses, min(len(item_responses), n_samples))

    distances = calculate_pairwise_distances(sampled_responses)
    if not distances:
        return no_result
    return np.mean(distances) * 100, np.median(distances) * 100, np.max(distances) * 100, np.min(distances) * 100

# Run 150 seeded Monte Carlo draws for every (condition, phase, item round) group
measure_names = ('mean_dist', 'median_dist', 'max_dist', 'min_dist')
monte_carlo_results = [
    {
        'condition': condition,
        'phase': phase,
        'item_order': item_order,
        **dict(zip(measure_names, monte_carlo_diversity(group, seed=seed))),
    }
    for (condition, phase, item_order), group
    in responses_df.groupby(['condition_renamed', 'phase', 'item_order'])
    for seed in range(150)
]

monte_carlo_df = pd.DataFrame(monte_carlo_results)

# Mean and SEM of every diversity measure per (condition, phase); output columns
# are named '<statistic>_<measure>', e.g. 'mean_median_dist'
summary_spec = {
    f'{statistic}_{measure}': (measure, statistic)
    for measure in measure_names
    for statistic in ('mean', 'sem')
}
diversity_measures_summary = (
    monte_carlo_df.groupby(['condition', 'phase'])
    .agg(**summary_spec)
    .reset_index()
)

# Sort into plotting order
order = ['No LLM Response', 'List of Ideas', 'List of Strategies']
diversity_measures_summary['condition'] = pd.Categorical(
    diversity_measures_summary['condition'], categories=order, ordered=True
)
diversity_measures_summary = diversity_measures_summary.sort_values(['condition', 'phase'])

In [None]:
# Figure 8: Group Diversity by Condition and Phase

import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D

# Shared styling; entries are used positionally, one per condition in `order`.
colors = ['#7F7F7F', '#e57a77', '#3D65A5']  # Gray, Red, Blue
markers = ['o', 's', 'D']  # Circle, Square, Diamond
linestyles = ['-']  # Solid for Median
offsets = [-0.1, 0, 0.1]  # Horizontal nudge so the three conditions do not overlap

fig, ax = plt.subplots(figsize=(7, 7))

# One error-bar series per condition: mean of the median pairwise distance,
# with the SEM as the error bar. Points are placed at 0, 1, ... plus the
# condition's offset, in the row order of the summary table.
for i, condition in enumerate(order):
    condition_data = diversity_measures_summary.loc[diversity_measures_summary['condition'] == condition]
    x_positions = np.arange(len(condition_data)) + offsets[i]
    ax.errorbar(
        x_positions,
        condition_data['mean_median_dist'],
        yerr=condition_data['sem_median_dist'],
        fmt=markers[i],
        linestyle=linestyles[0],
        color=colors[i],
        linewidth=3,
        markersize=12,
        capsize=10,
        label=f'{condition} (Median)',
    )

# Positions 0 and 1 are labelled 'Exposure' and 'Test'.
ax.set_xticks([0, 1])
ax.set_xticklabels(['Exposure', 'Test'])

# Title and axis labels
ax.set_title('Group Diversity (Median) by Condition and Phase')
ax.set_xlabel('Phase', fontsize=20)
ax.set_ylabel('Pairwise Cosine Distance', fontsize=20)

# Larger tick labels on both axes
ax.tick_params(axis='both', labelsize=16)

# Proxy handles for a per-condition legend. The legend call below is commented
# out, so these handles are currently not drawn.
legend_elements = [
    Line2D([0], [0], marker=marker, color=color, label=label, linestyle='-', markersize=12)
    for marker, color, label in zip(
        ['o', 's', 'D'],
        colors,
        ['No LLM Response (Median)', 'List of Ideas (Median)', 'List of Strategies (Median)'],
    )
]

# ax.legend(handles=legend_elements, title='Condition (Median)', loc='upper left', bbox_to_anchor=(1, 1))

# No gridlines; all four spines stay visible but thin.
ax.grid(False)
for spine in ax.spines.values():
    spine.set_linewidth(0.5)

plt.tight_layout()
plt.show()

In [None]:
# Kruskal-Wallis and Dunn's Test
#
# Both tests are run on the individual Monte Carlo draws in `monte_carlo_df`
# (column 'median_dist'). `diversity_measures_summary` has a single aggregated
# row per condition and phase, so testing on it would compare three groups of
# one value each and say nothing about the spread of the draws.
#
# NOTE(review): draws resampled from the same responses are not independent
# observations, so the p-values below should be read with that in mind.

# Per-phase summary rows (one per condition); kept under their original names
# for any other cells that use them.
diversity_exposure = diversity_measures_summary[diversity_measures_summary['phase'] == 'Exposure']
diversity_test = diversity_measures_summary[diversity_measures_summary['phase'] == 'Test']


def _phase_draws(phase_name):
    """Return the rows of `monte_carlo_df` for one phase without NaN 'median_dist'.

    `monte_carlo_diversity` returns NaN when it has no pairwise distances; with
    scipy's default NaN handling a single NaN would make the test result NaN,
    so those rows are dropped before testing.
    """
    in_phase = monte_carlo_df['phase'] == phase_name
    return monte_carlo_df[in_phase].dropna(subset=['median_dist'])


def _kruskal_across_conditions(draws):
    """Run the Kruskal-Wallis H-test on 'median_dist', one sample per condition."""
    samples = [
        draws.loc[draws['condition'] == name, 'median_dist']
        for name in ('No LLM Response', 'List of Ideas', 'List of Strategies')
    ]
    return kruskal(*samples)


exposure_draws = _phase_draws('Exposure')
test_draws = _phase_draws('Test')

# Kruskal-Wallis test for Exposure phase
kruskal_exposure = _kruskal_across_conditions(exposure_draws)
print(f"Kruskal-Wallis test result for Exposure phase: H-statistic = {kruskal_exposure.statistic}, p-value = {kruskal_exposure.pvalue}")

# Kruskal-Wallis test for Test phase
kruskal_test = _kruskal_across_conditions(test_draws)
print(f"Kruskal-Wallis test result for Test phase: H-statistic = {kruskal_test.statistic}, p-value = {kruskal_test.pvalue}")

# Dunn's test for Exposure phase (only when the omnibus test is significant)
if kruskal_exposure.pvalue < 0.05:
    dunn_exposure = posthoc_dunn(
        exposure_draws, val_col='median_dist', group_col='condition', p_adjust='bonferroni'
    )
    print("Dunn's post-hoc test for Exposure phase:")
    print(dunn_exposure)

# Dunn's test for Test phase (only when the omnibus test is significant)
if kruskal_test.pvalue < 0.05:
    dunn_test = posthoc_dunn(
        test_draws, val_col='median_dist', group_col='condition', p_adjust='bonferroni'
    )
    print("Dunn's post-hoc test for Test phase:")
    print(dunn_test)

In [None]:
# Exclude conditions with constant values, then re-run the Kruskal-Wallis test.
#
# The check is applied to the Exposure-phase Monte Carlo draws in
# `monte_carlo_df`. The summary table has a single row per condition, so every
# condition would count as "constant" there and nothing would be left to test.
# As a result `diversity_exposure_filtered` holds per-draw rows (column
# 'median_dist') rather than summary rows.
exposure_draws_for_filter = (
    monte_carlo_df[monte_carlo_df['phase'] == 'Exposure']
    .dropna(subset=['median_dist'])
)

# Keep only conditions whose draws take more than one distinct value.
diversity_exposure_filtered = exposure_draws_for_filter.groupby('condition').filter(
    lambda draws: draws['median_dist'].nunique() > 1
)

# One sample per surviving condition. Conditions removed by the filter are left
# out of the test instead of being passed in as empty samples.
candidate_samples = [
    diversity_exposure_filtered.loc[diversity_exposure_filtered['condition'] == name, 'median_dist']
    for name in ('No LLM Response', 'List of Ideas', 'List of Strategies')
]
filtered_samples = [sample for sample in candidate_samples if not sample.empty]

kruskal_exposure_filtered = kruskal(*filtered_samples)
print(f"Filtered Kruskal-Wallis test result for Exposure phase: H-statistic = {kruskal_exposure_filtered.statistic}, p-value = {kruskal_exposure_filtered.pvalue}")

In [None]:
# Re-run Kruskal-Wallis test for the Exposure phase.
# The samples are the per-draw 'median_dist' values from `monte_carlo_df`; the
# summary table would contribute only one value per condition. NaN draws are
# dropped so they cannot turn the result into NaN.
exposure_median_draws = (
    monte_carlo_df.loc[monte_carlo_df['phase'] == 'Exposure', ['condition', 'median_dist']]
    .dropna(subset=['median_dist'])
)
kruskal_exposure = kruskal(*[
    exposure_median_draws.loc[exposure_median_draws['condition'] == name, 'median_dist']
    for name in ('No LLM Response', 'List of Ideas', 'List of Strategies')
])
print(f"Kruskal-Wallis test result for Exposure phase: H-statistic = {kruskal_exposure.statistic}, p-value = {kruskal_exposure.pvalue}")