# Data Visualization

In [8]:
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Read the self-reported immersion CSV into a wide-format frame
data = pd.read_csv('../data/selfReported/selfReportedImmersion.csv')

# Seaborn theme applied to every figure drawn afterwards
sns.set_style('darkgrid')

# Both tasks are reshaped the same way: keep 'role' as the identifier and
# stack the selected columns into a 'Measure' (column name) / 'Score' (value) pair.
melt_options = dict(id_vars=['role'], var_name='Measure', value_name='Score')

# Task 1: long-format table of the T1P1..T1P3 columns
t1_data = data.melt(value_vars=['T1P1', 'T1P2', 'T1P3'], **melt_options)

# Task 2: long-format table of the T2V1..T2V3 columns
t2_data = data.melt(value_vars=['T2V1', 'T2V2', 'T2V3'], **melt_options)

def save_role_boxplot(scores, legend_labels, output_path):
    """Draw box plots of ``Score`` per ``role``, split by ``Measure``, and save them.

    Both task figures go through this one function so their size, axis
    labels, font sizes, y-range and legend layout cannot drift apart.
    No title is drawn on the figure.

    Parameters
    ----------
    scores : pandas.DataFrame
        Long-format frame with ``role``, ``Measure`` and ``Score`` columns.
    legend_labels : dict
        Maps each ``Measure`` code to the text shown in the legend. The key
        order also fixes the hue (box and colour) order, so the relabelled
        legend always matches the boxes it describes.
    output_path : str or pathlib.Path
        Destination file. Missing parent directories are created.

    Raises
    ------
    KeyError
        If the plot contains a ``Measure`` code absent from ``legend_labels``.
    """
    fig, ax = plt.subplots(figsize=(3, 4))
    sns.boxplot(data=scores,
                x='role',
                y='Score',
                hue='Measure',
                hue_order=list(legend_labels),
                palette='Set2',
                ax=ax)

    ax.set_xlabel('Role', fontsize=10)
    ax.set_ylabel('Preference', fontsize=10)
    # Half a unit of padding around the 1-7 range seen in the scores
    ax.set_ylim(0.5, 7.5)

    # Relabel by looking up each handle's own code rather than by position,
    # then place the legend in a single frameless row underneath the axes.
    handles, codes = ax.get_legend_handles_labels()
    ax.legend(handles, [legend_labels[code] for code in codes],
              title='', fontsize=7, bbox_to_anchor=(0.5, -0.13),
              loc='upper center', ncol=3, frameon=False)

    output_path = Path(output_path)
    # savefig does not create directories; do it here so a fresh checkout runs
    output_path.parent.mkdir(parents=True, exist_ok=True)
    fig.savefig(output_path, dpi=300, bbox_inches='tight')
    plt.close(fig)  # Close the figure to free memory


output_dir = Path('../dataVisualization/selfReportedData_immersion')

# Create and save Task 1 plot
save_role_boxplot(
    t1_data,
    {'T1P1': 'Profile 1', 'T1P2': 'Profile 2', 'T1P3': 'Profile 3'},
    output_dir / 'task1_immersion_scores.pdf',
)

# Create and save Task 2 plot
save_role_boxplot(
    t2_data,
    {'T2V1': 'Company 1', 'T2V2': 'Company 2', 'T2V3': 'Company 3'},
    output_dir / 'task2_immersion_scores.pdf',
)

# Descriptive statistics of Score for every (role, Measure) pair, one table per task
for heading, task_scores in (
    ("\nTask 1 Summary Statistics:", t1_data),
    ("\nTask 2 Summary Statistics:", t2_data),
):
    grouped_scores = task_scores.groupby(['role', 'Measure'])['Score']
    print(heading)
    print(grouped_scores.describe())


Task 1 Summary Statistics:
                count      mean       std  min   25%  50%   75%  max
role   Measure                                                      
Junior T1P1      24.0  2.416667  1.886258  1.0  1.00  2.0  2.25  7.0
       T1P2      24.0  5.958333  1.680558  1.0  6.00  6.0  7.00  7.0
       T1P3      24.0  3.375000  1.929463  1.0  1.00  3.5  5.00  7.0
Senior T1P1      72.0  5.277778  2.208959  1.0  3.75  6.0  7.00  7.0
       T1P2      72.0  4.013889  2.024450  1.0  2.00  4.5  6.00  7.0
       T1P3      72.0  2.486111  1.719753  1.0  1.00  2.0  3.00  6.0

Task 2 Summary Statistics:
                count      mean       std  min   25%  50%   75%  max
role   Measure                                                      
Junior T2V1      24.0  2.916667  2.083406  1.0  1.00  2.0  5.00  7.0
       T2V2      24.0  5.000000  1.888178  1.0  3.75  5.5  6.25  7.0
       T2V3      24.0  4.708333  1.573674  1.0  4.00  5.0  6.00  7.0
Senior T2V1      72.0  5.875000  1.582808  1.0 