# Final Decision

In [12]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Global plot styling: seaborn dark grid, Arial at 1.2x scale, 300 dpi figures
sns.set_theme(style="darkgrid", font="Arial", font_scale=1.2)
plt.rcParams['figure.dpi'] = 300

# Read the final-decision records, keeping only rows where both the task and
# the final decision are present
df = pd.read_csv('../data/finalDecision/finalDecision.csv').dropna(
    subset=['task', 'finalDecision']
)

# Calculate percentages for each task and decision
def calculate_percentages(group):
    """Return each distinct value's share of ``group`` as a percentage.

    Args:
        group: pandas Series of values (in this notebook, the
            ``finalDecision`` entries belonging to a single task).

    Returns:
        Series indexed by distinct value, holding ``count / len(group) * 100``.
        The denominator is the full length of ``group``; ``value_counts``
        skips missing values by default, so any NaN entries would enlarge
        the total without contributing to a share.
    """
    counts = group.value_counts()
    n_rows = len(group)
    return counts / n_rows * 100

# Per-task decision shares: a Series keyed by (task, decision) ...
decision_share_by_task = df.groupby('task')['finalDecision'].apply(calculate_percentages)
# ... flattened into a three-column frame with explicit column names
percentages = decision_share_by_task.reset_index()
percentages.columns = ['task', 'finalDecision', 'percentage']

# Create the visualization: one horizontal stacked bar per task, each segment
# showing the percentage of final decisions that went to one candidate.
fig, ax = plt.subplots(figsize=(14, 8))

# Bars are placed upwards from y=0, so walking the tasks in descending order
# leaves the lowest-numbered task at the top of the chart.
tasks = sorted(df['task'].unique(), reverse=True)
task_spacing = 0.5
current_position = 0

# Color mapping to ensure consistent colors for each decision
color_map = {1: '#FF6B6B', 2: '#4ECDC4', 3: '#45B7D1'}

# Decisions that already own a legend entry (a candidate can appear in
# several tasks but must be listed only once)
legend_added = set()

for task in tasks:
    task_data = percentages[percentages['task'] == task].sort_values('finalDecision')

    # Running x offset: each segment starts where the previous one ended
    left = 0
    for raw_decision, share in zip(task_data['finalDecision'], task_data['percentage']):
        decision = int(raw_decision)
        color = color_map[decision]

        # Attach the legend label to the real segment the first time a
        # candidate is drawn, instead of adding an invisible zero-width bar.
        bar_kwargs = {}
        if decision not in legend_added:
            bar_kwargs['label'] = f'Candidate {decision}'
            legend_added.add(decision)

        ax.barh(current_position, share, left=left,
                color=color, height=0.3, **bar_kwargs)

        # Percentage label centred inside the segment
        ax.text(left + share / 2, current_position, f'{share:.1f}%',
                ha='center', va='center',
                color='white', fontweight='bold', fontsize=10)

        left += share

    # Task label, drawn in the margin left of x=0 (made visible by the xlim below)
    ax.text(-5, current_position, f'Task {int(task)}',
            ha='right', va='center',
            color='darkslategray', fontweight='bold', fontsize=12)

    current_position += task_spacing

# Customize the plot
ax.set_title('Distribution of Final Decisions by Task (%)',
             pad=20, fontsize=14, color='darkslategray')
ax.set_xlabel('Percentage', fontsize=12, color='darkslategray', labelpad=10)

# Leave room on the left for the task labels and a little air around the bars
ax.set_xlim(-10, 100)
ax.set_ylim(-0.5, len(tasks) * task_spacing)

# Remove y-axis ticks as we have custom labels
ax.set_yticks([])

# Add grid only for x-axis
ax.grid(axis='x', linestyle='--', alpha=0.5)

# Legend ordered by candidate number. Skipped when nothing was plotted,
# because unpacking an empty zip() would raise ValueError.
handles, labels = ax.get_legend_handles_labels()
if handles:
    labels, handles = zip(*sorted(zip(labels, handles),
                                  key=lambda t: int(t[0].split()[-1])))
    ax.legend(handles, labels,
              loc='upper center', bbox_to_anchor=(0.5, -0.15),
              ncol=3, fontsize=10, frameon=False)

# Adjust layout and save; create the output folder first so a fresh checkout
# does not fail with FileNotFoundError.
output_path = Path('../dataVisualization/finalDecision/final_decision_distribution.png')
output_path.parent.mkdir(parents=True, exist_ok=True)
fig.tight_layout()
fig.savefig(output_path, dpi=300, bbox_inches='tight', facecolor='white')

# Close this specific figure: the chart is only written to disk, not shown inline
plt.close(fig)

# Echo the plotted numbers so the chart can be checked against the data,
# listing tasks in the same (descending) order used by the visualization
print("\nPercentages for each task and decision:")
for task in sorted(df['task'].unique(), reverse=True):
    print(f"\nTask {int(task)}:")
    task_data = percentages.loc[percentages['task'] == task].sort_values('finalDecision')
    for decision, share in zip(task_data['finalDecision'], task_data['percentage']):
        print(f"Candidate {int(decision)}: {share:.1f}%")


Percentages for each task and decision:

Task 2:
Candidate 1: 83.3%
Candidate 3: 16.7%

Task 1:
Candidate 1: 79.2%
Candidate 2: 12.5%
Candidate 3: 8.3%
