In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import warnings
# Suppress warnings of every category for the whole session so that library
# notices do not clutter the cell outputs.
warnings.simplefilter('ignore')

# Plot appearance: seaborn-flavoured matplotlib style sheet, then the "husl" colour palette
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# Default figure size for plots that do not pass their own figsize
plt.rcParams.update({'figure.figsize': (12, 8)})

In [4]:
# --- Genetic-algorithm output ---------------------------------------------
# Read the assignment table. When the CSV is missing, results_df is set to
# None so that the later cells can skip themselves.
try:
    results_df = pd.read_csv('matching_ergebnis_new_format.csv')
except FileNotFoundError:
    print("Results file not found. Please run the genetic algorithm first.")
    results_df = None
else:
    print("Loaded results from CSV file")
    print(f"Total assignments: {len(results_df)}")

# --- Original input data (for comparison) ---------------------------------
# NOTE(review): parse_datenbank_data is not defined or imported in the cells
# visible here; if it is missing, the broad handler below turns the NameError
# into a printed message and both frames become None. Confirm the helper is
# imported before this cell runs.
try:
    personnel_df, project_df = parse_datenbank_data("../data/datenbank.txt")
    print(f"Loaded {len(personnel_df)} personnel records")
    print(f"Loaded {len(project_df)} project tasks")
except Exception as load_error:
    print(f"Error loading original data: {load_error}")
    personnel_df = project_df = None

Results file not found. Please run the genetic algorithm first.
Error loading original data: name 'parse_datenbank_data' is not defined


In [5]:
if results_df is not None and personnel_df is not None and project_df is not None:
    # Overview dashboard: a 2x2 grid with assignment status, competency
    # matches and two effort histograms. personnel_df / project_df only act
    # as a guard here; every panel is drawn from results_df.
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(16, 12))

    # 1. Assignment Status
    # Rows whose person_id equals the 'Unassigned' marker count as unassigned.
    assigned_count = len(results_df[results_df['person_id'] != 'Unassigned'])
    unassigned_count = len(results_df[results_df['person_id'] == 'Unassigned'])

    ax1.pie([assigned_count, unassigned_count],
            labels=['Assigned', 'Unassigned'],
            autopct='%1.1f%%',
            colors=['lightgreen', 'lightcoral'],
            startangle=90)
    ax1.set_title('Project Assignment Status', fontsize=14, fontweight='bold')

    # 2. Competency Match Distribution
    competency_matches = results_df['competency_match'].value_counts()
    # Build one label per counted value, in the same order as the counts.
    # A fixed two-item label list does not fit when only one class is present
    # (all rows match, or none do): the label count must equal the wedge count.
    match_labels = ['Competency Match' if flag else 'No Match'
                    for flag in competency_matches.index]
    ax2.pie(competency_matches.values,
            labels=match_labels,
            autopct='%1.1f%%',
            colors=['lightblue', 'lightyellow'],
            startangle=90)
    ax2.set_title('Competency Match Distribution', fontsize=14, fontweight='bold')

    # 3. Person-Months Distribution
    ax3.hist(results_df['person_months'], bins=10, color='skyblue', alpha=0.7, edgecolor='black')
    ax3.set_xlabel('Person-Months')
    ax3.set_ylabel('Number of Projects')
    ax3.set_title('Distribution of Person-Months per Project', fontsize=14, fontweight='bold')
    ax3.grid(True, alpha=0.3)

    # 4. Monthly Effort Distribution
    ax4.hist(results_df['monthly_effort'], bins=10, color='lightgreen', alpha=0.7, edgecolor='black')
    ax4.set_xlabel('Monthly Effort (Person-Months)')
    ax4.set_ylabel('Number of Projects')
    ax4.set_title('Distribution of Monthly Effort', fontsize=14, fontweight='bold')
    ax4.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

In [6]:
if results_df is not None:
    # Gantt-style chart: one horizontal bar per assigned task, coloured by person.
    fig, ax = plt.subplots(figsize=(16, 10))

    # Parse the 'dd.mm.yy' date strings into helper datetime columns on results_df
    for text_col, parsed_col in (('start_date', 'start_dt'), ('end_date', 'end_dt')):
        results_df[parsed_col] = pd.to_datetime(results_df[text_col], format='%d.%m.%y')

    # One Set3 colour per distinct person_id value (the 'Unassigned' marker,
    # if present, also receives a slot even though it is never drawn)
    unique_people = results_df['person_id'].unique()
    colors = plt.cm.Set3(np.linspace(0, 1, len(unique_people)))
    color_map = dict(zip(unique_people, colors))

    # Draw the bars; rows carrying the 'Unassigned' marker are skipped
    bar_style = dict(alpha=0.7, edgecolor='black', linewidth=0.5)
    for _, task in results_df.iterrows():
        owner = task['person_id']
        if owner == 'Unassigned':
            continue
        span_days = (task['end_dt'] - task['start_dt']).days
        ax.barh(y=task['task_name'],
                width=span_days,
                left=task['start_dt'],
                color=color_map[owner],
                **bar_style)

    # Axis labels and title
    ax.set_xlabel('Timeline', fontsize=12)
    ax.set_ylabel('Projects', fontsize=12)
    ax.set_title('Project Timeline and Assignments', fontsize=16, fontweight='bold')

    # Date axis: a tick every second month, rendered like "Jan 2024", rotated 45 degrees
    mpl_dates = plt.matplotlib.dates
    ax.xaxis.set_major_locator(mpl_dates.MonthLocator(interval=2))
    ax.xaxis.set_major_formatter(mpl_dates.DateFormatter('%b %Y'))
    plt.setp(ax.xaxis.get_majorticklabels(), rotation=45)

    # Legend: one coloured swatch per real person
    legend_elements = []
    for person in unique_people:
        if person != 'Unassigned':
            swatch = plt.Rectangle((0, 0), 1, 1,
                                   facecolor=color_map[person],
                                   label=f"Person {person}")
            legend_elements.append(swatch)
    ax.legend(handles=legend_elements, loc='upper right', bbox_to_anchor=(1.15, 1))

    plt.tight_layout()
    plt.show()

In [7]:
if results_df is not None and personnel_df is not None:
    # Calculate workload per person: sum both effort columns over all rows
    # that do not carry the 'Unassigned' marker.
    workload_per_person = results_df[results_df['person_id'] != 'Unassigned'].groupby('person_id').agg({
        'person_months': 'sum',
        'monthly_effort': 'sum'
    }).reset_index()

    # Merge with personnel data to get names (left join keeps every person_id
    # from the results even when personnel_df has no matching id)
    workload_per_person = workload_per_person.merge(
        personnel_df[['id', 'name']],
        left_on='person_id',
        right_on='id',
        how='left'
    )

    # Tick labels for the bars. A person_id without a personnel record gets
    # name = NaN from the left merge; NaN cannot serve as a categorical bar
    # label, so fall back to the id itself in that case.
    person_labels = [
        str(person_id) if pd.isna(person_name) else str(person_name)
        for person_id, person_name in zip(workload_per_person['person_id'],
                                          workload_per_person['name'])
    ]

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))

    # 1. Total Person-Months per Person
    bars1 = ax1.bar(person_labels, workload_per_person['person_months'],
                    color='lightcoral', alpha=0.7, edgecolor='black')
    ax1.set_xlabel('Person')
    ax1.set_ylabel('Total Person-Months')
    ax1.set_title('Workload Distribution per Person', fontsize=14, fontweight='bold')
    ax1.tick_params(axis='x', rotation=45)

    # 2. Monthly Effort per Person
    bars2 = ax2.bar(person_labels, workload_per_person['monthly_effort'],
                    color='lightblue', alpha=0.7, edgecolor='black')
    ax2.set_xlabel('Person')
    ax2.set_ylabel('Total Monthly Effort')
    ax2.set_title('Monthly Effort Distribution per Person', fontsize=14, fontweight='bold')
    ax2.tick_params(axis='x', rotation=45)

    # Add value labels on bars: the bar height, one decimal, just above each bar
    for axis, bars in ((ax1, bars1), (ax2, bars2)):
        for bar in bars:
            height = bar.get_height()
            axis.text(bar.get_x() + bar.get_width()/2., height + 0.1,
                      f'{height:.1f}', ha='center', va='bottom')

    plt.tight_layout()
    plt.show()

In [8]:
if results_df is not None and personnel_df is not None:
    # Per-competency summary over assigned rows only: number of matches,
    # summed person-months and the number of rows in each group.
    assigned_only = results_df[results_df['person_id'] != 'Unassigned']
    competency_analysis = (
        assigned_only
        .groupby('assigned_competency')
        .agg(
            competency_match=('competency_match', 'sum'),
            person_months=('person_months', 'sum'),
            total_projects=('competency_match', 'size'),
        )
        .reset_index()
    )
    # Share of rows in each group flagged as a competency match, in percent
    competency_analysis['match_rate'] = (
        competency_analysis['competency_match'] / competency_analysis['total_projects'] * 100
    )

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))
    competency_names = competency_analysis['assigned_competency']

    # Left panel: match rate per competency, y axis fixed to 0-100 %
    bars1 = ax1.bar(competency_names, competency_analysis['match_rate'],
                    color='lightgreen', alpha=0.7, edgecolor='black')
    ax1.set_xlabel('Competency')
    ax1.set_ylabel('Match Rate (%)')
    ax1.set_title('Competency Match Rate by Type', fontsize=14, fontweight='bold')
    ax1.set_ylim(0, 100)

    # Right panel: summed person-months per competency
    bars2 = ax2.bar(competency_names, competency_analysis['person_months'],
                    color='orange', alpha=0.7, edgecolor='black')
    ax2.set_xlabel('Competency')
    ax2.set_ylabel('Total Person-Months')
    ax2.set_title('Total Workload by Competency', fontsize=14, fontweight='bold')

    # Value labels above every bar; each panel has its own vertical offset and format
    label_specs = (
        (ax1, bars1, 2, '{:.1f}%'),
        (ax2, bars2, 0.1, '{:.1f}'),
    )
    for axis, bars, lift, template in label_specs:
        for bar in bars:
            height = bar.get_height()
            axis.text(bar.get_x() + bar.get_width()/2., height + lift,
                      template.format(height), ha='center', va='bottom')

    plt.tight_layout()
    plt.show()

In [9]:
if results_df is not None:
    # Create a formatted results table: one two-line entry per row of
    # results_df in chronological order, followed by summary statistics.
    print("Detailed Assignment Results:")
    print("="*80)

    # Sort by start date. The column holds 'dd.mm.yy' strings, which would
    # order by day-of-month if compared as text, so sort on the parsed dates.
    # The stable sort keeps the file order for rows sharing a start date.
    results_sorted = results_df.sort_values(
        'start_date',
        key=lambda dates: pd.to_datetime(dates, format='%d.%m.%y'),
        kind='stable'
    )

    # Number the entries by their position in the sorted listing; the frame's
    # own index would appear out of order after sorting.
    for position, (_, row) in enumerate(results_sorted.iterrows(), start=1):
        status = "✓" if row['competency_match'] else "✗"
        assignment_status = "Assigned" if row['person_id'] != 'Unassigned' else "Unassigned"

        print(f"{position:2d}. {row['task_name']:<20} → {row['name']:<20} "
              f"{status} ({assignment_status})")
        print(f"     Duration: {row['start_date']} - {row['end_date']} "
              f"({row['duration_months']} months, {row['person_months']:.1f} PM)")

    # Split once and reuse for the counts and the match rate below
    assigned_rows = results_df[results_df['person_id'] != 'Unassigned']
    unassigned_rows = results_df[results_df['person_id'] == 'Unassigned']

    print("\nSummary Statistics:")
    print(f"Total projects: {len(results_df)}")
    print(f"Assigned projects: {len(assigned_rows)}")
    print(f"Unassigned projects: {len(unassigned_rows)}")
    print(f"Competency matches: {results_df['competency_match'].sum()}")
    print(f"Total person-months: {results_df['person_months'].sum():.1f}")

    if len(assigned_rows) > 0:
        # Numerator and denominator both cover assigned rows only, so the
        # rate cannot exceed 100 %.
        match_rate = assigned_rows['competency_match'].sum() / len(assigned_rows) * 100
        print(f"Competency match rate: {match_rate:.1f}%")

In [10]:
# Rebuild the project timeline chart and write it to a PNG file
if results_df is not None:
    fig, ax = plt.subplots(figsize=(16, 10))

    # Parse the 'dd.mm.yy' date strings into datetime helper columns on results_df
    date_format = '%d.%m.%y'
    results_df['start_dt'] = pd.to_datetime(results_df['start_date'], format=date_format)
    results_df['end_dt'] = pd.to_datetime(results_df['end_date'], format=date_format)

    # Assign a Set3 colour to every distinct person_id value
    unique_people = results_df['person_id'].unique()
    colors = plt.cm.Set3(np.linspace(0, 1, len(unique_people)))
    color_map = dict(zip(unique_people, colors))

    # One horizontal bar per task; rows with the 'Unassigned' marker are left out.
    # Bar length is the number of whole days between start and end.
    for _, entry in results_df.iterrows():
        assignee = entry['person_id']
        if assignee == 'Unassigned':
            continue
        ax.barh(
            y=entry['task_name'],
            width=(entry['end_dt'] - entry['start_dt']).days,
            left=entry['start_dt'],
            color=color_map[assignee],
            alpha=0.7,
            edgecolor='black',
            linewidth=0.5,
        )

    ax.set_xlabel('Timeline', fontsize=12)
    ax.set_ylabel('Projects', fontsize=12)
    ax.set_title('Project Timeline and Assignments', fontsize=16, fontweight='bold')

    # Date axis: tick every second month, formatted like "Jan 2024", rotated 45 degrees
    date_tools = plt.matplotlib.dates
    ax.xaxis.set_major_locator(date_tools.MonthLocator(interval=2))
    ax.xaxis.set_major_formatter(date_tools.DateFormatter('%b %Y'))
    plt.setp(ax.xaxis.get_majorticklabels(), rotation=45)

    # Legend swatches for every person except the 'Unassigned' marker
    named_people = [person for person in unique_people if person != 'Unassigned']
    legend_elements = [
        plt.Rectangle((0, 0), 1, 1, facecolor=color_map[person], label=f"Person {person}")
        for person in named_people
    ]
    ax.legend(handles=legend_elements, loc='upper right', bbox_to_anchor=(1.15, 1))

    # Write the figure to disk at 300 dpi with a tight bounding box, then display it
    plt.tight_layout()
    plt.savefig('project_timeline.png', dpi=300, bbox_inches='tight')
    plt.show()

    print("Visualizations saved as 'project_timeline.png'")