In [1]:
import os
import json
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import numpy as np
import sys


In [2]:
class WorkloadAnalyzer:
    def __init__(self):
        
        self.teams = {
            'Leadershiptasks': ['Ahmad_Al-Rashid', 'Michael_Anderson', 'Fatima_Al-Qassim'],
            'projectalpha': ['William_Thompson', 'Sarah_Mitchell', 'Omar_Al-Sayed', 'Mariam_Al-Hashimi'],
            'projectbeta': ['Hassan_Al-Mahmoud', 'Elizabeth_Parker', 'James_Cooper', 'Noura_Al-Zahrani'],
            'projectgamma': ['Mohammed_Al-Qahtani', 'Katherine_Wilson', 'Robert_Stevens', 'Leila_Al-Harbi'],
            'operations': ['Khalid_Al-Nasser', 'Jennifer_Brooks', 'Aisha_Al-Dubai'],
            'technical': ['Thomas_Richardson', 'Ibrahim_Al-Kuwaiti']
        }
        
        self.colors = plt.cm.Set3(np.linspace(0, 1, 10))
        self.current_date = datetime.now()
        self.month_year = self.current_date.strftime('%Y_%m')

    def load_data(self):
        """Load all JSON files and create DataFrames"""
        all_data = []
        
        
        base_dir = 'tasks'
        
        for team, members in self.teams.items():
            team_path = os.path.join(base_dir, team)
            
            for member in members:
                file_path = os.path.join(team_path, f"{member}_{self.month_year}.json")
                
                try:
                    with open(file_path, 'r', encoding='utf-8') as f:
                        tasks = json.load(f)
                        
                        
                        df = pd.DataFrame(tasks)
                        df['team'] = team
                        df['person'] = member.replace('_', ' ')
                        df['date'] = pd.to_datetime(df['date'])
                        
                        all_data.append(df)
                except Exception as e:
                    print(f"Error loading data for {member} in {team}: {str(e)}")
        
        return pd.concat(all_data, ignore_index=True) if all_data else pd.DataFrame()

    def create_analysis_directory(self):
        """Create directory structure for analysis outputs"""
        base_dir = os.path.join('analysis', self.month_year)
        os.makedirs(base_dir, exist_ok=True)
        
        for team in self.teams.keys():
            team_dir = os.path.join(base_dir, team)
            os.makedirs(team_dir, exist_ok=True)
            
            os.makedirs(os.path.join(team_dir, 'individual'), exist_ok=True)
            os.makedirs(os.path.join(team_dir, 'team'), exist_ok=True)
        
        return base_dir

    def plot_individual_workload(self, df, person, team, base_dir):
        """Create individual workload visualizations"""
        person_data = df[df['person'] == person]
        if person_data.empty:
            print(f"No data found for {person}")
            return
            
        fig = plt.figure(figsize=(15, 10))
        plt.suptitle(f"Workload Analysis for {person}", fontsize=16, y=0.95)
        
        ax1 = plt.subplot(221)
        daily_work = person_data.groupby('date')['estimated_hours'].sum()
        ax1.plot(daily_work.index, daily_work.values, marker='o')
        ax1.set_title('Daily Workload')
        ax1.set_xlabel('Date')
        ax1.set_ylabel('Hours')
        plt.xticks(rotation=45)
        
        ax2 = plt.subplot(222)
        priority_counts = person_data['priority'].value_counts()
        if not priority_counts.empty:
            ax2.pie(priority_counts.values, labels=priority_counts.index, autopct='%1.1f%%')
            ax2.set_title('Task Priority Distribution')
        
        ax3 = plt.subplot(223)
        hours_dist = pd.cut(person_data['estimated_hours'], 
                          bins=[0, 2, 4, 6, float('inf')],
                          labels=['0-2 hrs', '2-4 hrs', '4-6 hrs', '6+ hrs'])
        hours_counts = hours_dist.value_counts()
        ax3.bar(hours_counts.index, hours_counts.values)
        ax3.set_title('Task Duration Distribution')
        ax3.set_xlabel('Hours Range')
        ax3.set_ylabel('Number of Tasks')
        
        ax4 = plt.subplot(224)
        task_types = person_data['task'].apply(lambda x: x.split()[0])
        type_counts = task_types.value_counts()
        if not type_counts.empty:
            ax4.pie(type_counts.values, labels=type_counts.index, autopct='%1.1f%%')
            ax4.set_title('Task Type Distribution')
        
        plt.tight_layout()
        
        output_path = os.path.join(base_dir, team, 'individual', f"{person.replace(' ', '_')}_analysis.png")
        plt.savefig(output_path, bbox_inches='tight', dpi=300)
        plt.close()

    def plot_team_workload(self, df, team, base_dir):
        """Create team workload visualizations"""
        team_data = df[df['team'] == team]
        if team_data.empty:
            print(f"No data found for team {team}")
            return
            
        fig = plt.figure(figsize=(15, 10))
        plt.suptitle(f"Team Workload Analysis - {team}", fontsize=16, y=0.95)
        
        ax1 = plt.subplot(221)
        pivot_data = team_data.pivot_table(
            index='date',
            columns='person',
            values='estimated_hours',
            aggfunc='sum'
        ).fillna(0)
        
        for person in pivot_data.columns:
            ax1.plot(pivot_data.index, pivot_data[person], label=person.split()[-1], marker='o')
        
        ax1.set_title('Daily Workload by Team Member')
        ax1.set_xlabel('Date')
        ax1.set_ylabel('Hours')
        ax1.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
        plt.xticks(rotation=45)
        
        ax2 = plt.subplot(222)
        priority_by_person = pd.crosstab(team_data['person'], team_data['priority'])
        priority_by_person.plot(kind='bar', stacked=True, ax=ax2)
        ax2.set_title('Priority Distribution by Team Member')
        ax2.set_xlabel('Team Member')
        ax2.set_ylabel('Number of Tasks')
        plt.xticks(rotation=45)
        
        ax3 = plt.subplot(223)
        team_data['hours_category'] = pd.cut(team_data['estimated_hours'],
                                           bins=[0, 2, 4, 6, float('inf')],
                                           labels=['0-2 hrs', '2-4 hrs', '4-6 hrs', '6+ hrs'])
        hours_by_person = pd.crosstab(team_data['person'], team_data['hours_category'])
        hours_by_person.plot(kind='bar', stacked=True, ax=ax3)
        ax3.set_title('Task Duration Distribution by Team Member')
        ax3.set_xlabel('Team Member')
        ax3.set_ylabel('Number of Tasks')
        plt.xticks(rotation=45)
        
        ax4 = plt.subplot(224)
        total_hours = team_data.groupby('person')['estimated_hours'].sum()
        ax4.pie(total_hours.values, 
                labels=[f"{name.split()[-1]}\n({hours:.1f} hrs)" 
                       for name, hours in zip(total_hours.index, total_hours.values)],
                autopct='%1.1f%%')
        ax4.set_title('Total Workload Distribution')
        
        plt.tight_layout()
        
        output_path = os.path.join(base_dir, team, 'team', f"{team}_analysis.png")
        plt.savefig(output_path, bbox_inches='tight', dpi=300)
        plt.close()

    def plot_organization_overview(self, df, base_dir):
        """Create organization-wide analysis visualizations"""
        if df.empty:
            print("No data available for organization overview")
            return
            
        fig = plt.figure(figsize=(15, 10))
        plt.suptitle("Organization-wide Workload Analysis", fontsize=16, y=0.95)
        
        ax1 = plt.subplot(221)
        team_workload = df.groupby('team')['estimated_hours'].sum()
        ax1.bar(team_workload.index, team_workload.values)
        ax1.set_title('Total Workload by Team')
        ax1.set_ylabel('Total Hours')
        plt.xticks(rotation=45)
        
        ax2 = plt.subplot(222)
        priority_by_team = pd.crosstab(df['team'], df['priority'], normalize='index') * 100
        priority_by_team.plot(kind='bar', stacked=True, ax=ax2)
        ax2.set_title('Priority Distribution by Team (%)')
        ax2.set_xlabel('Team')
        ax2.set_ylabel('Percentage of Tasks')
        plt.xticks(rotation=45)
        
        ax3 = plt.subplot(223)
        df['hours_category'] = pd.cut(df['estimated_hours'],
                                    bins=[0, 2, 4, 6, float('inf')],
                                    labels=['0-2 hrs', '2-4 hrs', '4-6 hrs', '6+ hrs'])
        hours_dist_by_team = pd.crosstab(df['team'], df['hours_category'], normalize='index') * 100
        hours_dist_by_team.plot(kind='bar', stacked=True, ax=ax3)
        ax3.set_title('Task Duration Distribution by Team (%)')
        ax3.set_xlabel('Team')
        ax3.set_ylabel('Percentage of Tasks')
        plt.xticks(rotation=45)
        
        ax4 = plt.subplot(224)
        person_workload = df.groupby('person')['estimated_hours'].sum().sort_values(ascending=False).head(10)
        ax4.bar(range(len(person_workload)), person_workload.values)
        ax4.set_title('Top 10 Individual Workloads')
        ax4.set_xlabel('Team Members')
        ax4.set_ylabel('Total Hours')
        plt.xticks(range(len(person_workload)), 
                   [name.split()[-1] for name in person_workload.index],
                   rotation=45)
        
        plt.tight_layout()
        
        output_path = os.path.join(base_dir, 'organization_overview.png')
        plt.savefig(output_path, bbox_inches='tight', dpi=300)
        plt.close()

    def generate_reports(self):
        """Generate statistical reports for all levels"""
        print("Loading data...")
        df = self.load_data()
        
        if df.empty:
            print("No data found. Please check if the task files exist.")
            return
            
        print("Creating analysis directories...")
        base_dir = self.create_analysis_directory()
        
        print("Generating individual analysis...")
        for team, members in self.teams.items():
            for member in members:
                print(f"Processing {member}...")
                self.plot_individual_workload(df, member.replace('_', ' '), team, base_dir)
        
        print("Generating team analysis...")
        for team in self.teams.keys():
            print(f"Processing team {team}...")
            self.plot_team_workload(df, team, base_dir)
        
        print("Generating organization overview...")
        self.plot_organization_overview(df, base_dir)
        
        print(f"Analysis completed. Results saved in {base_dir}")

def main():
    try:
        print("Starting workload analysis...")
        analyzer = WorkloadAnalyzer()
        analyzer.generate_reports()
        print("Analysis completed successfully!")
    except Exception as e:
        print(f"Error during analysis: {str(e)}")
        import traceback
        traceback.print_exc()
        sys.exit(1)

if __name__ == "__main__":
    main()

Starting workload analysis...
Loading data...
Creating analysis directories...
Generating individual analysis...
Processing Ahmad_Al-Rashid...
Processing Michael_Anderson...
Processing Fatima_Al-Qassim...
Processing William_Thompson...
Processing Sarah_Mitchell...
Processing Omar_Al-Sayed...
Processing Mariam_Al-Hashimi...
Processing Hassan_Al-Mahmoud...
Processing Elizabeth_Parker...
Processing James_Cooper...
Processing Noura_Al-Zahrani...
Processing Mohammed_Al-Qahtani...
Processing Katherine_Wilson...
Processing Robert_Stevens...
Processing Leila_Al-Harbi...
Processing Khalid_Al-Nasser...
Processing Jennifer_Brooks...
Processing Aisha_Al-Dubai...
Processing Thomas_Richardson...
Processing Ibrahim_Al-Kuwaiti...
Generating team analysis...
Processing team Leadershiptasks...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  team_data['hours_category'] = pd.cut(team_data['estimated_hours'],


Processing team projectalpha...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  team_data['hours_category'] = pd.cut(team_data['estimated_hours'],


Processing team projectbeta...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  team_data['hours_category'] = pd.cut(team_data['estimated_hours'],


Processing team projectgamma...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  team_data['hours_category'] = pd.cut(team_data['estimated_hours'],


Processing team operations...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  team_data['hours_category'] = pd.cut(team_data['estimated_hours'],


Processing team technical...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  team_data['hours_category'] = pd.cut(team_data['estimated_hours'],


Generating organization overview...
Analysis completed. Results saved in analysis\2024_11
Analysis completed successfully!
