In [None]:
import pandas as pd

# Read inputOfSala.json and order its rows by ascending 'Capacidad' in one chain.
df = pd.read_json("inputOfSala.json").sort_values(by='Capacidad', ascending=True)

print(df)

In [None]:
def get_top_subjects(data, n=15, ascending=False):
    """Rank the distinct subjects found in ``data`` by their 'Vacantes' value.

    Args:
        data: Iterable of records, each with an 'Asignaturas' list whose items
            carry the keys 'CodigoAsignatura', 'Nombre', 'Vacantes', 'Nivel'
            and 'Campus'.
        n: Maximum number of rows to return (default 15).
        ascending: Sort direction for 'Vacantes'. The default (False) puts the
            subjects with the most vacancies first.

    Returns:
        A DataFrame with the five columns above, at most ``n`` rows, and a
        fresh 0..k-1 index. An empty ``data`` yields an empty frame that still
        has those columns.

    Raises:
        KeyError: If a record lacks 'Asignaturas' or a subject lacks one of
            the five expected keys.
    """
    columns = ['CodigoAsignatura', 'Nombre', 'Vacantes', 'Nivel', 'Campus']

    # Flatten the nested record -> subjects structure into one row per subject
    all_subjects = [
        {column: subject[column] for column in columns}
        for person in data
        for subject in person['Asignaturas']
    ]

    # Passing `columns` keeps the schema even when there are no rows, so the
    # drop_duplicates/sort_values calls below do not raise KeyError.
    df = pd.DataFrame(all_subjects, columns=columns)

    # The same subject can appear under several records; keep its first occurrence
    df = df.drop_duplicates(subset=['CodigoAsignatura', 'Nombre'])

    # A stable sort keeps ties in their original (first-seen) order
    top_subjects = df.sort_values(by='Vacantes', ascending=ascending, kind='stable').head(n)

    # Reset index for clean display
    return top_subjects.reset_index(drop=True)

import json

# Parse the professors file; the context manager closes the handle even if
# json.load raises (the bare open() call left it open).
with open("inputOfProfesores.json", "r", encoding="utf-8") as profesores_file:
    data = json.load(profesores_file)

# Build the ranking table from the parsed JSON and display it
top_subjects = get_top_subjects(data)
print("\nTop 15 Subjects by Vacancies:")
print(top_subjects[['CodigoAsignatura', 'Nombre', 'Vacantes', 'Nivel', 'Campus']])

In [None]:
%pip install seaborn

In [None]:
import pandas as pd
import numpy as np
from typing import Dict, List, Tuple
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

def calculate_guideline_accomplishment(df: pd.DataFrame) -> Dict:
    """Calculate the overall guideline accomplishment percentage.

    Four component rates are combined with fixed weights that sum to 1.0:
    share of rows with ``Completion_Rate >= 100``, mean ``Satisfaction_Score``,
    an inverse violation rate, and mean ``Workload_Balance``.

    Args:
        df: Frame with the columns 'Completion_Rate', 'Satisfaction_Score',
            'High_Violations', 'Medium_Violations', 'Low_Violations' and
            'Workload_Balance'.

    Returns:
        ``{'total_accomplishment': float, 'component_scores': {label: float}}``
        with every value rounded to 2 decimals. An empty frame yields 0.0 for
        the total and for every component instead of dividing by zero.
    """
    # Weight factors for different components
    weights = {
        'completion': 0.3,         # How many courses were fully assigned
        'satisfaction': 0.3,       # Overall satisfaction scores
        'violations': 0.25,        # Inverse of violation rates
        'workload': 0.15           # Workload balance
    }

    total_rows = len(df)
    if total_rows == 0:
        # Nothing to score: report zeros rather than raising ZeroDivisionError
        return {
            'total_accomplishment': 0.0,
            'component_scores': {
                'Completion Rate': 0.0,
                'Satisfaction Score': 0.0,
                'Violation Rate': 0.0,
                'Workload Balance': 0.0,
            }
        }

    # Completion component: percentage of rows that are fully assigned
    fully_assigned = int((df['Completion_Rate'] >= 100).sum())
    completion_rate = (fully_assigned / total_rows) * 100

    # Satisfaction component
    satisfaction_rate = df['Satisfaction_Score'].mean()

    # Violations component (inverse - fewer violations is better).
    # The ceiling is 3 violations per row; totals above it are clamped to 0%.
    max_violations = total_rows * 3
    total_violations = (df['High_Violations'] + df['Medium_Violations'] + df['Low_Violations']).sum()
    violation_rate = max(0, (1 - (total_violations / max_violations)) * 100)

    # Workload component
    workload_rate = df['Workload_Balance'].mean()

    # Weighted contribution of each component to the total
    weighted_scores = {
        'Completion Rate': completion_rate * weights['completion'],
        'Satisfaction Score': satisfaction_rate * weights['satisfaction'],
        'Violation Rate': violation_rate * weights['violations'],
        'Workload Balance': workload_rate * weights['workload']
    }

    total_accomplishment = sum(weighted_scores.values())

    # Cast to built-in float so the result holds plain Python numbers
    return {
        'total_accomplishment': round(float(total_accomplishment), 2),
        'component_scores': {k: round(float(v), 2) for k, v in weighted_scores.items()}
    }

def analyze_detailed_schedule(file_path: str) -> Tuple[pd.DataFrame, Dict]:
    """
    Read a detailed schedule workbook and run every analysis step on it.

    Args:
        file_path: Path to the detailed schedule analysis Excel file

    Returns:
        ``(df, summary_metrics)`` on success. If any step raises, the error is
        printed and ``(None, None)`` is returned instead.
    """
    try:
        schedule = pd.read_excel(file_path)

        # Dict entries are evaluated top to bottom: professor, subject,
        # violation, then workload metrics.
        report = {
            'professor_metrics': analyze_professor_metrics(schedule),
            'subject_metrics': analyze_subject_metrics(schedule),
            'violation_metrics': analyze_violation_patterns(schedule),
            'workload_metrics': analyze_workload_distribution(schedule),
        }

        # Plots are written next to the input file, before the guideline score
        generate_analysis_plots(schedule, Path(file_path).parent)
        report['guideline_accomplishment'] = calculate_guideline_accomplishment(schedule)

        print_detailed_report(schedule, report)
    except Exception as e:
        print(f"Error analyzing detailed schedule: {str(e)}")
        return None, None

    return schedule, report

def analyze_professor_metrics(df: pd.DataFrame) -> Dict:
    """Aggregate schedule metrics per value of the 'Professor' column.

    Args:
        df: Frame with the columns 'Professor', 'Completion_Rate',
            'Satisfaction_Score', 'High_Violations', 'Medium_Violations',
            'Low_Violations' and 'Workload_Balance'.

    Returns:
        Dict with four ``{professor: value}`` mappings:
        'completion_rates', 'satisfaction_scores' and 'workload_balance'
        (per-professor means) and 'total_violations' (sum of the three
        violation columns). Each mapping is empty for an empty frame.
    """
    violation_columns = ['High_Violations', 'Medium_Violations', 'Low_Violations']
    prof_groups = df.groupby('Professor')

    # Summing the selected columns per group, then across columns, replaces a
    # groupby.apply(lambda ...) that newer pandas warns about (it operated on
    # the grouping column) and that misbehaves on an empty frame.
    total_violations = prof_groups[violation_columns].sum().sum(axis=1)

    return {
        'completion_rates': prof_groups['Completion_Rate'].mean().to_dict(),
        'satisfaction_scores': prof_groups['Satisfaction_Score'].mean().to_dict(),
        'total_violations': total_violations.to_dict(),
        'workload_balance': prof_groups['Workload_Balance'].mean().to_dict(),
    }

def analyze_subject_metrics(df: pd.DataFrame) -> Dict:
    """Summarise assignment completeness and activity counts over all rows.

    Args:
        df: Frame with a numeric 'Completion_Rate' column and an
            'Activity_Distribution' column of strings shaped like
            ``"name: count; name: count"`` (missing values are skipped).

    Returns:
        Dict with 'total_subjects' (row count), 'fully_assigned' (rows with
        ``Completion_Rate >= 100``), 'completion_rate' (their percentage,
        0.0 for an empty frame) and 'activity_distribution'
        (``{activity: summed count}``).

    Raises:
        ValueError: If a count in 'Activity_Distribution' is not an integer.
    """
    total_subjects = len(df)
    fully_assigned = int((df['Completion_Rate'] >= 100).sum())

    # Activity distribution
    activity_dist = {}
    for dist_str in df['Activity_Distribution'].dropna():
        for item in dist_str.split(';'):
            if ':' not in item:
                # Empty fragment (e.g. after a trailing ';') or malformed entry
                continue
            # Split on the LAST colon so an activity name containing ':' still
            # parses instead of raising on unpacking.
            activity, count = item.rsplit(':', 1)
            activity = activity.strip()
            activity_dist[activity] = activity_dist.get(activity, 0) + int(count)

    # Guard the percentage so an empty frame does not raise ZeroDivisionError
    completion_rate = (fully_assigned / total_subjects * 100) if total_subjects else 0.0

    return {
        'total_subjects': total_subjects,
        'fully_assigned': fully_assigned,
        'completion_rate': completion_rate,
        'activity_distribution': activity_dist
    }

def analyze_violation_patterns(df: pd.DataFrame) -> Dict:
    """Total the violation counters and tally the individual violation labels.

    Args:
        df: Frame with the columns 'High_Violations', 'Medium_Violations',
            'Low_Violations' and 'Violation_Details'. The details column
            holds ';'-separated labels; missing values and the literal
            label 'None' are ignored.

    Returns:
        ``{'violation_counts': {'high', 'medium', 'low'},
        'violation_types': {label: occurrences}}``.
    """
    # Count total violations by severity
    high_violations = df['High_Violations'].sum()
    medium_violations = df['Medium_Violations'].sum()
    low_violations = df['Low_Violations'].sum()

    # Tally each distinct label found in the details strings
    violation_types = {}
    for details in df['Violation_Details'].dropna():
        for violation in details.split(';'):
            violation = violation.strip()
            # Skip blank fragments (produced by a trailing or doubled ';') so
            # they are not reported as a violation type named ''.
            if not violation or violation == 'None':
                continue
            violation_types[violation] = violation_types.get(violation, 0) + 1

    return {
        'violation_counts': {
            'high': high_violations,
            'medium': medium_violations,
            'low': low_violations
        },
        'violation_types': violation_types
    }

def analyze_workload_distribution(df: pd.DataFrame) -> Dict:
    """Describe how the 'Workload_Balance' column is spread.

    Returns:
        Dict with 'mean_balance', 'std_balance' and 'distribution', the
        latter counting rows per band: excellent (>= 90), good (75 to <90),
        fair (60 to <75) and poor (< 60).
    """
    balance = df['Workload_Balance']

    # One boolean mask per band; lower bounds are inclusive, upper bounds exclusive
    band_masks = {
        'excellent': balance >= 90,
        'good': (balance >= 75) & (balance < 90),
        'fair': (balance >= 60) & (balance < 75),
        'poor': balance < 60,
    }

    return {
        'mean_balance': balance.mean(),
        'std_balance': balance.std(),
        # Number of True entries == number of rows selected by the mask
        'distribution': {band: int(mask.sum()) for band, mask in band_masks.items()},
    }

def generate_analysis_plots(df: pd.DataFrame, output_dir: Path):
    """Render three PNG charts into ``output_dir / 'schedule_plots'``.

    Files written: 'satisfaction_completion.png' (scatter),
    'violations_by_professor.png' (stacked bars) and
    'workload_distribution.png' (histogram).

    Args:
        df: Frame with the columns 'Completion_Rate', 'Satisfaction_Score',
            'Professor', 'High_Violations', 'Medium_Violations',
            'Low_Violations' and 'Workload_Balance'.
        output_dir: Directory under which 'schedule_plots' is created.

    Side effects:
        Switches the global matplotlib style to 'seaborn-v0_8-whitegrid'.
    """
    plt.style.use('seaborn-v0_8-whitegrid')

    # Create plots directory (and any missing parents)
    plots_dir = output_dir / 'schedule_plots'
    plots_dir.mkdir(parents=True, exist_ok=True)

    # 1. Satisfaction vs Completion Rate scatter plot
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.scatter(df['Completion_Rate'], df['Satisfaction_Score'], alpha=0.6)
    ax.set_xlabel('Completion Rate (%)')
    ax.set_ylabel('Satisfaction Score')
    ax.set_title('Satisfaction vs Completion Rate')
    fig.savefig(plots_dir / 'satisfaction_completion.png')
    plt.close(fig)

    # 2. Violations by professor
    prof_violations = df.groupby('Professor')[
        ['High_Violations', 'Medium_Violations', 'Low_Violations']
    ].sum()

    # DataFrame.plot opens a NEW figure unless it is handed an axes. Passing
    # ax= makes the chart use the 12x6 figure and lets us close exactly that
    # figure; a bare plt.figure() here was left empty and never closed.
    fig, ax = plt.subplots(figsize=(12, 6))
    prof_violations.plot(kind='bar', stacked=True, ax=ax)
    ax.set_title('Violations by Professor')
    plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
    fig.tight_layout()
    fig.savefig(plots_dir / 'violations_by_professor.png')
    plt.close(fig)

    # 3. Workload balance distribution
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.histplot(data=df, x='Workload_Balance', bins=20, ax=ax)
    ax.set_title('Workload Balance Distribution')
    fig.savefig(plots_dir / 'workload_distribution.png')
    plt.close(fig)

def _top_items(mapping: Dict, n: int = 5) -> List[Tuple]:
    """Return the ``n`` (key, value) pairs with the largest values, largest first."""
    if not mapping:
        # pd.Series({}) has dtype object, and nlargest raises TypeError on it
        return []
    return list(pd.Series(mapping).nlargest(n).items())


def print_detailed_report(df: pd.DataFrame, metrics: Dict):
    """Print the comprehensive analysis report to stdout.

    Args:
        df: Schedule frame; only its 'Professor' column is read here.
        metrics: Summary dict with the keys 'guideline_accomplishment',
            'subject_metrics', 'professor_metrics', 'violation_metrics' and
            'workload_metrics', in the shape the analysis functions of this
            notebook return.

    The "Top 5" sections print nothing under their header when the
    corresponding mapping is empty (e.g. a schedule with no violations).
    """
    print("\n=== Detailed Schedule Analysis Report ===\n")

    # Guideline accomplishment
    guideline_metrics = metrics['guideline_accomplishment']
    print("Guideline Accomplishment:")
    print(f"Overall: {guideline_metrics['total_accomplishment']}%")
    print("\nComponent Breakdown:")
    for component, score in guideline_metrics['component_scores'].items():
        print(f"{component}: {score}%")

    print("Overall Statistics:")
    print(f"Total Professors: {len(df['Professor'].unique())}")
    print(f"Total Subjects: {metrics['subject_metrics']['total_subjects']}")
    print(f"Full Assignment Rate: {metrics['subject_metrics']['completion_rate']:.1f}%")

    print("\nProfessor Performance (Top 5 by Satisfaction):")
    for prof, score in _top_items(metrics['professor_metrics']['satisfaction_scores']):
        print(f"{prof}: {score:.1f}")

    print("\nViolation Summary:")
    vc = metrics['violation_metrics']['violation_counts']
    print(f"High: {vc['high']}, Medium: {vc['medium']}, Low: {vc['low']}")

    print("\nMost Common Violations (Top 5):")
    for violation, count in _top_items(metrics['violation_metrics']['violation_types']):
        print(f"{violation}: {count}")

    print("\nActivity Distribution:")
    for activity, count in metrics['subject_metrics']['activity_distribution'].items():
        print(f"{activity}: {count}")

    print("\nWorkload Balance:")
    wb = metrics['workload_metrics']['distribution']
    print(f"Excellent (90+): {wb['excellent']}")
    print(f"Good (75-89): {wb['good']}")
    print(f"Fair (60-74): {wb['fair']}")
    print(f"Poor (<60): {wb['poor']}")

    print("\nVisualization plots have been saved to 'schedule_plots' directory")

if __name__ == "__main__":
    try:
        file_path = "../agent_output/schedule_analysis_detailed.xlsx"
        results_df, metrics = analyze_detailed_schedule(file_path)
        # analyze_detailed_schedule prints its own error and returns
        # (None, None) on failure, so only announce success when it
        # actually produced a result.
        if results_df is not None:
            print("\nAnalysis complete!")
    except Exception as e:
        print(f"Error in analysis: {str(e)}")

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter

# Read the data - need to ensure we load the JSON data correctly
def load_data():
    """Read the professors and rooms JSON files from ../agent_output.

    Returns:
        ``(professors_data, rooms_data)``, each a DataFrame.
    """
    def read_as_frame(json_path):
        loaded = pd.read_json(json_path)
        # If the parse comes back as a Series, wrap it into a one-row frame
        return pd.DataFrame([loaded]) if isinstance(loaded, pd.Series) else loaded

    # Professors first, then rooms
    professors = read_as_frame('../agent_output/Horarios_asignados.json')
    rooms = read_as_frame('../agent_output/Horarios_salas.json')

    return professors, rooms

# Load both tables once at module level: the analysis functions defined below
# read `professors_data` as a global rather than taking it as a parameter.
# NOTE(review): `rooms_data` is not referenced anywhere in the visible part of
# this notebook — confirm whether it is still needed.
professors_data, rooms_data = load_data()

# 1. Overall Statistics
def print_overall_stats():
    """Print the structure, a sample row and headline totals of ``professors_data``.

    Reads the module-level ``professors_data`` frame and uses its
    'Asignaturas', 'Solicitudes' and 'AsignaturasCompletadas' columns.
    When the total of 'Solicitudes' is not positive the completion rate is
    reported as N/A instead of dividing by zero.
    """
    print("=== Overall Statistics ===")
    total_professors = len(professors_data)
    print("\nProfessors Data Structure:")
    print(professors_data.columns)
    print("\nSample Professor Data:")
    print(professors_data.iloc[0] if total_professors > 0 else "No data")

    # Each entry of 'Asignaturas' is a collection of assigned blocks
    total_assigned_blocks = sum(len(prof_data) for prof_data in professors_data['Asignaturas'])
    total_requests = professors_data['Solicitudes'].sum()
    total_completed = professors_data['AsignaturasCompletadas'].sum()

    print(f"\nTotal Professors: {total_professors}")
    print(f"Total Assigned Subject Blocks: {total_assigned_blocks}")
    print(f"Total Subject Requests: {total_requests}")
    print(f"Total Completed Subjects: {total_completed}")
    if total_requests > 0:
        print(f"Overall Completion Rate: {(total_completed/total_requests)*100:.2f}%")
    else:
        # Dividing numpy integers by zero yields nan/inf plus a RuntimeWarning
        print("Overall Completion Rate: N/A (no subject requests)")

# 2. Satisfaction Analysis
def analyze_satisfaction():
    """Plot a histogram of every assignment's 'Satisfaccion' and print its summary.

    Reads the module-level ``professors_data`` frame. Prints the average,
    minimum and maximum score; when there are no assignments it prints a
    notice and returns without plotting.
    """
    satisfactions = [
        subj['Satisfaccion']
        for asignaturas in professors_data['Asignaturas']
        for subj in asignaturas
    ]

    if not satisfactions:
        # Avoids ZeroDivisionError in the average and ValueError in min()/max()
        print("\nNo satisfaction scores available.")
        return

    plt.figure(figsize=(10, 6))
    sns.histplot(satisfactions, bins=10)
    plt.title('Distribution of Satisfaction Scores')
    plt.xlabel('Satisfaction Score')
    plt.ylabel('Count')
    plt.show()

    print(f"\nAverage Satisfaction Score: {sum(satisfactions)/len(satisfactions):.2f}")
    print(f"Minimum Satisfaction: {min(satisfactions)}")
    print(f"Maximum Satisfaction: {max(satisfactions)}")

# 3. Time Block Analysis
def analyze_time_blocks():
    """Show a bar chart of how many assignments fall in each 'Bloque' value.

    Reads the module-level ``professors_data`` frame; bars are ordered by
    ascending block value.
    """
    block_counts = Counter(
        subj['Bloque']
        for _, prof in professors_data.iterrows()
        for subj in prof['Asignaturas']
    )

    # Sort once and reuse the ordering for both axes
    ordered_blocks = sorted(block_counts)
    assignments_per_block = [block_counts[block] for block in ordered_blocks]

    plt.figure(figsize=(10, 6))
    plt.bar(ordered_blocks, assignments_per_block)
    plt.title('Distribution of Assigned Time Blocks')
    plt.xlabel('Block Number')
    plt.ylabel('Number of Assignments')
    plt.show()

# 4. Room Utilization
def analyze_room_utilization():
    """Show a bar chart of the number of assignments per 'Sala' value.

    Reads the module-level ``professors_data`` frame; bars are ordered by
    sorted room name.
    """
    room_usage = Counter(
        subj['Sala']
        for _, prof in professors_data.iterrows()
        for subj in prof['Asignaturas']
    )

    rooms = sorted(room_usage)
    usage = [room_usage[room] for room in rooms]

    plt.figure(figsize=(12, 6))
    plt.bar(rooms, usage)
    plt.title('Room Utilization')
    plt.xlabel('Room')
    plt.ylabel('Number of Assignments')
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

# 5. Day Distribution
def analyze_day_distribution():
    """Show a bar chart of the number of assignments per 'Dia' value.

    Reads the module-level ``professors_data`` frame. Monday to Friday are
    always shown, in week order, even when their count is zero. Any other
    day value present in the data is appended after them in first-seen
    order, so no assignment is silently left out of the chart.
    """
    day_counts = Counter(
        subj['Dia']
        for asignaturas in professors_data['Asignaturas']
        for subj in asignaturas
    )

    # Define correct day order for the regular week
    day_order = ['Lunes', 'Martes', 'Miercoles', 'Jueves', 'Viernes']
    # Values not in the list above (another weekday, a different spelling)
    # used to be dropped; keep them visible at the end instead.
    extra_days = [day for day in day_counts if day not in day_order]
    plotted_days = day_order + extra_days
    counts = [day_counts.get(day, 0) for day in plotted_days]

    plt.figure(figsize=(10, 6))
    plt.bar(plotted_days, counts)
    plt.title('Distribution of Classes Across Days')
    plt.xlabel('Day')
    plt.ylabel('Number of Classes')
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

# 6. Professor Workload Analysis
def analyze_professor_workload():
    """Show a histogram of the number of assigned blocks per professor.

    Reads the module-level ``professors_data`` frame; a professor's workload
    is the length of their 'Asignaturas' entry. Prints a notice and returns
    without plotting when the frame has no rows.
    """
    workloads = [len(asignaturas) for asignaturas in professors_data['Asignaturas']]

    if not workloads:
        # min()/max() raise ValueError on an empty list
        print("\nNo professor workloads available.")
        return

    lightest, heaviest = min(workloads), max(workloads)

    plt.figure(figsize=(10, 6))
    # One unit-wide bin per workload value; +2 so the largest value gets its own bin
    sns.histplot(workloads, bins=range(lightest, heaviest + 2, 1))
    plt.title('Distribution of Professor Workloads')
    plt.xlabel('Number of Assigned Blocks')
    plt.ylabel('Number of Professors')
    plt.show()

# Additional metrics
def calculate_additional_metrics():
    """Print the campus split of assignments and the consecutive-block rate.

    Reads the module-level ``professors_data`` frame. The consecutive-block
    rate is the number of adjacent block pairs (difference of exactly 1
    within one professor's day) divided by the total number of blocks.
    """
    print("\n=== Additional Metrics ===")

    # Campus distribution.
    # NOTE(review): every room whose name does not start with 'K' is counted
    # as Playa Brava — confirm this prefix rule against the room catalogue.
    campus_dist = Counter()
    for _, prof in professors_data.iterrows():
        for subj in prof['Asignaturas']:
            campus = 'Kaufmann' if subj['Sala'].startswith('K') else 'Playa Brava'
            campus_dist[campus] += 1

    print("\nCampus Distribution:")
    total_assignments = sum(campus_dist.values())
    for campus, count in campus_dist.items():
        print(f"{campus}: {(count/total_assignments)*100:.2f}%")

    # Consecutive block analysis
    consecutive_blocks = 0
    total_blocks = 0

    for _, prof in professors_data.iterrows():
        # Group this professor's blocks by day
        by_day = {}
        for subj in prof['Asignaturas']:
            by_day.setdefault(subj['Dia'], []).append(subj['Bloque'])

        for day_blocks in by_day.values():
            ordered = sorted(day_blocks)
            total_blocks += len(ordered)
            # Pair each block with its successor and count gaps of exactly 1
            consecutive_blocks += sum(
                1 for earlier, later in zip(ordered, ordered[1:]) if later - earlier == 1
            )

    if total_blocks > 0:
        print(f"\nConsecutive Block Rate: {(consecutive_blocks/total_blocks)*100:.2f}%")

# 7. Constraint Compliance Analysis
def analyze_constraint_compliance():
    """Plot and print how many of the 7 main constraints each assignment meets.

    Reads the module-level ``professors_data`` frame. Only three constraints
    are actually evaluated, from the assignment's 'Satisfaccion' and 'Bloque'
    values; the other four are assumed to be enforced by the assignment
    process and are always counted as met. As a consequence every score is
    between 4 and 7, so the "Low" band (1-2) and the 3/7 bar are always 0%.

    Prints a notice and returns without plotting when there are no
    assignments.
    """
    print("\n=== Constraint Compliance Analysis ===")

    total_constraints = 7  # Total number of main constraints

    def check_constraints(assignment):
        """Return the number of constraints this assignment is counted as meeting."""
        # Constraints 1 (campus match), 2 (time slot preference by level),
        # 6 (no campus transitions without buffer) and 7 (consecutive blocks
        # when possible) are not checked here — they are assumed to be
        # handled during assignment and always count as met.
        constraints_met = 4

        # Constraint 3: Satisfaction score > 7 (indicating good room capacity match)
        if assignment['Satisfaccion'] > 7:
            constraints_met += 1

        # Constraint 4: No block 9 for multi-hour subjects
        if assignment['Bloque'] != 9:
            constraints_met += 1

        # Constraint 5: Room capacity within optimal range (indicated by satisfaction > 8)
        if assignment['Satisfaccion'] > 8:
            constraints_met += 1

        return constraints_met

    # Score every assignment of every professor
    compliance_levels = [
        check_constraints(assignment)
        for asignaturas in professors_data['Asignaturas']
        for assignment in asignaturas
    ]

    total_assignments = len(compliance_levels)
    if total_assignments == 0:
        # The percentage computation below would divide by zero
        print("\nNo assignments to analyze.")
        return

    compliance_dist = Counter(compliance_levels)

    # Percentage of assignments at each level 1..7
    x = range(1, total_constraints + 1)
    y = [compliance_dist.get(level, 0) / total_assignments * 100 for level in x]

    plt.figure(figsize=(12, 6))
    plt.bar(x, y)
    plt.title('Distribution of Constraint Compliance')
    plt.xlabel('Number of Constraints Met')
    plt.ylabel('Percentage of Assignments')
    plt.xticks(x)

    # Add percentage labels on bars
    for level, percentage in zip(x, y):
        plt.text(level, percentage + 0.5, f'{percentage:.1f}%', ha='center')

    plt.tight_layout()
    plt.show()

    # Print summary statistics (y[k] holds the percentage for level k + 1)
    print("\nConstraint Compliance Summary:")
    print(f"Perfect Compliance (7/7): {y[6]:.1f}%")
    print(f"High Compliance (5-6/7): {y[4] + y[5]:.1f}%")
    print(f"Medium Compliance (3-4/7): {y[2] + y[3]:.1f}%")
    print(f"Low Compliance (1-2/7): {y[0] + y[1]:.1f}%")

    # Calculate weighted average compliance
    weighted_avg = sum(level * (count/total_assignments) for level, count in compliance_dist.items())
    print(f"\nWeighted Average Constraints Met: {weighted_avg:.2f}/7")

# Run all analyses
# Run every analysis, in the same order as they are defined above
for run_analysis in (
    print_overall_stats,
    analyze_satisfaction,
    analyze_time_blocks,
    analyze_room_utilization,
    analyze_day_distribution,
    analyze_professor_workload,
    calculate_additional_metrics,
    analyze_constraint_compliance,
):
    run_analysis()