In [2]:
import json
import pandas as pd
from collections import Counter

def analyze_professor_data(json_data):
    """
    Analyze professor data to extract timetabling statistics.

    Args:
        json_data (str): JSON string encoding a list of professor objects.
            Each professor must provide "Nombre" and "Asignaturas"; every
            entry of "Asignaturas" must provide "CodigoAsignatura", "Horas",
            "Vacantes", "Campus", "Actividad", "Nivel" and "Paralelo".

    Returns:
        dict: Aggregated statistics (totals, per-professor counts, block
        counters by campus/activity/level/parallel, vacancy figures,
        "unique_courses" and "avg_blocks_per_professor"). Every key is
        always present, so an empty professor list yields zeroed values.
        An empty dict is returned when json_data is not valid JSON.

    Raises:
        KeyError: If a professor or course lacks one of the keys listed above.
    """
    # Parse JSON data
    try:
        data = json.loads(json_data)
    except json.JSONDecodeError:
        print("Error: Invalid JSON data")
        return {}

    # Initialize statistics. Derived values start at 0 so that consumers
    # never hit a missing key (or an infinite minimum) on empty input.
    stats = {
        "total_professors": len(data),
        "total_courses": 0,
        "total_blocks": 0,
        "blocks_per_professor": {},
        "courses_per_professor": {},
        "blocks_by_campus": Counter(),
        "blocks_by_activity_type": Counter(),
        "blocks_by_level": Counter(),
        "blocks_by_parallel": Counter(),
        "avg_vacancies": 0,
        "max_vacancies": 0,
        "min_vacancies": 0,
        "unique_courses": 0,
        "avg_blocks_per_professor": 0,
    }

    all_vacancies = []
    courses_set = set()  # Track unique course codes across all professors

    # Process each professor
    for professor in data:
        # NOTE(review): the per-professor dicts are keyed by "Nombre", so two
        # professors sharing the same name overwrite each other's entry.
        name = professor["Nombre"]
        courses = professor["Asignaturas"]

        stats["courses_per_professor"][name] = len(courses)
        stats["total_courses"] += len(courses)

        # Accumulate this professor's blocks while walking the courses once
        professor_blocks = 0
        for course in courses:
            hours = course["Horas"]
            professor_blocks += hours

            courses_set.add(course["CodigoAsignatura"])

            stats["blocks_by_campus"][course["Campus"]] += hours
            stats["blocks_by_activity_type"][course["Actividad"]] += hours
            stats["blocks_by_level"][course["Nivel"]] += hours
            stats["blocks_by_parallel"][course["Paralelo"]] += hours

            all_vacancies.append(course["Vacantes"])

        stats["blocks_per_professor"][name] = professor_blocks
        stats["total_blocks"] += professor_blocks

    # Vacancy figures are only meaningful when at least one course exists
    if all_vacancies:
        stats["avg_vacancies"] = sum(all_vacancies) / len(all_vacancies)
        stats["max_vacancies"] = max(all_vacancies)
        stats["min_vacancies"] = min(all_vacancies)

    stats["unique_courses"] = len(courses_set)

    # Average blocks per professor (left at 0 when there are no professors)
    if stats["total_professors"] > 0:
        stats["avg_blocks_per_professor"] = stats["total_blocks"] / stats["total_professors"]

    return stats

def print_statistics(stats):
    """
    Write a human-readable report of the collected statistics to stdout.

    Args:
        stats (dict): Statistics dictionary; the totals, the per-professor
            and per-category mappings, and the vacancy figures are read
            from it by key.
    """
    print("\n===== PROFESSOR TIMETABLE ANALYSIS =====")
    headline_fields = (
        ("Total Professors", "total_professors"),
        ("Total Course Assignments", "total_courses"),
        ("Unique Courses", "unique_courses"),
        ("Total Teaching Blocks", "total_blocks"),
    )
    for caption, field in headline_fields:
        print(f"{caption}: {stats[field]}")
    print(f"Average Blocks per Professor: {stats['avg_blocks_per_professor']:.2f}")

    # Each breakdown: section header, key of the mapping, per-row template
    breakdowns = (
        ("\n----- Blocks Per Professor -----", "blocks_per_professor", "{}: {} blocks"),
        ("\n----- Courses Per Professor -----", "courses_per_professor", "{}: {} courses"),
        ("\n----- Blocks by Campus -----", "blocks_by_campus", "{}: {} blocks"),
        ("\n----- Blocks by Activity Type -----", "blocks_by_activity_type", "{}: {} blocks"),
        ("\n----- Blocks by Level -----", "blocks_by_level", "Level {}: {} blocks"),
    )
    for header, field, row_template in breakdowns:
        print(header)
        for label, amount in stats[field].items():
            print(row_template.format(label, amount))

    print("\n----- Vacancies -----")
    average_vacancies = stats['avg_vacancies']
    print(f"Average Vacancies per Course: {average_vacancies:.2f}")
    for caption, field in (
        ("Maximum Vacancies", "max_vacancies"),
        ("Minimum Vacancies", "min_vacancies"),
    ):
        print(f"{caption}: {stats[field]}")

def main():
    """
    Load the professor JSON file, print the analysis and a timetabling summary.

    Reads 'full/profesores.json' (UTF-8), runs analyze_professor_data on its
    contents, prints the report via print_statistics, shows a per-professor
    DataFrame and finally a short scheduling summary.

    Returns:
        None. When the file is missing or does not contain valid JSON, an
        error message is printed and the function returns early instead of
        failing later on missing statistics.
    """
    data_path = 'full/profesores.json'

    # Load JSON data from file
    try:
        with open(data_path, 'r', encoding="utf-8") as file:
            json_data = file.read()
    except FileNotFoundError:
        # There is no fallback dataset, so stop here rather than analysing
        # an empty string (which is not valid JSON).
        print(f"Error: '{data_path}' file not found.")
        return

    # Analyze the data
    stats = analyze_professor_data(json_data)
    if not stats:
        # An empty result means the JSON could not be parsed; the analysis
        # function has already reported the problem.
        return

    # Print the statistics
    print_statistics(stats)

    # Build a per-professor dataframe for further analysis if needed
    professors = json.loads(json_data)
    professors_df = pd.DataFrame([{
        'RUT': p['RUT'],
        'Nombre': p['Nombre'],
        'Turno': p['Turno'],
        'Num_Asignaturas': len(p['Asignaturas']),
        'Total_Horas': sum(a['Horas'] for a in p['Asignaturas'])
    } for p in professors])

    print("\n----- Professor Dataframe -----")
    print(professors_df)

    # Generate timetabling summary
    print("\n===== TIMETABLING REQUIREMENTS SUMMARY =====")
    print(f"Total blocks to be scheduled: {stats['total_blocks']}")
    print(f"Number of professors needing scheduling: {stats['total_professors']}")
    print(f"Number of unique courses to schedule: {stats['unique_courses']}")

    # Calculate workload distribution (skipped when there are no professors)
    blocks_list = list(stats['blocks_per_professor'].values())
    if blocks_list:
        max_blocks = max(blocks_list)
        min_blocks = min(blocks_list)
        workload_range = max_blocks - min_blocks
        print(f"Workload range: {workload_range} blocks (min: {min_blocks}, max: {max_blocks})")

# Entry point: run the full analysis when this code executes as the main module.
if __name__ == "__main__":
    main()


===== PROFESSOR TIMETABLE ANALYSIS =====
Total Professors: 153
Total Course Assignments: 505
Unique Courses: 294
Total Teaching Blocks: 2258
Average Blocks per Professor: 14.76

----- Blocks Per Professor -----
ROJAS RODRIGUEZ RO: 12 blocks
PADILLA GUZMAN SIL: 26 blocks
OBILINOVIC GONZÁLE: 14 blocks
YÁÑEZ MUÑOZ RICARD: 14 blocks
AZÓCAR AZÓCAR PABL: 15 blocks
LUZA CONTRERAS JUA: 48 blocks
VILCA ASTORGA BELF: 36 blocks
TAPIA QUEZADA JAIM: 5 blocks
DIAZ MIRANDA MARCO: 6 blocks
CORTES CAMPOS EMEL: 3 blocks
MEZA CÁRDENAS ERIK: 25 blocks
LAM MORAGA JAIME A: 36 blocks
WONG BARREDA EDUAR: 16 blocks
ROJAS GONZALEZ MAR: 12 blocks
LÓPEZ SEGUEL JULIO: 9 blocks
URIBE PALACIOS MAR: 12 blocks
SALAZAR PEREZ INGR: 12 blocks
MASANA SEPULVEDA P: 12 blocks
CARMONA MALBRAN RA: 3 blocks
CHANDIA CONCHA HEC: 24 blocks
CARREÑO PASTRIAN E: 11 blocks
CONTRERAS AGUILAR: 8 blocks
RAMÍREZ CASTILLO W: 24 blocks
ARAYA TORRES CARLO: 12 blocks
RANGEL MORALES JUA: 5 blocks
SEGOVIA RIVERA JUA: 9 blocks
VALDIVIESO VEGA C

In [1]:
%pip install openpyxl

Collecting openpyxl
  Downloading openpyxl-3.1.5-py2.py3-none-any.whl.metadata (2.5 kB)
Collecting et-xmlfile (from openpyxl)
  Downloading et_xmlfile-2.0.0-py3-none-any.whl.metadata (2.7 kB)
Downloading openpyxl-3.1.5-py2.py3-none-any.whl (250 kB)
Downloading et_xmlfile-2.0.0-py3-none-any.whl (18 kB)
Installing collected packages: et-xmlfile, openpyxl

   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [openp