In [2]:
import pandas as pd

# Read inputOfSala.json and order the rows by the 'Capacidad' column,
# smallest first (sort_values sorts ascending by default).
df = pd.read_json("inputOfSala.json").sort_values(by='Capacidad')

print(df)

    Turno Codigo  Capacidad       Campus
51     52   LFLO          8  Playa Brava
60     61    LSO          8  Playa Brava
18     19    IM4          9  Playa Brava
35     36  KAUT8         10     Kaufmann
67     68    TBD         11  Playa Brava
..    ...    ...        ...          ...
24     25  KAUS1         68     Kaufmann
7       8  CRP43         71  Playa Brava
6       7  CRP33         71  Playa Brava
14     15    IC4         77  Playa Brava
20     21     K1         82  Playa Brava

[73 rows x 4 columns]


In [5]:
def get_top_subjects(data, n=40, ascending=True):
    """Flatten the nested subject records and return them ranked by 'Vacantes'.

    Each element of ``data`` must have an 'Asignaturas' list whose items carry
    the keys 'CodigoAsignatura', 'Nombre', 'Vacantes', 'Nivel' and 'Campus'.
    Subjects that share the same ('CodigoAsignatura', 'Nombre') pair are
    collapsed to their first occurrence before ranking.

    Args:
        data: Iterable of dicts, each with an 'Asignaturas' list.
        n: Maximum number of rows to return. Defaults to 40, the value that
            was previously hard-coded.
        ascending: Sort direction for 'Vacantes'. Defaults to True (fewest
            vacancies first), which is what the function has always done
            despite earlier comments describing a descending sort.

    Returns:
        A DataFrame with the five columns listed above, at most ``n`` rows,
        and a fresh 0..k-1 index. Empty input yields an empty DataFrame that
        still has those five columns.

    Raises:
        KeyError: If a record lacks 'Asignaturas' or a subject lacks one of
            the five expected keys.
    """
    columns = ['CodigoAsignatura', 'Nombre', 'Vacantes', 'Nivel', 'Campus']

    # Flatten person -> subject into one record per subject, keeping only the
    # columns of interest.
    all_subjects = [
        {column: subject[column] for column in columns}
        for person in data
        for subject in person['Asignaturas']
    ]

    # Passing `columns` explicitly keeps the schema even when there are no
    # records; without it an empty frame has no columns and drop_duplicates
    # raises KeyError.
    df = pd.DataFrame(all_subjects, columns=columns)

    # The same subject can appear under several people; keep it once.
    df = df.drop_duplicates(subset=['CodigoAsignatura', 'Nombre'])

    ranked = df.sort_values(by='Vacantes', ascending=ascending).head(n)

    # Reset index for clean display
    return ranked.reset_index(drop=True)

import json

# Read the professors JSON inside a context manager so the file handle is
# closed as soon as parsing finishes (a bare open() passed to json.load is
# never closed explicitly).
with open("inputOfProfesores.json", "r", encoding="utf-8") as professors_file:
    data = json.load(professors_file)

# Create DataFrame and display results
# NOTE(review): the heading below says "Top 15", but get_top_subjects as called
# here returns up to 40 rows sorted by ascending 'Vacantes' - confirm which
# behaviour is intended and align the heading or the call.
top_subjects = get_top_subjects(data)  # where data is your JSON
print("\nTop 15 Subjects by Vacancies:")
print(top_subjects[['CodigoAsignatura', 'Nombre', 'Vacantes', 'Nivel', 'Campus']])


Top 15 Subjects by Vacancies:
   CodigoAsignatura                     Nombre  Vacantes  Nivel       Campus
0         (IC904-A)                 PRODUCCION         1      9  Playa Brava
1         (IC902-A)    SISTEMAS DE INFORMACIÓN         2      9  Playa Brava
2         (CCI95-A)  METODOLOGIA DE LA INVESTI         3      9  Playa Brava
3         (IEP3C-A)       SALUD OCUPACIONAL II         5      3  Playa Brava
4         (ICA94-A)  PLANIFICACION TERRITORIAL         5      9  Playa Brava
5        (INT56-FB)       INGENIERIA ECONOMICA        10      5  Playa Brava
6        (INT61-FB)        MECANICA DE FLUIDOS        10      6  Playa Brava
7         (ICE64-A)  LABORATORIO DE ELECTRÓNIC        10      6  Playa Brava
8         (IMEB9-A)  DERECHO MINERO Y AMBIENTA        10      9  Playa Brava
9         (ICE63-A)         SISTEMAS DIGITALES        10      6  Playa Brava
10        (ICA61-A)        MECANICA DE FLUIDOS        10      6  Playa Brava
11        (ICA63-A)    FENOMENOS DE TRANSPORT

In [9]:
import pandas as pd

def analyze_classroom_assignments(file_path):
    # Read the Excel file
    df = pd.read_excel(file_path)
    
    # Get total number of courses
    total_courses = len(df)
    
    # Count courses with assignments meeting expected hours (Assigned_Blocks == Expected_Hours)
    courses_fully_assigned = len(df[df['Assigned_Blocks'] == df['Expected_Hours']])
    courses_partially_assigned = len(df[df['Assigned_Blocks'] > 0 & (df['Assigned_Blocks'] < df['Expected_Hours'])])
    
    # Calculate percentages
    full_assignment_percentage = (courses_fully_assigned / total_courses) * 100
    partial_assignment_percentage = (courses_partially_assigned / total_courses) * 100
    
    # Print overall results
    print(f"\nClassroom Assignment Analysis:")
    print(f"Total number of courses: {total_courses}")
    print(f"Courses with full assignments (matching expected hours): {courses_fully_assigned}")
    print(f"Courses with partial assignments: {courses_partially_assigned}")
    print(f"Full assignment percentage: {full_assignment_percentage:.2f}%")
    print(f"Partial assignment percentage: {partial_assignment_percentage:.2f}%")
    
    # Show distribution by subject
    print("\nAssignment distribution by subject:")
    assignment_by_subject = df.groupby('Subject_Name').agg({
        'Subject_Name': 'count',  # Total courses per subject
        'Assigned_Blocks': lambda x: (x > 0).sum(),  # Courses with any assignments
        'Expected_Hours': 'sum',  # Total expected hours
        'Assigned_Blocks': 'sum'  # Total assigned blocks
    }).rename(columns={
        'Subject_Name': 'total_courses',
        'Assigned_Blocks': 'total_assigned_blocks'
    })
    
    # Calculate completion percentage for each subject
    assignment_by_subject['completion_percentage'] = (
        assignment_by_subject['total_assigned_blocks'] / 
        assignment_by_subject['Expected_Hours'] * 100
    )
    
    print(assignment_by_subject.sort_values('completion_percentage', ascending=False))
    
    # Show detailed statistics
    print("\nAssignment Statistics:")
    df['assignment_deficit'] = df['Expected_Hours'] - df['Assigned_Blocks']
    print("\nDeficit Statistics (Expected - Assigned):")
    print(df['assignment_deficit'].describe())
    
    # Count courses by assignment status
    print("\nAssignment Status Breakdown:")
    df['status'] = pd.cut(df['Assigned_Blocks']/df['Expected_Hours'] * 100,
                         bins=[-float('inf'), 0, 50, 99.9, 100, float('inf')],
                         labels=['No Assignment', 'Under 50%', '50-99%', 'Fully Assigned', 'Over-Assigned'])
    status_counts = df['status'].value_counts()
    status_percentages = status_counts / len(df) * 100
    for status, count in status_counts.items():
        print(f"{status}: {count} courses ({status_percentages[status]:.1f}%)")

# Usage: run the analysis against the validator spreadsheet, addressed by a
# path relative to the current working directory. The __name__ guard means
# this only runs when the code executes as the main module.
if __name__ == "__main__":
    file_path = "../schedule_analysis_validator.xlsx"
    analyze_classroom_assignments(file_path)


Classroom Assignment Analysis:
Total number of courses: 338
Courses with full assignments (matching expected hours): 305
Courses with partial assignments: 322
Full assignment percentage: 90.24%
Partial assignment percentage: 95.27%

Assignment distribution by subject:
                           total_courses  total_assigned_blocks  \
Subject_Name                                                      
ADMINISTRACION DE RIESGO               1                      3   
ARQUITECTURA Y MATERIALID              1                      3   
ANALISIS DE ALGORITMOS                 1                      4   
BIOQUIMICA Y MICROBIOLOGI              1                      5   
BALANCE DE MATERIA Y ENER              1                      3   
...                                  ...                    ...   
PREPARACION MECANICA DE M              2                      4   
ADMINISTRACION DE EMPRESA              1                      1   
CALCULO III                            1                    