In [6]:
import pandas as pd
import plotly.express as px  # if needed for further plotting
import re
import numpy as np


def process_resource_room(filepath):
    """
    Processes the resource room Excel file and returns a cleaned and categorized DataFrame.

    Parameters:
        filepath (str): The path to the Excel file (e.g., '/content/CSD - Resource Room.xlsx').

    Returns:
        pd.DataFrame: The processed DataFrame with an added 'Type' column.
    """
    # Read the Excel file
    df = pd.read_excel(filepath)
    Resource_Room = pd.DataFrame(df)
    
    # Remove unwanted columns
    Resource_Room = Resource_Room.drop(labels='Campus', axis=1)
    Resource_Room = Resource_Room.drop(labels='Workshop', axis=1)
    
    # Exclude rows containing 'IMus'
    Resource_Room = Resource_Room[
        ~Resource_Room.apply(lambda row: row.astype(str).str.contains('IMus', case=False, na=False).any(), axis=1)
    ]
    
    # Exclude rows where Lecture, Tutorial, Lab are all 'N'
    Resource_Room = Resource_Room[~(Resource_Room[['Lecture', 'Tutorial', 'Lab']].eq('N').all(axis=1))]
    
    # Fill NA values with 'N' and replace 'N' in Capacity with '20'
    Resource_Room = Resource_Room.fillna('N')
    Resource_Room['Capacity'] = Resource_Room['Capacity'].replace('N', '20')
    
    # Drop rows with Capacity equal to 0
    Resource_Room = Resource_Room.drop(Resource_Room[Resource_Room['Capacity'] == 0].index)
    
    # Categorize as General where Lecture, Tutorial, Lab are all 'Y'
    conditions = [['Y', 'Y', 'Y']]
    columns_to_check = ['Lecture', 'Tutorial', 'Lab']
    General = Resource_Room[
        Resource_Room[columns_to_check].apply(tuple, axis=1).isin(map(tuple, conditions))
    ]
    
    # Exclude specific conditions to form etc_Room (not strictly needed for exclusion logic in this snippet)
    conditions_to_exclude = [['Y', 'Y', 'N'], ['N', 'Y', 'Y']]
    etc_Room = Resource_Room[
        ~Resource_Room[columns_to_check].apply(tuple, axis=1).isin(map(tuple, conditions))
    ]
    
    # Identify Kitchen and Design related rooms
    Kitchen = etc_Room[
        etc_Room.apply(lambda row: row.astype(str).str.contains('Kitchen', regex=False).any(), axis=1)
    ]
    Design = etc_Room[
        etc_Room['Description'].str.contains('DESIGN', case=False, na=False)
    ]
    df_combined = pd.concat([Kitchen, Design])
    
    # Get remaining rows excluding those in df_combined
    LL = etc_Room[
        ~etc_Room.apply(tuple, axis=1).isin(df_combined.apply(tuple, axis=1))
    ]
    
    # Categorize Lab rooms: where [Lecture, Tutorial, Lab] == ['N', 'N', 'Y']
    lab_condition = [['N', 'N', 'Y']]
    Lab = LL[
        LL[columns_to_check].apply(tuple, axis=1).isin(map(tuple, lab_condition)) &
        ~LL.apply(tuple, axis=1).isin(df_combined.apply(tuple, axis=1))
    ]
    
    # Categorize Lecture rooms: where [Lecture, Tutorial, Lab] == ['Y', 'N', 'N']
    lecture_condition = [['Y', 'N', 'N']]
    Lecture = LL[
        LL[columns_to_check].apply(tuple, axis=1).isin(map(tuple, lecture_condition)) &
        ~LL.apply(tuple, axis=1).isin(df_combined.apply(tuple, axis=1))
    ]
    
    # Update df_combined with Lab and Lecture, then get ETC as remaining rows
    df_combined = pd.concat([Lab, Lecture])
    ETC = LL[~LL.apply(tuple, axis=1).isin(df_combined.apply(tuple, axis=1))]
    
    # Assign types
    General['Type'] = 'General'
    Lab['Type'] = 'Lab'
    Kitchen['Type'] = 'Kitchen'
    Design['Type'] = 'Art'
    Lecture['Type'] = 'Lecture'
    ETC['Type'] = 'Etc'
    
    # Combine all categorized DataFrames
    df_final = pd.concat([General, ETC, Kitchen, Design, Lab, Lecture], ignore_index=True)
    df_final = df_final.drop_duplicates(keep='first')
    
    # Override type to 'PBL' if any row contains 'PBL'
    df_final.loc[
        df_final.astype(str).apply(lambda row: row.str.contains('PBL', case=False, na=False).any(), axis=1),
        'Type'
    ] = 'PBL'
    
    return df_final

# Example usage:
filepath = "CSD - Resource Room (5).xlsx"
abc = process_resource_room(filepath)
abc


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  General['Type'] = 'General'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  Lab['Type'] = 'Lab'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  Kitchen['Type'] = 'Kitchen'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See t

Unnamed: 0,Resource Code,Description,Capacity,Lecture,Tutorial,Lab,Resource Status,Type
0,MPH - GBS,MPH - [MULTIPURPOSE HALL],100.0,Y,Y,Y,Active,General
1,C404,C404 - [C404],20.0,Y,Y,Y,Active,General
2,C3M04b,C3M04 - [C3M04b-SABE Computer Lab],30.0,Y,Y,Y,Active,General
3,C3M05,C3M05 - [C3M05-Computer Lab],30.0,Y,Y,Y,Active,General
4,C314,C314 - [Microteaching Lab],30.0,Y,Y,Y,Active,General
...,...,...,...,...,...,...,...,...
180,CHEM1,Chemistry 1 Lab,48.0,N,N,Y,Active,Lab
181,CHEM2,Chemistry 2 Lab,48.0,N,N,Y,Active,Lab
182,PHYTO,Phytochemistry Lab,20.0,N,N,Y,Active,Lab
183,Civil Workshop,"Civil Workshop, Level 4 Block E",30.0,N,N,Y,Active,Lab


In [None]:
import pandas as pd
import plotly.express as px  # if needed for further plotting
import re
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.optimize import linear_sum_assignment


def process_resource_room(filepath):
    """
    Processes the resource room Excel file and returns a cleaned and categorized DataFrame.

    Parameters:
        filepath (str): The path to the Excel file (e.g., '/content/CSD - Resource Room.xlsx').

    Returns:
        pd.DataFrame: The processed DataFrame with an added 'Type' column.
    """
    # Read the Excel file
    df = pd.read_excel(filepath)
    Resource_Room = pd.DataFrame(df)
    
    # Remove unwanted columns
    Resource_Room = Resource_Room.drop(labels='Campus', axis=1)
    Resource_Room = Resource_Room.drop(labels='Workshop', axis=1)
    
    # Exclude rows containing 'IMus'
    Resource_Room = Resource_Room[
        ~Resource_Room.apply(lambda row: row.astype(str).str.contains('IMus', case=False, na=False).any(), axis=1)
    ]
    
    # Exclude rows where Lecture, Tutorial, Lab are all 'N'
    Resource_Room = Resource_Room[~(Resource_Room[['Lecture', 'Tutorial', 'Lab']].eq('N').all(axis=1))]
    
    # Fill NA values with 'N' and replace 'N' in Capacity with '20'
    Resource_Room = Resource_Room.fillna('N')
    Resource_Room['Capacity'] = Resource_Room['Capacity'].replace('N', '20')
    
    # Drop rows with Capacity equal to 0
    Resource_Room = Resource_Room.drop(Resource_Room[Resource_Room['Capacity'] == 0].index)
    
    # Categorize as General where Lecture, Tutorial, Lab are all 'Y'
    conditions = [['Y', 'Y', 'Y']]
    columns_to_check = ['Lecture', 'Tutorial', 'Lab']
    General = Resource_Room[
        Resource_Room[columns_to_check].apply(tuple, axis=1).isin(map(tuple, conditions))
    ]
    
    # Exclude specific conditions to form etc_Room (not strictly needed for exclusion logic in this snippet)
    conditions_to_exclude = [['Y', 'Y', 'N'], ['N', 'Y', 'Y']]
    etc_Room = Resource_Room[
        ~Resource_Room[columns_to_check].apply(tuple, axis=1).isin(map(tuple, conditions))
    ]
    
    # Identify Kitchen and Design related rooms
    Kitchen = etc_Room[
        etc_Room.apply(lambda row: row.astype(str).str.contains('Kitchen', regex=False).any(), axis=1)
    ]
    Design = etc_Room[
        etc_Room['Description'].str.contains('DESIGN', case=False, na=False)
    ]
    df_combined = pd.concat([Kitchen, Design])
    
    # Get remaining rows excluding those in df_combined
    LL = etc_Room[
        ~etc_Room.apply(tuple, axis=1).isin(df_combined.apply(tuple, axis=1))
    ]
    
    # Categorize Lab rooms: where [Lecture, Tutorial, Lab] == ['N', 'N', 'Y']
    lab_condition = [['N', 'N', 'Y']]
    Lab = LL[
        LL[columns_to_check].apply(tuple, axis=1).isin(map(tuple, lab_condition)) &
        ~LL.apply(tuple, axis=1).isin(df_combined.apply(tuple, axis=1))
    ]
    
    # Categorize Lecture rooms: where [Lecture, Tutorial, Lab] == ['Y', 'N', 'N']
    lecture_condition = [['Y', 'N', 'N']]
    Lecture = LL[
        LL[columns_to_check].apply(tuple, axis=1).isin(map(tuple, lecture_condition)) &
        ~LL.apply(tuple, axis=1).isin(df_combined.apply(tuple, axis=1))
    ]
    
    # Update df_combined with Lab and Lecture, then get ETC as remaining rows
    df_combined = pd.concat([Lab, Lecture])
    ETC = LL[~LL.apply(tuple, axis=1).isin(df_combined.apply(tuple, axis=1))]
    
    # Assign types
    General['Type'] = 'General'
    Lab['Type'] = 'Lab'
    Kitchen['Type'] = 'Kitchen'
    Design['Type'] = 'Art'
    Lecture['Type'] = 'Lecture'
    ETC['Type'] = 'Etc'
    
    # Combine all categorized DataFrames
    df_final = pd.concat([General, ETC, Kitchen, Design, Lab, Lecture], ignore_index=True)
    df_final = df_final.drop_duplicates(keep='first')
    
    # Override type to 'PBL' if any row contains 'PBL'
    df_final.loc[
        df_final.astype(str).apply(lambda row: row.str.contains('PBL', case=False, na=False).any(), axis=1),
        'Type'
    ] = 'PBL'
    
    return df_final


def preprocess_offer(file_path):
    # ÏóëÏÖÄ ÌååÏùºÏùÑ DataFrameÏúºÎ°ú Î∂àÎü¨ÏòµÎãàÎã§.
    offer = pd.read_excel(file_path)

    offer.drop(columns=['Unnamed: 0'], inplace=True)
    offer.drop(offer.index[1157], axis=0, inplace=True)
    offer['Capacity'] = offer['Capacity'].astype(int)
    offer['Min Per Session'] = offer['Min Per Session'].astype(int)

    offer['Session'] = offer['Session'].str.upper()
    offer['Lecturer'] = offer['Lecturer'].str.upper()
    offer['CourseCode'] = offer['CourseCode'].str.upper()
    offer['FacultyCode'] = offer['FacultyCode'].str.upper()

    offer['Session'] = offer['Session'].str.replace(r'\(S\)', '', regex=True)

    # "COMBINED TO /"Î•º Î≥µÏÇ¨ÌïòÏó¨ error ÌÖåÏù¥Î∏îÏóê Ï†ÄÏû•. Ïù¥ÌõÑ ÏõêÎ≥∏ Îç∞Ïù¥ÌÑ∞ÏóêÏÑú ÏÇ≠Ï†ú.
    mask = offer['Session'].str.contains("COMBINED TO /", na=False)

    # error Îç∞Ïù¥ÌÑ∞ÌîÑÎ†àÏûÑÏóê Ìï¥Îãπ ÌñâÎì§ÏùÑ Î≥µÏÇ¨ÌïòÏó¨ Ï†ÄÏû•Ìï©ÎãàÎã§.
    error = offer[mask].copy()

    # ÏõêÎ≥∏ offer Îç∞Ïù¥ÌÑ∞ÌîÑÎ†àÏûÑÏóêÏÑú Ìï¥Îãπ ÌñâÎì§ÏùÑ ÏÇ≠Ï†úÌï©ÎãàÎã§.
    offer.drop(offer[mask].index, inplace=True)

    #"GROUP A", "GROUP B", "GROUP C" Îì± Ìï¥ÎãπÌïòÎäî ÌñâÏùÑ Ï∞æÍ≥†, Session Í∞íÏùÑ "LECTURE"Î°ú Î≥ÄÍ≤Ω
    offer.loc[offer['Session'].str.contains(r'^GROUP\s+[A-Z]$', na=False), 'Session'] = 'LECTURE'

    group_cols = ['CourseCode', 'FacultyCode', 'Session', 'Capacity', 'Min Per Session', 'Lecturer']

    # Í∑∏Î£πÎ≥ÑÎ°ú Ï§ëÎ≥µÎêú ÌñâÏùÑ Ï≤òÎ¶¨Ìï©ÎãàÎã§.
    for _, group in offer.groupby(group_cols):
        if len(group) > 1:  # Ï§ëÎ≥µ Í∑∏Î£πÏù∏ Í≤ΩÏö∞
            # ÏõêÎûò Session Í∞íÏùÑ ÏïûÎí§ Í≥µÎ∞± Ï†úÍ±∞ ÌõÑ ÏÇ¨Ïö©Ìï©ÎãàÎã§.
            orig_session = group.iloc[0]['Session'].strip()
            # Session Í∞íÏù¥ Ïà´ÏûêÎ°ú ÎÅùÎÇòÎäîÏßÄ Í≤ÄÏÇ¨ (Ïòà: "TUTORIAL 1")
            m = re.search(r'^(.*?)(\d+)$', orig_session)
            if m:
                base = m.group(1).strip()  # Î¨∏Ïûê Î∂ÄÎ∂Ñ (Ïòà: "TUTORIAL")
                start_num = int(m.group(2))  # Í∏∞Ï°¥Ïóê Î∂ôÏùÄ Ïà´Ïûê (Ïòà: 1)
            else:
                base = orig_session
                start_num = 1

            # Í∑∏Î£π ÎÇ¥ Í∞Å ÌñâÏóê ÎåÄÌï¥ ÏàúÏ∞®Ï†ÅÏúºÎ°ú Î≤àÌò∏Î•º Î∂ôÏó¨ Session Í∞íÏùÑ Î≥ÄÍ≤ΩÌï©ÎãàÎã§.
            for offset, idx in enumerate(group.index):
                new_session = f"{base} {start_num + offset}"
                offer.at[idx, 'Session'] = new_session

    # (Ï¥àÍ∏∞ Ï†ÑÏ†ú: offer DataFrameÍ≥º error DataFrameÏù¥ Ï°¥Ïû¨Ìï®; errorÍ∞Ä ÏóÜÏúºÎ©¥ Îπà DataFrame ÏÉùÏÑ±)
    if 'error' not in globals():
        error = pd.DataFrame()

    ### Step 1. offerÏóêÏÑú "COMBINED TO" Ìñâ Ï∂îÏ∂ú ‚Üí child_classÎ°ú Ï†ÄÏû•, offerÏóêÏÑúÎäî Ï†úÍ±∞
    child_class = offer[offer['Session'].str.contains('COMBINED TO', na=False)].copy()
    offer.drop(child_class.index, inplace=True)

    ### Step 2. child_classÏùò Session Ïó¥ÏóêÏÑú Í≥ºÎ™©ÏΩîÎìú Ï∂îÏ∂ú
    child_class['extracted_code'] = child_class['Session'].str.extract(r'COMBINED TO\s+([A-Z0-9]+)', expand=False)

    ### Step 3. child_class ÎÇ¥ÏóêÏÑú Ïò§Î•ò Î∞è Ï†ïÏÉÅ/ÌõÑÏÜê Î∂ÑÎ¶¨

    # (A) "ÏûêÍ∏∞ ÏûêÏã† Ï∞∏Ï°∞" Ï°∞Í±¥: Ï∂îÏ∂úÌïú ÏΩîÎìúÍ∞Ä Ìï¥Îãπ ÌñâÏùò CourseCodeÏôÄ Í∞ôÏúºÎ©¥ ‚Üí Ïò§Î•ò
    self_ref = child_class['extracted_code'] == child_class['CourseCode']

    # (B) Ï†ïÏÉÅ(child) Ï°∞Í±¥: ÏûêÍ∏∞ ÏûêÏã† Ï∞∏Ï°∞Í∞Ä ÏïÑÎãàÍ≥†, Ï∂îÏ∂úÌïú ÏΩîÎìúÍ∞Ä offerÏùò CourseCodeÏóê Ï°¥Ïû¨ÌïòÎäî Í≤ΩÏö∞
    valid_child_mask = (~self_ref) & (child_class['extracted_code'].isin(offer['CourseCode']))

    # (C) ÌõÑÏÜê(grandchild) ÌõÑÎ≥¥: ÏûêÍ∏∞ ÏûêÏã† Ï∞∏Ï°∞Í∞Ä ÏïÑÎãàÍ≥†, Ï∂îÏ∂úÌïú ÏΩîÎìúÍ∞Ä offerÏóêÎäî ÏóÜÏßÄÎßå, child_classÏùò CourseCodeÏóêÎäî Ï°¥Ïû¨ÌïòÎäî Í≤ΩÏö∞  
    #     (Ï¶â, Î∂ÄÎ™®(ÎåÄÏÉÅ CourseCode)Í∞Ä offerÏóê ÏóÜÏúºÎØÄÎ°ú ÏûêÏãùÎÅºÎ¶¨ Ïó∞Í≤∞ÎêòÏñ¥ ÏûàÏùåÏùÑ ÏùòÎØ∏)
    valid_grand_mask = (~self_ref) & (~child_class['extracted_code'].isin(offer['CourseCode'])) & \
                        (child_class['extracted_code'].isin(child_class['CourseCode']))

    # (D) Ïò§Î•ò Ï°∞Í±¥:  
    #     - ÏûêÍ∏∞ ÏûêÏã† Ï∞∏Ï°∞Ïù∏ Í≤ΩÏö∞  
    #     - ÎòêÎäî Ï∂îÏ∂úÌïú ÏΩîÎìúÍ∞Ä offerÏóêÎèÑ child_classÏóêÎèÑ Ï°¥Ïû¨ÌïòÏßÄ ÏïäÎäî Í≤ΩÏö∞
    error_mask = self_ref | ((~child_class['extracted_code'].isin(offer['CourseCode'])) & 
                               (~child_class['extracted_code'].isin(child_class['CourseCode'])))

    # child_class Ïò§Î•ò Ï≤òÎ¶¨: Ï∂îÏ∂ú
    child_error = child_class[error_mask].copy()

    # Ï†ïÏÉÅ child_class (valid_child) Ï∂îÏ∂ú
    valid_child = child_class[valid_child_mask].copy()

    # ÌõÑÎ≥¥ ÌõÑÏÜê(grandchild) Ï∂îÏ∂ú
    candidate_grand = child_class[valid_grand_mask].copy()

    ### Step 4. grand_child_class ÎÇ¥ÏóêÏÑú Ï∂îÍ∞Ä Ïò§Î•ò Ï≤òÎ¶¨
    # Ïò§Î•ò Ï°∞Í±¥ (grandchild): ÏûêÍ∏∞ ÏûêÏã† Ï∞∏Ï°∞ ‚Üí Ïò§Î•ò
    grand_self_ref = candidate_grand['extracted_code'] == candidate_grand['CourseCode']
    error_grand = candidate_grand[grand_self_ref].copy()

    # ÏµúÏ¢Ö grand_child_class: ÌõÑÎ≥¥ÏóêÏÑú ÏûêÍ∏∞ ÏûêÏã† Ï∞∏Ï°∞ Ïò§Î•ò Ï†úÍ±∞
    grand_child_class = candidate_grand[~grand_self_ref].copy()

    # ÏµúÏ¢Ö error: Í∏∞Ï°¥ child Ïò§Î•òÏôÄ ÌõÑÏÜêÏóêÏÑú Î∞úÏÉùÌïú Ïò§Î•òÎ•º Î™®Îëê ÎàÑÏ†Å
    error_df = pd.concat([child_error, error_grand], ignore_index=True)
    error = pd.concat([error, error_df], ignore_index=True)

    ### ÏµúÏ¢Ö Ï†ïÏÉÅ Îç∞Ïù¥ÌÑ∞
    # child_classÎäî valid_childÎ°ú ÏóÖÎç∞Ïù¥Ìä∏ (offerÏùò Î∂ÄÎ™®Í∞Ä Ï°¥Ïû¨ÌïòÎäî Í≤ΩÏö∞)
    child_class = valid_child.copy()
    # grand_child_classÎäî ÏúÑÏóêÏÑú Íµ¨Ìïú ÎåÄÎ°ú

    # error DataFrameÏù¥ ÏóÜÏúºÎ©¥ Îπà DataFrame ÏÉùÏÑ±
    if 'error' not in globals():
        error = pd.DataFrame()

    ##############################################
    # ÍµêÏ∞®Í≤ÄÏ¶ù 1: child_classÏóêÏÑú ÏûêÍ∏∞ ÏûêÏã† Ï∞∏Ï°∞ÌïòÎäî Ìñâ Ï≤òÎ¶¨
    ##############################################
    child_self_ref_mask = (child_class['extracted_code'] == child_class['CourseCode'])
    child_self_ref_errors = child_class[child_self_ref_mask].copy()
    error = pd.concat([error, child_self_ref_errors], ignore_index=True)
    child_class = child_class[~child_self_ref_mask].copy()

    ##############################################
    # ÍµêÏ∞®Í≤ÄÏ¶ù 2: child_classÏóêÏÑú Ï∂îÏ∂úÌïú Í≥ºÎ™©ÏΩîÎìúÍ∞Ä offerÏùò CourseCodeÏóê ÏóÜÏùå(Î∂ÄÎ™® ÏóÜÏùå)
    ##############################################
    child_missing_parent_mask = ~child_class['extracted_code'].isin(offer['CourseCode'])
    child_missing_parent_errors = child_class[child_missing_parent_mask].copy()
    error = pd.concat([error, child_missing_parent_errors], ignore_index=True)
    child_class = child_class[~child_missing_parent_mask].copy()

    ##############################################
    # ÍµêÏ∞®Í≤ÄÏ¶ù 3: offerÏóêÏÑú ÏûêÍ∏∞ ÏûêÏã† Ï∞∏Ï°∞ÌïòÎäî Ìñâ Ï≤òÎ¶¨
    # (offerÏóê COMBINED TOÍ∞Ä ÎÇ®ÏïÑÏûàÎã§Î©¥ Ìï¥Îãπ ÌñâÏóê ÎåÄÌï¥ extracted_codeÎ•º Ï∂îÏ∂ú)
    ##############################################
    offer_combined_mask = offer['Session'].str.contains('COMBINED TO', na=False)
    if offer_combined_mask.any():
        offer.loc[offer_combined_mask, 'extracted_code'] = offer.loc[offer_combined_mask, 'Session'].str.extract(r'COMBINED TO\s+([A-Z0-9]+)', expand=False)
        offer_self_ref_mask = (offer['extracted_code'] == offer['CourseCode'])
        offer_self_ref_errors = offer[offer_self_ref_mask].copy()
        error = pd.concat([error, offer_self_ref_errors], ignore_index=True)
        offer.drop(offer[offer_self_ref_mask].index, inplace=True)

    ##############################################
    # ÍµêÏ∞®Í≤ÄÏ¶ù 4: grand_child_classÏóêÏÑú ÏûêÍ∏∞ ÏûêÏã† Ï∞∏Ï°∞ÌïòÎäî Ìñâ Ï≤òÎ¶¨
    ##############################################
    grand_self_ref_mask = (grand_child_class['extracted_code'] == grand_child_class['CourseCode'])
    grand_self_ref_errors = grand_child_class[grand_self_ref_mask].copy()
    error = pd.concat([error, grand_self_ref_errors], ignore_index=True)
    grand_child_class = grand_child_class[~grand_self_ref_mask].copy()

    ##############################################
    # ÍµêÏ∞®Í≤ÄÏ¶ù 5: grand_child_classÏóêÏÑú Ï∂îÏ∂úÌïú Í≥ºÎ™©ÏΩîÎìúÍ∞Ä offerÏùò CourseCodeÏóê ÏóÜÏùå(Î∂ÄÎ™® ÏóÜÏùå)
    ##############################################
    grand_missing_parent_mask = ~grand_child_class['extracted_code'].isin(offer['CourseCode'])
    grand_missing_parent_errors = grand_child_class[grand_missing_parent_mask].copy()
    error = pd.concat([error, grand_missing_parent_errors], ignore_index=True)
    grand_child_class = grand_child_class[~grand_missing_parent_mask].copy()

    ##############################################
    # ÍµêÏ∞®Í≤ÄÏ¶ù 6: Î∂ÄÎ™®(offer ÎòêÎäî child_class)ÏóêÏÑú ÎìúÎûçÎêú ÌñâÍ≥º Ïó∞Í≤∞Îêú ÏûêÏãù(ÎòêÎäî ÏÜêÏûê) Ìñâ Ï≤òÎ¶¨
    # ÎìúÎûçÎêú ÌñâÎì§Ïùò CourseCodeÎ•º Î™®ÏïÑÏÑú, Ïù¥ ÏΩîÎìúÎ•º extracted_codeÎ°ú ÏÇ¨Ïö©ÌïòÎäî ÌñâÏù¥ ÏûàÎã§Î©¥ errorÎ°ú Ï≤òÎ¶¨
    ##############################################
    # ÏßÄÍ∏àÍπåÏßÄ errorÏóê Ï∂îÍ∞ÄÎêú ÌñâÎì§Ïùò CourseCodeÎ•º Î™®ÏùçÎãàÎã§.
    dropped_codes = set(error['CourseCode'].unique())

    # child_classÏóêÏÑú Ïó∞Í≤∞Îêú ÏûêÏãù Ìñâ Í≤ÄÏÇ¨
    child_linked_mask = child_class['extracted_code'].isin(dropped_codes)
    child_linked_errors = child_class[child_linked_mask].copy()
    error = pd.concat([error, child_linked_errors], ignore_index=True)
    child_class = child_class[~child_linked_mask].copy()

    # grand_child_classÏóêÏÑú Ïó∞Í≤∞Îêú ÏÜêÏûê Ìñâ Í≤ÄÏÇ¨
    grand_linked_mask = grand_child_class['extracted_code'].isin(dropped_codes)
    grand_linked_errors = grand_child_class[grand_linked_mask].copy()
    error = pd.concat([error, grand_linked_errors], ignore_index=True)
    grand_child_class = grand_child_class[~grand_linked_mask].copy()

    # child_classÏùò Í∞Å ÌñâÏùÑ ÏàúÌöåÌïòÎ©∞ Ï°∞Í±¥Ïóê ÎßûÎäî offer ÌñâÏùò CapacityÏóê child_classÏùò CapacityÎ•º Ìï©ÏÇ∞Ìï©ÎãàÎã§.
    for idx, child_row in child_class.iterrows():
        # Ï°∞Í±¥: offerÏùò CourseCode, Min Per Session, LecturerÍ∞Ä child_rowÏùò extracted_code, Min Per Session, LecturerÏôÄ ÏùºÏπò
        mask = (
            (offer['CourseCode'] == child_row['extracted_code']) &
            (offer['Min Per Session'] == child_row['Min Per Session']) &
            (offer['Lecturer'] == child_row['Lecturer'])
        )
        # ÏùºÏπòÌïòÎäî ÌñâÏù¥ ÏûàÎã§Î©¥, offerÏùò CapacityÏóê child_rowÏùò CapacityÎ•º ÎçîÌï®
        if mask.any():
            offer.loc[mask, 'Capacity'] += child_row['Capacity']

    # Í∏∞Ï°¥ Ï°∞Í±¥
    conditions = [
        offer['Session'].str.contains('LECTURE & TUTORIAL', na=False, case=False),
        offer['Session'].str.contains('TUTORIAL', na=False, case=False),
        offer['Session'].str.contains('LECTURE', na=False, case=False),
        offer['Session'].str.contains('LAB', na=False, case=False),
        offer['Session'].str.contains('PBL 1', na=False, case=False),
        offer['Session'].str.contains('HAND DRAWING|CAD DRAWING 2|CAD DRAWING 1', na=False, case=False),
        offer['Session'].str.contains('KITCHEN', na=False, case=False),
        offer['Session'].str.contains('OPTOM CLINIC|SOO - EXAM CLINIC', na=False, case=False)
    ]

    # Í∏∞Ï°¥ ÏÑ†ÌÉùÏßÄ + ÏÉà ÏÑ†ÌÉùÏßÄ
    choices = [
        'GENERAL',
        'TUTORIAL',
        'LECTURE',
        'LAB',
        'PBL',
        'ART',
        'KITCHEN',
        'LECTURE'
    ]

    # Ï°∞Í±¥ÏùÑ Ï†ÅÏö©ÌïòÏó¨ 'Category' Ïó¥ ÏÉùÏÑ±
    offer['Category'] = np.select(conditions, choices, default=np.nan)
    
    return offer


def expand_room_slots(df_final):
    time_slots = ['8-11', '13-16', '18-21']
    days = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri']

    expanded_rooms = []
    for _, row in df_final.iterrows():
        for day in days:
            for time in time_slots:
                expanded_rooms.append({
                    'Resource Code': row['Resource Code'],
                    'Type': row['Type'],
                    'Capacity': row['Capacity'],
                    'Assigned Day': day,
                    'Assigned Time Slot': time,
                    'Lecture': row.get('Lecture', 'N'),
                    'Tutorial': row.get('Tutorial', 'N'),
                    'Lab': row.get('Lab', 'N')
                })

    return pd.DataFrame(expanded_rooms)

def preprocess_offers(offer, max_room_capacity):
    expanded_offers = []
    for _, row in offer.iterrows():
        remaining_students = row['Capacity']
        total_duration = row['Min Per Session']
        session_count = 1

        while remaining_students > 0:
            assigned_capacity = min(max_room_capacity, remaining_students)
            remaining_students -= assigned_capacity

            remaining_duration = total_duration
            sub_session = 1

            while remaining_duration > 0:
                session_duration = min(180, remaining_duration)
                remaining_duration -= session_duration

                expanded_offers.append({
                    'Course Code': f"{row['CourseCode']}_S{session_count}_D{sub_session}",
                    'Capacity': assigned_capacity,
                    'Min Per Session': session_duration,
                    'Category': row['Category'],
                    'Lecturer': row['Lecturer']
                })
                sub_session += 1

            session_count += 1

    return pd.DataFrame(expanded_offers)

def match_classroom_type(df_final, offer):
    offer['Category'] = offer['Category'].str.lower()
    df_final['Type'] = df_final['Type'].str.lower()
    df_final['Type'] = df_final.apply(lambda x: 'tutorial' if x['Type'] == 'etc' else x['Type'], axis=1)
    return df_final, offer

def create_cost_matrix(df_final, offer):
    num_offers = len(offer)
    num_rooms = len(df_final)
    max_size = max(num_offers, num_rooms)
    cost_matrix = np.full((max_size, max_size), 1e6)

    df_final['Capacity'] = df_final['Capacity'].astype(int)
    offer['Capacity'] = offer['Capacity'].astype(int)

    # üîπ ÍµêÏàòÎãòÏù¥ ÌäπÏ†ï ÏãúÍ∞ÑÏóê Ï§ëÎ≥µÎêòÏßÄ ÏïäÎèÑÎ°ù ÌïòÍ∏∞ ÏúÑÌïú Îß§Ìïë
    professor_time_map = {}

    for i, offer_row in offer.iterrows():
        for j, room_row in df_final.iterrows():
            assigned_time = (room_row['Assigned Day'], room_row['Assigned Time Slot'])
            professor = offer_row['Lecturer']

            if (offer_row['Category'] == room_row['Type'] or
                (offer_row['Category'] in ['lecture', 'tutorial'] and room_row['Type'] in ['general', 'lecture']) or
                (offer_row['Category'] == 'lecture' and room_row['Type'] == 'tutorial' and room_row['Lecture'] == 'Y')):

                if room_row['Capacity'] >= offer_row['Capacity']:
                    # üîπ Í∞ôÏùÄ ÏãúÍ∞ÑÎåÄÏóê Ïù¥ÎØ∏ Î∞∞Ï†ïÎêú ÍµêÏàòÎãòÏù¥ ÏûàÎäî Í≤ΩÏö∞ Ìå®ÎÑêÌã∞Î•º Ï§å
                    if assigned_time in professor_time_map and professor in professor_time_map[assigned_time]:
                        cost_matrix[i, j] = 1e6  # Î∂àÍ∞ÄÎä•Ìïú Î∞∞Ï†ï (ÌÅ∞ Í∞í)
                    else:
                        cost_matrix[i, j] = room_row['Capacity'] - offer_row['Capacity']

    return cost_matrix


def assign_classes(df_final, offer):
    df_final_expanded = expand_room_slots(df_final)
    cost_matrix = create_cost_matrix(df_final_expanded, offer)

    num_offers, num_rooms = cost_matrix.shape
    max_size = max(num_offers, num_rooms)

    if num_offers < max_size:
        cost_matrix = np.vstack([cost_matrix, np.full((max_size - num_offers, max_size), 1e6)])
    if num_rooms < max_size:
        cost_matrix = np.hstack([cost_matrix, np.full((max_size, max_size - num_rooms), 1e6)])

    row_ind, col_ind = linear_sum_assignment(cost_matrix)

    assignments = []
    professor_time_map = {}  # üîπ ÍµêÏàòÎãò ÏãúÍ∞Ñ Ï§ëÎ≥µ Ï≤¥ÌÅ¨

    for r, c in zip(row_ind, col_ind):
        if r < len(offer) and c < len(df_final_expanded) and cost_matrix[r, c] != 1e6:
            assigned_day = df_final_expanded.iloc[c]['Assigned Day']
            assigned_time = df_final_expanded.iloc[c]['Assigned Time Slot']
            professor = offer.iloc[r]['Lecturer']

            # üîπ Í∞ôÏùÄ ÏãúÍ∞ÑÏóê Í∞ôÏùÄ ÍµêÏàòÎãòÏù¥ Ïù¥ÎØ∏ Î∞∞Ï†ïÎêú Í≤ΩÏö∞ Î¨¥Ï°∞Í±¥ Ï†úÏô∏
            if (assigned_day, assigned_time) in professor_time_map and professor in professor_time_map[(assigned_day, assigned_time)]:
                continue  # üö® Ïù¥ Î∞∞Ï†ïÏùÄ Î¨¥Ìö®! Ïä§ÌÇµ!

            # üîπ ÍµêÏàòÎãò Î∞∞Ï†ï ÌôïÏ†ï (ÏãúÍ∞ÑÎåÄ Ï†ÄÏû•)
            if (assigned_day, assigned_time) not in professor_time_map:
                professor_time_map[(assigned_day, assigned_time)] = set()
            professor_time_map[(assigned_day, assigned_time)].add(professor)

            assignments.append({
                'Group': offer.iloc[r]['Course Code'],
                'Category': offer.iloc[r]['Category'],
                'Lecturer': professor,
                'Assigned Room': df_final_expanded.iloc[c]['Resource Code'],
                'Room Type': df_final_expanded.iloc[c]['Type'],
                'Group Capacity': offer.iloc[r]['Capacity'],
                'Room Capacity': df_final_expanded.iloc[c]['Capacity'],
                'Assigned Day': assigned_day,
                'Assigned Time Slot': assigned_time,
                'Lecture': df_final_expanded.iloc[c]['Lecture'],
                'Tutorial': df_final_expanded.iloc[c]['Tutorial'],
                'Lab': df_final_expanded.iloc[c]['Lab'],
                'Duration (min)': offer.iloc[r]['Min Per Session']
            })

    return pd.DataFrame(assignments)


def generate_assigned_schedule(df_final, offer):
    """
    Generates the assigned schedule by processing offers and matching them with available rooms.

    Parameters:
        df_final (pd.DataFrame): The processed resource room DataFrame.
        offer (pd.DataFrame): The course offer DataFrame.

    Returns:
        pd.DataFrame: A DataFrame containing the final class assignments.
    """
    # Determine the maximum room capacity from the resource room DataFrame
    max_room_capacity = df_final['Capacity'].astype(int).max()
    
    # Process the offers by expanding them based on room capacity
    offer = preprocess_offers(offer, max_room_capacity)
    
    # Align the room types and offer categories for matching
    df_final, offer = match_classroom_type(df_final, offer)
    
    # Assign classes based on the constructed cost matrix and constraints
    assigned_schedule = assign_classes(df_final, offer)
    
    return assigned_schedule

# Example usage:
# assigned_schedule = generate_assigned_schedule(df_final, offer)
# print(assigned_schedule)
