In [1]:
# Importing Relevant Packages
from ortools.sat.python import cp_model
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time
import os
import math
import random
import re
import csv
import unicodedata
from collections import defaultdict
from dataclasses import dataclass
from typing import List, Optional
from statistics import stdev
from fpdf import FPDF
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from scipy.optimize import minimize

load C:\Users\andri\anaconda3\Lib\site-packages\ortools\.libs\zlib1.dll...
load C:\Users\andri\anaconda3\Lib\site-packages\ortools\.libs\abseil_dll.dll...
load C:\Users\andri\anaconda3\Lib\site-packages\ortools\.libs\utf8_validity.dll...
load C:\Users\andri\anaconda3\Lib\site-packages\ortools\.libs\re2.dll...
load C:\Users\andri\anaconda3\Lib\site-packages\ortools\.libs\libprotobuf.dll...
load C:\Users\andri\anaconda3\Lib\site-packages\ortools\.libs\highs.dll...
load C:\Users\andri\anaconda3\Lib\site-packages\ortools\.libs\ortools.dll...


In [3]:
# Read the whole instance description from "test15.txt" into a single string
with open("test15.txt", "r") as f:
    text = f.read()

# --- Locate the ROOMS and PATIENTS sections ---
# ROOMS is captured non-greedily up to the "BEDS:" header; PATIENTS runs to the end of the text.
# re.DOTALL lets '.' match newlines so each section is captured as one multi-line block.
match_rooms = re.search(r"ROOMS:(.*?)BEDS:", text, re.DOTALL)
match_patients = re.search(r"PATIENTS:(.*)", text, re.DOTALL)

# Start from an empty section and only fill it in when the header was actually found
rooms_raw = ""
if match_rooms:
    rooms_raw = match_rooms.group(1).strip()

patients_raw = ""
if match_patients:
    patients_raw = match_patients.group(1).strip()

# --- Parse Rooms ---
# Each usable room line holds six fields separated by a whitespace-padded '|'.
# As consumed below the fields are:
#   "<RoomID> <RoomNumber>" | <Capacity> | <DepartmentID> | <Gender> | <specialism ints> | <property ints>
rooms = []  # One dict per successfully parsed room line
for line in rooms_raw.splitlines():
    parts = re.split(r'\s+\|\s+', line.strip())
    # Header rows, blank lines and anything else without exactly six fields are ignored silently
    if len(parts) != 6:
        continue
    try:
        # First field carries two integers: the room id and the room number
        room_id, room_number = map(int, parts[0].split())
        # Third field: department id
        department_id = int(parts[2])
        # Second field: the value stored under "Capacity"
        capacity = int(parts[1])
        # Fourth field: gender code, kept as text
        gender = parts[3]
        # Fifth field: at least six integers, read as alternating (penalty, specialism) values.
        # NOTE(review): the pair ordering is taken from the key names used below - confirm
        # against the instance file format.
        specialties = list(map(int, parts[4].split()))
        # Sixth field: zero or more equipment flags
        properties = list(map(int, parts[5].split()))

        # Key order matters: it becomes the CSV column order when the records are written out
        room = {
            "RoomID": room_id,
            "RoomNumber": room_number,
            "DepartmentID": department_id,
            "Capacity": capacity,
            "Gender": gender,
            "ReqSpecialism1": specialties[1],  # Required Specialism 1
            "ReqSpecialism2": specialties[3],  # Required Specialism 2
            "ReqSpecialism3": specialties[5],  # Required Specialism 3
            "PenaltySpecialism1": specialties[0],  # Penalty for Specialism 1
            "PenaltySpecialism2": specialties[2],  # Penalty for Specialism 2
            "PenaltySpecialism3": specialties[4]  # Penalty for Specialism 3
        }

        # Attach as many equipment flags as the line provides (at most four);
        # zip stops at the shorter sequence, so absent flags simply add no key.
        for equipment_name, flag in zip(("Telemetry", "Oxygen", "Nitrogen", "Television"), properties):
            room[equipment_name] = flag

        rooms.append(room)
    except (ValueError, IndexError) as e:
        # ValueError: a non-integer token or a first field without exactly two values.
        # IndexError: fewer than six specialism integers.
        # Only these parse failures are tolerated; anything else is a real bug and should surface.
        print(f"Skipping room line due to error: {e}")
        continue

# --- Parse Patients ---
# Each usable patient line holds at least six fields separated by a whitespace-padded '|':
#   "<ID> <Name> <Age> <Gender>" | "<AdmissionDay> <ReleaseDay>" | <specialism ints>
#   | <PreferredRoomCapacity> | <needed property flags> | <preferred property flags>
patients = []  # One dict per successfully parsed patient line
for line in patients_raw.splitlines():
    line = line.strip()
    # Skip blank lines, the END terminator and lines with too few separators.
    # The terminator is matched as a prefix: a substring test would also throw away
    # valid records that merely contain "END" somewhere (e.g. inside a name).
    if not line or line.startswith('END') or line.count('|') < 5:
        print(f" Skipping line: '{line}' (invalid format)")
        continue
    try:
        parts = re.split(r'\s+\|\s+', line)
        # First field: whitespace-separated id, name, age and gender
        id_name_age_gender = parts[0].split()
        if len(id_name_age_gender) < 4:
            raise ValueError("Missing patient ID/name/age/gender")

        patient_id = int(id_name_age_gender[0])
        name = id_name_age_gender[1]
        age = int(id_name_age_gender[2])
        gender = id_name_age_gender[3]

        # Second field: exactly two integers, admission day then release day
        admission_day, release_day = map(int, parts[1].split())

        # Third field: a leading count followed by (specialism id, days) pairs
        specialism_info = list(map(int, parts[2].split()))
        # Indexing the count also rejects an empty specialism field (IndexError -> line skipped)
        specialism_count = specialism_info[0]
        specialism_pairs = specialism_info[1:]

        # Up to two (id, days) pairs are kept; absent pairs stay None
        spec1_id = spec1_days = spec2_id = spec2_days = None
        if len(specialism_pairs) >= 2:
            spec1_id, spec1_days = specialism_pairs[0], specialism_pairs[1]
        if len(specialism_pairs) >= 4:
            spec2_id, spec2_days = specialism_pairs[2], specialism_pairs[3]

        # Fourth field: preferred room capacity
        preferred_capacity = int(parts[3])
        # Fifth and sixth fields: needed / preferred property flags
        needed_props = list(map(int, parts[4].split()))
        preferred_props = list(map(int, parts[5].split()))

        # Key order matters: it becomes the CSV column order when the records are written out
        patient = {
            "PatientID": patient_id,
            "Name": name,
            "Age": age,
            "Gender": gender,
            "AdmissionDay": admission_day,
            "ReleaseDay": release_day,
            "PreferredRoomCapacity": preferred_capacity,
            "SpecialismID1": spec1_id,
            "SpecialismDays1": spec1_days,
            "SpecialismID2": spec2_id,
            "SpecialismDays2": spec2_days
        }

        # Attach as many flags as the line provides (at most four per group);
        # zip stops at the shorter sequence, so absent flags simply add no key.
        for key, flag in zip(("NeedsTelemetry", "NeedsOxygen", "NeedsNitrogen", "NeedsTV"), needed_props):
            patient[key] = flag
        for key, flag in zip(("PrefersTelemetry", "PrefersOxygen", "PrefersNitrogen", "PrefersTV"), preferred_props):
            patient[key] = flag

        patients.append(patient)

    except (ValueError, IndexError) as e:
        # ValueError: non-integer token, wrong number of day values, or the explicit raise above.
        # IndexError: fewer than six parsed fields or an empty specialism field.
        # Only these parse failures are tolerated; anything else is a real bug and should surface.
        print(f" Skipping patient line due to error: {e}")
        continue

# --- Write CSV Files ---
def _write_records_csv(path, records):
    """Write a non-empty list of dicts to `path` as a CSV file with a header row.

    The header is the union of the keys of all records, in first-seen order.
    Records carry optional keys (equipment / property flags), so taking the
    header from the first record alone would make `DictWriter.writerows` raise
    ValueError as soon as a later record has a key the first one lacks.
    Records missing a column get an empty cell (DictWriter's default `restval`).
    """
    fieldnames = list(dict.fromkeys(key for record in records for key in record))
    with open(path, "w", newline='') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(records)


# Track what was really written so the confirmation message cannot lie
written_files = []

if rooms:
    _write_records_csv("rooms_data15.csv", rooms)
    written_files.append("rooms_data15.csv")
else:
    print("No rooms parsed.")

if patients:
    _write_records_csv("patients_data15.csv", patients)
    written_files.append("patients_data15.csv")
else:
    print("No patients parsed.")

# Confirm only the files that exist, using their actual names
if written_files:
    print(" CSV files generated: " + " and ".join(written_files))

 Skipping line: '' (invalid format)
 Skipping line: 'END.' (invalid format)
 CSV files generated: rooms_data.csv and patients_data.csv


In [5]:
# Reload both parsed tables from disk so this cell always starts from the CSV contents
rooms_df = pd.read_csv("rooms_data15.csv")
patients_df = pd.read_csv("patients_data15.csv")

print("\nStarting data cleaning...")

# --- Clean Rooms ---
print("\nCleaning Room Data")
initial_room_count = len(rooms_df)  # Row count before any filtering, used in the summary line

# Report, then drop, every room row that has a missing value in ANY column
room_rows_with_missing = rooms_df.isnull().any(axis=1)
if room_rows_with_missing.any():
    print("Missing values found in room data. Dropping rows with missing values:")
    print(rooms_df[room_rows_with_missing])
rooms_df = rooms_df.dropna()

# Capacity must be strictly positive
invalid_capacity = rooms_df[rooms_df["Capacity"] <= 0]
if len(invalid_capacity) > 0:
    print(f"{len(invalid_capacity)} rooms with invalid capacity (<=0):")
    print(invalid_capacity)
rooms_df = rooms_df[rooms_df["Capacity"] > 0]

# Gender must be one of the accepted codes (M, F, D, N)
valid_genders = ["M", "F", "D", "N"]
room_gender_ok = rooms_df["Gender"].isin(valid_genders)
invalid_gender = rooms_df[~room_gender_ok]
if len(invalid_gender) > 0:
    print(f"{len(invalid_gender)} rooms with invalid gender values:")
    print(invalid_gender)
rooms_df = rooms_df[room_gender_ok]

# Cast equipment flags and specialism penalties to int.
# Equipment columns are optional in the CSV, so only those actually present are converted.
equip_cols_all = ["Telemetry", "Oxygen", "Nitrogen", "Television"]
penalty_cols = ["PenaltySpecialism1", "PenaltySpecialism2", "PenaltySpecialism3"]
equip_cols_present = [col for col in equip_cols_all if col in rooms_df.columns]
int_cols = equip_cols_present + penalty_cols
rooms_df[int_cols] = rooms_df[int_cols].astype(int)


# Required specialism ids must be non-negative; each column is checked in turn,
# so a row removed for one column is no longer reported for the next.
specialism_cols = ["ReqSpecialism1", "ReqSpecialism2", "ReqSpecialism3"]
for col in specialism_cols:
    invalid_specs = rooms_df[rooms_df[col] < 0]
    if len(invalid_specs) > 0:
        print(f"{len(invalid_specs)} rooms with negative {col} values:")
        print(invalid_specs)
    rooms_df = rooms_df[rooms_df[col] >= 0]

# Summary: rows before vs. after cleaning
print(f"Room data cleaned: {initial_room_count} → {len(rooms_df)} rows")

# --- Clean Patients ---
print("\nCleaning Patient Data")
initial_patient_count = len(patients_df)  # Row count before any filtering, used in the summary line

# Columns that must be present for a patient row to be usable.
# The second specialism and the property flags are deliberately not in this list.
required_cols = [
    "PatientID", "Name", "Age", "Gender",
    "AdmissionDay", "ReleaseDay", "PreferredRoomCapacity",
    "SpecialismID1", "SpecialismDays1"
]
# Report, then drop, rows missing any required value
missing_required = patients_df[required_cols].isnull().any(axis=1)
if missing_required.any():
    print("Missing values found in critical patient fields. Dropping such rows:")
    print(patients_df[missing_required])
patients_df = patients_df.dropna(subset=required_cols)

# Age must lie in [0, 120]
invalid_ages = patients_df[(patients_df["Age"] < 0) | (patients_df["Age"] > 120)]
if len(invalid_ages) > 0:
    print(f"{len(invalid_ages)} patients with unrealistic ages:")
    print(invalid_ages)
patients_df = patients_df[patients_df["Age"].between(0, 120)]

# Patient gender must be M or F
patient_gender_ok = patients_df["Gender"].isin(["M", "F"])
invalid_gender = patients_df[~patient_gender_ok]
if len(invalid_gender) > 0:
    print(f"{len(invalid_gender)} patients with invalid gender:")
    print(invalid_gender)
patients_df = patients_df[patient_gender_ok]

# Admission must not come after release
bad_dates = patients_df[patients_df["AdmissionDay"] > patients_df["ReleaseDay"]]
if len(bad_dates) > 0:
    print(f"{len(bad_dates)} patients with AdmissionDay > ReleaseDay:")
    print(bad_dates)
patients_df = patients_df[patients_df["AdmissionDay"] <= patients_df["ReleaseDay"]]

# Property flags must be 0 or 1; columns absent from the CSV are skipped
binary_cols = [
    "NeedsTelemetry", "NeedsOxygen", "NeedsNitrogen", "NeedsTV",
    "PrefersTelemetry", "PrefersOxygen", "PrefersNitrogen", "PrefersTV"
]
for col in binary_cols:
    if col not in patients_df.columns:
        continue
    is_binary = patients_df[col].isin([0, 1])
    invalid_vals = patients_df[~is_binary]
    if len(invalid_vals) > 0:
        print(f"Invalid values in column {col} (should be 0 or 1):")
        print(invalid_vals)
    patients_df = patients_df[is_binary]

# Primary specialism id must be non-negative
invalid_spec1 = patients_df[patients_df["SpecialismID1"] < 0]
if len(invalid_spec1) > 0:
    print(f"{len(invalid_spec1)} patients with negative SpecialismID1:")
    print(invalid_spec1)
patients_df = patients_df[patients_df["SpecialismID1"] >= 0]

# Preferred room capacity must be at least 1
invalid_capacity = patients_df[patients_df["PreferredRoomCapacity"] < 1]
if len(invalid_capacity) > 0:
    print(f"{len(invalid_capacity)} patients with PreferredRoomCapacity < 1:")
    print(invalid_capacity)
patients_df = patients_df[patients_df["PreferredRoomCapacity"] >= 1]

# Summary: rows before vs. after cleaning
print(f"Patient data cleaned: {initial_patient_count} → {len(patients_df)} rows")

# Save the cleaned tables (row index omitted so the files contain data columns only)
rooms_df.to_csv("cleaned_rooms15.csv", index=False)
patients_df.to_csv("cleaned_patients15.csv", index=False)

# Confirm completion, naming the files exactly as they were written above
print("\nData cleaning complete. Cleaned CSVs saved as 'cleaned_rooms15.csv' and 'cleaned_patients15.csv'")


Starting data cleaning...

Cleaning Room Data
Room data cleaned: 148 → 148 rows

Cleaning Patient Data
Patient data cleaned: 890 → 890 rows

Data cleaning complete. Cleaned CSVs saved as 'cleaned_rooms.csv' and 'cleaned_patients.csv'
