In [1]:
# Importing Relevant Packages
from ortools.sat.python import cp_model
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time
import os
import math
import random
import re
import csv
import unicodedata
from collections import defaultdict
from dataclasses import dataclass
from typing import List, Optional
from statistics import stdev
from fpdf import FPDF
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from scipy.optimize import minimize

load C:\Users\andri\anaconda3\Lib\site-packages\ortools\.libs\zlib1.dll...
load C:\Users\andri\anaconda3\Lib\site-packages\ortools\.libs\abseil_dll.dll...
load C:\Users\andri\anaconda3\Lib\site-packages\ortools\.libs\utf8_validity.dll...
load C:\Users\andri\anaconda3\Lib\site-packages\ortools\.libs\re2.dll...
load C:\Users\andri\anaconda3\Lib\site-packages\ortools\.libs\libprotobuf.dll...
load C:\Users\andri\anaconda3\Lib\site-packages\ortools\.libs\highs.dll...
load C:\Users\andri\anaconda3\Lib\site-packages\ortools\.libs\ortools.dll...


In [3]:
# Load the whole instance file into memory as one string
with open("test15.txt", "r") as source_file:
    text = source_file.read()

# Locate the two sections of interest:
#   - ROOMS: everything between the "ROOMS:" and "BEDS:" headers
#   - PATIENTS: everything after the "PATIENTS:" header up to the end of the text
match_rooms = re.search(r"ROOMS:(.*?)BEDS:", text, re.DOTALL)
match_patients = re.search(r"PATIENTS:(.*)", text, re.DOTALL)

# Keep the stripped body of each section; a section whose header was not
# found yields an empty string so the parsing loops simply see no lines
if match_rooms:
    rooms_raw = match_rooms.group(1).strip()
else:
    rooms_raw = ""

if match_patients:
    patients_raw = match_patients.group(1).strip()
else:
    patients_raw = ""

# Parse Rooms
rooms = []  # One dictionary per successfully parsed room line

# Output column names. The specialism field is read as alternating
# (penalty, required specialism) values; the equipment flags are optional
# and are filled in the order they appear in the last field.
room_required_keys = ("ReqSpecialism1", "ReqSpecialism2", "ReqSpecialism3")
room_penalty_keys = ("PenaltySpecialism1", "PenaltySpecialism2", "PenaltySpecialism3")
room_equipment_keys = ("Telemetry", "Oxygen", "Nitrogen", "Television")

for line in rooms_raw.splitlines():
    # Fields are separated by a pipe surrounded by whitespace
    parts = re.split(r'\s+\|\s+', line.strip())
    # Only lines with exactly 6 fields are treated as room records;
    # anything else is ignored without a message
    if len(parts) == 6:
        try:
            # Field 0 holds RoomID and RoomNumber
            room_id, room_number = map(int, parts[0].split())
            # Field 2 is read as the DepartmentID, field 1 as the Capacity
            department_id = int(parts[2])
            capacity = int(parts[1])
            # Field 3 is the gender code, kept as text
            gender = parts[3]
            # Fields 4 and 5 are whitespace-separated integer lists
            specialties = [int(token) for token in parts[4].split()]
            properties = [int(token) for token in parts[5].split()]

            # Odd positions are the required specialisms, even positions their penalties
            required = {key: specialties[pos] for key, pos in zip(room_required_keys, (1, 3, 5))}
            penalties = {key: specialties[pos] for key, pos in zip(room_penalty_keys, (0, 2, 4))}

            # Assemble the record; zip() stops at the shorter sequence, so only
            # the equipment flags actually present on the line are added
            room = {
                "RoomID": room_id,
                "RoomNumber": room_number,
                "DepartmentID": department_id,
                "Capacity": capacity,
                "Gender": gender,
                **required,
                **penalties,
                **dict(zip(room_equipment_keys, properties)),
            }

            rooms.append(room)
        except Exception as e:
            # Report the problem and move on to the next line
            print(f"Skipping room line due to error: {e}")

# Parse Patients
patients = []  # One dictionary per successfully parsed patient line

# Column names for the optional needed / preferred equipment flags,
# in the order the flags appear in the corresponding fields
needed_prop_keys = ("NeedsTelemetry", "NeedsOxygen", "NeedsNitrogen", "NeedsTV")
preferred_prop_keys = ("PrefersTelemetry", "PrefersOxygen", "PrefersNitrogen", "PrefersTV")

# Iterate through each line of the PATIENTS section
for line in patients_raw.splitlines():
    line = line.strip()
    # Skip blank lines, the END terminator and lines with fewer than five pipes.
    # The terminator is matched as a prefix: a substring test ('END' in line)
    # would also throw away valid patient rows whose text contains "END",
    # e.g. an upper-case name such as BRENDA.
    if not line or line.startswith('END') or line.count('|') < 5:
        print(f" Skipping line: '{line}' (invalid format)") # Print message for skipped lines
        continue
    try:
        # Fields are separated by a pipe surrounded by whitespace
        parts = re.split(r'\s+\|\s+', line)
        id_name_age_gender = parts[0].split()
        # The first field must hold at least ID, name, age and gender
        if len(id_name_age_gender) < 4:
            raise ValueError("Missing patient ID/name/age/gender")

        # Parse patient ID, name, age, and gender
        patient_id = int(id_name_age_gender[0])
        name = id_name_age_gender[1]
        age = int(id_name_age_gender[2])
        gender = id_name_age_gender[3]

        # Field 1: AdmissionDay and ReleaseDay (exactly two integers)
        admission_day, release_day = map(int, parts[1].split())

        # Field 2: specialism count followed by (specialism ID, days) pairs
        specialism_info = list(map(int, parts[2].split()))
        # Indexing position 0 also rejects an empty specialism field (IndexError -> line skipped)
        specialism_count = specialism_info[0]
        specialism_pairs = specialism_info[1:]

        # Specialism values stay None when the corresponding pair is absent
        spec1_id = spec1_days = spec2_id = spec2_days = None

        # Specialism 1 ID and days
        if len(specialism_pairs) >= 2:
            spec1_id = specialism_pairs[0]
            spec1_days = specialism_pairs[1]
        # Specialism 2 ID and days (if available)
        if len(specialism_pairs) >= 4:
            spec2_id = specialism_pairs[2]
            spec2_days = specialism_pairs[3]

        # Field 3: preferred room capacity
        preferred_capacity = int(parts[3])
        # Fields 4 and 5: needed and preferred property flags
        needed_props = list(map(int, parts[4].split()))
        preferred_props = list(map(int, parts[5].split()))

        # Dictionary for patient data
        patient = {
            "PatientID": patient_id,
            "Name": name,
            "Age": age,
            "Gender": gender,
            "AdmissionDay": admission_day,
            "ReleaseDay": release_day,
            "PreferredRoomCapacity": preferred_capacity,
            "SpecialismID1": spec1_id,
            "SpecialismDays1": spec1_days,
            "SpecialismID2": spec2_id,
            "SpecialismDays2": spec2_days
        }

        # zip() stops at the shorter sequence, so only the flags actually
        # present on the line are added (at most four per group)
        patient.update(zip(needed_prop_keys, needed_props))
        patient.update(zip(preferred_prop_keys, preferred_props))

        # Append dictionary to the patients list
        patients.append(patient)

    except Exception as e:
        # Report the problem and move on to the next line
        print(f" Skipping patient line due to error: {e}")
        continue

# Write CSV Files
# Each parsed table is written to its own CSV. Rows may carry different sets of
# optional keys (the equipment flags are only added when present on a line), so
# the header is the union of all keys in first-seen order rather than the keys
# of the first row only; csv.DictWriter raises ValueError for a row containing
# a key that is missing from fieldnames. Rows lacking a column are written
# with an empty cell (DictWriter's default restval).
written_files = []  # Paths of the CSV files actually produced
for csv_path, rows, empty_message in (
    ("rooms_data15.csv", rooms, "No rooms parsed."),
    ("patients_data15.csv", patients, "No patients parsed."),
):
    if not rows:
        # Nothing was parsed for this table, so no file is written
        print(empty_message)
        continue

    # Ordered union of the keys of every row
    fieldnames = list(dict.fromkeys(key for row in rows for key in row))

    # UTF-8 is written explicitly because pandas.read_csv decodes as UTF-8 by
    # default, while open() would otherwise use the platform's locale encoding
    with open(csv_path, "w", newline='', encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        # Write the header
        writer.writeheader()
        # Write all the parsed rows
        writer.writerows(rows)
    written_files.append(csv_path)

# Confirmation message, naming only the files that were really written
if written_files:
    print(f" CSV files generated: {' and '.join(written_files)}")

 Skipping line: '' (invalid format)
 Skipping line: 'END.' (invalid format)
 CSV files generated: rooms_data.csv and patients_data.csv


In [None]:
# Data Cleaning and Preprocessing
# Read the parsed room and patient tables back from the CSV files
rooms_csv_path = "rooms_data15.csv"
patients_csv_path = "patients_data15.csv"
rooms_df = pd.read_csv(rooms_csv_path)
patients_df = pd.read_csv(patients_csv_path)

print("\nStarting data cleaning...")

# Clean Room Data
# Each step reports the offending rows (if any) and then keeps only the valid ones.
print("\nCleaning Room Data")
initial_room_count = len(rooms_df) # Row count before cleaning, used in the summary line

# Missing values: a room with any missing field is dropped
rows_with_missing = rooms_df.isnull().any(axis=1)
if rows_with_missing.any(): # Check for missing values
    print("Missing values found in room data. Dropping rows with missing values:")
    print(rooms_df[rows_with_missing]) # Print rows with missing values
rooms_df = rooms_df.dropna() # New frame instead of mutating in place

# Invalid capacity: capacity must be strictly positive
invalid_capacity = rooms_df[rooms_df["Capacity"] <= 0] # Identify rows with invalid capacity
if not invalid_capacity.empty: # Check if there are any
    print(f"{len(invalid_capacity)} rooms with invalid capacity (<=0):")
    print(invalid_capacity)
rooms_df = rooms_df[rooms_df["Capacity"] > 0] # Filter out

# Gender values: only the codes listed below are accepted
valid_genders = ["M", "F", "D", "N"] # Valid gender values
invalid_gender = rooms_df[~rooms_df["Gender"].isin(valid_genders)] # Rows with invalid gender
if not invalid_gender.empty: # Check if there are any
    print(f"{len(invalid_gender)} rooms with invalid gender values:")
    print(invalid_gender)
rooms_df = rooms_df[rooms_df["Gender"].isin(valid_genders)] # Filter out

# Convert type
# Equipment and penalty column names
equip_cols_all = ["Telemetry", "Oxygen", "Nitrogen", "Television"]
penalty_cols = ["PenaltySpecialism1", "PenaltySpecialism2", "PenaltySpecialism3"]

# Keep the equipment columns that actually exist in the DataFrame
equip_cols_present = [col for col in equip_cols_all if col in rooms_df.columns]
# Convert those columns to integer. astype() with a column -> dtype mapping
# returns a new frame; assigning the columns back into the filtered frame
# (rooms_df[cols] = ...) triggers pandas' SettingWithCopyWarning.
rooms_df = rooms_df.astype({col: int for col in equip_cols_present + penalty_cols})


# Required specialism (must be non-negative: zero is kept, negatives are dropped)
specialism_cols = ["ReqSpecialism1", "ReqSpecialism2", "ReqSpecialism3"] # Specialism column names
for col in specialism_cols: # Iterate through specialism columns
    invalid_specs = rooms_df[rooms_df[col] < 0] # Negative specialism values rows
    if not invalid_specs.empty: # Check if there are any
        print(f"{len(invalid_specs)} rooms with negative {col} values:")
        print(invalid_specs)
    rooms_df = rooms_df[rooms_df[col] >= 0] # Filter out

# Print the number of rows before and after
print(f"Room data cleaned: {initial_room_count} → {len(rooms_df)} rows")

# Clean Patient Data
# Every check builds its validity mask once, reports the rows that fail it,
# and then keeps only the rows that pass.
print("\nCleaning Patient Data")
initial_patient_count = len(patients_df) # Row count before cleaning, used in the summary line

# Missing Values
# Columns that must be filled for a patient row to be usable
required_cols = [
    "PatientID", "Name", "Age", "Gender",
    "AdmissionDay", "ReleaseDay", "PreferredRoomCapacity",
    "SpecialismID1", "SpecialismDays1"
]
missing_required = patients_df[required_cols].isnull().any(axis=1)
if missing_required.any():
    print("Missing values found in critical patient fields. Dropping such rows:")
    print(patients_df[missing_required])
patients_df = patients_df.dropna(subset=required_cols)

# Age validity: ages outside 0..120 (inclusive) are treated as unrealistic
age_in_range = patients_df["Age"].between(0, 120)
invalid_ages = patients_df[~age_in_range]
if not invalid_ages.empty:
    print(f"{len(invalid_ages)} patients with unrealistic ages:")
    print(invalid_ages)
patients_df = patients_df[age_in_range]

# Valid gender: only "M" and "F" are accepted for patients
gender_ok = patients_df["Gender"].isin(["M", "F"])
invalid_gender = patients_df[~gender_ok]
if not invalid_gender.empty:
    print(f"{len(invalid_gender)} patients with invalid gender:")
    print(invalid_gender)
patients_df = patients_df[gender_ok]

# Admission before release: same-day admission and release is allowed
dates_ok = patients_df["AdmissionDay"] <= patients_df["ReleaseDay"]
bad_dates = patients_df[~dates_ok]
if not bad_dates.empty:
    print(f"{len(bad_dates)} patients with AdmissionDay > ReleaseDay:")
    print(bad_dates)
patients_df = patients_df[dates_ok]

# Validate binary columns
# Flag columns that may only hold 0 or 1; columns absent from the frame are ignored
binary_cols = [
    "NeedsTelemetry", "NeedsOxygen", "NeedsNitrogen", "NeedsTV",
    "PrefersTelemetry", "PrefersOxygen", "PrefersNitrogen", "PrefersTV"
]
for col in binary_cols:
    if col not in patients_df.columns:
        continue
    is_binary = patients_df[col].isin([0, 1])
    invalid_vals = patients_df[~is_binary]
    if not invalid_vals.empty:
        print(f"Invalid values in column {col} (should be 0 or 1):")
        print(invalid_vals)
    patients_df = patients_df[is_binary]

# SpecialismID: the first specialism ID must not be negative
spec1_ok = patients_df["SpecialismID1"] >= 0
invalid_spec1 = patients_df[~spec1_ok]
if not invalid_spec1.empty:
    print(f"{len(invalid_spec1)} patients with negative SpecialismID1:")
    print(invalid_spec1)
patients_df = patients_df[spec1_ok]

# Preferred capacity: must be at least 1
capacity_ok = patients_df["PreferredRoomCapacity"] >= 1
invalid_capacity = patients_df[~capacity_ok]
if not invalid_capacity.empty:
    print(f"{len(invalid_capacity)} patients with PreferredRoomCapacity < 1:")
    print(invalid_capacity)
patients_df = patients_df[capacity_ok]

# Summary: number of patient rows before and after cleaning
print(f"Patient data cleaned: {initial_patient_count} → {len(patients_df)} rows")

# Save cleaned CSVs
# The output paths are named once so the confirmation message below always
# reports the files that were actually written.
cleaned_rooms_path = "cleaned_rooms15.csv"
cleaned_patients_path = "cleaned_patients15.csv"
rooms_df.to_csv(cleaned_rooms_path, index=False) # Save the cleaned rooms DataFrame without the index column
patients_df.to_csv(cleaned_patients_path, index=False) # Save the cleaned patients DataFrame without the index column

# Print a confirmation message that data cleaning is complete and files are saved
print(f"\nData cleaning complete. Cleaned CSVs saved as '{cleaned_rooms_path}' and '{cleaned_patients_path}'")