In [15]:
import pandas as pd

# Load the CSV file
file_path = "ScanRecords.csv"  # Replace with the actual path to your CSV file
try:
    data = pd.read_csv(file_path)
    print("CSV file loaded successfully!")
    print(data.head())  # Display the first few rows of the DataFrame
except FileNotFoundError:
    print(f"Error: The file '{file_path}' was not found.")
except Exception as e:
    print(f"An error occurred: {e}")

CSV file loaded successfully!
         Date   Time  Duration PatientType
0  2023-08-01   8.23  0.949176      Type 2
1  2023-08-01   8.49  0.479593      Type 1
2  2023-08-01   9.12  0.496112      Type 2
3  2023-08-01  10.26  0.691947      Type 2
4  2023-08-01  10.64  0.345412      Type 1


In [16]:
data

Unnamed: 0,Date,Time,Duration,PatientType
0,2023-08-01,8.23,0.949176,Type 2
1,2023-08-01,8.49,0.479593,Type 1
2,2023-08-01,9.12,0.496112,Type 2
3,2023-08-01,10.26,0.691947,Type 2
4,2023-08-01,10.64,0.345412,Type 1
...,...,...,...,...
613,2023-08-31,15.54,0.409158,Type 1
614,2023-08-31,15.62,0.447213,Type 2
615,2023-08-31,15.87,0.468037,Type 1
616,2023-08-31,16.49,0.713091,Type 2


In [17]:
# Filter the data for Type 1 patients
type1_data = data[data['PatientType'] == 'Type 1']

# Group by date and count arrivals per day
daily_arrivals = type1_data.groupby('Date').size()
lambda_estimate = daily_arrivals.mean()  # Mean of daily arrivals
print(f"Estimated daily arrival rate (λ): {lambda_estimate:.2f}")

Estimated daily arrival rate (λ): 16.48


In [18]:
# Convert 'Duration' to a numpy array
scan_durations = type1_data['Duration'].values
mu_estimate = scan_durations.mean()  # Mean of scan durations
sigma_estimate = scan_durations.std()  # Standard deviation of scan durations
print(f"Estimated mean (μ): {mu_estimate:.2f}")
print(f"Estimated standard deviation (σ): {sigma_estimate:.2f}")

Estimated mean (μ): 0.43
Estimated standard deviation (σ): 0.10


In [19]:
import numpy as np
from scipy.stats import norm

def bootstrap_parametric(n_bootstraps=1000):
    boot_means = []
    boot_stds = []
    for _ in range(n_bootstraps):
        # Generate bootstrap samples from the fitted normal distribution
        boot_sample = norm.rvs(loc=mu_estimate, scale=sigma_estimate, size=len(scan_durations))
        boot_means.append(boot_sample.mean())
        boot_stds.append(boot_sample.std())
    return np.array(boot_means), np.array(boot_stds)

# Run the bootstrap
n_bootstraps = 1000
boot_means, boot_stds = bootstrap_parametric(n_bootstraps)

# Confidence intervals
mean_ci = (np.percentile(boot_means, 2.5), np.percentile(boot_means, 97.5))
std_ci = (np.percentile(boot_stds, 2.5), np.percentile(boot_stds, 97.5))
print(f"95% CI for mean (μ): {mean_ci}")
print(f"95% CI for standard deviation (σ): {std_ci}")

95% CI for mean (μ): (np.float64(0.42232260993751947), np.float64(0.4419682813305272))
95% CI for standard deviation (σ): (np.float64(0.09032704610972543), np.float64(0.10445100786830823))


In [20]:
# The daily_arrivals mean already serves as the MLE for λ in a Poisson distribution
print(f"MLE for daily arrival rate (λ): {lambda_estimate:.2f}")

MLE for daily arrival rate (λ): 16.48


In [21]:
# Estimate confidence intervals for daily arrival rate
arrival_ci = (np.percentile(daily_arrivals, 2.5), np.percentile(daily_arrivals, 97.5))
print(f"95% CI for daily arrivals: {arrival_ci}")

95% CI for daily arrivals: (np.float64(10.55), np.float64(23.0))


In [22]:
# Print previously calculated CI for scan duration
print(f"95% CI for mean (μ): {mean_ci}")
print(f"95% CI for standard deviation (σ): {std_ci}")

95% CI for mean (μ): (np.float64(0.42232260993751947), np.float64(0.4419682813305272))
95% CI for standard deviation (σ): (np.float64(0.09032704610972543), np.float64(0.10445100786830823))


In [23]:
from scipy.stats import norm
import numpy as np

# Given parameters for Type 1 patients
mean_duration = 0.43  # in hours
std_duration = 0.10   # in hours
confidence_level = 0.95

# Calculate the appropriate timeslot length
z_score = norm.ppf((1 + confidence_level) / 2)  # 99% confidence interval
timeslot_length = mean_duration + z_score * std_duration
print(f"Recommended timeslot length for Type 1 patients (95% CI): {timeslot_length:.2f} hours ({timeslot_length * 60:.2f} minutes)")


Recommended timeslot length for Type 1 patients (95% CI): 0.63 hours (37.56 minutes)


In [24]:
import numpy as np
import pandas as pd
from scipy.stats import poisson, norm, expon
import datetime

# Set random seed for reproducibility
np.random.seed(42)

# Given parameters for Type 1 patients
mean_daily_arrivals = 16.48  # Estimated Poisson parameter (lambda)
mean_duration = 0.43         # Mean scan duration (in hours)
std_duration = 0.10          # Standard deviation of scan duration (in hours)

# Simulation settings
n_days = 30  # Number of days to simulate
start_date = datetime.date(2023, 8, 1)  # Start date
working_start_hour = 8  # Start of working day
working_end_hour = 17  # End of working day (inclusive)
working_hours = working_end_hour - working_start_hour  # Daily operational hours

# Function to simulate a single day of Type 1 patients
def simulate_type1_day(mean_daily_arrivals, mean_duration, std_duration, day_index):
    """
    Simulates one day of Type 1 patient arrivals and durations.
    """
    records = []
    
    # Simulate number of daily arrivals (Poisson distributed)
    n_arrivals = poisson.rvs(mean_daily_arrivals)
    
    # Generate exponential inter-arrival times (in hours)
    inter_arrivals = expon.rvs(scale=1.0 / (n_arrivals / working_hours), size=n_arrivals)
    arrival_times = np.cumsum(inter_arrivals) + working_start_hour  # Adjust for working hours start
    
    # Truncate to working hours (8:00 to 17:00)
    arrival_times = arrival_times[arrival_times < working_end_hour]
    
    # Simulate scan durations (normally distributed)
    durations = norm.rvs(mean_duration, std_duration, size=len(arrival_times))
    
    # Create records for each patient
    for arrival_time, duration in zip(arrival_times, durations):
        # Format arrival time as float for Time column (e.g., 8.23)
        hours = int(arrival_time)
        minutes = (arrival_time - hours) * 60
        time_float = round(hours + (minutes / 60), 2)
        
        record = {
            "Date": (start_date + datetime.timedelta(days=day_index)).strftime("%Y-%m-%d"),
            "Time": time_float,
            "Duration": duration,
            "PatientType": "Type 1"
        }
        records.append(record)
    
    return records

# Simulate data for multiple days
def simulate_type1_patients(n_days, mean_daily_arrivals, mean_duration, std_duration):
    all_records = []
    for day_idx in range(n_days):
        day_records = simulate_type1_day(mean_daily_arrivals, mean_duration, std_duration, day_idx)
        all_records.extend(day_records)
    return pd.DataFrame(all_records)

# Run simulation for Type 1 patients
simulated_data = simulate_type1_patients(n_days, mean_daily_arrivals, mean_duration, std_duration)

# Save simulated data to CSV
simulated_data.to_csv("simulated_type1_patients_formatted.csv", index=False)

# Print a preview of the simulated data
print(simulated_data.head())

print("Simulated data saved to 'simulated_type1_patients_formatted.csv'")


         Date  Time  Duration PatientType
0  2023-08-01  8.79  0.168745      Type 1
1  2023-08-01  9.34  0.525037      Type 1
2  2023-08-01  9.44  0.511645      Type 1
3  2023-08-01  9.54  0.277612      Type 1
4  2023-08-01  9.58  0.387195      Type 1
Simulated data saved to 'simulated_type1_patients_formatted.csv'


In [25]:
simulated_data

Unnamed: 0,Date,Time,Duration,PatientType
0,2023-08-01,8.79,0.168745,Type 1
1,2023-08-01,9.34,0.525037,Type 1
2,2023-08-01,9.44,0.511645,Type 1
3,2023-08-01,9.54,0.277612,Type 1
4,2023-08-01,9.58,0.387195,Type 1
...,...,...,...,...
407,2023-08-30,14.30,0.367713,Type 1
408,2023-08-30,15.23,0.380403,Type 1
409,2023-08-30,15.28,0.413189,Type 1
410,2023-08-30,15.61,0.392660,Type 1
