In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

# Load the dataset
df = pd.read_csv('Correlation V2.csv')

# Data cleaning - keep only rows where the waiting time, the staffed-bed count
# and the physician count are all strictly positive. A comparison against NaN
# evaluates to False, so rows with a missing value in any of these three
# columns are dropped by the same mask.
# .copy() makes df_clean an independent DataFrame rather than a slice of df,
# so new columns can be assigned to it without pandas raising
# SettingWithCopyWarning.
df_clean = df[
    (df['AvgOfWaitingTime'] > 0)
    & (df['No.OfBedStaffed'] > 0)
    & (df['No.OfPhysicians'] > 0)
].copy()

# Feature and target selection
features = ['No.OfBedStaffed', 'No.OfPhysicians']
target = 'AvgOfWaitingTime'
X = df_clean[features]
y = df_clean[target]

# Initialize the Random Forest model (fixed seed for reproducible fits)
model = RandomForestRegressor(n_estimators=100, random_state=42)

# Train the model on the entire clean dataset (no hold-out split is made here)
model.fit(X, y)

# Optimization function applying the ML model
def optimize_with_ml(row, model, max_increase_factor):
    """Pick the joint bed/physician increase with the lowest predicted wait.

    Ten evenly spaced increase fractions between 0 and ``max_increase_factor``
    (inclusive) are applied to the row's beds and physicians together. The
    candidate with the lowest predicted waiting time is chosen, but only if
    that prediction is strictly below the row's observed waiting time;
    otherwise the row's own values are returned unchanged. Ties go to the
    smallest increase.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) providing
            'No.OfBedStaffed', 'No.OfPhysicians' and 'AvgOfWaitingTime'.
        model: Fitted regressor whose ``predict`` accepts a 2-column input
            ordered as (beds, physicians) and returns one value per input row.
        max_increase_factor: Largest fractional increase to try, e.g. 0.2
            for +20%.

    Returns:
        pd.Series of three values: beds, physicians, waiting time. The
        candidate values are not rounded, so they may be fractional.
    """
    base_beds = row['No.OfBedStaffed']
    base_physicians = row['No.OfPhysicians']
    current_wait = row['AvgOfWaitingTime']

    # Build every candidate up front so the model is queried once per row
    # instead of once per candidate.
    scale = 1 + np.linspace(0, max_increase_factor, 10)
    candidates = np.column_stack((base_beds * scale, base_physicians * scale))

    # A model fitted on a DataFrame records its column names in
    # feature_names_in_; passing the same names back avoids scikit-learn's
    # "X does not have valid feature names" warning. The column order is
    # assumed to be (beds, physicians), as in the positional call it replaces.
    feature_names = getattr(model, 'feature_names_in_', None)
    if feature_names is None:
        model_input = candidates
    else:
        model_input = pd.DataFrame(candidates, columns=list(feature_names))

    predictions = np.asarray(model.predict(model_input))

    # Only candidates that beat the observed waiting time are eligible; a NaN
    # prediction or NaN observed value compares False and is never selected.
    improves = predictions < current_wait
    if not improves.any():
        return pd.Series([base_beds, base_physicians, current_wait])

    # argmin returns the first minimum, so ties resolve to the smallest increase.
    best = np.flatnonzero(improves)[np.argmin(predictions[improves])]
    return pd.Series([candidates[best, 0], candidates[best, 1], predictions[best]])

# Run the optimizer on every cleaned row, allowing up to a 20% increase, and
# store the three returned values as new columns
optimized_columns = ['OptimizedBeds', 'OptimizedPhysicians', 'OptimizedWaitingTime']
optimized_values = df_clean.apply(optimize_with_ml, axis=1, args=(model, 0.2))
df_clean[optimized_columns] = optimized_values

# Write the augmented dataframe to CSV, omitting the index column
output_file = 'optimized_waiting_times_v1.csv'
df_clean.to_csv(output_file, index=False)

print(f"Optimized data saved to {output_file}")