In [3]:
# %%
from itertools import product

import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

In [4]:
# %%
# Read the regression dataset from disk into a DataFrame.
data_path = 'regression_data.csv'  # replace with your actual path
data = pd.read_csv(filepath_or_buffer=data_path)

In [5]:
# %%
# Drop the columns that should not be passed to the model as features.
# errors='ignore' makes drop() skip labels that are not present instead of
# raising KeyError. This cell rebinds `data`, so without it a second run of the
# cell (or an input file that lacks one of these columns) would fail.
data = data.drop(columns=['Unnamed: 0', 'call_date'], errors='ignore')

In [6]:
# Split the frame into the predictor matrix (X: every column except the
# target) and the regression target (y: the 'staffing_requirements' column).
X = data.drop(columns='staffing_requirements')
y = data['staffing_requirements']

In [7]:
# Hold out 20% of the rows as a test set; the fixed random_state makes the
# partition reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [8]:
# %%
# Build a Random Forest regressor (seeded via random_state) and train it on
# the training split. fit() is left as the cell's last expression so the
# notebook still displays the fitted estimator.
model = RandomForestRegressor(random_state=42)
model.fit(X=X_train, y=y_train)

RandomForestRegressor(random_state=42)

In [10]:
# %%
# Sensitivity analysis: re-score the held-out rows (X_test) under every
# combination of hypothetical adjustments and summarise the predictions.

# Adjustment grids.
# The service level goal is given in percent; each value is divided by 100 and
# *replaces* the observed 'service_level_percent_to_goal' column.
sl_goal_changes = range(65, 30, -5)  # 65, 60, ..., 35
# The remaining grids are *added* to the observed column values.
aht_changes = range(0, 151, 30)  # Hypothetical changes to average handle time
occupancy_changes = range(0, 41, 5)  # Hypothetical changes to occupancy (divided by 100 before being added)
senior_calls_changes = range(0, 301, 50)  # Hypothetical changes to senior calls

# Feature columns that are adjusted per scenario and later used as group keys
scenario_columns = [
    'service_level_percent_to_goal',
    'avg_handle_time',
    'occupancy',
    'senior_calls',
]

# One adjusted, scored copy of X_test per scenario
simulations = []

# product() walks the grids with the right-most one varying fastest, i.e. in
# the same order as four nested for-loops over the same iterables.
for sl_goal, aht_change, occupancy_change, senior_calls_change in product(
    sl_goal_changes, aht_changes, occupancy_changes, senior_calls_changes
):
    # Work on a copy so X_test itself is never modified
    scenario = X_test.copy()
    # Apply this scenario's adjustments
    scenario['service_level_percent_to_goal'] = sl_goal / 100
    scenario['avg_handle_time'] += aht_change
    scenario['occupancy'] += occupancy_change / 100
    scenario['senior_calls'] += senior_calls_change
    # Score first, then attach: predict() must see only the feature columns,
    # so the prediction column is added after the call returns.
    scenario['predicted_staff_requirements'] = model.predict(scenario)
    simulations.append(scenario)

# Combine all scenarios into one DataFrame
simulations_df = pd.concat(simulations, ignore_index=True)

# Mean predicted staffing for each distinct combination of the four adjusted
# feature values.
# NOTE(review): the keys are the post-adjustment feature values, not the
# applied deltas, so rows from different scenarios that end up with identical
# values in all four columns are averaged together. Confirm this is intended
# rather than one summary row per applied adjustment.
simulations_grouped = simulations_df.groupby(scenario_columns)['predicted_staff_requirements'].mean()

# Turn the group keys back into ordinary columns
simulations_grouped_df = simulations_grouped.reset_index()

In [11]:
# %%
# Write the grouped results to a CSV file; index=False leaves the DataFrame's
# row index out of the file.
simulations_grouped_df.to_csv(
    path_or_buf='sensitivity_analysis_results.csv',
    index=False,
)