In [30]:
import pandas as pd
import numpy as np
import pickle

artifacts_filename = 'service_completion_artifacts.pkl'

# SECURITY: pickle.load can execute arbitrary code embedded in the file.
# Only load artifact files produced by a trusted training run.
with open(artifacts_filename, 'rb') as file:
    artifacts = pickle.load(file)

# Fail fast and name every missing entry at once, rather than stopping with a
# bare KeyError on the first absent key further down.
required_keys = ('model', 'le_task', 'le_section', 'features', 'tasks_df', 'staffing_df')
missing_keys = [key for key in required_keys if key not in artifacts]
if missing_keys:
    raise KeyError(f"'{artifacts_filename}' is missing expected artifacts: {missing_keys}")

# Unpack the artifacts into the notebook-level names used by later cells.
loaded_model = artifacts['model']
le_task = artifacts['le_task']
le_section = artifacts['le_section']
features = artifacts['features']
tasks_df = artifacts['tasks_df']
staffing_df = artifacts['staffing_df']

print("Model loaded successfully.")

Model loaded successfully.


Load the New Input Data

In [31]:
# Read the rows to be scored and report how many were loaded.
input_filename = 'task1_test_inputs.csv'
test_df = pd.read_csv(filepath_or_buffer=input_filename)
print(f"{len(test_df)} rows from '{input_filename}'.")

49906 rows from 'task1_test_inputs.csv'.


Prediction Function

In [34]:
def predict_completion_time(date_str, time_str, task_id):
    """Predict the completion time for one appointment using the retrained model.

    Relies on the notebook-level objects unpacked from the artifacts file:
    ``tasks_df``, ``staffing_df``, ``le_task``, ``le_section``, ``features``
    and ``loaded_model``.

    Args:
        date_str: Appointment date in any form ``pd.to_datetime`` can parse
            (e.g. ``'2025-01-01'``).
        time_str: Appointment time formatted as ``HH:MM``.
        task_id: Task identifier; it must have a row in ``tasks_df`` and be
            accepted by ``le_task``.

    Returns:
        int: The model's first prediction rounded to the nearest integer.

    Raises:
        ValueError: If ``task_id`` has no ``section_id`` in ``tasks_df``.
    """
    input_data = pd.DataFrame({'date': [date_str], 'time': [time_str], 'task_id': [task_id]})

    # Calendar features derived from the appointment date and time.
    input_data['appointment_date'] = pd.to_datetime(input_data['date'])
    input_data['appointment_hour'] = pd.to_datetime(input_data['time'], format='%H:%M').dt.hour
    input_data['appointment_day_of_week'] = input_data['appointment_date'].dt.dayofweek
    input_data['appointment_month'] = input_data['appointment_date'].dt.month
    input_data['appointment_day_of_year'] = input_data['appointment_date'].dt.dayofyear

    # Resolve the task's section; a left join leaves NaN when the task is unknown.
    input_data = pd.merge(input_data, tasks_df[['task_id', 'section_id']], on='task_id', how='left')
    if input_data['section_id'].isna().any():
        # Stop here with a clear message instead of carrying a NaN section into
        # the staffing join and the section encoder below.
        raise ValueError(f"Unknown task_id {task_id!r}: no section_id found in tasks_df.")

    # Attach the staffing columns for that section on the appointment date.
    input_data = pd.merge(
        input_data,
        staffing_df,
        how='left',
        left_on=['appointment_date', 'section_id'],
        right_on=['date', 'section_id'],
    )

    # These fills only take effect when the joins produced more than one row.
    # NOTE(review): on the usual single-row frame a missing staffing match stays
    # NaN and is passed to the model as-is -- confirm the model tolerates NaN or
    # add an explicit fallback.
    input_data = input_data.ffill().bfill()

    input_data['task_id_encoded'] = le_task.transform(input_data['task_id'])
    input_data['section_id_encoded'] = le_section.transform(input_data['section_id'])

    prediction = loaded_model.predict(input_data[features])
    return int(round(prediction[0]))


In [35]:
print("\nGenerating predictions...")

# Score every input row with predict_completion_time and keep the result in a
# new column alongside the inputs. Only the three columns the function needs
# are passed, unpacked positionally as (date, time, task_id).
test_df['expected_completion_time_minutes'] = test_df[['date', 'time', 'task_id']].apply(
    lambda appointment: predict_completion_time(*appointment),
    axis=1,
)


Generating predictions...


In [42]:
# Persist the inputs together with their predictions; the dataframe index is
# not written to the file.
output_filename = 'task1_predictions.csv'
test_df.to_csv(path_or_buf=output_filename, index=False)

In [43]:
# Show the first five rows as plain text to spot-check the new prediction column.
print(test_df.head(n=5))

                                     row_id        date   time   task_id  \
0  a19eed30dfbab7586131ca2329207b9cff81d5d5  2025-01-01  09:01  TASK-001   
1  8da6066c865c7053bb39092977fc0e513e045159  2025-01-01  09:01  TASK-008   
2  0cec7d70a44a71f0ceb7f78fabd2d0fef1439649  2025-01-01  09:01  TASK-017   
3  de2e5611033bbae4fb959d008455103b2da53e01  2025-01-01  09:02  TASK-007   
4  40736d54ee59e94c3dc5f34127d160db395805a3  2025-01-01  09:05  TASK-016   

   expected_completion_time_minutes  
0                                59  
1                                27  
2                                82  
3                                21  
4                                55  
