In [11]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import joblib

import logging

logging.basicConfig(level=logging.INFO,format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

In [3]:
def load_data(filepath:str)-> pd.DataFrame:
    """
    Loads the dataset from a CSV file
    """
    df = pd.read_csv(filepath)
    return df

In [27]:
def preprocess_data(df:pd.DataFrame, random_state:int = 42):
    """
    Performs data preprocessing:
    - Handling missing values.
    - Coding categorical variables.
    - Scaling numerical variables.
    - Dividing into training and test sets.
    """

    ## Identify numeric and categorical columns
    numerical_features = ['Distance_km', 'Preparation_Time_min', 'Courier_Experience_yrs']
    categorical_features = ['Weather', 'Traffic_Level', 'Time_of_Day', 'Vehicle_Type']

    ## Create transformers for preprocessing
    numeric_transformer = Pipeline(steps=[
        ('imputer',SimpleImputer(strategy='median')),
        ('scaler', StandardScaler())
    ])

    categorical_transformer = Pipeline(steps=[
        ('imputer',SimpleImputer(strategy='most_frequent')),
        ('onehot', OneHotEncoder(handle_unknown='ignore'))
    ])

    ## Create a preprocessor that applies the transformations
    preprocessor = ColumnTransformer(
        transformers=[
            ('num',numeric_transformer,numerical_features),
            ('cat',categorical_transformer,categorical_features)
        ],
     
    )

    X = df.drop(columns=['Delivery_Time_min'])
    y = df['Delivery_Time_min']

    ## Split the data into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=random_state)

    ## Fit the preprocessor
    preprocessor.fit(X_train)
    X_train_processed = preprocessor.transform(X_train)
    X_test_processed = preprocessor.transform(X_test)
    logging.info("Data successfully preprocessed")

    ## Save the preprocessor
    joblib.dump(preprocessor, "../models/preprocessor.pkl")
    logging.info("Preprocessor saved successfully")

    num_features = preprocessor.transformers_[0][2]
    ohe = preprocessor.transformers_[1][1]
    cat_features = preprocessor.transformers_[1][2]
    ohe_feature_names = ohe.get_feature_names_out(cat_features)
    all_feature_names = list(num_features) + list(ohe_feature_names)

    ## Convert the processed data with the new columns
    X_train_processed_df = pd.DataFrame(X_train_processed,columns=all_feature_names)
    X_test_processed_df = pd.DataFrame(X_test_processed,columns=all_feature_names)

    return X_train_processed_df, X_test_processed_df, y_train, y_test


In [29]:
df = load_data("../data/Food_Delivery_Times.csv")
X_train, X_test, y_train, y_test = preprocess_data(df)


2025-06-30 21:45:31,945 - root - INFO - Data successfully preprocessed
2025-06-30 21:45:31,950 - root - INFO - Preprocessor saved successfully


In [35]:
X_train

Unnamed: 0,Distance_km,Preparation_Time_min,Courier_Experience_yrs,Weather_Clear,Weather_Foggy,Weather_Rainy,Weather_Snowy,Weather_Windy,Traffic_Level_High,Traffic_Level_Low,Traffic_Level_Medium,Time_of_Day_Afternoon,Time_of_Day_Evening,Time_of_Day_Morning,Time_of_Day_Night,Vehicle_Type_Bike,Vehicle_Type_Car,Vehicle_Type_Scooter
0,1.389200,-0.980721,0.820833,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
1,-1.391140,0.126852,-0.557994,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0
2,-1.200658,-0.011595,1.510246,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
3,-0.452710,1.372871,0.476126,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
4,-0.957750,-1.257614,-0.557994,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
795,1.308813,0.957531,0.476126,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
796,-0.124172,-0.426935,-0.902701,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
797,1.097361,-1.534508,-0.902701,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
798,1.158525,-1.119168,0.820833,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0


In [33]:
X_test

Unnamed: 0,Distance_km,Preparation_Time_min,Courier_Experience_yrs,Weather_Clear,Weather_Foggy,Weather_Rainy,Weather_Snowy,Weather_Windy,Traffic_Level_High,Traffic_Level_Low,Traffic_Level_Medium,Time_of_Day_Afternoon,Time_of_Day_Evening,Time_of_Day_Morning,Time_of_Day_Night,Vehicle_Type_Bike,Vehicle_Type_Car,Vehicle_Type_Scooter
0,-0.838917,-0.150042,0.131419,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0
1,0.062815,1.095978,-0.557994,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0
2,-1.059107,-0.426935,0.476126,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0
3,-1.183183,0.957531,-0.902701,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
4,1.282600,0.819085,-1.592114,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,0.964548,0.819085,0.820833,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
196,-1.450557,-0.426935,-1.247407,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0
197,-0.473680,1.095978,0.476126,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
198,-0.069998,-1.534508,0.131419,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0


In [31]:
loaded_model = joblib.load("model_pipeline.pkl")

FileNotFoundError: [Errno 2] No such file or directory: 'model_pipeline.pkl'

In [34]:
load_data("../data/Food_Delivery_Times.csv")

Unnamed: 0,Order_ID,Distance_km,Weather,Traffic_Level,Time_of_Day,Vehicle_Type,Preparation_Time_min,Courier_Experience_yrs,Delivery_Time_min
0,522,7.93,Windy,Low,Afternoon,Scooter,12,1.0,43
1,738,16.42,Clear,Medium,Evening,Bike,20,2.0,84
2,741,9.52,Foggy,Low,Night,Scooter,28,1.0,59
3,661,7.44,Rainy,Medium,Afternoon,Scooter,5,1.0,37
4,412,19.03,Clear,Low,Morning,Bike,16,5.0,68
...,...,...,...,...,...,...,...,...,...
995,107,8.50,Clear,High,Evening,Car,13,3.0,54
996,271,16.28,Rainy,Low,Morning,Scooter,8,9.0,71
997,861,15.62,Snowy,High,Evening,Scooter,26,2.0,81
998,436,14.17,Clear,Low,Afternoon,Bike,8,0.0,55


'd:\\CONCURSOS\\PG\\Food-Delivery-Time-Prediction\\food_delivery_project\\notebooks'