# Libraries

In [101]:
# Data manipulation libraries
import pandas as pd # Dataframes
import numpy as np # Numerical operations

# Statistical libraries
from sklearn import preprocessing # Data preprocessing and scaling

# Load the dataset and drop missing values

In [102]:
# Read the raw delivery records; Order_ID is kept as text so it is treated
# as an identifier rather than a number.
raw_df = pd.read_csv('data/food_delivery_times.csv', dtype={'Order_ID': str})

# Discard every row that contains at least one missing value.
df = raw_df.dropna()

# Preview the first rows of the cleaned frame.
df.head()

Unnamed: 0,Order_ID,Distance_km,Weather,Traffic_Level,Time_of_Day,Vehicle_Type,Preparation_Time_min,Courier_Experience_yrs,Delivery_Time_min
0,522,7.93,Windy,Low,Afternoon,Scooter,12,1.0,43
1,738,16.42,Clear,Medium,Evening,Bike,20,2.0,84
2,741,9.52,Foggy,Low,Night,Scooter,28,1.0,59
3,661,7.44,Rainy,Medium,Afternoon,Scooter,5,1.0,37
4,412,19.03,Clear,Low,Morning,Bike,16,5.0,68


## Export Dataset without missing values

In [103]:
# Save the frame without missing values; the row index is not written.
df.to_csv('data/clean_data_1.csv', index=False)

# Feature encoding

In [None]:
# Separate the prediction target (y) from the remaining columns (X).
target_col = 'Delivery_Time_min'
y = df[target_col]
X = df.drop(columns=[target_col])

In [105]:
# Categorical columns, grouped by the encoding applied to them further down:
# ordinal columns go through OrdinalEncoder, nominal ones through get_dummies.
ordinal_features = [
    'Traffic_Level',
    'Time_of_Day',
]
nominal_features = [
    'Weather',
    'Vehicle_Type',
]

## Ordinal encoding

In [106]:
# Explicit category order for every ordinal feature. The position of a
# category inside its list is the value it is encoded to (0, 1, 2, ...).
# Each order is wrapped in an outer list, i.e. the shape OrdinalEncoder
# expects in `categories` for a single column.
feature_order = {
    'Traffic_Level': [['Low','Medium','High']],
    'Time_of_Day': [['Morning','Afternoon','Evening','Night']]
}

# Fit ONE encoder on all ordinal columns at once instead of building a new
# encoder per column: the encoded values are the same, but the fitted
# `ordinal_encoder` now remembers every column and can be reused to
# transform (or inverse-transform) other data with the same mapping.
ordinal_encoder = preprocessing.OrdinalEncoder(
    categories = [feature_order[col][0] for col in ordinal_features]
)

# Wrapping the result in a DataFrame makes the assignment replace each
# column by name (aligned on X's index), so the columns become numeric.
X[ordinal_features] = pd.DataFrame(
    ordinal_encoder.fit_transform(X[ordinal_features]),
    columns = ordinal_features,
    index = X.index
)

## Nominal encoding

In [107]:
# One-hot encode the nominal columns: each listed column is replaced by one
# integer indicator column per category.
X = pd.get_dummies(X, columns=nominal_features, dtype=int)

## Export Dataset with encoded features

In [108]:
# Put the target back next to the encoded features and save the result;
# the row index is not written to the file.
encoded_dataset = pd.concat([X, y], axis=1)
encoded_dataset.to_csv('data/clean_data_2.csv', index=False)

# Feature scaling

## Scale only numerical features

In [109]:
# Columns that hold continuous numerical measurements.
num_cols = ['Distance_km','Preparation_Time_min','Courier_Experience_yrs']

# Create a RobustScaler object
robust_scaler = preprocessing.RobustScaler()

# Fit and transform the data.
# The scaler is fitted once on all numerical columns: RobustScaler computes
# its statistics independently per column, so the scaled values match a
# column-by-column fit, while the fitted scaler keeps the parameters of
# every column (a per-column refit would keep only the last one).
X[num_cols] = pd.DataFrame(
    robust_scaler.fit_transform(X[num_cols]),
    columns = num_cols,
    index = X.index
)

### Export Dataset with scaled numerical features

In [110]:
# Combine the features (numerical columns now scaled) with the target and
# save them; the row index is not written to the file.
scaled_numeric_dataset = pd.concat(objs=[X, y], axis='columns')
scaled_numeric_dataset.to_csv('data/clean_data_3.csv', index=False)

## Scale all features

In [111]:
# Fit and transform the data
# Every column except the first one is scaled.
# NOTE(review): the first column is presumably the Order_ID identifier -
# confirm against the column order of X before relying on this slice.
all_feature_cols = X.columns[1:]

# Fit the scaler once on the whole selection instead of refitting it for
# each column: the scaled values are the same, and the fitted scaler keeps
# the parameters of all scaled columns.
X[all_feature_cols] = pd.DataFrame(
    robust_scaler.fit_transform(X[all_feature_cols]),
    columns = all_feature_cols,
    index = X.index
)

### Export Dataset with scaled features

In [112]:
# Combine the fully scaled features with the target and save them;
# the row index is not written to the file.
fully_scaled_dataset = pd.concat([X, y], axis='columns')
fully_scaled_dataset.to_csv(path_or_buf='data/clean_data_4.csv', index=False)