In [2]:
import pandas as pd
import numpy as np
df = pd.read_csv('Food_Delivery_Times.csv')

In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 9 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Order_ID                1000 non-null   int64  
 1   Distance_km             1000 non-null   float64
 2   Weather                 970 non-null    object 
 3   Traffic_Level           970 non-null    object 
 4   Time_of_Day             970 non-null    object 
 5   Vehicle_Type            1000 non-null   object 
 6   Preparation_Time_min    1000 non-null   int64  
 7   Courier_Experience_yrs  970 non-null    float64
 8   Delivery_Time_min       1000 non-null   int64  
dtypes: float64(2), int64(3), object(4)
memory usage: 70.4+ KB


In [8]:
df.isnull().sum()

Order_ID                   0
Distance_km                0
Weather                   30
Traffic_Level             30
Time_of_Day               30
Vehicle_Type               0
Preparation_Time_min       0
Courier_Experience_yrs    30
Delivery_Time_min          0
dtype: int64

In [9]:
df

Unnamed: 0,Order_ID,Distance_km,Weather,Traffic_Level,Time_of_Day,Vehicle_Type,Preparation_Time_min,Courier_Experience_yrs,Delivery_Time_min
0,522,7.93,Windy,Low,Afternoon,Scooter,12,1.0,43
1,738,16.42,Clear,Medium,Evening,Bike,20,2.0,84
2,741,9.52,Foggy,Low,Night,Scooter,28,1.0,59
3,661,7.44,Rainy,Medium,Afternoon,Scooter,5,1.0,37
4,412,19.03,Clear,Low,Morning,Bike,16,5.0,68
...,...,...,...,...,...,...,...,...,...
995,107,8.50,Clear,High,Evening,Car,13,3.0,54
996,271,16.28,Rainy,Low,Morning,Scooter,8,9.0,71
997,861,15.62,Snowy,High,Evening,Scooter,26,2.0,81
998,436,14.17,Clear,Low,Afternoon,Bike,8,0.0,55


In [28]:
df.columns

Index(['Order_ID', 'Distance_km', 'Weather', 'Traffic_Level', 'Time_of_Day',
       'Vehicle_Type', 'Preparation_Time_min', 'Courier_Experience_yrs',
       'Delivery_Time_min'],
      dtype='object')

## droping null value rows

In [10]:
df.shape

(1000, 9)

In [12]:
df['Time_of_Day'].value_counts()

Time_of_Day
Morning      308
Evening      293
Afternoon    284
Night         85
Name: count, dtype: int64

In [13]:
df.dropna(inplace=True)

In [14]:
df.shape

(883, 9)

In [15]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 883 entries, 0 to 999
Data columns (total 9 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Order_ID                883 non-null    int64  
 1   Distance_km             883 non-null    float64
 2   Weather                 883 non-null    object 
 3   Traffic_Level           883 non-null    object 
 4   Time_of_Day             883 non-null    object 
 5   Vehicle_Type            883 non-null    object 
 6   Preparation_Time_min    883 non-null    int64  
 7   Courier_Experience_yrs  883 non-null    float64
 8   Delivery_Time_min       883 non-null    int64  
dtypes: float64(2), int64(3), object(4)
memory usage: 69.0+ KB


## Checking Correlation with 'Preparation_Time_min' with other numerical columns

In [19]:
from scipy.stats import pearsonr

def correlation_with_preparation_time(df):
    """
    Calculates Pearson correlation between 'Preparation_Time_min'
    and all other numerical columns in the given DataFrame.

    Parameters:
        df (pd.DataFrame): Input DataFrame

    Returns:
        pd.DataFrame: Correlation coefficient and p-value for each comparison
    """
    target_col = 'Preparation_Time_min'
    
    # Select only numeric columns and exclude the target
    numeric_cols = df.select_dtypes(include=['int64', 'float64']).columns.tolist()
    if target_col not in numeric_cols:
        raise ValueError(f"'{target_col}' not found in numeric columns.")
    numeric_cols.remove(target_col)

    results = []
    for col in numeric_cols:
        clean_df = df[[target_col, col]].dropna()
        if not clean_df.empty:
            r, p = pearsonr(clean_df[target_col], clean_df[col])
            results.append({
                'Compared With': col,
                'Correlation Coefficient': round(r, 4),
                'P-Value': p
            })
    return pd.DataFrame(results)


In [22]:
df = pd.read_csv('Food_Delivery_Times.csv')
result_df = correlation_with_preparation_time(df)
print(result_df)

            Compared With  Correlation Coefficient       P-Value
0                Order_ID                  -0.0351  2.674649e-01
1             Distance_km                  -0.0090  7.753279e-01
2  Courier_Experience_yrs                  -0.0308  3.374674e-01
3       Delivery_Time_min                   0.3073  2.549879e-23


## Checking Anova with wheather column with other columns in dataset.

In [23]:
from scipy.stats import f_oneway
import pandas as pd

def anova_with_weather(df):
    """
    Perform one-way ANOVA between 'Weather' (categorical) and each numerical column.

    Parameters:
        df (pd.DataFrame): Input DataFrame

    Returns:
        pd.DataFrame: F-statistics and p-values for each numeric column tested against 'Weather'
    """
    category_col = 'Weather'
    numeric_cols = df.select_dtypes(include=['int64', 'float64']).columns.tolist()

    results = []

    for num_col in numeric_cols:
        clean_df = df[[category_col, num_col]].dropna()
        grouped_data = clean_df.groupby(category_col)[num_col].apply(list)

        if len(grouped_data) >= 2:
            f_stat, p_val = f_oneway(*grouped_data)
            results.append({
                'Numeric Column': num_col,
                'F-Statistic': round(f_stat, 4),
                'P-Value': round(p_val, 6)
            })

    return pd.DataFrame(results)


In [24]:
df = pd.read_csv('Food_Delivery_Times.csv')
anova_results = anova_with_weather(df)
print(anova_results)


           Numeric Column  F-Statistic   P-Value
0                Order_ID       1.1221  0.344598
1             Distance_km       1.6936  0.149263
2    Preparation_Time_min       0.7687  0.545661
3  Courier_Experience_yrs       2.0990  0.079013
4       Delivery_Time_min      10.4565  0.000000


In [25]:
def run_chi_square_tests(df):
    from scipy.stats import chi2_contingency
    cat_cols = ['Weather', 'Traffic_Level', 'Time_of_Day', 'Vehicle_Type']
    results = []

    for i in range(len(cat_cols)):
        for j in range(i + 1, len(cat_cols)):
            col1, col2 = cat_cols[i], cat_cols[j]
            contingency = pd.crosstab(df[col1], df[col2])
            chi2, p, dof, _ = chi2_contingency(contingency)
            results.append({
                'Variable 1': col1,
                'Variable 2': col2,
                'Chi2 Statistic': round(chi2, 4),
                'P-Value': p,
                'Degrees of Freedom': dof
            })
    return pd.DataFrame(results)


In [27]:
df = pd.read_csv('Food_Delivery_Times.csv')


chi_square_df = run_chi_square_tests(df)

print("\nChi-Square:\n", chi_square_df)



Chi-Square:
       Variable 1     Variable 2  Chi2 Statistic   P-Value  Degrees of Freedom
0        Weather  Traffic_Level         12.7397  0.121124                   8
1        Weather    Time_of_Day          7.8375  0.797694                  12
2        Weather   Vehicle_Type         10.9221  0.206155                   8
3  Traffic_Level    Time_of_Day          5.3737  0.496853                   6
4  Traffic_Level   Vehicle_Type          6.0127  0.198199                   4
5    Time_of_Day   Vehicle_Type         12.2677  0.056257                   6


## Delivery Time Predictor

In [29]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.metrics import mean_absolute_error
import joblib

# Load the dataset
df = pd.read_csv('Food_Delivery_Times.csv')

# Define features and target
features = ['Distance_km', 'Weather', 'Traffic_Level', 'Time_of_Day',
            'Vehicle_Type', 'Preparation_Time_min', 'Courier_Experience_yrs']
target = 'Delivery_Time_min'

# Drop rows with missing values in relevant columns
df = df.dropna(subset=features + [target])

X = df[features]
y = df[target]

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Column types
categorical_cols = ['Weather', 'Traffic_Level', 'Time_of_Day', 'Vehicle_Type']
numerical_cols = ['Distance_km', 'Preparation_Time_min', 'Courier_Experience_yrs']

# Preprocessing
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_cols),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols)
    ])

# Pipeline
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', RandomForestRegressor(n_estimators=100, random_state=42))
])

# Train the model
model.fit(X_train, y_train)

# Evaluate
preds = model.predict(X_test)
mae = mean_absolute_error(y_test, preds)
print(f"Mean Absolute Error: {mae:.2f} minutes")

# Save the model
joblib.dump(model, 'delivery_time_predictor.pkl')

Mean Absolute Error: 6.79 minutes


['delivery_time_predictor.pkl']

In [37]:
import joblib
import pandas as pd

# Load the saved model
model = joblib.load('delivery_time_predictor.pkl')

# Sample input (replace these values with your real input)
input_data = pd.DataFrame([{
    'Distance_km': 6.2,
    'Weather': 'Rainy',
    'Traffic_Level': 'High',
    'Time_of_Day': 'Afternoon',
    'Vehicle_Type': 'Scooter',
    'Preparation_Time_min': 10,
    'Courier_Experience_yrs': 1
}])

# Predict delivery time
predicted_time = model.predict(input_data)
print(f"Predicted Delivery Time: {predicted_time[0]:.2f} minutes")


Predicted Delivery Time: 42.46 minutes


In [36]:
import gradio as gr
import pandas as pd
import joblib

# Load the model
model = joblib.load('delivery_time_predictor.pkl')

# Define prediction function
def predict_delivery_time(distance_km, weather, traffic_level, time_of_day,
                          vehicle_type, prep_time_min, courier_experience_yrs):
    input_df = pd.DataFrame([{
        'Distance_km': distance_km,
        'Weather': weather,
        'Traffic_Level': traffic_level,
        'Time_of_Day': time_of_day,
        'Vehicle_Type': vehicle_type,
        'Preparation_Time_min': prep_time_min,
        'Courier_Experience_yrs': courier_experience_yrs
    }])
    
    prediction = model.predict(input_df)[0]
    return f"Estimated Delivery Time: {prediction:.2f} minutes"

# Interface
interface = gr.Interface(
    fn=predict_delivery_time,
    inputs=[
        gr.Number(label="Distance (km)"),
        gr.Dropdown(choices=['Sunny', 'Rainy', 'Windy', 'Cloudy', 'Stormy'], label="Weather"),
        gr.Dropdown(choices=['Low', 'Medium', 'High'], label="Traffic Level"),
        gr.Dropdown(choices=['Morning', 'Afternoon', 'Evening', 'Night'], label="Time of Day"),
        gr.Dropdown(choices=['Bike', 'Scooter', 'Car'], label="Vehicle Type"),
        gr.Number(label="Preparation Time (min)"),
        gr.Number(label="Courier Experience (yrs)")
    ],
    outputs="text",
    title="Food Delivery Time Predictor",
    description="Enter delivery details to predict estimated delivery time in minutes."
)

# Launch the interface
interface.launch()


* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.


