<a href="https://colab.research.google.com/github/Eamah04/ANLTFinalProject/blob/main/ANALT202FinalProject.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import files
# Upload the dataset into the Colab runtime; in the recorded run this saved
# Food_Delivery_Times.csv, which the next cell reads with pd.read_csv.
uploaded = files.upload()

Saving Food_Delivery_Times.csv to Food_Delivery_Times.csv


In [1]:
import pandas as pd

import numpy as np
!pip install scikit-learn==1.3.2


# Read the delivery dataset from the uploaded CSV file
df = pd.read_csv("Food_Delivery_Times.csv")

# Sanity check: dimensions and the first few rows
print(df.shape)
print(df.head())

# Binary classification label: 1 when the delivery took more than 40 minutes, else 0
df["Delayed"] = df["Delivery_Time_min"].gt(40).astype(int)

# Column groups routed to the numeric / categorical preprocessing branches
cat_cols = ["Weather", "Traffic_Level", "Time_of_Day", "Vehicle_Type"]
num_cols = ["Distance_km", "Preparation_Time_min", "Courier_Experience_yrs"]

# Targets first (regression: minutes, classification: delayed flag) ...
y_reg = df["Delivery_Time_min"]
y_clf = df["Delayed"]

# ... then the feature frame: everything except the targets and the row identifier
X = df.drop(columns=["Delivery_Time_min", "Delayed", "Order_ID"])

from sklearn.model_selection import train_test_split
# Both tasks are split with the same test fraction (20%) and the same seed.
split_options = {"test_size": 0.2, "random_state": 42}
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(X, y_reg, **split_options)
X_train_clf, X_test_clf, y_train_clf, y_test_clf = train_test_split(X, y_clf, **split_options)

# Preprocessing
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Numeric branch: fill missing values with the column mean, then standardize
numeric_transformer = Pipeline(steps=[
    ("imputer", SimpleImputer(strategy="mean")),
    ("scaler", StandardScaler()),
])

# Categorical branch: fill missing values with the most frequent category,
# then one-hot encode (categories unseen during fit are ignored at transform time)
categorical_transformer = Pipeline(steps=[
    ("imputer", SimpleImputer(strategy="most_frequent")),
    ("onehot", OneHotEncoder(handle_unknown="ignore")),
])

# Route each column group to its branch
preprocessor = ColumnTransformer(transformers=[
    ("num", numeric_transformer, num_cols),
    ("cat", categorical_transformer, cat_cols),
])

# Models
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingRegressor
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Logistic Regression (fast baseline classifier)
logreg_pipeline = Pipeline(steps=[
    ("preprocessor", preprocessor),
    ("classifier", LogisticRegression(max_iter=200)),
])
# Fit on the training split, then predict labels for the held-out split
y_pred_log = logreg_pipeline.fit(X_train_clf, y_train_clf).predict(X_test_clf)
logreg_accuracy = accuracy_score(y_test_clf, y_pred_log)
print("Logistic Regression Accuracy:", logreg_accuracy)

# Random Forest classifier (100 trees, fixed seed)
rf_pipeline = Pipeline(steps=[
    ("preprocessor", preprocessor),
    ("classifier", RandomForestClassifier(n_estimators=100, random_state=42)),
])
# Fit on the training split, then predict labels for the held-out split
y_pred_rf = rf_pipeline.fit(X_train_clf, y_train_clf).predict(X_test_clf)
rf_accuracy = accuracy_score(y_test_clf, y_pred_rf)
print("Random Forest Accuracy:", rf_accuracy)

# Linear Regression (fast baseline regressor)
linreg_pipeline = Pipeline(steps=[
    ("preprocessor", preprocessor),
    ("regressor", LinearRegression()),
])
# Fit on the training split, then predict delivery times for the held-out split
y_pred_lin = linreg_pipeline.fit(X_train_reg, y_train_reg).predict(X_test_reg)
# RMSE = square root of the mean squared error
linreg_rmse = np.sqrt(mean_squared_error(y_test_reg, y_pred_lin))
print("Linear Regression RMSE:", linreg_rmse)

# Gradient Boosting regressor (100 boosting stages, fixed seed)
gbr_pipeline = Pipeline(steps=[
    ("preprocessor", preprocessor),
    ("regressor", GradientBoostingRegressor(n_estimators=100, random_state=42)),
])
# Fit on the training split, then predict delivery times for the held-out split
y_pred_gbr = gbr_pipeline.fit(X_train_reg, y_train_reg).predict(X_test_reg)
# RMSE = square root of the mean squared error
gbr_rmse = np.sqrt(mean_squared_error(y_test_reg, y_pred_gbr))
print("Gradient Boosting RMSE:", gbr_rmse)

(1000, 9)
   Order_ID  Distance_km Weather Traffic_Level Time_of_Day Vehicle_Type  \
0       522         7.93   Windy           Low   Afternoon      Scooter   
1       738        16.42   Clear        Medium     Evening         Bike   
2       741         9.52   Foggy           Low       Night      Scooter   
3       661         7.44   Rainy        Medium   Afternoon      Scooter   
4       412        19.03   Clear           Low     Morning         Bike   

   Preparation_Time_min  Courier_Experience_yrs  Delivery_Time_min  
0                    12                     1.0                 43  
1                    20                     2.0                 84  
2                    28                     1.0                 59  
3                     5                     1.0                 37  
4                    16                     5.0                 68  
Logistic Regression Accuracy: 0.935
Random Forest Accuracy: 0.895
Linear Regression RMSE: 8.949455247557282
Gradient Boosting

In [2]:
!pip install pytest



In [3]:
#Hyperparameter Tuning
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier, GradientBoostingRegressor
from sklearn.pipeline import Pipeline
from scipy.stats import randint, uniform
import joblib
import os

def tune_random_forest_classifier(preprocessor, X_train, y_train):
    """Grid-search a random-forest classification pipeline and save the winner.

    The pipeline is ``preprocessor`` followed by a ``RandomForestClassifier``
    (seed 42). Every combination in the grid is scored with 3-fold
    cross-validation using the F1 score, and the refitted best pipeline is
    written to ``models/tuned_rf.pkl``.

    Args:
        preprocessor: Transformer used as the first pipeline step.
        X_train: Training features.
        y_train: Training labels.

    Returns:
        The fitted ``GridSearchCV`` object.
    """
    # Create the output directory up front so a missing folder cannot make
    # joblib.dump fail after the (expensive) search has already finished.
    os.makedirs("models", exist_ok=True)

    rf = RandomForestClassifier(random_state=42)
    pipe = Pipeline(steps=[("preprocessor", preprocessor), ("clf", rf)])

    # Keys use the "<step>__<param>" convention to reach into the "clf" step.
    param_grid = {
        "clf__n_estimators": [100, 200],
        "clf__max_depth": [None, 10, 20],
        "clf__min_samples_split": [2, 5]
    }

    gs = GridSearchCV(pipe, param_grid, cv=3, scoring="f1", n_jobs=-1)
    gs.fit(X_train, y_train)
    print("Best RF params:", gs.best_params_)
    print("Best RF F1:", gs.best_score_)
    joblib.dump(gs.best_estimator_, "models/tuned_rf.pkl")
    return gs

def tune_gradient_boosting_regressor(preprocessor, X_train, y_train):
    """Randomized-search a gradient-boosting regression pipeline and save the winner.

    The pipeline is ``preprocessor`` followed by a ``GradientBoostingRegressor``
    (seed 42). Ten parameter settings are sampled and scored with 3-fold
    cross-validation on negative RMSE; the refitted best pipeline is written to
    ``models/tuned_gbr.pkl``.

    Args:
        preprocessor: Transformer used as the first pipeline step.
        X_train: Training features.
        y_train: Training target values.

    Returns:
        The fitted ``RandomizedSearchCV`` object.
    """
    # Create the output directory up front so a missing folder cannot make
    # joblib.dump fail after the search has already finished.
    os.makedirs("models", exist_ok=True)

    gbr = GradientBoostingRegressor(random_state=42)
    pipe = Pipeline(steps=[("preprocessor", preprocessor), ("reg", gbr)])

    # scipy's uniform(loc, scale) samples from [loc, loc + scale] and
    # randint(low, high) samples integers from [low, high).
    param_dist = {
        "reg__n_estimators": randint(100, 300),      # 100 .. 299
        "reg__learning_rate": uniform(0.01, 0.1),    # 0.01 .. 0.11
        "reg__max_depth": randint(2, 5),             # 2, 3 or 4
        "reg__subsample": uniform(0.7, 0.2)          # 0.7 .. 0.9
    }

    rs = RandomizedSearchCV(pipe, param_distributions=param_dist, n_iter=10,
                            cv=3, scoring="neg_root_mean_squared_error",
                            random_state=42, n_jobs=-1)
    rs.fit(X_train, y_train)
    print("Best GBR params:", rs.best_params_)
    # The scorer is negated RMSE, so flip the sign back for reporting.
    print("Best GBR RMSE:", -rs.best_score_)
    joblib.dump(rs.best_estimator_, "models/tuned_gbr.pkl")
    return rs

# Ensure the 'models' output directory exists (no error if it is already there,
# and no gap between an existence check and the creation)
os.makedirs("models", exist_ok=True)

# Call tuning functions to train and save models
tuned_rf_grid_search = tune_random_forest_classifier(preprocessor, X_train_clf, y_train_clf)
tuned_gbr_random_search = tune_gradient_boosting_regressor(preprocessor, X_train_reg, y_train_reg)

Best RF params: {'clf__max_depth': None, 'clf__min_samples_split': 2, 'clf__n_estimators': 200}
Best RF F1: 0.9500922185248992
Best GBR params: {'reg__learning_rate': 0.07508884729488528, 'reg__max_depth': 2, 'reg__n_estimators': 101, 'reg__subsample': 0.8443997544533649}
Best GBR RMSE: 11.4312817023338


In [4]:
#Pipelines
from sklearn.base import clone
from sklearn.ensemble import RandomForestClassifier, GradientBoostingRegressor
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.pipeline import Pipeline

def make_classification_pipelines(preprocessor):
    """Build the two classification pipelines (logistic regression, random forest).

    Each pipeline receives its own unfitted copy of ``preprocessor``. Sharing a
    single transformer object between pipelines would mean that fitting one
    pipeline silently refits the preprocessing used by the other.

    Args:
        preprocessor: Transformer (or a pipeline placeholder such as
            ``"passthrough"``) used as the first step of both pipelines.

    Returns:
        tuple: ``(logreg_pipeline, rf_pipeline)``, both unfitted.
    """
    # safe=False lets clone() fall back to a deep copy for non-estimator
    # values such as the "passthrough" string.
    logreg_pipeline = Pipeline(steps=[
        ("preprocessor", clone(preprocessor, safe=False)),
        ("clf", LogisticRegression(max_iter=200))
    ])
    rf_pipeline = Pipeline(steps=[
        ("preprocessor", clone(preprocessor, safe=False)),
        ("clf", RandomForestClassifier(n_estimators=100, random_state=42))
    ])
    return logreg_pipeline, rf_pipeline

def make_regression_pipelines(preprocessor):
    """Build the two regression pipelines (linear regression, gradient boosting).

    Each pipeline receives its own unfitted copy of ``preprocessor``. Sharing a
    single transformer object between pipelines would mean that fitting one
    pipeline silently refits the preprocessing used by the other.

    Args:
        preprocessor: Transformer (or a pipeline placeholder such as
            ``"passthrough"``) used as the first step of both pipelines.

    Returns:
        tuple: ``(linreg_pipeline, gbr_pipeline)``, both unfitted.
    """
    # safe=False lets clone() fall back to a deep copy for non-estimator
    # values such as the "passthrough" string.
    linreg_pipeline = Pipeline(steps=[
        ("preprocessor", clone(preprocessor, safe=False)),
        ("reg", LinearRegression())
    ])
    gbr_pipeline = Pipeline(steps=[
        ("preprocessor", clone(preprocessor, safe=False)),
        ("reg", GradientBoostingRegressor(random_state=42))
    ])
    return linreg_pipeline, gbr_pipeline


In [5]:
#Deep Learning
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks

def build_mlp_reg(input_dim):
    """Build and compile a small fully connected network for regression.

    Architecture: input of width ``input_dim`` -> Dense(64, relu) -> Dropout(0.2)
    -> Dense(32, relu) -> Dropout(0.2) -> Dense(1, linear). Compiled with the
    Adam optimizer, MSE loss and an RMSE metric.

    Args:
        input_dim: Number of input features.

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    layer_stack = [
        layers.Input(shape=(input_dim,)),
        layers.Dense(64, activation="relu"),
        layers.Dropout(0.2),
        layers.Dense(32, activation="relu"),
        layers.Dropout(0.2),
        layers.Dense(1, activation="linear"),
    ]
    network = models.Sequential(layer_stack)
    rmse_metric = tf.keras.metrics.RootMeanSquaredError()
    network.compile(optimizer="adam", loss="mse", metrics=[rmse_metric])
    return network

def build_mlp_clf(input_dim):
    """Build and compile a small fully connected network for binary classification.

    Architecture: input of width ``input_dim`` -> Dense(64, relu) -> Dropout(0.3)
    -> Dense(32, relu) -> Dropout(0.3) -> Dense(1, sigmoid). Compiled with the
    Adam optimizer, binary cross-entropy loss and an accuracy metric.

    Args:
        input_dim: Number of input features.

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    layer_stack = [
        layers.Input(shape=(input_dim,)),
        layers.Dense(64, activation="relu"),
        layers.Dropout(0.3),
        layers.Dense(32, activation="relu"),
        layers.Dropout(0.3),
        layers.Dense(1, activation="sigmoid"),
    ]
    network = models.Sequential(layer_stack)
    network.compile(
        optimizer="adam",
        loss="binary_crossentropy",
        metrics=["accuracy"],
    )
    return network

def train_dl_models(X_train_reg, y_train_reg, X_test_reg, y_test_reg,
                    X_train_clf, y_train_clf, X_test_clf, y_test_clf, input_dim):
    """Train and evaluate the MLP regressor and the MLP classifier.

    Both models train for at most 10 epochs with batch size 16, holding out
    20% of the training data for validation, and stop early (restoring the
    best weights) after 5 epochs without improvement. Test-set RMSE and
    accuracy are printed.

    Args:
        X_train_reg, y_train_reg: Training features / targets for regression.
        X_test_reg, y_test_reg: Test features / targets for regression.
        X_train_clf, y_train_clf: Training features / labels for classification.
        X_test_clf, y_test_clf: Test features / labels for classification.
        input_dim: Number of input features passed to the model builders.
            NOTE(review): the feature arrays are presumably already
            preprocessed numeric arrays of this width; confirm at the call site.

    Returns:
        tuple: ``(reg_model, clf_model)``, the trained Keras models. They were
        previously discarded, so callers could not predict with or save them.
    """
    reg_model = build_mlp_reg(input_dim)
    es = callbacks.EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)
    reg_model.fit(X_train_reg, y_train_reg, validation_split=0.2, epochs=10,
                  batch_size=16, callbacks=[es], verbose=1)
    # evaluate() returns [loss, metric]; index 1 is the compiled metric.
    print("DL Regression RMSE:", reg_model.evaluate(X_test_reg, y_test_reg, verbose=0)[1])

    clf_model = build_mlp_clf(input_dim)
    es_clf = callbacks.EarlyStopping(monitor="val_accuracy", patience=5, restore_best_weights=True)
    clf_model.fit(X_train_clf, y_train_clf, validation_split=0.2, epochs=10,
                  batch_size=16, callbacks=[es_clf], verbose=1)
    print("DL Classification Accuracy:", clf_model.evaluate(X_test_clf, y_test_clf, verbose=0)[1])

    return reg_model, clf_model


In [6]:
#Created a test folder using %%writefile
%%writefile test_suite.py
import pytest
import numpy as np
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression, LinearRegression
from tensorflow.keras import Sequential, layers

def test_classification_pipeline():
    """A passthrough + LogisticRegression pipeline fits two rows and predicts one label per row."""
    features = pd.DataFrame({
        "Distance_km": [5, 3],
        "Preparation_Time_min": [10, 8],
        "Courier_Experience_yrs": [2, 1],
    })
    labels = [1, 0]

    pipeline = Pipeline([("preprocessor", "passthrough"), ("clf", LogisticRegression())])
    pipeline.fit(features, labels)

    assert len(pipeline.predict(features)) == 2

def test_regression_pipeline():
    """A passthrough + LinearRegression pipeline fits a single row and predicts one value."""
    features = pd.DataFrame({
        "Distance_km": [5],
        "Preparation_Time_min": [10],
        "Courier_Experience_yrs": [2],
    })
    target = [30.0]

    pipeline = Pipeline([("preprocessor", "passthrough"), ("reg", LinearRegression())])
    pipeline.fit(features, target)

    assert len(pipeline.predict(features)) == 1

def test_dl_regression():
    """A one-layer Keras regressor trains for one epoch and returns one prediction per row."""
    network = Sequential([layers.Input(shape=(3,)), layers.Dense(1)])
    n_rows, n_features = 5, 3
    inputs = np.random.rand(n_rows, n_features)
    targets = np.random.rand(n_rows)

    network.compile(optimizer="adam", loss="mse")
    network.fit(inputs, targets, epochs=1, verbose=0)

    predictions = network.predict(inputs)
    assert len(predictions) == n_rows

def test_dl_classification():
    """A one-layer sigmoid Keras classifier trains for one epoch and returns one prediction per row."""
    network = Sequential([layers.Input(shape=(3,)), layers.Dense(1, activation="sigmoid")])
    n_rows, n_features = 5, 3
    inputs = np.random.rand(n_rows, n_features)
    labels = np.random.randint(0, 2, size=n_rows)

    network.compile(optimizer="adam", loss="binary_crossentropy")
    network.fit(inputs, labels, epochs=1, verbose=0)

    predictions = network.predict(inputs)
    assert len(predictions) == n_rows


Writing test_suite.py


In [7]:
#This part was added to confirm the test status
!pytest test_suite.py -v


platform linux -- Python 3.12.12, pytest-8.4.2, pluggy-1.6.0 -- /usr/bin/python3
cachedir: .pytest_cache
rootdir: /content
plugins: anyio-4.12.0, langsmith-0.4.56, typeguard-4.4.4
collected 4 items                                                              [0m

test_suite.py::test_classification_pipeline [32mPASSED[0m[32m                       [ 25%][0m
test_suite.py::test_regression_pipeline [32mPASSED[0m[32m                           [ 50%][0m
test_suite.py::test_dl_regression [32mPASSED[0m[32m                                 [ 75%][0m
test_suite.py::test_dl_classification [32mPASSED[0m[32m                             [100%][0m



Flask application

In [8]:
from google.colab import files
# Upload the tuned model pickles; in the recorded run this saved tuned_gbr.pkl and
# tuned_rf.pkl, which the Flask cell later loads from the working directory.
uploaded = files.upload()


Saving tuned_gbr.pkl to tuned_gbr.pkl
Saving tuned_rf.pkl to tuned_rf.pkl


In [14]:
!pip install pyngrok


Collecting pyngrok
  Downloading pyngrok-7.5.0-py3-none-any.whl.metadata (8.1 kB)
Downloading pyngrok-7.5.0-py3-none-any.whl (24 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.5.0


In [15]:
import os
from getpass import getpass

from pyngrok import ngrok, conf

# Never store the ngrok auth token in the notebook: it ends up in version
# control and in every shared copy. Read it from the NGROK_AUTHTOKEN
# environment variable, falling back to a non-echoing interactive prompt.
# NOTE(review): the token that used to be hard-coded here is public now and
# should be revoked in the ngrok dashboard.
conf.get_default().auth_token = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok auth token: ")


In [16]:
# 1. Install dependencies
!pip install flask pyngrok joblib pandas scikit-learn

# 2. Import libraries
from flask import Flask, render_template_string, request
import joblib
import pandas as pd
from pyngrok import ngrok

# 3. Load trained models
# NOTE(security): joblib.load unpickles the file, which can execute arbitrary
# code. Only load .pkl files that you produced yourself.
# NOTE(review): the tuning cell writes these files under models/, while this
# cell reads them from the working directory, so it depends on the manual
# upload performed earlier in the notebook.
clf_model = joblib.load("tuned_rf.pkl")
reg_model = joblib.load("tuned_gbr.pkl")

# 4. Define Flask app
app = Flask(__name__)

import math

# Inline page template (used instead of templates/index.html). Rendered with
# render_template_string, which HTML-escapes the interpolated values.
INDEX_TEMPLATE = """
    <style>
        body {
            font-family: Arial, sans-serif;
            background: #f4f6f9;
            display: flex;
            justify-content: center;
            align-items: flex-start;
            padding-top: 40px;
            min-height: 100vh;
            margin: 0;
        }

        .container {
            background: #fff;
            padding: 50px 40px 30px 40px;
            border-radius: 10px;
            box-shadow: 0 4px 12px rgba(0,0,0,0.1);
            width: 500px;
            text-align: center;
        }

        form {
            display: flex;
            flex-direction: column;
            gap: 15px;
        }

        label {
            text-align: left;
            font-weight: bold;
            margin-bottom: 5px;
            color: #444;
        }

        input[type="text"] {
            padding: 10px;
            border: 1px solid #ccc;
            border-radius: 6px;
            font-size: 12px;
            width: 100%;
        }

        input[type="submit"] {
            background: #007bff;
            color: #fff;
            border: none;
            padding: 12px;
            border-radius: 6px;
            font-size: 16px;
            cursor: pointer;
            transition: background 0.3s ease;
        }

        input[type="submit"]:hover {
            background: #0056b3;
        }

       h1 {
        font-size: 22px;
        font-weight: bold;
        margin: 0 0 20px 0;
        padding-top: 10px;
        color: #222;
    }

    h2 {
        font-size: 18px;
        font-weight: normal;
        margin-top: 20px;
        color: #333;
        word-wrap: break-word;
        }
    </style>

    <div class="container">
        <h1>TransLogix Freight Services Prediction System</h1>
        <form method="POST">
            <label>Distance (km):</label>
            <input type="text" name="distance">

            <label>Preparation Time (min):</label>
            <input type="text" name="prep_time">

            <label>Courier Experience (yrs):</label>
            <input type="text" name="experience">

            <label>Weather:</label>
            <input type="text" name="weather">

            <label>Traffic Level:</label>
            <input type="text" name="traffic">

            <label>Time of Day:</label>
            <input type="text" name="time_of_day">

            <label>Vehicle Type:</label>
            <input type="text" name="vehicle">

            <input type="submit" value="Predict">
        </form>

        {% if error %}
            <h2>{{ error }}</h2>
        {% endif %}

        {% if prediction is not none %}
            <h2>Predicted Delivery Time: {{ prediction }} minutes</h2>
            <h2>Status: {{ delay_status }}</h2>
        {% endif %}
    </div>
    """


def _parse_non_negative(form, field, label):
    """Return ``form[field]`` as a finite, non-negative float.

    Raises:
        ValueError: With a user-facing message when the field is missing,
            empty, not a number, not finite, or negative.
    """
    raw_value = form.get(field, "").strip()
    try:
        number = float(raw_value)
    except ValueError:
        raise ValueError(f"{label} must be a number.") from None
    if not math.isfinite(number) or number < 0:
        raise ValueError(f"{label} must be a finite, non-negative number.")
    return number


@app.route("/", methods=["GET", "POST"])
def index():
    """Serve the prediction form and, on POST, the model predictions.

    GET renders the empty form. POST validates the submitted fields, builds a
    one-row DataFrame with the training column names, and runs both loaded
    pipelines: ``reg_model`` for the delivery time in minutes and ``clf_model``
    for the delayed / on-time flag. Invalid numeric input is reported on the
    page instead of surfacing as an unhandled ValueError (HTTP 500).

    Returns:
        The rendered HTML page.
    """
    prediction = None
    delay_status = None
    error = None

    if request.method == "POST":
        form = request.form
        try:
            input_data = {
                "Distance_km": _parse_non_negative(form, "distance", "Distance (km)"),
                "Preparation_Time_min": _parse_non_negative(form, "prep_time", "Preparation Time (min)"),
                "Courier_Experience_yrs": _parse_non_negative(form, "experience", "Courier Experience (yrs)"),
                # Trim stray whitespace so typed categories can match the
                # values seen during training.
                "Weather": form.get("weather", "").strip(),
                "Traffic_Level": form.get("traffic", "").strip(),
                "Time_of_Day": form.get("time_of_day", "").strip(),
                "Vehicle_Type": form.get("vehicle", "").strip()
            }
        except ValueError as exc:
            error = str(exc)
        else:
            features = pd.DataFrame([input_data])
            predicted_time = reg_model.predict(features)[0]
            delayed = clf_model.predict(features)[0]

            prediction = round(float(predicted_time), 2)
            delay_status = "Delayed" if delayed == 1 else "On Time"

    return render_template_string(
        INDEX_TEMPLATE,
        prediction=prediction,
        delay_status=delay_status,
        error=error,
    )

# 5. Open an ngrok tunnel to the local Flask port, then start the server
FLASK_PORT = 5000

public_url = ngrok.connect(FLASK_PORT)
print("Public URL:", public_url)

# Blocks the cell until the server is stopped
app.run(port=FLASK_PORT)

Public URL: NgrokTunnel: "https://unfiltered-karly-uncaptivating.ngrok-free.dev" -> "http://localhost:5000"
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug:127.0.0.1 - - [14/Dec/2025 06:23:38] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [14/Dec/2025 06:23:38] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -
INFO:werkzeug:127.0.0.1 - - [14/Dec/2025 06:24:36] "[33mGET /sourceMap/chrome/css/download_scan_popup.css.map HTTP/1.1[0m" 404 -
INFO:werkzeug:127.0.0.1 - - [14/Dec/2025 06:24:36] "[33mGET /sourceMap/chrome/css/interactive_balloon.css.map HTTP/1.1[0m" 404 -
INFO:werkzeug:127.0.0.1 - - [14/Dec/2025 06:24:36] "[33mGET /sourceMap/chrome/css/download_scan_popup.css.map HTTP/1.1[0m" 404 -
INFO:werkzeug:127.0.0.1 - - [14/Dec/2025 06:24:36] "[33mGET /sourceMap/chrome/css/download_scan_popup.css.map HTTP/1.1[0m" 404 -
INFO:werkzeug:127.0.0.1 - - [14/Dec/2025 06:24:38] "POST / HTTP/1.1" 200 -
