In [23]:
# Install Python 3.13 without any interactive prompts: `-y` auto-confirms the
# install, and DEBIAN_FRONTEND=noninteractive stops debconf from trying (and
# failing) to open a dialog frontend inside the notebook.
# NOTE(review): no later cell visibly uses this interpreter; confirm it is needed.
!sudo DEBIAN_FRONTEND=noninteractive apt-get install -y python3.13

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following additional packages will be installed:
  libpython3.13-stdlib
Suggested packages:
  python3.13-venv
The following NEW packages will be installed:
  libpython3.13-stdlib python3.13
0 upgraded, 2 newly installed, 0 to remove and 41 not upgraded.
Need to get 5,847 kB of archives.
After this operation, 22.3 MB of additional disk space will be used.
Get:1 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy/main amd64 libpython3.13-stdlib amd64 3.13.8-1+jammy1 [2,989 kB]
Get:2 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy/main amd64 python3.13 amd64 3.13.8-1+jammy1 [2,858 kB]
Fetched 5,847 kB in 15s (402 kB/s)
debconf: unable to initialize frontend: Dialog
debconf: (No usable dialog-like program is installed, so the dialog based frontend cannot be used. at /usr/share/perl5/Debconf/FrontEnd/Dialog.pm line 78, <> line 2.)
debconf: falling back to frontend: Read

In [24]:
import pandas as pd
import numpy as np

In [25]:
# Fix NumPy's global RNG state so the synthetic data below is reproducible.
np.random.seed(seed=42)

In [26]:
# Number of synthetic shipment rows to generate.
n = 5_000


In [27]:
# Candidate cities sampled for each shipment's origin.
origins = [
    "Delhi",
    "Mumbai",
    "Chennai",
    "Bangalore",
    "Kolkata",
]

# Candidate cities sampled for each shipment's destination.
destinations = [
    "New York",
    "London",
    "Dubai",
    "Singapore",
    "Frankfurt",
]

In [28]:
# Column-wise synthetic shipment records; every column has length n.
# The random draws happen in the order the keys are listed, so reordering the
# keys would change the generated values for a given seed.
data = {
    "shipment_id": np.arange(1, n + 1),  # sequential ids 1..n
    "origin": np.random.choice(origins, size=n),
    "destination": np.random.choice(destinations, size=n),
    "distance_km": np.random.randint(low=500, high=9000, size=n),
    "package_weight_kg": np.random.uniform(low=0.1, high=25.0, size=n).round(2),
    # 0 = clear, 4 = storm (upper bound of randint is exclusive)
    "weather_severity": np.random.randint(low=0, high=5, size=n),
    # 1 = low, 4 = heavy (upper bound of randint is exclusive)
    "traffic_level": np.random.randint(low=1, high=5, size=n),
    "dispatch_hour": np.random.randint(low=0, high=24, size=n),
}

In [29]:
# Assemble the generated columns into a single DataFrame (one row per shipment).
df = pd.DataFrame(data=data)

In [30]:
# Synthetic delay in hours: a linear combination of distance, weather severity
# and traffic level, plus Gaussian noise (mean 0, standard deviation 5).
df["delay_hours"] = (
    df["distance_km"].mul(0.02)
    .add(df["weather_severity"].mul(1.5))
    .add(df["traffic_level"].mul(2.0))
    .add(np.random.normal(loc=0, scale=5, size=n))
)

In [31]:
# Binary target: 1 when the delay is strictly above the median delay, else 0.
df["is_delayed"] = df["delay_hours"].gt(df["delay_hours"].median()).astype(int)

In [32]:
# Persist the synthetic dataset to the working directory, without the row index.
df.to_csv(path_or_buf="synthetic_logistics_data.csv", index=False)

In [33]:
# Preview the first five generated rows.
df.head(5)

Unnamed: 0,shipment_id,origin,destination,distance_km,package_weight_kg,weather_severity,traffic_level,dispatch_hour,delay_hours,is_delayed
0,1,Bangalore,London,8881,24.68,1,4,1,192.852895,1
1,2,Kolkata,Frankfurt,7354,14.04,4,3,9,153.586521,1
2,3,Chennai,Singapore,1255,17.42,3,2,5,36.451264,0
3,4,Kolkata,Singapore,5431,3.05,1,1,23,115.895139,1
4,5,Kolkata,Frankfurt,6224,6.34,2,2,18,136.409259,1


In [34]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

In [35]:
# Reload the dataset using the same relative path it was written to by the
# earlier `df.to_csv(...)` call. The previous absolute "/content/..." path only
# resolved when the notebook's working directory happened to be /content.
df = pd.read_csv("synthetic_logistics_data.csv")

In [37]:
# Features: everything except the target, the raw delay the target is derived
# from (would leak the label), and the row identifier.
X = df.drop(columns=["is_delayed", "delay_hours", "shipment_id"])

# Target: the binary delayed / not-delayed flag.
y = df["is_delayed"]


In [38]:
# String-valued columns that are one-hot encoded by the preprocessing step.
categorical_cols = ["origin", "destination"]

# Every remaining feature column, in its original order.
numeric_cols = [
    name
    for name in X.columns
    if name not in categorical_cols
]

In [39]:
# Column-wise preprocessing: one-hot encode the categorical columns (categories
# unseen during fit are ignored at transform time) and pass the remaining
# columns through unchanged.
preprocess = ColumnTransformer(
    [
        ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_cols),
        ("num", "passthrough", numeric_cols),
    ]
)


In [40]:
# Random forest with 200 trees; the fixed random_state keeps training repeatable.
model = RandomForestClassifier(
    random_state=42,
    n_estimators=200,
)


In [41]:
# Chain preprocessing and the classifier so both are fitted and applied together.
pipeline = Pipeline(
    [
        ("preprocess", preprocess),
        ("model", model),
    ]
)


In [42]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [43]:
# Fit the preprocessing step and the classifier on the training split.
pipeline.fit(X=X_train, y=y_train)

In [44]:
# Predict the delayed / not-delayed label for each held-out row.
y_pred = pipeline.predict(X=X_test)

In [45]:

# Report overall accuracy, then per-class precision / recall / F1 on the test split.
print("Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred))
print(classification_report(y_true=y_test, y_pred=y_pred))

Accuracy: 0.967
              precision    recall  f1-score   support

           0       0.96      0.97      0.97       490
           1       0.97      0.96      0.97       510

    accuracy                           0.97      1000
   macro avg       0.97      0.97      0.97      1000
weighted avg       0.97      0.97      0.97      1000



In [46]:
import joblib
# Serialize the fitted pipeline (preprocessing + model) to the working directory.
joblib.dump(value=pipeline, filename="delay_predictor.pkl")

['delay_predictor.pkl']