In [3]:
import pandas as pd
import random
from datetime import datetime, timedelta

# Helper functions
def random_coord():
    return f"{round(random.uniform(-90,90),4)},{round(random.uniform(-180,180),4)}"

def random_dim():
    return f"{random.randint(15,50)}x{random.randint(10,40)}x{random.randint(8,30)}"

def random_date(start, end):
    return start + timedelta(days=random.randint(0, (end-start).days))

# Constants
num_rows = 10000
base_date = datetime(2025, 5, 1)

origins = ["New York", "Chicago", "Miami", "Boston", "Dallas", "Atlanta", "San Diego", "Philadelphia", "Houston", "Detroit"]
destinations = ["Los Angeles", "Houston", "Seattle", "San Francisco", "Denver", "Miami", "Las Vegas", "Phoenix", "Austin", "Orlando"]
carriers = ["FedEx", "UPS", "DHL"]
shipment_types = ["Standard", "Express", "Fragile"]
statuses = ["Delivered", "Delayed", "Disrupted"]
delay_reasons = ["", "Weather Delay", "Traffic Jam", "Strike", "Vehicle Breakdown", "Customs Issue"]
weather_types = ["Clear", "Rain", "Storm", "Cloudy", "Sunny"]
traffic_levels = ["Low", "Moderate", "High", "Severe"]
events = ["None", "Holiday", "Strike"]
maintenance_types = ["Oil Change", "Brake Check", "Inspection", "Battery Replacement", "Filter Change"]
image_labels = ["OK", "Damaged"]

rows = []

# Data generation
for i in range(num_rows):
    sid = 1001 + i
    oid = 5001 + i
    custid = f"C{random.randint(100,999)}"
    origin = random.choice(origins)
    dest = random.choice(destinations)
    carrier = random.choice(carriers)
    routeid = f"R{random.randint(1,20)}"
    planned_ship = random_date(base_date, base_date + timedelta(days=30))
    actual_ship = planned_ship + timedelta(days=random.choice([0,0,0,1]))
    planned_del = planned_ship + timedelta(days=random.randint(1,7))
    actual_del = planned_del + timedelta(days=random.choice([0,0,0,0,1,2]))
    status = random.choices(statuses, weights=[70,20,10])[0]
    stype = random.choice(shipment_types)
    weight = round(random.uniform(50,350),1)
    vol = round(weight/100 + random.uniform(0.5,3),1)
    dims = random_dim()
    delayflag = 1 if status != "Delivered" else 0
    disruption = random.choice(delay_reasons) if status == "Disrupted" else ""
    weather = random.choice(weather_types)
    traffic = random.choice(traffic_levels)
    event = random.choice(events)
    vid = f"V{random.randint(1,30):03d}"
    engine_hours = random.randint(1000,8000)
    mileage = random.randint(20000,150000)
    fuel = round(random.uniform(4.5,12.0),1)
    gps = f"{random_coord()};{random_coord()}"
    temp = random.randint(15,30)
    vibr = round(random.uniform(0.01,0.1),2)
    maint_date = planned_ship - timedelta(days=random.randint(1,30))
    maint_type = random.choice(maintenance_types)
    breakdown = random.choices([0,1],weights=[97,3])[0]
    pkg_img = f"package_{sid}_before.jpg"
    img_label = random.choice(image_labels)
    incident = random.choices([0,1],weights=[95,5])[0]
    anomaly = random.choices([0,1],weights=[96,4])[0]

    row = [
        sid, oid, custid, origin, dest, carrier, routeid,
        planned_ship.date(), actual_ship.date(), planned_del.date(), actual_del.date(), status, stype, weight, vol, dims, delayflag,
        disruption, weather, traffic, event, vid, engine_hours, mileage, fuel, gps, temp, vibr, maint_date.date(), maint_type, breakdown,
        pkg_img, img_label, incident, anomaly
    ]
    rows.append(row)

# Define column names
columns = [
    "ShipmentID","OrderID","CustomerID","Origin","Destination","Carrier","RouteID","PlannedShipDate","ActualShipDate",
    "PlannedDeliveryDate","ActualDeliveryDate","Status","ShipmentType","Weight","Volume","Dimensions","DelayFlag",
    "DisruptionReason","Weather","Traffic","PublicEvent","VehicleID","EngineHours","Mileage","FuelConsumption","GPS",
    "Temperature","Vibration","MaintenanceDate","MaintenanceType","BreakdownFlag","PackageImage","ImageLabel","IncidentFlag","AnomalyFlag"
]

# Save to CSV
df = pd.DataFrame(rows, columns=columns)
df.to_csv("shipment_logistics_dataset.csv", index=False)
print("✅ Dataset created and saved as 'shipment_logistics_dataset.csv'")


✅ Dataset created and saved as 'shipment_logistics_dataset.csv'


In [5]:
df

Unnamed: 0,ShipmentID,OrderID,CustomerID,Origin,Destination,Carrier,RouteID,PlannedShipDate,ActualShipDate,PlannedDeliveryDate,...,GPS,Temperature,Vibration,MaintenanceDate,MaintenanceType,BreakdownFlag,PackageImage,ImageLabel,IncidentFlag,AnomalyFlag
0,1001,5001,C525,Atlanta,Phoenix,UPS,R11,2025-05-20,2025-05-20,2025-05-26,...,"55.0719,-132.8542;-66.4432,90.3484",21,0.07,2025-05-06,Battery Replacement,0,package_1001_before.jpg,OK,0,0
1,1002,5002,C805,Miami,Los Angeles,FedEx,R13,2025-05-11,2025-05-12,2025-05-18,...,"-72.4671,-92.4337;-13.9777,-170.9333",26,0.05,2025-04-24,Oil Change,0,package_1002_before.jpg,OK,0,0
2,1003,5003,C926,San Diego,Los Angeles,FedEx,R18,2025-05-22,2025-05-23,2025-05-28,...,"-7.999,-87.9321;-4.5549,33.0992",17,0.10,2025-05-07,Filter Change,0,package_1003_before.jpg,Damaged,0,0
3,1004,5004,C486,Houston,Phoenix,DHL,R7,2025-05-21,2025-05-21,2025-05-27,...,"6.7314,-133.5211;-14.1871,122.965",18,0.09,2025-05-01,Battery Replacement,0,package_1004_before.jpg,Damaged,0,0
4,1005,5005,C923,Boston,Denver,FedEx,R17,2025-05-17,2025-05-17,2025-05-23,...,"-18.5173,16.6943;72.8763,-124.9511",25,0.04,2025-04-26,Battery Replacement,0,package_1005_before.jpg,OK,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,10996,14996,C467,Chicago,Houston,UPS,R16,2025-05-14,2025-05-15,2025-05-15,...,"6.631,-36.3833;-47.1611,172.091",29,0.05,2025-05-05,Brake Check,0,package_10996_before.jpg,OK,0,0
9996,10997,14997,C638,Houston,Los Angeles,DHL,R11,2025-05-11,2025-05-11,2025-05-16,...,"7.1184,-92.9857;13.3409,131.0448",17,0.04,2025-04-30,Battery Replacement,0,package_10997_before.jpg,OK,0,0
9997,10998,14998,C376,Houston,Denver,DHL,R9,2025-05-06,2025-05-07,2025-05-08,...,"77.3513,-64.8927;-63.182,-14.3878",28,0.09,2025-04-11,Oil Change,0,package_10998_before.jpg,Damaged,0,0
9998,10999,14999,C782,Detroit,Phoenix,FedEx,R14,2025-05-30,2025-05-30,2025-06-05,...,"-26.0669,-115.4695;51.2226,156.5128",29,0.03,2025-05-06,Oil Change,0,package_10999_before.jpg,OK,0,0


In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 35 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   ShipmentID           10000 non-null  int64  
 1   OrderID              10000 non-null  int64  
 2   CustomerID           10000 non-null  object 
 3   Origin               10000 non-null  object 
 4   Destination          10000 non-null  object 
 5   Carrier              10000 non-null  object 
 6   RouteID              10000 non-null  object 
 7   PlannedShipDate      10000 non-null  object 
 8   ActualShipDate       10000 non-null  object 
 9   PlannedDeliveryDate  10000 non-null  object 
 10  ActualDeliveryDate   10000 non-null  object 
 11  Status               10000 non-null  object 
 12  ShipmentType         10000 non-null  object 
 13  Weight               10000 non-null  float64
 14  Volume               10000 non-null  float64
 15  Dimensions           10000 non-null  

In [9]:
df.isnull().sum()

ShipmentID             0
OrderID                0
CustomerID             0
Origin                 0
Destination            0
Carrier                0
RouteID                0
PlannedShipDate        0
ActualShipDate         0
PlannedDeliveryDate    0
ActualDeliveryDate     0
Status                 0
ShipmentType           0
Weight                 0
Volume                 0
Dimensions             0
DelayFlag              0
DisruptionReason       0
Weather                0
Traffic                0
PublicEvent            0
VehicleID              0
EngineHours            0
Mileage                0
FuelConsumption        0
GPS                    0
Temperature            0
Vibration              0
MaintenanceDate        0
MaintenanceType        0
BreakdownFlag          0
PackageImage           0
ImageLabel             0
IncidentFlag           0
AnomalyFlag            0
dtype: int64

In [3]:
import pandas as pd

df = pd.read_csv("shipment_logistics_dataset.csv", parse_dates=["ActualShipDate", "ActualDeliveryDate"])
df["DeliveryTimeDays"] = (df["ActualDeliveryDate"] - df["ActualShipDate"]).dt.days

shipment_df = df[[
    "Origin", "Destination", "Carrier", "ShipmentType",
    "Weight", "Volume", "Dimensions",
    "PlannedShipDate", "ActualShipDate", "PlannedDeliveryDate", "ActualDeliveryDate",
    "Status", "DeliveryTimeDays"
]].dropna(subset=["DeliveryTimeDays"])

shipment_df.to_csv("task1_to_3_dataset.csv", index=False)
print("✅ Saved: task1_to_3_dataset.csv")


✅ Saved: task1_to_3_dataset.csv


In [4]:
# TASK 1 — Data Preprocessing
# Author: Opemipo Oreoluwa
# Purpose: Prepare features for shipment delivery prediction.

import pandas as pd
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Load task-specific dataset
df = pd.read_csv("task1_to_3_dataset.csv")

# Define input and target
X = df[["Origin", "Destination", "Carrier", "ShipmentType", "Weight", "Volume", "Dimensions"]]
y = df["DeliveryTimeDays"]

# Define column types
categorical = ["Origin", "Destination", "Carrier", "ShipmentType"]
numerical = ["Weight", "Volume"]

# Build preprocessing pipeline
preprocessor = ColumnTransformer([
    ("num", Pipeline([
        ("imputer", SimpleImputer(strategy="mean")),
        ("scaler", StandardScaler())
    ]), numerical),
    
    ("cat", Pipeline([
        ("imputer", SimpleImputer(strategy="most_frequent")),
        ("encoder", OneHotEncoder(handle_unknown="ignore"))
    ]), categorical)
])

# Fit & transform the data
X_processed = preprocessor.fit_transform(X)
print("✅ Task 1 complete: Preprocessed features ready for modeling.")


✅ Task 1 complete: Preprocessed features ready for modeling.


In [5]:
# TASK 2 — Train Linear Regression
# Author: Opemipo Oreoluwa

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, r2_score

# Split data
X_train, X_test, y_train, y_test = train_test_split(X_processed, y, test_size=0.2, random_state=42)

# Train model
model = LinearRegression()
model.fit(X_train, y_train)

# Evaluate
y_pred = model.predict(X_test)
print("✅ Task 2 complete — MAE:", mean_absolute_error(y_test, y_pred), "| R²:", r2_score(y_test, y_pred))


✅ Task 2 complete — MAE: 1.8587480270235202 | R²: -0.004157442984270121


In [6]:
# TASK 3 — Neural Network (TensorFlow)
# Author: Opemipo Oreoluwa

import tensorflow as tf
from sklearn.preprocessing import StandardScaler

# Use numeric features only
X = df[["Weight", "Volume"]]
y = df["DeliveryTimeDays"]

# Scale and split
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2)

# Build model
model = tf.keras.Sequential([
    tf.keras.layers.Dense(32, activation="relu", input_shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(16, activation="relu"),
    tf.keras.layers.Dense(1)
])
model.compile(optimizer="adam", loss="mse", metrics=["mae"])
model.fit(X_train, y_train, epochs=10, batch_size=16)
print("✅ Task 3 complete — Neural network trained.")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - loss: 10.9786 - mae: 2.6902
Epoch 2/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 6ms/step - loss: 4.8367 - mae: 1.8508
Epoch 3/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5ms/step - loss: 4.8930 - mae: 1.8643
Epoch 4/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - loss: 4.8564 - mae: 1.8728
Epoch 5/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 4.8700 - mae: 1.8645
Epoch 6/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 7ms/step - loss: 4.7732 - mae: 1.8411
Epoch 7/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 4.7664 - mae: 1.8442
Epoch 8/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - loss: 4.8190 - mae: 1.8636
Epoch 9/10
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5m

In [7]:
# # TASK 4 — Disruption Prediction with PyTorch
# # Author: Opemipo Oreoluwa

# import pandas as pd
# import torch
# import torch.nn as nn
# from sklearn.model_selection import train_test_split
# from sklearn.preprocessing import LabelEncoder

# # Load dataset
# df1 = pd.read_csv("task4_disruption_dataset.csv")

# # Encode categorical features
# for col in ["Weather", "Traffic", "PublicEvent"]:
#     df1[col] = LabelEncoder().fit_transform(df1[col])

# X = df1[["Weight", "Volume", "Weather", "Traffic", "PublicEvent"]].values
# y = df1["DisruptionFlag"].values.reshape(-1, 1)

# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# X_train = torch.tensor(X_train, dtype=torch.float32)
# y_train = torch.tensor(y_train, dtype=torch.float32)

# # Build PyTorch model
# class DisruptionModel(nn.Module):
#     def __init__(self):
#         super().__init__()
#         self.net = nn.Sequential(
#             nn.Linear(5, 16),
#             nn.ReLU(),
#             nn.Linear(16, 1),
#             nn.Sigmoid()
#         )
#     def forward(self, x):
#         return self.net(x)

# model = DisruptionModel()
# loss_fn = nn.BCELoss()
# optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# # Train model
# for epoch in range(20):
#     optimizer.zero_grad()
#     output = model(X_train)
#     loss = loss_fn(output, y_train)
#     loss.backward()
#     optimizer.step()
#     if epoch % 5 == 0:
#         print(f"Epoch {epoch} — Loss: {loss.item():.4f}")

# print("✅ Task 4 complete — PyTorch model trained.")


In [8]:
# import pandas as pd

# df1 = pd.read_csv("shipment_logistics_dataset.csv")
# df1["DisruptionFlag"] = (df1["Status"] == "Disrupted").astype(int)

# disruption_df = df[[
#     "Weight", "Volume", "Weather", "Traffic", "PublicEvent",
#     "DisruptionReason", "DelayFlag", "DisruptionFlag"
# ]].dropna()

# disruption_df.to_csv("task4_disruption_dataset.csv", index=False)
# print("✅ Saved: task4_disruption_dataset.csv")


In [9]:
import pandas as pd
import random
from datetime import datetime, timedelta

# Generate dates
dates = pd.date_range(start="2025-05-01", periods=60).date
routes = [f"R{i}" for i in range(1, 21)]
weathers = ["Clear", "Rain", "Storm", "Cloudy", "Sunny"]

# Create weather dataset (date x route)
data = []
for date in dates:
    for route in routes:
        data.append({
            "date": date,
            "route": route,
            "weather": random.choice(weathers)
        })

weather_df = pd.DataFrame(data)
weather_df.to_csv("weather_data.csv", index=False)
print("✅ External weather_data.csv created.")


✅ External weather_data.csv created.


In [10]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Load shipment + weather data
df = pd.read_csv("shipment_logistics_dataset.csv", parse_dates=["ActualShipDate"])
weather = pd.read_csv("weather_data.csv")

# Convert to date only
df["ActualShipDate"] = df["ActualShipDate"].dt.date

# Add disruption flag
df["DisruptionFlag"] = (df["Status"] == "Disrupted").astype(int)
df["Route"] = df["RouteID"]

# Merge on route + date
merged_df = pd.merge(df, weather, left_on=["ActualShipDate", "Route"], right_on=["date", "route"])

# Encode weather
merged_df["weather_encoded"] = LabelEncoder().fit_transform(merged_df["weather"])

# Save merged dataset for reuse
merged_df.to_csv("task4_merged_disruption_dataset.csv", index=False)
print("✅ Merged dataset ready: task4_merged_disruption_dataset.csv")


✅ Merged dataset ready: task4_merged_disruption_dataset.csv


In [11]:
# # Final PyTorch Classifier Code — Aligned with Sample Task
# import pandas as pd
# import torch
# import torch.nn as nn
# import torch.optim as optim
# from sklearn.model_selection import train_test_split

# # Load merged dataset
# df = pd.read_csv("task4_merged_disruption_dataset.csv")

# # Feature Selection: route (encoded), distance proxy (Volume), weather_encoded
# df["route_encoded"] = LabelEncoder().fit_transform(df["Route"])
# X = df[["route_encoded", "Volume", "weather_encoded"]].values
# y = df["DisruptionFlag"].values.astype(float)

# # Split + convert to tensors
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# X_train = torch.tensor(X_train, dtype=torch.float32)
# X_test  = torch.tensor(X_test, dtype=torch.float32)
# y_train = torch.tensor(y_train.reshape(-1,1), dtype=torch.float32)
# y_test  = torch.tensor(y_test.reshape(-1,1), dtype=torch.float32)

# # Define model
# class DisruptionPredictor(nn.Module):
#     def __init__(self):
#         super().__init__()
#         self.fc1 = nn.Linear(3, 64)
#         self.fc2 = nn.Linear(64, 32)
#         self.fc3 = nn.Linear(32, 1)
    
#     def forward(self, x):
#         x = torch.relu(self.fc1(x))
#         x = torch.relu(self.fc2(x))
#         return torch.sigmoid(self.fc3(x))

# model = DisruptionPredictor()
# criterion = nn.BCELoss()
# optimizer = optim.Adam(model.parameters(), lr=0.001)

# # Train model
# for epoch in range(1, 101):
#     model.train()
#     optimizer.zero_grad()
#     outputs = model(X_train)
#     loss = criterion(outputs, y_train)
#     loss.backward()
#     optimizer.step()
#     if epoch % 10 == 0:
#         print(f"Epoch {epoch}, Loss: {loss.item():.4f}")

# # Evaluate
# model.eval()
# with torch.no_grad():
#     outputs = model(X_test)
#     predicted = (outputs > 0.5).float()
#     accuracy = (predicted == y_test).float().mean()
#     print(f"✅ Task 4 Complete — Accuracy: {accuracy.item():.4f}")


In [12]:
# # TASK 4 — Predicting Supply Chain Disruptions with External Weather Data using PyTorch
# # Author: Opemipo Oreoluwa

# import pandas as pd
# import torch
# import torch.nn as nn
# import torch.optim as optim
# import random
# from datetime import datetime, timedelta
# from sklearn.model_selection import train_test_split
# from sklearn.preprocessing import LabelEncoder

# # -------------------------------
# # STEP 1: Generate External Weather Dataset
# # -------------------------------
# routes = [f"R{i}" for i in range(1, 21)]
# dates = pd.date_range(start="2025-05-01", periods=60).date
# weathers = ["Clear", "Rain", "Storm", "Cloudy", "Sunny"]

# weather_data = []
# for date in dates:
#     for route in routes:
#         weather_data.append({
#             "date": date,
#             "route": route,
#             "weather": random.choice(weathers)
#         })

# weather_df = pd.DataFrame(weather_data)
# weather_df.to_csv("weather_data.csv", index=False)

# # -------------------------------
# # STEP 2: Load & Prepare Shipment Dataset
# # -------------------------------
# df = pd.read_csv("shipment_logistics_dataset.csv", parse_dates=["ActualShipDate"])
# df["ActualShipDate"] = df["ActualShipDate"].dt.date
# df["Route"] = df["RouteID"]
# df["DisruptionFlag"] = (df["Status"] == "Disrupted").astype(int)

# # -------------------------------
# # STEP 3: Merge Shipment + Weather Data
# # -------------------------------
# weather = pd.read_csv("weather_data.csv")
# merged_df = pd.merge(df, weather, left_on=["ActualShipDate", "Route"], right_on=["date", "route"])

# # -------------------------------
# # STEP 4: Encode & Prepare for PyTorch
# # -------------------------------
# merged_df["weather_encoded"] = LabelEncoder().fit_transform(merged_df["weather"])
# merged_df["route_encoded"] = LabelEncoder().fit_transform(merged_df["Route"])

# X = merged_df[["route_encoded", "Volume", "weather_encoded"]].values
# y = merged_df["DisruptionFlag"].values.astype(float)

# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# X_train = torch.tensor(X_train, dtype=torch.float32)
# X_test  = torch.tensor(X_test, dtype=torch.float32)
# y_train = torch.tensor(y_train.reshape(-1,1), dtype=torch.float32)
# y_test  = torch.tensor(y_test.reshape(-1,1), dtype=torch.float32)

# # -------------------------------
# # STEP 5: Define PyTorch Model
# # -------------------------------
# class DisruptionPredictor(nn.Module):
#     def __init__(self):
#         super().__init__()
#         self.fc1 = nn.Linear(3, 64)
#         self.fc2 = nn.Linear(64, 32)
#         self.fc3 = nn.Linear(32, 1)
    
#     def forward(self, x):
#         x = torch.relu(self.fc1(x))
#         x = torch.relu(self.fc2(x))
#         return torch.sigmoid(self.fc3(x))

# model = DisruptionPredictor()
# criterion = nn.BCELoss()
# optimizer = optim.Adam(model.parameters(), lr=0.001)

# # -------------------------------
# # STEP 6: Train Model
# # -------------------------------
# for epoch in range(1, 101):
#     model.train()
#     optimizer.zero_grad()
#     outputs = model(X_train)
#     loss = criterion(outputs, y_train)
#     loss.backward()
#     optimizer.step()
#     if epoch % 10 == 0:
#         print(f"Epoch {epoch} — Loss: {loss.item():.4f}")

# # -------------------------------
# # STEP 7: Evaluate
# # -------------------------------
# model.eval()
# with torch.no_grad():
#     outputs = model(X_test)
#     predicted = (outputs > 0.5).float()
#     accuracy = (predicted == y_test).float().mean()
#     print(f"\n✅ Task 4 Complete — Disruption Prediction Accuracy: {accuracy.item():.4f}")


In [13]:
import pandas as pd

# Load both datasets
shipment = pd.read_csv("shipment_logistics_dataset.csv", parse_dates=["ActualShipDate"])
weather = pd.read_csv("weather_data.csv")

# Convert to date only
shipment["ActualShipDate"] = shipment["ActualShipDate"].dt.date
shipment["Route"] = shipment["RouteID"]

# Check merge keys
print("🧪 Unique Routes in shipment:", sorted(shipment["Route"].unique())[:5])
print("🧪 Unique Routes in weather: ", sorted(weather["route"].unique())[:5])

print("\n🧪 Sample Dates in shipment:", sorted(shipment["ActualShipDate"].unique())[:5])
print("🧪 Sample Dates in weather: ", sorted(pd.to_datetime(weather["date"]).dt.date.unique())[:5])

# Try merging
merged_df = pd.merge(shipment, weather, left_on=["ActualShipDate", "Route"], right_on=["date", "route"])
print(f"\n🔍 Rows in merged_df: {merged_df.shape[0]}")


🧪 Unique Routes in shipment: ['R1', 'R10', 'R11', 'R12', 'R13']
🧪 Unique Routes in weather:  ['R1', 'R10', 'R11', 'R12', 'R13']

🧪 Sample Dates in shipment: [datetime.date(2025, 5, 1), datetime.date(2025, 5, 2), datetime.date(2025, 5, 3), datetime.date(2025, 5, 4), datetime.date(2025, 5, 5)]
🧪 Sample Dates in weather:  [datetime.date(2025, 5, 1), datetime.date(2025, 5, 2), datetime.date(2025, 5, 3), datetime.date(2025, 5, 4), datetime.date(2025, 5, 5)]

🔍 Rows in merged_df: 0


In [14]:
# ✅ Generate External Weather Data Only for Actual Shipment Route-Date Combinations
import pandas as pd
import random
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim

# -------------------------------
# STEP 1: Load Shipment Data
# -------------------------------
df = pd.read_csv("shipment_logistics_dataset.csv", parse_dates=["ActualShipDate"])
df["ActualShipDate"] = df["ActualShipDate"].dt.date
df["Route"] = df["RouteID"]
df["DisruptionFlag"] = (df["Status"] == "Disrupted").astype(int)

# -------------------------------
# STEP 2: Create Matching Weather Data (Only for Shipment Routes + Dates)
# -------------------------------
weather_conditions = ["Clear", "Rain", "Storm", "Cloudy", "Sunny"]
unique_route_dates = df[["ActualShipDate", "Route"]].drop_duplicates()
weather_data = []

for _, row in unique_route_dates.iterrows():
    weather_data.append({
        "date": row["ActualShipDate"],
        "route": row["Route"],
        "weather": random.choice(weather_conditions)
    })

weather_df = pd.DataFrame(weather_data)
weather_df.to_csv("weather_data.csv", index=False)

# -------------------------------
# STEP 3: Merge Shipment + Weather Data
# -------------------------------
merged_df = pd.merge(df, weather_df, left_on=["ActualShipDate", "Route"], right_on=["date", "route"])

# -------------------------------
# STEP 4: Encode & Prepare Dataset
# -------------------------------
merged_df["weather_encoded"] = LabelEncoder().fit_transform(merged_df["weather"])
merged_df["route_encoded"] = LabelEncoder().fit_transform(merged_df["Route"])

X = merged_df[["route_encoded", "Volume", "weather_encoded"]].values
y = merged_df["DisruptionFlag"].values.astype(float)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test  = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(y_train.reshape(-1,1), dtype=torch.float32)
y_test  = torch.tensor(y_test.reshape(-1,1), dtype=torch.float32)

# -------------------------------
# STEP 5: Define & Train PyTorch Model
# -------------------------------
class DisruptionPredictor(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(3, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return torch.sigmoid(self.fc3(x))

model = DisruptionPredictor()
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(1, 101):
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train)
    loss = criterion(outputs, y_train)
    loss.backward()
    optimizer.step()
    if epoch % 10 == 0:
        print(f"Epoch {epoch} — Loss: {loss.item():.4f}")

# -------------------------------
# STEP 6: Evaluate
# -------------------------------
model.eval()
with torch.no_grad():
    outputs = model(X_test)
    predicted = (outputs > 0.5).float()
    accuracy = (predicted == y_test).float().mean()
    print(f"\n✅ Final Task 4 Complete — Disruption Prediction Accuracy: {accuracy.item():.4f}")


Epoch 10 — Loss: 0.3532
Epoch 20 — Loss: 0.3523
Epoch 30 — Loss: 0.3396
Epoch 40 — Loss: 0.3368
Epoch 50 — Loss: 0.3356
Epoch 60 — Loss: 0.3350
Epoch 70 — Loss: 0.3344
Epoch 80 — Loss: 0.3339
Epoch 90 — Loss: 0.3333
Epoch 100 — Loss: 0.3328

✅ Final Task 4 Complete — Disruption Prediction Accuracy: 0.8955


In [15]:
# TASK 5 — Time Series Forecasting with TensorFlow
# Author: Opemipo Oreoluwa

import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM
from sklearn.preprocessing import MinMaxScaler

# -------------------------------
# STEP 1: Load + Prepare Time Series Data
# -------------------------------
df = pd.read_csv("shipment_logistics_dataset.csv", parse_dates=["ActualShipDate", "ActualDeliveryDate"])
df["DeliveryTimeDays"] = (df["ActualDeliveryDate"] - df["ActualShipDate"]).dt.days
df = df[["ActualShipDate", "DeliveryTimeDays"]].dropna()

# Group by date → average delivery time
daily_df = df.groupby("ActualShipDate")["DeliveryTimeDays"].mean().reset_index()
daily_df.rename(columns={"DeliveryTimeDays": "AvgDeliveryTime"}, inplace=True)

# Save for record
daily_df.to_csv("task5_timeseries_dataset.csv", index=False)

# -------------------------------
# STEP 2: Normalize + Create Sequences
# -------------------------------
scaler = MinMaxScaler()
scaled_series = scaler.fit_transform(daily_df[["AvgDeliveryTime"]])

def create_sequences(data, window_size):
    X, y = [], []
    for i in range(len(data) - window_size):
        X.append(data[i:i+window_size])
        y.append(data[i+window_size])
    return np.array(X), np.array(y)

window_size = 5  # number of days used to predict the next
X, y = create_sequences(scaled_series, window_size)

# -------------------------------
# STEP 3: Build & Train LSTM Model
# -------------------------------
model = Sequential([
    LSTM(64, activation='relu', input_shape=(X.shape[1], X.shape[2])),
    Dense(1)
])

model.compile(optimizer='adam', loss='mse')
history = model.fit(X, y, epochs=50, verbose=0)

# -------------------------------
# STEP 4: Forecast Next Day
# -------------------------------
last_window = scaled_series[-window_size:]
last_window = np.expand_dims(last_window, axis=0)
pred_scaled = model.predict(last_window)
pred_actual = scaler.inverse_transform(pred_scaled)

print(f"✅ Task 5 complete — Forecasted Avg Delivery Time for next day: {pred_actual[0][0]:.2f} days")


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 441ms/step
✅ Task 5 complete — Forecasted Avg Delivery Time for next day: 4.22 days


In [16]:
# # TASK 6 — Package Image Classification with CNN
# # Author: Opemipo Oreoluwa

# import tensorflow as tf
# from tensorflow.keras import layers, models
# import os
# import shutil
# import pandas as pd
# from PIL import Image
# import numpy as np

# # -------------------------------
# # STEP 1: Prepare Dummy Image Directory from CSV
# # -------------------------------

# # Create folders: package_images/OK/ and package_images/Damaged/
# df = pd.read_csv("task6_image_labels_dataset.csv")

# base_dir = "package_images"
# for label in df["ImageLabel"].unique():
#     os.makedirs(os.path.join(base_dir, label), exist_ok=True)

# # Create dummy images with matching filenames
# for _, row in df.iterrows():
#     img_name, label = row["PackageImage"], row["ImageLabel"]
#     img_path = os.path.join(base_dir, label, img_name)
#     if not os.path.exists(img_path):
#         dummy_img = Image.fromarray(np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8))
#         dummy_img.save(img_path)

# # -------------------------------
# # STEP 2: Load Dataset from Folder
# # -------------------------------
# img_height, img_width = 100, 100

# ds = tf.keras.utils.image_dataset_from_directory(
#     base_dir,
#     image_size=(img_height, img_width),
#     batch_size=16,
#     label_mode="binary",
#     validation_split=0.2,
#     subset="training",
#     seed=123
# )

# val_ds = tf.keras.utils.image_dataset_from_directory(
#     base_dir,
#     image_size=(img_height, img_width),
#     batch_size=16,
#     label_mode="binary",
#     validation_split=0.2,
#     subset="validation",
#     seed=123
# )

# # -------------------------------
# # STEP 3: Build CNN Model
# # -------------------------------
# model = models.Sequential([
#     layers.Rescaling(1./255, input_shape=(img_height, img_width, 3)),
#     layers.Conv2D(32, (3,3), activation='relu'),
#     layers.MaxPooling2D(),
#     layers.Conv2D(64, (3,3), activation='relu'),
#     layers.MaxPooling2D(),
#     layers.Flatten(),
#     layers.Dense(64, activation='relu'),
#     layers.Dense(1, activation='sigmoid')  # binary output
# ])

# model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# # -------------------------------
# # STEP 4: Train the Model
# # -------------------------------
# history = model.fit(ds, validation_data=val_ds, epochs=5)

# # -------------------------------
# # STEP 5: Evaluate Model
# # -------------------------------
# loss, acc = model.evaluate(val_ds)
# print(f"\n✅ Task 6 Complete — Validation Accuracy: {acc:.4f}")


In [17]:
import pandas as pd

df = pd.read_csv("shipment_logistics_dataset.csv")
df[["PackageImage", "ImageLabel"]].to_csv("task6_image_labels_dataset.csv", index=False)
print("✅ Saved: task6_image_labels_dataset.csv")


✅ Saved: task6_image_labels_dataset.csv


In [18]:
# TASK 6 — Image Classification for Package Inspection with CNN
# Author: Opemipo Oreoluwa

import pandas as pd
import numpy as np
import os
from PIL import Image
import shutil
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

# -------------------------------
# STEP 1: Rebuild Label File (if missing)
# -------------------------------
df = pd.read_csv("shipment_logistics_dataset.csv")
df[["PackageImage", "ImageLabel"]].to_csv("task6_image_labels_dataset.csv", index=False)

# -------------------------------
# STEP 2: Prepare Dummy Images Folder
# -------------------------------
label_df = pd.read_csv("task6_image_labels_dataset.csv")
base_dir = "package_images"

# Clear and recreate folder structure
if os.path.exists(base_dir):
    shutil.rmtree(base_dir)

for label in ["OK", "Damaged"]:
    os.makedirs(os.path.join(base_dir, label), exist_ok=True)

# Generate dummy images matching filenames
for _, row in label_df.iterrows():
    label = row["ImageLabel"]
    name = row["PackageImage"]
    img_path = os.path.join(base_dir, label, name)
    if not os.path.exists(img_path):
        img = Image.fromarray(np.random.randint(0, 255, (150, 150, 3), dtype=np.uint8))
        img.save(img_path)

# -------------------------------
# STEP 3: Load Images with ImageDataGenerator
# -------------------------------
datagen = ImageDataGenerator(rescale=1.0/255.0, validation_split=0.2)

train_gen = datagen.flow_from_directory(
    base_dir,
    target_size=(150, 150),
    batch_size=32,
    class_mode="binary",
    subset="training"
)

val_gen = datagen.flow_from_directory(
    base_dir,
    target_size=(150, 150),
    batch_size=32,
    class_mode="binary",
    subset="validation"
)

# -------------------------------
# STEP 4: Define CNN Model
# -------------------------------
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(512, activation='relu'),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# -------------------------------
# STEP 5: Train & Evaluate
# -------------------------------
model.fit(train_gen, epochs=5, validation_data=val_gen)

loss, accuracy = model.evaluate(val_gen)
print(f"✅ Task 6 Complete — Validation Accuracy: {accuracy:.4f}")


Found 8001 images belonging to 2 classes.
Found 1999 images belonging to 2 classes.


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  self._warn_if_super_not_called()


Epoch 1/5
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m409s[0m 2s/step - accuracy: 0.4900 - loss: 0.7981 - val_accuracy: 0.5038 - val_loss: 0.6931
Epoch 2/5
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m302s[0m 1s/step - accuracy: 0.4919 - loss: 0.6932 - val_accuracy: 0.5038 - val_loss: 0.6931
Epoch 3/5
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m301s[0m 1s/step - accuracy: 0.5048 - loss: 0.6931 - val_accuracy: 0.5038 - val_loss: 0.6931
Epoch 4/5
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m306s[0m 1s/step - accuracy: 0.4998 - loss: 0.6932 - val_accuracy: 0.5038 - val_loss: 0.6931
Epoch 5/5
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m305s[0m 1s/step - accuracy: 0.4985 - loss: 0.6932 - val_accuracy: 0.5038 - val_loss: 0.6931
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 287ms/step - accuracy: 0.5029 - loss: 0.6931
✅ Task 6 Complete — Validation Accuracy: 0.5038


In [118]:
# TASK 6 — Image Classification for Package Inspection with CNN
# Author: Opemipo Oreoluwa

import pandas as pd
import numpy as np
import os
from PIL import Image
import shutil
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

# STEP 1: Rebuild Label File (if missing)
df = pd.read_csv("shipment_logistics_dataset.csv")
df[["PackageImage", "ImageLabel"]].to_csv("task6_image_labels_dataset.csv", index=False)

# STEP 2: Prepare Dummy Images Folder
label_df = pd.read_csv("task6_image_labels_dataset.csv")
base_dir = "package_images"

# Clear and recreate folder structure
if os.path.exists(base_dir):
    shutil.rmtree(base_dir)

for label in ["OK", "Damaged"]:
    os.makedirs(os.path.join(base_dir, label), exist_ok=True)

# Generate dummy images matching filenames
for _, row in label_df.iterrows():
    label = row["ImageLabel"]
    name = row["PackageImage"]
    img_path = os.path.join(base_dir, label, name)
    if not os.path.exists(img_path):
        img = Image.fromarray(np.random.randint(0, 255, (150, 150, 3), dtype=np.uint8))
        img.save(img_path)

# STEP 3: Load Images with Corrected ImageDataGenerator
datagen = ImageDataGenerator(rescale=1.0/255.0, validation_split=0.2)

train_gen = datagen.flow_from_directory(
    base_dir,
    target_size=(150, 150),
    batch_size=32,
    class_mode="binary",  # ✅ Fix: ensure binary labels
    subset="training"
)

val_gen = datagen.flow_from_directory(
    base_dir,
    target_size=(150, 150),
    batch_size=32,
    class_mode="binary",  # ✅ Fix here too
    subset="validation"
)

# STEP 4: Define CNN Model
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(512, activation='relu'),
    Dense(1, activation='sigmoid')  # ✅ For binary classification
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# STEP 5: Train & Evaluate
model.fit(train_gen, epochs=5, validation_data=val_gen, verbose=1)
loss, accuracy = model.evaluate(val_gen)
print(f"✅ Task 6 Complete — Validation Accuracy: {accuracy:.4f}")

# STEP 6: Save Model
model.save("models/task6_package_cnn.keras")
print("✅ Task 6 model saved to models/task6_package_cnn.keras")


Found 8001 images belonging to 2 classes.
Found 1999 images belonging to 2 classes.


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  self._warn_if_super_not_called()


Epoch 1/5
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m310s[0m 1s/step - accuracy: 0.4993 - loss: 0.8948 - val_accuracy: 0.5038 - val_loss: 0.6931
Epoch 2/5
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m208s[0m 827ms/step - accuracy: 0.5112 - loss: 0.6932 - val_accuracy: 0.4962 - val_loss: 0.6932
Epoch 3/5
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m330s[0m 1s/step - accuracy: 0.4979 - loss: 0.6932 - val_accuracy: 0.5038 - val_loss: 0.6931
Epoch 4/5
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m233s[0m 927ms/step - accuracy: 0.5025 - loss: 0.6932 - val_accuracy: 0.5038 - val_loss: 0.6931
Epoch 5/5
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m205s[0m 818ms/step - accuracy: 0.5059 - loss: 0.6931 - val_accuracy: 0.5038 - val_loss: 0.6931
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 199ms/step - accuracy: 0.4936 - loss: 0.6933
✅ Task 6 Complete — Validation Accuracy: 0.5038
✅ Task 6 model saved to

In [19]:
# # TASK 7 — Anomaly Detection in Shipment Data with Autoencoder
# # Author: Opemipo Oreoluwa

# import pandas as pd
# import numpy as np
# from sklearn.preprocessing import StandardScaler
# from tensorflow.keras.models import Model
# from tensorflow.keras.layers import Input, Dense
# import matplotlib.pyplot as plt

# # -------------------------------
# # STEP 1: Load and Preprocess Data
# # -------------------------------
# df = pd.read_csv("task7_anomaly_dataset.csv")

# # Convert dimensions to numeric volume estimate (L x W x H)
# dims = df["Dimensions"].str.split("x", expand=True).astype(float)
# df["DimVolume"] = dims[0] * dims[1] * dims[2]

# # Final feature set
# X = df[["Weight", "Volume", "DimVolume"]].values
# scaler = StandardScaler()
# X_scaled = scaler.fit_transform(X)

# # -------------------------------
# # STEP 2: Define Autoencoder
# # -------------------------------
# input_dim = X_scaled.shape[1]
# encoding_dim = 2  # can be tuned

# input_layer = Input(shape=(input_dim,))
# encoded = Dense(encoding_dim, activation='relu')(input_layer)
# decoded = Dense(input_dim, activation='sigmoid')(encoded)

# autoencoder = Model(inputs=input_layer, outputs=decoded)
# autoencoder.compile(optimizer='adam', loss='mse')

# # -------------------------------
# # STEP 3: Train the Autoencoder
# # -------------------------------
# history = autoencoder.fit(X_scaled, X_scaled, epochs=50, batch_size=32, validation_split=0.2, verbose=0)

# # -------------------------------
# # STEP 4: Reconstruction Error and Anomaly Detection
# # -------------------------------
# reconstructions = autoencoder.predict(X_scaled)
# mse = np.mean(np.power(X_scaled - reconstructions, 2), axis=1)

# threshold = np.percentile(mse, 95)
# anomalies = mse > threshold

# print(f"✅ Task 7 Complete — Detected {np.sum(anomalies)} anomalies out of {len(X_scaled)} shipments")


In [20]:
# STEP 1 — Create the Task 7 Dataset from Shipment Data
# Author: Opemipo Oreoluwa

import pandas as pd

df = pd.read_csv("shipment_logistics_dataset.csv")

# Save the necessary columns (including dimensions)
df[["Weight", "Volume", "Dimensions", "AnomalyFlag"]].to_csv("task7_anomaly_dataset.csv", index=False)

print("✅ Saved: task7_anomaly_dataset.csv")


✅ Saved: task7_anomaly_dataset.csv


In [21]:
# TASK 7 — Anomaly Detection with Autoencoder
# Author: Opemipo Oreoluwa

import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense

# -------------------------------
# STEP 1: Load and Preprocess Data
# -------------------------------
df = pd.read_csv("task7_anomaly_dataset.csv")

# Convert dimensions (e.g., "30x20x10") into estimated volume
dims = df["Dimensions"].str.split("x", expand=True).astype(float)
dims.columns = ["L", "W", "H"]
df["DimVolume"] = dims["L"] * dims["W"] * dims["H"]

# Select features for unsupervised anomaly detection
X = df[["Weight", "Volume", "DimVolume"]].values

# Normalize
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# -------------------------------
# STEP 2: Define Autoencoder Model
# -------------------------------
input_dim = X_scaled.shape[1]
encoding_dim = 2

input_layer = Input(shape=(input_dim,))
encoded = Dense(encoding_dim, activation='relu')(input_layer)
decoded = Dense(input_dim, activation='sigmoid')(encoded)

autoencoder = Model(input_layer, decoded)
autoencoder.compile(optimizer='adam', loss='mse')

# -------------------------------
# STEP 3: Train the Model
# -------------------------------
autoencoder.fit(X_scaled, X_scaled, epochs=50, batch_size=32, validation_split=0.2, verbose=0)

# -------------------------------
# STEP 4: Detect Anomalies
# -------------------------------
reconstructions = autoencoder.predict(X_scaled)
mse = np.mean(np.power(X_scaled - reconstructions, 2), axis=1)

# Use 95th percentile as threshold
threshold = np.percentile(mse, 95)
anomalies = mse > threshold
df["ReconstructionError"] = mse
df["DetectedAnomaly"] = anomalies.astype(int)

# Compare with true flags
print(f"✅ Task 7 complete — Detected {anomalies.sum()} anomalies out of {len(df)} samples.")
print(f"🔍 Detected vs True Anomalies:\n{df[['AnomalyFlag', 'DetectedAnomaly']].value_counts()}")


[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step
✅ Task 7 complete — Detected 500 anomalies out of 10000 samples.
🔍 Detected vs True Anomalies:
AnomalyFlag  DetectedAnomaly
0            0                  9124
             1                   481
1            0                   376
             1                    19
Name: count, dtype: int64


In [22]:
# # TASK 8 — Predictive Maintenance Classifier for Fleet Management
# # Author: Opemipo Oreoluwa

# import pandas as pd
# import tensorflow as tf
# from sklearn.model_selection import train_test_split
# from sklearn.preprocessing import StandardScaler

# # -------------------------------
# # STEP 1 — Load and Preprocess Data
# # -------------------------------
# # I'm using six relevant vehicle metrics to predict the likelihood of a breakdown
# df = pd.read_csv("task8_maintenance_dataset.csv")

# # Split features and target
# X = df.drop("BreakdownFlag", axis=1)
# y = df["BreakdownFlag"]

# # Stratified train-test split for class balance
# X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.2, random_state=42)

# # Normalize feature values to improve training stability
# scaler = StandardScaler()
# X_train = scaler.fit_transform(X_train)
# X_test = scaler.transform(X_test)

# # -------------------------------
# # STEP 2 — Define a Neural Network Model
# # -------------------------------
# # This is a basic dense feed-forward network for binary classification
# model = tf.keras.Sequential([
#     tf.keras.layers.Input(shape=(X_train.shape[1],)),
#     tf.keras.layers.Dense(64, activation="relu"),
#     tf.keras.layers.Dense(32, activation="relu"),
#     tf.keras.layers.Dense(1, activation="sigmoid")  # sigmoid for binary output
# ])

# model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

# # -------------------------------
# # STEP 3 — Train the Model
# # -------------------------------
# model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.2, verbose=1)

# # -------------------------------
# # STEP 4 — Evaluate Performance
# # -------------------------------
# loss, acc = model.evaluate(X_test, y_test)
# print(f"✅ Task 8 Complete — Predictive Maintenance Model Accuracy: {acc:.4f}")


In [23]:
# TASK 8 — Extract Maintenance Dataset
# Author: Opemipo Oreoluwa

import pandas as pd

# Load your full logistics dataset
df = pd.read_csv("shipment_logistics_dataset.csv")

# Select relevant columns for predictive maintenance
df_task8 = df[[
    "EngineHours", "Mileage", "FuelConsumption", "Temperature", "Vibration", "BreakdownFlag"
]]

# Save to CSV for the model to read
df_task8.to_csv("task8_maintenance_dataset.csv", index=False)

print("✅ Saved: task8_maintenance_dataset.csv")


✅ Saved: task8_maintenance_dataset.csv


In [74]:
# TASK 8 — Predictive Maintenance Classifier for Fleet Management
# Author: Opemipo Oreoluwa

import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load and preprocess dataset
df = pd.read_csv("task8_maintenance_dataset.csv")

X = df.drop("BreakdownFlag", axis=1)
y = df["BreakdownFlag"]

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Define the model
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(64, activation="relu"),
    tf.keras.layers.Dense(32, activation="relu"),
    tf.keras.layers.Dense(1, activation="sigmoid")
])

model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.2, verbose=1)

# Evaluate performance
loss, acc = model.evaluate(X_test, y_test)
print(f"✅ Task 8 Complete — Predictive Maintenance Model Accuracy: {acc:.4f}")


Epoch 1/20
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 11ms/step - accuracy: 0.9248 - loss: 0.3122 - val_accuracy: 0.9675 - val_loss: 0.1464
Epoch 2/20
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.9724 - loss: 0.1315 - val_accuracy: 0.9675 - val_loss: 0.1464
Epoch 3/20
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.9714 - loss: 0.1348 - val_accuracy: 0.9675 - val_loss: 0.1451
Epoch 4/20
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.9701 - loss: 0.1369 - val_accuracy: 0.9675 - val_loss: 0.1453
Epoch 5/20
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.9703 - loss: 0.1366 - val_accuracy: 0.9675 - val_loss: 0.1451
Epoch 6/20
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.9720 - loss: 0.1300 - val_accuracy: 0.9675 - val_loss: 0.1428
Epoch 7/20
[1m200/200[0m

In [25]:
# # streamlit_app/app.py
# import streamlit as st
# import pandas as pd
# import joblib
# import numpy as np

# # Load trained models
# delivery_model = joblib.load("models/delivery_model.pkl")
# breakdown_model = joblib.load("models/breakdown_model.pkl")

# st.set_page_config(page_title="HeavyHaul AI", layout="centered")

# st.title("🚚 HeavyHaul Systems — Smart Logistics Assistant")
# st.markdown("Apply ML predictions to your shipment and fleet data.")

# # ---- Sidebar navigation ----
# task = st.sidebar.selectbox("Select Task", [
#     "Predict Shipment Delivery Time",
#     "Predict Breakdown Risk",
#     "Show Shipment Anomalies"
# ])

# # ---- Task 1: Predict Delivery Time ----
# if task == "Predict Shipment Delivery Time":
#     st.subheader("📦 Predict Delivery Time")
#     with st.form("delivery_form"):
#         origin = st.selectbox("Origin", ["New York", "Chicago", "Miami", "Boston", "Dallas"])
#         destination = st.selectbox("Destination", ["Los Angeles", "Houston", "Seattle", "Denver"])
#         carrier = st.selectbox("Carrier", ["FedEx", "UPS", "DHL"])
#         shipment_type = st.selectbox("Shipment Type", ["Standard", "Express", "Fragile"])
#         weight = st.number_input("Weight (kg)", min_value=10.0, max_value=500.0)
#         volume = st.number_input("Volume (m³)", min_value=0.5, max_value=5.0)
#         dims = st.text_input("Dimensions (LxWxH)", value="30x20x10")

#         submitted = st.form_submit_button("Predict Delivery Time")
#         if submitted:
#             dims_split = [int(x) for x in dims.lower().split("x")]
#             if len(dims_split) != 3:
#                 st.error("❌ Please use format: 30x20x10")
#             else:
#                 # Create dataframe for model
#                 input_df = pd.DataFrame({
#                     "Origin": [origin],
#                     "Destination": [destination],
#                     "Carrier": [carrier],
#                     "ShipmentType": [shipment_type],
#                     "Weight": [weight],
#                     "Volume": [volume],
#                     "Dimensions": [f"{dims_split[0]}x{dims_split[1]}x{dims_split[2]}"]
#                 })
#                 pred_days = delivery_model.predict(input_df)[0]
#                 st.success(f"✅ Estimated Delivery Time: **{round(pred_days, 2)} days**")

# # ---- Task 8: Predict Breakdown ----
# elif task == "Predict Breakdown Risk":
#     st.subheader("🔧 Predict Breakdown Risk")
#     with st.form("breakdown_form"):
#         engine_hours = st.slider("Engine Hours", 1000, 8000, 5000)
#         mileage = st.slider("Mileage (km)", 20000, 150000, 50000)
#         fuel = st.slider("Fuel Consumption (L/100km)", 4.0, 15.0, 8.5)
#         temp = st.slider("Engine Temperature (°C)", 10, 90, 45)
#         vibration = st.slider("Vibration Level", 0.01, 0.10, 0.05)

#         submit2 = st.form_submit_button("Predict Breakdown Risk")
#         if submit2:
#             X_input = pd.DataFrame([[
#                 engine_hours, mileage, fuel, temp, vibration
#             ]], columns=["EngineHours", "Mileage", "FuelConsumption", "Temperature", "Vibration"])
#             pred = breakdown_model.predict(X_input)[0]
#             prob = breakdown_model.predict_proba(X_input)[0][1]
#             if pred == 1:
#                 st.error(f"⚠️ High Breakdown Risk (Confidence: {prob:.2%})")
#             else:
#                 st.success(f"✅ No Immediate Breakdown Risk (Confidence: {prob:.2%})")

# # ---- Task 7: Show Anomalies ----
# elif task == "Show Shipment Anomalies":
#     st.subheader("🚨 Shipment Anomaly Insights")
#     df = pd.read_csv("data/anomaly_detection.csv")
#     st.dataframe(df[df["AnomalyFlag"] == 1].head(10))
#     st.markdown("Shows first 10 detected anomalies in the shipment.")


In [26]:
# # train_delivery_model.py
# import pandas as pd
# import joblib
# from sklearn.pipeline import Pipeline
# from sklearn.preprocessing import OneHotEncoder, StandardScaler
# from sklearn.compose import ColumnTransformer
# from sklearn.ensemble import RandomForestRegressor

# # Load your cleaned dataset
# df = pd.read_csv("shipment_times_for_ml.csv", parse_dates=["ActualShipDate", "ActualDeliveryDate"])

# # Target variable
# df["DeliveryTimeDays"] = (df["ActualDeliveryDate"] - df["ActualShipDate"]).dt.days

# # Drop any rows with missing target
# df.dropna(subset=["DeliveryTimeDays"], inplace=True)

# # Select features and target
# X = df[["Origin", "Destination", "Carrier", "ShipmentType", "Weight", "Volume", "Dimensions"]]
# y = df["DeliveryTimeDays"]

# # Preprocessing
# categorical = ["Origin", "Destination", "Carrier", "ShipmentType", "Dimensions"]
# numerical = ["Weight", "Volume"]

# preprocessor = ColumnTransformer([
#     ("cat", OneHotEncoder(handle_unknown="ignore"), categorical),
#     ("num", StandardScaler(), numerical)
# ])

# # Pipeline
# model = Pipeline([
#     ("pre", preprocessor),
#     ("reg", RandomForestRegressor(n_estimators=100, random_state=42))
# ])

# # Train and save model
# model.fit(X, y)
# joblib.dump(model, "models/delivery_model.pkl")
# print("✅ delivery_model.pkl saved.")


In [72]:
# # Create task1-3 dataset from the main shipment_logistics_dataset
# import pandas as pd

# # Load full dataset
# df = pd.read_csv("shipment_logistics_dataset.csv", parse_dates=["ActualShipDate", "ActualDeliveryDate"])

# # Compute delivery time in days
# df["DeliveryTimeDays"] = (df["ActualDeliveryDate"] - df["ActualShipDate"]).dt.days

# # Select relevant features
# shipment_df = df[[
#     "Origin", "Destination", "Carrier", "ShipmentType",
#     "Weight", "Volume", "Dimensions",
#     "PlannedShipDate", "ActualShipDate",
#     "PlannedDeliveryDate", "ActualDeliveryDate",
#     "Status", "DeliveryTimeDays"
# ]].dropna()

# shipment_df.to_csv("shipment_times_for_ml.csv", index=False)
# print("✅ Saved: shipment_times_for_ml.csv")


In [28]:
# # train_delivery_model.py
# import pandas as pd
# import joblib
# from sklearn.pipeline import Pipeline
# from sklearn.preprocessing import OneHotEncoder, StandardScaler
# from sklearn.compose import ColumnTransformer
# from sklearn.ensemble import RandomForestRegressor

# # Load the new shipment_times_for_ml.csv
# df = pd.read_csv("shipment_times_for_ml.csv")

# # Target
# X = df[["Origin", "Destination", "Carrier", "ShipmentType", "Weight", "Volume", "Dimensions"]]
# y = df["DeliveryTimeDays"]

# # Preprocessing pipeline
# cat = ["Origin", "Destination", "Carrier", "ShipmentType", "Dimensions"]
# num = ["Weight", "Volume"]

# preprocessor = ColumnTransformer([
#     ("cat", OneHotEncoder(handle_unknown="ignore"), cat),
#     ("num", StandardScaler(), num)
# ])

# # Pipeline
# model = Pipeline([
#     ("prep", preprocessor),
#     ("reg", RandomForestRegressor(n_estimators=100, random_state=42))
# ])

# model.fit(X, y)
# joblib.dump(model, "models/delivery_model.pkl")
# print("✅ delivery_model.pkl saved.")


In [29]:
# # ✅ This script should be run in Jupyter or VS Code to train and save the models used in your Streamlit app
# # Assumes `shipment_logistics_dataset.csv` is in the same directory

# import pandas as pd
# import numpy as np
# import os
# import joblib
# import torch
# import tensorflow as tf
# from tensorflow.keras import layers, models
# from sklearn.model_selection import train_test_split
# from sklearn.preprocessing import LabelEncoder, StandardScaler
# from sklearn.ensemble import RandomForestRegressor
# from sklearn.pipeline import Pipeline
# from sklearn.compose import ColumnTransformer
# from sklearn.preprocessing import OneHotEncoder

# # Create models directory
# os.makedirs("models", exist_ok=True)

# # Load base dataset
# df = pd.read_csv("shipment_logistics_dataset.csv", parse_dates=["ActualShipDate", "ActualDeliveryDate"])

# # ----------------------------
# # Task 1–3: Delivery Time Model
# # ----------------------------
# df["DeliveryTimeDays"] = (df["ActualDeliveryDate"] - df["ActualShipDate"]).dt.days

# df1 = df[["Origin", "Destination", "Carrier", "ShipmentType", "Weight", "Volume", "Dimensions", "DeliveryTimeDays"]].dropna()

# X = df1.drop("DeliveryTimeDays", axis=1)
# y = df1["DeliveryTimeDays"]

# categorical = ["Origin", "Destination", "Carrier", "ShipmentType", "Dimensions"]
# numerical = ["Weight", "Volume"]

# preprocessor = ColumnTransformer([
#     ("cat", OneHotEncoder(handle_unknown="ignore"), categorical),
#     ("num", StandardScaler(), numerical)
# ])

# model = Pipeline([
#     ("prep", preprocessor),
#     ("reg", RandomForestRegressor(n_estimators=100, random_state=42))
# ])

# model.fit(X, y)
# joblib.dump(model, "models/delivery_model.pkl")
# print("✅ delivery_model.pkl saved")

# # ----------------------------
# # Task 4: Disruption Classifier (PyTorch)
# # ----------------------------
# df["DisruptionFlag"] = (df["Status"] == "Disrupted").astype(int)
# df2 = df[["Weight", "Volume", "Weather", "Traffic", "PublicEvent", "DisruptionFlag"]].dropna()

# X = df2.drop("DisruptionFlag", axis=1)
# y = df2["DisruptionFlag"]

# for col in ["Weather", "Traffic", "PublicEvent"]:
#     X[col] = LabelEncoder().fit_transform(X[col])

# X = torch.tensor(X.values, dtype=torch.float32)
# y = torch.tensor(y.values.reshape(-1, 1), dtype=torch.float32)

# class DisruptionModel(torch.nn.Module):
#     def __init__(self):
#         super().__init__()
#         self.net = torch.nn.Sequential(
#             torch.nn.Linear(X.shape[1], 16),
#             torch.nn.ReLU(),
#             torch.nn.Linear(16, 1),
#             torch.nn.Sigmoid()
#         )
#     def forward(self, x):
#         return self.net(x)

# model_d = DisruptionModel()
# loss_fn = torch.nn.BCELoss()
# optimizer = torch.optim.Adam(model_d.parameters(), lr=0.01)

# for epoch in range(100):
#     optimizer.zero_grad()
#     output = model_d(X)
#     loss = loss_fn(output, y)
#     loss.backward()
#     optimizer.step()

# torch.save(model_d.state_dict(), "models/disruption_model.pt")
# print("✅ disruption_model.pt saved")

# # ----------------------------
# # Task 5: Forecasting Model
# # ----------------------------
# timeseries_df = df.groupby("ActualShipDate")["DeliveryTimeDays"].mean().reset_index()
# timeseries_df["AvgDeliveryTime"] = timeseries_df["DeliveryTimeDays"]
# timeseries_df["day_index"] = np.arange(len(timeseries_df))

# X = timeseries_df[["day_index"]]
# y = timeseries_df[["AvgDeliveryTime"]]

# model_f = tf.keras.Sequential([
#     layers.Dense(32, activation="relu", input_shape=(1,)),
#     layers.Dense(16, activation="relu"),
#     layers.Dense(1)
# ])
# model_f.compile(optimizer="adam", loss="mse")
# model_f.fit(X, y, epochs=30, verbose=0)
# model_f.save("models/forecasting_model.keras")
# print("✅ forecasting_model.keras saved")

# # ----------------------------
# # Task 6: CNN Image Model (mock only — assume real retraining outside)
# # ----------------------------
# # You can place your trained model here
# # For now, we create and save a dummy CNN model
# image_model = tf.keras.Sequential([
#     layers.Input(shape=(150,150,3)),
#     layers.Conv2D(16, (3,3), activation='relu'),
#     layers.MaxPooling2D(2,2),
#     layers.Flatten(),
#     layers.Dense(64, activation='relu'),
#     layers.Dense(1, activation='sigmoid')
# ])
# image_model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
# image_model.save("models/cnn_package_model.keras")
# print("✅ cnn_package_model.keras saved (dummy)")

# # ----------------------------
# # Task 7: Anomaly Autoencoder
# # ----------------------------
# df7 = df[["Weight", "Volume"]].dropna()
# X = StandardScaler().fit_transform(df7.values)

# inp = layers.Input(shape=(X.shape[1],))
# encoded = layers.Dense(2, activation='relu')(inp)
# decoded = layers.Dense(X.shape[1], activation='sigmoid')(encoded)
# autoencoder = tf.keras.Model(inp, decoded)
# autoencoder.compile(optimizer='adam', loss='mse')
# autoencoder.fit(X, X, epochs=20, verbose=0)
# autoencoder.save("models/anomaly_autoencoder.keras")
# print("✅ anomaly_autoencoder.keras saved")

# # ----------------------------
# # Task 8: Predictive Maintenance
# # ----------------------------
# df8 = df[["EngineHours", "Mileage", "FuelConsumption", "Temperature", "Vibration", "BreakdownFlag"]].dropna()
# X = df8.drop("BreakdownFlag", axis=1)
# y = df8["BreakdownFlag"]

# X = StandardScaler().fit_transform(X)

# model_m = tf.keras.Sequential([
#     layers.Dense(32, activation="relu", input_shape=(X.shape[1],)),
#     layers.Dense(16, activation="relu"),
#     layers.Dense(1, activation="sigmoid")
# ])
# model_m.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
# model_m.fit(X, y, epochs=20, verbose=0)
# model_m.save("models/maintenance_model.keras")
# print("✅ maintenance_model.keras saved")


In [30]:
# import os
# import torch

# # 📁 Create directory for saved models if it doesn't exist
# os.makedirs("models", exist_ok=True)

# # ✅ Task 3 — TensorFlow Neural Net for Delivery Time Prediction
# try:
#     model.save("models/task3_delivery_time_nn.h5")
#     print("✅ Task 3 model saved: task3_delivery_time_nn.h5")
# except Exception as e:
#     print(f"❌ Task 3 save error: {e}")

# # ✅ Task 4 — PyTorch Disruption Classifier
# try:
#     torch.save(model.state_dict(), "models/task4_disruption_model.pt")
#     print("✅ Task 4 model saved: task4_disruption_model.pt")
# except Exception as e:
#     print(f"❌ Task 4 save error: {e}")

# # ✅ Task 5 — LSTM Time Series Forecasting Model
# try:
#     model.save("models/task5_timeseries_lstm.h5")
#     print("✅ Task 5 model saved: task5_timeseries_lstm.h5")
# except Exception as e:
#     print(f"❌ Task 5 save error: {e}")

# # ✅ Task 6 — CNN Image Classification Model
# try:
#     model.save("models/task6_package_cnn.h5")
#     print("✅ Task 6 model saved: task6_package_cnn.h5")
# except Exception as e:
#     print(f"❌ Task 6 save error: {e}")

# # ✅ Task 7 — Autoencoder for Anomaly Detection
# try:
#     autoencoder.save("models/task7_anomaly_autoencoder.h5")
#     print("✅ Task 7 model saved: task7_anomaly_autoencoder.h5")
# except Exception as e:
#     print(f"❌ Task 7 save error: {e}")

# # ✅ Task 8 — Maintenance Classifier
# try:
#     model.save("models/task8_maintenance_classifier.h5")
#     print("✅ Task 8 model saved: task8_maintenance_classifier.h5")
# except Exception as e:
#     print(f"❌ Task 8 save error: {e}")


In [31]:
# import os
# import torch
# import joblib
# from tensorflow.keras.models import save_model

# os.makedirs("models", exist_ok=True)

# # TASK 3 — Neural Net for Delivery Time (Keras model)
# try:
#     save_model(task3_model, "models/task3_delivery_time_nn.keras")
#     print("✅ Task 3 model saved: task3_delivery_time_nn.keras")
# except Exception as e:
#     print(f"❌ Task 3 save error: {e}")

# # TASK 4 — PyTorch model for Disruption
# try:
#     torch.save(task4_model.state_dict(), "models/task4_disruption_model.pt")
#     print("✅ Task 4 model saved: task4_disruption_model.pt")
# except Exception as e:
#     print(f"❌ Task 4 save error: {e}")

# # TASK 5 — LSTM Time Series (Keras)
# try:
#     save_model(task5_model, "models/task5_timeseries_lstm.keras")
#     print("✅ Task 5 model saved: task5_timeseries_lstm.keras")
# except Exception as e:
#     print(f"❌ Task 5 save error: {e}")

# # TASK 6 — CNN for Images (Keras)
# try:
#     save_model(task6_model, "models/task6_package_cnn.keras")
#     print("✅ Task 6 model saved: task6_package_cnn.keras")
# except Exception as e:
#     print(f"❌ Task 6 save error: {e}")

# # TASK 7 — Autoencoder (Keras)
# try:
#     autoencoder.save("models/task7_anomaly_autoencoder.keras")
#     print("✅ Task 7 model saved: task7_anomaly_autoencoder.keras")
# except Exception as e:
#     print(f"❌ Task 7 save error: {e}")

# # TASK 8 — Maintenance Classifier (Keras)
# try:
#     save_model(task8_model, "models/task8_maintenance_classifier.keras")
#     print("✅ Task 8 model saved: task8_maintenance_classifier.keras")
# except Exception as e:
#     print(f"❌ Task 8 save error: {e}")


In [32]:
# # If you've trained them in sequence using variable name 'model', reassign here:
# task3_model = model  # after Task 3 training
# task4_model = model  # after Task 4 training (PyTorch)
# task5_model = model  # after Task 5 training (LSTM)
# task6_model = model  # after Task 6 training (CNN)
# # Task 7 model was saved correctly
# task8_model = model  # after Task 8 training (Keras)


In [33]:
# task4_model = DisruptionPredictor()
# task4_model.load_state_dict(torch.load("models/task4_disruption_model.pt"))  # if saved earlier


In [34]:
# import os
# import torch
# from tensorflow.keras.models import save_model

# os.makedirs("models", exist_ok=True)

# try:
#     save_model(task3_model, "models/task3_delivery_time_nn.keras")
#     print("✅ Task 3 model saved")
# except Exception as e:
#     print("❌ Task 3:", e)

# try:
#     torch.save(task4_model.state_dict(), "models/task4_disruption_model.pt")
#     print("✅ Task 4 model saved")
# except Exception as e:
#     print("❌ Task 4:", e)

# try:
#     save_model(task5_model, "models/task5_timeseries_lstm.keras")
#     print("✅ Task 5 model saved")
# except Exception as e:
#     print("❌ Task 5:", e)

# try:
#     save_model(task6_model, "models/task6_package_cnn.keras")
#     print("✅ Task 6 model saved")
# except Exception as e:
#     print("❌ Task 6:", e)

# try:
#     autoencoder.save("models/task7_anomaly_autoencoder.keras")
#     print("✅ Task 7 model saved")
# except Exception as e:
#     print("❌ Task 7:", e)

# try:
#     save_model(task8_model, "models/task8_maintenance_classifier.keras")
#     print("✅ Task 8 model saved")
# except Exception as e:
#     print("❌ Task 8:", e)


In [82]:
# Task 4 — Disruption Prediction (retrain and save)

import pandas as pd
import random
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import torch
import torch.nn as nn
import torch.optim as optim

# Load data
df = pd.read_csv("shipment_logistics_dataset.csv", parse_dates=["ActualShipDate"])
df["ActualShipDate"] = df["ActualShipDate"].dt.date
df["Route"] = df["RouteID"]
df["DisruptionFlag"] = (df["Status"] == "Disrupted").astype(int)

# Create weather data and merge
weather_conditions = ["Clear", "Rain", "Storm", "Cloudy", "Sunny"]
weather_data = df[["ActualShipDate", "Route"]].drop_duplicates().copy()
weather_data["weather"] = [random.choice(weather_conditions) for _ in range(len(weather_data))]

merged_df = pd.merge(df, weather_data, left_on=["ActualShipDate", "Route"], right_on=["ActualShipDate", "Route"])
merged_df["weather_encoded"] = LabelEncoder().fit_transform(merged_df["weather"])
merged_df["route_encoded"] = LabelEncoder().fit_transform(merged_df["Route"])

# Prepare tensors
X = merged_df[["route_encoded", "Volume", "weather_encoded"]].values
y = merged_df["DisruptionFlag"].values.astype(float)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train.reshape(-1, 1), dtype=torch.float32)

# Model
class DisruptionPredictor(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(3, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return torch.sigmoid(self.fc3(x))

task4_model = DisruptionPredictor()
criterion = nn.BCELoss()
optimizer = optim.Adam(task4_model.parameters(), lr=0.001)

# Train
for epoch in range(10):
    task4_model.train()
    optimizer.zero_grad()
    output = task4_model(X_train)
    loss = criterion(output, y_train)
    loss.backward()
    optimizer.step()

# Save
torch.save(task4_model.state_dict(), "models/task4_disruption_model.pt")
print("✅ Task 4 model saved successfully.")


✅ Task 4 model saved successfully.


In [100]:
import os
import torch
from tensorflow.keras.models import save_model

# Create models/ directory if not exists
os.makedirs("models", exist_ok=True)

# Task 3 — Neural Network (TensorFlow)
try:
    save_model(model, "models/task3_delivery_nn.keras")  # model is the trained Keras model
    print("✅ Task 3 model saved")
except Exception as e:
    print("❌ Task 3:", e)

# Task 4 — PyTorch Disruption Classifier
try:
    torch.save(model.state_dict(), "models/task4_disruption_model.pt")  # model is the PyTorch DisruptionPredictor
    print("✅ Task 4 model saved")
except Exception as e:
    print("❌ Task 4:", e)

# Task 5 — LSTM for Time Series (TensorFlow)
try:
    save_model(model, "models/task5_timeseries_lstm.keras")
    print("✅ Task 5 model saved")
except Exception as e:
    print("❌ Task 5:", e)

# Task 6 — CNN for Image Classification (TensorFlow)
try:
    save_model(model, "models/task6_package_cnn.keras")
    print("✅ Task 6 model saved")
except Exception as e:
    print("❌ Task 6:", e)

# Task 7 — Autoencoder for Anomaly Detection
try:
    autoencoder.save("models/task7_anomaly_autoencoder.keras")
    print("✅ Task 7 model saved")
except Exception as e:
    print("❌ Task 7:", e)

# Task 8 — Predictive Maintenance Classifier (TensorFlow)
try:
    save_model(model, "models/task8_maintenance_classifier.keras")
    print("✅ Task 8 model saved")
except Exception as e:
    print("❌ Task 8:", e)


✅ Task 3 model saved
❌ Task 4: 'Sequential' object has no attribute 'state_dict'
✅ Task 5 model saved
✅ Task 6 model saved
✅ Task 7 model saved
✅ Task 8 model saved


In [90]:
import numpy as np
import tensorflow as tf
import torch
import torch.nn as nn
from tensorflow.keras.models import load_model


In [102]:
# # Load Task 3 model
# task3_model = load_model("models/task3_delivery_nn.h5")  # or .keras if you haven’t re-saved yet

# # Sample input: Weight, Volume
# task3_input = np.array([[150, 3.5]])
# pred = task3_model.predict(task3_input)
# print(f"📦 Task 3: Predicted Delivery Time = {pred[0][0]:.2f} days")


In [94]:
import os
print(os.listdir("models"))


['anomaly_autoencoder.keras', 'cnn_package_model.keras', 'delivery_model.pkl', 'disruption_model.pt', 'forecasting_model.keras', 'maintenance_model.keras', 'task3_delivery_nn.keras', 'task4_disruption_model.pt', 'task5_timeseries_lstm.keras', 'task6_package_cnn.keras', 'task7_anomaly_autoencoder.h5', 'task7_anomaly_autoencoder.keras', 'task8_maintenance_classifier.keras']


In [104]:
import tensorflow as tf
import torch
from tensorflow.keras.models import load_model
import os

# Verify models exist
print("Available models:", os.listdir("models"))

# Task 3: Delivery Neural Network (TensorFlow)
task3_model = load_model("models/task3_delivery_nn.keras")

# Task 4: Disruption Classifier (PyTorch)
from torch_model_def import DisruptionPredictor  # this is your defined class
task4_model = DisruptionPredictor()
task4_model.load_state_dict(torch.load("models/task4_disruption_model.pt"))
task4_model.eval()

# Task 5: LSTM Forecasting Model (TensorFlow)
task5_model = load_model("models/task5_timeseries_lstm.keras")

# Task 6: CNN for Image Classification (TensorFlow)
task6_model = load_model("models/task6_package_cnn.keras")

# Task 7: Autoencoder for Anomaly Detection (TensorFlow)
task7_model = load_model("models/task7_anomaly_autoencoder.keras")

# Task 8: Predictive Maintenance Model (TensorFlow)
task8_model = load_model("models/task8_maintenance_classifier.keras")

print("✅ All models loaded successfully.")


Available models: ['anomaly_autoencoder.keras', 'cnn_package_model.keras', 'delivery_model.pkl', 'disruption_model.pt', 'forecasting_model.keras', 'maintenance_model.keras', 'task3_delivery_nn.keras', 'task4_disruption_model.pt', 'task5_timeseries_lstm.keras', 'task6_package_cnn.keras', 'task7_anomaly_autoencoder.h5', 'task7_anomaly_autoencoder.keras', 'task8_maintenance_classifier.keras']


ModuleNotFoundError: No module named 'torch_model_def'

In [122]:
import os
import numpy as np
import torch
import torch.nn as nn
from tensorflow.keras.models import load_model

# Check saved models
print("✅ Available models:", os.listdir("models"))

# -----------------------------
# Task 3 — TensorFlow Delivery NN
# -----------------------------
task3_model = load_model("models/task3_delivery_nn.keras")

# -----------------------------
# Task 4 — PyTorch Disruption Classifier
# -----------------------------
class DisruptionPredictor(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(3, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return torch.sigmoid(self.fc3(x))

task4_model = DisruptionPredictor()
task4_model.load_state_dict(torch.load("models/task4_disruption_model.pt"))
task4_model.eval()

# -----------------------------
# Task 5 — Time Series LSTM
# -----------------------------
task5_model = load_model("models/task5_timeseries_lstm.keras")

# -----------------------------
# Task 6 — CNN for Image Classification
# -----------------------------
task6_model = load_model("models/task6_package_cnn.keras")

# -----------------------------
# Task 7 — Autoencoder
# -----------------------------
task7_model = load_model("models/task7_anomaly_autoencoder.keras")

# -----------------------------
# Task 8 — Predictive Maintenance Classifier
# -----------------------------
task8_model = load_model("models/task8_maintenance_classifier.keras")

print("✅ All models loaded successfully.")


✅ Available models: ['anomaly_autoencoder.keras', 'cnn_package_model.keras', 'delivery_model.pkl', 'disruption_model.pt', 'forecasting_model.keras', 'maintenance_model.keras', 'task3_delivery_nn.keras', 'task4_disruption_model.pt', 'task5_timeseries_lstm.keras', 'task6_package_cnn.keras', 'task7_anomaly_autoencoder.h5', 'task7_anomaly_autoencoder.keras', 'task8_maintenance_classifier.keras']
✅ All models loaded successfully.


In [108]:
from tensorflow.keras.models import save_model

# Save model using Keras zip format (recommended)
model.save("models/task3_delivery_nn.keras")


In [110]:
from tensorflow.keras.models import save_model
import os

# Create models/ directory if not exists
os.makedirs("models", exist_ok=True)

# Assuming `model` is your trained Task 3 model
save_model(model, "models/task3_delivery_nn.keras")
print("✅ Task 3 model saved")


✅ Task 3 model saved


In [112]:
import os
from tensorflow.keras.models import save_model

# Correct path for Streamlit to find the file
os.makedirs("C:/Users/USER PC/Downloads/heavyhaul/models", exist_ok=True)
save_model(model, "C:/Users/USER PC/Downloads/heavyhaul/models/task3_delivery_nn.keras")
print("✅ Task 3 model saved to heavyhaul/models/")


✅ Task 3 model saved to heavyhaul/models/


In [124]:
# save_all_models.py

import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Input
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow.keras.models import save_model

# Create consistent model save path
model_dir = r"C:/Users/USER PC/Downloads/heavyhaul/models"
os.makedirs(model_dir, exist_ok=True)

print("📦 Saving models to:", model_dir)

# --------------------------
# TASK 3 — Delivery NN (TF)
# --------------------------
try:
    df = pd.read_csv("shipment_logistics_dataset.csv")
    df["DeliveryTimeDays"] = (
        pd.to_datetime(df["ActualDeliveryDate"]) -
        pd.to_datetime(df["ActualShipDate"])
    ).dt.days

    X = df[["Weight", "Volume"]].dropna()
    y = df.loc[X.index, "DeliveryTimeDays"]

    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    X_train, _, y_train, _ = train_test_split(X_scaled, y, test_size=0.2)
    task3_model = tf.keras.Sequential([
        Dense(32, activation="relu", input_shape=(X_train.shape[1],)),
        Dense(16, activation="relu"),
        Dense(1)
    ])
    task3_model.compile(optimizer="adam", loss="mse")
    task3_model.fit(X_train, y_train, epochs=5, verbose=0)

    save_model(task3_model, os.path.join(model_dir, "task3_delivery_nn.keras"))
    print("✅ Task 3 saved")
except Exception as e:
    print("❌ Task 3 error:", e)

# --------------------------
# TASK 4 — PyTorch Classifier
# --------------------------
try:
    class DisruptionPredictor(nn.Module):
        def __init__(self):
            super().__init__()
            self.fc1 = nn.Linear(3, 64)
            self.fc2 = nn.Linear(64, 32)
            self.fc3 = nn.Linear(32, 1)

        def forward(self, x):
            x = torch.relu(self.fc1(x))
            x = torch.relu(self.fc2(x))
            return torch.sigmoid(self.fc3(x))

    df = pd.read_csv("shipment_logistics_dataset.csv", parse_dates=["ActualShipDate"])
    df["ActualShipDate"] = df["ActualShipDate"].dt.date
    df["Route"] = df["RouteID"]
    df["DisruptionFlag"] = (df["Status"] == "Disrupted").astype(int)

    # Match dates and routes
    weather = pd.read_csv("weather_data.csv")
    merged = pd.merge(df, weather, left_on=["ActualShipDate", "Route"], right_on=["date", "route"])
    merged["weather_encoded"] = LabelEncoder().fit_transform(merged["weather"])
    merged["route_encoded"] = LabelEncoder().fit_transform(merged["Route"])

    X = merged[["route_encoded", "Volume", "weather_encoded"]].values
    y = merged["DisruptionFlag"].astype(float).values.reshape(-1, 1)

    X_tensor = torch.tensor(X, dtype=torch.float32)
    y_tensor = torch.tensor(y, dtype=torch.float32)

    model = DisruptionPredictor()
    loss_fn = nn.BCELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    for epoch in range(10):
        model.train()
        optimizer.zero_grad()
        out = model(X_tensor)
        loss = loss_fn(out, y_tensor)
        loss.backward()
        optimizer.step()

    torch.save(model.state_dict(), os.path.join(model_dir, "task4_disruption_model.pt"))
    print("✅ Task 4 saved")
except Exception as e:
    print("❌ Task 4 error:", e)

# --------------------------
# TASK 5 — Time Series LSTM
# --------------------------
try:
    df = pd.read_csv("shipment_logistics_dataset.csv", parse_dates=["ActualShipDate", "ActualDeliveryDate"])
    df["DeliveryTimeDays"] = (df["ActualDeliveryDate"] - df["ActualShipDate"]).dt.days
    df = df[["ActualShipDate", "DeliveryTimeDays"]].dropna()
    daily_df = df.groupby("ActualShipDate").mean().reset_index()

    from sklearn.preprocessing import MinMaxScaler
    scaler = MinMaxScaler()
    series = scaler.fit_transform(daily_df[["DeliveryTimeDays"]])

    def create_sequences(data, window):
        X, y = [], []
        for i in range(len(data) - window):
            X.append(data[i:i + window])
            y.append(data[i + window])
        return np.array(X), np.array(y)

    X, y = create_sequences(series, 5)
    model = Sequential([
        LSTM(64, activation='relu', input_shape=(X.shape[1], X.shape[2])),
        Dense(1)
    ])
    model.compile(optimizer='adam', loss='mse')
    model.fit(X, y, epochs=5, verbose=0)

    save_model(model, os.path.join(model_dir, "task5_timeseries_lstm.keras"))
    print("✅ Task 5 saved")
except Exception as e:
    print("❌ Task 5 error:", e)

# --------------------------
# TASK 6 — CNN Package Model
# --------------------------
try:
    # Load dummy images from previous setup
    from tensorflow.keras.preprocessing.image import ImageDataGenerator
    base_dir = "package_images"
    datagen = ImageDataGenerator(rescale=1.0/255.0, validation_split=0.2)
    train_gen = datagen.flow_from_directory(base_dir, target_size=(150,150), batch_size=32, subset='training')
    val_gen = datagen.flow_from_directory(base_dir, target_size=(150,150), batch_size=32, subset='validation')

    cnn_model = Sequential([
        tf.keras.layers.Conv2D(32, (3,3), activation='relu', input_shape=(150,150,3)),
        tf.keras.layers.MaxPooling2D(2,2),
        tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
        tf.keras.layers.MaxPooling2D(2,2),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])
    cnn_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    cnn_model.fit(train_gen, epochs=1, validation_data=val_gen)
    save_model(cnn_model, os.path.join(model_dir, "task6_package_cnn.keras"))
    print("✅ Task 6 saved")
except Exception as e:
    print("❌ Task 6 error:", e)

# --------------------------
# TASK 7 — Autoencoder
# --------------------------
try:
    df = pd.read_csv("task7_anomaly_dataset.csv")
    dims = df["Dimensions"].str.split("x", expand=True).astype(float)
    df["DimVolume"] = dims[0] * dims[1] * dims[2]
    X = df[["Weight", "Volume", "DimVolume"]].values
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    input_layer = Input(shape=(3,))
    encoded = Dense(2, activation="relu")(input_layer)
    decoded = Dense(3, activation="sigmoid")(encoded)
    autoencoder = tf.keras.Model(input_layer, decoded)
    autoencoder.compile(optimizer='adam', loss='mse')
    autoencoder.fit(X_scaled, X_scaled, epochs=5, verbose=0)

    autoencoder.save(os.path.join(model_dir, "task7_anomaly_autoencoder.keras"))
    print("✅ Task 7 saved")
except Exception as e:
    print("❌ Task 7 error:", e)

# --------------------------
# TASK 8 — Maintenance Classifier
# --------------------------
try:
    df = pd.read_csv("task8_maintenance_dataset.csv")
    X = df.drop("BreakdownFlag", axis=1)
    y = df["BreakdownFlag"]
    X_train, _, y_train, _ = train_test_split(X, y, test_size=0.2, stratify=y)
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)

    clf_model = Sequential([
        Input(shape=(X_train.shape[1],)),
        Dense(64, activation='relu'),
        Dense(32, activation='relu'),
        Dense(1, activation='sigmoid')
    ])
    clf_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    clf_model.fit(X_train, y_train, epochs=5, verbose=0)

    save_model(clf_model, os.path.join(model_dir, "task8_maintenance_classifier.keras"))
    print("✅ Task 8 saved")
except Exception as e:
    print("❌ Task 8 error:", e)


📦 Saving models to: C:/Users/USER PC/Downloads/heavyhaul/models


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


✅ Task 3 saved
✅ Task 4 saved


  super().__init__(**kwargs)


✅ Task 5 saved
Found 8001 images belonging to 2 classes.
Found 1999 images belonging to 2 classes.


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  self._warn_if_super_not_called()


❌ Task 6 error: Arguments `target` and `output` must have the same shape. Received: target.shape=(None, 2), output.shape=(None, 1)
✅ Task 7 saved
✅ Task 8 saved


In [126]:
import pandas as pd
import numpy as np
import os
from PIL import Image
import shutil
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

# STEP 1: Rebuild Label File (if missing)
df = pd.read_csv("shipment_logistics_dataset.csv")

# Ensure labels are only "OK" and "Damaged"
df = df[df["ImageLabel"].isin(["OK", "Damaged"])]
df[["PackageImage", "ImageLabel"]].to_csv("task6_image_labels_dataset.csv", index=False)

# STEP 2: Prepare Image Directory
label_df = pd.read_csv("task6_image_labels_dataset.csv")
base_dir = "package_images"

if os.path.exists(base_dir):
    shutil.rmtree(base_dir)

for label in ["OK", "Damaged"]:
    os.makedirs(os.path.join(base_dir, label), exist_ok=True)

# Generate dummy images
for _, row in label_df.iterrows():
    label = row["ImageLabel"]
    name = row["PackageImage"]
    img_path = os.path.join(base_dir, label, name)
    if not os.path.exists(img_path):
        img = Image.fromarray(np.random.randint(0, 255, (150, 150, 3), dtype=np.uint8))
        img.save(img_path)

# STEP 3: Data Generator (✅ with class_mode='binary')
datagen = ImageDataGenerator(rescale=1.0/255.0, validation_split=0.2)

train_gen = datagen.flow_from_directory(
    base_dir,
    target_size=(150, 150),
    batch_size=32,
    class_mode="binary",  # ✅ binary for sigmoid output
    subset="training",
    shuffle=True
)

val_gen = datagen.flow_from_directory(
    base_dir,
    target_size=(150, 150),
    batch_size=32,
    class_mode="binary",  # ✅ must match model's final layer
    subset="validation"
)

# STEP 4: CNN Model
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    MaxPooling2D(2, 2),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(1, activation='sigmoid')  # ✅ binary classification
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# STEP 5: Train
model.fit(train_gen, epochs=5, validation_data=val_gen)

# STEP 6: Save
model.save("models/task6_package_cnn.keras")
print("✅ Task 6 model saved")


Found 8001 images belonging to 2 classes.
Found 1999 images belonging to 2 classes.


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  self._warn_if_super_not_called()


Epoch 1/5
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m284s[0m 1s/step - accuracy: 0.4942 - loss: 0.7585 - val_accuracy: 0.5038 - val_loss: 0.6931
Epoch 2/5
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m179s[0m 710ms/step - accuracy: 0.4982 - loss: 0.6932 - val_accuracy: 0.5038 - val_loss: 0.6931
Epoch 3/5
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m178s[0m 707ms/step - accuracy: 0.5080 - loss: 0.6931 - val_accuracy: 0.5038 - val_loss: 0.6931
Epoch 4/5
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m169s[0m 675ms/step - accuracy: 0.4908 - loss: 0.6932 - val_accuracy: 0.5038 - val_loss: 0.6931
Epoch 5/5
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m180s[0m 716ms/step - accuracy: 0.5117 - loss: 0.6930 - val_accuracy: 0.5038 - val_loss: 0.6931
✅ Task 6 model saved


In [128]:
import os
print(os.getcwd())


C:\Users\USER PC


In [134]:
# project_dir = os.path.join("C:", "Users", "USER PC", "Downloads", "heavyhaul")
# model_path = os.path.join(project_dir, "models", "task6_package_cnn.keras")
# model.save(model_path)


In [132]:
import os

project_dir = os.path.join("C:\\", "Users", "USER PC", "Downloads", "heavyhaul", "models")
os.makedirs(project_dir, exist_ok=True)  # Create the folder if it doesn't exist

model_path = os.path.join(project_dir, "task6_package_cnn.keras")
model.save(model_path)
print("✅ Task 6 model saved")


✅ Task 6 model saved


In [136]:
import pandas as pd

# Load the main dataset
df = pd.read_csv("shipment_logistics_dataset.csv", parse_dates=["ActualShipDate", "ActualDeliveryDate"])
df["DeliveryTimeDays"] = (df["ActualDeliveryDate"] - df["ActualShipDate"]).dt.days

# Aggregate to daily averages
daily_df = df[["ActualShipDate", "DeliveryTimeDays"]].dropna()
daily_avg = daily_df.groupby("ActualShipDate").mean().reset_index()
daily_avg.rename(columns={"DeliveryTimeDays": "AvgDeliveryTime"}, inplace=True)

# Save the dataset
daily_avg.to_csv("task5_timeseries_dataset.csv", index=False)
print("✅ Saved: task5_timeseries_dataset.csv")


✅ Saved: task5_timeseries_dataset.csv


In [138]:
import pandas as pd

# Load the full dataset
df = pd.read_csv("shipment_logistics_dataset.csv", parse_dates=["ActualShipDate", "ActualDeliveryDate"])

# Calculate delivery duration
df["DeliveryTimeDays"] = (df["ActualDeliveryDate"] - df["ActualShipDate"]).dt.days

# Group by ship date to get daily average delivery time
daily_avg = df[["ActualShipDate", "DeliveryTimeDays"]].dropna()
daily_avg = daily_avg.groupby("ActualShipDate").mean().reset_index()
daily_avg.rename(columns={"DeliveryTimeDays": "AvgDeliveryTime"}, inplace=True)

# Save to file (make sure it's in the same directory as your app script)
daily_avg.to_csv("task5_timeseries_dataset.csv", index=False)
print("✅ Saved: task5_timeseries_dataset.csv")


✅ Saved: task5_timeseries_dataset.csv


In [154]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
import joblib
import os

# Load dataset
df7 = pd.read_csv("task7_anomaly_dataset.csv")

# Recompute DimVolume
dims = df7["Dimensions"].str.split("x", expand=True).astype(float)
dims.columns = ["L", "W", "H"]
df7["DimVolume"] = dims["L"] * dims["W"] * dims["H"]

# Select features
X7 = df7[["Weight", "Volume", "DimVolume"]].values

# Fit scaler
scaler7 = StandardScaler()
scaler7.fit(X7)
X_scaled = scaler.fit_transform(X)


# Save scaler
os.makedirs("models", exist_ok=True)
joblib.dump(scaler7, "models/task7_scaler.joblib")
print("✅ Saved: models/task7_scaler.joblib")


✅ Saved: models/task7_scaler.joblib


In [150]:
# # Load dataset
# df8 = pd.read_csv("task8_maintenance_dataset.csv")

# # Drop target column
# X8 = df8.drop("BreakdownFlag", axis=1)

# # Fit scaler
# scaler8 = StandardScaler()
# scaler8.fit(X8)
# X_train = scaler.fit_transform(X_train)
# X_test = scaler.transform(X_test)
# # Save scaler
# joblib.dump(scaler8, "models/task8_scaler.joblib")
# print("✅ Saved: models/task8_scaler.joblib")


In [144]:
import os
print(os.getcwd())


C:\Users\USER PC


In [148]:
import pandas as pd
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load your Task 8 dataset
df = pd.read_csv("task8_maintenance_dataset.csv")

# Features and target
X8 = df.drop("BreakdownFlag", axis=1)
y8 = df["BreakdownFlag"]

# Split the data
X_train8, X_test8, y_train8, y_test8 = train_test_split(X8, y8, test_size=0.2, stratify=y8, random_state=42)

# Fit scaler on Task 8 data
scaler8 = StandardScaler()
X_train_scaled8 = scaler8.fit_transform(X_train8)
X_test_scaled8 = scaler8.transform(X_test8)

# ✅ Save the Task 8 scaler
joblib.dump(scaler8, "models/task8_scaler.joblib")
print("✅ Task 8 scaler saved")


✅ Task 8 scaler saved
