In [1]:
!pip install torchinfo

Collecting torchinfo
  Downloading torchinfo-1.8.0-py3-none-any.whl.metadata (21 kB)
Downloading torchinfo-1.8.0-py3-none-any.whl (23 kB)
Installing collected packages: torchinfo
Successfully installed torchinfo-1.8.0


In [2]:
import pandas as pd
import numpy as np
from pathlib import Path
from typing import List, Tuple, Dict
import torch
from torch.utils.data import Dataset, DataLoader
import re
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import os
from torchsummary import summary
from torchinfo import summary
from tqdm import tqdm
from sklearn.metrics import confusion_matrix
import wandb
import torch.nn.functional as F
import hashlib
from typing import Dict, Tuple
import random

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print("Device: ", device)

Device:  cuda


In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [106]:
# Configuration Dictionary
config = {
    'batch_size': 64,
    'lr': 0.001,
    'epochs': 10,
    'input_dim': 9,
    'num_classes': 7,
    'hidden_dim': 512,
    'num_blocks': 3,
    'checkpoint_dir': "/content/drive/MyDrive/IDL/Checkpoint",
    'device': 'cuda' if torch.cuda.is_available() else 'cpu'
}

In [107]:
# Define category mapping
CATEGORIES = {
    'Blueball': 0,
    'Box': 1,
    'Pencilcase': 2,
    'Pinkball': 3,
    'StuffedAnimal': 4,
    'Tennis': 5,
    'Waterbottle': 6,
}

In [108]:
# Path to the folder containing the dataset files
folder_path = "/content/drive/MyDrive/IDL/IDL_Project"

In [109]:
# Stats trackers
total_count = 0
kept_count = 0
valid_file_count = 0
skipped_due_to_missing_waypoints = 0

In [110]:
WAYPOINTS = [
    (30, -30), (30, 30), (15, -30), (15, 30),
    (0, -30), (0, 30), (-15, -30), (-15, 30),
    (-30, -30), (-30, 30), (-30, -30), (30, -30),
    (30, 30), (-30, 30)
]


In [111]:
# Step 1: Load and label dataset
def load_and_label_file(file_path, file_name):
    global total_count
    category = next((key for key in CATEGORIES if key in file_name), None)
    if category is None:
        return pd.DataFrame()

    data = []
    with open(file_path, "r") as f:
        for line in f:
            parts = line.strip().split(',')
            if len(parts) == 10:
                try:
                    timestamp = parts[0]
                    microsec = int(parts[1])
                    x = float(parts[2])
                    y = float(parts[3])
                    x_target = float(parts[4])
                    y_target = float(parts[5])
                    pwm1 = int(parts[6])
                    pwm2 = int(parts[7])
                    pwm3 = int(parts[8])
                    pwm4 = int(parts[9])
                    total_count += 1

                    data.append([
                        timestamp, microsec, x, y, x_target, y_target,
                        pwm1, pwm2, pwm3, pwm4, category, CATEGORIES[category]
                    ])
                except ValueError:
                    continue

    return pd.DataFrame(data, columns=[
        "timestamp", "microseconds", "x", "y", "x_target", "y_target",
        "pwm1", "pwm2", "pwm3", "pwm4", "category", "label"
    ])

In [112]:
# Step 2: Assign sequential waypoint numbers
def assign_sequential_waypoints(df, tol=1.0):
    df = df.reset_index(drop=True)
    wp_index = 0
    assigned_wp = []

    for i in range(len(df)):
        x_t, y_t = df.loc[i, "x_target"], df.loc[i, "y_target"]
        current_expected = WAYPOINTS[wp_index]

        if np.isclose(x_t, current_expected[0], atol=tol) and np.isclose(y_t, current_expected[1], atol=tol):
            assigned_wp.append(wp_index)
        else:
            if wp_index + 1 < len(WAYPOINTS):
                next_expected = WAYPOINTS[wp_index + 1]
                if np.isclose(x_t, next_expected[0], atol=tol) and np.isclose(y_t, next_expected[1], atol=tol):
                    wp_index += 1
                    assigned_wp.append(wp_index)
                else:
                    assigned_wp.append(wp_index)
            else:
                assigned_wp.append(wp_index)

    df["waypoint_number"] = assigned_wp
    return df

In [113]:
# Step 3: Filter out rows where y <= 0
def filter_by_y(df):
    global kept_count
    filtered = df[df["y"] > 0].reset_index(drop=True)
    kept_count += len(filtered)
    return filtered

In [114]:
def process_file(file_path, file_name):
    global valid_file_count

    # Step 1: Load and label
    df = load_and_label_file(file_path, file_name)
    if df.empty:
        return pd.DataFrame()

    # Step 2: Assign waypoint numbers
    df = assign_sequential_waypoints(df)

    # 📌 Show how many waypoints existed before filtering
    waypoint_count_before = df["waypoint_number"].nunique()
    print(f"\n📌 {file_name} → {waypoint_count_before} waypoints BEFORE filtering")

    # Step 3: Filter out rows where y ≤ 0
    df = filter_by_y(df)

    # 📌 Show how many remain after filtering
    waypoint_count_after = df["waypoint_number"].nunique()
    print(f"📌 {file_name} → {waypoint_count_after} waypoints AFTER filtering")

    # Count as valid if any data was kept
    if not df.empty:
        valid_file_count += 1

    return df

In [115]:
# # Step 5: Process all .txt files
# all_data = pd.DataFrame()

# for file_name in os.listdir(folder_path):
#     if not file_name.endswith(".txt") or file_name.startswith("."):
#         continue
#     file_path = os.path.join(folder_path, file_name)
#     df = process_file(file_path, file_name)
#     if not df.empty:
#         all_data = pd.concat([all_data, df], ignore_index=True)

# # Step 6: Summary
# print("\n📄 Summary:")
# print(f"Total files scanned: {len([f for f in os.listdir(folder_path) if f.endswith('.txt')])}")
# print(f"✅ Files with 14 valid waypoints: {valid_file_count}")
# print(f"⚠️ Skipped due to missing waypoints: {skipped_due_to_missing_waypoints}")
# print(f"📊 Data points before filtering: {total_count}")
# print(f"✅ Data points after filtering: {kept_count}")
# print(f"🚫 Dropped data points: {total_count - kept_count}")

In [116]:
# print(all_data.head(1350))

In [117]:
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm
import pandas as pd
import numpy as np

In [118]:
# class ObjectSensorDataset(Dataset):
#     def __init__(self, df):
#         features = df[["x", "y", "x_target", "y_target", "pwm1", "pwm2", "pwm3", "pwm4", "microseconds"]].values
#         labels = df["label"].values

#         self.X = torch.tensor(features, dtype=torch.float32)
#         self.y = torch.tensor(labels, dtype=torch.long)

#     def __len__(self):
#         return len(self.y)

#     def __getitem__(self, idx):
#         return self.X[idx], self.y[idx]

In [119]:
# # Normalize features before split
# features_to_scale = ["x", "y", "x_target", "y_target", "pwm1", "pwm2", "pwm3", "pwm4", "microseconds"]
# scaler = StandardScaler()
# all_data[features_to_scale] = scaler.fit_transform(all_data[features_to_scale])

# # 🧪 Split into train/val/test with stratified sampling
# train_df, temp_df = train_test_split(
#     all_data, test_size=0.3, stratify=all_data["label"], random_state=42
# )
# val_df, test_df = train_test_split(
#     temp_df, test_size=0.5, stratify=temp_df["label"], random_state=42
# )

# # 📦 Create datasets
# train_dataset = ObjectSensorDataset(train_df)
# val_dataset = ObjectSensorDataset(val_df)
# test_dataset = ObjectSensorDataset(test_df)

In [181]:
from collections import defaultdict, Counter

def build_datasets(data_dir: str):
    data_dir = Path(data_dir)
    file_paths = list(data_dir.glob("*.txt"))
    random.seed(42)

    # 1. Group files by object class
    class_to_files = defaultdict(list)
    for file_path in file_paths:
        for class_name in CATEGORIES:
            if class_name in file_path.name:
                class_to_files[class_name].append(file_path)
                break

    # 2. Stratified split (each class in train/val/test)
    train_files, val_files, test_files = [], [], []
    for class_name, files in class_to_files.items():
        random.shuffle(files)
        n = len(files)
        train_split = int(0.65 * n)
        val_split = int(0.85 * n)
        train_files += files[:train_split]
        val_files += files[train_split:val_split]
        test_files += files[val_split:]

    print("🔍 Per-class file counts:")
    for cls in CATEGORIES:
        print(f"  {cls:<15} → {len(class_to_files[cls])} total files")

    print("\n✅ Final split file counts:")
    print(f"Train: {len(train_files)}")
    print(f"Val:   {len(val_files)}")
    print(f"Test:  {len(test_files)}")

    # 3. Process each split
    def process_file_list(file_list):
        dfs = []
        for fp in file_list:
            df = process_file(fp, fp.name)
            if not df.empty:
                dfs.append(df)
        return pd.concat(dfs, ignore_index=True) if dfs else pd.DataFrame()

    print("Processing train files")
    train_df = process_file_list(train_files)
    print("Processing validation files")
    val_df = process_file_list(val_files)
    print("Processing test files")
    test_df = process_file_list(test_files)

    return train_df, val_df, test_df


# 📦 Run everything
train_df, val_df, test_df = build_datasets(folder_path)

🔍 Per-class file counts:
  Blueball        → 30 total files
  Box             → 30 total files
  Pencilcase      → 10 total files
  Pinkball        → 30 total files
  StuffedAnimal   → 30 total files
  Tennis          → 30 total files
  Waterbottle     → 29 total files

✅ Final split file counts:
Train: 119
Val:   38
Test:  32
Processing train files

📌 Tennis18.txt → 14 waypoints BEFORE filtering
📌 Tennis18.txt → 12 waypoints AFTER filtering

📌 Tennis6.txt → 14 waypoints BEFORE filtering
📌 Tennis6.txt → 12 waypoints AFTER filtering

📌 Tennis16.txt → 14 waypoints BEFORE filtering
📌 Tennis16.txt → 13 waypoints AFTER filtering

📌 Tennis22.txt → 14 waypoints BEFORE filtering
📌 Tennis22.txt → 12 waypoints AFTER filtering

📌 Tennis28.txt → 14 waypoints BEFORE filtering
📌 Tennis28.txt → 12 waypoints AFTER filtering

📌 Tennis9.txt → 14 waypoints BEFORE filtering
📌 Tennis9.txt → 12 waypoints AFTER filtering

📌 Tennis29.txt → 14 waypoints BEFORE filtering
📌 Tennis29.txt → 12 waypoints AFTER filt




📌 Tennis2.txt → 14 waypoints BEFORE filtering
📌 Tennis2.txt → 13 waypoints AFTER filtering

📌 Tennis1.txt → 14 waypoints BEFORE filtering
📌 Tennis1.txt → 13 waypoints AFTER filtering

📌 Box14.txt → 14 waypoints BEFORE filtering
📌 Box14.txt → 12 waypoints AFTER filtering

📌 Box1.txt → 14 waypoints BEFORE filtering
📌 Box1.txt → 12 waypoints AFTER filtering

📌 Box30.txt → 14 waypoints BEFORE filtering
📌 Box30.txt → 12 waypoints AFTER filtering

📌 Box11.txt → 14 waypoints BEFORE filtering
📌 Box11.txt → 12 waypoints AFTER filtering

📌 Box22.txt → 14 waypoints BEFORE filtering
📌 Box22.txt → 12 waypoints AFTER filtering

📌 Box21.txt → 14 waypoints BEFORE filtering
📌 Box21.txt → 12 waypoints AFTER filtering

📌 Box27.txt → 14 waypoints BEFORE filtering
📌 Box27.txt → 12 waypoints AFTER filtering

📌 Box8.txt → 14 waypoints BEFORE filtering
📌 Box8.txt → 12 waypoints AFTER filtering

📌 Box23.txt → 14 waypoints BEFORE filtering
📌 Box23.txt → 12 waypoints AFTER filtering

📌 Box2.txt → 14 waypoints B

In [201]:
class WindowedDataset(torch.utils.data.Dataset):
    def __init__(self, df: pd.DataFrame, seq_len: int = 10):
        self.seq_len = seq_len
        self.df = df.reset_index(drop=True)

        # Select feature columns (change as needed)
        self.features = self.df[[
            "x", "y", "x_target", "y_target", "pwm1", "pwm2", "pwm3", "pwm4", "waypoint_number"
        ]].values.astype(np.float32)

        # Label per row (you can change this to majority/last of the window)
        self.labels = self.df["label"].values.astype(np.int64)

    def __len__(self):
        return len(self.df) - self.seq_len + 1

    def __getitem__(self, idx):
        x = self.features[idx:idx + self.seq_len]  # (seq_len, input_dim)
        y = self.labels[idx + self.seq_len - 1]
        x_tensor = torch.tensor(x, dtype=torch.float32)  # ✅ explicitly float32
        y_tensor = torch.tensor(y, dtype=torch.long)
        return x_tensor, y_tensor


seq_len = 2000  # adjust as needed

train_dataset = WindowedDataset(train_df, seq_len=seq_len)
val_dataset = WindowedDataset(val_df, seq_len=seq_len)
test_dataset = WindowedDataset(test_df, seq_len=seq_len)

batch_size = config["batch_size"]

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


In [202]:
# # 📦 Create DataLoaders
# train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
# val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
# test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# for X_batch, y_batch in tqdm(train_loader, desc="🔁 Training"):
#     # Simulate training step
#     pass

# for X_batch, y_batch in tqdm(val_loader, desc="🔍 Validating"):
#     # Simulate validation step
#     pass

# for X_batch, y_batch in tqdm(test_loader, desc="🧪 Testing"):
#     # Simulate test step
#     pass

# # 📊 Summary of dataset sizes
# print("\n📊 Dataset Sizes:")
# print(f"🧠 Training set: {len(train_dataset)} samples")
# print(f"🧪 Validation set: {len(val_dataset)} samples")
# print(f"🧾 Test set: {len(test_dataset)} samples")

In [203]:
# import torch
# import torch.nn as nn
# from torchinfo import summary

# class TimeDistributed(nn.Module):
#     """Applies a module over the time dimension (B, T, F) → applies to each T."""
#     def __init__(self, module):
#         super(TimeDistributed, self).__init__()
#         self.module = module

#     def forward(self, x):
#         # x: (B, T, F)
#         B, T, F = x.size()
#         x = x.contiguous().view(B * T, F)
#         x = self.module(x)
#         x = x.view(B, T, -1)
#         return x

# class ResidualBlock(nn.Module):
#     def __init__(self, dim):
#         super(ResidualBlock, self).__init__()
#         self.linear1 = TimeDistributed(nn.Linear(dim, dim))
#         self.bn1 = TimeDistributed(nn.BatchNorm1d(dim))
#         self.act1 = nn.ReLU()
#         self.drop1 = nn.Dropout(0.5)

#         self.linear2 = TimeDistributed(nn.Linear(dim, dim))
#         self.bn2 = TimeDistributed(nn.BatchNorm1d(dim))

#         self.relu = nn.ReLU(inplace=True)

#     def forward(self, x):
#         identity = x
#         out = self.linear1(x)
#         out = self.bn1(out)
#         out = self.act1(out)
#         out = self.drop1(out)

#         out = self.linear2(out)
#         out = self.bn2(out)

#         out += identity
#         return self.relu(out)

# class ResidualMLPClassifier(nn.Module):
#     def __init__(self, input_dim, num_classes, hidden_dim=2048, num_blocks=10):
#         super(ResidualMLPClassifier, self).__init__()

#         self.input_layer = nn.Sequential(
#             TimeDistributed(nn.Linear(input_dim, hidden_dim)),
#             TimeDistributed(nn.BatchNorm1d(hidden_dim)),
#             nn.ReLU(),
#             nn.Dropout(0.5),
#         )

#         self.res_blocks = nn.Sequential(*[ResidualBlock(hidden_dim) for _ in range(num_blocks)])

#         self.classifier = nn.Sequential(
#             nn.Dropout(0.5),
#             TimeDistributed(nn.Linear(hidden_dim, 1024)),
#             TimeDistributed(nn.BatchNorm1d(1024)),
#             nn.ReLU(),
#             nn.Dropout(0.5),
#             TimeDistributed(nn.Linear(1024, 512)),
#             TimeDistributed(nn.BatchNorm1d(512)),
#             nn.ReLU(),
#             nn.Dropout(0.5),
#             TimeDistributed(nn.Linear(512, 256)),
#             TimeDistributed(nn.BatchNorm1d(256)),
#             nn.ReLU(),
#             TimeDistributed(nn.Linear(256, num_classes)),
#         )

#         self.softmax = nn.Softmax(dim=2)

#     def forward(self, x):
#         x = self.input_layer(x)
#         x = self.res_blocks(x)
#         feats = x
#         out = self.classifier(x)
#         out = self.softmax(out)
#         out = out[:, -1, :]
#         return {"feats": feats, "out": out}


In [204]:
# import torch
# import torch.nn as nn

# class ResidualBlock(nn.Module):
#     def __init__(self, dim):
#         super(ResidualBlock, self).__init__()
#         self.block = nn.Sequential(
#             nn.Linear(dim, dim),
#             nn.BatchNorm1d(dim),
#             nn.GELU(),
#             nn.Dropout(0.5),
#             nn.Linear(dim, dim),
#             nn.BatchNorm1d(dim),
#         )
#         self.relu = nn.ReLU(inplace=True)

#     def forward(self, x):
#         identity = x
#         out = self.block(x)
#         out += identity
#         return self.relu(out)


# class ResidualMLPClassifier(nn.Module):
#     def __init__(self, input_dim, num_classes, hidden_dim=2048, num_blocks=6):
#         super(ResidualMLPClassifier, self).__init__()
#         self.input_layer = nn.Sequential(
#             nn.Linear(input_dim, hidden_dim),
#             nn.BatchNorm1d(hidden_dim),
#             nn.GELU(),
#             nn.Dropout(0.5),
#         )

#         self.res_blocks = nn.Sequential(*[ResidualBlock(hidden_dim) for _ in range(num_blocks)])

#         self.classifier = nn.Sequential(
#             nn.Dropout(0.5),
#             nn.Linear(hidden_dim, 512),
#             nn.BatchNorm1d(512),
#             nn.GELU(),
#             nn.Linear(512, num_classes),
#             nn.Softmax(dim=1)
#         )

#     def forward(self, x):
#         x = self.input_layer(x)
#         x = self.res_blocks(x)
#         feats = x
#         out = self.classifier(x)
#         return {"feats": feats, "out": out}

# model = ResidualMLPClassifier(input_dim=9, num_classes=7, hidden_dim = config['hidden_dim'], num_blocks = config['num_blocks']).to(config['device'])
# summary(model, input_size=(64, 9))  # for batch size 64

In [205]:
# import torch
# import torch.nn as nn

# class RNNClassifier(nn.Module):
#     def __init__(self, input_dim, num_classes, hidden_dim=256, num_layers=2, dropout=0.5, rnn_type="lstm"):
#         super(RNNClassifier, self).__init__()

#         self.rnn_type = rnn_type.lower()
#         rnn_cls = {
#             "lstm": nn.LSTM,
#             "gru": nn.GRU,
#             "rnn": nn.RNN
#         }[self.rnn_type]

#         self.rnn = rnn_cls(
#             input_size=input_dim,
#             hidden_size=hidden_dim,
#             num_layers=num_layers,
#             batch_first=True,
#             dropout=dropout if num_layers > 1 else 0,
#             bidirectional=False
#         )

#         self.classifier = nn.Sequential(
#             nn.Linear(hidden_dim, 256),
#             nn.ReLU(),
#             nn.Dropout(dropout),
#             nn.Linear(256, 128),
#             nn.ReLU(),
#             nn.Dropout(dropout),
#             nn.Linear(128, num_classes)
#         )

#         self.softmax = nn.Softmax(dim=1)

#     def forward(self, x):
#         # x: (B, T, F)
#         out, _ = self.rnn(x)  # out: (B, T, H)
#         last_out = out[:, -1, :]  # last timestep (B, H)
#         logits = self.classifier(last_out)
#         probs = self.softmax(logits)
#         return {"feats": last_out, "out": probs}

# model = RNNClassifier(
#     input_dim=9,
#     num_classes=7,
#     hidden_dim=config['hidden_dim'],
#     num_layers=config['num_blocks'],  # or another config param
#     dropout=0.5,
#     rnn_type="lstm"
# ).to(config['device'])

# # For summary:
# from torchinfo import summary
# summary(model, input_data=torch.zeros(64, 30, 9).to(config['device']))

In [206]:
import torch
import torch.nn as nn

class CNNClassifier(nn.Module):
    def __init__(self, input_dim, num_classes, dropout=0.5):
        super(CNNClassifier, self).__init__()

        self.conv_layers = nn.Sequential(
            nn.Conv1d(in_channels=input_dim, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm1d(64),
            nn.Conv1d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm1d(128),
            nn.Conv1d(in_channels=128, out_channels=256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm1d(256),
            nn.AdaptiveAvgPool1d(output_size=1)
        )

        self.classifier = nn.Sequential(
            nn.Flatten(),  # from (B, 256, 1) to (B, 256)
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(128, num_classes)
        )

        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        # x: (B, T, F) -> (B, F, T) for Conv1d
        x = x.permute(0, 2, 1)
        feats = self.conv_layers(x)  # (B, 256, 1)
        logits = self.classifier(feats)  # (B, num_classes)
        probs = self.softmax(logits)
        return {"feats": feats.squeeze(-1), "out": probs}

# Model initialization
model = CNNClassifier(
    input_dim=9,
    num_classes=7,
    dropout=0.5
).to(config['device'])

# For summary
from torchinfo import summary
summary(model, input_data=torch.zeros(64, 30, 9).to(config['device']))


Layer (type:depth-idx)                   Output Shape              Param #
CNNClassifier                            [64, 7]                   --
├─Sequential: 1-1                        [64, 256, 1]              --
│    └─Conv1d: 2-1                       [64, 64, 30]              1,792
│    └─ReLU: 2-2                         [64, 64, 30]              --
│    └─BatchNorm1d: 2-3                  [64, 64, 30]              128
│    └─Conv1d: 2-4                       [64, 128, 30]             24,704
│    └─ReLU: 2-5                         [64, 128, 30]             --
│    └─BatchNorm1d: 2-6                  [64, 128, 30]             256
│    └─Conv1d: 2-7                       [64, 256, 30]             98,560
│    └─ReLU: 2-8                         [64, 256, 30]             --
│    └─BatchNorm1d: 2-9                  [64, 256, 30]             512
│    └─AdaptiveAvgPool1d: 2-10           [64, 256, 1]              --
├─Sequential: 1-2                        [64, 7]                   --
│

In [207]:
class AverageMeter:
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

In [208]:
def accuracy(output, target, topk=(1,)):
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res

In [209]:
def train_model(model, train_loader, criterion, optimizer, device):
    model.train()
    loss_m = AverageMeter()
    acc_m = AverageMeter()
    batch_bar = tqdm(total=len(train_loader), dynamic_ncols=True, leave=False, position=0, desc='Train')

    for i, data in enumerate(train_loader):
        optimizer.zero_grad()
        x, y = data
        x, y = x.to(device), y.to(device)
        outputs = model(x)
        loss = criterion(outputs['out'], y)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()

        acc = accuracy(outputs['out'], y)[0].item()
        loss_m.update(loss.item())
        acc_m.update(acc)

        batch_bar.set_postfix(
            loss="{:.04f}".format(float(loss_m.avg)),
            acc="{:.04f}%".format(float(acc_m.avg)),
            lr="{:.06f}".format(float(optimizer.param_groups[0]['lr']))
        )
        batch_bar.update()

        del x, y, outputs, loss
        torch.cuda.empty_cache()

    batch_bar.close()
    return loss_m.avg, acc_m.avg

In [210]:
@torch.no_grad()
def validate_model(model, val_loader, criterion, class_names, device):
    model.eval()
    loss_m = AverageMeter()
    acc_m = AverageMeter()
    batch_bar = tqdm(total=len(val_loader), dynamic_ncols=True, position=0, leave=False, desc='Val')

    all_preds = []
    all_targets = []

    for i, data in enumerate(val_loader):
        x, y = data
        x, y = x.to(device), y.to(device)
        outputs = model(x)
        loss = criterion(outputs['out'], y)

        acc = accuracy(outputs['out'], y)[0].item()

        _, predicted = torch.max(outputs['out'], 1)
        all_preds.extend(predicted.cpu().numpy())
        all_targets.extend(y.cpu().numpy())

        loss_m.update(loss.item())
        acc_m.update(acc)

        batch_bar.set_postfix(
            loss="{:.04f}".format(float(loss_m.avg)),
            acc="{:.04f}%".format(float(acc_m.avg))
        )
        batch_bar.update()

        del x, y, outputs, loss
        torch.cuda.empty_cache()

    batch_bar.close()

    if class_names:
        print("\nPer-class Validation Accuracy:")
        per_class_acc = {}
        for i, class_name in enumerate(class_names):
            class_mask = (np.array(all_targets) == i)
            if np.sum(class_mask) > 0:
                class_correct = np.sum((np.array(all_preds)[class_mask] == i))
                class_total = np.sum(class_mask)
                acc_percent = 100 * class_correct / class_total
                print(f"  {class_name}: {acc_percent:.4f}% ({class_correct}/{class_total})")
                per_class_acc[f"val_acc_{class_name}"] = acc_percent

    return loss_m.avg, acc_m.avg

In [211]:
def save_model(model, optimizer, scheduler, metrics, epoch, path):
    torch.save({
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'scheduler_state_dict': scheduler.state_dict() if scheduler else None,
        'metrics': metrics
    }, path)

In [212]:
# Define CrossEntropyLoss as the criterion
criterion = nn.CrossEntropyLoss(
    label_smoothing=0.1
)

# Initialize optimizer with AdamW
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=config['lr'],
    weight_decay=1e-4
)

# Learning rate scheduler
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.5,
    patience=3,
    min_lr=1e-6,
    verbose=True
)



In [213]:
import wandb

# Intialize wandb
wandb.login(key="e768e7128db26d1cd8476eda26e045d5bf6b390d") # API Key is in your wandb account, under settings (wandb.ai/settings)

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [214]:
run = wandb.init(
    name = "27run", ## Wandb creates random run names if you skip this field
    reinit = False, ### Allows reinitalizing runs when you re-run this cell
    #id = "", ### Insert specific run id here if you want to resume a previous run
    # resume = "must" ### You need this to resume previous runs, but comment out reinit = True when using this
    project = "object_classification", ### Project should be created in your wandb account
    config = config ### Wandb Config for your run
)

In [215]:
# Training Loop
best_val_loss = float('inf')
best_val_acc = 0
class_names = list(CATEGORIES.keys())

for epoch in range(config['epochs']):
    print(f"\nEpoch {epoch + 1}/{config['epochs']}")

    train_loss, train_acc = train_model(model, train_loader, criterion, optimizer, config['device'])
    print(f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_acc:.2f}%")

    val_loss, val_acc = validate_model(model, val_loader, criterion, class_names, config['device'])
    print(f"Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_acc:.2f}%")

    scheduler.step(val_loss)
    curr_lr = optimizer.param_groups[0]['lr']

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        best_val_acc = val_acc
        best_model_path = os.path.join(config['checkpoint_dir'], 'best_model.pth')
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'val_loss': val_loss,
            'val_acc': val_acc,
        }, best_model_path)
        wandb.save(best_model_path)
        print(f"Saved best model with validation loss: {best_val_loss:.4f} and accuracy: {best_val_acc:.2f}%")

    last_model_path = os.path.join(config['checkpoint_dir'], f'model_epoch_{epoch+1}.pth')
    torch.save(model.state_dict(), last_model_path)
    wandb.save(last_model_path)
    print(f"Saved model for epoch {epoch+1}")

    wandb.log({
        'epoch': epoch + 1,
        'train_loss': train_loss,
        'train_acc': train_acc,
        'val_loss': val_loss,
        'val_acc': val_acc,
        'learning_rate': curr_lr
    }, step=epoch)

    print(f"End of Epoch {epoch+1}/{config['epochs']}")

print(f"\nTraining complete! Best validation accuracy: {best_val_acc:.2f}%")


Epoch 1/10




Train Loss: 1.3648, Train Accuracy: 87.91%





Per-class Validation Accuracy:
  Blueball: 44.7935% (10552/23557)
  Box: 53.9198% (12731/23611)
  Pencilcase: 17.6764% (1345/7609)
  Pinkball: 56.5106% (13458/23815)
  StuffedAnimal: 34.4676% (9552/27713)
  Tennis: 45.6139% (10800/23677)
  Waterbottle: 25.7371% (6346/24657)
Validation Loss: 1.7679, Validation Accuracy: 41.91%
Saved best model with validation loss: 1.7679 and accuracy: 41.91%
Saved model for epoch 1
End of Epoch 1/10

Epoch 2/10




Train Loss: 1.2577, Train Accuracy: 99.37%





Per-class Validation Accuracy:
  Blueball: 63.8621% (15044/23557)
  Box: 55.3937% (13079/23611)
  Pencilcase: 3.7061% (282/7609)
  Pinkball: 59.5969% (14193/23815)
  StuffedAnimal: 32.9412% (9129/27713)
  Tennis: 52.2532% (12372/23677)
  Waterbottle: 38.9910% (9614/24657)
Validation Loss: 1.7167, Validation Accuracy: 47.68%
Saved best model with validation loss: 1.7167 and accuracy: 47.68%
Saved model for epoch 2
End of Epoch 2/10

Epoch 3/10




Train Loss: 1.2549, Train Accuracy: 99.63%





Per-class Validation Accuracy:
  Blueball: 63.3485% (14923/23557)
  Box: 64.3217% (15187/23611)
  Pencilcase: 28.9000% (2199/7609)
  Pinkball: 62.8385% (14965/23815)
  StuffedAnimal: 36.3440% (10072/27713)
  Tennis: 63.6778% (15077/23677)
  Waterbottle: 33.9498% (8371/24657)
Validation Loss: 1.6763, Validation Accuracy: 52.26%
Saved best model with validation loss: 1.6763 and accuracy: 52.26%
Saved model for epoch 3
End of Epoch 3/10

Epoch 4/10




Train Loss: 1.2539, Train Accuracy: 99.72%





Per-class Validation Accuracy:
  Blueball: 67.0416% (15793/23557)
  Box: 46.4445% (10966/23611)
  Pencilcase: 27.1258% (2064/7609)
  Pinkball: 63.8085% (15196/23815)
  StuffedAnimal: 38.8662% (10771/27713)
  Tennis: 57.1947% (13542/23677)
  Waterbottle: 26.5239% (6540/24657)
Validation Loss: 1.7114, Validation Accuracy: 48.40%
Saved model for epoch 4
End of Epoch 4/10

Epoch 5/10




Train Loss: 1.2534, Train Accuracy: 99.77%





Per-class Validation Accuracy:
  Blueball: 63.9725% (15070/23557)
  Box: 52.1071% (12303/23611)
  Pencilcase: 21.2117% (1614/7609)
  Pinkball: 69.4268% (16534/23815)
  StuffedAnimal: 40.2699% (11160/27713)
  Tennis: 54.5424% (12914/23677)
  Waterbottle: 35.0124% (8633/24657)
Validation Loss: 1.6910, Validation Accuracy: 50.57%
Saved model for epoch 5
End of Epoch 5/10

Epoch 6/10




Train Loss: 1.2530, Train Accuracy: 99.80%





Per-class Validation Accuracy:
  Blueball: 63.7390% (15015/23557)
  Box: 50.0953% (11828/23611)
  Pencilcase: 28.5977% (2176/7609)
  Pinkball: 57.2160% (13626/23815)
  StuffedAnimal: 47.1800% (13075/27713)
  Tennis: 43.4726% (10293/23677)
  Waterbottle: 27.7933% (6853/24657)
Validation Loss: 1.7206, Validation Accuracy: 47.14%
Saved model for epoch 6
End of Epoch 6/10

Epoch 7/10


Train:   2%|▏         | 120/7473 [00:07<06:55, 17.68it/s, acc=99.8568%, loss=1.2525, lr=0.001000]

KeyboardInterrupt: 

In [216]:
@torch.no_grad()
def test_model(model, test_loader, criterion, class_names, device, checkpoint_dir=None):
    model.eval()
    test_loss = 0.0
    correct = 0
    total = 0
    all_preds = []
    all_targets = []
    all_probs = []

    class_correct = {class_name: 0 for class_name in class_names}
    class_total = {class_name: 0 for class_name in class_names}

    for data in test_loader:
        inputs, targets = data
        inputs, targets = inputs.to(device), targets.to(device)
        outputs = model(inputs)
        outputs_for_loss = outputs['out'] if isinstance(outputs, dict) and 'out' in outputs else outputs
        loss = criterion(outputs_for_loss, targets)
        test_loss += loss.item() * inputs.size(0)

        probs = torch.nn.functional.softmax(outputs_for_loss, dim=1)
        _, predicted = torch.max(outputs_for_loss, 1)

        total += targets.size(0)
        correct += (predicted == targets).sum().item()

        for i in range(targets.size(0)):
            label = targets[i].item()
            pred = predicted[i].item()
            class_name = class_names[label]
            class_total[class_name] += 1
            if pred == label:
                class_correct[class_name] += 1

        all_preds.extend(predicted.cpu().numpy())
        all_targets.extend(targets.cpu().numpy())
        all_probs.extend(probs.cpu().numpy())

    test_loss /= len(test_loader.dataset)
    test_acc = correct / total

    class_accuracy = {
        name: class_correct[name]/class_total[name] if class_total[name] > 0 else 0
        for name in class_names
    }

    print("\n" + "="*50)
    print("TEST RESULTS")
    print("="*50)
    print(f"Test Loss: {test_loss:.4f}")
    print(f"Test Accuracy: {test_acc:.4f} ({correct}/{total})")
    print("\nPer-Class Accuracy:")
    for class_name in class_names:
        print(f" {class_name}: {class_accuracy[class_name]:.4f} ({class_correct[class_name]}/{class_total[class_name]})")

    return {
        'test_loss': test_loss,
        'test_accuracy': test_acc,
        'class_accuracy': class_accuracy,
        'predictions': all_preds,
        'targets': all_targets,
        'probabilities': all_probs
    }


In [217]:
best_model_path = f"{config['checkpoint_dir']}/best_model.pth"
if os.path.exists(best_model_path):
    checkpoint = torch.load(best_model_path)
    model.load_state_dict(checkpoint['model_state_dict'])
    print(f"Loaded best model from epoch {checkpoint.get('epoch', 'unknown')}")

Loaded best model from epoch 2


In [218]:
test_model(model, test_loader, criterion, class_names, config['device'])


TEST RESULTS
Test Loss: 1.7716
Test Accuracy: 0.4133 (51979/125752)

Per-Class Accuracy:
 Blueball: 0.3122 (5854/18753)
 Box: 0.4030 (8761/21738)
 Pencilcase: 0.1756 (1305/7430)
 Pinkball: 0.7260 (13486/18577)
 StuffedAnimal: 0.2373 (4425/18644)
 Tennis: 0.5150 (9852/19131)
 Waterbottle: 0.3862 (8296/21479)


{'test_loss': 1.7715778912863316,
 'test_accuracy': 0.41334531458744195,
 'class_accuracy': {'Blueball': 0.3121633871913827,
  'Box': 0.4030269574017849,
  'Pencilcase': 0.1756393001345895,
  'Pinkball': 0.72595144533563,
  'StuffedAnimal': 0.23734177215189872,
  'Tennis': 0.5149756938999529,
  'Waterbottle': 0.3862377205642721},
 'predictions': [np.int64(6),
  np.int64(6),
  np.int64(6),
  np.int64(6),
  np.int64(6),
  np.int64(6),
  np.int64(6),
  np.int64(6),
  np.int64(6),
  np.int64(6),
  np.int64(2),
  np.int64(2),
  np.int64(2),
  np.int64(2),
  np.int64(2),
  np.int64(6),
  np.int64(6),
  np.int64(6),
  np.int64(6),
  np.int64(6),
  np.int64(6),
  np.int64(0),
  np.int64(0),
  np.int64(0),
  np.int64(0),
  np.int64(0),
  np.int64(0),
  np.int64(0),
  np.int64(0),
  np.int64(0),
  np.int64(0),
  np.int64(0),
  np.int64(0),
  np.int64(6),
  np.int64(6),
  np.int64(6),
  np.int64(6),
  np.int64(6),
  np.int64(6),
  np.int64(6),
  np.int64(6),
  np.int64(6),
  np.int64(6),
  np.int

In [None]:
run.finish()