In [1]:
!pip install torchinfo

Collecting torchinfo
  Downloading torchinfo-1.8.0-py3-none-any.whl.metadata (21 kB)
Downloading torchinfo-1.8.0-py3-none-any.whl (23 kB)
Installing collected packages: torchinfo
Successfully installed torchinfo-1.8.0


In [2]:
import pandas as pd
import numpy as np
from pathlib import Path
from typing import List, Tuple, Dict
import torch
from torch.utils.data import Dataset, DataLoader
import re
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import os
from torchsummary import summary
from torchinfo import summary
from tqdm import tqdm
from sklearn.metrics import confusion_matrix
import wandb
import torch.nn.functional as F
import hashlib
from typing import Dict, Tuple
import random

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print("Device: ", device)

Device:  cuda


In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# Configuration Dictionary
config = {
    'batch_size': 64,
    'lr': 0.001,
    'epochs': 10,
    'input_dim': 9,
    'num_classes': 7,
    'hidden_dim': 2048,
    'num_blocks': 6,
    'checkpoint_dir': "/content/drive/MyDrive/IDL/Checkpoint",
    'device': 'cuda' if torch.cuda.is_available() else 'cpu'
}

In [5]:
# Define category mapping
CATEGORIES = {
    'Blueball': 0,
    'Box': 1,
    'Pencilcase': 2,
    'Pinkball': 3,
    'StuffedAnimal': 4,
    'Tennis': 5,
    'Waterbottle': 6,
}

In [6]:
# Path to the folder containing the dataset files
folder_path = "/content/drive/MyDrive/IDL/IDL_Data"

In [7]:
# Stats trackers
total_count = 0
kept_count = 0
valid_file_count = 0
skipped_due_to_missing_waypoints = 0

In [8]:
WAYPOINTS = [
    (30, -30), (30, 30), (15, -30), (15, 30),
    (0, -30), (0, 30), (-15, -30), (-15, 30),
    (-30, -30), (-30, 30), (-30, -30), (30, -30),
    (30, 30), (-30, 30)
]


In [9]:
# Step 1: Load and label dataset
def load_and_label_file(file_path, file_name):
    global total_count
    category = next((key for key in CATEGORIES if key in file_name), None)
    if category is None:
        return pd.DataFrame()

    data = []
    with open(file_path, "r") as f:
        for line in f:
            parts = line.strip().split(',')
            if len(parts) == 10:
                try:
                    timestamp = parts[0]
                    microsec = int(parts[1])
                    x = float(parts[2])
                    y = float(parts[3])
                    x_target = float(parts[4])
                    y_target = float(parts[5])
                    pwm1 = int(parts[6])
                    pwm2 = int(parts[7])
                    pwm3 = int(parts[8])
                    pwm4 = int(parts[9])
                    total_count += 1

                    data.append([
                        timestamp, microsec, x, y, x_target, y_target,
                        pwm1, pwm2, pwm3, pwm4, category, CATEGORIES[category]
                    ])
                except ValueError:
                    continue

    return pd.DataFrame(data, columns=[
        "timestamp", "microseconds", "x", "y", "x_target", "y_target",
        "pwm1", "pwm2", "pwm3", "pwm4", "category", "label"
    ])

In [10]:
# Step 2: Assign sequential waypoint numbers
def assign_sequential_waypoints(df, tol=1.0):
    df = df.reset_index(drop=True)
    wp_index = 0
    assigned_wp = []

    for i in range(len(df)):
        x_t, y_t = df.loc[i, "x_target"], df.loc[i, "y_target"]
        current_expected = WAYPOINTS[wp_index]

        if np.isclose(x_t, current_expected[0], atol=tol) and np.isclose(y_t, current_expected[1], atol=tol):
            assigned_wp.append(wp_index)
        else:
            if wp_index + 1 < len(WAYPOINTS):
                next_expected = WAYPOINTS[wp_index + 1]
                if np.isclose(x_t, next_expected[0], atol=tol) and np.isclose(y_t, next_expected[1], atol=tol):
                    wp_index += 1
                    assigned_wp.append(wp_index)
                else:
                    assigned_wp.append(wp_index)
            else:
                assigned_wp.append(wp_index)

    df["waypoint_number"] = assigned_wp
    return df

In [11]:
# Step 3: Filter out rows where y <= 0
def filter_by_y(df):
    global kept_count
    filtered = df[df["y"] > 0].reset_index(drop=True)
    kept_count += len(filtered)
    return filtered

In [28]:
def process_file(file_path, file_name):
    global valid_file_count

    # Step 1: Load and label
    df = load_and_label_file(file_path, file_name)
    if df.empty:
        return pd.DataFrame()

    # Step 2: Assign waypoint numbers
    df = assign_sequential_waypoints(df)

    # 📌 Show how many waypoints existed before filtering
    waypoint_count_before = df["waypoint_number"].nunique()
    print(f"\n📌 {file_name} → {waypoint_count_before} waypoints BEFORE filtering")

    # Step 3: Filter out rows where y ≤ 0
    df = filter_by_y(df)

    # 📌 Show how many remain after filtering
    waypoint_count_after = df["waypoint_number"].nunique()
    print(f"📌 {file_name} → {waypoint_count_after} waypoints AFTER filtering")

    # Count as valid if any data was kept
    if not df.empty:
        valid_file_count += 1

    return df

In [13]:
# # Step 5: Process all .txt files
# all_data = pd.DataFrame()

# for file_name in os.listdir(folder_path):
#     if not file_name.endswith(".txt") or file_name.startswith("."):
#         continue
#     file_path = os.path.join(folder_path, file_name)
#     df = process_file(file_path, file_name)
#     if not df.empty:
#         all_data = pd.concat([all_data, df], ignore_index=True)

# # Step 6: Summary
# print("\n📄 Summary:")
# print(f"Total files scanned: {len([f for f in os.listdir(folder_path) if f.endswith('.txt')])}")
# print(f"✅ Files with 14 valid waypoints: {valid_file_count}")
# print(f"⚠️ Skipped due to missing waypoints: {skipped_due_to_missing_waypoints}")
# print(f"📊 Data points before filtering: {total_count}")
# print(f"✅ Data points after filtering: {kept_count}")
# print(f"🚫 Dropped data points: {total_count - kept_count}")


📌 StuffedAnimal18.txt → 14 waypoints BEFORE filtering
📌 StuffedAnimal18.txt → 13 waypoints AFTER filtering

📌 Waterbottle30.txt → 14 waypoints BEFORE filtering
📌 Waterbottle30.txt → 12 waypoints AFTER filtering

📌 Tennis27.txt → 14 waypoints BEFORE filtering
📌 Tennis27.txt → 12 waypoints AFTER filtering

📌 Pinkball2.txt → 14 waypoints BEFORE filtering
📌 Pinkball2.txt → 13 waypoints AFTER filtering

📌 Pinkball6.txt → 14 waypoints BEFORE filtering
📌 Pinkball6.txt → 13 waypoints AFTER filtering

📌 Blueball6.txt → 14 waypoints BEFORE filtering
📌 Blueball6.txt → 13 waypoints AFTER filtering

📌 Pinkball17.txt → 14 waypoints BEFORE filtering
📌 Pinkball17.txt → 12 waypoints AFTER filtering

📌 Tennis22.txt → 14 waypoints BEFORE filtering
📌 Tennis22.txt → 12 waypoints AFTER filtering

📌 Waterbottle2.txt → 14 waypoints BEFORE filtering
📌 Waterbottle2.txt → 13 waypoints AFTER filtering

📌 Waterbottle4.txt → 14 waypoints BEFORE filtering
📌 Waterbottle4.txt → 13 waypoints AFTER filtering

📌 Waterbo

In [14]:
# print(all_data.head(1350))

                timestamp  microseconds      x      y  x_target  y_target  \
0     2025-03-28 16:36:11         89476   9.44   2.29      30.0     -30.0   
1     2025-03-28 16:36:11     338242964   9.44   2.29      30.0     -30.0   
2     2025-03-28 16:36:11     338296464   9.44   2.29      30.0     -30.0   
3     2025-03-28 16:36:12        228432   6.53   1.59      30.0     -30.0   
4     2025-03-28 16:36:12        281816   9.77   2.38      30.0     -30.0   
...                   ...           ...    ...    ...       ...       ...   
1345  2025-03-28 16:37:14      62204116  15.00  28.49      15.0      30.0   
1346  2025-03-28 16:37:14      62225968  15.00  28.49      15.0      30.0   
1347  2025-03-28 16:37:14      62247828  15.00  28.49      15.0      30.0   
1348  2025-03-28 16:37:14      62269688  15.00  28.49      15.0      30.0   
1349  2025-03-28 16:37:14      62291540  15.00  28.49      15.0      30.0   

      pwm1  pwm2  pwm3  pwm4       category  label  waypoint_number  
0    

In [15]:
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm
import pandas as pd
import numpy as np

In [16]:
# class ObjectSensorDataset(Dataset):
#     def __init__(self, df):
#         features = df[["x", "y", "x_target", "y_target", "pwm1", "pwm2", "pwm3", "pwm4", "microseconds"]].values
#         labels = df["label"].values

#         self.X = torch.tensor(features, dtype=torch.float32)
#         self.y = torch.tensor(labels, dtype=torch.long)

#     def __len__(self):
#         return len(self.y)

#     def __getitem__(self, idx):
#         return self.X[idx], self.y[idx]

In [None]:
# # Normalize features before split
# features_to_scale = ["x", "y", "x_target", "y_target", "pwm1", "pwm2", "pwm3", "pwm4", "microseconds"]
# scaler = StandardScaler()
# all_data[features_to_scale] = scaler.fit_transform(all_data[features_to_scale])

# # 🧪 Split into train/val/test with stratified sampling
# train_df, temp_df = train_test_split(
#     all_data, test_size=0.3, stratify=all_data["label"], random_state=42
# )
# val_df, test_df = train_test_split(
#     temp_df, test_size=0.5, stratify=temp_df["label"], random_state=42
# )

# # 📦 Create datasets
# train_dataset = ObjectSensorDataset(train_df)
# val_dataset = ObjectSensorDataset(val_df)
# test_dataset = ObjectSensorDataset(test_df)

In [40]:
# Set random seed for reproducibility
random.seed(42)

# Collect valid files first
valid_files = []
for file_name in os.listdir(folder_path):
    if not file_name.endswith(".txt") or file_name.startswith("."):
        continue
    file_path = os.path.join(folder_path, file_name)
    df = load_and_label_file(file_path, file_name)
    if not df.empty:
        valid_files.append((file_path, file_name))

# Shuffle and split into train/val/test file sets
random.shuffle(valid_files)
total = len(valid_files)
train_split = int(0.7 * total)
val_split = int(0.85 * total)

train_files = valid_files[:train_split]
val_files = valid_files[train_split:val_split]
test_files = valid_files[val_split:]

def process_files(file_list):
    all_dfs = []
    for file_path, file_name in file_list:
        df = process_file(file_path, file_name)
        if not df.empty:
            all_dfs.append(df)
    return pd.concat(all_dfs, ignore_index=True) if all_dfs else pd.DataFrame()

# Process each split
train_data = process_files(train_files)
val_data = process_files(val_files)
test_data = process_files(test_files)

# Normalize using training set stats only
scaler = StandardScaler()
features_to_scale = ["x", "y", "x_target", "y_target", "pwm1", "pwm2", "pwm3", "pwm4", "microseconds"]
scaler.fit(train_data[features_to_scale])

for df in [train_data, val_data, test_data]:
    df[features_to_scale] = scaler.transform(df[features_to_scale])





📌 Waterbottle30.txt → 14 waypoints BEFORE filtering
📌 Waterbottle30.txt → 12 waypoints AFTER filtering

📌 Blueball24.txt → 14 waypoints BEFORE filtering
📌 Blueball24.txt → 12 waypoints AFTER filtering

📌 Blueball18.txt → 14 waypoints BEFORE filtering
📌 Blueball18.txt → 12 waypoints AFTER filtering


Train:   2%|▏         | 123/7381 [00:22<11:41, 10.35it/s, acc=31.7835%, loss=1.8445, lr=0.001000]


📌 StuffedAnimal30.txt → 14 waypoints BEFORE filtering
📌 StuffedAnimal30.txt → 13 waypoints AFTER filtering

📌 Box24.txt → 14 waypoints BEFORE filtering
📌 Box24.txt → 12 waypoints AFTER filtering

📌 StuffedAnimal4.txt → 14 waypoints BEFORE filtering
📌 StuffedAnimal4.txt → 13 waypoints AFTER filtering

📌 Blueball12.txt → 14 waypoints BEFORE filtering
📌 Blueball12.txt → 12 waypoints AFTER filtering

📌 Waterbottle8.txt → 14 waypoints BEFORE filtering
📌 Waterbottle8.txt → 12 waypoints AFTER filtering

📌 Pinkball9.txt → 14 waypoints BEFORE filtering
📌 Pinkball9.txt → 12 waypoints AFTER filtering

📌 Blueball10.txt → 14 waypoints BEFORE filtering
📌 Blueball10.txt → 12 waypoints AFTER filtering

📌 Box27.txt → 14 waypoints BEFORE filtering
📌 Box27.txt → 12 waypoints AFTER filtering

📌 StuffedAnimal15.txt → 14 waypoints BEFORE filtering
📌 StuffedAnimal15.txt → 12 waypoints AFTER filtering

📌 StuffedAnimal9.txt → 14 waypoints BEFORE filtering
📌 StuffedAnimal9.txt → 12 waypoints AFTER filtering

📌

  self.X = torch.tensor(self.X, dtype=torch.float32)   # shape: (num_samples, window_size, num_features)



📊 Dataset Sizes (File-level split):
🧠 Training set: 107629 samples from 132 files
🧪 Validation set: 22528 samples from 28 files
🧾 Test set: 22816 samples from 29 files


In [None]:
# Dataset class
class ObjectSensorWindowDataset(Dataset):
    def __init__(self, df, window_size=30, step_size=1):
        self.X = []
        self.y = []

        features = df[features_to_scale].values
        labels = df["label"].values

        for i in range(0, len(df) - window_size + 1, step_size):
            window_X = features[i:i+window_size]
            window_y = labels[i+window_size-1]  # you can change this to majority or first label if needed

            self.X.append(window_X)
            self.y.append(window_y)

        self.X = torch.tensor(self.X, dtype=torch.float32)   # shape: (num_samples, window_size, num_features)
        self.y = torch.tensor(self.y, dtype=torch.long)

    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

window_size = 200
step_size = 1  # use 1 for sliding, >1 for stride

train_dataset = ObjectSensorWindowDataset(train_data, window_size, step_size)
val_dataset = ObjectSensorWindowDataset(val_data, window_size, step_size)
test_dataset = ObjectSensorWindowDataset(test_data, window_size, step_size)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Summary
print("\n📊 Dataset Sizes (File-level split):")
print(f"🧠 Training set: {len(train_dataset)} samples from {len(train_files)} files")
print(f"🧪 Validation set: {len(val_dataset)} samples from {len(val_files)} files")
print(f"🧾 Test set: {len(test_dataset)} samples from {len(test_files)} files")

In [41]:
# # 📦 Create DataLoaders
# train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
# val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
# test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# for X_batch, y_batch in tqdm(train_loader, desc="🔁 Training"):
#     # Simulate training step
#     pass

# for X_batch, y_batch in tqdm(val_loader, desc="🔍 Validating"):
#     # Simulate validation step
#     pass

# for X_batch, y_batch in tqdm(test_loader, desc="🧪 Testing"):
#     # Simulate test step
#     pass

# # 📊 Summary of dataset sizes
# print("\n📊 Dataset Sizes:")
# print(f"🧠 Training set: {len(train_dataset)} samples")
# print(f"🧪 Validation set: {len(val_dataset)} samples")
# print(f"🧾 Test set: {len(test_dataset)} samples")

In [55]:
# import torch
# import torch.nn as nn
# from torchinfo import summary

# class TimeDistributed(nn.Module):
#     """Applies a module over the time dimension (B, T, F) → applies to each T."""
#     def __init__(self, module):
#         super(TimeDistributed, self).__init__()
#         self.module = module

#     def forward(self, x):
#         # x: (B, T, F)
#         B, T, F = x.size()
#         x = x.contiguous().view(B * T, F)
#         x = self.module(x)
#         x = x.view(B, T, -1)
#         return x

# class ResidualBlock(nn.Module):
#     def __init__(self, dim):
#         super(ResidualBlock, self).__init__()
#         self.linear1 = TimeDistributed(nn.Linear(dim, dim))
#         self.bn1 = TimeDistributed(nn.BatchNorm1d(dim))
#         self.act1 = nn.ReLU()
#         self.drop1 = nn.Dropout(0.5)

#         self.linear2 = TimeDistributed(nn.Linear(dim, dim))
#         self.bn2 = TimeDistributed(nn.BatchNorm1d(dim))

#         self.relu = nn.ReLU(inplace=True)

#     def forward(self, x):
#         identity = x
#         out = self.linear1(x)
#         out = self.bn1(out)
#         out = self.act1(out)
#         out = self.drop1(out)

#         out = self.linear2(out)
#         out = self.bn2(out)

#         out += identity
#         return self.relu(out)

# class ResidualMLPClassifier(nn.Module):
#     def __init__(self, input_dim, num_classes, hidden_dim=2048, num_blocks=10):
#         super(ResidualMLPClassifier, self).__init__()

#         self.input_layer = nn.Sequential(
#             TimeDistributed(nn.Linear(input_dim, hidden_dim)),
#             TimeDistributed(nn.BatchNorm1d(hidden_dim)),
#             nn.ReLU(),
#             nn.Dropout(0.5),
#         )

#         self.res_blocks = nn.Sequential(*[ResidualBlock(hidden_dim) for _ in range(num_blocks)])

#         self.classifier = nn.Sequential(
#             nn.Dropout(0.5),
#             TimeDistributed(nn.Linear(hidden_dim, 1024)),
#             TimeDistributed(nn.BatchNorm1d(1024)),
#             nn.ReLU(),
#             nn.Dropout(0.5),
#             TimeDistributed(nn.Linear(1024, 512)),
#             TimeDistributed(nn.BatchNorm1d(512)),
#             nn.ReLU(),
#             nn.Dropout(0.5),
#             TimeDistributed(nn.Linear(512, 256)),
#             TimeDistributed(nn.BatchNorm1d(256)),
#             nn.ReLU(),
#             TimeDistributed(nn.Linear(256, num_classes)),
#         )

#         self.softmax = nn.Softmax(dim=2)

#     def forward(self, x):
#         x = self.input_layer(x)
#         x = self.res_blocks(x)
#         feats = x
#         out = self.classifier(x)
#         out = self.softmax(out)
#         out = out[:, -1, :]
#         return {"feats": feats, "out": out}


In [57]:
# import torch
# import torch.nn as nn

# class ResidualBlock(nn.Module):
#     def __init__(self, dim):
#         super(ResidualBlock, self).__init__()
#         self.block = nn.Sequential(
#             nn.Linear(dim, dim),
#             nn.BatchNorm1d(dim),
#             nn.GELU(),
#             nn.Dropout(0.5),
#             nn.Linear(dim, dim),
#             nn.BatchNorm1d(dim),
#         )
#         self.relu = nn.ReLU(inplace=True)

#     def forward(self, x):
#         identity = x
#         out = self.block(x)
#         out += identity
#         return self.relu(out)


# class ResidualMLPClassifier(nn.Module):
#     def __init__(self, input_dim, num_classes, hidden_dim=2048, num_blocks=6):
#         super(ResidualMLPClassifier, self).__init__()
#         self.input_layer = nn.Sequential(
#             nn.Linear(input_dim, hidden_dim),
#             nn.BatchNorm1d(hidden_dim),
#             nn.GELU(),
#             nn.Dropout(0.5),
#         )

#         self.res_blocks = nn.Sequential(*[ResidualBlock(hidden_dim) for _ in range(num_blocks)])

#         self.classifier = nn.Sequential(
#             nn.Dropout(0.5),
#             nn.Linear(hidden_dim, 512),
#             nn.BatchNorm1d(512),
#             nn.GELU(),
#             nn.Linear(512, num_classes),
#             nn.Softmax(dim=1)
#         )

#     def forward(self, x):
#         x = self.input_layer(x)
#         x = self.res_blocks(x)
#         feats = x
#         out = self.classifier(x)
#         return {"feats": feats, "out": out}

model = ResidualMLPClassifier(input_dim=9, num_classes=7, hidden_dim = config['hidden_dim'], num_blocks = config['num_blocks']).to(config['device'])
# summary(model, input_size=(64, 9))  # for batch size 64

In [None]:
import torch
import torch.nn as nn

class RNNClassifier(nn.Module):
    def __init__(self, input_dim, num_classes, hidden_dim=256, num_layers=2, dropout=0.5, rnn_type="lstm"):
        super(RNNClassifier, self).__init__()

        self.rnn_type = rnn_type.lower()
        rnn_cls = {
            "lstm": nn.LSTM,
            "gru": nn.GRU,
            "rnn": nn.RNN
        }[self.rnn_type]

        self.rnn = rnn_cls(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout if num_layers > 1 else 0,
            bidirectional=False
        )

        self.classifier = nn.Sequential(
            nn.Linear(hidden_dim, 256),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(128, num_classes)
        )

        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        # x: (B, T, F)
        out, _ = self.rnn(x)  # out: (B, T, H)
        last_out = out[:, -1, :]  # last timestep (B, H)
        logits = self.classifier(last_out)
        probs = self.softmax(logits)
        return {"feats": last_out, "out": probs}

model = RNNClassifier(
    input_dim=9,
    num_classes=7,
    hidden_dim=config['hidden_dim'],
    num_layers=config['num_blocks'],  # or another config param
    dropout=0.5,
    rnn_type="lstm"
).to(config['device'])

# For summary:
from torchinfo import summary
summary(model, input_data=torch.zeros(64, 30, 9).to(config['device']))

In [58]:
class AverageMeter:
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

In [59]:
def accuracy(output, target, topk=(1,)):
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res

In [60]:
def train_model(model, train_loader, criterion, optimizer, device):
    model.train()
    loss_m = AverageMeter()
    acc_m = AverageMeter()
    batch_bar = tqdm(total=len(train_loader), dynamic_ncols=True, leave=False, position=0, desc='Train')

    for i, data in enumerate(train_loader):
        optimizer.zero_grad()
        x, y = data
        x, y = x.to(device), y.to(device)
        outputs = model(x)
        loss = criterion(outputs['out'], y)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()

        acc = accuracy(outputs['out'], y)[0].item()
        loss_m.update(loss.item())
        acc_m.update(acc)

        batch_bar.set_postfix(
            loss="{:.04f}".format(float(loss_m.avg)),
            acc="{:.04f}%".format(float(acc_m.avg)),
            lr="{:.06f}".format(float(optimizer.param_groups[0]['lr']))
        )
        batch_bar.update()

        del x, y, outputs, loss
        torch.cuda.empty_cache()

    batch_bar.close()
    return loss_m.avg, acc_m.avg

In [61]:
@torch.no_grad()
def validate_model(model, val_loader, criterion, class_names, device):
    model.eval()
    loss_m = AverageMeter()
    acc_m = AverageMeter()
    batch_bar = tqdm(total=len(val_loader), dynamic_ncols=True, position=0, leave=False, desc='Val')

    all_preds = []
    all_targets = []

    for i, data in enumerate(val_loader):
        x, y = data
        x, y = x.to(device), y.to(device)
        outputs = model(x)
        loss = criterion(outputs['out'], y)

        acc = accuracy(outputs['out'], y)[0].item()

        _, predicted = torch.max(outputs['out'], 1)
        all_preds.extend(predicted.cpu().numpy())
        all_targets.extend(y.cpu().numpy())

        loss_m.update(loss.item())
        acc_m.update(acc)

        batch_bar.set_postfix(
            loss="{:.04f}".format(float(loss_m.avg)),
            acc="{:.04f}%".format(float(acc_m.avg))
        )
        batch_bar.update()

        del x, y, outputs, loss
        torch.cuda.empty_cache()

    batch_bar.close()

    if class_names:
        print("\nPer-class Validation Accuracy:")
        per_class_acc = {}
        for i, class_name in enumerate(class_names):
            class_mask = (np.array(all_targets) == i)
            if np.sum(class_mask) > 0:
                class_correct = np.sum((np.array(all_preds)[class_mask] == i))
                class_total = np.sum(class_mask)
                acc_percent = 100 * class_correct / class_total
                print(f"  {class_name}: {acc_percent:.4f}% ({class_correct}/{class_total})")
                per_class_acc[f"val_acc_{class_name}"] = acc_percent

    return loss_m.avg, acc_m.avg

In [62]:
def save_model(model, optimizer, scheduler, metrics, epoch, path):
    torch.save({
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'scheduler_state_dict': scheduler.state_dict() if scheduler else None,
        'metrics': metrics
    }, path)

In [63]:
# Define CrossEntropyLoss as the criterion
criterion = nn.CrossEntropyLoss(
    label_smoothing=0.1
)

# Initialize optimizer with AdamW
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=config['lr'],
    weight_decay=1e-5
)

# Learning rate scheduler
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.5,
    patience=3,
    min_lr=1e-6,
    verbose=True
)

In [64]:
import wandb

# Intialize wandb
wandb.login(key="78d5988d9f05a421bc74d044c3cd9afc3b918020") # API Key is in your wandb account, under settings (wandb.ai/settings)



True

In [65]:
run = wandb.init(
    name = "19run", ## Wandb creates random run names if you skip this field
    reinit = False, ### Allows reinitalizing runs when you re-run this cell
    #id = "", ### Insert specific run id here if you want to resume a previous run
    # resume = "must" ### You need this to resume previous runs, but comment out reinit = True when using this
    project = "object_classification", ### Project should be created in your wandb account
    config = config ### Wandb Config for your run
)

In [66]:
# Training Loop
best_val_loss = float('inf')
best_val_acc = 0
class_names = list(CATEGORIES.keys())

for epoch in range(config['epochs']):
    print(f"\nEpoch {epoch + 1}/{config['epochs']}")

    train_loss, train_acc = train_model(model, train_loader, criterion, optimizer, config['device'])
    print(f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_acc:.2f}%")

    val_loss, val_acc = validate_model(model, val_loader, criterion, class_names, config['device'])
    print(f"Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_acc:.2f}%")

    scheduler.step(val_loss)
    curr_lr = optimizer.param_groups[0]['lr']

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        best_val_acc = val_acc
        best_model_path = os.path.join(config['checkpoint_dir'], 'best_model.pth')
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'val_loss': val_loss,
            'val_acc': val_acc,
        }, best_model_path)
        wandb.save(best_model_path)
        print(f"Saved best model with validation loss: {best_val_loss:.4f} and accuracy: {best_val_acc:.2f}%")

    last_model_path = os.path.join(config['checkpoint_dir'], f'model_epoch_{epoch+1}.pth')
    torch.save(model.state_dict(), last_model_path)
    wandb.save(last_model_path)
    print(f"Saved model for epoch {epoch+1}")

    wandb.log({
        'epoch': epoch + 1,
        'train_loss': train_loss,
        'train_acc': train_acc,
        'val_loss': val_loss,
        'val_acc': val_acc,
        'learning_rate': curr_lr
    }, step=epoch)

    print(f"End of Epoch {epoch+1}/{config['epochs']}")

print(f"\nTraining complete! Best validation accuracy: {best_val_acc:.2f}%")


Epoch 1/10




Train Loss: 1.9131, Train Accuracy: 22.20%





Per-class Validation Accuracy:
  Blueball: 15.4304% (570/3694)
  Box: 8.0258% (374/4660)
  Pinkball: 36.9341% (824/2231)
  StuffedAnimal: 26.9564% (1712/6351)
  Tennis: 30.1940% (965/3196)
  Waterbottle: 7.0534% (169/2396)
Validation Loss: 1.9276, Validation Accuracy: 20.48%
Saved best model with validation loss: 1.9276 and accuracy: 20.48%
Saved model for epoch 1
End of Epoch 1/10

Epoch 2/10




Train Loss: 1.8936, Train Accuracy: 25.12%





Per-class Validation Accuracy:
  Blueball: 24.2826% (897/3694)
  Box: 17.0815% (796/4660)
  Pinkball: 33.2138% (741/2231)
  StuffedAnimal: 31.6643% (2011/6351)
  Tennis: 16.4268% (525/3196)
  Waterbottle: 9.3072% (223/2396)
Validation Loss: 1.9128, Validation Accuracy: 23.05%
Saved best model with validation loss: 1.9128 and accuracy: 23.05%
Saved model for epoch 2
End of Epoch 2/10

Epoch 3/10




Train Loss: 1.8838, Train Accuracy: 26.62%





Per-class Validation Accuracy:
  Blueball: 17.3795% (642/3694)
  Box: 23.4549% (1093/4660)
  Pinkball: 35.3653% (789/2231)
  StuffedAnimal: 28.6726% (1821/6351)
  Tennis: 22.9975% (735/3196)
  Waterbottle: 13.3139% (319/2396)
Validation Loss: 1.9113, Validation Accuracy: 23.97%
Saved best model with validation loss: 1.9113 and accuracy: 23.97%
Saved model for epoch 3
End of Epoch 3/10

Epoch 4/10




Train Loss: 1.8739, Train Accuracy: 28.17%





Per-class Validation Accuracy:
  Blueball: 14.2393% (526/3694)
  Box: 9.5494% (445/4660)
  Pinkball: 32.4070% (723/2231)
  StuffedAnimal: 28.7356% (1825/6351)
  Tennis: 19.1489% (612/3196)
  Waterbottle: 19.7830% (474/2396)
Validation Loss: 1.9310, Validation Accuracy: 20.44%
Saved model for epoch 4
End of Epoch 4/10

Epoch 5/10




Train Loss: 1.8660, Train Accuracy: 28.95%





Per-class Validation Accuracy:
  Blueball: 15.6741% (579/3694)
  Box: 26.1803% (1220/4660)
  Pinkball: 33.0793% (738/2231)
  StuffedAnimal: 22.7996% (1448/6351)
  Tennis: 17.0213% (544/3196)
  Waterbottle: 17.1119% (410/2396)
Validation Loss: 1.9242, Validation Accuracy: 21.92%
Saved model for epoch 5
End of Epoch 5/10

Epoch 6/10




Train Loss: 1.8585, Train Accuracy: 30.03%





Per-class Validation Accuracy:
  Blueball: 19.3286% (714/3694)
  Box: 15.9227% (742/4660)
  Pinkball: 27.6109% (616/2231)
  StuffedAnimal: 24.8465% (1578/6351)
  Tennis: 19.7747% (632/3196)
  Waterbottle: 17.4457% (418/2396)
Validation Loss: 1.9307, Validation Accuracy: 20.86%
Saved model for epoch 6
End of Epoch 6/10

Epoch 7/10




Train Loss: 1.8519, Train Accuracy: 30.97%





Per-class Validation Accuracy:
  Blueball: 23.3622% (863/3694)
  Box: 19.6567% (916/4660)
  Pinkball: 24.5182% (547/2231)
  StuffedAnimal: 27.7279% (1761/6351)
  Tennis: 16.3016% (521/3196)
  Waterbottle: 17.6962% (424/2396)
Validation Loss: 1.9231, Validation Accuracy: 22.34%
Saved model for epoch 7
End of Epoch 7/10

Epoch 8/10




Train Loss: 1.8407, Train Accuracy: 32.34%





Per-class Validation Accuracy:
  Blueball: 16.0531% (593/3694)
  Box: 16.9957% (792/4660)
  Pinkball: 22.7701% (508/2231)
  StuffedAnimal: 25.8542% (1642/6351)
  Tennis: 14.0175% (448/3196)
  Waterbottle: 24.7913% (594/2396)
Validation Loss: 1.9375, Validation Accuracy: 20.32%
Saved model for epoch 8
End of Epoch 8/10

Epoch 9/10




Train Loss: 1.8371, Train Accuracy: 32.69%





Per-class Validation Accuracy:
  Blueball: 21.4402% (792/3694)
  Box: 23.1330% (1078/4660)
  Pinkball: 26.5800% (593/2231)
  StuffedAnimal: 27.9484% (1775/6351)
  Tennis: 12.6721% (405/3196)
  Waterbottle: 15.3589% (368/2396)
Validation Loss: 1.9242, Validation Accuracy: 22.24%
Saved model for epoch 9
End of Epoch 9/10

Epoch 10/10




Train Loss: 1.8333, Train Accuracy: 33.06%





Per-class Validation Accuracy:
  Blueball: 23.4434% (866/3694)
  Box: 17.9614% (837/4660)
  Pinkball: 26.7593% (597/2231)
  StuffedAnimal: 22.6264% (1437/6351)
  Tennis: 15.8323% (506/3196)
  Waterbottle: 15.1085% (362/2396)
Validation Loss: 1.9371, Validation Accuracy: 20.44%
Saved model for epoch 10
End of Epoch 10/10

Training complete! Best validation accuracy: 23.97%


In [None]:
@torch.no_grad()
def test_model(model, test_loader, criterion, class_names, device, checkpoint_dir=None):
    model.eval()
    test_loss = 0.0
    correct = 0
    total = 0
    all_preds = []
    all_targets = []
    all_probs = []

    class_correct = {class_name: 0 for class_name in class_names}
    class_total = {class_name: 0 for class_name in class_names}

    for data in test_loader:
        inputs, targets = data
        inputs, targets = inputs.to(device), targets.to(device)
        outputs = model(inputs)
        outputs_for_loss = outputs['out'] if isinstance(outputs, dict) and 'out' in outputs else outputs
        loss = criterion(outputs_for_loss, targets)
        test_loss += loss.item() * inputs.size(0)

        probs = torch.nn.functional.softmax(outputs_for_loss, dim=1)
        _, predicted = torch.max(outputs_for_loss, 1)

        total += targets.size(0)
        correct += (predicted == targets).sum().item()

        for i in range(targets.size(0)):
            label = targets[i].item()
            pred = predicted[i].item()
            class_name = class_names[label]
            class_total[class_name] += 1
            if pred == label:
                class_correct[class_name] += 1

        all_preds.extend(predicted.cpu().numpy())
        all_targets.extend(targets.cpu().numpy())
        all_probs.extend(probs.cpu().numpy())

    test_loss /= len(test_loader.dataset)
    test_acc = correct / total

    class_accuracy = {
        name: class_correct[name]/class_total[name] if class_total[name] > 0 else 0
        for name in class_names
    }

    print("\n" + "="*50)
    print("TEST RESULTS")
    print("="*50)
    print(f"Test Loss: {test_loss:.4f}")
    print(f"Test Accuracy: {test_acc:.4f} ({correct}/{total})")
    print("\nPer-Class Accuracy:")
    for class_name in class_names:
        print(f" {class_name}: {class_accuracy[class_name]:.4f} ({class_correct[class_name]}/{class_total[class_name]})")

    return {
        'test_loss': test_loss,
        'test_accuracy': test_acc,
        'class_accuracy': class_accuracy,
        'predictions': all_preds,
        'targets': all_targets,
        'probabilities': all_probs
    }


In [None]:
best_model_path = f"{config['checkpoint_dir']}/best_model.pth"
if os.path.exists(best_model_path):
    checkpoint = torch.load(best_model_path)
    model.load_state_dict(checkpoint['model_state_dict'])
    print(f"Loaded best model from epoch {checkpoint.get('epoch', 'unknown')}")

Loaded best model from epoch 0


In [None]:
test_model(model, test_loader, criterion, class_names, config['device'])

In [None]:
run.finish()

Train:   1%|          | 99/14993 [00:17<14:57, 16.60it/s, acc=38.9362%, loss=1.7867, lr=0.001000]

0,1
epoch,▁█
learning_rate,▁▁
train_acc,▁█
train_loss,█▁
val_acc,▁█
val_loss,█▁

0,1
epoch,2.0
learning_rate,0.001
train_acc,36.76726
train_loss,1.80355
val_acc,24.60289
val_loss,1.91094
