In [None]:
# VIT6D.ipynb refactored for Minty (Linux Mint)
import torch
import torch.nn as nn
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import pandas as pd
import os
from tqdm import tqdm
import timm
import time
import datetime
import json
import numpy as np
import csv

PREREQUISITES AND DECLARATIONS (GLOBAL VARIABLES, HYPERPARAMETERS, PATHS, DEVICE)

In [None]:
# GlobVar.json stores the running model id that is used to name saved checkpoints.
with open("GlobVar.json", "r") as file:
    gv = json.loads(file.read())
mod_id = gv['mod_id']

# ---- Training hyperparameters ----
BATCH_ID = 3
BATCH_SIZE = 32
NUM_EPOCHS = 20
LEARNING_RATE = 1e-4
IMG_SIZE = 224  # input resolution used for the ViT backbone

# ---- Filesystem layout (Linux-style paths below the user's home directory) ----
BASE_DIR = os.path.expanduser("~/SKRIPSI/SCRIPTS")
DATASET_DIR = os.path.join(BASE_DIR, f"dataset/batch{BATCH_ID}")
MODEL_SAVE_PATH = os.path.join(BASE_DIR, f"model/ViT6DP_batch{BATCH_ID}.{mod_id}.pth")

# ---- Compute device: prefer the GPU when one is visible ----
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")
torch.cuda.empty_cache()

In [None]:
class PoseDataset(Dataset):
    """Image / pose-label dataset backed by an image folder and a label CSV.

    The CSV must provide an ``image_name`` column. Every column after the
    first one is converted to a float32 regression target.
    NOTE(review): this assumes ``image_name`` is the first CSV column, and the
    training loop's comments describe the targets as [tx, ty, tz, r1...r6];
    confirm both against the actual label files.
    """

    def __init__(self, image_dir, label_csv, transform=None):
        """
        Args:
            image_dir: Directory that contains the image files.
            label_csv: Path to the CSV with image names and pose labels.
            transform: Optional callable applied to each loaded PIL image.

        Raises:
            KeyError: If the CSV has no ``image_name`` column.
        """
        self.image_dir = image_dir
        self.labels = pd.read_csv(label_csv)
        self.transform = transform
        # Fail at construction time instead of on the first item fetch deep
        # inside a DataLoader iteration.
        if 'image_name' not in self.labels.columns:
            raise KeyError(f"'image_name' column not found in {label_csv}")

    def __len__(self):
        """Number of labelled samples (rows in the CSV)."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx`` of the label table."""
        row = self.labels.iloc[idx]
        img_path = os.path.join(self.image_dir, row['image_name'])
        # The context manager releases the file handle deterministically;
        # convert() returns an independent image that stays valid afterwards.
        with Image.open(img_path) as img:
            image = img.convert('RGB')
        # Explicit positional slice: everything after the first column.
        label = torch.tensor(row.iloc[1:].values.astype('float32'))
        if self.transform:
            image = self.transform(image)
        return image, label

In [None]:
# Preprocessing pipeline shared by all splits: resize to the backbone's input
# resolution, convert to a tensor, then normalize each channel with mean 0.5 / std 0.5.
transform = transforms.Compose(
    [
        transforms.Resize(size=(IMG_SIZE, IMG_SIZE)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5] * 3, std=[0.5] * 3),
    ]
)

In [None]:
def get_dataloader(split):
    """Build the DataLoader for one dataset split.

    Args:
        split: Sub-folder name below ``DATASET_DIR`` (the notebook uses
            'train', 'val' and 'test'). The folder is expected to hold an
            ``images`` directory and a ``labels.csv`` file.

    Returns:
        A ``DataLoader`` with ``BATCH_SIZE`` samples per batch; only the
        'train' split is shuffled.
    """
    split_dir = os.path.join(DATASET_DIR, split)
    dataset = PoseDataset(
        os.path.join(split_dir, 'images'),
        os.path.join(split_dir, 'labels.csv'),
        transform,
    )
    is_train = split == 'train'
    return DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=is_train)

# One loader per split, built in the order train -> val -> test.
train_loader, val_loader, test_loader = (
    get_dataloader(split_name) for split_name in ('train', 'val', 'test')
)

In [None]:
def get_dataset_stats(loader):
    """Summarize the labels served by ``loader``.

    Iterates the loader once, concatenates all label batches and computes
    per-column min / max / mean / std separately for the first three columns
    (translation) and for all remaining columns (rotation).
    NOTE(review): iterating a DataLoader also loads every image just to
    discard it; reading the label table directly would be much cheaper.

    Args:
        loader: Iterable yielding ``(inputs, labels)`` pairs where ``labels``
            is a 2-D tensor with one row per sample.

    Returns:
        ``(trans_stats, rot_stats)``: two dicts with the keys 'min', 'max',
        'mean' and 'std', each holding a 1-D tensor with one entry per column.

    Raises:
        ValueError: If the loader yields no batches.
    """
    label_batches = [labels for _, labels in loader]
    if not label_batches:
        # torch.cat would otherwise fail with a far less helpful message.
        raise ValueError("get_dataset_stats: loader yielded no batches")
    all_labels = torch.cat(label_batches, dim=0)

    def _summarize(block):
        """Column-wise summary statistics of a [N, C] tensor."""
        return {
            'min': block.min(dim=0)[0],
            'max': block.max(dim=0)[0],
            'mean': block.mean(dim=0),
            'std': block.std(dim=0),
        }

    trans_stats = _summarize(all_labels[:, :3])  # translation columns
    rot_stats = _summarize(all_labels[:, 3:])    # rotation columns
    return trans_stats, rot_stats

# Label statistics for every split, computed in the order train -> val -> test
# and printed only after all three have been gathered.
(
    (train_trans_stats, train_rot_stats),
    (val_trans_stats, val_rot_stats),
    (test_trans_stats, test_rot_stats),
) = (get_dataset_stats(split_loader) for split_loader in (train_loader, val_loader, test_loader))

print(f"Train Translation stat:{train_trans_stats}      |       Train Rotation stat: {train_rot_stats}")
print(f"Validation Translation stat:{val_trans_stats}     |       Validation Rotation stat: {val_rot_stats}")
print(f"Test Translation stat:{test_trans_stats}      |       Test Rotation stat: {test_rot_stats}")


In [None]:
class ViT6DP(nn.Module):
    """ViT-Base (patch 16, 224 px) backbone with a small MLP regression head.

    The backbone's stock head is replaced by
    ``Linear(in_features, hidden_dim) -> ReLU -> Linear(hidden_dim, out_dim)``.
    The defaults reproduce the original hard-coded architecture, so existing
    checkpoints and ``ViT6DP()`` call sites keep working.

    Args:
        pretrained: Forwarded to ``timm.create_model``. Pass ``False`` to skip
            fetching pretrained weights, e.g. when a checkpoint is loaded
            right after construction.
        hidden_dim: Width of the hidden layer in the regression head.
        out_dim: Number of regression outputs. The training loop's comments
            describe the default 9 values as [tx, ty, tz, r1...r6].
    """

    def __init__(self, pretrained=True, hidden_dim=512, out_dim=9):
        super().__init__()
        self.backbone = timm.create_model('vit_base_patch16_224', pretrained=pretrained)
        # Read the feature width before the original head is replaced.
        in_features = self.backbone.head.in_features
        self.backbone.head = nn.Sequential(
            nn.Linear(in_features, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, out_dim),
        )

    def forward(self, x):
        """Run the backbone (including the regression head) on an image batch."""
        return self.backbone(x)

In [None]:
def compute_rmse(pred, target):
    """Root-mean-square error of one batch, split into translation and rotation.

    Columns 0-2 are treated as translation and all remaining columns as
    rotation. Each RMSE is the square root of the mean squared error over all
    elements of the respective slice.

    Returns:
        ``(translation_rmse, rotation_rmse)`` as Python floats.
    """
    mse = nn.functional.mse_loss
    translation_rmse = mse(pred[:, :3], target[:, :3]).sqrt()
    rotation_rmse = mse(pred[:, 3:], target[:, 3:]).sqrt()
    return translation_rmse.item(), rotation_rmse.item()


In [None]:
def combined_loss(pred, target, alpha=1.0, beta=1.0):
    """Weighted sum of the translation MSE and the rotation MSE.

    Args:
        pred: Predicted batch; columns 0-2 are translation, the rest rotation.
        target: Ground-truth batch with the same layout as ``pred``.
        alpha: Weight of the translation term.
        beta: Weight of the rotation term.

    Returns:
        Scalar tensor ``alpha * MSE(translation) + beta * MSE(rotation)``.
    """
    mse = nn.functional.mse_loss
    translation_term = alpha * mse(pred[:, :3], target[:, :3])
    rotation_term = beta * mse(pred[:, 3:], target[:, 3:])
    return translation_term + rotation_term

In [None]:
# Network on the selected device, plus the Adam optimizer that updates all of its parameters.
model = ViT6DP()
model = model.to(DEVICE)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

TRAINING

In [None]:
def train(validate=True):
    """Train the module-level ``model`` for ``NUM_EPOCHS`` epochs.

    Uses the module-level ``train_loader``, ``optimizer`` and ``DEVICE`` and
    optimizes ``combined_loss`` with equal translation/rotation weights.
    After each epoch the mean training loss and the training time of that
    epoch are printed.

    Args:
        validate: When true, evaluate on ``val_loader`` after every epoch and
            print the mean batch loss and the mean of the per-batch RMSEs
            (batches are weighted equally, regardless of their size).
    """
    for epoch in range(NUM_EPOCHS):
        # Timed per epoch so the reported duration covers exactly this
        # epoch's training pass and never the previous epoch's validation.
        epoch_start = time.time()
        print("\n")
        model.train()
        running_loss = 0.0
        for images, labels in tqdm(train_loader):
            images = images.to(DEVICE)
            labels = labels.to(DEVICE)  # shape: [B, 9] -> [tx, ty, tz, r1...r6]

            outputs = model(images)

            loss = combined_loss(outputs, labels, alpha=1.0, beta=1.0)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        train_seconds = int(time.time() - epoch_start)
        avg_train_loss = running_loss / len(train_loader)
        print(f"Epoch {epoch + 1}/{NUM_EPOCHS}, Train Loss: {avg_train_loss:.4f}")
        print(f"Time per epoch {epoch + 1}: {train_seconds}s")

        if validate:
            model.eval()
            val_loss = 0.0
            total_trans_rmse, total_rot_rmse = 0.0, 0.0
            with torch.no_grad():
                for images, labels in val_loader:
                    images = images.to(DEVICE)
                    labels = labels.to(DEVICE)
                    outputs = model(images)

                    loss = combined_loss(outputs, labels)
                    val_loss += loss.item()

                    trans_rmse, rot_rmse = compute_rmse(outputs, labels)
                    total_trans_rmse += trans_rmse
                    total_rot_rmse += rot_rmse

            avg_val_loss = val_loss / len(val_loader)
            print(f"Val Loss: {avg_val_loss:.4f}")
            print(f"RMSE - Translation: {total_trans_rmse / len(val_loader):.4f}, "
                  f"Rotation: {total_rot_rmse / len(val_loader):.4f}")
        else:
            print("Skipping validation for this epoch.")


In [None]:
# Run the full training loop with per-epoch validation enabled.
train(validate=True)

SAVE + INCREMENT

In [None]:
# Create the checkpoint folder up front: torch.save fails if it is missing,
# which would throw away the model that was just trained.
_model_dir = os.path.dirname(MODEL_SAVE_PATH)
if _model_dir:
    os.makedirs(_model_dir, exist_ok=True)
torch.save(model.state_dict(), MODEL_SAVE_PATH)

# Advance the persistent model id so the next run gets a fresh file name.
# NOTE: later cells derive this run's id as `mod_id - 1`, so this cell must be
# executed exactly once per training run.
mod_id += 1
gv['mod_id'] = mod_id
# Save the updated JSON back to the file
with open("GlobVar.json", "w") as file:
    json.dump(gv, file, indent=4)

TEST THE MODEL

In [None]:
def test_model(model_path):
    """Evaluate a saved checkpoint on the module-level ``test_loader``.

    Args:
        model_path: Path to a ``state_dict`` file written with ``torch.save``.

    Returns:
        A 5-tuple ``(preds, gts, avg_loss, trans_rmse_sum, rot_rmse_sum)``.
        ``preds`` and ``gts`` are lists with one NumPy array per sample,
        ``avg_loss`` is the mean batch loss, and the two RMSE values are the
        *sums* of the per-batch RMSEs (divide by ``len(test_loader)`` to get
        the per-batch mean that is printed here).
    """
    model = ViT6DP().to(DEVICE)
    # The checkpoint is a plain state_dict, so restrict unpickling to tensors
    # and basic containers instead of allowing arbitrary pickled objects.
    state_dict = torch.load(model_path, map_location=DEVICE, weights_only=True)
    model.load_state_dict(state_dict)
    model.eval()

    test_total_loss = 0.0
    test_total_trans_rmse, test_total_rot_rmse = 0.0, 0.0
    preds, gts = [], []

    with torch.no_grad():
        for images, labels in tqdm(test_loader):
            images = images.to(DEVICE)
            labels = labels.to(DEVICE)
            outputs = model(images)

            loss = combined_loss(outputs, labels)
            test_total_loss += loss.item()

            trans_rmse, rot_rmse = compute_rmse(outputs, labels)
            test_total_trans_rmse += trans_rmse
            test_total_rot_rmse += rot_rmse

            # extend() over a 2-D array appends one row (sample) at a time.
            preds.extend(outputs.cpu().numpy())
            gts.extend(labels.cpu().numpy())

    test_avg_loss = test_total_loss / len(test_loader)
    print(f"Test Loss: {test_avg_loss:.4f}")
    print(f"Test RMSE - Translation: {test_total_trans_rmse / len(test_loader):.4f}, "
          f"Rotation: {test_total_rot_rmse / len(test_loader):.4f}")

    return preds, gts, test_avg_loss, test_total_trans_rmse, test_total_rot_rmse


In [None]:
# Evaluate the checkpoint saved by this run on the test split.
(
    predictions,
    ground_truths,
    test_avg_loss,
    test_total_trans_rmse,
    test_total_rot_rmse,
) = test_model(MODEL_SAVE_PATH)

VALIDATE THE MODEL

In [None]:
def validate_model(model_path):
    """Evaluate a saved checkpoint on the module-level ``val_loader``.

    Args:
        model_path: Path to a ``state_dict`` file written with ``torch.save``.

    Returns:
        A 5-tuple ``(preds, gts, avg_loss, trans_rmse_sum, rot_rmse_sum)``.
        ``preds`` and ``gts`` are lists with one NumPy array per sample,
        ``avg_loss`` is the mean batch loss, and the two RMSE values are the
        *sums* of the per-batch RMSEs (divide by ``len(val_loader)`` to get
        the per-batch mean that is printed here).
    """
    model = ViT6DP().to(DEVICE)
    # The checkpoint is a plain state_dict, so restrict unpickling to tensors
    # and basic containers instead of allowing arbitrary pickled objects.
    state_dict = torch.load(model_path, map_location=DEVICE, weights_only=True)
    model.load_state_dict(state_dict)
    model.eval()

    val_total_loss = 0.0
    val_total_trans_rmse, val_total_rot_rmse = 0.0, 0.0
    preds, gts = [], []

    with torch.no_grad():
        for images, labels in tqdm(val_loader):
            images = images.to(DEVICE)
            labels = labels.to(DEVICE)
            outputs = model(images)

            loss = combined_loss(outputs, labels)
            val_total_loss += loss.item()

            trans_rmse, rot_rmse = compute_rmse(outputs, labels)
            val_total_trans_rmse += trans_rmse
            val_total_rot_rmse += rot_rmse

            # extend() over a 2-D array appends one row (sample) at a time.
            preds.extend(outputs.cpu().numpy())
            gts.extend(labels.cpu().numpy())

    val_avg_loss = val_total_loss / len(val_loader)
    print(f"Validation Loss: {val_avg_loss:.4f}")
    print(f"Validation RMSE - Translation: {val_total_trans_rmse / len(val_loader):.4f}, "
          f"Rotation: {val_total_rot_rmse / len(val_loader):.4f}")

    return preds, gts, val_avg_loss, val_total_trans_rmse, val_total_rot_rmse


In [None]:
# Evaluate the checkpoint saved by this run on the validation split.
(
    val_predictions,
    val_ground_truths,
    val_avg_loss,
    val_total_trans_rmse,
    val_total_rot_rmse,
) = validate_model(MODEL_SAVE_PATH)

In [None]:
# Release cached, currently unused GPU memory held by PyTorch's allocator.
torch.cuda.empty_cache()

METRIC CALCULATIONS AND REPORTING (OUTPUT TO MD AND CSV)

In [None]:
def vectors_to_rotation_matrix(r1, r2):
    """Build a 3x3 rotation matrix from two 3-vectors (6D rotation form).

    The vectors are orthonormalized with one Gram-Schmidt step, so raw
    network outputs that are neither unit length nor mutually orthogonal
    still yield a proper rotation matrix. Inputs that are already orthogonal
    give the same result as plain normalization.

    Args:
        r1: First 3-vector; becomes the first column after normalization.
        r2: Second 3-vector; its component along ``r1`` is removed before
            normalization and the result becomes the second column.

    Returns:
        ``np.ndarray`` of shape (3, 3) whose columns are ``b1``, ``b2`` and
        ``b1 x b2``. If ``r1`` is zero or ``r2`` is parallel to ``r1`` the
        division by a zero norm yields NaN entries.
    """
    r1 = np.asarray(r1)
    r2 = np.asarray(r2)
    b1 = r1 / np.linalg.norm(r1)
    # Remove the part of r2 that points along b1 before normalizing it.
    r2_orth = r2 - np.dot(b1, r2) * b1
    b2 = r2_orth / np.linalg.norm(r2_orth)
    b3 = np.cross(b1, b2)
    return np.stack([b1, b2, b3], axis=1)

def calculate_translation_rmse(preds, gts):
    """RMSE of the per-sample Euclidean translation error, scaled by 1000.

    Args:
        preds: Array of shape [N, D] with predicted translations.
        gts: Array of the same shape with ground-truth translations.

    Returns:
        ``sqrt(mean(distance**2)) * 1000``, i.e. millimeters when the inputs
        are given in meters.
    """
    offsets = preds - gts
    distances = np.linalg.norm(offsets, axis=1)  # one distance per sample
    return np.sqrt(np.mean(np.square(distances))) * 1000

def calculate_rotation_rmse(preds_r1r2, gts_r1r2):
    """RMSE, in degrees, of the rotation angle between predicted and GT rotations.

    Each row holds two 3-vectors (elements 0-2 and 3 onward) that are turned
    into rotation matrices with ``vectors_to_rotation_matrix``. The error per
    sample is the angle recovered from the trace of ``R_pred.T @ R_gt``.
    """
    def _angle_deg(pred, gt):
        """Angle between the two rotations built from ``pred`` and ``gt``."""
        rot_pred = vectors_to_rotation_matrix(pred[:3], pred[3:])
        rot_gt = vectors_to_rotation_matrix(gt[:3], gt[3:])
        relative = rot_pred.T @ rot_gt
        # Clip guards arccos against values slightly outside [-1, 1].
        cos_angle = np.clip((np.trace(relative) - 1) / 2.0, -1.0, 1.0)
        return np.degrees(np.arccos(cos_angle))

    angles = np.array([_angle_deg(pred, gt) for pred, gt in zip(preds_r1r2, gts_r1r2)])
    return np.sqrt(np.mean(angles**2))  # RMSE in degrees

In [None]:
def _pose_accuracy(preds, gts):
    """Return (translation RMSE in cm, rotation RMSE in degrees) for one split.

    ``preds`` / ``gts`` are the per-sample output and label vectors collected
    by the evaluation functions: columns 0-2 are translation, the remaining
    six columns are the two rotation vectors.
    NOTE(review): the cm conversion assumes translations are in meters, as
    stated in ``calculate_translation_rmse`` - confirm against the dataset.
    """
    preds = np.asarray(preds, dtype=np.float64)
    gts = np.asarray(gts, dtype=np.float64)
    trans_cm = calculate_translation_rmse(preds[:, :3], gts[:, :3]) / 10.0  # mm -> cm
    rot_deg = calculate_rotation_rmse(preds[:, 3:], gts[:, 3:])
    return trans_cm, rot_deg


# The report labels these values "cm" and "°", so compute them in physical
# units instead of re-using the unitless mean per-batch RMSE.
val_trans_accuracy, val_rot_accuracy = _pose_accuracy(val_predictions, val_ground_truths)
test_trans_accuracy, test_rot_accuracy = _pose_accuracy(predictions, ground_truths)

In [None]:
# L2 norm of the per-column rotation minima / maxima of each split, as Python floats.
train_rot_mag_min, train_rot_mag_max = (
    train_rot_stats[bound].norm().item() for bound in ('min', 'max')
)
val_rot_mag_min, val_rot_mag_max = (
    val_rot_stats[bound].norm().item() for bound in ('min', 'max')
)
test_rot_mag_min, test_rot_mag_max = (
    test_rot_stats[bound].norm().item() for bound in ('min', 'max')
)

WRITE TO MD

In [None]:
# Markdown report for this run. The head description matches the model
# definition, whose final layer emits 9 values.
eval_content = f"""# Evaluation Results - Batch {BATCH_ID}

## Training Configuration
- Batch Size: {BATCH_SIZE}
- Epochs: {NUM_EPOCHS}
- Learning Rate: {LEARNING_RATE}
- Image Size: {IMG_SIZE}
- Device: {DEVICE}
- Optimizer : Adam

## Model Architecture
- Backbone: ViT Base Patch16 224
- Head: Linear(768->512->9)

## Evaluation Metrics

### Validation Set
- Average Loss: {val_avg_loss:.4f}
- Translation RMSE: {val_total_trans_rmse / len(val_loader):.4f}
- Translation Accuracy: {val_trans_accuracy:.2f} cm
- Rotation RMSE: {val_total_rot_rmse / len(val_loader):.4f}
- Rotation Accuracy: {val_rot_accuracy:.2f}°

### Test Set
- Average Loss: {test_avg_loss:.4f}
- Translation RMSE: {test_total_trans_rmse / len(test_loader):.4f}
- Translation Accuracy: {test_trans_accuracy:.2f} cm
- Rotation RMSE: {test_total_rot_rmse / len(test_loader):.4f}
- Rotation Accuracy: {test_rot_accuracy:.2f}°

## Dataset Statistics
### Training Set
- Translation range: [{train_trans_stats['min'].mean():.2f}, {train_trans_stats['max'].mean():.2f}] m
- Rotation magnitude range: [{train_rot_mag_min:.2f}, {train_rot_mag_max:.2f}]

### Validation Set
- Translation range: [{val_trans_stats['min'].mean():.2f}, {val_trans_stats['max'].mean():.2f}] m
- Rotation magnitude range: [{val_rot_mag_min:.2f}, {val_rot_mag_max:.2f}]

### Test Set
- Translation range: [{test_trans_stats['min'].mean():.2f}, {test_trans_stats['max'].mean():.2f}] m
- Rotation magnitude range: [{test_rot_mag_min:.2f}, {test_rot_mag_max:.2f}]

## File Locations
- Dataset Directory: {DATASET_DIR}
- Model Save Path: {MODEL_SAVE_PATH}
"""


# mod_id was already incremented when the model was saved, so this run's id is mod_id - 1.
eval_path = os.path.join(BASE_DIR, f"model/ViT6DP_EVAL_batch{BATCH_ID}.{mod_id-1}.md")
# Explicit UTF-8: the report contains the non-ASCII degree sign, which a
# platform-default encoding may not be able to represent.
with open(eval_path, 'w', encoding='utf-8') as f:
    f.write(eval_content)

print(f"Evaluation report saved to: {eval_path}")

WRITE TO CSV

In [None]:
# Results log shared by all runs; every evaluation appends exactly one row.
csv_path = os.path.join(BASE_DIR, "model/eval_results.csv")

# The header row is only written by the run that creates the file.
write_header = not os.path.exists(csv_path)

# One record per run; the insertion order below defines the CSV column order.
csv_data = dict(
    timestamp=datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
    dataset_id=BATCH_ID,
    model_id=mod_id - 1,  # id of the model saved by this run
    batch_size=BATCH_SIZE,
    epochs=NUM_EPOCHS,
    learning_rate=LEARNING_RATE,
    test_loss=test_avg_loss,
    test_translation_rmse=test_total_trans_rmse / len(test_loader),
    test_rotation_rmse=test_total_rot_rmse / len(test_loader),
    validation_loss=val_avg_loss,
    validation_translation_rmse=val_total_trans_rmse / len(val_loader),
    validation_rotation_rmse=val_total_rot_rmse / len(val_loader),
    model_path=MODEL_SAVE_PATH,
    eval_path=eval_path,
)

# Append mode keeps the rows of earlier runs; newline='' is what the csv module expects.
with open(csv_path, 'a', newline='') as csvfile:
    fieldnames = list(csv_data)
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    if write_header:
        writer.writeheader()
    writer.writerow(csv_data)

print(f"Results appended to CSV: {csv_path}")