# Model Inference Notebook

This notebook loads a trained drone imitation learning model and tests it on sample data.

## Setup
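Update the `EXPERIMENT_DIR` variable in Section 1 below to point to your trained experiment directory. The notebook expects that directory to contain `configs/config.yaml` and `models/model.pth`.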

In [4]:
import numpy as np

In [5]:
import sys
from pathlib import Path

# Locate the project root so that the `drone` package is importable.
# Walk upward from the working directory looking for the `drone/datasets`
# package directory; if no ancestor contains it, fall back to the original
# "two levels above the working directory" layout.
_cwd = Path.cwd()
project_root = next(
    (candidate for candidate in (_cwd, *_cwd.parents)
     if (candidate / "drone" / "datasets").is_dir()),
    _cwd.parent.parent,
)

# Keep the root at the front of sys.path, but only once: re-running this cell
# must not pile up duplicate entries.
_root_entry = str(project_root)
if _root_entry in sys.path:
    sys.path.remove(_root_entry)
sys.path.insert(0, _root_entry)

import torch
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import json
from omegaconf import OmegaConf
from hydra.utils import instantiate
import torchvision.transforms as transforms

from drone.datasets.dataloader import CrazyflieILDataset

# Seed both RNGs used in this notebook (PyTorch and NumPy's legacy global
# generator) so that randomly drawn sample indices are repeatable.
SEED = 42
for seed_rng in (torch.manual_seed, np.random.seed):
    seed_rng(SEED)

# Report the runtime environment for the record.
for label, value in (("PyTorch version", torch.__version__),
                     ("CUDA available", torch.cuda.is_available())):
    print(f"{label}: {value}")

PyTorch version: 2.9.1+cu130
CUDA available: False


## 1. Load Experiment Configuration and Model

In [8]:
# UPDATE THIS PATH to your experiment directory
EXPERIMENT_DIR = Path("/scratch/gpfs/TSILVER/de7281/MAE345/job_2910637")

# The training configuration is read from configs/config.yaml inside the
# experiment directory.
config_path = EXPERIMENT_DIR.joinpath("configs", "config.yaml")
cfg = OmegaConf.load(config_path)

# Echo the configuration as YAML so the training settings are visible here.
print("Configuration loaded:", OmegaConf.to_yaml(cfg), sep="\n")

Configuration loaded:
models:
  _target_: drone.models.discrete_action_model.DiscreteActionModel
  pretrained: false
  action_dim: 4
  num_bins: 11
  action_low: -0.2
  action_high: 0.2
  output_space: discrete
  v3: false
experiment_dir: /scratch/gpfs/TSILVER/de7281/MAE345/job_2910637
dataset:
  data_dir: drone/datasets/imitation_data
  image_size:
  - 224
  - 224
  augment: true
  normalize_states: false
  normalize_images: true
  normalize_actions: false
training:
  train_trials: null
  val_trials: null
  batch_size: 32
  num_epochs: 20
  lr: 0.005
  weight_decay: 0.001
  early_stopping_patience: 1
  early_stopping_min_delta: 0.001
  num_workers: 8
  shuffle_train: true
log_dir: recordings
group_number: 5
camera_id: 0
hover_height: 0.5
fly_steps: 100
crop_top: 80
ckpt_path: drone/trained_models/model.pth



In [None]:
# Resolve paths relative to project root. Joining an already-absolute path
# onto project_root yields that absolute path, so re-running this cell is safe.
cfg.dataset.data_dir = str(project_root / cfg.dataset.data_dir)

# Locate the checkpoint and fail fast, before building the model, if it is missing.
model_path = EXPERIMENT_DIR / "models" / "model.pth"
if not model_path.is_file():
    raise FileNotFoundError(f"Model checkpoint not found: {model_path}")

# Instantiate model from config. `output_space` is removed first: passing it
# through makes the model constructor fail with an unexpected-keyword error.
model_cfg = OmegaConf.to_container(cfg.models, resolve=True)
output_space = model_cfg.pop('output_space', None)
model = instantiate(model_cfg)

# Load trained weights onto the CPU first. weights_only=True restricts
# unpickling to tensors and plain containers, so a tampered checkpoint cannot
# execute arbitrary code while being loaded.
# NOTE(review): assumes model.pth holds a bare state_dict (not a wrapper dict) - confirm.
state_dict = torch.load(model_path, map_location="cpu", weights_only=True)
model.load_state_dict(state_dict)
model.eval()  # inference mode for layers such as dropout / batch norm

# Move to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

print(f"\nModel loaded from: {model_path}")
print(f"Device: {device}")
print("\nModel architecture:")
print(f"  Action dim: {model.action_dim}")
print(f"  Num bins: {model.num_bins}")
print(f"  Action range: [{model.action_low}, {model.action_high}]")
print(f"  Total parameters: {sum(p.numel() for p in model.parameters()):,}")

InstantiationException: Error in call to target 'drone.models.discrete_action_model.DiscreteActionModel':
TypeError("DiscreteActionModel.__init__() got an unexpected keyword argument 'output_space'")
full_key: models

## 2. Load Test Dataset

In [None]:
# Reuse the dataset settings recorded in the training config.
dataset_kwargs = {
    option: cfg.dataset[option]
    for option in (
        "data_dir",
        "image_size",
        "normalize_images",
        "normalize_states",
        "normalize_actions",
    )
}

# Validation trials come from the training config (None selects all trials);
# augmentation is always disabled for testing.
dataset = CrazyflieILDataset(
    trial_numbers=cfg.training.val_trials,
    augment=False,
    **dataset_kwargs,
)

print(f"\nDataset loaded with {len(dataset)} samples")

## 2.5. Analyze Action Distribution (Left vs Right Moves)

In [None]:
# Analyze distribution of left vs right moves in the dataset
# Left move: vy (second coordinate) = +0.2
# Right move: vy (second coordinate) = -0.2

print("Analyzing action distribution across entire dataset...")
print(f"Total samples in dataset: {len(dataset)}\n")

# Without this guard an empty dataset fails further down with an opaque
# IndexError when the (empty) action array is indexed as 2-D.
if len(dataset) == 0:
    raise ValueError("Dataset is empty: cannot analyze the action distribution")

# Collect the ground-truth action of every sample into one (N, action_dim) array.
# NOTE(review): indexing the dataset presumably also loads each image, which
# makes this pass slow on large datasets - confirm against the dataset class.
all_actions = np.stack(
    [dataset[i]['action'].numpy() for i in range(len(dataset))]
)

# Extract vy (second coordinate, index 1)
vy_values = all_actions[:, 1]

# Count left and right moves (with some tolerance for floating point)
tolerance = 0.01
left_moves = np.sum(np.abs(vy_values - 0.2) < tolerance)
right_moves = np.sum(np.abs(vy_values + 0.2) < tolerance)
other_moves = len(vy_values) - left_moves - right_moves

# Compute fractions (total > 0 is guaranteed by the guard above)
total = len(vy_values)
left_fraction = left_moves / total
right_fraction = right_moves / total
other_fraction = other_moves / total

print("=" * 60)
print("LEFT vs RIGHT MOVE ANALYSIS")
print("=" * 60)
print(f"Left moves  (vy = +0.2): {left_moves:6d} ({left_fraction*100:5.2f}%)")
print(f"Right moves (vy = -0.2): {right_moves:6d} ({right_fraction*100:5.2f}%)")
print(f"Other moves:              {other_moves:6d} ({other_fraction*100:5.2f}%)")
print("-" * 60)
print(f"Total:                    {total:6d} (100.00%)")
print("=" * 60)

# Visualize the distribution: pie chart of the three categories on the left,
# raw vy histogram on the right.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Pie chart
ax1 = axes[0]
colors = ['#ff9999', '#66b3ff', '#99ff99']
labels = [f'Left (+0.2)\n{left_fraction*100:.1f}%', 
          f'Right (-0.2)\n{right_fraction*100:.1f}%', 
          f'Other\n{other_fraction*100:.1f}%']
sizes = [left_moves, right_moves, other_moves]
ax1.pie(sizes, labels=labels, colors=colors, autopct='%1.1f%%', startangle=90)
ax1.set_title('Distribution of Left vs Right Moves')

# Histogram of all vy values, with reference lines at the three nominal commands
ax2 = axes[1]
ax2.hist(vy_values, bins=50, alpha=0.7, edgecolor='black')
ax2.axvline(0.2, color='red', linestyle='--', linewidth=2, label='Left (+0.2)')
ax2.axvline(-0.2, color='blue', linestyle='--', linewidth=2, label='Right (-0.2)')
ax2.axvline(0, color='green', linestyle='--', linewidth=1, label='Straight (0.0)')
ax2.set_xlabel('vy (lateral velocity)')
ax2.set_ylabel('Frequency')
ax2.set_title('Distribution of vy Values')
ax2.legend()
ax2.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Show statistics for all action dimensions
print("\nAction statistics across all dimensions:")
print("=" * 60)
action_names = ['vx', 'vy', 'vz', 'yaw_rate']
for i, name in enumerate(action_names):
    values = all_actions[:, i]
    print(f"{name}:")
    print(f"  Min:    {values.min():7.4f}")
    print(f"  Max:    {values.max():7.4f}")
    print(f"  Mean:   {values.mean():7.4f}")
    print(f"  Median: {np.median(values):7.4f}")
    print(f"  Std:    {values.std():7.4f}")
    print()
print("=" * 60)

## 3. Test Model on Random Samples

In [None]:
def predict_action(model, image_tensor, device):
    """Run a gradient-free forward pass and collect the model's outputs.

    Args:
        model: Callable model that also provides
            ``output_to_executable_actions(logits)``.
        image_tensor: Input image tensor. A 3-D tensor is treated as a single
            image and gets a leading batch dimension; any other rank is
            passed through unchanged.
        device: Device the input is moved to before the forward pass.

    Returns:
        dict with CPU tensors under the keys ``'logits'``, ``'probs'``
        (softmax of the logits over the last dimension),
        ``'continuous_action'`` and ``'bin_indices'`` (argmax of the logits
        over the last dimension).
    """
    with torch.no_grad():
        # A lone image needs a batch axis before it can be fed to the model.
        batch = image_tensor.unsqueeze(0) if image_tensor.dim() == 3 else image_tensor
        batch = batch.to(device)

        logits = model(batch)  # expected shape: (1, action_dim, num_bins)

        outputs = {
            'logits': logits,
            'probs': torch.softmax(logits, dim=-1),
            'continuous_action': model.output_to_executable_actions(logits),
            'bin_indices': torch.argmax(logits, dim=-1),
        }

        # Hand everything back on the CPU so callers can convert to NumPy.
        return {key: tensor.cpu() for key, tensor in outputs.items()}

def denormalize_image(image_tensor, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
    """Convert a normalized (C, H, W) image tensor to a displayable (H, W, C) array.

    Undoes a per-channel ``(x - mean) / std`` normalization, clamps the result
    to [0, 1] and moves the channel axis last for matplotlib.

    Args:
        image_tensor: Image tensor laid out as (C, H, W).
        mean: Per-channel means used for normalization. Defaults to the
            ImageNet statistics; pass the dataset's own values if it was
            normalized differently.
        std: Per-channel standard deviations, same convention as ``mean``.

    Returns:
        numpy.ndarray of shape (H, W, C) with values in [0, 1].
    """
    # view(-1, 1, 1) broadcasts one statistic per channel over H and W.
    mean_t = torch.as_tensor(mean, dtype=torch.float32).view(-1, 1, 1)
    std_t = torch.as_tensor(std, dtype=torch.float32).view(-1, 1, 1)

    # detach() lets tensors that still carry autograd history be converted to NumPy.
    img = image_tensor.detach().cpu() * std_t + mean_t
    img = img.clamp(0, 1)
    return img.permute(1, 2, 0).numpy()

In [None]:
# Get random samples. Sampling without replacement cannot draw more items than
# the dataset holds, so cap the request at the dataset size.
num_samples = min(5, len(dataset))
random_indices = np.random.choice(len(dataset), num_samples, replace=False)

# Create figure: one row per sample, image on the left and action bars on the
# right. squeeze=False always returns a 2-D array of axes, so a single-row
# figure needs no special-case reshape.
fig, axes = plt.subplots(num_samples, 2, figsize=(15, 4*num_samples), squeeze=False)

action_names = ['vx', 'vy', 'vz', 'yaw_rate']

for i, idx in enumerate(random_indices):
    # Get sample
    sample = dataset[idx]
    image = sample['observation']
    true_action = sample['action']
    state = sample['state']
    
    # Predict action and drop the batch dimension from the outputs
    pred_output = predict_action(model, image, device)
    pred_action = pred_output['continuous_action'].squeeze(0)  # (action_dim,)
    probs = pred_output['probs'].squeeze(0)  # (action_dim, num_bins)
    
    # Display image (undo normalization so colours look natural)
    ax_img = axes[i, 0]
    display_img = denormalize_image(image)
    ax_img.imshow(display_img)
    ax_img.set_title(f"Sample {idx}\nState: [{state[0]:.2f}, {state[1]:.2f}, {state[2]:.2f}]")
    ax_img.axis('off')
    
    # Display action comparison as side-by-side bars per action dimension
    ax_action = axes[i, 1]
    x = np.arange(len(action_names))
    width = 0.35
    
    ax_action.bar(x - width/2, true_action.numpy(), width, label='Ground Truth', alpha=0.8)
    ax_action.bar(x + width/2, pred_action.numpy(), width, label='Predicted', alpha=0.8)
    
    ax_action.set_ylabel('Action Value')
    ax_action.set_title('Action Comparison')
    ax_action.set_xticks(x)
    ax_action.set_xticklabels(action_names)
    ax_action.legend()
    ax_action.grid(True, alpha=0.3)
    ax_action.axhline(y=0, color='k', linestyle='-', linewidth=0.5)
    
    # Add error text: mean absolute error across the action dimensions
    error = torch.abs(pred_action - true_action).mean()
    ax_action.text(0.02, 0.98, f'MAE: {error:.4f}', 
                   transform=ax_action.transAxes, verticalalignment='top',
                   bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))

plt.tight_layout()
plt.show()

## 4. Visualize Probability Distributions

In [None]:
# Take the first of the randomly drawn samples for a closer look
sample_idx = random_indices[0]
sample = dataset[sample_idx]
image, true_action = sample['observation'], sample['action']

# Run the model once and drop the batch dimension from the outputs we plot
pred_output = predict_action(model, image, device)
probs, pred_action = (
    pred_output[key].squeeze(0) for key in ('probs', 'continuous_action')
)  # probs: (action_dim, num_bins)

# Bin indices of the ground-truth action (computed for inspection; not plotted)
true_bins = model.continuous_to_bins(true_action.unsqueeze(0)).squeeze(0)

# Evenly spaced action values, one per bin, used as bar positions on the x-axis
bin_values = np.linspace(model.action_low, model.action_high, model.num_bins)
bar_width = (bin_values[1] - bin_values[0]) * 0.8
x_limits = (model.action_low - 0.05, model.action_high + 0.05)

# One panel per action dimension in a 2x2 grid
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
axes = axes.flatten()

for dim, action_name in enumerate(action_names):
    panel = axes[dim]
    predicted_value, true_value = pred_action[dim], true_action[dim]

    # Predicted probability mass per bin
    panel.bar(bin_values, probs[dim].numpy(), width=bar_width,
              alpha=0.6, label='Predicted Probability')

    # Vertical markers: blue for the model's action, red for the ground truth
    panel.axvline(predicted_value.item(), color='blue', linestyle='--',
                  linewidth=2, label=f'Predicted: {predicted_value:.3f}')
    panel.axvline(true_value.item(), color='red', linestyle='--',
                  linewidth=2, label=f'True: {true_value:.3f}')

    panel.set_xlabel('Action Value')
    panel.set_ylabel('Probability')
    panel.set_title(f'{action_name} Distribution')
    panel.legend()
    panel.grid(True, alpha=0.3)
    panel.set_xlim(*x_limits)

plt.suptitle(f'Action Probability Distributions - Sample {sample_idx}', fontsize=14)
plt.tight_layout()
plt.show()

# Show the input image that produced the distributions above
plt.figure(figsize=(6, 6))
plt.imshow(denormalize_image(image))
plt.title(f'Input Image - Sample {sample_idx}')
plt.axis('off')
plt.show()

## 5. Compute Overall Statistics

In [None]:
# Draw up to 100 distinct samples for the aggregate evaluation
num_eval_samples = min(100, len(dataset))
eval_indices = np.random.choice(len(dataset), num_eval_samples, replace=False)

print(f"Evaluating on {num_eval_samples} samples...")

# Run the model one sample at a time, pairing each prediction with its label
all_true_actions, all_pred_actions = [], []
for idx in eval_indices:
    sample = dataset[idx]
    image, true_action = sample['observation'], sample['action']

    pred_output = predict_action(model, image, device)
    pred_action = pred_output['continuous_action'].squeeze(0)

    all_true_actions.append(true_action.numpy())
    all_pred_actions.append(pred_action.numpy())

all_true_actions = np.array(all_true_actions)  # (N, action_dim)
all_pred_actions = np.array(all_pred_actions)  # (N, action_dim)

# Per-dimension metrics: average over the sample axis only
residuals = all_true_actions - all_pred_actions
mae = np.abs(residuals).mean(axis=0)
mse = (residuals**2).mean(axis=0)
rmse = np.sqrt(mse)

# Results table; the "Overall" row is the mean of the per-dimension values
heavy_rule, light_rule = "="*60, "-"*60
print("\n" + heavy_rule)
print(f"Evaluation Results on {num_eval_samples} samples")
print(heavy_rule)
print(f"{'Action':<12} {'MAE':<12} {'RMSE':<12}")
print(light_rule)
for k in range(len(action_names)):
    print(f"{action_names[k]:<12} {mae[k]:<12.4f} {rmse[k]:<12.4f}")
print(light_rule)
print(f"{'Overall':<12} {mae.mean():<12.4f} {rmse.mean():<12.4f}")
print(heavy_rule)

## 6. Visualize Error Distribution

In [None]:
# Signed prediction errors (prediction minus ground truth), one column per action dimension
errors = all_pred_actions - all_true_actions

# One histogram panel per action dimension in a 2x2 grid
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
axes = axes.flatten()

for dim, action_name in enumerate(action_names):
    panel = axes[dim]
    dim_errors = errors[:, dim]
    mean_error = dim_errors.mean()

    # Error histogram with reference lines at zero and at the mean error (bias)
    panel.hist(dim_errors, bins=30, alpha=0.7, edgecolor='black')
    panel.axvline(0, color='red', linestyle='--', linewidth=2, label='Zero Error')
    panel.axvline(mean_error, color='green', linestyle='--',
                  linewidth=2, label=f'Mean: {mean_error:.4f}')

    panel.set_xlabel('Prediction Error')
    panel.set_ylabel('Frequency')
    panel.set_title(f'{action_name} Error Distribution\nStd: {dim_errors.std():.4f}')
    panel.legend()
    panel.grid(True, alpha=0.3)

plt.suptitle('Prediction Error Distributions', fontsize=14)
plt.tight_layout()
plt.show()

## 7. Test on Custom Image (Optional)

Load your own image and see what action the model predicts.

In [None]:
def predict_from_image_path(image_path, model, device):
    """Load an image from disk, predict an action for it and plot the result.

    Relies on the notebook-level names ``cfg``, ``action_names`` and
    ``predict_action``.

    Args:
        image_path: Path of the image file to load.
        model: Trained model, forwarded to ``predict_action``.
        device: Device used for inference, forwarded to ``predict_action``.

    Returns:
        Tuple ``(pred_action, probs)``: the predicted continuous action and
        the per-bin probabilities, each with the batch dimension removed.
    """
    # Open inside a context manager so the file handle is released right
    # away; convert() returns an independent, fully loaded RGB copy.
    with Image.open(image_path) as raw_img:
        img = raw_img.convert('RGB')
    
    # Resize wants a plain int or list of ints, not an OmegaConf list node.
    image_size = cfg.dataset.image_size
    if not isinstance(image_size, int):
        image_size = [int(side) for side in image_size]
    
    # Follow the dataset settings from the config: normalize only when the
    # dataset was built with normalize_images enabled.
    # NOTE(review): assumes the dataset normalizes with ImageNet statistics, and
    # the config also carries a `crop_top` value that is not applied here -
    # confirm both against the dataset implementation.
    steps = [
        transforms.Resize(image_size),
        transforms.ToTensor(),
    ]
    if cfg.dataset.normalize_images:
        steps.append(
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        )
    transform = transforms.Compose(steps)
    
    img_tensor = transform(img)
    
    # Predict and drop the batch dimension
    pred_output = predict_action(model, img_tensor, device)
    pred_action = pred_output['continuous_action'].squeeze(0)
    probs = pred_output['probs'].squeeze(0)
    
    # Display the original image next to the predicted action
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))
    
    # Show image
    ax1.imshow(img)
    ax1.set_title('Input Image')
    ax1.axis('off')
    
    # Show predicted action
    ax2.bar(action_names, pred_action.numpy())
    ax2.set_ylabel('Action Value')
    ax2.set_title('Predicted Action')
    ax2.grid(True, alpha=0.3)
    ax2.axhline(y=0, color='k', linestyle='-', linewidth=0.5)
    
    plt.tight_layout()
    plt.show()
    
    print("Predicted action:")
    for name, val in zip(action_names, pred_action.numpy()):
        print(f"  {name}: {val:.4f}")
    
    return pred_action, probs

# Example usage (uncomment and provide your own image path):
# custom_image_path = "path/to/your/image.png"
# pred_action, probs = predict_from_image_path(custom_image_path, model, device)

## 8. Save Predictions (Optional)

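Save the evaluation metrics (per-action and overall MAE/RMSE) together with the configuration to a JSON file in the experiment directory for further analysis.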

In [None]:
# Metrics keyed by action name; values are cast to plain floats because the
# json module cannot serialize NumPy scalars.
metrics = {
    'mae_per_action': {name: float(mae[k]) for k, name in enumerate(action_names)},
    'rmse_per_action': {name: float(rmse[k]) for k, name in enumerate(action_names)},
    'overall_mae': float(mae.mean()),
    'overall_rmse': float(rmse.mean()),
}

# Bundle the metrics with enough context (paths, sample count, config) to
# trace the numbers back to this run.
results = {
    'experiment_dir': str(EXPERIMENT_DIR),
    'model_path': str(model_path),
    'num_samples_evaluated': num_eval_samples,
    'metrics': metrics,
    'config': OmegaConf.to_container(cfg, resolve=True),
}

# Write the report into the experiment directory
output_path = EXPERIMENT_DIR / "inference_results.json"
with output_path.open('w') as f:
    json.dump(results, f, indent=2)

print(f"\nResults saved to: {output_path}")