In [3]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image

from torch.utils.tensorboard import SummaryWriter

from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Dataset

# Select the compute device: CUDA when PyTorch reports a usable GPU, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Release any cached-but-unused GPU memory held from earlier work in this kernel.
torch.cuda.empty_cache()

In [None]:
# Data Preparation
# Read the per-trajectory CSV logs and stack them into one frame with a fresh index.
file_paths = [
    '/home/jovyan/userdata/ssajja2s/data_files/data_with_images/go1_images_with_data_01.csv',
    '/home/jovyan/userdata/ssajja2s/data_files/data_with_images/go1_images_with_data_02.csv'
]
dfs = [pd.read_csv(file_path) for file_path in file_paths]
df_combined = pd.concat(dfs, ignore_index=True)


# The last 12 columns are kept aside as tau_data; everything before them,
# minus the 'time' column, is the observation/state data.
obs_state_data = df_combined.iloc[:, :-12].drop('time', axis=1)
tau_data = df_combined.iloc[:, -12:]

# The first 10 observation/state columns are the regression targets,
# the remaining columns are the model inputs.
out_d = obs_state_data.iloc[:, :10]  # First 10 columns
in_d = obs_state_data.iloc[:, 10:]  # Remaining columns

# Load and preprocess images from multiple directories
image_dirs = [
    '/home/jovyan/userdata/ssajja2s/data_files/images_trj1',
    '/home/jovyan/userdata/ssajja2s/data_files/images_trj2'
]

# List every directory exactly once so that the file count and the images that
# are actually loaded can never disagree (e.g. if a directory changes between
# two listings). `image_files` keeps the bare file names, `image_paths` the
# corresponding full paths in the same order.
image_files = []
image_paths = []
for image_dir in image_dirs:
    names_in_dir = sorted(os.listdir(image_dir))
    image_files.extend(names_in_dir)
    image_paths.extend(os.path.join(image_dir, name) for name in names_in_dir)

if not image_files:
    raise ValueError("No image files found in image_dirs.")

image_size = (200, 200)

print(f"Number of image files: {len(image_files)}")
print(f"Input shape: {in_d.shape}")
print(f"Output shape: {out_d.shape}")

# Process and resize images. The context manager closes each file as soon as
# its pixels have been converted, instead of leaving that to garbage collection.
images = []
for img_path in image_paths:
    with Image.open(img_path) as img:
        images.append(np.array(img.convert('RGB').resize(image_size)))

# Ensure images match the dataset (checked before the expensive tensor conversion)
assert len(images) == len(image_files), "Mismatch between processed images and file count."

# Convert images list to NumPy array
images_data = np.array(images)

# Verify the shapes of images, input, and output data
print(f"Images shape: {images_data.shape}")  # Expected: (num_images, 200, 200, 3)

# Convert observation state data to NumPy arrays
in_d = in_d.to_numpy()
out_d = out_d.to_numpy()

# Convert all data to PyTorch tensors
images_data = torch.tensor(images_data, dtype=torch.float32)
in_d = torch.tensor(in_d, dtype=torch.float32)
out_d = torch.tensor(out_d, dtype=torch.float32)

# Normalize image tensor
images_data = images_data / 255.0  # Normalize pixel values to [0, 1]

# Create image indices for all observations: consecutive runs of
# `observations_per_image` rows share one image, and any leftover rows at the
# end are clamped onto the last image.
# NOTE(review): this assumes CSV rows and sorted image files are in the same
# order and evenly spaced - confirm against how the data was recorded.
observations_per_image = len(df_combined) // len(image_files)
if observations_per_image == 0:
    raise ValueError(
        f"Cannot map {len(df_combined)} observation rows onto {len(image_files)} images: "
        "there are more images than rows."
    )
image_indices = [min(i // observations_per_image, len(image_files) - 1) for i in range(len(df_combined))]

# Verify correctness of image indices
assert len(image_indices) == len(df_combined), "Mismatch in lengths of image_indices and dataset"
assert max(image_indices) < len(image_files), "Index out of bounds in image_indices"
assert min(image_indices) >= 0, "Negative index found in image_indices"

# Print final checks
print(f"Final Images tensor shape: {images_data.shape}")
print(f"Input tensor shape: {in_d.shape}")
print(f"Output tensor shape: {out_d.shape}")


In [None]:
# Dataset Class
class HybridDataset(Dataset):
    """Dataset yielding ``(image, observation, state)`` triples.

    One image may be shared by several observation rows: ``image_indices[i]``
    is the position inside ``images`` of the image that belongs to row ``i``.
    """

    def __init__(self, images, observations, states, image_indices, transform=None):
        """Keep references to the given containers (nothing is copied).

        Args:
            images: Indexable collection of images; each item must support
                ``permute(2, 0, 1)`` (i.e. be laid out as height, width, channel).
            observations: Indexable collection of model inputs, one per sample.
            states: Indexable collection of targets, one per sample.
            image_indices: For every sample, the index of its image in ``images``.
            transform: Optional callable applied to the channel-first image.
        """
        self.transform = transform
        self.image_indices = image_indices
        self.states = states
        self.observations = observations
        self.images = images

    def __len__(self):
        """Return the number of samples, i.e. the number of observation rows."""
        return len(self.observations)

    def __getitem__(self, idx):
        """Return ``(image, observation, state)`` for sample ``idx``.

        The image is returned channel-first, with ``transform`` applied when
        one was supplied.
        """
        # Resolve which image this observation row belongs to.
        raw_image = self.images[self.image_indices[idx]]
        obs_row = self.observations[idx]
        state_row = self.states[idx]

        # (H, W, C) -> (C, H, W)
        chw_image = raw_image.permute(2, 0, 1)

        if not self.transform:
            return chw_image, obs_row, state_row
        return self.transform(chw_image), obs_row, state_row

# Per-channel standardisation, applied by HybridDataset after it has moved the
# image to (C, H, W) layout. One mean/std entry per RGB channel.
# NOTE(review): these values look like the usual ImageNet statistics - confirm
# they are appropriate for this image data.
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]
transform = transforms.Compose([transforms.Normalize(mean=channel_mean, std=channel_std)])

In [None]:
# Train-Test Split
# train_test_split shuffles the rows, so the row -> image mapping must be
# partitioned with the *same* permutation as the inputs and targets. Slicing
# image_indices sequentially would pair every shuffled row with the wrong image.
# Splitting row positions once and indexing all three containers with them
# keeps them aligned; the resulting X/y split is the same as splitting in_d and
# out_d directly with this random_state.
sample_positions = np.arange(len(in_d))
train_positions, test_positions = train_test_split(
    sample_positions, test_size=0.1, random_state=42
)

# Index the tensors with explicit int64 index tensors.
train_rows = torch.as_tensor(train_positions, dtype=torch.long)
test_rows = torch.as_tensor(test_positions, dtype=torch.long)

X_train, X_test = in_d[train_rows], in_d[test_rows]
y_train, y_test = out_d[train_rows], out_d[test_rows]

# Image index for each row of the train/test split, in the same order as X_*/y_*.
train_image_indices = [image_indices[pos] for pos in train_positions]
test_image_indices = [image_indices[pos] for pos in test_positions]

assert len(train_image_indices) == len(X_train), "Train image indices misaligned with X_train"
assert len(test_image_indices) == len(X_test), "Test image indices misaligned with X_test"


In [None]:
# Mini-batch size shared by the train and test loaders
batch_size = 16

# Training split: dataset plus a loader that reshuffles the samples every epoch
train_dataset = HybridDataset(
    images_data, X_train, y_train, train_image_indices, transform=transform
)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)

# Held-out split: same wrapping, but iterated in a fixed order
test_dataset = HybridDataset(
    images_data, X_test, y_test, test_image_indices, transform=transform
)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

In [None]:
# Define the HybridNN class
class HybridNN(nn.Module):
    """Two-stream regressor combining an RGB image with an observation vector.

    The image is passed through three parallel convolutions that differ only in
    stride (4, 2 and 1); their flattened outputs are concatenated and reduced to
    a 64-dimensional feature by two linear layers. The observation vector is
    reduced to 64 dimensions by its own two linear layers. Both features are
    concatenated, passed through one hidden layer of width 128 and mapped to
    ``output_size`` values (no activation on the output).

    Args:
        input_obs_size: Length of the observation vector.
        output_size: Number of values to predict.
        image_size: ``(height, width)`` of the input images. It fixes the input
            width of the first image linear layer, so images passed to
            ``forward`` must have exactly this size.
    """

    def __init__(self, input_obs_size=34, output_size=10, image_size=(200, 200)):
        super().__init__()

        # Image Network: 3 ConvNet branches that all read the raw image
        self.conv1 = nn.Conv2d(3, 32, kernel_size=8, stride=4, padding=3)
        self.conv2 = nn.Conv2d(3, 32, kernel_size=8, stride=2, padding=3)
        self.conv3 = nn.Conv2d(3, 32, kernel_size=8, stride=1, padding=3)

        # Probe the branches with a dummy image to learn the flattened feature
        # size. Only shapes are needed, so skip building an autograd graph.
        height, width = image_size
        with torch.no_grad():
            sample_input = torch.zeros(1, 3, height, width)
            self.conv_out_dim = sum(
                torch.flatten(branch(sample_input), 1).shape[1]
                for branch in (self.conv1, self.conv2, self.conv3)
            )

        # Fully connected layers for image features
        self.fc1 = nn.Linear(self.conv_out_dim, 64)
        self.fc2 = nn.Linear(64, 64)

        # Observation Network
        self.obs_fc1 = nn.Linear(input_obs_size, 64)
        self.obs_fc2 = nn.Linear(64, 64)

        # Combined Network
        self.fc_combined = nn.Linear(64 + 64, 128)
        self.output_layer = nn.Linear(128, output_size)

    def forward(self, image, observation):
        """Predict the outputs for a batch.

        Args:
            image: Tensor of shape ``(batch, 3, height, width)`` matching the
                ``image_size`` given at construction.
            observation: Tensor of shape ``(batch, input_obs_size)``.

        Returns:
            Tensor of shape ``(batch, output_size)``.
        """
        # Image processing: each branch sees the same input image
        x1 = torch.relu(self.conv1(image))
        x1 = torch.flatten(x1, 1)

        x2 = torch.relu(self.conv2(image))
        x2 = torch.flatten(x2, 1)

        x3 = torch.relu(self.conv3(image))
        x3 = torch.flatten(x3, 1)

        image_features = torch.cat([x1, x2, x3], dim=1)
        image_features = torch.relu(self.fc1(image_features))
        image_features = torch.relu(self.fc2(image_features))

        # Observation processing
        obs_features = torch.relu(self.obs_fc1(observation))
        obs_features = torch.relu(self.obs_fc2(obs_features))

        # Combine image and observation features
        combined_features = torch.cat([image_features, obs_features], dim=1)
        x = torch.relu(self.fc_combined(combined_features))

        # Output layer
        output = self.output_layer(x)
        return output

In [None]:
# Size the network from the prepared tensors: one input unit per observation
# column and one output unit per target column, then move it to the chosen device.
n_obs_features = in_d.shape[1]
n_state_outputs = out_d.shape[1]
model = HybridNN(input_obs_size=n_obs_features, output_size=n_state_outputs)
model = model.to(device)

# Mean-squared-error objective, optimised with Adam (with weight decay)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), weight_decay=1e-5, lr=1e-6)


In [None]:
# Create a SummaryWriter to log data for TensorBoard
writer = SummaryWriter('runs/test_image_data')

# Training Loop
# Each epoch makes one optimisation pass over train_loader followed by one
# gradient-free pass over test_loader. The logged losses are the mean of the
# per-batch losses.
num_epochs = 5
try:
    for epoch in range(num_epochs):
        model.train()
        running_train_loss = 0.0

        # Batch variables carry a `batch_` prefix so they do not overwrite the
        # notebook-level `images` list built during data preparation.
        for batch_images, batch_observations, batch_states in train_loader:
            # Move data to device
            batch_images = batch_images.to(device)
            batch_observations = batch_observations.to(device)
            batch_states = batch_states.to(device)

            # Forward pass
            outputs = model(batch_images, batch_observations)

            # Calculate loss
            loss = criterion(outputs, batch_states)

            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_train_loss += loss.item()

        # max(..., 1) keeps an empty loader from raising ZeroDivisionError
        # (the average is then reported as 0.0).
        avg_train_loss = running_train_loss / max(len(train_loader), 1)
        writer.add_scalar('Loss/train', avg_train_loss, epoch)

        model.eval()
        running_test_loss = 0.0
        with torch.no_grad():
            for batch_images, batch_observations, batch_states in test_loader:
                batch_images = batch_images.to(device)
                batch_observations = batch_observations.to(device)
                batch_states = batch_states.to(device)

                outputs = model(batch_images, batch_observations)
                loss = criterion(outputs, batch_states)
                running_test_loss += loss.item()

        avg_test_loss = running_test_loss / max(len(test_loader), 1)
        writer.add_scalar('Loss/val', avg_test_loss, epoch)

        print(f"Epoch [{epoch + 1}/{num_epochs}], Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_test_loss:.4f}")
finally:
    # Flush and close the event file even if an epoch fails or is interrupted.
    writer.close()

In [None]:
# evaluate the model
model.eval()

# Define paths for evaluation data and images
eval_path = '/home/jovyan/userdata/ssajja2s/data_files/eval_data/go1_images_with_data_e.csv'
eval_image_path = '/home/jovyan/userdata/ssajja2s/data_files/images_e'

# Load evaluation data; same column layout as the training CSVs: the last 12
# columns are set aside and 'time' is dropped from the rest.
eval_data = pd.read_csv(eval_path)
eval_obs_state_data = eval_data.iloc[:, :-12].drop('time', axis=1)
eval_tau_data = eval_data.iloc[:, -12:]

# Further split observation state data into input and output
eval_out_d = eval_obs_state_data.iloc[:, :10]  # First 10 columns
eval_in_d = eval_obs_state_data.iloc[:, 10:]  # Remaining columns

# List the image directory exactly once; every later count and bound uses this
# snapshot so they cannot disagree if the directory changes while the cell runs.
eval_image_files = sorted(os.listdir(eval_image_path))
if not eval_image_files:
    raise ValueError(f"No evaluation images found in {eval_image_path}.")

# Load and preprocess evaluation images (resized to the training image_size).
# The context manager closes each file once its pixels have been converted.
eval_images = []
for image_file in eval_image_files:
    with Image.open(os.path.join(eval_image_path, image_file)) as img:
        eval_images.append(np.array(img.convert('RGB').resize(image_size)))

# Ensure evaluation images match the dataset
assert len(eval_images) == len(eval_image_files), "Mismatch between processed evaluation images and file count."

# Convert evaluation images list to NumPy array
eval_images_data = np.array(eval_images)

# Convert evaluation observation state data to NumPy arrays
eval_in_d = eval_in_d.to_numpy()
eval_out_d = eval_out_d.to_numpy()

# Convert all evaluation data to PyTorch tensors
eval_images_data = torch.tensor(eval_images_data, dtype=torch.float32)
eval_in_d = torch.tensor(eval_in_d, dtype=torch.float32)
eval_out_d = torch.tensor(eval_out_d, dtype=torch.float32)

# Normalize evaluation image tensor
eval_images_data = eval_images_data / 255.0  # Normalize pixel values to [0, 1]

# Create image indices for all evaluation observations: consecutive runs of
# rows share one image, leftover rows at the end are clamped to the last image.
eval_observations_per_image = len(eval_data) // len(eval_image_files)
if eval_observations_per_image == 0:
    raise ValueError(
        f"Cannot map {len(eval_data)} evaluation rows onto {len(eval_image_files)} images: "
        "there are more images than rows."
    )
eval_image_indices = [
    min(i // eval_observations_per_image, len(eval_image_files) - 1)
    for i in range(len(eval_data))
]

# Verify correctness of evaluation image indices
assert len(eval_image_indices) == len(eval_data), "Mismatch in lengths of evaluation image_indices and dataset"
assert max(eval_image_indices) < len(eval_image_files), "Index out of bounds in evaluation image_indices"
assert min(eval_image_indices) >= 0, "Negative index found in evaluation image_indices"

# Print final checks
print(f"Final Evaluation Images tensor shape: {eval_images_data.shape}")
print(f"Evaluation Input tensor shape: {eval_in_d.shape}")
print(f"Evaluation Output tensor shape: {eval_out_d.shape}")

# Create evaluation dataset
eval_dataset = HybridDataset(eval_images_data, eval_in_d, eval_out_d, eval_image_indices, transform=transform)

# Define batch size for evaluation
eval_batch_size = 16

# Create DataLoader for evaluation dataset (fixed order, no shuffling)
eval_loader = DataLoader(eval_dataset, batch_size=eval_batch_size, shuffle=False)

# Create a new SummaryWriter for evaluation logs
eval_writer = SummaryWriter('runs/test_image_data')

# Chart titles for the predicted quantities, in output-column order.
state_names = [
    'base_pos_x', 'base_pos_y', 'base_pos_z',
    'base_ori_x', 'base_ori_y', 'base_ori_z', 'base_ori_w',
    'base_vel_x', 'base_vel_y', 'base_vel_z'
]

# Evaluation loop: log model predictions against actual values.
# add_scalars expects one scalar per series and step, so each sample is logged
# on its own step (its running position in eval_loader) instead of passing a
# whole batch array at once.
sample_step = 0
try:
    with torch.no_grad():  # Disable gradient computation for evaluation
        for batch_images, batch_observations, batch_states in eval_loader:
            # Move data to device
            batch_images = batch_images.to(device)
            batch_observations = batch_observations.to(device)
            batch_states = batch_states.to(device)

            # Forward pass to get predictions
            outputs = model(batch_images, batch_observations)

            # Convert to NumPy for logging
            actual_states = batch_states.detach().cpu().numpy()
            predicted_states = outputs.detach().cpu().numpy()

            num_samples, num_states = predicted_states.shape

            # Log actual and predicted values for each sample and each state
            for row in range(num_samples):
                for i in range(num_states):
                    # Fall back to a generic title if the model has more
                    # outputs than there are names.
                    chart = state_names[i] if i < len(state_names) else f'state_{i}'
                    eval_writer.add_scalars(chart, {
                        'Actual value': float(actual_states[row, i]),
                        'Predicted value': float(predicted_states[row, i])
                    }, global_step=sample_step)
                sample_step += 1
finally:
    # Close the SummaryWriter even if evaluation fails part-way through
    eval_writer.close()

print("Evaluation completed.")


