<h1>1. Import Libraries</h1>

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, models
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import pandas as pd
import os
import time
import numpy as np
import os
import cv2

<h1>2. Data Preprocessing</h1>

In [2]:
# Function to load and preprocess the dataset
def load_data(csv_file, img_dir, transform=None):
    data = pd.read_csv(csv_file)
    images, physical_features, labels = [], [], []

    for idx in range(len(data)):
        img_id = str(data.iloc[idx]['id'])
        img_path = os.path.join(img_dir, f"{img_id}.jpg")
        image = Image.open(img_path).convert("RGB")
        
        if transform:
            image = transform(image)
        
        stable_height = int(data.iloc[idx]['stable_height']) - 1  # zero-based class index
        block_type = data.iloc[idx]['type']
        cam_angle = data.iloc[idx]['cam_angle']

        images.append(image)
        physical_features.append([block_type, cam_angle])
        labels.append(stable_height)
    
    return images, torch.tensor(physical_features, dtype=torch.float32), torch.tensor(labels)


<h1>3. Model Definition</h1>

In [3]:
import torch
import torch.nn as nn
from torchvision import models

# Define the model
class CustomResNetWithFeatures(nn.Module):
    def __init__(self, num_types, num_classes):
        super(CustomResNetWithFeatures, self).__init__()
        self.resnet = models.resnet18(pretrained=True)  # Using ResNet18
        self.resnet.fc = nn.Linear(self.resnet.fc.in_features, 128)  # Modify ResNet output to 128 features

        # Embedding for block type
        self.type_embedding = nn.Embedding(num_types, 10)  # Embedding for block types
        
        # Fully connected layers for classification
        self.fc1 = nn.Linear(128 + 10 + 1, 64)  # Combine image features, block type embedding, and cam_angle
        self.fc2 = nn.Linear(64, num_classes)  # Final output for classification (logits for each class)

    def forward(self, x, block_type, cam_angle):
        x = self.resnet(x)  # Pass through ResNet
        type_embed = self.type_embedding(block_type)  # Get block type embeddings
        cam_angle = cam_angle.unsqueeze(1)  # Ensure cam_angle has correct shape
        
        # Concatenate features
        combined = torch.cat((x, type_embed, cam_angle), dim=1)
        
        x = torch.relu(self.fc1(combined))  # Pass through the first fully connected layer
        logits = self.fc2(x)  # Get final class logits
        return logits

# Update get_model function to return the model
def get_model(num_types, num_classes):
    return CustomResNetWithFeatures(num_types, num_classes)


<h1>4. Training Function</h1>

In [4]:
def train_model(csv_file_path, images_dir_path, num_types, num_classes=6, num_epochs=12, batch_size=16, model_save_path="model.pth"):
    # Define image transformations
    transform = transforms.Compose([
        transforms.RandomRotation(15),
        transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
        transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    
    # Load data
    images, physical_features, labels = load_data(csv_file_path, images_dir_path, transform)
    dataset = list(zip(images, physical_features, labels))
    
    # Use DataLoader without the need to stack tensors manually
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    # Initialize model and optimizer
    model = get_model(num_types, num_classes)
    criterion = nn.CrossEntropyLoss()  # For classification
    optimizer = optim.Adam(model.parameters(), lr=0.0002, weight_decay=1e-4)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    # Training loop
    for epoch in range(num_epochs):
        running_loss = 0.0
        running_correct = 0
        model.train()

        for batch in dataloader:
            images, physical_features, stable_heights = batch
            images = images.to(device)  # No need to stack, already a tensor
            block_type = (physical_features[:, 0] - 1).long().to(device)
            cam_angle = physical_features[:, 1].to(device)
            stable_heights = stable_heights.to(device)

            optimizer.zero_grad()
            logits = model(images, block_type, cam_angle)
            loss = criterion(logits, stable_heights)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted_classes = torch.max(logits, 1)
            running_correct += (predicted_classes == stable_heights).sum().item()

        accuracy = running_correct / len(dataloader.dataset)
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(dataloader.dataset):.4f}, Accuracy: {accuracy:.4f}")

        # Save the model after every epoch
        torch.save(model.state_dict(), model_save_path)
        print(f"Model saved after Epoch {epoch+1} to {model_save_path}")
    
    return model

<h1>5. Saving and Loading Model</h1>

In [5]:
# Function to save the model
def save_model(model, model_path):
    torch.save({'model_state_dict': model.state_dict()}, model_path)

# Function to load the model
def load_model(model_path, num_types):
    model = get_model(num_types, num_classes=6)
    checkpoint = torch.load(model_path)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()  # Set to evaluation mode
    return model


<h1>7. Running the Training</h1>

In [7]:
def load_model(model_save_path, num_types, num_classes):
    # Initialize the model
    model = get_model(num_types, num_classes)
    
    # Load the saved model state dict
    model.load_state_dict(torch.load(model_save_path))
    model.eval()  # Set the model to evaluation mode (important for inference)
    
    return model


In [8]:
csv_file_path = '../COMP90086_2024_Project_train/train.csv'  # Path to your training CSV
images_dir_path = '../COMP90086_2024_Project_train/train'  # Path to your training images
model = train_model(csv_file_path, images_dir_path, num_types=2, num_classes=6, model_save_path='model_2.pth')




Epoch [1/12], Loss: 0.0902, Accuracy: 0.3727
Model saved after Epoch 1 to model_2.pth
Epoch [2/12], Loss: 0.0764, Accuracy: 0.4910
Model saved after Epoch 2 to model_2.pth
Epoch [3/12], Loss: 0.0664, Accuracy: 0.5734
Model saved after Epoch 3 to model_2.pth
Epoch [4/12], Loss: 0.0569, Accuracy: 0.6503
Model saved after Epoch 4 to model_2.pth
Epoch [5/12], Loss: 0.0461, Accuracy: 0.7219
Model saved after Epoch 5 to model_2.pth
Epoch [6/12], Loss: 0.0341, Accuracy: 0.8064
Model saved after Epoch 6 to model_2.pth
Epoch [7/12], Loss: 0.0257, Accuracy: 0.8491
Model saved after Epoch 7 to model_2.pth
Epoch [8/12], Loss: 0.0184, Accuracy: 0.8977
Model saved after Epoch 8 to model_2.pth
Epoch [9/12], Loss: 0.0138, Accuracy: 0.9271
Model saved after Epoch 9 to model_2.pth
Epoch [10/12], Loss: 0.0118, Accuracy: 0.9342
Model saved after Epoch 10 to model_2.pth
Epoch [11/12], Loss: 0.0089, Accuracy: 0.9520
Model saved after Epoch 11 to model_2.pth
Epoch [12/12], Loss: 0.0094, Accuracy: 0.9493
Mode

In [10]:
model_ = load_model('model_2.pth', num_types=2, num_classes=6)

  model.load_state_dict(torch.load(model_save_path))


In [11]:
def load_images_from_folder(folder_path, ids, image_size=(224, 224)):
    images = []
    for id in ids:
        filename = str(int(id)) + '.jpg'

        # Load the image using OpenCV
        img_path = os.path.join(folder_path, filename)
        img = cv2.imread(img_path)

        # Resize the image to the target size (224, 224)
        img = cv2.resize(img, image_size)

        # Convert BGR (OpenCV default) to RGB if needed (TensorFlow models use RGB)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Normalize pixel values to the range [0, 1]
        img_array = img / 255.0

        images.append(img_array)

    # Convert the list of images to a NumPy array with shape (n, 224, 224, 3)
    return np.array(images)

folder_path = '../COMP90086_2024_Project_test/test'
test = np.genfromtxt('../COMP90086_2024_Project_test/test.csv', delimiter=',', skip_header=1)
ids = np.array(test, dtype=int)
test_image_data = load_images_from_folder(folder_path, ids)

# Predict stable height on the test set

In [22]:
import torch
import torch.nn as nn
from torchvision import transforms
from PIL import Image
import os

# 1. Load the saved model
def load_model(model_path, num_types, num_classes):
    model = CustomResNetWithFeatures(num_types, num_classes)
    model.load_state_dict(torch.load(model_path))
    model.eval()  # Set the model to evaluation mode for inference
    return model

# 2. Preprocess the image
def preprocess_image(image_path):
    transform = transforms.Compose([
        transforms.Resize((224, 224)),  # Resize to match training size
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Apply the same normalization used during training
    ])
    
    # Load image using PIL
    image = Image.open(image_path).convert('RGB')
    
    # Apply transformations and return a batch with a single image (unsqueeze to add batch dimension)
    return transform(image).unsqueeze(0)

# 3. Make a prediction
def predict_image(model, image_tensor, block_type, cam_angle):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    image_tensor = image_tensor.to(device)
    block_type = torch.tensor([block_type]).to(device).long()  # Pass as tensor
    cam_angle = torch.tensor([cam_angle]).to(device).float()   # Pass as tensor
    
    with torch.no_grad():  # Disable gradient calculations for inference
        logits = model(image_tensor, block_type, cam_angle)
        predicted_class = torch.argmax(logits, dim=1).item() + 1  # Convert prediction to class (add 1 for class range [1-6])
    
    return predicted_class

# 4. Example usage
model_path = 'model_2.pth'  # Path to the saved model
folder_path = '../COMP90086_2024_Project_test/test'  # Path to the input image
num_types = 2  # Adjust based on your dataset
num_classes = 6  # Number of stable_height classes

# Load model
model = load_model(model_path, num_types, num_classes)

predictions = {}
predictions["id"] = []
predictions["stable_height"] = []

for image in os.listdir(folder_path):
    image_id = image.strip(".jpg")
    image_tensor = preprocess_image(f"{folder_path}/{image}")


    # Predict stable height (assuming default block_type=1, cam_angle=0.0, adjust as needed)
    predicted_class = predict_image(model, image_tensor, block_type=1, cam_angle=0.0)

    print(f"{image_id}: {predicted_class}")
    predictions["id"].append(image_id)
    predictions["stable_height"].append(predicted_class-1)


  model.load_state_dict(torch.load(model_path))


100156: 1
100357: 2
100998: 2
102119: 1
102131: 4
102213: 3
10265: 1
103351: 3
103671: 1
104154: 2
104298: 5
104408: 4
105572: 1
105753: 3
106117: 2
106155: 3
106207: 2
106615: 3
106839: 1
107131: 1
107154: 1
108129: 5
108162: 6
108306: 1
108313: 5
109548: 5
110737: 2
111486: 3
111571: 3
111936: 3
112148: 3
112408: 3
11329: 2
114074: 2
114456: 3
11485: 2
115147: 1
116446: 2
116758: 3
117059: 6
117554: 3
117913: 2
118233: 2
118392: 3
118644: 6
119874: 3
120797: 2
121757: 2
123349: 6
123544: 3
123994: 5
125981: 3
126835: 2
127129: 3
128888: 6
129188: 3
129287: 4
129464: 4
129853: 3
129905: 3
130200: 2
130227: 5
130325: 4
130451: 3
130956: 1
131307: 2
131690: 4
131853: 3
132664: 2
132993: 2
133736: 2
133791: 2
133859: 1
134412: 3
135144: 2
137131: 2
138612: 6
138697: 2
13901: 3
139770: 4
140013: 4
140787: 1
142146: 2
143614: 1
143947: 3
144874: 3
145326: 3
146340: 2
146432: 6
146463: 1
147577: 5
147836: 3
148326: 4
148338: 2
148342: 3
148680: 1
149231: 3
149364: 5
149931: 3
150322: 1
1514

In [23]:
df = pd.DataFrame(predictions)
df.to_csv("pred.csv", index=False)