In [1]:
import json
import os
from pycocotools.coco import COCO
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torch.nn as nn
import torch.optim as optim

# Paths
annotation_file = r"C:/Users/natha/OneDrive/Desktop/Applied Machine Learning/Run-Through_Switch/annotations/instances_default.json"
image_dir = r"C:/Users/natha/OneDrive/Desktop/Applied Machine Learning/Run-Through_Switch/images"
output_dir = r"C:/Users/natha/OneDrive/Desktop/Applied Machine Learning/Run-Through_Switch/processed_data"

# Load annotations
coco = COCO(annotation_file)

# Define a custom dataset for loading images and their annotations
class RailroadTrackDataset(Dataset):
    def __init__(self, coco, image_dir, transform=None):
        self.coco = coco
        self.image_dir = image_dir
        self.transform = transform
        self.img_ids = coco.getImgIds()  # Get all image IDs
        
        # Map category IDs to labels
        self.label_map = {
            "open_switch": 0,
            "closed_swtich": 1,
            "straight": 2
        }

    def __len__(self):
        return len(self.img_ids)

    def __getitem__(self, idx):
        img_id = self.img_ids[idx]
        img_info = self.coco.loadImgs(img_id)[0]
        img_path = os.path.join(self.image_dir, img_info['file_name'])
        
        # Load image
        image = Image.open(img_path).convert('RGB')
        
        # Get annotations (category IDs and bounding boxes)
        ann_ids = self.coco.getAnnIds(imgIds=img_id)
        anns = self.coco.loadAnns(ann_ids)
        
        # Check if annotations exist for this image
        if len(anns) == 0:
            print(f"Warning: No annotations for image {img_info['file_name']}, skipping.")
            return None  # Skip this sample (or you could return a default image/label if needed)
        
        # Get the category label (assuming a single label per image)
        category_id = anns[0]['category_id']
        category_name = self.coco.loadCats(category_id)[0]['name']
        label = self.label_map[category_name]

        # Apply transformations (if any)
        if self.transform:
            image = self.transform(image)
        
        return image, label

# Define transformations
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Create dataset and dataloaders
def collate_fn(batch):
    batch = list(filter(lambda x: x is not None, batch))  # Filter out None samples
    return torch.utils.data.dataloader.default_collate(batch)

dataset = RailroadTrackDataset(coco, image_dir, transform)
train_loader = DataLoader(dataset, batch_size=32, shuffle=True, collate_fn=collate_fn)

# Define a CNN model for classification
class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(64 * 16 * 16, 128)
        self.fc2 = nn.Linear(128, 3)  # 3 classes: open_switch, closed_swtich, straight
        
    def forward(self, x):
        x = nn.ReLU()(self.conv1(x))
        x = nn.MaxPool2d(2)(x)
        x = nn.ReLU()(self.conv2(x))
        x = nn.MaxPool2d(2)(x)
        x = nn.ReLU()(self.conv3(x))
        x = nn.MaxPool2d(2)(x)
        x = x.view(x.size(0), -1)  # Flatten
        x = nn.ReLU()(self.fc1(x))
        x = self.fc2(x)
        return x

# Initialize model, loss, and optimizer
model = CNNModel()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Set up device (GPU or CPU)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Train the model
num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct_preds = 0
    total_preds = 0
    
    for images, labels in train_loader:
        if images is None or labels is None:
            continue  # Skip this batch if it's None
        
        images, labels = images.to(device), labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(images)
        
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        
        _, predicted = torch.max(outputs, 1)
        correct_preds += (predicted == labels).sum().item()
        total_preds += labels.size(0)
    
    epoch_loss = running_loss / len(train_loader)
    accuracy = 100 * correct_preds / total_preds
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {accuracy:.2f}%")

# Save the trained model
torch.save(model.state_dict(), "railroad_track_cnn.pth")


loading annotations into memory...
Done (t=0.03s)
creating index...
index created!
Epoch [1/10], Loss: 1.0585, Accuracy: 39.13%
Epoch [2/10], Loss: 0.9395, Accuracy: 55.90%
Epoch [3/10], Loss: 0.9017, Accuracy: 52.48%
Epoch [4/10], Loss: 0.9105, Accuracy: 53.73%
Epoch [5/10], Loss: 0.8209, Accuracy: 56.83%
Epoch [6/10], Loss: 0.7488, Accuracy: 64.29%
Epoch [7/10], Loss: 0.7008, Accuracy: 69.25%
Epoch [8/10], Loss: 0.6179, Accuracy: 74.53%
Epoch [9/10], Loss: 0.4688, Accuracy: 80.43%
Epoch [10/10], Loss: 0.3998, Accuracy: 85.09%


test model

In [5]:
import torch
from torchvision import transforms
from PIL import Image
import os

# Step 1: Reinitialize the model
class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(64 * 16 * 16, 128)
        self.fc2 = nn.Linear(128, 3)  # 3 classes: open_switch, closed_swtich, straight

    def forward(self, x):
        x = nn.ReLU()(self.conv1(x))
        x = nn.MaxPool2d(2)(x)
        x = nn.ReLU()(self.conv2(x))
        x = nn.MaxPool2d(2)(x)
        x = nn.ReLU()(self.conv3(x))
        x = nn.MaxPool2d(2)(x)
        x = x.view(x.size(0), -1)  # Flatten
        x = nn.ReLU()(self.fc1(x))
        x = self.fc2(x)
        return x

# Step 2: Load the trained model weights
model = CNNModel()
model.load_state_dict(torch.load('railroad_track_cnn.pth'))
model.eval()  # Set model to evaluation mode

# Step 3: Define the same transformations used during training
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Step 4: Load a test image
test_image_path = r"C:/Users/natha/Downloads/test2.jpg"  # Replace with your test image path
image = Image.open(test_image_path).convert('RGB')

# Apply the same transformations to the image
image = transform(image).unsqueeze(0)  # Add batch dimension

# Step 5: Make predictions
with torch.no_grad():  # Disable gradient computation for inference
    output = model(image)  # Get model output
    _, predicted = torch.max(output, 1)  # Get the class with the highest score

# Step 6: Map the prediction index to the corresponding class name
label_map = {0: 'open_switch', 1: 'closed_swtich', 2: 'straight'}
predicted_class = label_map[predicted.item()]

# Step 7: Output the prediction
print(f"Predicted Class: {predicted_class}")


Predicted Class: straight


  model.load_state_dict(torch.load('railroad_track_cnn.pth'))
