Import Dependencies

In [13]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import cv2
import numpy as np
import pandas as pd
import os
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report

Load Dataset

In [9]:
# 1. Dataset loading: build a table of image paths and labels from a list file
def load_dataset(txt_file):
    """Read image paths from ``txt_file`` and derive a label for each one.

    Every line of the file is stripped of surrounding whitespace and treated as
    an image path. Paths that do not exist on disk are skipped. The label of a
    kept path is its second '/'-separated component (index 1).

    Args:
        txt_file: Path to a text file containing one image path per line.

    Returns:
        A pandas DataFrame with the columns 'image_path' and 'label', in the
        same order as the kept lines of the file.
    """
    with open(txt_file, 'r') as handle:
        candidates = [raw_line.strip() for raw_line in handle]

    # Keep only the paths that point at something on disk.
    existing = [path for path in candidates if os.path.exists(path)]

    return pd.DataFrame({
        'image_path': existing,
        'label': [path.split('/')[1] for path in existing],
    })

Define CNN Model

In [10]:
# 2. Simple CNN model: two conv/pool stages followed by one linear classifier
class SimpleCNN(nn.Module):
    """Small convolutional image classifier.

    Layer sequence: conv 3x3 (16 filters) -> ReLU -> 2x2 max-pool ->
    conv 3x3 (32 filters) -> ReLU -> 2x2 max-pool -> flatten -> linear.

    Args:
        num_classes: Number of output logits produced per image.
        in_channels: Number of channels in the input images. Defaults to 3.
        input_size: Spatial size of the input images, either one int for
            square images or a ``(height, width)`` pair. Defaults to 64, which
            yields the 32*16*16 classifier input used for 64x64 images.

    Raises:
        ValueError: If ``input_size`` is too small to survive two 2x2 poolings.
    """

    def __init__(self, num_classes, in_channels=3, input_size=64):
        super(SimpleCNN, self).__init__()

        if isinstance(input_size, int):
            height = width = input_size
        else:
            height, width = input_size

        # Both convolutions keep the spatial size (kernel 3, stride 1, padding 1);
        # each max_pool2d(x, 2) halves it with floor rounding, so two poolings
        # reduce each side to side // 4.
        pooled_h, pooled_w = height // 4, width // 4
        if pooled_h < 1 or pooled_w < 1:
            raise ValueError(
                f"input_size {input_size!r} is too small: each side must be at least 4"
            )

        self.conv1 = nn.Conv2d(in_channels, 16, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.fc = nn.Linear(32 * pooled_h * pooled_w, num_classes)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for image batch ``x``."""
        x = torch.relu(self.conv1(x))
        x = torch.max_pool2d(x, 2)
        x = torch.relu(self.conv2(x))
        x = torch.max_pool2d(x, 2)
        x = x.view(x.size(0), -1)  # flatten everything except the batch dimension
        x = self.fc(x)
        return x

Preprocess Data

In [11]:
# 3. Image preprocessing: load, resize, and convert to a CHW float tensor
def preprocess_image(image_path, size=(64, 64)):
    """Load an image file and convert it into a normalized tensor.

    Args:
        image_path: Path of the image file to read with OpenCV.
        size: Target size handed to ``cv2.resize`` as ``dsize``, which OpenCV
            interprets as (width, height). Defaults to (64, 64).

    Returns:
        A float tensor in channels-first (C, H, W) layout with values divided
        by 255. Channel order is whatever ``cv2.imread`` produced (OpenCV
        documents this as B, G, R for colour images); no colour conversion is
        applied here.

    Raises:
        ValueError: If OpenCV cannot read ``image_path``.
    """
    image = cv2.imread(image_path)
    # cv2.imread signals failure by returning None rather than raising, which
    # would otherwise surface as an opaque error inside cv2.resize.
    if image is None:
        raise ValueError(f"Could not read image: {image_path!r}")

    image = cv2.resize(image, size)
    image = image.transpose(2, 0, 1)  # HWC to CHW
    return torch.FloatTensor(image) / 255.0  # Scale pixel values by 1/255

Train, Evaluate, and Report Results

In [20]:
# 4. Main workflow: load data, train the CNN, evaluate it, and save predictions

lr = 0.0001      # Learning rate passed to Adam
epochs = 20      # Number of full passes over the training set
batch_size = 32  # Mini-batch size used by both DataLoaders
seed = 42        # Seed for PyTorch's global random number generator

if __name__ == '__main__':
    # Seed before the model and DataLoaders are created so that weight
    # initialisation and training-set shuffling are repeatable between runs.
    torch.manual_seed(seed)

    # Load the path/label tables (val.txt is used as the test split)
    train_df = load_dataset("train.txt")
    test_df = load_dataset("val.txt")

    # load_dataset skips paths that do not exist, so a wrong working directory
    # yields empty tables; fail here with a clear message instead of inside
    # torch.stack.
    if train_df.empty or test_df.empty:
        raise RuntimeError(
            "No images found for train.txt and/or val.txt; "
            "check that the listed paths exist relative to the working directory."
        )

    # Turn every image into a tensor and stack them into one batch per split
    X_train = torch.stack([preprocess_image(path) for path in train_df['image_path']])
    X_test = torch.stack([preprocess_image(path) for path in test_df['image_path']])

    # Encode string labels as integers; the encoder is fitted on the training
    # labels only and reused for the test labels.
    encoder = LabelEncoder()
    y_train = encoder.fit_transform(train_df['label'])
    y_test = encoder.transform(test_df['label'])

    # Convert to PyTorch tensors
    y_train = torch.LongTensor(y_train)
    y_test = torch.LongTensor(y_test)

    # Create datasets
    train_dataset = torch.utils.data.TensorDataset(X_train, y_train)
    test_dataset = torch.utils.data.TensorDataset(X_test, y_test)

    # Create dataloaders (test order is kept so predictions line up with test_df)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # Initialize model, loss, and optimizer
    model = SimpleCNN(num_classes=len(encoder.classes_))
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # Training loop
    for epoch in range(epochs):
        model.train()
        for images, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

    # Evaluation: predict batch by batch through test_loader rather than
    # pushing the whole test set through the model in one forward pass.
    model.eval()
    batch_predictions = []
    with torch.no_grad():
        for images, _ in test_loader:
            _, batch_predicted = torch.max(model(images), 1)
            batch_predictions.append(batch_predicted)
    predicted = torch.cat(batch_predictions)
    accuracy = (predicted == y_test).float().mean()

    print(f"\nResults [LR={lr}, BS={batch_size}, Epochs={epochs}]:")
    print(f"Test Accuracy: {accuracy.item():.4f}")

    # Classification report. Passing `labels` explicitly keeps target_names
    # aligned with the encoder's classes even if some class never appears in
    # the test labels or predictions.
    y_true = y_test.numpy()
    y_pred = predicted.numpy()
    print("\nClassification Report:")
    print(classification_report(
        y_true,
        y_pred,
        labels=list(range(len(encoder.classes_))),
        target_names=encoder.classes_,
    ))

    # Save predictions next to the original paths and labels
    test_df['Predicted Label'] = encoder.inverse_transform(y_pred)
    test_df.to_csv("test_predictions_cnn.csv", index=False)


Results [LR=0.0001, BS=32, Epochs=20]:
Test Accuracy: 0.9100

Classification Report:
                precision    recall  f1-score   support

 museum-indoor       0.95      0.87      0.91       100
museum-outdoor       0.88      0.95      0.91       100

      accuracy                           0.91       200
     macro avg       0.91      0.91      0.91       200
  weighted avg       0.91      0.91      0.91       200

