In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import torch.nn.functional as F
from PIL import Image
import json
import numpy as np
import os

# Define the CNN Architecture
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(128 * 8 * 8, 256)  # Assuming input images are 64x64
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 10)  # Output layer for 10 classes

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = x.view(-1, 128 * 8 * 8)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Create a Custom Dataset Class
class ARCDataset(Dataset):
    def __init__(self, inputs, outputs, transform=None):
        self.inputs = inputs
        self.outputs = outputs
        self.transform = transform

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        input_image = self.inputs[idx]
        output_image = self.outputs[idx]
        if self.transform:
            input_image = self.transform(input_image)
            output_image = self.transform(output_image)
        return input_image, output_image

# Load and preprocess ARC-AGI data
def load_arc_data(json_dir):
    inputs = []
    outputs = []
    for filename in os.listdir(json_dir):
        if filename.endswith('.json'):
            with open(os.path.join(json_dir, filename), 'r') as f:
                data = json.load(f)
                for key in ['train', 'test']:
                    for item in data.get(key, []):
                        input_array = np.array(item['input'], dtype=np.float32)
                        output_array = np.array(item['output'], dtype=np.float32)
                        # Convert to PIL images
                        input_image = Image.fromarray(input_array)
                        output_image = Image.fromarray(output_array)
                        inputs.append(input_image)
                        outputs.append(output_image)
    return inputs, outputs

# Define transformations
transform = transforms.Compose([
    transforms.Resize((64, 64)),  # Resize images to 64x64 (adjust as needed)
    transforms.Grayscale(),       # Ensure image is in grayscale
    transforms.ToTensor(),        # Convert PIL images to tensors
    transforms.Normalize(mean=[0.5], std=[0.5])  # Normalize to [0, 1] range
])

# Define hyperparameters
batch_size = 32
learning_rate = 0.001
num_epochs = 10

# Load data
json_dir = '../data/training'  # Update this path
inputs, outputs = load_arc_data(json_dir)

# Apply transformations to images
inputs_transformed = [transform(img) for img in inputs]
outputs_transformed = [transform(img) for img in outputs]

# Convert lists of tensors to PyTorch tensors
inputs_tensor = torch.stack(inputs_transformed)
outputs_tensor = torch.stack(outputs_transformed)

# Split data into training and validation sets
indices = np.arange(len(inputs_tensor))
np.random.shuffle(indices)
split_idx = int(len(inputs_tensor) * 0.8)  # 80% training, 20% validation

train_indices, val_indices = indices[:split_idx], indices[split_idx:]
train_inputs, val_inputs = inputs_tensor[train_indices], inputs_tensor[val_indices]
train_outputs, val_outputs = outputs_tensor[train_indices], outputs_tensor[val_indices]

# Create DataLoader instances
train_dataset = ARCDataset(train_inputs, train_outputs, transform=transform)
val_dataset = ARCDataset(val_inputs, val_outputs, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# Initialize model, loss function, and optimizer
model = SimpleCNN()
criterion = nn.MSELoss()  # Assuming output is continuous; use CrossEntropyLoss if outputs are categorical
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, outputs in train_loader:
        optimizer.zero_grad()
        predictions = model(inputs)
        loss = criterion(predictions, outputs)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * inputs.size(0)
    epoch_loss = running_loss / len(train_loader.dataset)
    print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {epoch_loss:.4f}')

# Evaluate the Model
model.eval()
correct = 0
total = 0

with torch.no_grad():
    for inputs, outputs in val_loader:
        predictions = model(inputs)
        # Since the outputs are continuous, calculate some regression metrics if needed
        # For classification, use appropriate accuracy calculation
        total += outputs.size(0)
        correct += torch.sum(torch.abs(predictions - outputs) < 0.5).item()  # Example metric for regression

accuracy = 100 * correct / total
print(f'Validation Accuracy: {accuracy:.2f}%')


TypeError: pic should be PIL Image or ndarray. Got <class 'torch.Tensor'>