In [15]:
import os
import json
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torchvision.transforms import RandomRotation, RandomHorizontalFlip, RandomVerticalFlip

def load_arc_data(directory):
    tasks = []
    full_path = os.path.join('..', 'data', directory)
    print(f"Loading data from: {full_path}")
    if not os.path.exists(full_path):
        print(f"Directory does not exist: {full_path}")
        return tasks
    for filename in os.listdir(full_path):
        if filename.endswith('.json'):
            file_path = os.path.join(full_path, filename)
            with open(file_path, 'r') as f:
                tasks.append(json.load(f))
    print(f"Loaded {len(tasks)} tasks from {directory}")
    return tasks

def prepare_data(tasks):
    inputs = []
    outputs = []
    for task in tasks:
        for train in task['train']:
            inputs.append(np.array(train['input']))
            outputs.append(np.array(train['output']))
    print(f"Prepared {len(inputs)} input-output pairs")
    return inputs, outputs

# Load and prepare data
train_tasks = load_arc_data('training')
test_tasks = load_arc_data('evaluation')

train_inputs, train_outputs = prepare_data(train_tasks)
test_inputs, test_outputs = prepare_data(test_tasks)

# Convert to tensors
train_inputs = [torch.tensor(arr, dtype=torch.float32) for arr in train_inputs]
train_outputs = [torch.tensor(arr, dtype=torch.float32) for arr in train_outputs]
test_inputs = [torch.tensor(arr, dtype=torch.float32) for arr in test_inputs]
test_outputs = [torch.tensor(arr, dtype=torch.float32) for arr in test_outputs]

print(f"Number of training samples: {len(train_inputs)}")
print(f"Number of testing samples: {len(test_inputs)}")

class ARCDataset(Dataset):
    def __init__(self, inputs, outputs, transform=None):
        self.inputs = inputs
        self.outputs = outputs
        self.transform = transform

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        input_image = self.inputs[idx]
        output_image = self.outputs[idx]
        
        if self.transform:
            input_image = self.transform(input_image)
            output_image = self.transform(output_image)
        
        return input_image, output_image

# Define transforms
transform = transforms.Compose([
    transforms.Lambda(lambda x: x.unsqueeze(0) if x.ndim == 2 else x),
    transforms.Lambda(lambda x: x.repeat(3, 1, 1) if x.size(0) == 1 else x),
    transforms.Resize((32, 32)),
    RandomRotation(degrees=90),
    RandomHorizontalFlip(),
    RandomVerticalFlip(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

# Create datasets
train_dataset = ARCDataset(train_inputs, train_outputs, transform=transform)
test_dataset = ARCDataset(test_inputs, test_outputs, transform=transform)

# Print dataset sizes
print(f"Training dataset size: {len(train_dataset)}")
print(f"Testing dataset size: {len(test_dataset)}")

# Create dataloaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Print number of batches
print(f"Number of training batches: {len(train_loader)}")
print(f"Number of testing batches: {len(test_loader)}")

# Define the model
class ARCModel(nn.Module):
    def __init__(self):
        super(ARCModel, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, 3, padding=1)
        self.conv2 = nn.Conv2d(64, 128, 3, padding=1)
        self.conv3 = nn.Conv2d(128, 256, 3, padding=1)
        self.conv4 = nn.Conv2d(256, 128, 3, padding=1)
        self.conv5 = nn.Conv2d(128, 3, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)

    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = torch.relu(self.conv3(x))
        x = self.upsample(torch.relu(self.conv4(x)))
        x = self.upsample(torch.relu(self.conv5(x)))
        return x

# Initialize the model, loss function, and optimizer
model = ARCModel()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = ReduceLROnPlateau(optimizer, 'min', patience=5, factor=0.5)

# Training loop
num_epochs = 100
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

best_loss = float('inf')
patience = 10
counter = 0

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    
    avg_loss = running_loss / len(train_loader)
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss}")
    
    scheduler.step(avg_loss)
    
    if avg_loss < best_loss:
        best_loss = avg_loss
        counter = 0
        torch.save(model.state_dict(), "best_arc_model.pth")
    else:
        counter += 1
        if counter >= patience:
            print(f"Early stopping at epoch {epoch+1}")
            break

print("Training complete!")

# Evaluation
model.eval()
test_loss = 0.0
with torch.no_grad():
    for inputs, targets in test_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        test_loss += loss.item()

print(f"Test Loss: {test_loss/len(test_loader)}")

print("Model saved. Now go conquer the world with your ARC-solving prowess! 🚀😈")

Loading data from: ../data/training
Loaded 400 tasks from training
Loading data from: ../data/evaluation
Loaded 400 tasks from evaluation
Prepared 1302 input-output pairs
Prepared 1363 input-output pairs
Number of training samples: 1302
Number of testing samples: 1363
Training dataset size: 1302
Testing dataset size: 1363
Number of training batches: 41
Number of testing batches: 43
Epoch 1/100, Loss: 24.37352289804598
Epoch 2/100, Loss: 17.74205887026903
Epoch 3/100, Loss: 17.536770332150343
Epoch 4/100, Loss: 17.999961876287692
Epoch 5/100, Loss: 18.00680444298721
Epoch 6/100, Loss: 17.656003998547064
Epoch 7/100, Loss: 17.511576419923365
Epoch 8/100, Loss: 17.802117673362172
Epoch 9/100, Loss: 17.37524327999208
Epoch 10/100, Loss: 17.622005532427533
Epoch 11/100, Loss: 17.8683358401787
Epoch 12/100, Loss: 17.314264413787097
Epoch 13/100, Loss: 17.786755910733852
Epoch 14/100, Loss: 17.42245895106618
Epoch 15/100, Loss: 17.06242840464522
Epoch 16/100, Loss: 16.85732957793445
Epoch 17/