In [None]:
# Import libraries and set up directories
from pathlib import Path
import json, pickle
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split

# All paths used by this notebook are derived from the current working directory.
NOTEBOOK_DIR = Path.cwd()
OUTPUT_DIR = NOTEBOOK_DIR.joinpath("outputs")
# exist_ok=True makes this cell safe to re-run once the folder exists.
OUTPUT_DIR.mkdir(exist_ok=True)

print("Notebook directory:", NOTEBOOK_DIR)
print("Local outputs:", OUTPUT_DIR)

Notebook directory: C:\Users\kvand\PycharmProjects\ARC\arc2\cnn_parallel_branch
Local outputs: C:\Users\kvand\PycharmProjects\ARC\arc2\cnn_parallel_branch\outputs


In [None]:
# Load training and evaluation data from JSON files
# The dataset root is <grandparent of the notebook directory>/data/arc2,
# with one sub-folder per split.
DATA_DIR = NOTEBOOK_DIR.parents[1].joinpath("data", "arc2")
TRAIN_DIR, EVAL_DIR = DATA_DIR / "training", DATA_DIR / "evaluation"

def load_directory(path):
    """Load every ``*.json`` file directly inside ``path`` into a dict.

    Parameters
    ----------
    path : str or pathlib.Path
        Directory to scan. Only its immediate children are considered
        (the glob pattern is not recursive).

    Returns
    -------
    dict
        Maps each file's stem (file name without the ``.json`` suffix) to
        the parsed JSON content. Entries are inserted in sorted path order.
    """
    tasks = {}
    # Sort the listing so the dict order -- and every array later built by
    # iterating over it -- does not depend on the file system's enumeration
    # order.
    for file in sorted(Path(path).glob("*.json")):
        # Read as UTF-8 explicitly instead of relying on the platform's
        # default text encoding (e.g. cp1252 on Windows).
        with open(file, "r", encoding="utf-8") as f:
            tasks[file.stem] = json.load(f)
    return tasks

# Parse both splits up front; each result maps file stem -> parsed JSON.
training_raw, evaluation_raw = (
    load_directory(split_dir) for split_dir in (TRAIN_DIR, EVAL_DIR)
)

In [None]:
# Convert grid data to numpy arrays
def as_np(grid):
    """Return ``grid`` as a freshly allocated integer NumPy array.

    ``grid`` may be anything ``np.array`` accepts, e.g. a nested list of ints.
    """
    converted = np.array(grid, dtype=int)
    return converted

def _grids(pairs, key):
    """Convert the ``key`` grid ("input" or "output") of every pair to an array."""
    return [as_np(pair[key]) for pair in pairs]


# Per task: the demonstration ("train") and held-out ("test") pairs, split
# into parallel lists of input and output arrays.
training = {
    task_id: {
        "train_inputs": _grids(task["train"], "input"),
        "train_outputs": _grids(task["train"], "output"),
        "test_inputs": _grids(task["test"], "input"),
        "test_outputs": _grids(task["test"], "output"),
    }
    for task_id, task in training_raw.items()
}



In [None]:
# Pad grids to 30x30 size
def pad_grid(grid, size=30):
    """Place a 2-D ``grid`` in the top-left corner of a ``size`` x ``size`` canvas.

    Cells not covered by the grid are filled with -1. A grid larger than
    ``size`` along either axis is cropped to its top-left
    ``size`` x ``size`` corner. Returns a new integer array.
    """
    cells = np.array(grid, dtype=int)
    n_rows, n_cols = cells.shape
    canvas = np.full((size, size), -1, dtype=int)
    # Slicing the canvas past its edge clips automatically, so both sides
    # of the assignment end up with the same (possibly cropped) shape.
    canvas[:n_rows, :n_cols] = cells[:size, :size]
    return canvas

In [None]:
# Augment data by permuting colors
def permute_colors(grid, rng=None):
    """Return a copy of ``grid`` with the colour values 0-9 randomly relabelled.

    A single permutation of the ten colour indices is drawn and applied to
    every cell. Cells holding any other value (for example -1 padding) are
    left unchanged, and ``grid`` itself is not modified.

    Parameters
    ----------
    grid : np.ndarray
        Array of colour indices.
    rng : np.random.Generator, optional
        Source of randomness. When omitted, the global ``np.random`` state
        is used, so existing calls (and ``np.random.seed``) behave as
        before. Pass a seeded generator for a reproducible permutation
        that does not touch global state.

    Returns
    -------
    np.ndarray
        New array with the same shape and dtype as ``grid``.
    """
    colors = np.arange(10)
    if rng is None:
        perm = np.random.permutation(colors)
    else:
        perm = rng.permutation(colors)

    out = grid.copy()
    for old, new in zip(colors, perm):
        # Build the mask from the untouched input rather than ``out`` so a
        # cell that has already been remapped is never remapped again.
        out[grid == old] = new
    return out

def augment_grid_control(grid):
    """Apply the control augmentation, which consists solely of ``permute_colors``."""
    augmented = permute_colors(grid)
    return augmented

In [None]:
# Prepare input-output pairs for training
# Seed NumPy's global RNG so the random colour permutations below -- and
# therefore X_img -- are identical on every run of this cell. Without this
# the seeded train/validation split would still operate on different data
# each time.
AUGMENT_SEED = 0
np.random.seed(AUGMENT_SEED)

X_list = []
y_list = []

for tid, task in training.items():
    for inp, out in zip(task["train_inputs"], task["train_outputs"]):
        # Input: padded to the fixed 30x30 canvas, then colour-permuted once.
        # The permuted grid replaces the original rather than being added
        # next to it, so every sample (including those later split off for
        # validation) is a permuted one.
        padded = pad_grid(inp)
        padded = augment_grid_control(padded)

        X_list.append(padded)
        # Label: last decimal digit of the sum of all output cells, i.e.
        # one of 10 classes.
        y_list.append(int(out.sum()) % 10)

if not X_list:
    # np.stack would raise a ValueError here anyway; give a message that
    # points at the likely cause instead.
    raise ValueError(
        "No training pairs were found; check that the training directory "
        "contains the task JSON files."
    )

# Add a trailing channel axis: (n_samples, 30, 30) -> (n_samples, 30, 30, 1).
X_img = np.stack(X_list)[..., None]
y = np.array(y_list)

In [None]:
# Split data into train and validation sets
# Hold out a fixed fraction of the samples for validation. The split is
# seeded and stratified on the label.
VAL_FRACTION = 0.15
SPLIT_SEED = 0

X_train, X_val, y_train, y_val = train_test_split(
    X_img,
    y,
    test_size=VAL_FRACTION,
    random_state=SPLIT_SEED,
    stratify=y,
)

In [None]:
# Define CNN model architecture
from tensorflow.keras import layers, models, Input

def conv_block_no_reg(x, filters):
    """Apply two stacked 3x3, same-padded, ReLU-activated convolutions to ``x``.

    Both convolutions use ``filters`` output channels; no normalisation or
    regularisation layer is added. Returns the output tensor of the second
    convolution.
    """
    for _ in range(2):
        x = layers.Conv2D(filters, 3, padding="same", activation="relu")(x)
    return x

# Small U-Net-style encoder/decoder over the 30x30 single-channel grid,
# finished with a 10-way softmax classification head.
inputs = Input(shape=(30, 30, 1))

# Encoder stage 1: 32 filters at 30x30, then 2x2 max-pooling -> 15x15.
c1 = conv_block_no_reg(inputs, 32)
p1 = layers.MaxPooling2D(2)(c1)

# Encoder stage 2: 64 filters at 15x15, then pooling -> 7x7
# (the odd size is floored by the pooling layer's default settings).
c2 = conv_block_no_reg(p1, 64)
p2 = layers.MaxPooling2D(2)(c2)

# Bottleneck: 128 filters at 7x7.
b = conv_block_no_reg(p2, 128)

# Decoder stage 2: upsampling 7x7 gives 14x14, one short of c2's 15x15, so
# the skip connection is centre-cropped to 14x14 before concatenation.
u2 = layers.UpSampling2D(2)(b)
c2c = layers.CenterCrop(14, 14)(c2)
u2 = layers.concatenate([u2, c2c])
c3 = conv_block_no_reg(u2, 64)

# Decoder stage 1: upsampling 14x14 gives 28x28; c1 (30x30) is cropped to match.
u1 = layers.UpSampling2D(2)(c3)
c1c = layers.CenterCrop(28, 28)(c1)
u1 = layers.concatenate([u1, c1c])
c4 = conv_block_no_reg(u1, 32)

# Classification head: average over the spatial axes, dropout, then one
# probability per class.
z = layers.GlobalAveragePooling2D()(c4)
z = layers.Dropout(0.3)(z)  # lighter dropout only
outputs = layers.Dense(10, activation="softmax")(z)

cnn_control = models.Model(inputs, outputs)

# The sparse loss takes integer class labels directly (no one-hot encoding).
cnn_control.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

cnn_control.summary()

In [None]:
# Train the model with callbacks
# Stop when val_loss has not improved for 8 epochs and roll the model back
# to the weights of its best epoch.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss", patience=8, restore_best_weights=True
)
# Halve the learning rate after 4 epochs without val_loss improvement,
# never going below 1e-5.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor="val_loss", factor=0.5, patience=4, min_lr=1e-5
)
callbacks = [early_stopping, reduce_lr]

history = cnn_control.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=80,
    batch_size=32,
    callbacks=callbacks,
    verbose=1,
)

Epoch 1/80
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 31ms/step - accuracy: 0.1311 - loss: 2.2838 - val_accuracy: 0.1464 - val_loss: 2.2623 - learning_rate: 0.0010
Epoch 2/80
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 28ms/step - accuracy: 0.1376 - loss: 2.2690 - val_accuracy: 0.1526 - val_loss: 2.2562 - learning_rate: 0.0010
Epoch 3/80
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 29ms/step - accuracy: 0.1398 - loss: 2.2629 - val_accuracy: 0.1526 - val_loss: 2.2586 - learning_rate: 0.0010
Epoch 4/80
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 30ms/step - accuracy: 0.1449 - loss: 2.2598 - val_accuracy: 0.1464 - val_loss: 2.2548 - learning_rate: 0.0010
Epoch 5/80
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 29ms/step - accuracy: 0.1321 - loss: 2.2573 - val_accuracy: 0.1546 - val_loss: 2.2551 - learning_rate: 0.0010
Epoch 6/80
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 

In [None]:
# Evaluate model on validation set
# evaluate() yields the loss followed by the metrics passed to compile(),
# here just accuracy.
val_metrics = cnn_control.evaluate(X_val, y_val, verbose=0)
val_loss, val_acc = val_metrics
print(f"Validation accuracy (control): {val_acc:.4f}")

Validation accuracy (control): 0.1546


In [None]:
# Save trained model and training history
# Persist the trained model and the per-epoch metric curves side by side
# in the outputs folder.
model_path = OUTPUT_DIR / "cnn_control_model.keras"
history_path = OUTPUT_DIR / "cnn_control_history.pkl"

cnn_control.save(model_path)

# Only the plain dict of metric lists is pickled, not the History object.
with history_path.open("wb") as f:
    pickle.dump(history.history, f)