In [1]:
# 2‑Model Ensemble for Digit Recognizer (Keras)
# ---------------------------------------------
import numpy as np, pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, callbacks
from sklearn.model_selection import train_test_split

# Two distinct seeds so each ensemble member can be initialised differently.
SEED1, SEED2 = 42, 99
# Seed Python's `random`, NumPy and TensorFlow with a single call; the
# previous np.random.seed / tf.random.set_seed pair left `random` unseeded.
keras.utils.set_random_seed(SEED1)

ModuleNotFoundError: No module named 'tensorflow'

In [7]:
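# One-time environment setup (kept commented out so "Run All" does not reinstall packages).
# Use %pip rather than !pip so the install targets the kernel's own environment.
# %pip uninstall -y tensorflow keras protobuf
# %pip install "tensorflow-macos==2.15.*" "keras==2.15.*" "protobuf==3.20.*" "numpy<2"
# Optional (GPU via Metal):
# %pip install "tensorflow-metal==1.1.*"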


[0mCollecting tensorflow-macos==2.15.*
  Downloading tensorflow_macos-2.15.1-cp39-cp39-macosx_12_0_arm64.whl.metadata (3.4 kB)
Collecting keras==2.15.*
  Using cached keras-2.15.0-py3-none-any.whl.metadata (2.4 kB)
Collecting protobuf==3.20.*
  Downloading protobuf-3.20.3-py2.py3-none-any.whl.metadata (720 bytes)
Downloading tensorflow_macos-2.15.1-cp39-cp39-macosx_12_0_arm64.whl (2.2 kB)
Using cached keras-2.15.0-py3-none-any.whl (1.7 MB)
Downloading protobuf-3.20.3-py2.py3-none-any.whl (162 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m162.1/162.1 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25h[33mDEPRECATION: imbalanced-ensemble 0.1.7 has a non-standard dependency specifier pandas>=1.1.3joblib>=0.11. pip 24.1 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of imbalanced-ensemble or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can b

In [3]:
import sys, tensorflow as tf, keras, numpy as np
# Report interpreter and library versions, then the GPUs TensorFlow can see.
print(f"python : {sys.version}")
print(f"tf     : {tf.__version__}")
print(f"keras  : {keras.__version__}")
print(f"numpy  : {np.__version__}")
print(f"GPUs   : {tf.config.list_physical_devices('GPU')}")


ModuleNotFoundError: No module named 'tensorflow'

In [None]:
# 2‑Model Ensemble for Digit Recognizer (Keras)
# ---------------------------------------------
import os

import numpy as np, pandas as pd, tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, callbacks
from sklearn.model_selection import train_test_split

# One seed per ensemble member (SEED1 -> model A, SEED2 -> model B).
SEED1, SEED2 = 42, 99
# Seed Python's `random`, NumPy and TensorFlow with a single call, the same
# helper build_model() uses; the previous np.random.seed / tf.random.set_seed
# pair left Python's `random` unseeded.
keras.utils.set_random_seed(SEED1)

# --- Load CSVs ---
# The dataset directory defaults to the Kaggle mount point, but can be pointed
# elsewhere (e.g. for a local run) by setting the DIGIT_DATA_DIR environment
# variable before starting the kernel.
DATA_DIR = os.environ.get("DIGIT_DATA_DIR", "/kaggle/input/digit-recognizer")
train = pd.read_csv(os.path.join(DATA_DIR, "train.csv"))
test  = pd.read_csv(os.path.join(DATA_DIR, "test.csv"))

# Labels come from the "label" column; every remaining column is treated as a pixel.
y = train["label"].values
# Divide by 255 to rescale raw pixel values (assumed to be 0-255 intensities).
X = train.drop(columns=["label"]).values.astype("float32") / 255.0
X_test = test.values.astype("float32") / 255.0
# Reshape flat rows into 28x28 single-channel images for the Conv2D layers.
X = X.reshape(-1, 28,28,1)
X_test = X_test.reshape(-1, 28,28,1)

# Hold out 10% of the training data, stratified by label. Both ensemble
# members are trained and validated on this same partition.
num_classes = 10
_split = train_test_split(X, y, test_size=0.1, random_state=SEED1, stratify=y)
X_tr, X_val, y_tr_int, y_val_int = _split

# One-hot encode the integer labels for the categorical cross-entropy loss.
y_tr, y_val = (
    keras.utils.to_categorical(int_labels, num_classes)
    for int_labels in (y_tr_int, y_val_int)
)

# --- Light augmentation ---
# Small random rotations, shifts and zooms applied on the fly to training batches.
aug = keras.preprocessing.image.ImageDataGenerator(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
)
# Batch iterator over the training split; reshuffled between epochs.
aug_tr = aug.flow(
    X_tr,
    y_tr,
    batch_size=128,
    shuffle=True,
)

def build_model(variant=1, seed=42):
    """Build and compile one CNN member of the ensemble.

    Args:
        variant: ``1`` selects a second stage of two 64-filter 3x3 convolutions
            and a 128-unit dense layer. Any other value selects a 48-filter 5x5
            convolution followed by a 64-filter 3x3 convolution and a 160-unit
            dense layer.
        seed: Passed to ``keras.utils.set_random_seed`` before any layer is
            created.

    Returns:
        A compiled ``keras.Model`` taking 28x28x1 inputs and producing a softmax
        over ``num_classes`` outputs (``num_classes`` is read from module
        scope). It is compiled with Adam (1e-3), categorical cross-entropy and
        an accuracy metric.
    """
    keras.utils.set_random_seed(seed)

    first_variant = variant == 1
    # Only the first conv of the second stage and the dense width differ
    # between the two variants.
    lead_filters, lead_kernel = (64, 3) if first_variant else (48, 5)
    dense_units = 128 if first_variant else 160

    def conv_relu(tensor, filters, kernel_size):
        # Same-padded ReLU convolution, created and applied in one step.
        return layers.Conv2D(filters, kernel_size, padding="same", activation="relu")(tensor)

    def pool_then_drop(tensor):
        # Default max pooling followed by 25% dropout.
        pooled = layers.MaxPooling2D()(tensor)
        return layers.Dropout(0.25)(pooled)

    inputs = keras.Input(shape=(28, 28, 1))

    # Shared stem
    features = conv_relu(inputs, 32, 3)
    features = conv_relu(features, 32, 3)
    features = pool_then_drop(features)

    # Variant stage for diversity between ensemble members
    features = conv_relu(features, lead_filters, lead_kernel)
    features = conv_relu(features, 64, 3)
    features = pool_then_drop(features)

    # Classification head
    features = layers.Flatten()(features)
    features = layers.Dense(dense_units, activation="relu")(features)
    features = layers.Dropout(0.5)(features)
    outputs = layers.Dense(num_classes, activation="softmax")(features)

    model = keras.Model(inputs, outputs)
    model.compile(
        optimizer=keras.optimizers.Adam(1e-3),
        loss="categorical_crossentropy",
        metrics=["accuracy"],
    )
    return model

# Halve the learning rate (never below 1e-5) after 2 epochs without a
# validation-accuracy improvement.
lr_on_plateau = callbacks.ReduceLROnPlateau(
    monitor="val_accuracy",
    factor=0.5,
    patience=2,
    min_lr=1e-5,
    verbose=1,
)
# Stop after 5 epochs without improvement and roll back to the best weights.
early_stop = callbacks.EarlyStopping(
    monitor="val_accuracy",
    patience=5,
    mode="max",
    restore_best_weights=True,
    verbose=1,
)
# The same callback list is passed to both fit() calls.
cbs = [lr_on_plateau, early_stop]

# Batches needed to cover the training split once at batch size 128
# (integer ceiling division).
steps_per_epoch = (len(X_tr) + 128 - 1) // 128

# --- Train Model A ---
# Variant 1, seeded with SEED1, trained on the augmented stream and validated
# on the untouched hold-out split.
model_a = build_model(variant=1, seed=SEED1)
hist_a = model_a.fit(
    aug_tr,
    steps_per_epoch=steps_per_epoch,
    epochs=30,
    validation_data=(X_val, y_val),
    callbacks=cbs,
    verbose=2,
)

# --- Train Model B ---
# Variant 2, seeded with SEED2.
model_b = build_model(variant=2, seed=SEED2)
# Fresh batch iterator for this model (optional re-initialisation).
aug_tr_b = aug.flow(
    X_tr,
    y_tr,
    batch_size=128,
    shuffle=True,
)
hist_b = model_b.fit(
    aug_tr_b,
    steps_per_epoch=steps_per_epoch,
    epochs=30,
    validation_data=(X_val, y_val),
    callbacks=cbs,
    verbose=2,
)

# --- Ensemble: average softmax probabilities ---
# Each model scores the test set; the equal-weight mean of the two probability
# matrices decides the predicted digit.
probs_a = model_a.predict(X_test, batch_size=256, verbose=0)
probs_b = model_b.predict(X_test, batch_size=256, verbose=0)
probs_ens = 0.5 * (probs_a + probs_b)
labels = np.argmax(probs_ens, axis=1)

# Optional: check val performance of ensemble (on X_val)
# Same equal-weight averaging as for the test set, scored against the integer labels.
val_pa = model_a.predict(X_val, verbose=0)
val_pb = model_b.predict(X_val, verbose=0)
val_pens = 0.5 * (val_pa + val_pb)
val_hits = np.argmax(val_pens, axis=1) == y_val_int
val_acc_ens = np.mean(val_hits)
print(f"Ensemble validation accuracy: {val_acc_ens:.4f}")

# --- Submission ---
# ImageId is 1-based and follows the row order of the test predictions.
image_ids = np.arange(1, len(labels) + 1)
sub = pd.DataFrame(
    {
        "ImageId": image_ids,
        "Label": labels,
    }
)
sub.to_csv("submission.csv", index=False)
print("Saved submission.csv")
