In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPool2D, GlobalAveragePooling2D, Dropout, Dense, BatchNormalization
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.regularizers import l2
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.optimizers.schedules import CosineDecay
train = pd.read_csv('/kaggle/input/digit-recognizer/train.csv')
test = pd.read_csv('/kaggle/input/digit-recognizer/test.csv')

y_train = train['label']
X_train = train.drop('label', axis=1)

X_train = X_train / 255.0
test = test / 255.0

X_train = X_train.values.reshape(-1, 28, 28, 1)
test = test.values.reshape(-1, 28, 28, 1)

y_train = to_categorical(y_train, num_classes=10)

X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1, random_state=42)

In [2]:
model = Sequential()

model.add(Conv2D(64, (3,3), padding='same', activation='relu', kernel_regularizer=l2(1e-4), input_shape=(28,28,1)))  # <--- Mais filtros
model.add(BatchNormalization())
model.add(Conv2D(64, (3,3), padding='same', activation='relu', kernel_regularizer=l2(1e-4)))
model.add(BatchNormalization())
model.add(MaxPool2D(pool_size=(2,2)))
model.add(Dropout(0.3))  # <--- Dropout aumentado

model.add(Conv2D(128, (3,3), padding='same', activation='relu', kernel_regularizer=l2(1e-4)))  # <--- Nova camada
model.add(BatchNormalization())
model.add(Conv2D(128, (3,3), padding='same', activation='relu', kernel_regularizer=l2(1e-4)))
model.add(BatchNormalization())
model.add(MaxPool2D(pool_size=(2,2)))
model.add(Dropout(0.3))

model.add(Conv2D(256, (3,3), padding='same', activation='relu', kernel_regularizer=l2(1e-4)))  # <--- Nova camada
model.add(BatchNormalization())
model.add(GlobalAveragePooling2D())  # <--- Substitui Flatten()

model.add(Dense(512, activation='relu', kernel_regularizer=l2(1e-4)))
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Dense(10, activation='softmax'))

initial_learning_rate = 0.01
decay_steps = 1000
lr_schedule = CosineDecay(initial_learning_rate, decay_steps)  

model.compile(
    optimizer=SGD(learning_rate=lr_schedule, momentum=0.9),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [3]:
datagen = ImageDataGenerator(
    rotation_range=20,         
    zoom_range=0.2,             
    width_shift_range=0.2,      
    height_shift_range=0.2,     
    shear_range=0.2,            
    fill_mode='nearest',
    preprocessing_function=lambda x: x + np.random.normal(0, 0.05, x.shape)  # <--- Novo: ruído gaussiano
)
datagen.fit(X_train)

In [4]:
checkpoint = ModelCheckpoint('best_model.keras', monitor='val_accuracy', save_best_only=True, mode='max')
early_stop = EarlyStopping(monitor='val_accuracy', patience=15, mode='max', restore_best_weights=True)  # <--- Paciência aumentada
reduce_lr = ReduceLROnPlateau(monitor='val_accuracy', factor=0.5, patience=5, min_lr=1e-6)

callbacks = [checkpoint, early_stop, reduce_lr]

batch_size = 128  
epochs = 100      

history = model.fit(
    datagen.flow(X_train, y_train, batch_size=batch_size, shuffle=True),
    steps_per_epoch=len(X_train) // batch_size,
    epochs=epochs,
    validation_data=(X_val, y_val),
    callbacks=callbacks
)

Epoch 1/100


  self._warn_if_super_not_called()


[1m295/295[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m268s[0m 895ms/step - accuracy: 0.6782 - loss: 1.1609 - val_accuracy: 0.1121 - val_loss: 9.1253 - learning_rate: 0.0080
Epoch 2/100
[1m  1/295[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m4:04[0m 833ms/step - accuracy: 0.9375 - loss: 0.4156

  self.gen.throw(typ, value, traceback)


[1m295/295[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 23ms/step - accuracy: 0.9375 - loss: 0.4156 - val_accuracy: 0.1121 - val_loss: 9.0591 - learning_rate: 0.0080
Epoch 3/100
[1m295/295[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m263s[0m 890ms/step - accuracy: 0.9474 - loss: 0.2640 - val_accuracy: 0.9555 - val_loss: 0.2283 - learning_rate: 0.0036
Epoch 4/100
[1m295/295[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 25ms/step - accuracy: 0.9688 - loss: 0.1858 - val_accuracy: 0.9571 - val_loss: 0.2219 - learning_rate: 0.0035
Epoch 5/100
[1m295/295[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m263s[0m 890ms/step - accuracy: 0.9632 - loss: 0.2080 - val_accuracy: 0.9893 - val_loss: 0.1228 - learning_rate: 3.0094e-04
Epoch 6/100
[1m295/295[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 35ms/step - accuracy: 0.9766 - loss: 0.2172 - val_accuracy: 0.9893 - val_loss: 0.1227 - learning_rate: 2.9030e-04
Epoch 7/100
[1m295/295[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

In [5]:
from tensorflow.keras.models import load_model
model = load_model('best_model.keras') 

In [6]:
tta_steps = 15
predictions = []

datagen_test = ImageDataGenerator(
    rotation_range=15,  
    width_shift_range=0.15,
    height_shift_range=0.15,
    zoom_range=0.15,
    shear_range=0.15,
    fill_mode='nearest'
)

for _ in range(tta_steps):
    for batch in datagen_test.flow(test, batch_size=len(test), shuffle=False):
        pred = model.predict(batch, verbose=0)
        predictions.append(pred)
        break  

final_pred = np.mean(predictions, axis=0)
predicted_labels = np.argmax(final_pred, axis=1)

In [7]:
submission = pd.DataFrame({"ImageId": np.arange(1, len(predicted_labels) + 1),
                           "Label": predicted_labels})
submission.to_csv("submission.csv", index=False)