In [12]:
import numpy as np 
import pandas as pd 
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models, Input, Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
import os

In [2]:
# Image directories for the train and test sets
train = "data/train/"
test = "data/test/"

# Read the label table and the sample submission, keeping only the
# first 1000 rows of each
df_labels = pd.read_csv('data/train_labels.csv').head(1000)
df_samples = pd.read_csv('data/sample_submission.csv').head(1000)

In [4]:
# Stratified 80/20 train/validation split of the labelled rows. Each half is
# then cast to str column-wise and its 'id' values get the '.tif' extension
# appended, so the generators can use 'id' as the image filename.
train_data, val_data = (
    part.astype(str).assign(id=lambda frame: frame['id'] + '.tif')
    for part in train_test_split(
        df_labels,
        test_size=0.2,
        stratify=df_labels['label'],
        random_state=42,
    )
)

In [5]:
# Test-set file list: a str-typed copy of the sample submission whose 'id'
# values carry the '.tif' extension.
test_data = df_samples.astype(str).assign(id=lambda frame: frame['id'] + '.tif')

In [6]:
# Training images: scale pixel values by 1/255 and apply random shear, zoom
# and horizontal-flip augmentation.
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)

# Validation and test images: same 1/255 scaling, no augmentation.
val_datagen = ImageDataGenerator(rescale=1.0 / 255)
test_datagen = ImageDataGenerator(rescale=1.0 / 255)

In [7]:
# Define model
def create_cnn_classifier(input_shape):
    """Build and compile a small CNN with a single sigmoid output.

    The network stacks two stages of 3x3 ReLU convolution followed by 2x2
    max-pooling (32 filters, then 64), flattens the feature maps, and ends
    with a 128-unit ReLU dense layer feeding one sigmoid unit.

    Args:
        input_shape: Shape handed to ``Input(shape=...)``.

    Returns:
        A ``Model`` compiled with the 'adam' optimizer, 'binary_crossentropy'
        loss and 'accuracy' as the reported metric.
    """
    image_in = Input(shape=input_shape)

    # Convolution + pooling stages; only the filter count changes per stage.
    features = image_in
    for n_filters in (32, 64):
        features = Conv2D(n_filters, kernel_size=3, activation='relu')(features)
        features = MaxPooling2D(pool_size=(2, 2))(features)

    # Classification head.
    flat = Flatten()(features)
    hidden = Dense(128, activation='relu')(flat)
    probability = Dense(1, activation='sigmoid')(hidden)

    classifier = Model(image_in, probability)
    classifier.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

In [8]:
# Build the model and the training / validation batch generators
input_shape = (96, 96, 3)
cnn_model = create_cnn_classifier(input_shape)

# Both generators load images from the training directory, take filenames
# from 'id' and binary labels from 'label'; they differ only in the dataframe
# they read and in the ImageDataGenerator that produces them.
_labelled_flow_options = dict(
    directory=train,
    x_col='id',
    y_col='label',
    target_size=(96, 96),
    batch_size=32,
    class_mode='binary',
)
train_generator = train_datagen.flow_from_dataframe(dataframe=train_data, **_labelled_flow_options)
val_generator = val_datagen.flow_from_dataframe(dataframe=val_data, **_labelled_flow_options)

Found 800 validated image filenames belonging to 2 classes.
Found 200 validated image filenames belonging to 2 classes.


In [9]:
# Fit Model
# One epoch should be exactly one pass over each generator, so the step counts
# are taken from the generators themselves: len(generator) is the number of
# batches in one pass. Hard-coding step counts larger than that asks Keras for
# more batches than a pass provides, which can end training early with an
# "input ran out of data" warning or silently repeat data within an epoch.
cnn_model.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    epochs=10,
    validation_data=val_generator,
    validation_steps=len(val_generator)
)

Epoch 1/10


<keras.src.callbacks.History at 0x1e3f81bc580>

In [10]:
# Unlabelled test batches (no y_col / class_mode). shuffle=False is set so the
# batch order is not randomized; the prediction step relies on this to pair
# outputs with test_generator.filenames.
test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_data,
    directory=test,
    x_col='id',
    y_col=None,
    class_mode=None,
    shuffle=False,
    batch_size=32,
    target_size=(96, 96),
)

Found 1000 validated image filenames.


In [13]:
# Make predictions
predictions = cnn_model.predict(test_generator, steps=len(test_generator))
# Recover the bare image ids: drop any directory part and the file extension
# from each filename the generator loaded.
test_ids = [os.path.splitext(os.path.basename(filename))[0] for filename in test_generator.filenames]

# Create csv submission
# The header is copied from sample_submission.csv (id column first, prediction
# column second) so the file matches the expected submission format; the
# notebook already reads that id column as lowercase 'id', so hard-coded
# 'Id' / 'Predicted' headers would not match it.
id_column, target_column = df_samples.columns[:2]
submission_df = pd.DataFrame({
    id_column: test_ids,
    target_column: predictions.flatten()
})
submission_df.to_csv('submission.csv', index=False)

