In [8]:
#!pip install tensorflow


In [14]:
import os
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models
from tensorflow.keras.applications import EfficientNetB0

In [31]:
BASE_TRAIN_DIR = 'dataset' 
BASE_TEST_DIR = 'test'
TRAIN_CSV_PATH = 'train.csv'
TEST_CSV_PATH = 'test.csv'
SUBMISSION_FILE = 'submission.csv'

In [32]:
IMG_SIZE = (224, 224)
BATCH_SIZE = 32
EPOCHS = 10
FINE_TUNE_EPOCHS = 5

In [33]:
df = pd.read_csv(TRAIN_CSV_PATH)

def map_path(row):
    label_folder = 'training_fake' if row['label'] == 0 else 'training_real'
    return os.path.join(label_folder, f"{row['file_id']}.jpg")

df['file_path'] = df.apply(map_path, axis=1)
df['label_str'] = df['label'].astype(str)

In [34]:
train_df, val_df = train_test_split(df, test_size=0.2, stratify=df['label'], random_state=42)

In [35]:
train_datagen = ImageDataGenerator(
    rescale=1./255,
    horizontal_flip=True,
    rotation_range=20,
    zoom_range=0.2,
    width_shift_range=0.1,
    height_shift_range=0.1
)
val_datagen = ImageDataGenerator(rescale=1./255)

In [36]:
train_gen = train_datagen.flow_from_dataframe(
    train_df,
    directory=BASE_TRAIN_DIR,
    x_col='file_path',
    y_col='label_str',
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary'
)
val_gen = val_datagen.flow_from_dataframe(
    val_df,
    directory=BASE_TRAIN_DIR,
    x_col='file_path',
    y_col='label_str',
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary'
)

Found 1367 validated image filenames belonging to 2 classes.
Found 342 validated image filenames belonging to 2 classes.


In [37]:
cnn_model = models.Sequential([
    layers.Input(shape=(*IMG_SIZE, 3)),
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.GlobalAveragePooling2D(),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid')
])

cnn_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
cnn_model.fit(train_gen, validation_data=val_gen, epochs=EPOCHS)


  self._warn_if_super_not_called()


Epoch 1/10
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 1s/step - accuracy: 0.5696 - loss: 0.6882 - val_accuracy: 0.5556 - val_loss: 0.6896
Epoch 2/10
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 884ms/step - accuracy: 0.5551 - loss: 0.6901 - val_accuracy: 0.5556 - val_loss: 0.6881
Epoch 3/10
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 883ms/step - accuracy: 0.5652 - loss: 0.6883 - val_accuracy: 0.5556 - val_loss: 0.6886
Epoch 4/10
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 881ms/step - accuracy: 0.5384 - loss: 0.6928 - val_accuracy: 0.5556 - val_loss: 0.6879
Epoch 5/10
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 898ms/step - accuracy: 0.5547 - loss: 0.6889 - val_accuracy: 0.5556 - val_loss: 0.6876
Epoch 6/10
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 895ms/step - accuracy: 0.5519 - loss: 0.6909 - val_accuracy: 0.5556 - val_loss: 0.6873
Epoch 7/10
[1m43/43[0m 

<keras.src.callbacks.history.History at 0x2ad98cbc8f0>

In [38]:
from tensorflow.keras.applications import EfficientNetB0

In [39]:

base_model = EfficientNetB0(include_top=False, input_shape=(224, 224, 3), weights='imagenet')
base_model.trainable = False  # Start frozen

model = tf.keras.Sequential([
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dropout(0.3),
    layers.Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

model.fit(train_gen, epochs=10, validation_data=val_gen)


Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5
[1m16705208/16705208[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 0us/step
Epoch 1/10
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 2s/step - accuracy: 0.5330 - loss: 0.6954 - val_accuracy: 0.5556 - val_loss: 0.6886
Epoch 2/10
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 1s/step - accuracy: 0.5315 - loss: 0.6932 - val_accuracy: 0.5556 - val_loss: 0.6877
Epoch 3/10
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 1s/step - accuracy: 0.5718 - loss: 0.6871 - val_accuracy: 0.5556 - val_loss: 0.6875
Epoch 4/10
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 1s/step - accuracy: 0.5068 - loss: 0.7003 - val_accuracy: 0.5556 - val_loss: 0.6870
Epoch 5/10
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 1s/step - accuracy: 0.5331 - loss: 0.6953 - val_accuracy: 0.5556 - val_loss: 0.6871
Epoch 6/10
[1m43/4

<keras.src.callbacks.history.History at 0x2ad9a99f800>

In [40]:
# Load test file list
test_df = pd.read_csv(TEST_CSV_PATH)
test_df['file_path'] = test_df['file_id'].astype(str) + '.jpg'

# Data generator
test_datagen = ImageDataGenerator(rescale=1./255)
test_flow = test_datagen.flow_from_dataframe(
    test_df,
    directory=BASE_TEST_DIR,
    x_col='file_path',
    y_col=None,
    target_size=IMG_SIZE,
    class_mode=None,
    shuffle=False,
    batch_size=32
)

# Predict
preds = model.predict(test_flow)
test_df['label'] = (preds > 0.5).astype(int)

# Save CSV
test_df[['file_id', 'label']].to_csv(SUBMISSION_FILE, index=False)
print("✅ submission.csv created!")

Found 332 validated image filenames.


  self._warn_if_super_not_called()


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 1s/step
✅ submission.csv created!
