In [213]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.applications import VGG16
from sklearn.model_selection import train_test_split
from tensorflow.keras.optimizers import Adam

In [214]:
# Paths to data
data_dir = "C:\\Users\\Apoorva H J\\Deep Learning\\hackathon"
train_dir = "C:\\Users\\Apoorva H J\\Deep Learning\\hackathon\\dataset\\train"
test_dir = "C:\\Users\\Apoorva H J\\Deep Learning\\hackathon\\dataset\\test_1"

In [215]:
train_dir

'C:\\Users\\Apoorva H J\\Deep Learning\\hackathon\\dataset\\train'

In [216]:
# Load CSV files
train_labels = pd.read_csv('train.csv')
test_images = pd.read_csv('test.csv')

In [217]:
train_labels.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1709 entries, 0 to 1708
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype
---  ------   --------------  -----
 0   file_id  1709 non-null   int64
 1   label    1709 non-null   int64
dtypes: int64(2)
memory usage: 26.8 KB


In [218]:
test_images.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 332 entries, 0 to 331
Data columns (total 1 columns):
 #   Column   Non-Null Count  Dtype
---  ------   --------------  -----
 0   file_id  332 non-null    int64
dtypes: int64(1)
memory usage: 2.7 KB


In [219]:
image_size = (128, 128)
batch_size = 32

In [220]:
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest')

test_datagen = ImageDataGenerator(rescale=1./255)

In [221]:
train_labels['file_path'] = train_labels.apply(
    lambda row: os.path.join(train_dir, "training_real" if row['label'] == 1 else "training_fake", f"{row['file_id']}.jpg"), axis=1
) 

In [222]:
train_labels['file_id'] = train_labels['file_id'].astype(str) + '.jpg'
train_labels['label'] = train_labels['label'].astype(str)

In [223]:
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_labels,
    x_col='file_path',
    y_col='label',
    target_size=image_size,
    batch_size=batch_size,
    class_mode='binary')

Found 1709 validated image filenames belonging to 2 classes.


In [232]:
test_images['file_id'] = test_images['file_id'].astype(str) + '.jpg'

In [184]:
test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_images,
    directory=test_dir,  # Directory containing the test images
    x_col='file_id',  # Filenames column in CSV
    target_size=image_size,
    batch_size=batch_size,
    class_mode=None,  # No labels for test data
    shuffle=False     # Do not shuffle for prediction
)

Found 332 validated image filenames.


In [228]:
from keras import layers, models
from tensorflow.keras import layers

model_1 = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)),
    layers.Dropout(0.5),      # Adds dropout layer
    layers.MaxPooling2D((2, 2)),
    layers.BatchNormalization(),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.Dropout(0.5),      # Adds dropout layer
    layers.MaxPooling2D((2, 2)),
    layers.BatchNormalization(),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.Dropout(0.5),      # Adds dropout layer
    layers.MaxPooling2D((2, 2)),
    layers.BatchNormalization(),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.Dropout(0.5),      # Adds dropout layer
    layers.MaxPooling2D((2, 2)),
    layers.BatchNormalization(),
    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])
model_1.summary()

In [230]:
model_1.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [226]:
from keras.callbacks import EarlyStopping
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

In [231]:
history = model_1.fit(
    train_generator,
    epochs=20,
    validation_data=train_generator,
    steps_per_epoch=train_generator.samples // batch_size,
    validation_steps=train_generator.samples // batch_size,
    callbacks=[early_stopping]
)

Epoch 1/20
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 1s/step - accuracy: 0.5080 - loss: 1.3692 - val_accuracy: 0.4469 - val_loss: 0.7115
Epoch 2/20
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.3438 - loss: 1.4441 - val_accuracy: 0.4615 - val_loss: 0.6931
Epoch 3/20
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 1s/step - accuracy: 0.5313 - loss: 0.9790 - val_accuracy: 0.4528 - val_loss: 0.7064
Epoch 4/20
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.3750 - loss: 1.0473 - val_accuracy: 0.4615 - val_loss: 0.7466
Epoch 5/20
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 1s/step - accuracy: 0.5262 - loss: 0.8718 - val_accuracy: 0.4463 - val_loss: 0.7197
Epoch 6/20
[1m53/53[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5625 - loss: 0.7883 - val_accuracy: 0.4615 - val_loss: 0.6932
Epoch 7/20
[1m53/53[0m [32m━━━━━━━━━━

In [234]:
predictions = model_1.predict(test_generator)

[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 342ms/step


In [115]:
if test_generator.samples > 0:
    predictions = model.predict(test_generator)

[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 458ms/step


In [236]:
predictions = (predictions > 0.5).astype(int)
output = pd.DataFrame({'file_id': test_images['file_id'], 'label': predictions.flatten()})

In [237]:
output['file_id'] = output['file_id'].str.replace('.jpg', '', regex=False)

In [238]:
 output.to_csv('submission_2.csv', index=False)