In [46]:
# Colab-only setup: mount Google Drive so the dataset paths under /content/drive resolve.
from google.colab import drive
# NOTE(review): force_remount=True presumably redoes the mount even when Drive is
# already mounted, which makes re-running this cell safe — confirm against Colab docs.
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [2]:
# Root folder on the mounted Drive; every dataset location below is derived from it.
data_dir = '/content/drive/My Drive/IIITB_Data_SourceCode_Result'

# Image folders of the two subsets.
fake_img_dir = f'{data_dir}/fake_cifake_images'
real_img_dir = f'{data_dir}/real_cifake_images'

# JSON files holding one prediction record per image of each subset.
fake_json_path = f'{data_dir}/fake_cifake_preds.json'
real_json_path = f'{data_dir}/real_cifake_preds.json'

In [3]:
import pandas as pd
import numpy as np

In [4]:
# Load the per-image prediction tables of the fake and real subsets.
# Only I/O and parse failures are caught, the real cause is reported, and the error
# is re-raised: swallowing it would leave fake_json / real_json undefined and make
# the following cells fail with an unrelated NameError.
try:
  fake_json = pd.read_json(fake_json_path)
  real_json = pd.read_json(real_json_path)
except (OSError, ValueError) as err:
  print(f'Error: could not load prediction files: {err}')
  raise

In [5]:
# Render both prediction tables, fake subset first, using the notebook's rich display.
for preds in (fake_json, real_json):
  display(pd.DataFrame(preds))

Unnamed: 0,index,prediction
0,1,fake
1,2,fake
2,3,fake
3,4,fake
4,5,fake
...,...,...
995,996,fake
996,997,fake
997,998,fake
998,999,fake


Unnamed: 0,index,prediction
0,1,real
1,2,fake
2,3,real
3,4,real
4,5,real
...,...,...
995,996,real
996,997,real
997,998,real
998,999,real


In [6]:
# Shift the ids of the real subset so they do not collide with the ids of the fake
# subset once both tables are concatenated.
REAL_INDEX_OFFSET = 1000
# The guard keeps this cell idempotent: a bare `+= 1000` would shift the ids again on
# every re-run. NOTE(review): assumes the raw ids never exceed the offset (the
# displayed data runs 1..1000) — confirm if the dataset grows.
if real_json['index'].min() <= REAL_INDEX_OFFSET:
  real_json['index'] += REAL_INDEX_OFFSET

In [7]:
# Stack the fake rows first, then the real rows; ignore_index=True renumbers the
# row labels of the combined table from 0.
full_json = pd.concat([fake_json, real_json], ignore_index=True)

In [8]:
# Sanity check: (rows, columns) of the combined table.
full_json.shape

(2000, 2)

In [9]:
# Label distribution per source folder and overall.
# The counts are computed up front so the f-string does not have to reuse its own
# quote character inside the replacement fields, which is a SyntaxError on
# Python versions before 3.12. The printed text is unchanged.
fake_counts = fake_json['prediction'].value_counts()
real_counts = real_json['prediction'].value_counts()
total_counts = full_json['prediction'].value_counts()
print(f'Fake Directory:\n{fake_counts}, \n\nReal Directory\n{real_counts}, \n\nTotal\n{total_counts}')

Fake Directory:
prediction
fake    988
real     12
Name: count, dtype: int64, 

Real Directory
prediction
real    976
fake     24
Name: count, dtype: int64, 

Total
prediction
fake    1012
real     988
Name: count, dtype: int64


In [10]:
# Preview the combined table (the notebook display elides the middle rows).
full_json

Unnamed: 0,index,prediction
0,1,fake
1,2,fake
2,3,fake
3,4,fake
4,5,fake
...,...,...
1995,1996,real
1996,1997,real
1997,1998,real
1998,1999,real


In [11]:
# Amount that was added to real_json['index'] before the two tables were concatenated.
REAL_INDEX_OFFSET = 1000


def get_image_path(row):
  """Return the PNG path on Drive for one row of ``full_json``.

  Rows whose label (``row.name``) is below ``len(fake_json)`` belong to the fake
  subset and use their ``index`` value directly as the file number. All other rows
  belong to the real subset, whose ``index`` was shifted by ``REAL_INDEX_OFFSET``
  earlier in the notebook; the shift is undone to recover the file number.

  Args:
    row: one row of ``full_json`` as passed by ``DataFrame.apply(..., axis=1)``;
      must expose an ``"index"`` entry.

  Returns:
    The image path as a string.
  """
  if row.name < len(fake_json):
    return f'{fake_img_dir}/{row["index"]}.png'
  # Plain subtraction replaces the former `% 1001 + 1`, which yields the same file
  # number for ids 1001..2001 but wraps around to 1 from id 2002 onwards.
  return f'{real_img_dir}/{row["index"] - REAL_INDEX_OFFSET}.png'


full_json['image_path'] = full_json.apply(get_image_path, axis=1)

display(full_json.head())
display(full_json.tail())

Unnamed: 0,index,prediction,image_path
0,1,fake,/content/drive/My Drive/DATASET_IIITB/fake_cif...
1,2,fake,/content/drive/My Drive/DATASET_IIITB/fake_cif...
2,3,fake,/content/drive/My Drive/DATASET_IIITB/fake_cif...
3,4,fake,/content/drive/My Drive/DATASET_IIITB/fake_cif...
4,5,fake,/content/drive/My Drive/DATASET_IIITB/fake_cif...


Unnamed: 0,index,prediction,image_path
1995,1996,real,/content/drive/My Drive/DATASET_IIITB/real_cif...
1996,1997,real,/content/drive/My Drive/DATASET_IIITB/real_cif...
1997,1998,real,/content/drive/My Drive/DATASET_IIITB/real_cif...
1998,1999,real,/content/drive/My Drive/DATASET_IIITB/real_cif...
1999,2000,real,/content/drive/My Drive/DATASET_IIITB/real_cif...


In [12]:
# Spot-check the path built for row label 1000; rows are stacked fake-first, and the
# output below shows this row resolving into the real image folder.
full_json['image_path'][1000]

'/content/drive/My Drive/DATASET_IIITB/real_cifake_images/1.png'

In [13]:
# Encode the label as a boolean target: True for "real", False for "fake".
# The dtype guard keeps the cell idempotent: re-running the bare comparison on the
# already-converted column would silently turn every label into False.
if full_json['prediction'].dtype != bool:
  unexpected = set(full_json['prediction'].unique()) - {'real', 'fake'}
  if unexpected:
    # Any other value would otherwise be mapped to False (i.e. "fake") unnoticed.
    raise ValueError(f'Unexpected prediction labels: {sorted(map(str, unexpected))}')
  full_json['prediction'] = full_json['prediction'].eq('real')

In [14]:
# Check column dtypes and non-null counts after the label conversion.
full_json.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2000 entries, 0 to 1999
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   index       2000 non-null   int64 
 1   prediction  2000 non-null   bool  
 2   image_path  2000 non-null   object
dtypes: bool(1), int64(1), object(1)
memory usage: 33.3+ KB


In [15]:
from sklearn.model_selection import train_test_split

In [16]:
# Hold out 20% of the rows for validation; the fixed random_state makes the split
# repeatable across runs.
X_train, X_val, y_train, y_val = train_test_split(
    full_json['image_path'],
    full_json['prediction'],
    test_size=0.2,
    random_state=42,
)

In [17]:
# Label distribution of the training and validation targets, shown as a tuple.
y_train.value_counts(), y_val.value_counts()

(prediction
 False    811
 True     789
 Name: count, dtype: int64,
 prediction
 False    201
 True     199
 Name: count, dtype: int64)

In [18]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [19]:
# Notebook-level configuration constants.
# NOTE(review): the ResNet50 cell further down defines its own img_size, batch_size
# and epoch counts and does not reference these names — confirm whether they are
# still needed.
IMG_SIZE = 32
CHANNELS = 3
NUM_CLASSES = 2
TOTAL_IMAGES = 2000
BATCH_SIZE = 32
EPOCHS = 50

In [20]:

# Report how many samples landed in each split.
n_train, n_val = len(X_train), len(X_val)
print(f"Training samples: {n_train}")
print(f"Validation samples: {n_val}")

Training samples: 1600
Validation samples: 400


In [24]:
import os

In [25]:
# Re-assemble each split into a frame; the boolean labels are stored as strings.
train_df = pd.DataFrame({'image_path': X_train, 'prediction': y_train.astype(str)})
val_df = pd.DataFrame({'image_path': X_val, 'prediction': y_val.astype(str)})

# Diagnostic 1: do a few randomly drawn training paths exist on disk?
print("Checking if sample paths exist:")
sample_image_paths_check = list(train_df['image_path'].sample(5))
for img_path in sample_image_paths_check:
    found = os.path.exists(img_path)
    print(f"Path exists: {img_path}" if found else f"Path does NOT exist: {img_path}")

# Diagnostic 2: a few raw index values from the fake prediction table.
print("\nSample fake_json index values:")
print(fake_json['index'].sample(5).tolist())

# Diagnostic 3: first entries of the fake image folder, to compare naming by eye.
print(f"\nFiles in fake image directory ({fake_img_dir}):")
try:
    fake_files = os.listdir(fake_img_dir)
except FileNotFoundError:
    print("Fake image directory not found.")
else:
    print(fake_files[:10])

Checking if sample paths exist:
Path exists: /content/drive/My Drive/DATASET_IIITB/real_cifake_images/46.png
Path exists: /content/drive/My Drive/DATASET_IIITB/real_cifake_images/17.png
Path exists: /content/drive/My Drive/DATASET_IIITB/fake_cifake_images/752.png
Path exists: /content/drive/My Drive/DATASET_IIITB/real_cifake_images/187.png
Path exists: /content/drive/My Drive/DATASET_IIITB/fake_cifake_images/220.png

Sample fake_json index values:
[960, 979, 124, 951, 575]

Files in fake image directory (/content/drive/My Drive/DATASET_IIITB/fake_cifake_images):
['90.png', '59.png', '35.png', '88.png', '81.png', '51.png', '24.png', '6.png', '39.png', '91.png']


In [27]:
from tensorflow.keras.preprocessing.image import load_img
import os

# Smoke test: try to decode five randomly drawn training images and report, per
# file, whether loading succeeded.
sample_image_paths = train_df['image_path'].sample(5).tolist()

print("Attempting to load sample images:")
for img_path in sample_image_paths:
    try:
        img = load_img(img_path)
    except Exception as err:
        print(f"Error loading {img_path}: {err}")
    else:
        print(f"Successfully loaded: {img_path}")

Attempting to load sample images:
Successfully loaded: /content/drive/My Drive/DATASET_IIITB/fake_cifake_images/953.png
Successfully loaded: /content/drive/My Drive/DATASET_IIITB/real_cifake_images/582.png
Successfully loaded: /content/drive/My Drive/DATASET_IIITB/fake_cifake_images/951.png
Successfully loaded: /content/drive/My Drive/DATASET_IIITB/fake_cifake_images/819.png
Successfully loaded: /content/drive/My Drive/DATASET_IIITB/fake_cifake_images/492.png


In [28]:
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from sklearn.model_selection import train_test_split
import pandas as pd
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [29]:
# ---------------------------------------------------------------------------
# Data
# ---------------------------------------------------------------------------
# The boolean target is cast to string once, on the full frame and before the split,
# because flow_from_dataframe with class_mode='binary' works on string class labels.
# Casting here (instead of assigning into the split results afterwards) also avoids
# pandas' SettingWithCopyWarning. The stratified split itself is unaffected.
df = full_json.drop(columns='index').assign(
    prediction=lambda frame: frame['prediction'].astype(str)
)

train_df, val_df = train_test_split(df, test_size=0.2, stratify=df['prediction'], random_state=42)

img_size = (224, 224)
batch_size = 50

# Both generators apply only the ResNet50 preprocessing (no augmentation).
train_datagen = ImageDataGenerator(preprocessing_function=tf.keras.applications.resnet50.preprocess_input)
val_datagen = ImageDataGenerator(preprocessing_function=tf.keras.applications.resnet50.preprocess_input)

train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    x_col='image_path',
    y_col='prediction',
    target_size=img_size,
    batch_size=batch_size,
    class_mode='binary',
    seed=42
)

# shuffle=False: validation metrics do not depend on batch order, and a fixed order
# keeps any later per-sample comparison against val_df aligned.
validation_generator = val_datagen.flow_from_dataframe(
    dataframe=val_df,
    x_col='image_path',
    y_col='prediction',
    target_size=img_size,
    batch_size=batch_size,
    class_mode='binary',
    shuffle=False,
    seed=42
)

# ---------------------------------------------------------------------------
# Model: frozen ImageNet ResNet50 backbone + small binary classification head
# ---------------------------------------------------------------------------
input_shape = img_size + (3,)  # derived from img_size so the two cannot drift apart

base_model = ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=input_shape
)
base_model.trainable = False

model_resnet = Sequential([
    Input(shape=input_shape),
    base_model,
    GlobalAveragePooling2D(),
    Dropout(0.4),
    Dense(1, activation='sigmoid')
])

model_resnet.compile(
    optimizer=Adam(1e-4),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

# Stage 1 callbacks: stop early on stalled val_loss and lower the LR on plateaus.
callbacks = [
    EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True),
    ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, verbose=1)
]

# Stage 2 callbacks: tighter patience, plus a checkpoint of the best model on disk.
callbacks_finetune = [
    EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True),
    ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=2, min_lr=1e-7, verbose=1),
    ModelCheckpoint('best_resnet50_model.keras', monitor='val_loss', save_best_only=True, verbose=1)
]

# ---------------------------------------------------------------------------
# Stage 1: feature extraction (only the new head is trained)
# ---------------------------------------------------------------------------
print("Starting initial training (feature extraction)...")
history = model_resnet.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=20,
    callbacks=callbacks
)

# ---------------------------------------------------------------------------
# Stage 2: fine-tune the last 30 backbone layers with a 10x smaller learning rate
# ---------------------------------------------------------------------------
print("\nStarting fine-tuning...")
base_model.trainable = True
for layer in base_model.layers[:-30]:
    layer.trainable = False
# BatchNormalization layers stay frozen even inside the unfrozen tail: a frozen BN
# layer runs in inference mode, so its moving statistics are not overwritten by
# small fine-tuning batches (recommended by the Keras transfer-learning guide).
for layer in base_model.layers[-30:]:
    if isinstance(layer, tf.keras.layers.BatchNormalization):
        layer.trainable = False

# Re-compile so the changed trainable flags take effect.
model_resnet.compile(optimizer=Adam(1e-5),
              loss='binary_crossentropy',
              metrics=['accuracy'])

fine_history = model_resnet.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=10,
    callbacks=callbacks_finetune
)

Found 1600 validated image filenames belonging to 2 classes.
Found 400 validated image filenames belonging to 2 classes.
Starting initial training (feature extraction)...


  self._warn_if_super_not_called()


Epoch 1/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m192s[0m 6s/step - accuracy: 0.5095 - loss: 0.9831 - val_accuracy: 0.5775 - val_loss: 0.7135 - learning_rate: 1.0000e-04
Epoch 2/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 270ms/step - accuracy: 0.5413 - loss: 0.8582 - val_accuracy: 0.6375 - val_loss: 0.6498 - learning_rate: 1.0000e-04
Epoch 3/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 340ms/step - accuracy: 0.5810 - loss: 0.7611 - val_accuracy: 0.6750 - val_loss: 0.6002 - learning_rate: 1.0000e-04
Epoch 4/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 305ms/step - accuracy: 0.5937 - loss: 0.7415 - val_accuracy: 0.7075 - val_loss: 0.5619 - learning_rate: 1.0000e-04
Epoch 5/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 284ms/step - accuracy: 0.6283 - loss: 0.6841 - val_accuracy: 0.7400 - val_loss: 0.5312 - learning_rate: 1.0000e-04
Epoch 6/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━

In [30]:
# Folder holding the unlabeled test images, located under the dataset root.
test_dir = f'{data_dir}/test'

In [31]:
# Empty frame with a single 'image_path' column.
# NOTE(review): the following cell re-creates df_test from scratch before filling it,
# so this cell looks redundant — confirm before removing.
df_test = pd.DataFrame(columns=['image_path'])

In [32]:
# Build the table of test image paths: files are addressed as 1.png .. N.png inside
# test_dir. The frame is constructed in one go; growing it row by row through
# df.loc[i, ...] reallocates on every insertion and is needlessly slow.
N = 500
df_test = pd.DataFrame(
  {'image_path': [f'{test_dir}/{file_no}.png' for file_no in range(1, N + 1)]}
)

In [33]:
# Spot-check one generated path: row label 199 holds the path of file 200.png,
# because file numbers start at 1 while row labels start at 0.
df_test['image_path'][199]

'/content/drive/My Drive/DATASET_IIITB/test/200.png'

In [35]:
# Same preprocessing as used for training/validation.
test_datagen_resnet = ImageDataGenerator(
    preprocessing_function=tf.keras.applications.resnet50.preprocess_input
)

# shuffle=False is essential here: flow_from_dataframe shuffles by default, which
# would return the predictions in a random order while the export step numbers them
# 1..N in df_test order — every label would be attached to the wrong image.
test_generator_resnet = test_datagen_resnet.flow_from_dataframe(
    dataframe=df_test,
    x_col='image_path',
    y_col=None,
    target_size=img_size,
    batch_size=batch_size,
    class_mode=None,
    shuffle=False,
    seed = 42
)

print("\nMaking predictions with ResNet50 model...")
predictions_resnet = model_resnet.predict(test_generator_resnet)

Found 500 validated image filenames.

Making predictions with ResNet50 model...


  self._warn_if_super_not_called()


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m209s[0m 23s/step


In [39]:
# Binarise the sigmoid outputs: scores strictly above 0.5 become True.
y_test_resnet = np.greater(predictions_resnet, 0.5)

# Show the first ten decisions as a flat array.
print("\nResNet50 Predicted Labels (True for > 0.5):")
print(y_test_resnet[:10].ravel())


ResNet50 Predicted Labels (True for > 0.5):
[ True  True  True  True False  True  True  True  True  True]


In [48]:
# Export the test predictions as JSON records of the form {index, prediction}.
predicted_flags = y_test_resnet.flatten()

# Records are numbered 1..n by position, so a test file that was dropped or missing
# when the generator was built would shift every following label. Fail loudly
# instead of writing a misaligned file.
# NOTE(review): the numbering is only meaningful if the test generator yields the
# files in df_test order (i.e. it is not shuffled) — confirm.
if len(predicted_flags) != len(df_test):
    raise ValueError(
        f'Got {len(predicted_flags)} predictions for {len(df_test)} test images; '
        'refusing to write misaligned results.'
    )

y_test_resnet_df = pd.DataFrame({
    'index': range(1, len(predicted_flags) + 1),
    # True was encoded as "real" when the labels were built, False as "fake".
    'prediction': np.where(predicted_flags, 'real', 'fake')
})

output_dir = data_dir + '/Colabs (source code & Result)'
os.makedirs(output_dir, exist_ok=True)  # to_json does not create missing folders
y_test_resnet_df.to_json(output_dir + '/jerry_prediction.json', orient='records', indent=4)

print("Predictions saved to jerry_prediction.json in your Google Drive.")

Predictions saved to jerry_prediction.json in your Google Drive.
