<a href="https://colab.research.google.com/github/SamikshaSolanke/Deepfake-video-detection-CSI-Codezilla/blob/main/CSI_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive inside the Colab VM so that files under MyDrive
# (the label CSV and the saved model) can be read and written.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from sklearn.metrics import classification_report, confusion_matrix

In [None]:
# Read the label table from Google Drive.
# Later cells use its 'frame_path' and 'label' columns.
LABELS_CSV_PATH = '/content/drive/MyDrive/frame_labels.csv'
labels = pd.read_csv(LABELS_CSV_PATH)

In [None]:
# flow_from_dataframe(class_mode='binary') only accepts string class labels,
# so the 'label' column is stored as text before the split.
labels['label'] = labels['label'].astype(str)

# Hold out 30% of the rows for testing. Stratifying on the label keeps the
# class ratio equal in both partitions; the fixed seed makes the split repeatable.
# NOTE(review): the split is made per row (per frame path), so frames that
# come from the same video may land in both partitions — confirm this is intended.
train_df, test_df = train_test_split(
    labels,
    test_size=0.3,
    random_state=42,
    stratify=labels['label'],
)

In [None]:
# Image data generators: both only rescale pixel values by 1/255.
train_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# Training batches are drawn in shuffled order (the flow_from_dataframe default).
train_generator = train_datagen.flow_from_dataframe(
    train_df,
    x_col='frame_path',
    y_col='label',
    target_size=(224, 224),
    batch_size=64,
    class_mode='binary'
)

# Keep the test frames in dataframe order (shuffle=False) so that the output of
# model.predict(test_generator) can be matched row-by-row with the true labels.
# With the default shuffle=True the predictions come back in a random order.
test_generator = test_datagen.flow_from_dataframe(
    test_df,
    x_col='frame_path',
    y_col='label',
    target_size=(224, 224),
    batch_size=64,
    class_mode='binary',
    shuffle=False
)

Found 22134 validated image filenames belonging to 2 classes.
Found 9487 validated image filenames belonging to 2 classes.


In [None]:
# Transfer-learning classifier: ImageNet-pretrained MobileNetV2 backbone without
# its classification top -> global average pooling -> one sigmoid output unit.
base_model = MobileNetV2(include_top=False, weights='imagenet')
pooled_features = GlobalAveragePooling2D()(base_model.output)
predictions = Dense(1, activation='sigmoid')(pooled_features)
model = Model(inputs=base_model.input, outputs=predictions)

# Freeze every backbone layer so that only the new Dense head is trained.
for backbone_layer in base_model.layers:
    backbone_layer.trainable = False



Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5


In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss (pairs with the
# single sigmoid output) and accuracy as the reported metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Train the model for 30 epochs, validating on the test generator after each epoch.
# The returned History object is kept in `history` because the
# "Plot training history" cell reads history.history; without this assignment
# that cell raises a NameError on a fresh kernel.
history = model.fit(train_generator, validation_data=test_generator, epochs=30)

In [None]:
# Evaluate the model on the held-out frames.
# Labels and predictions are collected batch by batch from the same generator
# output, so each prediction stays paired with its own label even when
# test_generator shuffles its samples (the flow_from_dataframe default).
# Taking the labels from the generator also yields numeric class indices, the
# same type as the thresholded predictions; test_df['label'] holds strings,
# which cannot be compared against integer predictions.
test_generator.reset()
true_batches = []
prob_batches = []
for batch_index in range(len(test_generator)):
    batch_images, batch_labels = test_generator[batch_index]
    true_batches.append(batch_labels)
    prob_batches.append(model.predict_on_batch(batch_images))

y_true = np.concatenate(true_batches).astype(int).flatten()
y_pred = np.concatenate(prob_batches)
# Threshold the sigmoid outputs at 0.5 to obtain hard 0/1 predictions.
y_pred = np.round(y_pred).astype(int).flatten()

print(classification_report(y_true, y_pred))

In [None]:
# Confusion matrix drawn as an annotated heat map
# (rows: true class, columns: predicted class).
class_names = ['Real', 'Deepfake']
cm = confusion_matrix(y_true, y_pred)
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('True')
plt.show()

In [None]:
# Persist the trained model to Google Drive (HDF5 file, per the .h5 extension).
MODEL_SAVE_PATH = '/content/drive/MyDrive/deepfake_detector_model.h5'
model.save(MODEL_SAVE_PATH)

In [None]:
# Plot training history: one figure per metric, showing the training curve
# next to the validation curve across epochs.
for metric_key, axis_label in (('accuracy', 'Accuracy'), ('loss', 'Loss')):
    plt.plot(history.history[metric_key], label='train_' + metric_key)
    plt.plot(history.history['val_' + metric_key], label='val_' + metric_key)
    plt.title('Model ' + axis_label)
    plt.xlabel('Epochs')
    plt.ylabel(axis_label)
    plt.legend()
    plt.show()

In [None]:
#again new code... 2nd code
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Load the labels
labels = pd.read_csv('/content/drive/MyDrive/frame_labels.csv')

# Create a dataframe with one row per frame image: (file path, label).
# NOTE(review): this cell reads an `id` column while the "3rd code" cell reads
# `video_id` — confirm which column frame_labels.csv actually provides.
data = []
for row in labels.itertuples():
    # Label 0 maps to the 'Celeb-real' folder, any other label to 'Celeb-synthesis'.
    folder = 'Celeb-real' if row.label == 0 else 'Celeb-synthesis'
    folder_path = os.path.join('/content/drive/MyDrive/processed_frames', folder, row.id)
    for frame in os.listdir(folder_path):
        data.append((os.path.join(folder_path, frame), row.label))

data_df = pd.DataFrame(data, columns=['file_path', 'label'])

# flow_from_dataframe(class_mode='binary') raises a TypeError unless the label
# column holds strings, so cast it after the numeric comparison above is done.
data_df['label'] = data_df['label'].astype(str)

# Split the data: 30% held out, stratified by label, fixed seed for repeatability.
train_df, test_df = train_test_split(data_df, test_size=0.3, stratify=data_df['label'], random_state=42)

# Image data generators: both only rescale pixel values by 1/255.
train_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_dataframe(
    train_df,
    x_col='file_path',
    y_col='label',
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary'
)

# shuffle=False keeps the test frames in a fixed order so that predictions can
# be matched with test_generator.classes during evaluation.
test_generator = test_datagen.flow_from_dataframe(
    test_df,
    x_col='file_path',
    y_col='label',
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary',
    shuffle=False
)

# Create a model using MobileNetV2: pretrained backbone without its top,
# global average pooling, and a single sigmoid output unit.
base_model = MobileNetV2(weights='imagenet', include_top=False)
x = base_model.output
x = GlobalAveragePooling2D()(x)
predictions = Dense(1, activation='sigmoid')(x)

model = Model(inputs=base_model.input, outputs=predictions)

# Freeze the base model layers so only the new Dense head is trained.
for layer in base_model.layers:
    layer.trainable = False

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model; the History object feeds the plots at the end of the cell.
history = model.fit(train_generator, validation_data=test_generator, epochs=10)

# Evaluate the model.
# The true labels come from the generator itself: test_generator.classes holds
# integer class indices in the same (unshuffled) order in which predict() yields
# its outputs, and shares the integer type of the thresholded predictions.
y_true = np.asarray(test_generator.classes)
y_pred = model.predict(test_generator)
y_pred = np.round(y_pred).astype(int).flatten()

print(classification_report(y_true, y_pred))

# Confusion matrix (rows: true class, columns: predicted class).
cm = confusion_matrix(y_true, y_pred)
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=['Real', 'Deepfake'], yticklabels=['Real', 'Deepfake'])
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()

# Save the lightweight model
model.save('/content/drive/MyDrive/deepfake_detector_model.h5')

# Plot training history: accuracy curves first, then loss curves.
plt.plot(history.history['accuracy'], label='train_accuracy')
plt.plot(history.history['val_accuracy'], label='val_accuracy')
plt.title('Model Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

plt.plot(history.history['loss'], label='train_loss')
plt.plot(history.history['val_loss'], label='val_loss')
plt.title('Model Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
#3rd code
# Load the labels
labels = pd.read_csv('/content/drive/MyDrive/frame_labels.csv')

# Create a dataframe with one row per frame image: (file path, label).
# NOTE(review): assumes frame_labels.csv has a `video_id` column naming the
# per-video frame folder — confirm against the CSV.
data = []
for row in labels.itertuples():
    # Label 0 maps to the 'Celeb-real' folder, any other label to 'Celeb-synthesis'.
    folder = 'Celeb-real' if row.label == 0 else 'Celeb-synthesis'
    folder_path = os.path.join('/content/drive/MyDrive/processed_frames', folder, row.video_id)
    for frame in os.listdir(folder_path):
        data.append((os.path.join(folder_path, frame), row.label))

data_df = pd.DataFrame(data, columns=['file_path', 'label'])

# flow_from_dataframe(class_mode='binary') raises a TypeError unless the label
# column holds strings, so cast it after the numeric comparison above is done.
data_df['label'] = data_df['label'].astype(str)

# Split the data: 30% held out, stratified by label, fixed seed for repeatability.
train_df, test_df = train_test_split(data_df, test_size=0.3, stratify=data_df['label'], random_state=42)

# Image data generators: both only rescale pixel values by 1/255.
train_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_dataframe(
    train_df,
    x_col='file_path',
    y_col='label',
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary'
)

# shuffle=False keeps the test frames in a fixed order so that predictions can
# be matched with test_generator.classes during evaluation.
test_generator = test_datagen.flow_from_dataframe(
    test_df,
    x_col='file_path',
    y_col='label',
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary',
    shuffle=False
)

# Create a model using MobileNetV2: pretrained backbone without its top,
# global average pooling, and a single sigmoid output unit.
base_model = MobileNetV2(weights='imagenet', include_top=False)
x = base_model.output
x = GlobalAveragePooling2D()(x)
predictions = Dense(1, activation='sigmoid')(x)

model = Model(inputs=base_model.input, outputs=predictions)

# Freeze the base model layers so only the new Dense head is trained.
for layer in base_model.layers:
    layer.trainable = False

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model; the History object feeds the plots at the end of the cell.
history = model.fit(train_generator, validation_data=test_generator, epochs=10)

# Evaluate the model.
# The true labels come from the generator itself: test_generator.classes holds
# integer class indices in the same (unshuffled) order in which predict() yields
# its outputs, and shares the integer type of the thresholded predictions.
y_true = np.asarray(test_generator.classes)
y_pred = model.predict(test_generator)
y_pred = np.round(y_pred).astype(int).flatten()

print(classification_report(y_true, y_pred))

# Confusion matrix (rows: true class, columns: predicted class).
cm = confusion_matrix(y_true, y_pred)
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=['Real', 'Deepfake'], yticklabels=['Real', 'Deepfake'])
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()

# Save the lightweight model
model.save('/content/drive/MyDrive/deepfake_detector_model.h5')

# Plot training history: accuracy curves first, then loss curves.
plt.plot(history.history['accuracy'], label='train_accuracy')
plt.plot(history.history['val_accuracy'], label='val_accuracy')
plt.title('Model Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

plt.plot(history.history['loss'], label='train_loss')
plt.plot(history.history['val_loss'], label='val_loss')
plt.title('Model Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
#Final right code
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Load the labels
labels = pd.read_csv('/content/drive/MyDrive/frame_labels.csv')

# Convert 'label' column to strings: flow_from_dataframe(class_mode='binary')
# only accepts string class labels.
labels['label'] = labels['label'].astype(str)

# Split the data: 30% held out, stratified by label, fixed seed for repeatability.
train_df, test_df = train_test_split(labels, test_size=0.3, stratify=labels['label'], random_state=42)

# Image data generators: both only rescale pixel values by 1/255.
train_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_dataframe(
    train_df,
    x_col='frame_path',
    y_col='label',
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary'
)

# shuffle=False keeps the test frames in a fixed order so that predictions can
# be matched with test_generator.classes during evaluation.
test_generator = test_datagen.flow_from_dataframe(
    test_df,
    x_col='frame_path',
    y_col='label',
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary',
    shuffle=False
)

# Create a model using MobileNetV2: pretrained backbone without its top,
# global average pooling, and a single sigmoid output unit.
base_model = MobileNetV2(weights='imagenet', include_top=False)
x = base_model.output
x = GlobalAveragePooling2D()(x)
predictions = Dense(1, activation='sigmoid')(x)

model = Model(inputs=base_model.input, outputs=predictions)

# Freeze the base model layers so only the new Dense head is trained.
for layer in base_model.layers:
    layer.trainable = False

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model; the History object feeds the plots at the end of the cell.
history = model.fit(train_generator, validation_data=test_generator, epochs=10)

# Evaluate the model.
# test_df['label'] holds strings in dataframe order, which matches neither the
# type nor (with a shuffling generator) the order of the predictions. The true
# labels are therefore read from test_generator.classes: integer class indices
# in the same unshuffled order in which predict() yields its outputs.
y_true = np.asarray(test_generator.classes)
y_pred = model.predict(test_generator)
y_pred = np.round(y_pred).astype(int).flatten()

print(classification_report(y_true, y_pred))

# Confusion matrix (rows: true class, columns: predicted class).
cm = confusion_matrix(y_true, y_pred)
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=['Real', 'Deepfake'], yticklabels=['Real', 'Deepfake'])
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()

# Save the lightweight model
model.save('/content/drive/MyDrive/deepfake_detector_model.h5')

# Plot training history: accuracy curves first, then loss curves.
plt.plot(history.history['accuracy'], label='train_accuracy')
plt.plot(history.history['val_accuracy'], label='val_accuracy')
plt.title('Model Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

plt.plot(history.history['loss'], label='train_loss')
plt.plot(history.history['val_loss'], label='val_loss')
plt.title('Model Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
#alternate code
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Load the labels
labels = pd.read_csv('/content/drive/MyDrive/frame_labels.csv')

# flow_from_dataframe requires string labels for class_mode='sparse' as well as
# for 'binary'; without this cast it raises a TypeError on numeric labels.
labels['label'] = labels['label'].astype(str)

# Split the data: 30% held out, stratified by label, fixed seed for repeatability.
train_df, test_df = train_test_split(labels, test_size=0.3, stratify=labels['label'], random_state=42)

# Image data generators: both only rescale pixel values by 1/255.
train_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_dataframe(
    train_df,
    x_col='frame_path',
    y_col='label',
    target_size=(224, 224),
    batch_size=32,
    class_mode='sparse'  # Changed to 'sparse'
)

# shuffle=False keeps the test frames in a fixed order so that predictions can
# be matched with test_generator.classes during evaluation.
test_generator = test_datagen.flow_from_dataframe(
    test_df,
    x_col='frame_path',
    y_col='label',
    target_size=(224, 224),
    batch_size=32,
    class_mode='sparse',  # Changed to 'sparse'
    shuffle=False
)

# Create a model using MobileNetV2: pretrained backbone without its top,
# global average pooling, and a single sigmoid output unit.
base_model = MobileNetV2(weights='imagenet', include_top=False)
x = base_model.output
x = GlobalAveragePooling2D()(x)
predictions = Dense(1, activation='sigmoid')(x)

model = Model(inputs=base_model.input, outputs=predictions)

# Freeze the base model layers so only the new Dense head is trained.
for layer in base_model.layers:
    layer.trainable = False

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model; the History object feeds the plots at the end of the cell.
history = model.fit(train_generator, validation_data=test_generator, epochs=10)

# Evaluate the model.
# The true labels come from the generator itself: test_generator.classes holds
# integer class indices in the same (unshuffled) order in which predict() yields
# its outputs, and shares the integer type of the thresholded predictions.
y_true = np.asarray(test_generator.classes)
y_pred = model.predict(test_generator)
y_pred = np.round(y_pred).astype(int).flatten()

print(classification_report(y_true, y_pred))

# Confusion matrix (rows: true class, columns: predicted class).
cm = confusion_matrix(y_true, y_pred)
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=['Real', 'Deepfake'], yticklabels=['Real', 'Deepfake'])
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()

# Save the lightweight model
model.save('/content/drive/MyDrive/deepfake_detector_model.h5')

# Plot training history: accuracy curves first, then loss curves.
plt.plot(history.history['accuracy'], label='train_accuracy')
plt.plot(history.history['val_accuracy'], label='val_accuracy')
plt.title('Model Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

plt.plot(history.history['loss'], label='train_loss')
plt.plot(history.history['val_loss'], label='val_loss')
plt.title('Model Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
#another code to reduce the training time
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Load the labels
labels = pd.read_csv('/content/drive/MyDrive/frame_labels.csv')

# flow_from_dataframe(class_mode='binary') raises a TypeError unless the label
# column holds strings, so cast it before building the generators.
labels['label'] = labels['label'].astype(str)

# Split the data: 30% held out, stratified by label, fixed seed for repeatability.
train_df, test_df = train_test_split(labels, test_size=0.3, stratify=labels['label'], random_state=42)

# Image data generators: both only rescale pixel values by 1/255.
train_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_dataframe(
    train_df,
    x_col='frame_path',
    y_col='label',
    target_size=(128, 128),  # Reduced image size
    batch_size=64,  # Increased batch size
    class_mode='binary'
)

# shuffle=False keeps the test frames in a fixed order so that predictions can
# be matched with test_generator.classes during evaluation.
test_generator = test_datagen.flow_from_dataframe(
    test_df,
    x_col='frame_path',
    y_col='label',
    target_size=(128, 128),  # Reduced image size
    batch_size=64,  # Increased batch size
    class_mode='binary',
    shuffle=False
)

# Create a model using MobileNetV2; input_shape matches the 128x128 RGB frames
# produced by the generators above.
base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(128, 128, 3))
x = base_model.output
x = GlobalAveragePooling2D()(x)
predictions = Dense(1, activation='sigmoid')(x)

model = Model(inputs=base_model.input, outputs=predictions)

# Freeze the base model layers so only the new Dense head is trained.
for layer in base_model.layers:
    layer.trainable = False

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Early stopping: stop after 3 epochs without val_loss improvement and roll
# back to the best weights seen.
# NOTE(review): val_loss is computed on the same test split that is scored
# below, so the reported test metrics are not independent of the stopping point.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Train the model for up to 50 epochs; the History object feeds the plots below.
history = model.fit(train_generator, validation_data=test_generator, epochs=50, callbacks=[early_stopping])

# Evaluate the model.
# The true labels come from the generator itself: test_generator.classes holds
# integer class indices in the same (unshuffled) order in which predict() yields
# its outputs, and shares the integer type of the thresholded predictions.
y_true = np.asarray(test_generator.classes)
y_pred = model.predict(test_generator)
y_pred = np.round(y_pred).astype(int).flatten()

print(classification_report(y_true, y_pred))

# Confusion matrix (rows: true class, columns: predicted class).
cm = confusion_matrix(y_true, y_pred)
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=['Real', 'Deepfake'], yticklabels=['Real', 'Deepfake'])
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()

# Save the lightweight model
model.save('/content/drive/MyDrive/deepfake_detector_model.h5')

# Plot training history: accuracy curves first, then loss curves.
plt.plot(history.history['accuracy'], label='train_accuracy')
plt.plot(history.history['val_accuracy'], label='val_accuracy')
plt.title('Model Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

plt.plot(history.history['loss'], label='train_loss')
plt.plot(history.history['val_loss'], label='val_loss')
plt.title('Model Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()