<a href="https://colab.research.google.com/github/Manya22006/Quantum3_KALP_AI_THON/blob/main/main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os
import cv2

# ✅ Path to your dataset
DATASET_PATH = "/content/drive/MyDrive/kalp-ai-thon/kalp-ai-thon/DeepfakeTIMIT (1)"
OUTPUT_PATH = "/content/drive/MyDrive/kalp-ai-thon/kalp-ai-thon/frames"

# Make output dirs
os.makedirs(os.path.join(OUTPUT_PATH, "real"), exist_ok=True)
os.makedirs(os.path.join(OUTPUT_PATH, "fake"), exist_ok=True)

def extract_frames(video_path, output_folder, label, max_frames=10):
    """Extract frames from a video and save them as images."""
    cap = cv2.VideoCapture(video_path)
    count = 0
    saved = 0
    while cap.isOpened() and saved < max_frames:
        ret, frame = cap.read()
        if not ret:
            break
        # Save every 5th frame to reduce redundancy
        if count % 5 == 0:
            frame_filename = os.path.join(output_folder, f"{label}_{saved}.jpg")
            cv2.imwrite(frame_filename, frame)
            saved += 1
        count += 1
    cap.release()

# Loop through both fake & real folders
for label in ["real", "fake"]:
    folder = os.path.join(DATASET_PATH, label)
    for file in os.listdir(folder):
        if file.endswith(".mp4") or file.endswith(".mov"):
            video_path = os.path.join(folder, file)
            output_folder = os.path.join(OUTPUT_PATH, label)
            extract_frames(video_path, output_folder, file.split('.')[0])
            print(f"Extracted frames from {file} → {label}")


Extracted frames from fadg0-fram1-roi93.mov → real
Extracted frames from fram1-original.mov → real
Extracted frames from fadg0-original.mov → real


In [None]:
import tensorflow as tf

IMG_SIZE = (128, 128)  # resize for faster training
BATCH_SIZE = 32

train_ds = tf.keras.utils.image_dataset_from_directory(
    OUTPUT_PATH,
    validation_split=0.2,
    subset="training",
    seed=42,
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE
)

val_ds = tf.keras.utils.image_dataset_from_directory(
    OUTPUT_PATH,
    validation_split=0.2,
    subset="validation",
    seed=42,
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE
)


Found 30 files belonging to 2 classes.
Using 24 files for training.
Found 30 files belonging to 2 classes.
Using 6 files for validation.


In [None]:
from tensorflow.keras import layers, models

model = models.Sequential([
    layers.Rescaling(1./255, input_shape=(128, 128, 3)),
    layers.Conv2D(32, (3,3), activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(64, (3,3), activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(128, (3,3), activation='relu'),
    layers.MaxPooling2D(),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(1, activation='sigmoid')  # binary classification: real vs fake
])

model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

history = model.fit(train_ds, validation_data=val_ds, epochs=5)


Epoch 1/5


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step - accuracy: 0.0417 - loss: 0.6995 - val_accuracy: 1.0000 - val_loss: 0.1264
Epoch 2/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - accuracy: 1.0000 - loss: 0.1293 - val_accuracy: 1.0000 - val_loss: 0.0028
Epoch 3/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step - accuracy: 1.0000 - loss: 0.0030 - val_accuracy: 1.0000 - val_loss: 1.3558e-05
Epoch 4/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 773ms/step - accuracy: 1.0000 - loss: 1.5855e-05 - val_accuracy: 1.0000 - val_loss: 2.8574e-08
Epoch 5/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 855ms/step - accuracy: 1.0000 - loss: 3.7130e-08 - val_accuracy: 1.0000 - val_loss: 3.5338e-11


In [None]:
loss, acc = model.evaluate(val_ds)
print(f"Validation Accuracy: {acc*100:.2f}%")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step - accuracy: 1.0000 - loss: 3.5338e-11
Validation Accuracy: 100.00%


In [20]:
import tensorflow as tf
from tensorflow.keras.applications import Xception
from tensorflow.keras import layers, models

IMG_SIZE = (160, 160)  # Xception requires >=71, we’ll use 160x160

# Load datasets again
train_ds = tf.keras.utils.image_dataset_from_directory(
    OUTPUT_PATH,
    validation_split=0.2,
    subset="training",
    seed=42,
    image_size=IMG_SIZE,
    batch_size=32
)
val_ds = tf.keras.utils.image_dataset_from_directory(
    OUTPUT_PATH,
    validation_split=0.2,
    subset="validation",
    seed=42,
    image_size=IMG_SIZE,
    batch_size=32
)

# Prefetch for speed
AUTOTUNE = tf.data.AUTOTUNE
train_ds = train_ds.prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.prefetch(buffer_size=AUTOTUNE)

# Base model (pretrained Xception)
base_model = Xception(weights="imagenet", include_top=False, input_shape=IMG_SIZE + (3,))
base_model.trainable = False  # freeze layers

# Add classification head
model = models.Sequential([
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dropout(0.3),
    layers.Dense(1, activation="sigmoid")
])

model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

history = model.fit(train_ds, validation_data=val_ds, epochs=5)

# Evaluate
loss, acc = model.evaluate(val_ds)
print(f"Validation Accuracy: {acc*100:.2f}%")

# Save model
model.save("/content/drive/MyDrive/kalp-ai-thon/kalp-ai-thon/deepfake_model.h5")



Found 30 files belonging to 2 classes.
Using 24 files for training.
Found 30 files belonging to 2 classes.
Using 6 files for validation.
Epoch 1/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 9s/step - accuracy: 0.8750 - loss: 0.3988 - val_accuracy: 1.0000 - val_loss: 2.2469e-04
Epoch 2/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step - accuracy: 1.0000 - loss: 0.0397 - val_accuracy: 1.0000 - val_loss: 3.4454e-05
Epoch 3/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step - accuracy: 1.0000 - loss: 0.0101 - val_accuracy: 1.0000 - val_loss: 7.7309e-06
Epoch 4/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step - accuracy: 0.9583 - loss: 0.3403 - val_accuracy: 1.0000 - val_loss: 1.8476e-06
Epoch 5/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step - accuracy: 1.0000 - loss: 0.0024 - val_accuracy: 1.0000 - val_loss: 5.5594e-07
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5



Validation Accuracy: 100.00%


In [17]:
!pip install streamlit
!pip install pyngrok  # to tunnel Streamlit app in Colab


Collecting streamlit
  Downloading streamlit-1.49.1-py3-none-any.whl.metadata (9.5 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.49.1-py3-none-any.whl (10.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.0/10.0 MB[0m [31m36.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m40.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pydeck, streamlit
Successfully installed pydeck-0.9.1 streamlit-1.49.1
Collecting pyngrok
  Downloading pyngrok-7.3.0-py3-none-any.whl.metadata (8.1 kB)
Downloading pyngrok-7.3.0-py3-none-any.whl (25 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.3.0


In [21]:
import streamlit as st
import tensorflow as tf
import numpy as np
import cv2
import tempfile
import os

# Load trained model
MODEL_PATH = "deepfake_xception.h5"
model = tf.keras.models.load_model(MODEL_PATH)

IMG_SIZE = (160, 160)

st.title("🛡️ Deepfake Defender")
st.write("Upload a video or image, and this tool will detect if it's REAL or FAKE.")

uploaded_file = st.file_uploader("Upload Video/Image", type=["mp4", "mov", "avi", "jpg", "jpeg", "png"])

def preprocess_frame(frame):
    frame = cv2.resize(frame, IMG_SIZE)
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    frame = frame / 255.0
    return np.expand_dims(frame, axis=0)

if uploaded_file is not None:
    # If image
    if uploaded_file.type.startswith("image"):
        file_bytes = np.asarray(bytearray(uploaded_file.read()), dtype=np.uint8)
        image = cv2.imdecode(file_bytes, 1)
        st.image(image, caption="Uploaded Image")

        pred = model.predict(preprocess_frame(image))[0][0]
        score = float(pred) * 100
        st.metric("Deepfake Confidence", f"{score:.2f}%")

        if score > 50:
            st.error("⚠️ Likely FAKE")
        else:
            st.success("✅ Likely REAL")

    # If video
    else:
        tfile = tempfile.NamedTemporaryFile(delete=False)
        tfile.write(uploaded_file.read())
        cap = cv2.VideoCapture(tfile.name)

        frames_pred = []
        count = 0
        while cap.isOpened() and count < 10:  # check first 10 frames
            ret, frame = cap.read()
            if not ret:
                break
            pred = model.predict(preprocess_frame(frame))[0][0]
            frames_pred.append(pred)
            count += 1
        cap.release()

        avg_pred = np.mean(frames_pred)
        score = float(avg_pred) * 100

        st.video(tfile.name)
        st.metric("Deepfake Confidence", f"{score:.2f}%")

        if score > 50:
            st.error("⚠️ Likely FAKE")
        else:
            st.success("✅ Likely REAL")


