In [None]:
#before running this please change the RUNTIME to GPU (Runtime -> Change runtime type -> set harware accelarotor as GPU)
#Mount our google drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
#import libraries
!pip3 install face_recognition

import torch
import torchvision
from torchvision import transforms
from torch.utils.data import DataLoader
from torch.utils.data.dataset import Dataset
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
import face_recognition

Collecting face_recognition
  Downloading face_recognition-1.3.0-py2.py3-none-any.whl.metadata (21 kB)
Collecting face-recognition-models>=0.3.0 (from face_recognition)
  Downloading face_recognition_models-0.3.0.tar.gz (100.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m100.1/100.1 MB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Downloading face_recognition-1.3.0-py2.py3-none-any.whl (15 kB)
Building wheels for collected packages: face-recognition-models
  Building wheel for face-recognition-models (setup.py) ... [?25l[?25hdone
  Created wheel for face-recognition-models: filename=face_recognition_models-0.3.0-py2.py3-none-any.whl size=100566166 sha256=4346edfad391a0387ce27982da6b2285d1717cb36ba602d9eadc5ee9f2f8022d
  Stored in directory: /root/.cache/pip/wheels/04/52/ec/9355da79c29f160b038a20c784db2803c2f9fa2c8a462c176a
Successfully built face-recognition-models
Installing collected packages: face-recogn

In [None]:
#import libraries
import torch
from torch.autograd import Variable
import time
import os
import sys
import os
from torch import nn
from torchvision import models

In [65]:
#efficientnet + tcn with schtoic depth and lower tcn

import torch
from torch import nn
from torchvision import models
import torch.nn.functional as F

class TemporalBlock(nn.Module):
    def __init__(self, n_inputs, n_outputs, kernel_size, stride, dilation, padding, dropout=0.5, survival_prob=1.0):
        super().__init__()
        self.survival_prob = survival_prob
        self.conv1 = nn.Conv1d(n_inputs, n_outputs, kernel_size, stride=stride, padding=padding, dilation=dilation)
        self.relu1 = nn.ReLU()
        self.dp1 = nn.Dropout(dropout)
        self.conv2 = nn.Conv1d(n_outputs, n_outputs, kernel_size, stride=stride, padding=padding, dilation=dilation)
        self.relu2 = nn.ReLU()
        self.dp2 = nn.Dropout(dropout)
        self.downsample = nn.Conv1d(n_inputs, n_outputs, 1) if n_inputs != n_outputs else None
        self.relu = nn.ReLU()

    def stochastic_residual(self, x, out):
        if not self.training or self.survival_prob == 1.0:
            return out + x
        else:
            binary_tensor = torch.rand((x.size(0), 1, 1), device=x.device) < self.survival_prob
            out = (out * binary_tensor) / self.survival_prob
            return out + x

    def forward(self, x):
        out = self.conv1(x)
        out = self.relu1(out)
        out = self.dp1(out)
        out = self.conv2(out)
        out = self.relu2(out)
        out = self.dp2(out)

        res = x if self.downsample is None else self.downsample(x)
        diff = out.shape[2] - res.shape[2]
        if diff > 0:
            res = F.pad(res, (diff // 2, diff - diff // 2))
        elif diff < 0:
            res = res[:, :, -out.shape[2]:]

        return self.relu(self.stochastic_residual(res, out))


class TCN(nn.Module):
    def __init__(self, num_inputs, num_channels, kernel_size=3, dropout=0.5, survival_base=0.9):
        super(TCN, self).__init__()
        layers = []
        for i in range(len(num_channels)):
            dilation = 2 ** i
            in_ch = num_inputs if i == 0 else num_channels[i - 1]
            out_ch = num_channels[i]
            survival_prob = survival_base ** (i + 1)
            layers.append(
                TemporalBlock(in_ch, out_ch, kernel_size, 1, dilation, (kernel_size - 1) * dilation,
                              dropout, survival_prob)
            )
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        return self.network(x)


class Model(nn.Module):
    def __init__(self, num_classes, latent_dim=1536, reduced_dim=1024, tcn_channels=[1024, 768],
                 kernel_size=3, dropout=0.5):
        super(Model, self).__init__()
        self.efficientnet = models.efficientnet_b3(pretrained=True)
        self.model = self.efficientnet.features
        self.avgpool = nn.AdaptiveAvgPool2d(1)

        for param in self.model.parameters():
            param.requires_grad = True

        self.project = nn.Sequential(
            nn.Conv1d(in_channels=latent_dim, out_channels=reduced_dim, kernel_size=1),
            nn.BatchNorm1d(reduced_dim),
            nn.ReLU()
        )

        self.tcn = TCN(num_inputs=reduced_dim, num_channels=tcn_channels, kernel_size=kernel_size, dropout=dropout)
        self.dp = nn.Dropout(dropout)
        self.linear1 = nn.Linear(tcn_channels[-1], num_classes)

    def forward(self, x):
        batch_size, seq_length, c, h, w = x.shape
        x = x.view(batch_size * seq_length, c, h, w)
        fmap = self.model(x)
        x = self.avgpool(fmap)
        x = x.view(batch_size, seq_length, -1)
        x = x.transpose(1, 2)  # (B, C, T)
        x = self.project(x)
        x = self.tcn(x)
        x = torch.mean(x, dim=2)
        logits = self.linear1(self.dp(x))
        return fmap, logits


In [66]:
im_size = 112
mean=[0.485, 0.456, 0.406]
std=[0.229, 0.224, 0.225]
sm = nn.Softmax()
inv_normalize =  transforms.Normalize(mean=-1*np.divide(mean,std),std=np.divide([1,1,1],std))
def im_convert(tensor):
    """ Display a tensor as an image. """
    image = tensor.to("cpu").clone().detach()
    image = image.squeeze()
    image = inv_normalize(image)
    image = image.numpy()
    image = image.transpose(1,2,0)
    image = image.clip(0, 1)
    cv2.imwrite('./2.png',image*255)
    return image

import torch.nn.functional as F


def predict(model, img):
    with torch.no_grad():
        fmap, logits = model(img.to('cuda'))  # Only unpack two values
        probs = F.softmax(logits, dim=1)
        prediction = torch.argmax(probs, dim=1).item()
        confidence = probs.max().item()
        print(f"Prediction: {'REAL' if prediction == 1 else 'FAKE'}")
        print(f"Confidence: {confidence:.2f}%")
    return prediction, confidence


In [67]:
#!pip3 install face_recognition
import torch
import torchvision
from torchvision import transforms
from torch.utils.data import DataLoader
from torch.utils.data.dataset import Dataset
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
import face_recognition
class validation_dataset(Dataset):
    def __init__(self,video_names,sequence_length = 60,transform = None):
        self.video_names = video_names
        self.transform = transform
        self.count = sequence_length
    def __len__(self):
        return len(self.video_names)
    def __getitem__(self,idx):
        video_path = self.video_names[idx]
        frames = []
        a = int(100/self.count)
        first_frame = np.random.randint(0,a)
        for i,frame in enumerate(self.frame_extract(video_path)):
            #if(i % a == first_frame):
            faces = face_recognition.face_locations(frame)
            try:
              top,right,bottom,left = faces[0]
              frame = frame[top:bottom,left:right,:]
            except:
              pass
            frames.append(self.transform(frame))
            if(len(frames) == self.count):
              break
        #print("no of frames",len(frames))
        frames = torch.stack(frames)
        frames = frames[:self.count]
        return frames.unsqueeze(0)
    def frame_extract(self,path):
      vidObj = cv2.VideoCapture(path)
      success = 1
      while success:
          success, image = vidObj.read()
          if success:
              yield image
def im_plot(tensor):
    image = tensor.cpu().numpy().transpose(1,2,0)
    b,g,r = cv2.split(image)
    image = cv2.merge((r,g,b))
    image = image*[0.22803, 0.22145, 0.216989] +  [0.43216, 0.394666, 0.37645]
    image = image*255.0
    plt.imshow(image.astype(int))
    plt.show()

In [None]:
#Code for making prediction
im_size = 112
mean=[0.485, 0.456, 0.406]
std=[0.229, 0.224, 0.225]

train_transforms = transforms.Compose([
                                        transforms.ToPILImage(),
                                        transforms.Resize((im_size,im_size)),
                                        transforms.ToTensor(),
                                        transforms.Normalize(mean,std)])
path_to_videos = ['/content/drive/MyDrive/data/yt1s.com - Ultra realistic Deepfake of Elon Musk_1080p_fixed.mp4',#fake
                                   '/content/drive/MyDrive/data/id0_id1_0005.mp4',#faake
                                   '/content/drive/MyDrive/data/01__meeting_serious_fixed.mp4',
                                   '/content/drive/MyDrive/data/Deep fake test_fixed.mp4',
    ]
'''
path_to_videos = ['/content/drive/MyDrive/data/extractedfolder/Celeb_fake_face_only/id0_id17_0001.mp4',#fake
                  '/content/drive/MyDrive/data/extractedfolder/Celeb_fake_face_only/id0_id1_0005.mp4',#fake
                  '/content/drive/MyDrive/data/extractedfolder/Celeb_real_face_only/id0_0002.mp4',#real
                  '/content/drive/MyDrive/data/extractedfolder/Celeb_real_face_only/id10_0004.mp4'#real


]
'''
#path_to_videos= ["/content/yugal deepfake.mp4"]

video_dataset = validation_dataset(path_to_videos,sequence_length = 10,transform = train_transforms)
model = Model(2).cuda()
path_to_model = '/content/drive/MyDrive/data/best_model.pt'
#model.load_state_dict(torch.load(path_to_model))
checkpoint = torch.load(path_to_model)
model.load_state_dict(checkpoint['model_state_dict'])

model.eval()
for i in range(len(path_to_videos)):
    print(f"Processing video: {path_to_videos[i]}")
    prediction, confidence = predict(model, video_dataset[i])


Processing video: /content/drive/MyDrive/data/yt1s.com - Ultra realistic Deepfake of Elon Musk_1080p_fixed.mp4
Prediction: FAKE
Confidence: 0.82%
Processing video: /content/drive/MyDrive/data/id0_id1_0005.mp4
Prediction: FAKE
Confidence: 0.53%
Processing video: /content/drive/MyDrive/data/01__meeting_serious_fixed.mp4
Prediction: REAL
Confidence: 0.96%
Processing video: /content/drive/MyDrive/data/Deep fake test_fixed.mp4
Prediction: REAL
Confidence: 0.83%
Processing video: /content/yugal deepfake.mp4
Prediction: REAL
Confidence: 0.73%


real time

In [71]:
# ─── 1. IMPORTS & CALLBACK ─────────────────────────────────────────────────
import torch
# Let cuDNN autotuner find the best conv algorithms, and if it still fails, fall back to PyTorch's native kernels
torch.backends.cudnn.benchmark = True
torch.backends.cudnn.enabled   = False
from IPython.display   import display, Javascript, clear_output
from google.colab.output import eval_js, register_callback
import time
from base64             import b64decode
from io                 import BytesIO
from PIL                import Image, UnidentifiedImageError
import numpy            as np
from torchvision        import transforms

# Let JS call this to clear the output area when stopping the webcam
register_callback('clearOutput', lambda _: clear_output(wait=True))


# ─── 2. INJECT JAVASCRIPT UI + FRAME‐GRABBER ────────────────────────────────
js = """
(async function() {
  if (window._deepfakeUI) return;  // only once

  // --- START button ---
  const start = document.createElement('button');
  start.textContent = 'Start Webcam';
  start.style.marginRight = '8px';
  start.onclick = async () => {
    let vid = document.getElementById('deepfake_video');
    if (!vid) {
      vid = document.createElement('video');
      vid.id = 'deepfake_video';
      vid.width = 640; vid.height = 480;
      vid.autoplay = true;
      document.body.prepend(vid);
    }
    try {
      const stream = await navigator.mediaDevices.getUserMedia({video:true});
      window._deepfake_stream = stream;
      vid.srcObject = stream;
    } catch(e) {
      console.error('Camera error:', e);
    }
  };

  // --- STOP button ---
  const stop = document.createElement('button');
  stop.textContent = 'Stop Webcam';
  stop.onclick = () => {
    const s = window._deepfake_stream;
    if (s) {
      s.getTracks().forEach(t => t.stop());  // stops camera
      delete window._deepfake_stream;
    }
    const v = document.getElementById('deepfake_video');
    if (v) v.remove();                       // remove video element
    google.colab.kernel.invokeFunction('clearOutput', [], {});  // clear Python output
  };

  // --- frame grabber helper ---
  const grabScript = document.createElement('script');
  grabScript.textContent = `
    async function grabFrame() {
      const vid = document.getElementById('deepfake_video');
      if (!vid) return null;
      const c = document.createElement('canvas');
      c.width = vid.videoWidth;
      c.height = vid.videoHeight;
      c.getContext('2d').drawImage(vid, 0, 0);
      return c.toDataURL('image/jpeg');
    }
  `;
  document.body.appendChild(grabScript);

  // --- insert buttons into DOM ---
  const ctr = document.createElement('div');
  ctr.style.margin = '12px 0';
  ctr.append(start, stop);
  document.body.prepend(ctr);

  window._deepfakeUI = true;
})();
"""
display(Javascript(js))


# ─── 3. LOAD MODEL & TRANSFORMS ──────────────────────────────────────────────
# (Make sure your Model class is already defined above or imported)
checkpoint = torch.load('/content/drive/MyDrive/data/best_model.pt')
model      = Model(2).cuda()
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()

im_size = 112
mean    = [0.485, 0.456, 0.406]
std     = [0.229, 0.224, 0.225]
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((im_size, im_size)),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])


# ─── 4. REAL-TIME LOOP ───────────────────────────────────────────────────────
sequence   = []
interval_s = 0.2  # 200 ms between frames

print(">> Click ‘Start Webcam’ above to begin streaming and detection.")

while True:
    # 1) grab frame via JS
    data_url = eval_js('grabFrame()')
    if not data_url or not data_url.startswith('data:image'):
        time.sleep(interval_s)
        continue

    # 2) decode and open
    header, b64 = data_url.split(',', 1)
    try:
        img = Image.open(BytesIO(b64decode(b64))).convert('RGB')
    except (UnidentifiedImageError, ValueError):
        print("[Warning] received invalid frame—skipping")
        time.sleep(interval_s)
        continue

    # 3) preprocess
    frame_np = np.array(img)             # H×W×3 RGB
    tensor   = transform(frame_np)
    sequence.append(tensor)
    print(f"Captured frame {len(sequence)}/10")

    # 4) once 10 frames → predict
    if len(sequence) == 10:
        batch = torch.stack(sequence).unsqueeze(0).cuda()  # shape (1,10,3,112,112)
        with torch.no_grad():
            _, logits = model(batch)
            probs     = torch.softmax(logits, dim=1)
            pred      = torch.argmax(probs, dim=1).item()
            conf      = probs.max().item()
        label = "REAL" if pred == 1 else "FAKE"
        print(f"[{time.strftime('%H:%M:%S')}] {label} ({conf*100:.1f}%)")
        sequence = []

    time.sleep(interval_s)


<IPython.core.display.Javascript object>

>> Click ‘Start Webcam’ above to begin streaming and detection.
Captured frame 1/10
Captured frame 2/10
Captured frame 3/10
Captured frame 4/10
Captured frame 5/10
Captured frame 6/10
Captured frame 7/10
Captured frame 8/10
Captured frame 9/10
Captured frame 10/10
[11:06:42] REAL (72.5%)
Captured frame 1/10
Captured frame 2/10
Captured frame 3/10
Captured frame 4/10
Captured frame 5/10
Captured frame 6/10
Captured frame 7/10
Captured frame 8/10
Captured frame 9/10
Captured frame 10/10
[11:06:46] REAL (83.5%)
Captured frame 1/10
Captured frame 2/10
Captured frame 3/10
Captured frame 4/10
Captured frame 5/10
Captured frame 6/10
Captured frame 7/10
Captured frame 8/10
Captured frame 9/10
Captured frame 10/10
[11:06:51] REAL (81.7%)
Captured frame 1/10
Captured frame 2/10
Captured frame 3/10
Captured frame 4/10
Captured frame 5/10
Captured frame 6/10
Captured frame 7/10
Captured frame 8/10
Captured frame 9/10
Captured frame 10/10
[11:06:56] REAL (95.4%)
Captured frame 1/10
Captured frame 2

KeyboardInterrupt: 