# **1️⃣ IMPORTS**

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms
from PIL import Image
from transformers import AutoImageProcessor, SiglipForImageClassification
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


# **2️⃣ LOAD FACE MODEL (RESNET50)**

### ResNet50 classifier definition (timm backbone + linear head)

In [3]:
class ResNetClassifier(nn.Module):
    """Image classifier made of a timm backbone followed by a linear head.

    Args:
        model_name: Name of the timm architecture handed to ``create_model``.
        num_classes: Number of logits produced by the linear head.
        pretrained: Whether timm should load its pretrained backbone weights.
            Pass ``False`` when a complete checkpoint is loaded right after
            construction via ``load_state_dict``; the pretrained weights would
            be overwritten anyway and fetching them is wasted work.
    """

    def __init__(self, model_name='resnet50', num_classes=2, pretrained=True):
        super().__init__()
        # Imported lazily so timm is only needed when a model is actually built.
        from timm import create_model

        # num_classes=0 requests the backbone without its own classification
        # head (timm convention), so the linear layer below is the only head.
        self.backbone = create_model(model_name, pretrained=pretrained, num_classes=0)
        self.classifier = nn.Linear(self.backbone.num_features, num_classes)

    def forward(self, x):
        """Return the raw (pre-softmax) class logits for the batch ``x``."""
        return self.classifier(self.backbone(x))

### Load trained weights

In [4]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

face_model = ResNetClassifier(model_name='resnet50')
# Forward slashes resolve on Windows, Linux and macOS alike, whereas a
# backslash-separated literal only resolves on Windows.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source (consider weights_only=True if the file holds tensors only).
checkpoint = torch.load("models/final_deepfake_detector.pth", map_location=device)
# The checkpoint is a dict; the weights live under 'model_state_dict'.
face_model.load_state_dict(checkpoint['model_state_dict'])
face_model.to(device)
# Switch to inference mode; as the last expression this also displays the model.
face_model.eval()

ResNetClassifier(
  (backbone): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (act1): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act1): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (drop_block): Identity()
        (act2): ReLU(inplace=True)
        (aa): Identity()
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1

# **3️⃣ FACE MODEL PREPROCESSOR**

In [5]:
# Ordered preprocessing steps applied to every face image before inference.
_face_steps = [
    # Bring every input to the fixed 224x224 resolution.
    transforms.Resize((224, 224)),
    # Convert the image into a tensor.
    transforms.ToTensor(),
    # Per-channel normalisation; these are the standard ImageNet mean/std values.
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
face_preprocess = transforms.Compose(_face_steps)

# **4️⃣ FACE MODEL PREDICTION FUNCTION**

In [6]:
def predict_face(image):
    """Classify a face image as REAL or FAKE with the face model.

    Args:
        image: Image to classify (the notebook passes a PIL image). An input
            exposing a ``mode`` other than ``"RGB"`` is converted to RGB first,
            because the normalisation step is defined for three channels.

    Returns:
        dict with ``"label"`` (``"REAL"`` when class 0 has the highest
        probability, otherwise ``"FAKE"``), ``"real_prob"`` (softmax
        probability of class 0) and ``"fake_prob"`` (class 1).
    """
    # Grayscale / RGBA / palette inputs would otherwise fail in the
    # three-channel Normalize step of face_preprocess.
    if getattr(image, "mode", "RGB") != "RGB":
        image = image.convert("RGB")

    # Add the batch dimension and move the tensor next to the model.
    batch = face_preprocess(image).unsqueeze(0).to(device)

    with torch.no_grad():
        probs = F.softmax(face_model(batch), dim=1)[0]

    pred_idx = torch.argmax(probs).item()

    return {
        "label": "REAL" if pred_idx == 0 else "FAKE",
        "real_prob": float(probs[0]),
        "fake_prob": float(probs[1]),
    }

# **5️⃣ LOAD SCREENSHOT MODEL (SIGLIP)**

In [7]:
# Hugging Face Hub id of the SigLIP checkpoint used for profile screenshots.
screenshot_model_name = "prithivMLmods/x-bot-profile-detection"

screenshot_model = SiglipForImageClassification.from_pretrained(screenshot_model_name)
# Pin the slow image processor explicitly. This checkpoint was saved with a
# slow processor, and transformers switches the default to the fast one in
# v4.52, which would change the outputs slightly on a later re-run.
screenshot_processor = AutoImageProcessor.from_pretrained(
    screenshot_model_name,
    use_fast=False,
)
# Switch to inference mode; as the last expression this also displays the model.
screenshot_model.eval()

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


SiglipForImageClassification(
  (vision_model): SiglipVisionTransformer(
    (embeddings): SiglipVisionEmbeddings(
      (patch_embedding): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16), padding=valid)
      (position_embedding): Embedding(196, 768)
    )
    (encoder): SiglipEncoder(
      (layers): ModuleList(
        (0-11): 12 x SiglipEncoderLayer(
          (layer_norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
          (self_attn): SiglipAttention(
            (k_proj): Linear(in_features=768, out_features=768, bias=True)
            (v_proj): Linear(in_features=768, out_features=768, bias=True)
            (q_proj): Linear(in_features=768, out_features=768, bias=True)
            (out_proj): Linear(in_features=768, out_features=768, bias=True)
          )
          (layer_norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
          (mlp): SiglipMLP(
            (activation_fn): GELUTanh()
            (fc1): Linear(in_features=768, out_features=30

### Class mapping

In [8]:
# Logit index -> class name for the screenshot classifier (index order matters).
id2label = dict(enumerate(("bot", "cyborg", "real", "verified")))

# **6️⃣ SCREENSHOT MODEL PREDICTION FUNCTION**

In [9]:
def predict_screenshot(image):
    """Score a profile screenshot against every class in ``id2label``.

    Args:
        image: Screenshot to classify (the notebook passes an RGB PIL image).

    Returns:
        dict mapping each class name from ``id2label`` to its softmax
        probability, rounded to three decimals.
    """
    # No ``padding`` argument: it is not a valid option for the image
    # processor, which ignores it and emits a warning on every call.
    inputs = screenshot_processor(images=[image], return_tensors="pt")
    # Keep the inputs on the same device as the model so the function keeps
    # working if the model is ever moved to a GPU.
    inputs = inputs.to(screenshot_model.device)

    with torch.no_grad():
        logits = screenshot_model(**inputs).logits[0]  # single image -> first row
        probs = torch.softmax(logits, dim=0).tolist()

    return {id2label[idx]: round(prob, 3) for idx, prob in enumerate(probs)}

# **7️⃣ TEST BOTH MODELS IN ONE NOTEBOOK**

### Test a face image

In [12]:
# Forward slashes keep the sample path valid on every OS; the context manager
# closes the underlying file as soon as the RGB copy has been made.
with Image.open("test_samples/faces/fake.jpg") as face_file:
    face_image = face_file.convert("RGB")
face_result = predict_face(face_image)
face_result

{'label': 'FAKE',
 'real_prob': 0.10175327211618423,
 'fake_prob': 0.8982467651367188}

### Test a screenshot image

In [11]:
# Forward slashes keep the sample path valid on every OS; the context manager
# closes the underlying file as soon as the RGB copy has been made.
with Image.open("test_samples/profiles_screenshots/verified.png") as ss_file:
    ss_image = ss_file.convert("RGB")
screenshot_result = predict_screenshot(ss_image)
screenshot_result

  return self.preprocess(images, **kwargs)


{'bot': 0.0, 'cyborg': 0.0, 'real': 0.001, 'verified': 0.999}