In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
import os
import tqdm
from PIL import Image
import shutil
import csv
import numpy as np

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
def precision(y_pred, y_true):
    """
    Calculates precision for binary classification.

    Args:
    - y_pred (Tensor): Predicted labels (0 or 1).
    - y_true (Tensor): True labels (0 or 1).

    Returns:
    - precision (float): Precision score.
    """
    true_positives = torch.logical_and(y_pred == 1, y_true == 1).sum().item()
    predicted_positives = (y_pred == 1).sum().item()
    precision = true_positives / (predicted_positives + 1e-20)  # Adding epsilon to avoid division by zero
    return precision

In [None]:
def log_to_csv(filename, data):
    file_exists = os.path.isfile(filename)
    with open(filename, mode='a') as csv_file:
        reader = csv.reader(csv_file)
        if reader.readrows()>0:
            writer = csv.writer(csv_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        if not file_exists:
            writer.writerow(['Epoch', 'Loss', 'Accuracy', 'Precision'])
        writer.writerow(data)

In [None]:
from torchvision.transforms import Compose, Resize, ToTensor
class CustomDataset(Dataset):
    def __init__(self, data_folder, transform=None):
        self.data_folder = data_folder
        self.transform = transform

        # Get a list of all image files in the folder

        dirs=[os.path.join(data_folder,f) for f in os.listdir(data_folder)]
        #at=[os.path.join(data_folder,f) for f in os.listdir(data_folder) if f.endswith(('.jpg', '.jpeg', '.png', '.gif'))]
        self.image_files=[]
        ant=[dirs[0], dirs[1]]
        lab=torch.tensor([0, 1])
        for j,i in enumerate(dirs):
            self.image_files.extend([(os.path.join(ant[j],f),lab[j]) for f in os.listdir(i)[:1000]])

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        img_name = self.image_files[idx][0]
        image = Image.open(img_name).convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image[0].reshape(1,256,256), self.image_files[idx][1]

def data_define(data_folder=False, tensor=False):

    transform = Compose([
        ResizeToSquare(256),
        ToTensor()
    ])

    if data_folder:
        real=[os.path.join('/content/drive/MyDrive/deep fake detection/Dataset/Train/Celeb-real-frame', i) for i in os.listdir('/content/drive/MyDrive/deep fake detection/Dataset/Train/Celeb-real-frame')]
        fake=[os.path.join('/content/drive/MyDrive/deep fake detection/Dataset/Train/Celeb-fake-frame', i) for i in os.listdir('/content/drive/MyDrive/deep fake detection/Dataset/Train/Celeb-fake-frame')]
        for j,dir in enumerate([real, fake]):
            for i in dir:
                if j==0:
                    shutil.copy(i,'/content/drive/MyDrive/Test2/Real')
                else:
                    shutil.copy(i,'/content/drive/MyDrive/Test2/Fake')
        data_folder = '/content/drive/MyDrive/Test2'
        custom_dataset = CustomDataset(data_folder, transform=transform)
        dataloader = DataLoader(custom_dataset, batch_size=64, shuffle=True)
        return custom_dataset, dataloader

    elif tensor:
        image= tensor
        if transform:
            image = transform(image)
        return image[0].reshape(1,256,256)

class ResizeToSquare(object):
    def __init__(self, size):
        self.size = size

    def __call__(self, img):
        width, height = img.size
        aspect_ratio = width / height
        if aspect_ratio > 1:
            new_width = self.size
            new_height = int(self.size / aspect_ratio)
        else:
            new_height = self.size
            new_width = int(self.size * aspect_ratio)
        img = img.resize((new_width, new_height))
        canvas = Image.new('RGB', (self.size, self.size), (0, 0, 0))
        h_offset = (self.size - new_width) // 2
        v_offset = (self.size - new_height) // 2
        canvas.paste(img, (h_offset, v_offset))
        return canvas

In [None]:
ds, trainloader=data_define('/content/')

In [7]:
class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()

        # Encoder
        self.encoder = nn.Sequential(
            nn.Conv2d(1, 128, kernel_size=5, stride=2, padding=2),
            nn.LeakyReLU(0.1, inplace=True),
            nn.Conv2d(128, 256, kernel_size=5, stride=2, padding=2),
            nn.LeakyReLU(0.1, inplace=True),
            nn.Conv2d(256, 512, kernel_size=5, stride=2, padding=2),
            nn.LeakyReLU(0.1, inplace=True),
            nn.Conv2d(512, 1024, kernel_size=5, stride=2, padding=2),
            nn.LeakyReLU(0.1, inplace=True),
            nn.Flatten(),
            # Correct the input features for the first linear layer based on the 128x128 input size
            nn.Linear(1024 * 8 * 8, 1024),
            nn.LeakyReLU(0.1, inplace=True),
            nn.Linear(1024, 2)  # Two output classes: real or fake
        )

        # Decoder - Note: The decoder dimensions are also likely incorrect if designed for a 16x16 spatial size after convolution.
        # If the decoder is intended to reconstruct the original 256x256 image, its architecture and the first linear layer
        # might need to be adjusted as well based on the output of the encoder's classification layer (which has 2 features).
        # However, the current error is in the encoder's forward pass during `summary`.
        self.decoder_fc = nn.Sequential(
            nn.Linear(2, 1024), # This takes the 2 output features from the encoder
            nn.LeakyReLU(0.1, inplace=True),
            # This linear layer should output a flattened tensor that can be reshaped to (1024, H, W) for the first ConvTranspose2d
            # If the ConvTranspose2d expects (1024, 16, 16), the output here should be 1024 * 16 * 16.
            # If the encoder's architecture was designed for a different input size leading to 16x16 spatial dimensions,
            # the decoder's linear layer size might be correct in that context, but the encoder's linear layer wasn't.
            # For a 128x128 input resulting in 8x8 spatial dimension before flatten, the encoder output is 2 features.
            # The decoder starts from these 2 features. If you want to reconstruct a 256x256 image, the decoder needs to upsample.
            # The intermediate size (1024, 16, 16) in the decoder suggests an attempt to upsample back to something related to 16x16.
            # If the goal is reconstruction, the decoder's input to ConvTranspose2d (after unflatten) should correspond to the
            # size before flatten in the encoder's mirrored architecture. The encoder ends with 1024 channels and 8x8 spatial.
            # So the decoder's first ConvTranspose2d would ideally take 1024 channels and start from 8x8 spatial.
            # The current decoder architecture is likely mismatched for the 128x128 input and the encoder's structure.
            # For now, let's fix the encoder. If reconstruction is needed, the decoder will require significant review.
            nn.Linear(1024, 1024 * 16 * 16), # This is likely incorrect for reconstructing from 2 features to 256x256.
            nn.LeakyReLU(0.1, inplace=True)
        )

        self.decoder_conv = nn.Sequential(
            nn.Unflatten(1, (1024, 16, 16)), # This expects the output of decoder_fc to be reshapeable to (1024, 16, 16)
            nn.ConvTranspose2d(1024, 512, kernel_size=5, stride=2, padding=2, output_padding=1),
            nn.LeakyReLU(0.1, inplace=True),
            nn.ConvTranspose2d(512, 256, kernel_size=5, stride=2, padding=2, output_padding=1),
            nn.LeakyReLU(0.1, inplace=True),
            nn.ConvTranspose2d(256, 128, kernel_size=5, stride=2, padding=2, output_padding=1),
            nn.LeakyReLU(0.1, inplace=True),
            nn.ConvTranspose2d(128, 3, kernel_size=5, stride=2, padding=2, output_padding=1),
            nn.Softmax()  # To get pixel values in the range [0, 1]
        )

    def forward(self, x):
        encoded = self.encoder(x)
        return encoded

    def decode(self, encoding):
        x = self.decoder_fc(encoding)
        x = self.decoder_conv(x)
        return x

# Create model, loss function, and optimizer
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Classifier().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

In [3]:
!pip install torchsummary




In [9]:
from torchsummary import summary
import torch

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize your model and move it to the device
model = Classifier().to(device)

# Display model architecture
# Change the input_size to reflect 1 input channel as expected by the model
summary(model, input_size=(1, 128, 128))
summary(model, output_size=(1, 256, 256))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [-1, 128, 64, 64]           3,328
         LeakyReLU-2          [-1, 128, 64, 64]               0
            Conv2d-3          [-1, 256, 32, 32]         819,456
         LeakyReLU-4          [-1, 256, 32, 32]               0
            Conv2d-5          [-1, 512, 16, 16]       3,277,312
         LeakyReLU-6          [-1, 512, 16, 16]               0
            Conv2d-7           [-1, 1024, 8, 8]      13,108,224
         LeakyReLU-8           [-1, 1024, 8, 8]               0
           Flatten-9                [-1, 65536]               0
           Linear-10                 [-1, 1024]      67,109,888
        LeakyReLU-11                 [-1, 1024]               0
           Linear-12                    [-1, 2]           2,050
Total params: 84,320,258
Trainable params: 84,320,258
Non-trainable params: 0
-------------------------

TypeError: summary() got an unexpected keyword argument 'output_size'

In [None]:
def train(model, dataloader, criterion, optimizer):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    fake_count = 0
    real_count = 0


    datal=tqdm.tqdm(dataloader)
    for images, labels in datal:

        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

        # Counting real and fake images
        fake_count += (predicted == 1).sum().item()
        real_count += (predicted == 0).sum().item()

        datal.set_postfix(loss=running_loss)

    accuracy = 100. * correct / total
    outputs=torch.argmax(outputs, dim=1)
    prec=precision(outputs,labels)
    return running_loss / len(dataloader), accuracy, fake_count, real_count, prec

In [None]:
n_epochs = 1
val_accuracies = []
for epoch in range(n_epochs):
    train_loss, train_acc, train_fake_count, train_real_count, prec = train(model, trainloader, criterion, optimizer)
    #val_loss, val_acc, val_fake_count, val_real_count, y_true, y_pred = validate(model, test_loader, criterion)
    #val_accuracies.append(val_acc)
    #print(f'Epoch {epoch+1}/{n_epochs}, Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}')
    print(f'Epoch {epoch+1}/{n_epochs}, Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}')
    print(f'Training: Fake Images: {train_fake_count}, Real Images: {train_real_count}')
    #print(f'Validation: Fake Images: {val_fake_count}, Real Images: {val_real_count}')
    log_to_csv('training_log.csv', [epoch, train_loss, train_acc, prec])
    #torch.save(model.state_dict(),'rf_face_det_weights.pth')
    #torch.save(optimizer.state_dict(),'rf_face_det_opt.pth')

In [None]:
torch.save(model.state_dict(),'rf_face_det_weights.pth')
torch.save(optimizer.state_dict(),'rf_face_det_opt.pth')

In [None]:
pip install facenet-pytorch

Collecting facenet-pytorch
  Downloading facenet_pytorch-2.6.0-py3-none-any.whl.metadata (12 kB)
Collecting Pillow<10.3.0,>=10.2.0 (from facenet-pytorch)
  Downloading pillow-10.2.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (9.7 kB)
Collecting torch<2.3.0,>=2.2.0 (from facenet-pytorch)
  Downloading torch-2.2.2-cp310-cp310-manylinux1_x86_64.whl.metadata (26 kB)
Collecting torchvision<0.18.0,>=0.17.0 (from facenet-pytorch)
  Downloading torchvision-0.17.2-cp310-cp310-manylinux1_x86_64.whl.metadata (6.6 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch<2.3.0,>=2.2.0->facenet-pytorch)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch<2.3.0,>=2.2.0->facenet-pytorch)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch<2.3.0,>=2.2.0->facenet-pytorch)
  Using cached nvidia_cu

In [None]:
import csv
import os

def log_to_csv(filename, data):
    file_exists = os.path.isfile(filename)
    with open(filename, mode='a', newline='') as csv_file:  # Use newline='' to avoid extra empty rows
        writer = csv.writer(csv_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        if not file_exists:  # Write header if file is newly created
            writer.writerow(["Epoch", "Train Loss", "Train Accuracy", "Precision"])
        writer.writerow(data)  # Write the data

In [None]:
from facenet_pytorch import MTCNN, InceptionResnetV1
from PIL import Image
import matplotlib.pyplot as plt
mtcnn = MTCNN()
model = Classifier().to(device)
image_path = '/home/developer/Downloads/example3'
image = Image.open(image_path)
boxes, probs = mtcnn.detect(image)
def crop_faces(image, boxes):
    faces = []
    for box in boxes:
        x1, y1, x2, y2 = box.astype(int)
        faces.append(image.crop((x1, y1, x2, y2)))
    return faces
cropped_faces = crop_faces(image, boxes)
num_faces = len(cropped_faces)
if num_faces > 0:
    plt.imshow(cropped_faces[0])
    plt.axis(False)
    plt.show()
else:
    print("No faces detected.")


y=data_define(tensor=cropped_faces[0].convert('RGB')).cuda()
outputs=model(y.unsqueeze(0))
pred=torch.argmax(outputs,axis=1)
if pred==0:
    print('real')
else:
    print('fake')

ModuleNotFoundError: No module named 'facenet_pytorch'

In [None]:
import cv2
import torch
from facenet_pytorch import MTCNN
from torchvision import transforms
from PIL import Image
import os
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
mtcnn = MTCNN(keep_all=True, device=device)

def load_model(a):
    model = a
    model.load_state_dict(torch.load('rf_face_det_weights.pth'))
    model.to(device)
    model.eval()
    return model
def crop_faces(image, boxes):
    faces = []
    for box in boxes:
        x1, y1, x2, y2 = box.astype(int)
        faces.append(image.crop((x1, y1, x2, y2)))
    return faces
video_path = '/home/developer/Celeb-real/id0_0005.mp4'
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    print(f"Error opening video file {video_path}")
    exit()
while cap.isOpened():
    ret, frame = cap.read()
    if ret:
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        boxes, probs = mtcnn.detect(rgb_frame)
        if boxes is not None:
            for box in boxes:
                startX, startY, endX, endY = box.astype(int)
                face_region = frame[startY:endY, startX:endX]
                pil_image = Image.fromarray(cv2.cvtColor(face_region, cv2.COLOR_BGR2RGB))
                input_tensor = data_define(tensor=pil_image.convert('RGB')).cuda()
                outputs = model(input_tensor.unsqueeze(0))
                prediction = torch.argmax(outputs, dim=1).item()
                if prediction == 0:
                    label = "Real"
                    color = (0, 255, 0)
                else:
                    label = "Fake"
                    color = (0, 0, 255)
                cv2.rectangle(frame, (startX, startY), (endX, endY), color, 2)
                cv2.putText(frame, label, (startX, startY - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2, cv2.LINE_AA)
        cv2.imshow('Frame', frame)
        if cv2.waitKey(25) & 0xFF == ord('q'):
            break
    else:
        break
cap.release()
cv2.destroyAllWindows()


In [None]:
import csv
def log_to_csv(filename, data):
    file_exists = os.path.isfile(filename)
    with open(filename, mode='a') as csv_file:
        with open(filename, 'r', newline='') as file:
            csv_reader = csv.reader(file)
            l=[row for row in csv_reader]
            if len(l)>0 and data[0]==0:
                if l[-1][0].isdigit():  # Ensure the last row's first element is a digit
                    data[0] = int(l[-1][0]) + data[0] + 1
                else:
                    data[0] = 1
        writer = csv.writer(csv_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        if not file_exists:
            writer.writerow(['Epoch', 'Loss', 'Accuracy', 'Precision'])
        writer.writerow(data)

In [None]:
!pip install train_mod # Assuming 'train_mod' is a valid package on PyPI. If it's a local module, adjust the path accordingly.
import torch
from torch import nn, optim
from torchvision.transforms import Compose
import csv
import os
# If 'train_mod' is a local file, make sure it's in the same directory as this script or in a directory included in your Python path.
from train_mod import Classifier, data_define, train

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
inp=int(input('Train/Inference')) #input must 0/1
if inp==0:
    ds, trainloader=data_define('/home/developer')
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = Classifier().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.0001)
    if os.path.exists('rf_face_det_weights.pth') and os.path.exists('rf_face_det_opt.pth'):
        model.load_state_dict(torch.load('rf_face_det_weights.pth'))
        optimizer.load_state_dict(torch.load('rf_face_det_opt.pth'))
    n_epochs = 30 #changeable parameter
    for epoch in range(n_epochs):
        train_loss, train_acc, train_fake_count, train_real_count, prec, cm = train(model, trainloader, criterion, optimizer)

        print(f'Epoch {epoch+1}/{n_epochs}, Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}')
        print(f'Training: Fake Images: {train_fake_count}, Real Images: {train_real_count}')
        print('Confusion Matrix')
        print(cm)
        log_to_csv('train.csv', [epoch, train_loss, train_acc, prec])
        torch.save(model.state_dict(),'rf_face_det_weights.pth')
        torch.save(optimizer.state_dict(),'rf_face_det_opt.pth')
elif inp==1:
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = Classifier().to(device)
    model.load_state_dict(torch.load('rf_face_det_weights.pth'))
    model.to(device)
    st=int(input('Image/Video'))
    if st==0:
        from facenet_pytorch import MTCNN, InceptionResnetV1
        from PIL import Image
        import matplotlib.pyplot as plt
        mtcnn = MTCNN()
        model = Classifier().to(device)
        image_path = '/home/developer/Downloads/example3'
        image = Image.open(image_path)
        boxes, probs = mtcnn.detect(image)
        def crop_faces(image, boxes):
            faces = []
            for box in boxes:
                x1, y1, x2, y2 = box.astype(int)
                faces.append(image.crop((x1, y1, x2, y2)))
            return faces
        cropped_faces = crop_faces(image, boxes)
        num_faces = len(cropped_faces)
        if num_faces > 0:
            plt.imshow(cropped_faces[0])
            plt.axis(False)
            plt.show()
        else:
            print("No faces detected.")


        y=data_define(tensor=cropped_faces[0].convert('RGB')).cuda()
        outputs=model(y.unsqueeze(0))
        pred=torch.argmax(outputs,axis=1)
        if pred==0:
            print('real')
        else:
            print('fake')
    elif st==1:
        import cv2
        import torch
        from facenet_pytorch import MTCNN
        from torchvision import transforms
        from PIL import Image
        import os
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        mtcnn = MTCNN(keep_all=True, device=device)

        def load_model(a):
            model = a
            model.load_state_dict(torch.load('rf_face_det_weights.pth'))
            model.to(device)
            model.eval()
            return model
        def crop_faces(image, boxes):
            faces = []
            for box in boxes:
                x1, y1, x2, y2 = box.astype(int)
                faces.append(image.crop((x1, y1, x2, y2)))
            return faces
        video_path = '/home/developer/Celeb-real/id0_0005.mp4'
        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            print(f"Error opening video file {video_path}")
            exit()
        while cap.isOpened():
            ret, frame = cap.read()
            if ret:
                rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                boxes, probs = mtcnn.detect(rgb_frame)
                if boxes is not None:
                    for box in boxes:
                        startX, startY, endX, endY = box.astype(int)
                        face_region = frame[startY:endY, startX:endX]
                        pil_image = Image.fromarray(cv2.cvtColor(face_region, cv2.COLOR_BGR2RGB))
                        input_tensor = data_define(tensor=pil_image.convert('RGB')).cuda()
                        outputs = model(input_tensor.unsqueeze(0))
                        prediction = torch.argmax(outputs, dim=1).item()
                        if prediction == 0:
                            label = "Real"
                            color = (0, 255, 0)
                        else:
                            label = "Fake"
                            color = (0, 0, 255)
                        cv2.rectangle(frame, (startX, startY), (endX, endY), color, 2)
                        cv2.putText(frame, label, (startX, startY - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2, cv2.LINE_AA)
                cv2.imshow('Frame', frame)
                if cv2.waitKey(25) & 0xFF == ord('q'):
                    break
            else:
                break
        cap.release()
        cv2.destroyAllWindows()


[31mERROR: Could not find a version that satisfies the requirement train_mod (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for train_mod[0m[31m
[0m

ModuleNotFoundError: No module named 'train_mod'

In [None]:
def confusion_matrix:


In [None]:

actual = np.random.randn(100)
predicted = np.random.randn(100)

