<a href="https://colab.research.google.com/github/OneFineStarstuff/TheOneEverAfter/blob/main/_Complete_script_encompasses_all_the_major_steps_for_building_a_deepfake_pipeline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import dnnlib
import legacy
import matplotlib.pyplot as plt
import subprocess
import dlib
import cv2
import numpy as np
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.optim as optim

# ==========================
# Load Pre-trained StyleGAN2 Model
# ==========================
def load_stylegan_model(path):
    """Load the ``G_ema`` network from a StyleGAN2 pickle and move it to CUDA.

    Args:
        path: Path of the network pickle; the file is opened in binary mode.

    Returns:
        The ``'G_ema'`` entry of the dict returned by
        ``legacy.load_network_pkl``, after ``.to('cuda')``.
    """
    # NOTE(review): legacy.load_network_pkl presumably unpickles the file, so
    # only trusted checkpoints should be loaded here — confirm.
    with open(path, 'rb') as pkl_file:
        networks = legacy.load_network_pkl(pkl_file)
    return networks['G_ema'].to('cuda')

def generate_image(G):
    """Generate one image from a random latent vector.

    Args:
        G: Generator exposing ``z_dim`` and callable as ``G(z, c)``. It is
            called with ``c=None`` (no conditioning).

    Returns:
        A ``torch.uint8`` tensor holding the generator output permuted with
        ``(0, 2, 3, 1)`` and rescaled from ``x * 127.5 + 127.5`` into
        [0, 255].
    """
    # Run on the device the generator already lives on instead of assuming
    # CUDA; fall back to CUDA-if-available for parameterless callables.
    first_param = next(G.parameters(), None) if hasattr(G, 'parameters') else None
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Sample on the CPU generator and then move, so seeded runs draw the same
    # latent regardless of the target device.
    z = torch.randn([1, G.z_dim]).to(device)

    # Inference only: the result is quantised to uint8, so no autograd graph
    # is needed.
    with torch.no_grad():
        img = G(z, None)

    img = (img.permute(0, 2, 3, 1) * 127.5 + 127.5).clamp(0, 255).to(torch.uint8)
    return img

def display_image(img):
    """Show the first image of a batch with matplotlib, axes hidden.

    Args:
        img: Batched tensor; element ``0`` is moved to the CPU and converted
            to a NumPy array before being handed to ``plt.imshow``.
    """
    first_image = img[0].cpu().numpy()
    plt.imshow(first_image)
    plt.axis('off')
    plt.show()

# ==========================
# Define Autoencoder Model
# ==========================
class Encoder(nn.Module):
    """Convolutional encoder producing a 1024-dimensional code.

    Three 4x4, stride-2, padding-1 convolutions (3 -> 64 -> 128 -> 256
    channels) are followed by a linear layer. The flatten step assumes the
    last feature map is 256 x 8 x 8, which is what a 64 x 64 input yields.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=64,
                               kernel_size=4, stride=2, padding=1)
        self.conv2 = nn.Conv2d(in_channels=64, out_channels=128,
                               kernel_size=4, stride=2, padding=1)
        self.conv3 = nn.Conv2d(in_channels=128, out_channels=256,
                               kernel_size=4, stride=2, padding=1)
        self.fc = nn.Linear(256 * 8 * 8, 1024)

    def forward(self, x):
        """Encode an image batch into ReLU-activated 1024-d feature vectors."""
        features = x
        for conv in (self.conv1, self.conv2, self.conv3):
            features = torch.relu(conv(features))
        flat = features.view(-1, 256 * 8 * 8)
        code = self.fc(flat)
        return torch.relu(code)

class Decoder(nn.Module):
    """Transposed-convolution decoder mapping a 1024-d code to an image.

    A linear layer expands the code to 256 x 8 x 8, then three 4x4, stride-2,
    padding-1 transposed convolutions (256 -> 128 -> 64 -> 3 channels) each
    double the spatial size. The final sigmoid keeps outputs in [0, 1].
    """

    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(1024, 256 * 8 * 8)
        self.deconv1 = nn.ConvTranspose2d(in_channels=256, out_channels=128,
                                          kernel_size=4, stride=2, padding=1)
        self.deconv2 = nn.ConvTranspose2d(in_channels=128, out_channels=64,
                                          kernel_size=4, stride=2, padding=1)
        self.deconv3 = nn.ConvTranspose2d(in_channels=64, out_channels=3,
                                          kernel_size=4, stride=2, padding=1)

    def forward(self, x):
        """Decode a batch of 1024-d codes into sigmoid-activated images."""
        expanded = torch.relu(self.fc(x))
        feature_map = expanded.view(-1, 256, 8, 8)
        for deconv in (self.deconv1, self.deconv2):
            feature_map = torch.relu(deconv(feature_map))
        logits = self.deconv3(feature_map)
        return torch.sigmoid(logits)

class Autoencoder(nn.Module):
    """Encoder followed by Decoder, used for image reconstruction."""

    def __init__(self):
        super().__init__()
        self.encoder = Encoder()
        self.decoder = Decoder()

    def forward(self, x):
        """Return the decoder's reconstruction of the encoded input ``x``."""
        latent = self.encoder(x)
        reconstruction = self.decoder(latent)
        return reconstruction

# ==========================
# Training Setup and Loop
# ==========================
def train_autoencoder(model_path='deepfake_autoencoder.pth', epochs=50,
                      data_dir='/path/to/faces/dataset', batch_size=32,
                      learning_rate=0.001, device=None):
    """Train the autoencoder on an image folder and save its weights.

    Args:
        model_path: Destination file for the trained ``state_dict``.
        epochs: Number of passes over the dataset.
        data_dir: Root directory handed to ``datasets.ImageFolder``.
        batch_size: Mini-batch size used by the ``DataLoader``.
        learning_rate: Learning rate for the Adam optimizer.
        device: Device to train on. When ``None``, CUDA is used if available,
            otherwise the CPU.
    """
    # ``device`` used to be referenced without ever being defined; resolve it
    # here so the function is self-contained.
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Data preparation. Images are resized to 64x64 because the encoder
    # flattens its last feature map as 256 * 8 * 8.
    transform = transforms.Compose([
        transforms.Resize((64, 64)),
        transforms.ToTensor()
    ])

    dataset = datasets.ImageFolder(data_dir, transform=transform)
    data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    # Model and optimizer setup
    model = Autoencoder().to(device)
    model.train()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.MSELoss()

    for epoch in range(epochs):
        # Class labels from ImageFolder are ignored: the target is the input.
        for images, _ in data_loader:
            images = images.to(device)

            # Forward pass and reconstruction loss
            reconstructed = model(images)
            loss = criterion(reconstructed, images)

            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Reports the loss of the last batch of the epoch, not an average.
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}")

    # Save the model state
    torch.save(model.state_dict(), model_path)

# ==========================
# Wav2Lip Inference Script Execution
# ==========================
def run_wav2lip(audio_path: str, video_path: str, output_path: str,
                checkpoint_path: str):
    """Run the Wav2Lip ``inference.py`` script as a subprocess.

    ``inference.py`` is resolved relative to the current working directory.

    Args:
        audio_path: Value passed as ``--audio``.
        video_path: Value passed as ``--face``.
        output_path: Value passed as ``--outfile``.
        checkpoint_path: Value passed as ``--checkpoint_path``.

    Returns:
        The ``subprocess.CompletedProcess`` of the run, so callers can inspect
        ``returncode``. A non-zero exit is reported on stdout but not raised.
    """
    # Local import keeps this block self-contained; ``sys`` is not among the
    # file-level imports.
    import sys

    # Use the interpreter running this code so the child sees the same
    # environment; fall back to "python" if the path is unavailable.
    interpreter = sys.executable or "python"

    command = [
        interpreter, "inference.py",
        "--checkpoint_path", checkpoint_path,
        "--face", video_path,
        "--audio", audio_path,
        "--outfile", output_path,
    ]
    completed = subprocess.run(command)

    if completed.returncode != 0:
        print(f"Wav2Lip inference failed with exit code {completed.returncode}.")

    return completed

# ==========================
# Face Alignment Functionality
# ==========================
def align_face(image, detector=None, predictor=None,
               predictor_path="/path/to/shape_predictor_68_face_landmarks.dat",
               output_size=(256, 256)):
    """Rotate and crop the first detected face so the eyes are level.

    Args:
        image: Image array handed to the dlib detector and to
            ``cv2.warpAffine``.
        detector: Optional pre-built dlib face detector. Pass one when calling
            repeatedly (e.g. per video frame) to avoid rebuilding it.
        predictor: Optional pre-loaded ``dlib.shape_predictor``. When omitted,
            the model is loaded from ``predictor_path`` on every call.
        predictor_path: Landmark model file used when ``predictor`` is None.
        output_size: ``(width, height)`` of the aligned output.

    Returns:
        The aligned face image for the first detection, or ``None`` when no
        face is detected.
    """
    if detector is None:
        detector = dlib.get_frontal_face_detector()
    if predictor is None:
        predictor = dlib.shape_predictor(predictor_path)

    # Only the first detection is aligned; any others are ignored.
    face = next(iter(detector(image)), None)
    if face is None:
        return None

    landmarks = predictor(image, face)

    # Landmarks 36 and 45 — presumably the outer eye corners of the 68-point
    # model; verify against the predictor in use.
    left_eye = (landmarks.part(36).x, landmarks.part(36).y)
    right_eye = (landmarks.part(45).x, landmarks.part(45).y)

    eye_center = ((left_eye[0] + right_eye[0]) // 2,
                  (left_eye[1] + right_eye[1]) // 2)

    # Angle of the line between the two eye points, in degrees.
    delta_x = right_eye[0] - left_eye[0]
    delta_y = right_eye[1] - left_eye[1]
    angle = np.arctan2(delta_y, delta_x) * 180 / np.pi

    desired_face_width, desired_face_height = output_size

    # Rotate about the eye midpoint, then translate so that midpoint lands at
    # 50% of the output width and 35% of the output height.
    M = cv2.getRotationMatrix2D(eye_center, angle, 1.0)
    t_x = desired_face_width * 0.5
    t_y = desired_face_height * 0.35
    M[0, 2] += (t_x - eye_center[0])
    M[1, 2] += (t_y - eye_center[1])

    return cv2.warpAffine(image, M,
                          (desired_face_width, desired_face_height))

# ==========================
# Seamless Cloning Functionality
# ==========================
def seamless_clone(generated_face_path: str,
                   target_image_path: str,
                   output_image_path: str,
                   face_region=(100, 300, 100, 300),
                   center_position=(200, 200)):
    """Blend a region of a generated face into a target image and save it.

    Args:
        generated_face_path: Path of the source (generated face) image.
        target_image_path: Path of the destination image.
        output_image_path: Where the blended result is written.
        face_region: ``(row_start, row_stop, col_start, col_stop)`` of the
            source region to clone.
        center_position: ``(x, y)`` point in the target image where the
            cloned region is centred.

    Returns:
        None. Problems loading, masking or writing are reported on stdout.
    """
    generated_face = cv2.imread(generated_face_path)
    target_image = cv2.imread(target_image_path)

    if generated_face is None or target_image is None:
        print("Error loading images.")
        return

    # The mask selects pixels of the SOURCE image, so it must share the
    # source's height and width rather than the target's.
    mask = np.zeros(generated_face.shape[:2], dtype=np.uint8)
    row_start, row_stop, col_start, col_stop = face_region
    mask[row_start:row_stop, col_start:col_stop] = 255

    # Slicing clips silently; an all-zero mask means there is nothing to clone.
    if not mask.any():
        print("Error: face region lies outside the generated face image.")
        return

    blended_image = cv2.seamlessClone(
        src=generated_face,
        dst=target_image,
        mask=mask,
        p=center_position,
        flags=cv2.NORMAL_CLONE
    )

    # imwrite signals failure through its return value instead of raising.
    if not cv2.imwrite(output_image_path, blended_image):
        print("Error writing output image.")

# ==========================
# Video Processing Functionality
# ==========================
def process_output_video(input_video_path: str,
                         output_video_path: str):
    """Write a Gaussian-blurred copy of a video.

    Every frame is blurred with a 5x5 Gaussian kernel and written with the
    input's frame rate and frame size, using the 'MP4V' fourcc.

    Args:
        input_video_path: Video to read.
        output_video_path: Destination of the processed video.
    """
    cap = cv2.VideoCapture(input_video_path)
    out = None

    try:
        # Without this guard an unopened capture reports zero fps and size,
        # and a writer would be created with those values.
        if not cap.isOpened():
            print("Error opening input video.")
            return

        fps = cap.get(cv2.CAP_PROP_FPS)
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        out = cv2.VideoWriter(output_video_path,
                              cv2.VideoWriter_fourcc(*'MP4V'),
                              fps,
                              (frame_width, frame_height))

        while True:
            ret, frame = cap.read()
            if not ret:
                # End of stream or read failure.
                break
            out.write(cv2.GaussianBlur(frame, (5, 5), 0))
    finally:
        # Release both handles even if reading or writing raised.
        cap.release()
        if out is not None:
            out.release()