In [None]:
# Extract frames from the mp4 file and preprocess frames

import cv2
import os

def extract_and_preprocess_frames(video_path, output_folder):
    """Split a video into binarized frames and save each one as a PNG.

    Every frame read from ``video_path`` is converted to grayscale,
    thresholded to a binary image and written to ``output_folder`` as
    ``frame_NNNN.png`` (zero-padded index starting at 0).

    Args:
        video_path: Path of the video file to read.
        output_folder: Directory that receives the PNG files; it is created
            (including parents) when it does not exist yet.

    Returns:
        A ``(frame_count, fps)`` tuple: the number of frames written and the
        frame rate reported by the capture.

    Raises:
        IOError: If the video cannot be opened or a frame cannot be written.
    """
    os.makedirs(output_folder, exist_ok=True)

    cap = cv2.VideoCapture(video_path)
    frame_count = 0
    try:
        # A missing or undecodable file does not raise on construction, so
        # check explicitly instead of silently producing zero frames.
        if not cap.isOpened():
            raise IOError(f"Could not open video file: {video_path}")

        fps = cap.get(cv2.CAP_PROP_FPS)

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Convert to binary frames
            gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            # THRESH_OTSU makes OpenCV pick the threshold itself, so the 128
            # passed here is not the value that is actually applied.
            _, binary_frame = cv2.threshold(gray_frame, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)

            frame_path = os.path.join(output_folder, f"frame_{frame_count:04d}.png")
            if not cv2.imwrite(frame_path, binary_frame):
                raise IOError(f"Could not write frame image: {frame_path}")
            frame_count += 1
    finally:
        # Release the capture even when reading or writing fails part-way.
        cap.release()

    print(f"Extracted and converted {frame_count} frames to binary.")
    return frame_count, fps

# Source video and the folder that receives its binarized frames
video_path, output_folder = 'animation.mp4', 'binary_frames'
frame_count, fps = extract_and_preprocess_frames(
    video_path=video_path,
    output_folder=output_folder,
)

In [None]:
import numpy as np
import torch

def prepare_dataset(frame_folder):
    """Build (current frame, next frame) training pairs from saved PNG frames.

    The ``.png`` files in ``frame_folder`` are sorted by name, read as
    grayscale images, divided by 255 and given a leading channel axis.
    Frame ``i`` becomes an input and frame ``i + 1`` its target.

    Args:
        frame_folder: Directory containing the frame images.

    Returns:
        A tuple ``(X, y)`` of ``float32`` tensors, each shaped
        ``(num_frames - 1, 1, height, width)``.

    Raises:
        ValueError: If the folder holds fewer than two PNG frames, so no
            (input, target) pair can be formed.
        IOError: If a frame file cannot be read as an image.
    """
    frame_paths = sorted(
        os.path.join(frame_folder, name)
        for name in os.listdir(frame_folder)
        if name.endswith('.png')
    )
    if len(frame_paths) < 2:
        raise ValueError(
            f"Need at least two PNG frames in {frame_folder!r}, found {len(frame_paths)}"
        )

    # Decode every file exactly once; inputs and targets are two overlapping
    # views of the same sequence.
    images = []
    for path in frame_paths:
        image = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if image is None:  # imread signals failure by returning None
            raise IOError(f"Could not read frame image: {path}")
        images.append(image)

    # (num_frames, H, W) -> (num_frames, 1, H, W), scaled by 1/255 in float64
    # and then stored as float32.
    stacked = (np.stack(images)[:, np.newaxis] / 255.0).astype(np.float32)

    X = torch.tensor(stacked[:-1])
    y = torch.tensor(stacked[1:])

    return X, y

# X holds each frame and y the frame that follows it, both loaded from `output_folder`
X, y = prepare_dataset(output_folder)

In [None]:
# Define convolutional autoencoder architecture 

import torch.nn as nn
import torch.optim as optim

class PredictNextFrame(nn.Module):
    """Small convolutional encoder-decoder that maps a frame to a predicted next frame.

    The encoder halves the spatial resolution twice (two 2x2 max-pools) and
    the decoder doubles it twice with stride-2 transposed convolutions. A
    final sigmoid keeps every output value between 0 and 1.
    """

    # Total down-sampling factor of the encoder (two 2x2 max-pools).
    _DOWNSCALE = 4

    def __init__(self):
        super().__init__()
        # Encoder: 1 -> 4 -> 8 channels, spatial size divided by 4
        self.encoder = nn.Sequential(
            nn.Conv2d(1, 4, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(4, 8, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)
        )
        # Decoder: 8 -> 4 -> 1 channels, spatial size multiplied by 4
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(8, 4, kernel_size=2, stride=2),
            nn.ReLU(),
            nn.ConvTranspose2d(4, 1, kernel_size=2, stride=2),
            nn.Sigmoid()
        )

    def forward(self, x):
        """Predict the next frame for ``x``.

        Args:
            x: Tensor whose last two dimensions are height and width and
                whose channel dimension has size 1.

        Returns:
            A tensor with the same height and width as ``x``.
        """
        height, width = x.shape[-2:]
        # Max-pooling floors odd sizes, so without padding the decoder output
        # would be smaller than the input whenever a side is not a multiple
        # of 4. Zero-pad on the bottom/right and crop the result back.
        pad_h = -height % self._DOWNSCALE
        pad_w = -width % self._DOWNSCALE
        if pad_h or pad_w:
            x = nn.functional.pad(x, (0, pad_w, 0, pad_h))

        x = self.encoder(x)
        x = self.decoder(x)

        if pad_h or pad_w:
            x = x[..., :height, :width]
        return x

# Run on the GPU when one is available, otherwise stay on the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model = PredictNextFrame()
model = model.to(device)
print(model)

In [None]:
# Train
from torch.utils.data import DataLoader, TensorDataset

epochs = 100

dataset = TensorDataset(X, y)
if len(dataset) == 0:
    # Without this guard the per-epoch average below divides by zero.
    raise ValueError("Training dataset is empty; at least two frames are required.")
dataloader = DataLoader(dataset, batch_size=16, shuffle=True)

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

model.train()
for epoch in range(epochs):
    # Accumulate batch loss weighted by batch size so the reported value is
    # the mean over all samples, even when the last batch is smaller.
    total_loss = 0.0
    for inputs, targets in dataloader:
        inputs, targets = inputs.to(device), targets.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        total_loss += loss.item() * inputs.size(0)

    print(f'Epoch {epoch+1}/{epochs}, Loss: {total_loss/len(dataset)}')

In [None]:
# Generate frames
model.eval()

# Seed the sequence with the first real frame, then append the model's
# prediction for every input frame in order. Index 0 is included so the
# prediction that directly follows the seed frame is not skipped, and the
# bound comes from X itself rather than from a separately tracked count.
generated_frames = [X[0].unsqueeze(0).to(device)]
with torch.no_grad():
    for i in range(len(X)):
        next_frame = model(X[i].unsqueeze(0).to(device))
        generated_frames.append(next_frame)

# Drop the batch axis and move everything back to the CPU
generated_frames = [frame.squeeze(0).cpu() for frame in generated_frames]


In [None]:
# Convert the generated frames back into an MP4 file

def frames_to_video(frames, output_path, fps):
    """Write single-channel frames to a grayscale MP4 file.

    Args:
        frames: Non-empty sequence of tensors shaped ``(1, height, width)``.
            Values are multiplied by 255, so they are expected to lie in
            ``[0, 1]``; anything outside that range is clipped.
        output_path: Path of the video file to create.
        fps: Frame rate of the output video.

    Raises:
        ValueError: If ``frames`` is empty.
        IOError: If the video writer cannot be opened for ``output_path``.
    """
    if len(frames) == 0:
        raise ValueError("frames must contain at least one frame")

    height, width = frames[0].shape[1:]
    size = (width, height)  # OpenCV expects (width, height)
    out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, size, isColor=False)

    try:
        # A writer that failed to open would otherwise drop every frame silently.
        if not out.isOpened():
            raise IOError(f"Could not open video writer for: {output_path}")

        for frame in frames:
            pixels = frame.detach().cpu().numpy().reshape(height, width) * 255
            # Clip before the cast so out-of-range values saturate instead of wrapping.
            pixels = np.clip(pixels, 0, 255).astype(np.uint8)

            # Optionally re-binarize here (e.g. with an Otsu threshold) to reduce noise.
            out.write(pixels)
    finally:
        out.release()

# Encode the generated frames using the frame rate read from the source video
output_path = 'generated_video.mp4'
frames_to_video(
    frames=generated_frames,
    output_path=output_path,
    fps=fps,
)
print(f"Generated video saved to {output_path}")

In [6]:
# Display generated video
from IPython.display import HTML

def display_video(video_path, width=200, height=200):
    """Return an HTML ``<video>`` player for ``video_path``.

    Args:
        video_path: Path or URL placed in the ``<source src=...>`` attribute.
        width: Value of the player's ``width`` attribute.
        height: Value of the player's ``height`` attribute.

    Returns:
        An ``IPython.display.HTML`` object wrapping the player markup.
    """
    import html  # stdlib; imported here so the function does not depend on another cell

    # Escape the interpolated values so quotes or ampersands in a path cannot
    # break out of the attribute and corrupt the markup.
    src = html.escape(str(video_path), quote=True)
    safe_width = html.escape(str(width), quote=True)
    safe_height = html.escape(str(height), quote=True)

    video_tag = f'''
    <video width="{safe_width}" height="{safe_height}" controls>
        <source src="{src}" type="video/mp4">
        Your browser does not support the video tag.
    </video>
    '''
    return HTML(video_tag)

# Display the video: the HTML object returned here is the cell's last expression, so the notebook renders it
display_video('generated_video.mp4')