# Spatio-Temporal GANs for Video Generation

This notebook implements a Spatio-Temporal GAN using TensorFlow and Keras.

In [None]:

# Install TensorFlow if not already installed
!pip install tensorflow
    

## Data Preparation

In [None]:

import numpy as np
import tensorflow as tf

# Load the preprocessed clips and rescale them for GAN training in one expression.
# Expected array layout: (num_videos, num_frames, height, width, channels)
# (x - 127.5) / 127.5 maps 0 -> -1 and 255 -> 1; this assumes 8-bit pixel
# values in the file -- TODO confirm for your dataset.
video_data = (
    np.load('video_data.npy').astype('float32') - 127.5  # Replace with your dataset path
) / 127.5
    

## Generator Model

In [None]:

from tensorflow.keras.layers import Input, ConvLSTM2D, Conv3D, BatchNormalization, Activation, UpSampling3D
from tensorflow.keras.models import Model

def build_generator(noise_dim, num_frames, height, width, channels):
    """Build the generator: latent vector -> video of the requested size.

    The network projects the noise vector to a small spatio-temporal seed
    volume of shape (num_frames / 2, height / 8, width / 8, 128), doubles all
    three axes once, applies a ConvLSTM2D over the frame axis, and then doubles
    only the spatial axes twice more so the output matches
    (num_frames, height, width, channels).

    Args:
        noise_dim: Length of the input latent vector.
        num_frames: Number of frames to generate; must be a positive multiple of 2.
        height: Frame height in pixels; must be a positive multiple of 8.
        width: Frame width in pixels; must be a positive multiple of 8.
        channels: Number of output channels per pixel.

    Returns:
        A Keras ``Model`` mapping ``(batch, noise_dim)`` to
        ``(batch, num_frames, height, width, channels)`` with tanh-activated
        values in [-1, 1].

    Raises:
        ValueError: If the requested dimensions cannot be reached by the fixed
            upsampling schedule.
    """
    if min(num_frames, height, width) <= 0:
        raise ValueError('num_frames, height and width must be positive')
    if num_frames % 2 or height % 8 or width % 8:
        raise ValueError(
            'num_frames must be divisible by 2 and height/width by 8, got '
            f'num_frames={num_frames}, height={height}, width={width}'
        )

    # Size of the seed volume before any upsampling: time is doubled once,
    # space is doubled three times in total.
    base_frames = num_frames // 2
    base_height = height // 8
    base_width = width // 8

    input_noise = Input(shape=(noise_dim,))

    # Expand noise to a shape suitable for 3D convolution
    x = tf.keras.layers.Dense(
        base_frames * base_height * base_width * 128, activation='relu'
    )(input_noise)
    x = tf.keras.layers.Reshape((base_frames, base_height, base_width, 128))(x)

    # Expand spatially and temporally: (T/2, H/8, W/8) -> (T, H/4, W/4)
    x = UpSampling3D(size=(2, 2, 2))(x)
    x = Conv3D(128, kernel_size=3, padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    # Add ConvLSTM for spatio-temporal modeling (kept at low spatial
    # resolution, where the recurrent layer is cheapest)
    x = ConvLSTM2D(64, kernel_size=3, padding='same', return_sequences=True)(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    # Spatial-only upsampling: (T, H/4, W/4) -> (T, H/2, W/2) -> (T, H, W).
    # The frame axis is left untouched so the output keeps num_frames frames.
    for filters in (64, 32):
        x = UpSampling3D(size=(1, 2, 2))(x)
        x = Conv3D(filters, kernel_size=3, padding='same')(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)

    # Output layer: tanh matches the [-1, 1] range of the normalised training data
    x = Conv3D(channels, kernel_size=3, padding='same', activation='tanh')(x)

    model = Model(input_noise, x)
    return model

# Hyperparameters
noise_dim = 100   # length of the latent vector fed to the generator
num_frames = 16   # frames per clip
height = 64
width = 64
channels = 3

# Build the generator from the settings above and print its layer table.
generator = build_generator(
    noise_dim=noise_dim,
    num_frames=num_frames,
    height=height,
    width=width,
    channels=channels,
)
generator.summary()
    

## Discriminator Model

In [None]:

from tensorflow.keras.layers import Flatten, Dense, LeakyReLU

def build_discriminator(num_frames, height, width, channels):
    """Build the discriminator: video -> single real/fake score.

    Three stride-2 Conv3D + LeakyReLU stages halve the frame, height and width
    axes each time; the result is flattened and projected to one unit.

    The output is a raw logit (no sigmoid). The losses in this notebook use
    ``BinaryCrossentropy(from_logits=True)``, which applies the sigmoid
    internally; adding one here as well would squash the score twice and
    distort both the loss value and its gradients.

    Args:
        num_frames: Number of frames in each input clip.
        height: Frame height in pixels.
        width: Frame width in pixels.
        channels: Number of channels per pixel.

    Returns:
        A Keras ``Model`` mapping ``(batch, num_frames, height, width, channels)``
        to ``(batch, 1)`` logits. Apply ``tf.sigmoid`` to obtain probabilities.
    """
    input_video = Input(shape=(num_frames, height, width, channels))

    # 3D Convolutional layers
    x = Conv3D(64, kernel_size=3, strides=2, padding='same')(input_video)
    x = LeakyReLU(alpha=0.2)(x)

    x = Conv3D(128, kernel_size=3, strides=2, padding='same')(x)
    x = LeakyReLU(alpha=0.2)(x)

    x = Conv3D(256, kernel_size=3, strides=2, padding='same')(x)
    x = LeakyReLU(alpha=0.2)(x)

    # Classification output: linear unit producing a logit for the
    # from_logits=True cross-entropy loss
    x = Flatten()(x)
    x = Dense(1)(x)

    model = Model(input_video, x)
    return model

# Build the discriminator for clips of the configured size and print its layer table.
discriminator = build_discriminator(
    num_frames=num_frames,
    height=height,
    width=width,
    channels=channels,
)
discriminator.summary()
    

## Loss Functions

In [None]:

# Shared binary cross-entropy used by both GAN losses. from_logits=True means the
# predictions passed in are treated as raw logits (the sigmoid is applied inside
# the loss), so the scores fed to it must not already be sigmoid-activated.
cross_entropy = tf.keras.losses.BinaryCrossentropy(from_logits=True)

def discriminator_loss(real_output, fake_output):
    """Discriminator objective: label real clips 1 and generated clips 0.

    Args:
        real_output: Discriminator scores for real videos.
        fake_output: Discriminator scores for generated videos.

    Returns:
        The sum of the cross-entropy on the real scores (target: all ones)
        and on the fake scores (target: all zeros).
    """
    real_targets = tf.ones_like(real_output)
    fake_targets = tf.zeros_like(fake_output)
    return cross_entropy(real_targets, real_output) + cross_entropy(fake_targets, fake_output)

def generator_loss(fake_output):
    """Generator objective: make the discriminator score generated clips as real.

    Args:
        fake_output: Discriminator scores for generated videos.

    Returns:
        The cross-entropy between an all-ones target and ``fake_output``.
    """
    wanted_labels = tf.ones_like(fake_output)
    return cross_entropy(wanted_labels, fake_output)
    

## Training Loop

In [None]:

import tensorflow.keras.optimizers as optimizers

# Optimizers: a separate Adam instance per network so each keeps its own
# moment estimates; both use the same learning rate and beta_1.
generator_optimizer = optimizers.Adam(
    learning_rate=2e-4,
    beta_1=0.5,
)
discriminator_optimizer = optimizers.Adam(
    learning_rate=2e-4,
    beta_1=0.5,
)

# Training function
@tf.function
def train_step(videos):
    """Run one simultaneous generator/discriminator update on a batch.

    Args:
        videos: Batch of real videos; the leading axis is the batch axis.

    Returns:
        Tuple ``(gen_loss, disc_loss)`` of scalar loss tensors for this batch.
    """
    # Use the dynamic batch size: the static `videos.shape[0]` is None when the
    # batch dimension is not known at trace time, which would make the noise
    # shape invalid.
    current_batch = tf.shape(videos)[0]
    noise = tf.random.normal([current_batch, noise_dim])

    # Two tapes so each network's gradients are taken from its own loss.
    with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
        generated_videos = generator(noise, training=True)

        real_output = discriminator(videos, training=True)
        fake_output = discriminator(generated_videos, training=True)

        gen_loss = generator_loss(fake_output)
        disc_loss = discriminator_loss(real_output, fake_output)

    gradients_of_generator = gen_tape.gradient(gen_loss, generator.trainable_variables)
    gradients_of_discriminator = disc_tape.gradient(disc_loss, discriminator.trainable_variables)

    generator_optimizer.apply_gradients(zip(gradients_of_generator, generator.trainable_variables))
    discriminator_optimizer.apply_gradients(zip(gradients_of_discriminator, discriminator.trainable_variables))

    return gen_loss, disc_loss

# Training loop
def train(dataset, epochs):
    """Train the GAN for a number of epochs and log the mean losses per epoch.

    Args:
        dataset: Iterable of video batches; each batch is passed to ``train_step``.
        epochs: Number of full passes over ``dataset``.

    Raises:
        ValueError: If ``dataset`` yields no batches, since there is nothing
            to train on and no loss to report.
    """
    for epoch in range(epochs):
        gen_total = 0.0
        disc_total = 0.0
        num_batches = 0

        for batch in dataset:
            gen_loss, disc_loss = train_step(batch)
            # float() turns the scalar tensors into plain numbers so the log
            # shows values rather than tensor reprs.
            gen_total += float(gen_loss)
            disc_total += float(disc_loss)
            num_batches += 1

        if num_batches == 0:
            raise ValueError('dataset yielded no batches; nothing to train on')

        # Report the mean of the per-batch losses rather than only the last batch.
        print(
            f'Epoch {epoch+1}, '
            f'Generator Loss: {gen_total / num_batches:.4f}, '
            f'Discriminator Loss: {disc_total / num_batches:.4f}'
        )

# Prepare dataset: slice the array into individual clips, shuffle with a
# 1000-element buffer, then group into batches.
batch_size = 16
dataset = tf.data.Dataset.from_tensor_slices(video_data)
dataset = dataset.shuffle(1000)
dataset = dataset.batch(batch_size)

# Train
train(dataset, epochs=50)
    

## Generate and Save Videos

In [None]:

import matplotlib.pyplot as plt

def generate_and_save_videos(generator, num_videos, noise_dim, output_dir=None):
    """Sample videos from the generator, show them as frame strips, optionally save them.

    Each generated video is drawn as a single row of frames. When
    ``output_dir`` is given, every strip is also written there as
    ``generated_video_<i>.png``.

    Args:
        generator: Model mapping ``(batch, noise_dim)`` noise to videos with
            values in [-1, 1].
        num_videos: Number of videos to sample.
        noise_dim: Length of the latent vector expected by ``generator``.
        output_dir: Optional directory for the PNG strips. It is created if
            missing. ``None`` (the default) only displays the figures.
    """
    import os  # local import: only needed for the optional save path

    noise = tf.random.normal([num_videos, noise_dim])
    generated_videos = generator(noise, training=False)
    # Rescale to [0, 1] and move to a NumPy array for plotting.
    generated_videos = np.asarray((generated_videos + 1) / 2)

    if output_dir is not None:
        os.makedirs(output_dir, exist_ok=True)

    for i, video in enumerate(generated_videos):
        # squeeze=False keeps `axes` 2-D even for a single-frame video.
        fig, axes = plt.subplots(1, video.shape[0], figsize=(10, 2), squeeze=False)
        for ax, frame in zip(axes[0], video):
            ax.imshow(frame)
            ax.axis('off')

        if output_dir is not None:
            fig.savefig(os.path.join(output_dir, f'generated_video_{i}.png'), bbox_inches='tight')

        plt.show()
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)

# Sample three videos from the trained generator and display them.
generate_and_save_videos(
    generator=generator,
    num_videos=3,
    noise_dim=noise_dim,
)
    