## Loading the library

In [2]:
import numpy as np
import cv2
import matplotlib.pyplot as plt
import os

# TensorFlow and Keras imports
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Conv2DTranspose, Flatten, Reshape, Input
from tensorflow.keras import layers as L
from tensorflow.keras.optimizers import Adam

In [13]:
# !pip install opencv-python

## Initializing paramaters

In [10]:
IMG_SHAPE = (64, 64, 3)
CODE_SIZE = 100  

In [13]:
class GAN:
    # Constructor to initialize key parameters and attributes
    def __init__(self, img_shape, code_size):
        self.global_epoch = 0  # Tracks the total number of training epochs
        self.IMG_SHAPE = img_shape  # Shape of input images
        self.CODE_SIZE = code_size  # Dimension of the latent vector (noise input)
        self.data = None  # Placeholder for processed data
        self.generator = None  # Placeholder for generator model
        self.discriminator = None  # Placeholder for discriminator model

    # Function to extract and preprocess frames from a video file
    def get_data(self, video_file='data/video.avi', frame_skip=5):
        print('Generating data from video.')
        vidcap = cv2.VideoCapture(video_file)  # Open the video file
        success, image = vidcap.read()  # Read the first frame
        count = 0
        data = []

        # Loop through frames in the video
        while success:
            y_dim, x_dim = image.shape[:2]  # Get dimensions of the frame
            dim_diff = x_dim - y_dim  # Calculate the difference for cropping square
            x_start = np.random.randint(dim_diff)  # Random start position for cropping
            image = image[:, x_start:x_start + y_dim, :]  # Crop the image to square
            image = cv2.resize(image, self.IMG_SHAPE[:2])  # Resize to target dimensions
            image = image[:, :, ::-1]  # Convert BGR to RGB

            if count % frame_skip == 0:  # Save frames at specified intervals
                data.append(image)
            count += 1
            success, image = vidcap.read()  # Read the next frame

        # Normalize data to the range [0, 1] and store it
        self.data = np.asarray(data) / 255.0
        print(f'Data prepared. {len(data)} frames generated.')

    # Function to define the generator model
    def create_generator(self):
        self.generator = Sequential([
            L.Input(shape=(self.CODE_SIZE,)),  # Input layer with latent vector
            L.Dense(16 * 16 * 128, activation='elu'),  # Fully connected layer
            L.Reshape((16, 16, 128)),  # Reshape output to image-like dimensions
            L.Conv2DTranspose(128, kernel_size=(5, 5), strides=2, padding='same', activation='elu'),  # Upsample with Conv2DTranspose
            L.Conv2DTranspose(64, kernel_size=(5, 5), strides=2, padding='same', activation='elu'),  # Another upsampling layer
            L.Conv2D(3, kernel_size=(7, 7), padding='same', activation='sigmoid')  # Output RGB image with pixel values in [0, 1]
        ])
        print('Generator created successfully.')

    # Function to define the discriminator model
    def create_discriminator(self):
        self.discriminator = Sequential([
            L.Input(shape=self.IMG_SHAPE),  # Input layer with image shape
            L.Conv2D(64, kernel_size=(5, 5), strides=2, padding='same', activation='elu'),  # Downsample with Conv2D
            L.Conv2D(128, kernel_size=(5, 5), strides=2, padding='same', activation='elu'),  # Another downsampling layer
            L.Flatten(),  # Flatten the output for the fully connected layer
            L.Dense(1, activation='sigmoid')  # Single output neuron for binary classification
        ])
        print('Discriminator created successfully.')

    # Compile the discriminator model with loss and optimizer
    def compile_models(self):
        self.discriminator.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        print('Models compiled successfully.')

    # Function to sample a batch of random noise vectors
    def sample_noise_batch(self, bsize):
        return np.random.normal(size=(bsize, self.CODE_SIZE)).astype('float32')

    # Function to sample a batch of real images from the dataset
    def sample_data_batch(self, bsize):
        idxs = np.random.choice(np.arange(self.data.shape[0]), size=bsize)  # Randomly sample indices
        return self.data[idxs]

    # Function to generate and save a grid of images for visualization
    def sample_images(self, nrow, ncol, epoch, sharp=False):
        images = self.generator.predict(self.sample_noise_batch(bsize=nrow * ncol))  # Generate images
        images = np.clip(images, 0, 1)  # Ensure pixel values are in [0, 1]

        # Plot and save the generated images
        for i in range(nrow * ncol):
            plt.subplot(nrow, ncol, i + 1)
            plt.imshow(images[i].reshape(self.IMG_SHAPE), cmap="gray", interpolation="kaiser")
            plt.xticks([])
            plt.yticks([])
        plt.suptitle('Epochs: ' + str(epoch))

        sample_dir = 'output'  # Directory to save images
        os.makedirs(sample_dir, exist_ok=True)
        plt.savefig(os.path.join(sample_dir, f'render_epochs_{epoch}.png'), dpi=200)
        plt.close()

    # Training function for GAN
    def train(self, epochs, batch_size, discriminator_steps=5, generator_steps=1):
        for epoch in range(epochs):
            # Sample real images and noise for this epoch
            real_batch = self.sample_data_batch(batch_size)
            noise_batch = self.sample_noise_batch(batch_size)

            # Train discriminator on real and fake data
            for _ in range(discriminator_steps):
                d_loss_real = self.discriminator.train_on_batch(real_batch, np.ones((batch_size, 1)))  # Real images labeled as 1
                generated_images = self.generator.predict(noise_batch)
                d_loss_fake = self.discriminator.train_on_batch(generated_images, np.zeros((batch_size, 1)))  # Fake images labeled as 0

            # Train generator to fool the discriminator
            g_loss = self.discriminator.train_on_batch(self.generator.predict(noise_batch), np.ones((batch_size, 1)))  # Fake images labeled as real (1)

            # Save generated images and print losses at intervals
            if epoch % 100 == 0:
                self.sample_images(4, 5, self.global_epoch, sharp=True)
                print('Images successfully generated.')

            if epoch % 1 == 0:
                print(f'Epoch: {self.global_epoch}, Discriminator loss: {d_loss_real[0]:.4f}, Generator loss: {g_loss[0]:.4f}')

            self.global_epoch += 1  # Increment the epoch counter

            # Plot and save the loss
            plt.figure(figsize=(10, 5))
            plt.title("Generator and Discriminator Loss During Training")
            plt.xlabel("Epochs")
            plt.ylabel("Loss")
            plt.legend()
            plt.savefig("output/gan_loss_plot.png")
            plt.show()


In [None]:
gan = GAN(IMG_SHAPE, CODE_SIZE)
gan.get_data(video_file='data/video.avi', frame_skip=5)
gan.create_generator()
gan.create_discriminator()
gan.compile_models() 
gan.train(epochs=10000, batch_size=100, discriminator_steps=5, generator_steps=1)


Generating data from video.
Data prepared. 6630 frames generated.
Generator created successfully.
Discriminator created successfully.
Models compiled successfully.
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
Images successfully generated.
Epoch: 0, Discriminator loss: 1.2811, Generator loss: 1.2280
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m4/4[0m [32m━━━━━━━