In [None]:
!pip install keras==3.*

In [None]:
import os

os.environ["KERAS_BACKEND"] = "tensorflow"

import numpy as np
import tensorflow as tf
import keras
from keras import ops
from keras import layers

In [None]:
import matplotlib.pyplot as plt
import os
import glob
import pandas as pd
import random
import numpy as np
import cv2
import base64
import imageio

In [None]:
!pip install kaggle

In [None]:
# Step 2: Upload Kaggle API Key
from google.colab import files
files.upload()

In [None]:
# Move the uploaded kaggle.json to ~/.kaggle
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json


In [None]:
# Step 3: Download and Unzip the Dataset
!kaggle datasets download -d jessicali9530/lfw-dataset

In [None]:
!unzip lfw-dataset.zip -d lfw-dataset

In [None]:
import glob
import os
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Root folder of the unzipped LFW images.
DATASET_PATH = "/content/lfw-dataset/lfw-deepfunneled/lfw-deepfunneled"

# Index every image as a {"person", "path"} record.
# - Without recursive=True the "**" component matches exactly one directory
#   level, i.e. <DATASET_PATH>/<folder>/<file>.jpg.
# - The person label is the image's parent folder name, taken with os.path so
#   it does not depend on "/" being the path separator.
# - Paths are sorted because glob returns them in arbitrary order.
records = [
    {"person": os.path.basename(os.path.dirname(path)), "path": path}
    for path in sorted(glob.iglob(os.path.join(DATASET_PATH, "**", "*.jpg")))
]

# Explicit columns keep the schema intact even when no image was found, so
# later cells fail with a clear "empty" result instead of a KeyError.
dataset = pd.DataFrame(records, columns=["person", "path"])

In [None]:
# Keep only people with fewer than 25 photos, so heavily photographed
# individuals (e.g. Bush) do not dominate the data.
photos_per_person = dataset.groupby("person")["person"].transform("size")
dataset = dataset[photos_per_person < 25]

In [None]:
# Display the first 10 entries (a bare last expression is rendered as a rich table)
dataset.head(10)

In [None]:
# Bar chart of per-person image counts, limited to the first 200 people
counts_per_person = dataset.groupby("person").count()
counts_per_person.iloc[:200].plot.bar(figsize=(20, 5))
plt.show()

In [None]:
# Show a 4x5 grid of randomly chosen faces, each titled with the person's name.
plt.figure(figsize=(20, 10))
for i in range(20):
    # randrange excludes its upper bound. random.randint(0, len(dataset)) can
    # return len(dataset) itself, which makes .iloc raise IndexError.
    idx = random.randrange(len(dataset))
    img = plt.imread(dataset.path.iloc[idx])
    plt.subplot(4, 5, i + 1)
    plt.imshow(img)
    plt.title(dataset.person.iloc[idx])
    # Hide the axis ticks; they carry no information for photos.
    plt.xticks([])
    plt.yticks([])
plt.tight_layout()
plt.show()

In [None]:
!kaggle datasets download -d averkij/lfw-attributes

In [None]:
!unzip lfw-attributes.zip

In [None]:
# Attributes table read (tab-separated) by fetch_dataset(); expected to come
# from unzipping lfw-attributes.zip into /content.
ATTRIBUTES_PATH = "/content/lfw_attributes.txt"

In [None]:
# Import required libraries
import os
import pandas as pd
import imageio
import numpy as np
from PIL import Image

# Define the function to fetch and preprocess the dataset
def fetch_dataset(dx=80, dy=80, dimx=64, dimy=64):
    """Load the LFW photos that have attribute rows, cropped and resized.

    Reads the attributes table at ATTRIBUTES_PATH, walks DATASET_PATH for
    ``.jpg`` files, and joins the two on ``(person, imagenum)``.

    Args:
        dx: Pixels trimmed from both the left and the right edge of each image.
        dy: Pixels trimmed from both the top and the bottom edge of each image.
        dimx: Output width in pixels after resizing.
        dimy: Output height in pixels after resizing.

    Returns:
        Tuple ``(all_photos, all_attrs)``: a uint8 array stacking one cropped
        and resized image per merged row, and a DataFrame holding the
        remaining attribute columns in the same row order.

    Raises:
        AssertionError: If the merged table does not have the same number of
            rows as the attributes table.
    """
    # Read the attributes file. The values are re-labelled with the header
    # names shifted left by one and the last value column is dropped
    # (presumably the header line carries one extra leading token -- confirm
    # against the file).
    df_attrs = pd.read_csv(ATTRIBUTES_PATH, sep='\t', skiprows=1)
    df_attrs = pd.DataFrame(df_attrs.iloc[:, :-1].values, columns=df_attrs.columns[1:])

    # Collect one record per image. File names are split on "_" / whitespace:
    # the last token is the photo number, the rest is the person name.
    photo_ids = []
    for dirpath, dirnames, filenames in os.walk(DATASET_PATH):
        for fname in filenames:
            if fname.endswith(".jpg"):
                fpath = os.path.join(dirpath, fname)
                photo_id = fname[:-4].replace('_', ' ').split()
                person_id = ' '.join(photo_id[:-1])
                photo_number = int(photo_id[-1])
                photo_ids.append({'person': person_id, 'imagenum': photo_number, 'photo_path': fpath})

    # Join attributes with file paths (inner merge on both key columns).
    photo_ids = pd.DataFrame(photo_ids)
    df = pd.merge(df_attrs, photo_ids, on=['person', 'imagenum'])

    # Ensure no data is lost during merge
    assert len(df) == len(df_attrs), "lost some data when merging dataframes"

    def load_face(path):
        """Read one image, trim its borders and resize it to (dimx, dimy)."""
        img = imageio.imread(path)
        height, width = img.shape[:2]
        # Explicit end indices: a negative-stop slice such as img[dy:-dy] is
        # empty when dy == 0 (because -0 == 0), which would break cropping
        # whenever a margin of zero is requested.
        img = img[dy:height - dy, dx:width - dx]
        return np.array(Image.fromarray(img).resize((dimx, dimy)))

    # Stack all photos into a single uint8 numpy array
    all_photos = np.stack([load_face(path) for path in df['photo_path']]).astype('uint8')
    # Drop the join keys and file path, leaving only the attribute columns
    all_attrs = df.drop(["photo_path", "person", "imagenum"], axis=1)

    return all_photos, all_attrs




In [None]:
# Load the attribute-matched photos with fetch_dataset's defaults
# (80 px trimmed from each edge, resized to 64x64) plus their attribute table.
data, attrs = fetch_dataset()

In [None]:
data.shape

In [None]:
# Normalization of dataset: scale pixels from [0, 255] to [0, 1] as float32.
# Only the raw uint8 array is converted, so re-running this cell cannot divide
# an already-normalized array by 255 a second time. Casting before dividing
# also avoids a temporary float64 copy of the whole image stack.
if data.dtype == np.uint8:
    data = data.astype('float32') / 255

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, Model
from tensorflow.keras.datasets import mnist
import matplotlib.pyplot as plt

# Check TensorFlow version
print(tf.__version__)


# Model Architecture

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# The training images are the `data` array prepared in the earlier cells.
# It is already scaled to the range [0, 1] there, so no normalization is
# repeated here.

# VAE parameters
input_shape = (64, 64, 3)  # shape of one input image given to the encoder
latent_dim = 128  # length of the latent vector z
batch_size = 64  # batch size passed to vae.fit() in the training cell

# Encoder: two stride-2 convolutions halve the spatial size twice, then a
# dense layer feeds the two heads that parameterize the latent Gaussian.
encoder_inputs = keras.Input(shape=input_shape)
x = encoder_inputs
for n_filters in (32, 64):
    x = layers.Conv2D(n_filters, 3, activation="relu", strides=2, padding="same")(x)
x = layers.Flatten()(x)
x = layers.Dense(16 * latent_dim, activation="relu")(x)
# Both heads read the same dense features: mean and log-variance of z.
z_mean = layers.Dense(latent_dim, name="z_mean")(x)
z_log_var = layers.Dense(latent_dim, name="z_log_var")(x)

class Sampling(layers.Layer):
    """Reparameterization layer that samples z from (z_mean, z_log_var).

    ``call`` takes ``[z_mean, z_log_var]`` and returns
    ``z_mean + exp(0.5 * z_log_var) * epsilon``, where ``epsilon`` is standard
    normal noise with the same shape and dtype as ``z_mean``.
    """

    def call(self, inputs):
        z_mean, z_log_var = inputs
        # tf.random.normal is the public TensorFlow op, whereas
        # tf.keras.backend.random_normal is a legacy backend helper.
        # Matching z_mean's dtype (instead of the global floatx) keeps the
        # arithmetic below type-consistent under non-default dtype policies.
        epsilon = tf.random.normal(shape=tf.shape(z_mean), dtype=z_mean.dtype)
        # exp(0.5 * log_var) is the standard deviation.
        return z_mean + tf.exp(0.5 * z_log_var) * epsilon

# Draw the latent sample from the two distribution heads.
z = Sampling()([z_mean, z_log_var])

# The encoder exposes the distribution parameters as well as the sample itself.
encoder = keras.Model(
    inputs=encoder_inputs,
    outputs=[z_mean, z_log_var, z],
    name="encoder",
)

# Decoder: project the latent vector to a 16x16x64 feature map, upsample it
# with two stride-2 transposed convolutions, then map to 3 sigmoid channels.
latent_inputs = keras.Input(shape=(latent_dim,))
x = layers.Dense(16 * 16 * 64, activation="relu")(latent_inputs)
x = layers.Reshape((16, 16, 64))(x)
for n_filters in (64, 32):
    x = layers.Conv2DTranspose(n_filters, 3, activation="relu", strides=2, padding="same")(x)
decoder_outputs = layers.Conv2DTranspose(3, 3, activation="sigmoid", padding="same")(x)

decoder = keras.Model(inputs=latent_inputs, outputs=decoder_outputs, name="decoder")

# VAE model
class VAE(keras.Model):
    """Variational autoencoder trained through a custom ``train_step``.

    Args:
        encoder: Model mapping an image batch to ``(z_mean, z_log_var, z)``.
        decoder: Model mapping a latent batch ``z`` to reconstructed images.
        **kwargs: Forwarded to ``keras.Model``.

    The loss is computed inside ``train_step``, so ``compile`` only needs an
    optimizer. Three running means are tracked: total, reconstruction and KL.
    """

    def __init__(self, encoder, decoder, **kwargs):
        super().__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder
        self.total_loss_tracker = keras.metrics.Mean(name="total_loss")
        self.reconstruction_loss_tracker = keras.metrics.Mean(name="reconstruction_loss")
        self.kl_loss_tracker = keras.metrics.Mean(name="kl_loss")

    @property
    def metrics(self):
        """Trackers exposed to Keras so it can manage them with the model's metrics."""
        return [self.total_loss_tracker, self.reconstruction_loss_tracker, self.kl_loss_tracker]

    def train_step(self, data):
        """Run one optimization step on a batch and return the running losses.

        Args:
            data: A batch of images, or a tuple/list whose first element is
                the image batch (any further elements are ignored).

        Returns:
            Dict with the running means under ``"loss"``,
            ``"reconstruction_loss"`` and ``"kl_loss"``.
        """
        # fit(x, y) or a dataset yielding tuples delivers (x, ...) batches;
        # the VAE is unsupervised, so only the images are used.
        if isinstance(data, (tuple, list)):
            data = data[0]

        with tf.GradientTape() as tape:
            z_mean, z_log_var, z = self.encoder(data)
            reconstruction = self.decoder(z)
            # Cross-entropy is summed over both spatial axes, then averaged
            # over the batch.
            reconstruction_loss = tf.reduce_mean(
                tf.reduce_sum(
                    keras.losses.binary_crossentropy(data, reconstruction), axis=(1, 2)
                )
            )
            # KL divergence between N(z_mean, exp(z_log_var)) and N(0, I),
            # summed over latent dimensions and averaged over the batch.
            kl_loss = -0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
            kl_loss = tf.reduce_mean(tf.reduce_sum(kl_loss, axis=1))
            total_loss = reconstruction_loss + kl_loss
        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        self.total_loss_tracker.update_state(total_loss)
        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
        self.kl_loss_tracker.update_state(kl_loss)
        return {
            "loss": self.total_loss_tracker.result(),
            "reconstruction_loss": self.reconstruction_loss_tracker.result(),
            "kl_loss": self.kl_loss_tracker.result(),
        }

vae = VAE(encoder, decoder)


# Training and saving the weights after every epoch

In [None]:
# The path has no epoch placeholder, so every save goes to this same file.
checkpoint_path = "training_1/cp.weights.h5"
checkpoint_dir = os.path.dirname(checkpoint_path)

# Callback that stores only the model's weights and logs each save.
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    save_weights_only=True,
    verbose=1,
)

# The VAE computes its loss inside train_step, so compile() is given only an
# optimizer; then train on the image array.
vae.compile(optimizer=keras.optimizers.Adam())
vae.fit(
    data,
    epochs=100,
    batch_size=batch_size,
    callbacks=[cp_callback],
)

In [None]:
# Function for random sampling from the normal distribution
def generate_faces(num_samples):
    """Decode `num_samples` standard-normal latent vectors with the VAE decoder.

    Draws a ``(num_samples, latent_dim)`` batch from N(0, 1) and returns the
    result of ``vae.decoder.predict`` on it.
    """
    latent_batch = np.random.normal(size=(num_samples, latent_dim))
    decoded = vae.decoder.predict(latent_batch)
    return decoded

In [None]:
# Function to display generated images
def display_generated_images(generated_images, n=10):
    """Plot up to `n` generated images side by side in one figure.

    Args:
        generated_images: Indexable collection of images accepted by
            ``plt.imshow``.
        n: Maximum number of images to show. If fewer images are supplied,
            only those are drawn.
    """
    # Clamp to what is available: n defaults to 10 even when fewer images
    # were generated, which would otherwise raise IndexError below.
    n = min(n, len(generated_images))
    plt.figure(figsize=(20, 4))
    for i in range(n):
        ax = plt.subplot(2, n, i + 1)
        plt.imshow(generated_images[i])
        plt.title("Generated")
        # Hide both axes; tick marks are meaningless for images.
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
    plt.show()

In [None]:
# Generate and display new faces
new_faces = generate_faces(10)
display_generated_images(new_faces)