In [None]:
# !pip install datasets

import os
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from textwrap import wrap
from datasets import load_dataset



# Load the emoji dataset a single time; everything below reads from its 'train' split.
mydata = load_dataset('valhalla/emoji-dataset')
keywords = ['face','christmas','superhero','supervillian','mage','vampire','monkey','elf','juggling',
            'boy','girl','adult','person','man','woman','male','female','worker','scientist','technologist',
            'singer','artist','pilot','astronaut','firefighter','police','sleuth','construction']
# keywords = ['face']


plt.rcParams["figure.figsize"] = [1.5, 1.50]

train_split = mydata['train']
total_obs = len(train_split)
print(total_obs)

# One slot per dataset row, indexed as [row, channel, x, y] with values in [0, 1].
# Rows whose caption does not match the filter stay all-zero and are removed
# by the mask further down.
data = np.zeros([total_obs,3,64,64],dtype='float32')
for i in range(total_obs):
    # Fetch the row once: every access to train_split[i] re-reads the record.
    sample = train_split[i]
    title = sample['text']
    # Only the first keyword is used as the caption filter.
    if keywords[0] not in title:
        continue

    im = sample['image']
    plt.imshow(im)
    print(title)
    plt.show()

    im = im.resize((64,64),Image.LANCZOS)
    plt.imshow(im)
    print("lower resolution")
    plt.show()

    # convert('RGB') keeps the first three channels (and makes non-RGB modes usable).
    # The array is (y, x, channel); transposing to (channel, x, y) reproduces the
    # element-wise assignment data[i, c, x, y] = pixel[x, y][c] / 255 without a Python loop.
    rgb = np.asarray(im.convert('RGB'), dtype=np.float64) / 255.0
    data[i] = rgb.transpose(2, 1, 0)

# keep only samples that have any nonzero pixel
mask = (data != 0).any(axis=(1,2,3))
data_nz = data[mask]
print(data.shape, "→", data_nz.shape)

# Data Augmentation 

In [None]:
def add_gaussian_noise(arr: np.ndarray, sigma=0.02, rng=None):
    """Return a copy of ``arr`` with zero-mean Gaussian noise added, clipped to [0, 1].

    Args:
        arr: Input array; it is not modified.
        sigma: Standard deviation of the noise.
        rng: Optional ``np.random.Generator`` used to draw the noise. When
            omitted, the global NumPy random state is used, exactly as before.

    Returns:
        A new array with the same shape as ``arr`` and every value in [0, 1].
    """
    if rng is None:
        noise = np.random.normal(0.0, sigma, size=arr.shape)
    else:
        noise = rng.normal(0.0, sigma, size=arr.shape)
    # The noise is drawn in float64; cast so float32 inputs stay float32.
    noise = noise.astype(np.float32)
    out = arr + noise
    return out.clip(0.0, 1.0)

sigma = 0.02
n_noisy_copies = 5  # +5 noisy copies = 6x total with original

# Seed NumPy's global RNG so the noisy copies (and therefore the training set)
# are identical on every Restart & Run All. 999 mirrors the notebook's manualSeed,
# which is only defined in a later cell.
np.random.seed(999)

outs = [data_nz.astype(np.float32, copy=False)]
outs.extend(add_gaussian_noise(data_nz, sigma) for _ in range(n_noisy_copies))
augmented_data = np.concatenate(outs, axis=0)
print(augmented_data.shape)

# Step 1

In [None]:
import argparse
import os
import random
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torchvision.utils as vutils
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from IPython.display import HTML

# Pin the Python and PyTorch random number generators to one fixed seed.
manualSeed = 999
# manualSeed = random.randint(1, 10000)  # use if you want new results
print("Random Seed: ", manualSeed)
for seed_rng in (random.seed, torch.manual_seed):
    seed_rng(manualSeed)
# Ask PyTorch to use deterministic algorithms only (needed for reproducible results).
torch.use_deterministic_algorithms(True)

In [None]:
# Wrap the in-memory augmented images in a dataset; each item is a 1-tuple -> (img,)
image_tensor = torch.from_numpy(augmented_data)
dataset = torch.utils.data.TensorDataset(image_tensor)

# --- Data loading ---------------------------------------------------------
# Worker processes used by the DataLoader
workers = 2
# Samples per training batch
batch_size = 128
# Side length of the (square) training images
image_size = 64
# Channels per image (3 = colour)
nc = 3

# --- Model ----------------------------------------------------------------
# Length of the latent vector z (encoder output / decoder input)
nz = 100
# Base number of feature maps in the decoder
ngf = 64
# Base number of feature maps in the encoder
ndf = 64

# --- Optimisation ---------------------------------------------------------
# Passes over the training set
num_epochs = 100
# Adam learning rate
lr = 0.001
# Adam beta1 value
beta1 = 0.5



In [None]:
# Shuffled mini-batch loader over the augmented images
dataloader = torch.utils.data.DataLoader(
    dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=workers,
)

# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Preview up to 64 images from one batch as a single grid
real_batch = next(iter(dataloader))
preview_grid = vutils.make_grid(real_batch[0].to(device)[:64], padding=2, normalize=True).cpu()

plt.figure(figsize=(8,8))
plt.axis("off")
plt.title("Training Images")
# Images are stored as (channel, x, y), so reversing the axes gives the
# (row, column, channel) layout that imshow expects.
plt.imshow(preview_grid.permute(2, 1, 0).numpy())
plt.show()

In [None]:
def weights_init(m):
    """Re-initialise one module in place; intended for ``model.apply(weights_init)``.

    Modules whose class name contains ``Conv`` get weights drawn from N(0, 0.02).
    Modules whose class name contains ``BatchNorm`` get weights drawn from
    N(1, 0.02) and a zero bias. Every other module is left untouched.
    """
    layer_kind = m.__class__.__name__
    if 'Conv' in layer_kind:
        nn.init.normal_(m.weight.data, mean=0.0, std=0.02)
        return
    if 'BatchNorm' in layer_kind:
        nn.init.normal_(m.weight.data, mean=1.0, std=0.02)
        nn.init.constant_(m.bias.data, 0)

In [None]:
class AutoEncoder(nn.Module):
    """Convolutional autoencoder for 64x64 images with values in [0, 1].

    The encoder is a stack of strided convolutions ending in a sigmoid, so the
    latent code has shape ``(N, latent_dim, 1, 1)`` with values in (0, 1). The
    decoder mirrors it with transposed convolutions and ends in a tanh; its
    output is rescaled from [-1, 1] to [0, 1] so it can be compared directly
    with the input.

    Args:
        in_channels: Channels of the input/output images. Defaults to the
            notebook-level ``nc``.
        latent_dim: Channels of the latent code. Defaults to ``nz``.
        gen_features: Base feature-map count of the decoder. Defaults to ``ngf``.
        enc_features: Base feature-map count of the encoder. Defaults to ``ndf``.

    Calling ``AutoEncoder()`` with no arguments builds exactly the same
    network as before, using the notebook-level hyper-parameters.
    """

    def __init__(self, in_channels=None, latent_dim=None, gen_features=None, enc_features=None):
        super().__init__()
        # Resolve defaults at construction time so the notebook globals are only
        # needed when a size is not passed explicitly.
        in_channels = nc if in_channels is None else in_channels
        latent_dim = nz if latent_dim is None else latent_dim
        gen_features = ngf if gen_features is None else gen_features
        enc_features = ndf if enc_features is None else enc_features

        self.encoder = nn.Sequential(
            # input is ``(in_channels) x 64 x 64``
            nn.Conv2d(in_channels, enc_features, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. ``(enc_features) x 32 x 32``
            nn.Conv2d(enc_features, enc_features * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(enc_features * 2),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. ``(enc_features*2) x 16 x 16``
            nn.Conv2d(enc_features * 2, enc_features * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(enc_features * 4),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. ``(enc_features*4) x 8 x 8``
            nn.Conv2d(enc_features * 4, enc_features * 8, 4, 2, 1, bias=False),
            nn.BatchNorm2d(enc_features * 8),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. ``(enc_features*8) x 4 x 4``
            nn.Conv2d(enc_features * 8, latent_dim, 4, 1, 0, bias=False),
            nn.Sigmoid()
            # state size. ``(latent_dim) x 1 x 1``
        )

        self.decoder = nn.Sequential(
            # input is Z, going into a convolution
            nn.ConvTranspose2d(latent_dim, gen_features * 8, 4, 1, 0, bias=False),
            nn.BatchNorm2d(gen_features * 8),
            nn.ReLU(True),
            # state size. ``(gen_features*8) x 4 x 4``
            nn.ConvTranspose2d(gen_features * 8, gen_features * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(gen_features * 4),
            nn.ReLU(True),
            # state size. ``(gen_features*4) x 8 x 8``
            nn.ConvTranspose2d(gen_features * 4, gen_features * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(gen_features * 2),
            nn.ReLU(True),
            # state size. ``(gen_features*2) x 16 x 16``
            nn.ConvTranspose2d(gen_features * 2, gen_features, 4, 2, 1, bias=False),
            nn.BatchNorm2d(gen_features),
            nn.ReLU(True),
            # state size. ``(gen_features) x 32 x 32``
            nn.ConvTranspose2d(gen_features, in_channels, 4, 2, 1, bias=False),
            nn.Tanh()
            # state size. ``(in_channels) x 64 x 64``
        )

    def encode(self, input):
        """Map a batch of images to latent codes of shape ``(N, latent_dim, 1, 1)``."""
        return self.encoder(input)

    def decode(self, z):
        """Map latent codes to images, rescaling the tanh output from [-1, 1] to [0, 1]."""
        return 0.5 * (self.decoder(z) + 1)

    def forward(self, input):
        """Reconstruct ``input``: encode, decode, and return images in [0, 1]."""
        return self.decode(self.encode(input))

# Build the autoencoder on the selected device, then re-initialise its weights
# with ``weights_init`` (Conv weights ~ N(0, 0.02); BatchNorm weights ~ N(1, 0.02)
# with zero bias). ``apply`` returns the module itself, so the calls can be chained.
netAutoEncoder = AutoEncoder().to(device).apply(weights_init)

# Show the layer structure
print(netAutoEncoder)

In [None]:
# Reconstruction objective: mean squared error between output and input
criterion = nn.MSELoss(reduction='mean')

# Adam over every autoencoder parameter, with the configured learning rate.
# NOTE(review): the notebook defines ``beta1`` but it is not passed here, so
# Adam runs with its default betas. Confirm whether that is intended.
optimizer = optim.Adam(params=netAutoEncoder.parameters(), lr=lr)

In [None]:
# Training Loop

# Lists to keep track of progress
losses = []
iters = 0

print("Starting Training Loop...")
# Make sure BatchNorm layers use and update batch statistics, even if an
# earlier cell left the model in eval mode.
netAutoEncoder.train()
# For each epoch
for epoch in range(num_epochs):
    # For each batch in the dataloader. The loop variable is named ``batch``
    # so it does not overwrite the ``data`` array built during preprocessing.
    for i, batch in enumerate(dataloader, 0):
        netAutoEncoder.zero_grad()

        # Each dataset item is a 1-tuple, so the images are at index 0
        x = batch[0].to(device)
        x_out = netAutoEncoder(x)

        err = criterion(x_out, x)
        err.backward()
        optimizer.step()

        # Read the scalar loss once per step
        loss_value = err.item()

        # Output training stats
        if i % 50 == 0:
            print('[%d/%d][%d/%d]\tLoss: %.4f'
                  % (epoch, num_epochs, i, len(dataloader),
                     loss_value))

        # Save Losses for plotting later
        losses.append(loss_value)
        iters += 1

In [None]:
# Per-iteration training loss (one point per optimizer step)
plt.figure(figsize=(10,5))
plt.title("Autoencoder Reconstruction Loss During Training")
plt.plot(losses,label="Train")
plt.xlabel("iterations")
plt.ylabel("Loss")
plt.legend()
plt.show()

In [None]:
# Grab a batch of real images from the dataloader (this tensor lives on the CPU)
real_batch = next(iter(dataloader))[0]

# Reconstruct in eval mode and without autograd: BatchNorm then uses its running
# statistics instead of updating them, and no graph is built. The batch is moved
# to the model's device first so this also works when the model is on the GPU.
# The previous train/eval mode is restored afterwards.
was_training = netAutoEncoder.training
netAutoEncoder.eval()
try:
    with torch.no_grad():
        model_device = next(netAutoEncoder.parameters()).device
        out_batch = netAutoEncoder(real_batch.to(model_device)).cpu()
finally:
    netAutoEncoder.train(was_training)

# Plot the real images
plt.figure(figsize=(15,15))
plt.subplot(1,2,1)
plt.axis("off")
plt.title("Real Images")
plt.imshow(np.transpose(vutils.make_grid(real_batch.cpu()[:64], padding=2, normalize=True).cpu(),(2,1,0)))

# Plot the reconstructions of the same images
plt.subplot(1,2,2)
plt.axis("off")
plt.title("Reconstructed Images")
plt.imshow(np.transpose(vutils.make_grid(out_batch[:64], padding=2, normalize=True).cpu(),(2,1,0)))
plt.show()

- describe your dataset and the steps that you used to create it,
- provide a summary of your architecture,
- discuss and explain your design choices,
- list hyper-parameters used in the model,
- plot learning curves for training and validation loss as a function of training epochs,
- provide the final average error of your autoencoder on your test set,
- provide a side-by-side example of 5 input and output images, and
- discuss any decisions or observations that you find relevant.

In [None]:
real_batch = next(iter(dataloader))[0]
from ipywidgets import interact, FloatSlider

@interact(
    alpha=FloatSlider(min=-1.0, max=1.0, step=0.01, value=1.0, description='alpha'),
    beta =FloatSlider(min=-1.0, max=1.0, step=0.01, value=1.0, description='beta')
)
def draw(alpha=1,beta=1):
    """Show three input images and the decoding of a weighted mix of their latent codes.

    The mixed code is ``(z_2 + alpha*z_0 + beta*z_1) / (1 + alpha + beta)``.

    Args:
        alpha: Weight of image 0's latent code.
        beta: Weight of image 1's latent code.

    When ``1 + alpha + beta`` is (numerically) zero the mix is undefined, so
    the fourth panel shows a short message instead of an image.
    """
    fig, axes = plt.subplots(1, 4, figsize = (12, 4))
    for idx in range(3):
        axes[idx].imshow(np.transpose(real_batch[idx].cpu(),(2,1,0)))
        axes[idx].set_title('Image %d' % idx)
        axes[idx].axis('off')

    axes[3].set_title('Image 3')
    axes[3].axis('off')

    denom = 1 + alpha + beta
    if abs(denom) < 1e-6:
        # The slider ranges allow alpha + beta == -1, which would divide by zero.
        axes[3].text(0.5, 0.5, 'undefined:\n1 + alpha + beta = 0',
                     ha='center', va='center', transform=axes[3].transAxes)
    else:
        # Run in eval mode without autograd so BatchNorm uses its running
        # statistics and is not updated by these forward passes. Inputs are moved
        # to the model's device. The previous mode is restored afterwards.
        was_training = netAutoEncoder.training
        netAutoEncoder.eval()
        try:
            with torch.no_grad():
                model_device = next(netAutoEncoder.parameters()).device
                z = netAutoEncoder.encoder(real_batch[:3].to(model_device))
                z_0, z_1, z_2 = z[0:1], z[1:2], z[2:3]
                z_3 = (z_2 + alpha*z_0 + beta*z_1)/denom
                # Rescale the tanh output from [-1, 1] to [0, 1] and drop the batch axis
                recon_composite = 0.5*(netAutoEncoder.decoder(z_3).squeeze(0)+1)
        finally:
            netAutoEncoder.train(was_training)
        axes[3].imshow(np.transpose(recon_composite.cpu(),(2,1,0)))
    plt.show()