## Setup

## Built on top of https://gist.github.com/AFAgarap/4f8a8d8edf352271fa06d85ba0361f26

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
baseurl = '/content/drive/MyDrive/Datalist'

import pickle
import os
## youtube module load from drive folder

import matplotlib.pyplot as plt
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torch.utils.data import TensorDataset, DataLoader

##### IMPORT ALL THUMBNAIL IMAGES FROM YOUTUBE DATASET AND CONVERT TO GRAYSCALE ####

os.chdir('/content/drive/MyDrive/')
## youtube module load from drive folder
import youtube.ytf

gs_thumbnails = []
if os.path.isfile('thumbnails_grayscale_channel.pkl'): # if file exists and is saved, load the file
  with open('thumbnails_grayscale_channel.pkl', 'rb') as handle:
    gs_thumbnails_channel = pickle.load(handle)
    imgsize = (gs_thumbnails_channel[0].shape[1],gs_thumbnails_channel[0].shape[2])


else: # if 'thumbnails_grayscale.pkl' does not exist, pull from raw data and create file of grayscale images and save as 'thumbnails_grayscale.pkl'
  for filename in os.listdir(baseurl):
    f = os.path.join(baseurl, filename)
    if os.path.isfile(f):
      print('Unpacking file : '+baseurl + '/' + filename)
      ytfile = open(f, "rb")
      ytraw = pickle.load(ytfile)
      for vid in ytraw:
        if type(vid.image) is not list:
          gs_thumbnails.append(np.dot(vid.image, [0.299, 0.587, 0.114])) # convert image to grayscale
      ytfile.close()

  # Convert each image to size (1,?,?)

  imgsize = gs_thumbnails[0].shape
  gs_thumbnails_channel = np.array([elem.reshape((1,imgsize[0],imgsize[1])) for elem in gs_thumbnails])
  print(gs_thumbnails_channel[0].shape)  # ensure size is (1,40,80)

  del vid,ytraw,ytfile, gs_thumbnails # delete these files for clearing up RAM

  with open('thumbnails_grayscale_channel.pkl', 'wb') as handle:
    pickle.dump(gs_thumbnails_channel, handle, protocol=pickle.HIGHEST_PROTOCOL)

num_videos = len(gs_thumbnails_channel)
print('Number of video thumbnails = {}'.format(num_videos))

# check if shapes are consistent
assert (gs_thumbnails_channel[0].shape[1],gs_thumbnails_channel[0].shape[2]) == imgsize



Number of video thumbnails = 114724


## Set training parameters, set up dataloader

In [3]:
# Reproducibility

seed = 42
torch.manual_seed(seed)
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

### Learning Parameters

batch_size = 256
epochs = 2000
learning_rate = 1e-4
decay_rate = 0.975 # exponential decay for learning rate


# ## Load Datasets and create train and test data loaders

val_frac = 0.2
indices = list(range(len(gs_thumbnails_channel)))
split = int(len(gs_thumbnails_channel)*val_frac)

np.random.seed(1); np.random.shuffle(indices)
train_indices = indices[:split]
val_indices = indices[split:]


train_dataset = TensorDataset(torch.Tensor(gs_thumbnails_channel[train_indices])) # transform to torch tensor dataset
val_dataset  = TensorDataset(torch.Tensor(gs_thumbnails_channel[val_indices])) # transform to torch tensor dataset

transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])

#train_dataset = torchvision.datasets.MNIST(root="~/torch_datasets", train=True, transform=transform, download=True

# returns sampler and iterable with specified batch_size
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=True)

### Ensure train_dataset is a torch.utils.data.dataset.TensorDataset object
print(type(train_dataset))

#### RAM Preservation
del train_indices, val_indices, split, indices, train_dataset, val_dataset
####

### Ensure elem[0].shape is (batch_size,1,40,80)
count = 0
for elem in val_loader:
  count = count + 1
  if count == 2:
    break
  print(elem[0].shape)

<class 'torch.utils.data.dataset.TensorDataset'>
torch.Size([256, 1, 40, 80])


## Playground for Conv2D and ConvTranspose2D

In [4]:
# import pickle
# import os
# ## youtube module load from drive folder

# import matplotlib.pyplot as plt
# import numpy as np

# import torch
# import torch.nn as nn
# import torch.optim as optim
# import torchvision
# from torch.utils.data import TensorDataset, DataLoader

# x = torch.randn(2,8,20,40)
# conv = nn.Conv2d(8,16,3,padding = 1)
# convt = nn.ConvTranspose2d(16,8,3,padding = 1)
# maxpool = nn.MaxPool2d(kernel_size=2, return_indices=True)
# maxunpool = nn.MaxUnpool2d(kernel_size=2)
# # upsample = nn.UpsamplingNearest2d(size = (10,20))

# print(x.shape)
# y = conv(x)
# print(y.shape)
# y,ind = maxpool(y)
# print(y.shape)
# xrecon = maxunpool(y,ind)
# print(xrecon.shape)
# xrecon = convt(xrecon)
# print(xrecon.shape)

In [5]:
# import pickle
# import os
# ## youtube module load from drive folder

# import matplotlib.pyplot as plt
# import numpy as np

# import torch
# import torch.nn as nn
# import torch.optim as optim
# import torchvision
# from torch.utils.data import TensorDataset, DataLoader

# encoder_conv_layer_1 = nn.Sequential(
#           # Conv_L1 ImgIn shape=(?, 40, 80, 1)
#           # Conv -> (?, 40, 80, 8)
#           # Pool -> (?, 20, 40, 8)
#           nn.Conv2d(1, 8, kernel_size=3, stride=2, padding=1),
#           nn.ReLU()
#           )
# maxpool_1 = nn.MaxPool2d(kernel_size=2, return_indices=True)
# dropout_1 = nn.Dropout(p = 0.2)

# encoder_conv_layer_2 = nn.Sequential(
#           # Conv_L2 ImgIn shape=(?, 20, 40, 8)
#           # Conv -> (?, 20, 40, 16)
#           # Pool -> (?, 10, 20, 16)
#           nn.Conv2d(8, 16, kernel_size=3, stride=2, padding=1),
#           nn.ReLU()
#           )
# maxpool_2 = nn.MaxPool2d(kernel_size=2, padding = 1, return_indices=True)
# dropout_2 = nn.Dropout(p=0.2)

# # encoder_conv_layer_3 = nn.Sequential(
# #           # Conv_L3 ImgIn shape=(?, 10, 20, 16)
# #           # Conv ->(?, 10, 20, 32)
# #           # Pool ->(?, 5, 10, 32)
# #           nn.Conv2d(16, 32, kernel_size=3, stride=2, padding=1),
# #           nn.ReLU()
# #           )
# # dropout_3 = nn.Dropout(p=0.2)

# flatten = nn.Flatten()

# encoder_hidden_layer_1 = nn.Linear(
#           in_features=288, out_features=96
#       )
# encoder_hidden_layer_2 = nn.Linear(
#           in_features=96, out_features=24
#       )
# encoder_hidden_layer_3 = nn.Linear(
#           in_features=24, out_features=8
#       )
# encoder_output_layer = nn.Linear(
#           in_features=8, out_features=8
#       )
# decoder_hidden_layer_1 = nn.Linear(
#           in_features=8, out_features=24
#       )
# decoder_hidden_layer_2 = nn.Linear(
#           in_features=24, out_features=96
#       )
# decoder_output_layer = nn.Linear(
#           in_features=96, out_features=288
#       )

# unflatten = nn.Unflatten(dim=1, unflattened_size = (16,3,6))

# #deconv layers
# # decoder_conv_layer_1 = nn.Sequential(
# #           nn.ConvTranspose2d(32, 16, kernel_size=3, stride=2, padding=1), # exactly same as encoder_conv_layer_3
# #           nn.ReLU(),
# #           nn.Dropout(p = 0.2)
# #           )

# maxunpool_1 = nn.MaxUnpool2d(kernel_size=2, padding = 1) 
# decoder_conv_layer_1 = nn.Sequential(
#           # DeConv_L1 ImgIn shape=(?, 20, 40, 16)
#           # Deconv ->(?, 20, 40, 8)
#           nn.ConvTranspose2d(16, 8, kernel_size=3, stride=2, padding=1),
#           nn.BatchNorm2d(8),
#           nn.ReLU(),
#           nn.Dropout(p = 0.2)
#       )

# maxunpool_2 = nn.MaxUnpool2d(kernel_size=2, padding = 1) # dimension = (?,40,80,8)
# decoder_conv_layer_2 = nn.Sequential(
#           # DeConv_L1 ImgIn shape=(?, 20, 40, 8)
#           # Deconv ->(?, 40, 80, 1)
#           nn.ConvTranspose2d(8, 1, kernel_size = 3, stride = 2, padding = 1)
# )


# x = torch.randn(10,1,40,80)
# print(x.shape)
# x = encoder_conv_layer_1(x)
# print(x.shape)
# x,ind1 = maxpool_1(x)
# print(x.shape)
# x = dropout_1(x)
# print(x.shape)
# x = encoder_conv_layer_2(x)
# print(x.shape)
# x,ind2 = maxpool_2(x)
# print(x.shape)
# x = dropout_2(x)
# print(x.shape)
# x = flatten(x)
# print(x.shape)

# x = encoder_hidden_layer_1(x)
# print(x.shape)
# x = encoder_hidden_layer_2(x)
# print(x.shape)
# x = encoder_hidden_layer_3(x)
# print(x.shape)
# x = encoder_output_layer(x)
# x = torch.sigmoid(x)
# print(x.shape)
# x = decoder_hidden_layer_1(x)
# print(x.shape)
# x = decoder_hidden_layer_2(x)
# print(x.shape)
# x = decoder_output_layer(x)
# x = torch.sigmoid(x)


# print(x.shape)
# x = unflatten(x)
# print(x.shape)
# x = maxunpool_1(x,ind2)
# print(x.shape)
# x = decoder_conv_layer_1(x)
# print(x.shape)



#####################################################

# Autoencoder architecture with CNNs
# https://analyticsindiamag.com/how-to-implement-convolutional-autoencoder-in-pytorch-with-cuda/
# https://medium.com/dataseries/convolutional-autoencoder-in-pytorch-on-mnist-dataset-d65145c132ac
# https://towardsdatascience.com/convolutional-autoencoders-for-image-noise-reduction-32fce9fc1763

# class AE_CNN(nn.Module):
#   def __init__(self, **kwargs):
#       super().__init__()
#       # THREE CONV LAYERS
#       self.encoder_conv_layer_1 = nn.Sequential(
#           # Conv_L1 ImgIn shape=(?, 40, 80, 1)
#           # Conv -> (?, 40, 80, 8)
#           # Pool -> (?, 20, 40, 8)
#           nn.Conv2d(1, 8, kernel_size=3, stride=1, padding=1),
#           nn.ReLU()
#           )
#       self.maxpool_1 = nn.MaxPool2d(kernel_size=2, padding = 1, return_indices=True)
#       self.dropout_1 = nn.Dropout(p = 1 - kwargs['keep_prob'])
      
#       self.encoder_conv_layer_2 = nn.Sequential(
#           # Conv_L2 ImgIn shape=(?, 20, 40, 8)
#           # Conv -> (?, 20, 40, 16)
#           # Pool -> (?, 10, 20, 16)
#           nn.Conv2d(8, 16, kernel_size=3, stride=1, padding=1),
#           nn.ReLU()
#           )
#       self.maxpool_2 = nn.MaxPool2d(kernel_size=2, stride=2, padding = 1, return_indices=True)
#       self.dropout_2 = nn.Dropout(p=1 - kwargs['keep_prob'])
      
#       self.encoder_conv_layer_3 = nn.Sequential(
#           # Conv_L3 ImgIn shape=(?, 10, 20, 16)
#           # Conv ->(?, 10, 20, 32)
#           # Pool ->(?, 5, 10, 32)
#           nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1),
#           nn.ReLU()
#           )
#       self.maxpool_3 = nn.MaxPool2d(kernel_size=2, stride=2, padding=1, return_indices = True)
#       self.dropout_3 = nn.Dropout(p=1 - kwargs['keep_prob'])

#       self.flatten = nn.Flatten()

#       # Fully Connected Encoder and Decoder Layers
#       self.encoder_hidden_layer_1 = nn.Linear(
#           in_features=kwargs['input_shape'][0]*kwargs['input_shape'][1]*32//64, out_features=128
#       )
#       self.encoder_hidden_layer_2 = nn.Linear(
#           in_features=128, out_features=32
#       )
#       self.encoder_hidden_layer_3 = nn.Linear(
#           in_features=32, out_features=8
#       )
#       self.encoder_output_layer = nn.Linear(
#           in_features=8, out_features=8
#       )
#       self.decoder_hidden_layer_1 = nn.Linear(
#           in_features=8, out_features=32
#       )
#       self.decoder_hidden_layer_2 = nn.Linear(
#           in_features=32, out_features=128
#       )
#       self.decoder_output_layer = nn.Linear(
#           in_features=128, out_features=kwargs['input_shape'][0]*kwargs['input_shape'][1]*32//64
#       )

#       # DeConv2D layers
#       #
#       # self.decoder_lin = nn.Sequential(
#       #     nn.Linear(kwargs['input_shape'][0]*kwargs['input_shape'][1]*128//64, 128),
#       #     nn.ReLU(True),
#       #     nn.Linear(128, 3 * 3 * 32),
#       #     nn.ReLU(True)
#       # )

#       # Reshape into last conv layer's output dims
#       self.unflatten = nn.Unflatten(dim=1, 
#       unflattened_size=(kwargs['input_shape'][0]//8,kwargs['input_shape'][1]//8,32) )

#       self.maxunpool_1 = nn.MaxUnpool2d(kernel_size=2, stride = 2, padding = 1) # dimension = (?,10,20,32)
#       self.decoder_conv_layer_1 = nn.Sequential(
#           # DeConv_L1 ImgIn shape=(?, 10, 20, 32)
#           # Deconv ->(?, 10, 20, 16)
#           nn.ConvTranspose2d(32,16, kernel_size = 3, stride=1, padding=1), # exactly same as encodeer_conv_layer_3
#           nn.BatchNorm2d(16),
#           nn.ReLU(),
#           nn.Dropout(p = 1 - kwargs['keep_prob'])
#       )

#       self.maxunpool_2 = nn.MaxUnpool2d(kernel_size=2, stride = 2, padding = 1) # dimension = (?,20,40,16)
#       self.decoder_conv_layer_2 = nn.Sequential(
#           # DeConv_L1 ImgIn shape=(?, 20, 40, 16)
#           # Deconv ->(?, 20, 40, 8)
#           nn.ConvTranspose2d(16, 8, kernel_size=3, stride=1, padding=1),
#           nn.BatchNorm2d(8),
#           nn.ReLU(),
#           nn.Dropout(p = 1 - kwargs['keep_prob'])
#       )

#       self.maxunpool_3 = nn.MaxUnpool2d(kernel_size=2, stride = 2, padding = 1) # dimension = (?,40,80,8)
#       self.decoder_conv_layer_3 = nn.Sequential(
#           # DeConv_L1 ImgIn shape=(?, 20, 40, 8)
#           # Deconv ->(?, 40, 80, 1)
#           nn.ConvTranspose2d(8, 1, kernel_size = 3, stride = 1, padding = 1)
#       )



#   def forward(self, features):
#     # convolution operations
#     activation = self.encoder_conv_layer_1(features)
#     activation,indices_1 = self.maxpool_1(activation)
#     activation = self.dropout_1(activation)

#     activation = self.encoder_conv_layer_2(activation)
#     activation,indices_2 = self.maxpool_2(activation)
#     activation = self.dropout_2(activation)

#     activation = self.encoder_conv_layer_3(activation)
#     activation,indices_3 = self.maxpool_3(activation)
#     activation = self.dropout_3(activation)

#     # fully connected operations
#     activation = self.encoder_hidden_layer_1(activation)
#     activation = torch.relu(activation)
#     activation = self.encoder_hidden_layer_2(activation)
#     activation = torch.relu(activation)
#     activation = self.encoder_hidden_layer_3(activation)
#     activation = torch.relu(activation)
#     code = self.encoder_output_layer(activation)
#     code = torch.sigmoid(code)
#     activation = self.decoder_hidden_layer_1(code)
#     activation = torch.relu(activation)
#     activation = self.decoder_hidden_layer_2(activation)
#     activation = torch.relu(activation)
#     activation = self.decoder_output_layer(activation)
#     activation = torch.sigmoid(activation)


#     # convolution transpose operations
#     activation = self.unflatten(activation)

#     activation = self.maxunpool_1(activation, indices_1)
#     activation = self.decoder_conv_layer_1(activation)

#     activation = self.maxunpool_2(activation, indices_2)
#     activation = self.decoder_conv_layer_2(activation)

#     activation = self.maxunpool_3(activation, indices_3)
#     reconstructed = self.decoder_conv_layer_3(activation)

#     return reconstructed
    
#   # low dimensional embeddings
#   def embeddings(self,features):
#     # convolution operations
#     activation = self.encoder_conv_layer_1(activation)
#     activation,indices_1 = self.maxpool_1(activation)
#     activation = self.dropout_1(activation)

#     activation = self.encoder_conv_layer_2(activation)
#     activation,indices_2 = self.maxpool_2(activation)
#     activation = self.dropout_2(activation)

#     activation = self.encoder_conv_layer_3(activation)
#     activation,indices_3 = self.maxpool_3(activation)
#     activation = self.dropout_3(activation)

#     # fully connected operations
#     activation = self.encoder_hidden_layer_1(activation)
#     activation = torch.relu(activation)
#     activation = self.encoder_hidden_layer_2(activation)
#     activation = torch.relu(activation)
#     activation = self.encoder_hidden_layer_3(activation)
#     activation = torch.relu(activation)
#     code = self.encoder_output_layer(activation)
#     code = torch.sigmoid(code)
#     return code


#####################################################

## Define Autoencoder Architectures, Check CUDA, Create Optimizer Object, Set criterion

In [6]:
# Autoencoder architecture without CNNs
class AE(nn.Module):
    def __init__(self, **kwargs):
        super().__init__()
        self.encoder_hidden_layer_1 = nn.Linear(
            in_features=kwargs["input_shape"], out_features=512
        )
        self.dropout1 = nn.Dropout(p = kwargs["drop_prob"])
        self.encoder_hidden_layer_2 = nn.Linear(
            in_features=512, out_features=128
        )
        self.dropout2 = nn.Dropout(p = kwargs["drop_prob"])
        self.encoder_hidden_layer_3 = nn.Linear(
            in_features=128, out_features=32
        )
        self.dropout3 = nn.Dropout(p = kwargs["drop_prob"])
        self.encoder_hidden_layer_4 = nn.Linear(
            in_features=32, out_features=16
        )
        self.dropout4 = nn.Dropout(p = kwargs["drop_prob"])
        self.encoder_output_layer = nn.Linear(
            in_features=16, out_features=16 # make change in assert statement after training process
        )
        self.decoder_hidden_layer_1 = nn.Linear(
            in_features=16, out_features=32
        )
        self.decdropout1 = nn.Dropout(p = kwargs["drop_prob"])
        self.decoder_hidden_layer_2 = nn.Linear(
            in_features=32, out_features=128
        )
        self.decdropout2 = nn.Dropout(p = kwargs["drop_prob"])
        self.decoder_hidden_layer_3 = nn.Linear(
            in_features=128, out_features=512
        )
        self.decdropout3 = nn.Dropout(p = kwargs["drop_prob"])
        self.decoder_output_layer = nn.Linear(
            in_features=512, out_features=kwargs["input_shape"]
        )

    def forward(self, features):
        activation = self.encoder_hidden_layer_1(features)
        activation = torch.relu(activation)
        activation = self.dropout1(activation)

        activation = self.encoder_hidden_layer_2(activation)
        activation = torch.relu(activation)
        activation = self.dropout2(activation)

        activation = self.encoder_hidden_layer_3(activation)
        activation = torch.relu(activation)
        activation = self.dropout3(activation)

        activation = self.encoder_hidden_layer_4(activation)
        activation = torch.relu(activation)
        activation = self.dropout4(activation)

        code = self.encoder_output_layer(activation)
        code = torch.sigmoid(code)
        activation = self.decoder_hidden_layer_1(code)
        activation = torch.relu(activation)
        activation = self.decdropout1(activation)

        activation = self.decoder_hidden_layer_2(activation)
        activation = torch.relu(activation)
        activation = self.decdropout2(activation)

        activation = self.decoder_hidden_layer_3(activation)
        activation = torch.relu(activation)
        activation = self.decdropout3(activation)

        activation = self.decoder_output_layer(activation)
        reconstructed = torch.sigmoid(activation)
        return reconstructed
    
    # low dimensional embeddings
    def embeddings(self,features):
      activation = self.encoder_hidden_layer_1(features)
      activation = torch.relu(activation)
      #activation = self.dropout1(activation)

      activation = self.encoder_hidden_layer_2(activation)
      activation = torch.relu(activation)
      #activation = self.dropout2(activation)

      activation = self.encoder_hidden_layer_3(activation)
      activation = torch.relu(activation)
      #activation = self.dropout3(activation)

      activation = self.encoder_hidden_layer_4(activation)
      activation = torch.relu(activation)
      #activation = self.dropout4(activation)

      code = self.encoder_output_layer(activation)
      code = torch.sigmoid(code)
      return code

####################### 

#  use gpu if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# create a model from `AE` autoencoder class
# load it to the specified device, either gpu or cpu

flattened_imgsize = imgsize[0]*imgsize[1]
model = AE(input_shape=flattened_imgsize, drop_prob = 0.15).to(device)

#model = AE(input_shape=(1,40,80),keep_prob = 0.75).to(device) # Auto-encoder with conv encoder-decoder

# create an optimizer object
# Adam optimizer with learning rate 1e-3
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# mean-squared error loss
criterion = nn.MSELoss()

# learning rate scheduler
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma= decay_rate)

# ensure you start and end with 40*80 number of features
print(model.parameters)

<bound method Module.parameters of AE(
  (encoder_hidden_layer_1): Linear(in_features=3200, out_features=512, bias=True)
  (dropout1): Dropout(p=0.15, inplace=False)
  (encoder_hidden_layer_2): Linear(in_features=512, out_features=128, bias=True)
  (dropout2): Dropout(p=0.15, inplace=False)
  (encoder_hidden_layer_3): Linear(in_features=128, out_features=32, bias=True)
  (dropout3): Dropout(p=0.15, inplace=False)
  (encoder_hidden_layer_4): Linear(in_features=32, out_features=16, bias=True)
  (dropout4): Dropout(p=0.15, inplace=False)
  (encoder_output_layer): Linear(in_features=16, out_features=16, bias=True)
  (decoder_hidden_layer_1): Linear(in_features=16, out_features=32, bias=True)
  (decdropout1): Dropout(p=0.15, inplace=False)
  (decoder_hidden_layer_2): Linear(in_features=32, out_features=128, bias=True)
  (decdropout2): Dropout(p=0.15, inplace=False)
  (decoder_hidden_layer_3): Linear(in_features=128, out_features=512, bias=True)
  (decdropout3): Dropout(p=0.15, inplace=False

In [7]:
# # Visualize Neural Network
# !pip install torchviz
# from torchviz import make_dot
# # model = AE_CNN(input_shape=(1,40,80),keep_prob = 0.75).to(device) # Auto-encoder with dropout
# model = AE(input_shape=3200, drop_prob = 0.2).to(device) # Auto-encoder with dense layers only
# make_dot(model(torch.randn(1,3200).to(device)), params=dict(model.named_parameters()), show_attrs=True, show_saved=True)

In [8]:
# class parent(object):
#   def __init__(self,arg1):
#     self.att1 = arg1
#   def shapeofatt(self):
#     return 'warning'

# class child(parent):
#   def __init__(self,arg2):
#     #super().__init__([2,3])
#     self.att2 = arg2
#   def showfirst(self):
#     return self.att2[0]

# childobj = child([1,0])
# childobj.shapeofatt()

In [9]:
#del batch_features, val_features, outputs, voutputs, model, optimizer, criterion, train_loader, val_loader, gs_thumbnails, train_dataset, val_dataset, train_indices, val_indices, indices
import gc 
gc.collect()

12

## Train with respect to set parameters

In [None]:

from copy import deepcopy
best_vloss = 10000.0
model_path = 'best_model_state_dict.pth' # standard naming convention for models


for epoch in range(epochs):
  loss = 0
  for batch_features in train_loader:
    # reshape mini-batch data to [N, 3200] matrix
    # load it to the active device

    # flatten image for input
    batch_features = batch_features[0].view(-1, flattened_imgsize).to(device)
    
    # reset the gradients back to zero
    # PyTorch accumulates gradients on subsequent backward passes
    optimizer.zero_grad()
    
    # compute reconstructions
    outputs = model(batch_features)
    
    # compute training reconstruction loss
    train_loss = criterion(outputs, batch_features)
    
    # compute accumulated gradients
    train_loss.backward()
    
    # perform parameter update based on current gradients
    optimizer.step()
    
    # add the mini-batch training loss to epoch loss
    loss += train_loss.item()
    
  # compute the epoch training loss
  loss = loss/len(train_loader)

  # display the epoch training loss
  if epoch%50 == 0: # print loss after every 20 epochs
    

    vloss_avg = 0.0

    for i, vdata in enumerate(val_loader):
      val_features = vdata[0].view(-1, flattened_imgsize).to(device)
      voutputs = model(val_features)
      vloss = criterion(voutputs, val_features)
      vloss_avg = (vloss_avg*i + vloss)/(i+1)

    print("epoch : {}/{}, recon loss = {:.8f}, val loss = {:.8f}".format(epoch+1, epochs, loss, vloss_avg))

    # Track best performance, and save the model's state
    if vloss_avg < best_vloss:
        best_vloss = vloss_avg
        best_model_state_dict = model.state_dict()
        torch.save(best_model_state_dict, model_path) # save parameters of the model
        #### load up using model = TheModelClass(*args, **kwargs), model.load_state_dict(torch.load(PATH))

print('Training finished. Total epochs: {}'.format(epochs))
##############


# load best model
model.load_state_dict(best_model_state_dict)


# Compute model embeddings - 16 dimensional representation of images
embeddings = []
with torch.no_grad(): # disable gradient accumulation, since model weights are frozen
  for batch_features in train_loader:
    batch_features = batch_features[0].view(-1, flattened_imgsize).to(device)
    # embeddings.append(model.embeddings(batch_features))
    embs = model.embeddings(batch_features)
    for emb in embs:
      embeddings.append(list(emb.cpu().numpy()))
  for batch_features in val_loader:
    batch_features = batch_features[0].view(-1, flattened_imgsize).to(device)
    embs = model.embeddings(batch_features)
    for emb in embs:
      embeddings.append(list(emb.cpu().numpy()))

assert len(embeddings[0]) == 16 and len(embeddings) == num_videos
#assert list(embeddings[0].shape)[1] == 16 # ensure embedding is a low dim quantity

os.chdir('/content/drive/MyDrive/')
# # save model embeddings () and save to drive location
with open('thumbnail_grayscale_embeddings.pkl','wb') as handle:
  pickle.dump(embeddings, handle, protocol=pickle.HIGHEST_PROTOCOL)


# # save model and optimizer state dictionaries
torch.save({'model_state_dict': best_model_state_dict, 'optimizer_state_dict': optimizer.state_dict()}, 'autoencoder_state_dicts.pth')


# # # DELETE ALL VARIABLES
# # del embeddings, batch_features, val_features, outputs, voutputs, criterion, train_loader, val_loader, gs_thumbnails
# del batch_features, val_features, outputs, voutputs, criterion, train_loader, val_loader, gs_thumbnails

epoch : 1/2000, recon loss = 0.08964081, val loss = 0.07951336
epoch : 51/2000, recon loss = 0.04780652, val loss = 0.04799373
epoch : 101/2000, recon loss = 0.04723851, val loss = 0.04760629
epoch : 151/2000, recon loss = 0.04279443, val loss = 0.04320178
epoch : 201/2000, recon loss = 0.04129151, val loss = 0.04159717
epoch : 251/2000, recon loss = 0.04096934, val loss = 0.04124769
epoch : 301/2000, recon loss = 0.04063924, val loss = 0.04085506
epoch : 351/2000, recon loss = 0.04037092, val loss = 0.04084807
epoch : 401/2000, recon loss = 0.04007038, val loss = 0.04035657
epoch : 451/2000, recon loss = 0.03966054, val loss = 0.04006924
epoch : 501/2000, recon loss = 0.03894858, val loss = 0.03922544
epoch : 551/2000, recon loss = 0.03844632, val loss = 0.03865421
epoch : 601/2000, recon loss = 0.03792514, val loss = 0.03833986
epoch : 651/2000, recon loss = 0.03768818, val loss = 0.03811654
epoch : 701/2000, recon loss = 0.03734724, val loss = 0.03770908
epoch : 751/2000, recon loss

## End session

In [None]:
from google.colab import runtime
runtime.unassign()