# Cognitv

In [None]:
# Mount Google Drive into the Colab runtime at /content/gdrive so the image
# folders referenced by later cells are readable.
from google.colab import drive

drive.mount("/content/gdrive")



Please put the image files under `/content/gdrive/MyDrive/cognitv/48`
and `/content/gdrive/MyDrive/cognitv/255`.

In [None]:
# Root folder on the mounted Drive; later cells build their image paths from it
# (for example files_path + "/48/*.*").
files_path = "/content/gdrive/MyDrive/cognitv"

In [None]:
!pip install torchmetrics

In [None]:
import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
import os
from skimage import io
from PIL import Image
from torchvision.transforms import Resize, Compose, ToTensor, Normalize
import numpy as np
import skimage
import matplotlib.pyplot as plt
import glob
import time
from torchmetrics import PeakSignalNoiseRatio



In [None]:
# Report whether PyTorch can see a CUDA device in this runtime.
torch.cuda.is_available()

# Cognitv solution

Overview:

Neural networks can be used for signal representation in many domains. These approaches have several advantages over traditional lossy methods. Examples of this concept are presented by Bricman et al. for representing single images, and similarly by Sitzmann et al., who use sinusoidal activation functions for this task.


* [The "Naive" Solution](#naive_solution)
* [Probing the "Naive" implicit function](#probing_solution)
* [Improving the solution - A fully connected Sinus Activation Module with entity embedding for different images](#improving_solution)
* [Interpolation and Similarity measures for representations](#interpolation)


<a name="naive_solution"></a>
# Naive Solution

In [None]:
def get_mgrid(sidelen, dim=2):
    '''Generates a flattened grid of (x,y,...) coordinates in a range of -1 to 1.

    sidelen: int, number of samples along every axis
    dim: int, number of axes

    Returns a tensor of shape (sidelen ** dim, dim). Rows are in "ij"
    (row-major) order: the first coordinate varies slowest.
    '''
    axis = torch.linspace(-1, 1, steps=sidelen)
    # indexing="ij" is the layout torch.meshgrid produced by default; passing it
    # explicitly silences the deprecation warning in recent PyTorch and pins
    # the ordering the flattened pixel tensors rely on.
    mgrid = torch.stack(torch.meshgrid(*([axis] * dim), indexing="ij"), dim=-1)
    return mgrid.reshape(-1, dim)

In [None]:
class SineLayer(nn.Module):
    """Linear layer followed by ``sin(omega_0 * .)``.

    See the SIREN paper, sec. 3.2 (final paragraph) and supplement sec. 1.5,
    for the discussion of omega_0.

    * ``is_first=True``: omega_0 is a frequency factor that multiplies the
      pre-activations; the best value is signal dependent (hyperparameter).
    * ``is_first=False``: the initial weights are additionally divided by
      omega_0, keeping activation magnitudes constant while boosting the
      gradients to the weight matrix.
    """

    def __init__(self, in_features, out_features, bias=True,
                 is_first=False, omega_0=30):
        super().__init__()
        self.in_features = in_features
        self.is_first = is_first
        self.omega_0 = omega_0

        self.linear = nn.Linear(in_features, out_features, bias=bias)
        self.init_weights()

    def init_weights(self):
        """Re-draw the linear weights uniformly from a symmetric interval."""
        if self.is_first:
            bound = 1 / self.in_features
        else:
            bound = np.sqrt(6 / self.in_features) / self.omega_0

        with torch.no_grad():
            self.linear.weight.uniform_(-bound, bound)

    def forward(self, input):
        """Return ``sin(omega_0 * linear(input))``."""
        scaled = self.omega_0 * self.linear(input)
        return torch.sin(scaled)

    def forward_with_intermediate(self, input):
        """Return ``(activation, pre_activation)``; used to visualise
        activation distributions."""
        pre_activation = self.omega_0 * self.linear(input)
        activation = torch.sin(pre_activation)
        return activation, pre_activation
    
    
class Siren(nn.Module):
    """Stack of SineLayer blocks mapping coordinates to signal values.

    The network consists of a first SineLayer (using ``first_omega_0``),
    ``hidden_layers`` further SineLayers (using ``hidden_omega_0``) and an
    output layer. The output layer is a plain ``nn.Linear`` when
    ``outermost_linear`` is true, otherwise one more SineLayer.
    """

    def __init__(self, in_features, hidden_features, hidden_layers, out_features, outermost_linear=False, 
                 first_omega_0=30, hidden_omega_0=30.):
        super().__init__()

        layers = [SineLayer(in_features, hidden_features,
                            is_first=True, omega_0=first_omega_0)]

        for _ in range(hidden_layers):
            layers.append(SineLayer(hidden_features, hidden_features,
                                    is_first=False, omega_0=hidden_omega_0))

        if outermost_linear:
            final_linear = nn.Linear(hidden_features, out_features)

            # Same initialisation interval as the non-first sine layers.
            bound = np.sqrt(6 / hidden_features) / hidden_omega_0
            with torch.no_grad():
                final_linear.weight.uniform_(-bound, bound)

            layers.append(final_linear)
        else:
            layers.append(SineLayer(hidden_features, out_features,
                                    is_first=False, omega_0=hidden_omega_0))

        self.net = nn.Sequential(*layers)

    def forward(self, coords):
        """Return ``(output, coords)``.

        The returned ``coords`` is a detached copy of the input with gradients
        enabled; it is the tensor actually fed to the network, so derivatives
        of the output w.r.t. the input can be taken through it.
        """
        coords = coords.clone().detach().requires_grad_(True) # allows to take derivative w.r.t. input
        output = self.net(coords)
        return output, coords

    def forward_with_activations(self, coords, retain_grad=False):
        '''Returns not only model output, but also intermediate activations.
        Only used for visualizing activations later!

        Returns an OrderedDict: key 'input' holds the (detached, grad-enabled)
        input; for every layer the pre-sine value (SineLayer only) and the
        layer output are stored under "<layer class>_<running index>".
        '''
        # OrderedDict is not imported at the top of the notebook, which made
        # this method raise NameError; import it where it is used.
        from collections import OrderedDict

        activations = OrderedDict()

        activation_count = 0
        x = coords.clone().detach().requires_grad_(True)
        activations['input'] = x
        for layer in self.net:
            if isinstance(layer, SineLayer):
                x, intermed = layer.forward_with_intermediate(x)

                if retain_grad:
                    x.retain_grad()
                    intermed.retain_grad()

                activations['_'.join((str(layer.__class__), "%d" % activation_count))] = intermed
                activation_count += 1
            else:
                x = layer(x)

                if retain_grad:
                    x.retain_grad()

            activations['_'.join((str(layer.__class__), "%d" % activation_count))] = x
            activation_count += 1

        return activations

In [None]:
def laplace(y, x):
    """Laplacian of ``y`` w.r.t. ``x``: the divergence of the gradient."""
    return divergence(gradient(y, x), x)


def divergence(y, x):
    """Sum over the last axis of ``y`` of d y[..., i] / d x[..., i].

    Each term is obtained with ``torch.autograd.grad`` (``create_graph=True``,
    so the result can be differentiated again) and has a trailing axis of
    size 1.
    """
    def _partial(i):
        component = y[..., i]
        grad_wrt_x = torch.autograd.grad(
            component, x, torch.ones_like(component), create_graph=True
        )[0]
        return grad_wrt_x[..., i:i+1]

    return sum((_partial(i) for i in range(y.shape[-1])), 0.)


def gradient(y, x, grad_outputs=None):
    """Gradient of ``y`` w.r.t. ``x`` via ``torch.autograd.grad``.

    ``grad_outputs`` defaults to a tensor of ones shaped like ``y``. The graph
    is kept (``create_graph=True``) so higher-order derivatives are possible.
    """
    seed = torch.ones_like(y) if grad_outputs is None else grad_outputs
    (grad,) = torch.autograd.grad(y, [x], grad_outputs=seed, create_graph=True)
    return grad

## Combining all small 48 images to one big image

In [None]:
def get_concat_h(im1, im2):
    """Return a new RGB image with ``im2`` pasted to the right of ``im1``.

    The canvas is ``im1.width + im2.width`` wide and ``im1.height`` tall.
    """
    canvas = Image.new('RGB', (im1.width + im2.width, im1.height))
    for tile, left in ((im1, 0), (im2, im1.width)):
        canvas.paste(tile, (left, 0))
    return canvas

def get_concat_v(im1, im2):
    """Return a new RGB image with ``im2`` pasted below ``im1``.

    The canvas is ``im1.width`` wide and ``im1.height + im2.height`` tall.
    """
    canvas = Image.new('RGB', (im1.width, im1.height + im2.height))
    for tile, top in ((im1, 0), (im2, im1.height)):
        canvas.paste(tile, (0, top))
    return canvas


In [None]:
# Tile all small images into one big image: consecutive groups of
# `images_per_row` files form a horizontal strip, and the strips are then
# stacked top to bottom.
path =  files_path + "/48/*.*"
images_per_row = 10

# NOTE: glob.glob returns files in filesystem order, so the tile position of a
# given file is only stable as long as that order is.
tiles = []
for file in glob.glob(path):
    tiles.append(Image.fromarray(io.imread(file)))

if not tiles:
    # Previously this fell through to a NameError on `big_img`.
    raise FileNotFoundError("no image files match %r" % path)

vertical_images = []
for row_start in range(0, len(tiles), images_per_row):
    row_img = tiles[row_start]
    for tile in tiles[row_start + 1:row_start + images_per_row]:
        row_img = get_concat_h(row_img, tile)
    vertical_images.append(row_img)

big_img = vertical_images[0]
for vertical_image in vertical_images[1:]:
    big_img = get_concat_v(big_img, vertical_image)


In [None]:
# Display the tiled image assembled in the previous cell.
big_img

## Train on the bigger image

In [None]:
def image_tensor(img, sidelength):
    """Print ``img.size``, then resize ``img`` and convert it to a tensor.

    The pipeline is Resize(sidelength) -> ToTensor() -> Normalize(0.5, 0.5).
    """
    print(img.size)
    mean, std = torch.Tensor([0.5]), torch.Tensor([0.5])
    pipeline = Compose([Resize(sidelength), ToTensor(), Normalize(mean, std)])
    return pipeline(img)



In [None]:
class ImageFitting(Dataset):
    """Single-item dataset: the whole image as (coords, pixels).

    ``pixels`` is the image flattened to rows of 3 channel values and
    ``coords`` is the matching ``get_mgrid(img_size, 2)`` coordinate grid.
    """

    def __init__(self, img, img_size):
        super().__init__()
        as_tensor = image_tensor(img, img_size)
        print(as_tensor.shape)
        # channels-first -> channels-last, then one row per pixel
        channels_last = as_tensor.permute(1, 2, 0)
        self.pixels = channels_last.view(-1, 3)
        self.coords = get_mgrid(img_size, 2)

    def __len__(self):
        # The full image is the one and only sample.
        return 1

    def __getitem__(self, idx):
        if idx > 0:
            raise IndexError

        return self.coords, self.pixels

In [None]:
all_images = ImageFitting(big_img, 480)
# Pinned memory only helps (and only avoids a warning) when CUDA is present.
dataloader = DataLoader(all_images, batch_size=1,
                        pin_memory=torch.cuda.is_available(), num_workers=0)

# small architecture to handle bigger picture
img_siren = Siren(in_features=2, out_features=3, hidden_features=64, 
                  hidden_layers=3, outermost_linear=True)
# Use the GPU when one is available and fall back to the CPU otherwise,
# instead of failing on an unconditional .cuda().
img_siren.to("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
total_steps = 1000 # The whole image is our dataset, so this is simply the number of gradient descent steps.
steps_til_summary = 10

optim = torch.optim.Adam(lr=1e-4, params=img_siren.parameters())

# Run on whatever device the model already lives on rather than assuming CUDA.
device = next(img_siren.parameters()).device
model_input, ground_truth = next(iter(dataloader))
model_input, ground_truth = model_input.to(device), ground_truth.to(device)

for step in range(total_steps):
    model_output, coords = img_siren(model_input)

    loss = ((model_output - ground_truth)**2).mean()

    if not step % steps_til_summary:
        print("Step %d, Total loss %0.6f" % (step, loss))

        # The targets were normalised with mean 0.5 / std 0.5, so the network
        # works in [-1, 1]; map back to [0, 1] before handing floats to imshow,
        # which would otherwise clip everything below zero.
        preview = model_output.detach().cpu().view(480,480,3).numpy()
        fig, axes = plt.subplots(1,1, figsize=(18,6))
        axes.imshow(np.clip(preview * 0.5 + 0.5, 0.0, 1.0))
        plt.show()

    optim.zero_grad()
    loss.backward()
    optim.step()

<a name="probing_solution"></a>
# Probing the "Naive" implicit function


### Upscaling

In [None]:
def get_specific_coords(input_coords, horizontal , vertical, original_size, img_num = 0):
  """Cut the coordinates of one tile out of a flattened square grid.

  input_coords:  flattened grid with `original_size` entries per grid row
                 (row-major), e.g. from get_mgrid(original_size, 2)
  horizontal:    tile width in grid points
  vertical:      tile height in grid points
  original_size: side length of the full grid
  img_num:       tile index, counted left to right and then top to bottom,
                 with original_size // horizontal tiles per row

  Returns a tensor of shape (vertical, horizontal, ...) holding the tile rows.
  """
  images_per_row = original_size // horizontal
  tile_row, tile_col = divmod(img_num, images_per_row)

  # Flat index of the tile's top-left point. The previous code used
  # img_num * horizontal for the column part, which adds one extra grid row per
  # tile row: tiles were shifted downwards and the last tile row ran past the
  # end of the grid.
  first = tile_row * vertical * original_size + tile_col * horizontal

  specific_coords = []
  for line in range(vertical):
    line_start = first + line * original_size
    specific_coords.append(input_coords[line_start : line_start + horizontal])

  return torch.stack(specific_coords, 0)

In [None]:
# Number of tiles along each side of the big image.
NUM_OF_IMAGES_ON_AX = 10
def get_img_cords(img_num, new_img_size,new_size_coords):
  """Coordinates of square tile `img_num` (side `new_img_size`) taken from the
  flattened full-grid coordinates `new_size_coords`."""
  full_grid_side = new_img_size*NUM_OF_IMAGES_ON_AX
  return get_specific_coords(new_size_coords, new_img_size, new_img_size,
                             full_grid_side, img_num)


In [None]:
def up_sample_img(img_num,new_size):
  """Evaluate `img_siren` on a `new_size` x `new_size` grid covering tile
  `img_num` of the big image and return the raw model output."""
  # Full-resolution grid: NUM_OF_IMAGES_ON_AX tiles of `new_size` per side
  # (was a literal 10 duplicating that constant).
  new_size_coords = get_mgrid(new_size*NUM_OF_IMAGES_ON_AX, 2)
  # Follow the model's device instead of assuming CUDA.
  device = next(img_siren.parameters()).device
  img_coords = get_img_cords(img_num,new_size,new_size_coords).to(device)
  super_size_img1, _ = img_siren(img_coords)
  return super_size_img1

In [None]:
super_size_img1 = up_sample_img(85,255)
fig, axes = plt.subplots(1,1, figsize=(18,6))
# The model was fitted to pixels normalised with mean 0.5 / std 0.5, i.e. it
# outputs values in [-1, 1]; rescale to [0, 1] so imshow does not clip them.
upsampled = super_size_img1.detach().cpu().view(255,255,3).numpy()
axes.imshow(np.clip(upsampled * 0.5 + 0.5, 0.0, 1.0))
plt.show()

In [None]:
super_size_img1 = up_sample_img(32,255)
fig, axes = plt.subplots(1,1, figsize=(18,6))
# The model was fitted to pixels normalised with mean 0.5 / std 0.5, i.e. it
# outputs values in [-1, 1]; rescale to [0, 1] so imshow does not clip them.
upsampled = super_size_img1.detach().cpu().view(255,255,3).numpy()
axes.imshow(np.clip(upsampled * 0.5 + 0.5, 0.0, 1.0))
plt.show()

In [None]:
super_size_img1 = up_sample_img(0,255)
fig, axes = plt.subplots(1,1, figsize=(18,6))
# The model was fitted to pixels normalised with mean 0.5 / std 0.5, i.e. it
# outputs values in [-1, 1]; rescale to [0, 1] so imshow does not clip them.
upsampled = super_size_img1.detach().cpu().view(255,255,3).numpy()
axes.imshow(np.clip(upsampled * 0.5 + 0.5, 0.0, 1.0))
plt.show()

The results look ugly, but remember this is only a baseline.

### Now that we have this simple baseline model we can try a more serious solution

<a name="improving_solution"></a>
# Improving the solution - A fully connected Sinus Activation Module with entity embedding for different images

In [None]:
def get_mgrid(sidelen, dim=2):
    '''Generates a flattened grid of (x,y,...) coordinates in a range of -1 to 1.

    sidelen: int, number of samples along every axis
    dim: int, number of axes

    Returns a tensor of shape (sidelen ** dim, dim). Rows are in "ij"
    (row-major) order: the first coordinate varies slowest.
    '''
    axis = torch.linspace(-1, 1, steps=sidelen)
    # indexing="ij" is the layout torch.meshgrid produced by default; passing it
    # explicitly silences the deprecation warning in recent PyTorch and pins
    # the ordering the flattened pixel tensors rely on.
    mgrid = torch.stack(torch.meshgrid(*([axis] * dim), indexing="ij"), dim=-1)
    return mgrid.reshape(-1, dim)

In [None]:
class SineLayer(nn.Module):
    """Linear layer followed by ``sin(omega_0 * .)``.

    See the SIREN paper, sec. 3.2 (final paragraph) and supplement sec. 1.5,
    for the discussion of omega_0.

    * ``is_first=True``: omega_0 is a frequency factor that multiplies the
      pre-activations; the best value is signal dependent (hyperparameter).
    * ``is_first=False``: the initial weights are additionally divided by
      omega_0, keeping activation magnitudes constant while boosting the
      gradients to the weight matrix.
    """

    def __init__(self, in_features, out_features, bias=True,
                 is_first=False, omega_0=30):
        super().__init__()
        self.in_features = in_features
        self.is_first = is_first
        self.omega_0 = omega_0

        self.linear = nn.Linear(in_features, out_features, bias=bias)
        self.init_weights()

    def init_weights(self):
        """Re-draw the linear weights uniformly from a symmetric interval."""
        if self.is_first:
            bound = 1 / self.in_features
        else:
            bound = np.sqrt(6 / self.in_features) / self.omega_0

        with torch.no_grad():
            self.linear.weight.uniform_(-bound, bound)

    def forward(self, input):
        """Return ``sin(omega_0 * linear(input))``."""
        scaled = self.omega_0 * self.linear(input)
        return torch.sin(scaled)

    def forward_with_intermediate(self, input):
        """Return ``(activation, pre_activation)``; used to visualise
        activation distributions."""
        pre_activation = self.omega_0 * self.linear(input)
        activation = torch.sin(pre_activation)
        return activation, pre_activation
    
    
class Img_Representation(nn.Module):
    """Sine-activated MLP that represents several images with one network.

    Every input row is ``[img_num, coord_1, ..., coord_k]`` with
    ``k = coords_features``. The image number is looked up in a learned
    embedding (``num_images`` entries of size ``img_num_embedding_size``) and
    concatenated in front of the coordinates before entering the sine layers.
    """

    def __init__(self, coords_features, img_num_embedding_size, hidden_features, hidden_layers, out_features, outermost_linear=False, 
                 first_omega_0=30, hidden_omega_0=30., num_images=100):
        super().__init__()

        self.coords_features = coords_features

        #embedding layer for the image number
        self.image_embedding = nn.Embedding(num_images, img_num_embedding_size)

        layers = [SineLayer(coords_features + img_num_embedding_size, hidden_features,
                            is_first=True, omega_0=first_omega_0)]

        for _ in range(hidden_layers):
            layers.append(SineLayer(hidden_features, hidden_features,
                                    is_first=False, omega_0=hidden_omega_0))

        if outermost_linear:
            final_linear = nn.Linear(hidden_features, out_features)

            bound = np.sqrt(6 / hidden_features) / hidden_omega_0
            with torch.no_grad():
                final_linear.weight.uniform_(-bound, bound)

            layers.append(final_linear)
        else:
            layers.append(SineLayer(hidden_features, out_features,
                                    is_first=False, omega_0=hidden_omega_0))

        self.net = nn.Sequential(*layers)

    def _embed_inputs(self, coords_and_imagenum):
        """Split packed rows into (network input features, grad-enabled coords)."""
        img_num = coords_and_imagenum[..., 0].long()
        coords = coords_and_imagenum[..., 1:1 + self.coords_features]
        # Detach and enable gradients *before* concatenating so the returned
        # coords are the ones the network actually consumes.
        coords = coords.clone().detach().requires_grad_(True)
        features = torch.cat((self.image_embedding(img_num), coords), dim=-1)
        return features, coords

    def forward(self, coords_and_imagenum):
        """Return ``(output, coords)`` for packed ``[img_num, coords...]`` rows.

        Previously the embedding was never applied and the bare coordinates
        were fed to a first layer expecting
        ``coords_features + img_num_embedding_size`` inputs.
        """
        features, coords = self._embed_inputs(coords_and_imagenum)
        output = self.net(features)
        return output, coords

    def forward_with_activations(self, coords_and_imagenum, retain_grad=False):
        '''Returns not only model output, but also intermediate activations.
        Only used for visualizing activations later!

        Takes the same packed rows as forward(). Key 'input' holds the
        concatenated [embedding, coords] features fed to the first layer.
        '''
        # OrderedDict is not imported at the top of the notebook.
        from collections import OrderedDict

        activations = OrderedDict()

        activation_count = 0
        x, _ = self._embed_inputs(coords_and_imagenum)
        activations['input'] = x

        for layer in self.net:
            if isinstance(layer, SineLayer):
                x, intermed = layer.forward_with_intermediate(x)

                if retain_grad:
                    x.retain_grad()
                    intermed.retain_grad()

                activations['_'.join((str(layer.__class__), "%d" % activation_count))] = intermed
                activation_count += 1
            else:
                x = layer(x)

                if retain_grad:
                    x.retain_grad()

            activations['_'.join((str(layer.__class__), "%d" % activation_count))] = x
            activation_count += 1

        return activations

In [None]:
class Img_Representation(nn.Module):
    """Sine-activated MLP that represents several images with one network.

    Every input row is ``[img_num, coord_1, ..., coord_k]`` with
    ``k = coords_features``. The image number is looked up in a learned
    embedding (``num_images`` entries of size ``img_num_embedding_size``) and
    concatenated in front of the coordinates before entering the sine layers.
    """

    def __init__(self, coords_features, img_num_embedding_size, hidden_features, hidden_layers, out_features, outermost_linear=False, 
                 first_omega_0=30, hidden_omega_0=30., num_images=100):
        super().__init__()

        self.coords_features = coords_features

        #embedding layer for the image number
        self.image_embedding = nn.Embedding(num_images, img_num_embedding_size)

        layers = [SineLayer(coords_features + img_num_embedding_size, hidden_features,
                            is_first=True, omega_0=first_omega_0)]

        for _ in range(hidden_layers):
            layers.append(SineLayer(hidden_features, hidden_features,
                                    is_first=False, omega_0=hidden_omega_0))

        if outermost_linear:
            final_linear = nn.Linear(hidden_features, out_features)

            bound = np.sqrt(6 / hidden_features) / hidden_omega_0
            with torch.no_grad():
                final_linear.weight.uniform_(-bound, bound)

            layers.append(final_linear)
        else:
            layers.append(SineLayer(hidden_features, out_features,
                                    is_first=False, omega_0=hidden_omega_0))

        self.net = nn.Sequential(*layers)

    def _embed_inputs(self, coords_and_imagenum):
        """Split packed rows into (network input features, grad-enabled coords).

        The features can be passed to forward_with_activations().
        """
        img_num = coords_and_imagenum[..., 0].long()
        coords = coords_and_imagenum[..., 1:1 + self.coords_features]
        # Detach and enable gradients *before* concatenating. The old code did
        # this after the concat, so the returned coords were not part of the
        # graph and no derivative w.r.t. them could be taken.
        coords = coords.clone().detach().requires_grad_(True)
        features = torch.cat((self.image_embedding(img_num), coords), dim=-1)
        return features, coords

    def forward(self, coords_and_imagenum):
        """Return ``(output, coords)`` for packed ``[img_num, coords...]`` rows.

        ``coords`` is the grad-enabled copy of the coordinates that was fed to
        the network (allows to take derivatives w.r.t. the input).
        """
        features, coords = self._embed_inputs(coords_and_imagenum)
        output = self.net(features)
        return output, coords

    def forward_with_activations(self, coords, retain_grad=False):
        '''Returns not only model output, but also intermediate activations.
        Only used for visualizing activations later!

        Unlike forward(), `coords` is passed to the first layer unchanged, so
        it must already be the concatenated [embedding, coords] features
        (see _embed_inputs), not packed [img_num, coords...] rows.
        '''
        # OrderedDict is not imported at the top of the notebook, which made
        # this method raise NameError; import it where it is used.
        from collections import OrderedDict

        activations = OrderedDict()

        activation_count = 0
        x = coords.clone().detach().requires_grad_(True)
        activations['input'] = x
        for layer in self.net:
            if isinstance(layer, SineLayer):
                x, intermed = layer.forward_with_intermediate(x)

                if retain_grad:
                    x.retain_grad()
                    intermed.retain_grad()

                activations['_'.join((str(layer.__class__), "%d" % activation_count))] = intermed
                activation_count += 1
            else:
                x = layer(x)

                if retain_grad:
                    x.retain_grad()

            activations['_'.join((str(layer.__class__), "%d" % activation_count))] = x
            activation_count += 1

        return activations

In [None]:
def image_tensor(img, sidelength):
    """Resize ``img`` and convert it to a normalised tensor.

    The pipeline is Resize(sidelength) -> ToTensor() -> Normalize(0.5, 0.5).
    """
    mean, std = torch.Tensor([0.5]), torch.Tensor([0.5])
    pipeline = Compose([Resize(sidelength), ToTensor(), Normalize(mean, std)])
    return pipeline(img)


In [None]:
class ImageFitting(Dataset):
    """Single-item dataset holding every image matched by a glob pattern.

    ``coords_and_img_num`` has one row ``[img_num, coord_0, coord_1]`` per
    pixel of every image and ``pixels`` the matching 3-channel values.
    ``img_num`` is the position of the file in ``glob.glob(images_path)``.
    """

    def __init__(self, images_path, img_size):
        super().__init__()

        # The coordinate grid is identical for every image; build it once.
        img_coords = get_mgrid(img_size, 2)

        pixel_chunks = []
        input_chunks = []
        # Use the `images_path` argument; it used to be ignored in favour of
        # the global `path`.
        # NOTE: glob order is filesystem dependent, so image numbers are only
        # stable as long as that order is.
        for img_num,img_file in enumerate(glob.glob(images_path)):
          new_image = Image.fromarray(io.imread(img_file)).convert('RGB')
          img = image_tensor(new_image,img_size)

          img_img_num = torch.full((img_coords.shape[0], 1), float(img_num))
          pixel_chunks.append(img.permute(1, 2, 0).reshape(-1, 3))
          input_chunks.append(torch.hstack((img_img_num, img_coords)))

        # Concatenate once instead of re-allocating the tensors per image.
        self.pixels = torch.cat(pixel_chunks, 0) if pixel_chunks else torch.tensor([])
        self.coords_and_img_num = torch.cat(input_chunks, 0) if input_chunks else torch.tensor([])

    def __len__(self):
        # All images together form the one and only sample.
        return 1

    def __getitem__(self, idx):    
        if idx > 0: raise IndexError
            
        return self.coords_and_img_num, self.pixels

In [None]:
def create_image_gt(img_file, img_size, img_num):
  """Load `img_file` as RGB and wrap it in an ImageFitting dataset."""
  # Read the file named by the argument; this used to read `file`, a loop
  # variable leaked from an earlier cell.
  new_image = Image.fromarray(skimage.io.imread(img_file)).convert('RGB')
  # NOTE(review): the ImageFitting defined above takes (images_path, img_size),
  # where images_path is a glob pattern; passing a PIL image plus a third
  # argument does not match it. Confirm the intended dataset signature.
  return ImageFitting(new_image, img_size, img_num) 


In [None]:
def up_sample(img_num,size,return_pixels = False):
  """Render image `img_num` at `size` x `size` with `img_representation`.

  With return_pixels=True returns (pixels, output): `pixels` is a
  (size, size, 3) numpy array and `output` the (1, size*size, 3) CPU tensor,
  both in the model's raw output range. Otherwise the image is displayed and
  nothing is returned.
  """
  new_size_coords = get_mgrid(size, 2)
  img_img_num = torch.full((new_size_coords.shape[0], 1), img_num)
  coords_and_img_num = torch.hstack((img_img_num, new_size_coords))

  # Follow the model's device instead of assuming CUDA, and skip building an
  # autograd graph: the result is only displayed or compared.
  device = next(img_representation.parameters()).device
  with torch.no_grad():
    model_output, coords = img_representation(coords_and_img_num.unsqueeze(0).to(device))

  model_output = model_output.detach().cpu()
  model_output_pixels = model_output.view(size,size,3).numpy()

  if return_pixels:
    return model_output_pixels, model_output
  fig, axes = plt.subplots(1,1, figsize=(18,6))
  # The training targets were normalised with mean 0.5 / std 0.5 ([-1, 1]);
  # rescale to [0, 1] so imshow does not clip the negative half.
  axes.imshow(np.clip(model_output_pixels * 0.5 + 0.5, 0.0, 1.0))
  plt.show()

## Searching for architecture

In [None]:
# didn't generalize well enough
# img_representation = Img_Representation(coords_features=2, img_num_embedding_size = 3, out_features=3, hidden_features=64, 
#                   hidden_layers=3, outermost_linear=True)
# best so far
# 2 coordinate features + a 4-dim image-number embedding, width-100 sine layers
# (hidden_layers=4 after the first layer) and a linear 3-channel output.
img_representation = Img_Representation(coords_features=2, img_num_embedding_size = 4, out_features=3, hidden_features=100, 
                   hidden_layers=4, outermost_linear=True)
# no serious improvement
# img_representation = Img_Representation(coords_features=2, img_num_embedding_size = 4, out_features=3, hidden_features=200, 
#                    hidden_layers=6, outermost_linear=True)

# img_representation = Img_Representation(coords_features=2, img_num_embedding_size = 4, out_features=3, hidden_features=300, 
#                    hidden_layers=8, outermost_linear=True)


# img_representation = Img_Representation(coords_features=2, img_num_embedding_size = 7, out_features=3, hidden_features=120, 
#                    hidden_layers=4, outermost_linear=True)

# seems to give ok results
# img_representation = Img_Representation(coords_features=2, img_num_embedding_size = 4, out_features=3, hidden_features=100, 
#                    hidden_layers=8, outermost_linear=True)



## Arranging for training

In [None]:
# Glob pattern matching every file in the "48" folder.
path = files_path + "/48/*.*"

# One dataset item holding the pixels and [img_num, coords] rows of all images.
all_images = ImageFitting(path, 48)
#print("all_images: ", all_images)
dataloader = DataLoader(all_images, batch_size=1, pin_memory=True, num_workers=0)

In [None]:
# Use the GPU when one is available and fall back to the CPU otherwise,
# instead of failing on an unconditional .cuda().
img_representation.to("cuda" if torch.cuda.is_available() else "cpu")

### Training Loop

reference super resolution image (255)

In [None]:
# Load the high-resolution reference image and flatten it to rows of 3 channel
# values after resizing/normalising it with image_tensor(..., 255).
# NOTE(review): this reads from the "256" folder, while the instructions at the
# top of the notebook mention a "255" folder - confirm which one is intended.
high_res_image = Image.fromarray(io.imread(files_path + "/256/workstation-256.png")).convert('RGB')
high_res_image_pixels = image_tensor(high_res_image,255).permute(1, 2, 0).view(-1, 3)
fig, axes = plt.subplots(1,1, figsize=(18,6))
axes.imshow(high_res_image)
#axes.imshow(super_size_img1.cpu().view(255,255,3).detach().numpy())
plt.show()

In [None]:
## Testing generalization by upscaling and comparison to high resolution with PSNR

In [None]:
total_steps = 2500 
steps_til_summary = 10
psnr = PeakSignalNoiseRatio()


optim = torch.optim.Adam(lr=1e-4, params=img_representation.parameters())

# Run on whatever device the model already lives on rather than assuming CUDA.
device = next(img_representation.parameters()).device
model_input, ground_truth = next(iter(dataloader))
model_input, ground_truth = model_input.to(device), ground_truth.to(device)

for step in range(total_steps):
    model_output, coords = img_representation(model_input)    

    loss = ((model_output - ground_truth)**2).mean()
    
    if not step % steps_til_summary:
        print("Step %d, Total loss %0.6f" % (step, loss))

        # Image whose reconstruction is monitored; its 48*48 pixel rows are a
        # contiguous slice of the flattened model output.
        img_num = 99
        start = img_num*48*48
        end = (img_num+1)*48*48

        fig, axes = plt.subplots(1,3, figsize=(18,6))

        # Targets were normalised with mean 0.5 / std 0.5, so model values are
        # in [-1, 1]; rescale to [0, 1] for display so imshow does not clip.
        low_res = model_output.detach().cpu()[:,start:end,:].view(48,48,3).numpy()
        axes[0].imshow(np.clip(low_res * 0.5 + 0.5, 0.0, 1.0))
        upsampled_pixels, upsampled_pixels_tensor = up_sample(img_num,255,return_pixels = True)

        # Drop the batch axis so both PSNR operands have the same (N, 3) shape.
        psnr_res = psnr(upsampled_pixels_tensor.detach().reshape(-1, 3), high_res_image_pixels)
        print("PSNR between upsample and high resolution: ", psnr_res)
        axes[1].imshow(np.clip(upsampled_pixels * 0.5 + 0.5, 0.0, 1.0))
        axes[2].imshow(high_res_image)

        plt.show()

    optim.zero_grad()
    loss.backward()
    optim.step()

### Load model instead of training...

In [None]:
#model_path = files_path + "/models/img_representation.pt"

In [None]:
#img_representation = torch.load(model_path)

In [None]:
#model_input, ground_truth = model_input.cuda(), ground_truth.cuda()


In [None]:
#model_output, coords = img_representation(model_input)  

number of parameters of the model

In [None]:
# Total number of scalar parameters in the model (embedding included).
sum(p.numel() for p in img_representation.parameters())

Check output

In [None]:
# NOTE: `model_output` is the tensor left over from the last training step.
img_num =  37
start = img_num*48*48
end = (img_num+1)*48*48

model_output_pixels = model_output.cpu()[:,start:end,:].view(48,48,3).detach().numpy()

fig, axes = plt.subplots(1,1, figsize=(18,6))
# Model values are in [-1, 1] (targets normalised with mean 0.5 / std 0.5);
# rescale to [0, 1] so imshow does not clip the negative half.
axes.imshow(np.clip(model_output_pixels * 0.5 + 0.5, 0.0, 1.0))
plt.show()


## Testing upsampling 

In [None]:
def get_img_pixels(img_num,size):
  """Render image `img_num` at `size` x `size` without displaying it.

  Returns (pixels, output): a (size, size, 3) numpy array and the
  (1, size*size, 3) CPU tensor of raw model outputs.
  """
  # This was a line-for-line copy of up_sample's return_pixels branch;
  # delegate so the two cannot drift apart.
  return up_sample(img_num, size, return_pixels=True)


In [None]:
# Render image number 78 at 255x255 and display it.
up_sample(78,255)

In [None]:
# Render image number 83 at 255x255 and display it.
up_sample(83,255)

<a name="interpolation"></a>
# Interpolation and Similarity measures for representations




## Interpolating

In [None]:
def lerp(factor, a, b):
  """Blend `a` and `b`: `a` is weighted by `factor`, `b` by `1 - factor`."""
  weight_b = 1.0 - factor
  return factor*a + weight_b*b

In [None]:
def interpolate_pixels(factor, size, src1, src2):
  """Blend the top-left `size` x `size` RGB pixels of two images.

  `src1` is weighted by `factor` and `src2` by `1 - factor`. The sources may
  be numpy arrays or tensors indexed as [x, y, channel]. Returns a float32
  tensor of shape (size, size, 3).

  Raises IndexError when a source is smaller than `size` x `size` x 3.
  """
  # One vectorised expression replaces the per-pixel, per-channel Python loop.
  first = torch.as_tensor(src1, dtype=torch.float32)[:size, :size, :3]
  second = torch.as_tensor(src2, dtype=torch.float32)[:size, :size, :3]

  expected = (size, size, 3)
  if tuple(first.shape) != expected or tuple(second.shape) != expected:
    # The loop version failed the same way when indexing past the end.
    raise IndexError("sources must be at least %d x %d x 3" % (size, size))

  return factor * first + (1.0 - factor) * second


In [None]:
def interpolate_images(img1, img2):
  """Display the 50/50 pixel blend of the reconstructions of two images.

  `img1` and `img2` are image numbers; the 48x48 reconstructions are sliced
  out of the global `model_output` left over from training.
  """
  def _reconstruction(img_num):
    # Each image occupies 48*48 consecutive rows of the flattened output.
    start = img_num*48*48
    end = (img_num+1)*48*48
    return model_output.cpu()[:,start:end,:].view(48,48,3).detach().numpy()

  interpolated_image = interpolate_pixels(0.5, 48, _reconstruction(img1), _reconstruction(img2))

  # show image
  fig, axes = plt.subplots(1,1, figsize=(18,6))
  # Model values are in [-1, 1] (targets normalised with mean 0.5 / std 0.5);
  # rescale to [0, 1] so imshow does not clip the negative half.
  axes.imshow(np.clip(np.asarray(interpolated_image) * 0.5 + 0.5, 0.0, 1.0))
  plt.show()  

In [None]:
# Show the blend of the reconstructions of images 37 and 85.
interpolate_images(37, 85)

In [None]:
# Show the blend of the reconstructions of images 97 and 95.
interpolate_images(97, 95)

In [None]:
# Show the blend of the reconstructions of images 22 and 38.
interpolate_images(22, 38)

## Measuring distance between model activations of different images


I didn't have time to get this to work :-(

The idea was to use the approach from https://github.com/AntixK/PyTorch-Model-Compare to compare the layer representations, using activations taken from different images.




In [None]:
# Show the current working directory of the kernel.
%pwd

In [None]:
%mkdir /content/gdrive/MyDrive/Github

In [None]:
# Change the kernel's working directory to the Github folder on Drive.
%cd /content/gdrive/MyDrive/Github

In [None]:
!git clone https://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@github.com/AntixK/PyTorch-Model-Compare



In [None]:
# Enter the cloned repository (path relative to the current directory).
%cd PyTorch-Model-Compare
 

In [None]:
# Same destination as the previous cell, given as an absolute path.
cd  /content/gdrive/MyDrive/Github/PyTorch-Model-Compare

In [None]:
class ImageFitting(Dataset):
    """Single-item dataset of the images matched by a glob pattern.

    ``coords_and_img_num`` has one row ``[img_num, coord_0, coord_1]`` per
    pixel and ``pixels`` the matching 3-channel values. ``img_num`` is the
    position of the file in ``glob.glob(images_path)``. When ``only_img`` is
    given, only the image with that number is loaded.
    """

    def __init__(self, images_path, img_size, only_img = None):
        super().__init__()

        # The coordinate grid is identical for every image; build it once.
        img_coords = get_mgrid(img_size, 2)

        pixel_chunks = []
        input_chunks = []
        # Use the `images_path` argument; it used to be ignored in favour of
        # the global `path`.
        # NOTE: glob order is filesystem dependent, so image numbers are only
        # stable as long as that order is.
        for img_num,img_file in enumerate(glob.glob(images_path)):
          if only_img is not None and img_num != only_img:
            # Do not decode images that would be discarded anyway.
            continue

          new_image = Image.fromarray(io.imread(img_file)).convert('RGB')
          img = image_tensor(new_image,img_size)

          img_img_num = torch.full((img_coords.shape[0], 1), float(img_num))
          pixel_chunks.append(img.permute(1, 2, 0).reshape(-1, 3))
          input_chunks.append(torch.hstack((img_img_num, img_coords)))

          if only_img is not None:
            # The single requested image has been loaded.
            break

        # Concatenate once instead of re-allocating the tensors per image.
        self.pixels = torch.cat(pixel_chunks, 0) if pixel_chunks else torch.tensor([])
        self.coords_and_img_num = torch.cat(input_chunks, 0) if input_chunks else torch.tensor([])

    def __len__(self):
        # The loaded images together form the one and only sample.
        return 1

    def __getitem__(self, idx):    
        if idx > 0: raise IndexError
            
        return self.coords_and_img_num, self.pixels

In [None]:
# img_representation = Img_Representation(coords_features=2, img_num_embedding_size = 3, out_features=3, hidden_features=64, 
#                   hidden_layers=3, outermost_linear=True)
# img_representation1 = Img_Representation(coords_features=2, img_num_embedding_size = 4, out_features=3, hidden_features=200, 
#                    hidden_layers=4, outermost_linear=True)

# img_representation2 = Img_Representation(coords_features=2, img_num_embedding_size = 4, out_features=3, hidden_features=200, 
#                    hidden_layers=4, outermost_linear=True)

In [None]:
# Both names refer to the same model object; the comparison below therefore
# contrasts one network with itself on two different dataloaders.
model1 = img_representation
model2 = img_representation

In [None]:
path = files_path + "/48/*.*"

# One single-image dataset per image to compare.
image0 = ImageFitting(path, 48, only_img = 0)
dataloader1 = DataLoader(image0, batch_size=1, pin_memory=True, num_workers=0)
print("image0.pixels.shape", image0.pixels.shape)

image1 = ImageFitting(path, 48, only_img = 1)
# Was built from image0, so both loaders served the same image.
dataloader2 = DataLoader(image1, batch_size=1, pin_memory=True, num_workers=0)
print("image1.pixels.shape", image1.pixels.shape)


In [None]:
from torch_cka import CKA


In [None]:
# Print the module tree, e.g. to look up sub-module names for the CKA hooks.
print(model1)

In [None]:
# Layers are matched against the names from model.named_modules(). The sine
# layers live inside the `net` Sequential, so they are called "net.0", "net.1",
# ... - the bare "0", "1", "2" used before match no module at all.
cka_layers = ["net.0", "net.1", "net.2"]

cka = CKA(model1, model2,
          model1_name="img_representation1",   # good idea to provide names to avoid confusion
          model2_name="img_representation2",   
          model1_layers=cka_layers, # List of layers to extract features from
          model2_layers=cka_layers, # extracts all layer features by default
          device='cuda' if torch.cuda.is_available() else 'cpu')



In [None]:
# Run both models over their dataloaders and accumulate the CKA statistics.
cka.compare(dataloader1, dataloader2) # secondary dataloader is optional

results = cka.export()  # returns a dict that contains model names, layer names
                        # and the CKA matrix
