In [5]:
from torch import nn
from copy import deepcopy
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.backends.cudnn as cudnn
import torchvision
import tqdm
import os
import numpy as np
import clip
from PIL import Image

In [None]:
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load('ViT-B/32', device)

# One entry per attribute in `label` (same order); each entry is a 2-D numpy
# array of CLIP image embeddings, one row per image.
test_representation = []
valid_representation = []
train_representation = []

label = ["Black_Hair", "Blond_Hair"]
a = ["5_o_Clock_Shadow","Arched_Eyebrows", "Attractive", "Bags_Under_Eyes", "Bald", "Bangs", "Big_Lips",
         "Big_Nose", "Black_Hair", "Blond_Hair", "Blurry", "Brown_Hair", "Bushy_Eyebrows", "Chubby", "Double_Chin",
         "Eyeglasses", "Goatee", "Gray_Hair", "Heavy_Makeup", "High_Cheekbones", "Male", "Mouth_Slightly_Open",
         "Mustache", "Narrow_Eyes", "No_Beard", "Oval_Face", "Pale_Skin", "Pointy_Nose", "Receding_Hairline",
         "Rosy_Cheeks", "Sideburns", "Smiling", "Straight_Hair", "Wavy_Hair", "Wearing_Earrings", "Wearing_Hat",
         "Wearing_Lipstick", "Wearing_Necklace", "Wearing_Necktie", "Young"]

ANNO_DIR = "./CelebA_Anno/"
IMG_DIR = "./CelebA/Img/img_align_celeba/"
N_TEST = 500   # first N_TEST images listed for an attribute form the test split
N_VALID = 500  # the next N_VALID images form the validation split; the rest is train


def _read_image_names(anno_path):
    """Return the image file names listed in one annotation file.

    Blank lines are skipped. The first whitespace-separated token of each
    remaining line is used as the file name.
    NOTE(review): assumes the file name is the first token on every line --
    confirm against the actual format of the ./CelebA_Anno/*.txt files.
    """
    with open(anno_path) as anno_file:  # closed automatically, even on error
        token_rows = (line.split() for line in anno_file)
        return [tokens[0] for tokens in token_rows if tokens]


def _encode_images(img_names):
    """Encode the named images with CLIP and return a (len(img_names), dim) numpy array.

    Raises:
        ValueError: if `img_names` is empty (there is nothing to concatenate).
    """
    if not img_names:
        raise ValueError("no images to encode for this split")

    encoded = []
    with torch.no_grad():  # inference only; no autograd graph needed
        for img_name in img_names:
            with Image.open(IMG_DIR + img_name) as img:
                image = preprocess(img).unsqueeze(0).to(device)
            encoded.append(model.encode_image(image))
    return torch.cat(encoded).cpu().numpy()


for j in range(len(label)):
    img_names = _read_image_names(ANNO_DIR + label[j] + ".txt")

    # Sequential split in file order: test, then validation, then train.
    test_names = img_names[:N_TEST]
    valid_names = img_names[N_TEST:N_TEST + N_VALID]
    train_names = img_names[N_TEST + N_VALID:]

    test_representation.append(_encode_images(test_names))
    valid_representation.append(_encode_images(valid_names))
    train_representation.append(_encode_images(train_names))

In [None]:
class LinearLayerWithActivation(nn.Module):
    """A single fully connected layer followed by an activation.

    Computes ``activation(x @ weights.T + bias)``.

    Args:
        input_shape: Shape of the expected input; only ``input_shape[1]``
            (the feature dimension) is read.
        num_units: Number of output units.
        bias: If True, add a learnable bias initialised to zeros;
            otherwise ``self.bias`` is None.
        activation_type: Callable applied to the linear output. May be an
            ``nn.Module`` instance (e.g. ``nn.ReLU()``, ``nn.Identity()``) or
            any plain callable such as ``F.relu``.
    """

    def __init__(self, input_shape, num_units, bias=False, activation_type=nn.ReLU()):
        super().__init__()
        self.activation_type = activation_type

        # Weight matrix laid out as (out_features, in_features), as F.linear expects.
        self.weights = nn.Parameter(torch.empty(num_units, input_shape[1]))
        nn.init.normal_(self.weights)

        if bias:
            self.bias = nn.Parameter(torch.zeros(num_units))
        else:
            self.bias = None

    def forward(self, x):
        """Apply the linear map and then the activation to ``x``."""
        out = F.linear(x, self.weights, self.bias)
        # Call the activation itself rather than its .forward attribute so that
        # module hooks run and plain callables (which have no .forward) work too.
        return self.activation_type(out)

In [None]:
import sys
eps = sys.float_info.epsilon  # keeps log() finite when a variance is exactly zero

k = 40 # categories
BATCH_SIZE = 128

# train_representation[0] / [1] hold the embeddings for label[0] / label[1].
# Cast to float32 so the dtype matches the layer's float32 weights
# (NOTE(review): CLIP may emit float16 features on GPU -- confirm).
train_x = torch.from_numpy(np.asarray(train_representation[0])).float()
train_y = torch.from_numpy(np.asarray(train_representation[1])).float()

# The layer only reads input_shape[1], i.e. the embedding dimension.
fcc_net = LinearLayerWithActivation(input_shape=train_x.shape, num_units=k, bias=True, activation_type=nn.Identity())
optimizer = optim.Adam(fcc_net.parameters(), amsgrad=False, weight_decay=0.0)


for name, params in fcc_net.named_parameters():
    print('Parameters with name', name, 'and shape', params.shape)

metric_dict = {'losses1': [], 'losses2':[],'losses':[]}

epochs = 100

# Use only as many full batches as the smaller of the two sets provides,
# so every step sees one batch from each attribute.
num = min(len(train_x), len(train_y)) // BATCH_SIZE

for epoch in range(epochs):
    for i in range(0, num):
        batch = slice(i * BATCH_SIZE, (i + 1) * BATCH_SIZE)
        x = train_x[batch]
        y = train_y[batch]

        out1 = fcc_net(x)
        out2 = fcc_net(y)

        # Per-unit variance across the batch; only output units 0 and 1 enter the loss.
        var_x = torch.var(out1, dim=0)
        var_y = torch.var(out2, dim=0)
        v_x_pos, v_x_neg = var_x[0], var_x[1]
        v_y_pos, v_y_neg = var_y[0], var_y[1]

        # Log-variance difference between unit 0 and unit 1, per attribute.
        loss1 = torch.log(eps + v_x_pos) - torch.log(eps + v_x_neg)
        loss2 = torch.log(eps + v_y_pos) - torch.log(eps + v_y_neg)
        loss = loss1 + loss2

        optimizer.zero_grad()  # clear gradients left over from the previous step
        loss.backward()        # compute gradients of the current step
        optimizer.step()       # parameter update

        # .detach() drops the value from the autograd graph, .cpu() moves it to
        # host memory, and .numpy() converts it for logging.
        metric_dict['losses1'].append(loss1.detach().cpu().numpy())
        metric_dict['losses2'].append(loss2.detach().cpu().numpy())
        metric_dict['losses'].append(loss.detach().cpu().numpy())

In [None]:
# Presumably plots the loss histories collected in metric_dict
# ('losses1', 'losses2', 'losses') against the training step index.
# NOTE(review): plot_stats_in_graph is neither defined nor imported in the
# visible cells -- confirm it is provided elsewhere before running this cell.
plot_stats_in_graph(metric_dict, y_axis_label='Loss', x_axis_label='Number of Steps')

In [10]:
# Scratch check: slicing a nested list and converting the slice to a tensor
# keeps whole rows. Note that this rebinds `a`, which an earlier cell uses
# for the list of CelebA attribute names.
l = [[start, start + 1] for start in range(1, 8, 2)]
a = torch.from_numpy(np.array(l[:2]))
print(a)

tensor([[1, 2],
        [3, 4]])
