In [1]:
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import transforms

import torch

a = torch.randn(5,3)

b = torch.flatten(a)

c = b.view(a.shape[0] , a.shape[1])

print(a.shape[0])
print(a.shape[1])
print(a.shape)
print(b)

print(c)

5
3
torch.Size([5, 3])
tensor([ 1.8063, -0.1446,  0.4808, -1.3045,  0.3946, -0.6175, -0.3307, -0.5379,
         0.7020,  0.7159,  0.2873, -1.1844,  1.1351, -0.3666,  0.9283])
tensor([[ 1.8063, -0.1446,  0.4808],
        [-1.3045,  0.3946, -0.6175],
        [-0.3307, -0.5379,  0.7020],
        [ 0.7159,  0.2873, -1.1844],
        [ 1.1351, -0.3666,  0.9283]])


In [2]:
import os 

path = "./dataset/images/"
image_lis = os.listdir(path)

In [3]:
input_size = 224
data_transforms = transforms.Compose([
        transforms.RandomResizedCrop(input_size),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])


In [4]:
class ImageAutoEncoder(nn.Module):
    def __init__(self):
        super(ImageAutoEncoder, self).__init__()
        
        #Encoder
        self.conv1 = nn.Conv2d(3, 16, 3, padding=1)  
        self.conv2 = nn.Conv2d(16, 4, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        
        self.linear_1 = nn.Linear(12544 , 1200)
        self.linear_2 = nn.Linear(1200 , 200)
        
        self.delinear_1 = nn.Linear(200 , 1200)
        self.delinear_2 = nn.Linear(1200 , 12544)
        
        #Decoder
        #self.t_conv1 = nn.ConvTranspose2d(4, 16, 2, stride=2)
        #self.t_conv2 = nn.ConvTranspose2d(16, 3, 2, stride=2)
        
        self.deconv1 = nn.ConvTranspose2d(1, 16, 3, stride=2, output_padding=1)
        self.deconv2 = nn.ConvTranspose2d(16, 3, 3, stride=1, padding = 2, output_padding=0)
        
    def forward(self , x):
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        
        x = torch.flatten(x ,start_dim = 1)
        x = nn.ReLU()(self.linear_1(x))
        x = nn.ReLU()(self.linear_2(x))
        
        #start of decoder
        x = nn.ReLU()(self.delinear_1(x))
        x = nn.ReLU()(self.delinear_2(x))
        x = x.view(-1 , 1 , 112 , 112)
        x = nn.ReLU()(self.deconv1(x))
        x = self.deconv2(x)
        
        
        #print("X.shape" , x.shape)
        
        return x
        

In [5]:
import pandas as pd
df_csv = pd.read_csv("./dataset/books_with_genres.csv")

In [6]:
df_csv.columns

Index(['isbn', 'text_reviews_count', 'series', 'country_code', 'language_code',
       'popular_shelves', 'asin', 'is_ebook', 'average_rating', 'kindle_asin',
       'similar_books', 'description', 'format', 'link', 'authors',
       'publisher', 'num_pages', 'publication_day', 'isbn13',
       'publication_month', 'edition_information', 'publication_year', 'url',
       'image_url', 'book_id', 'ratings_count', 'work_id', 'title',
       'title_without_series', 'history, historical fiction, biography',
       'fiction', 'fantasy, paranormal', 'mystery, thriller, crime', 'poetry',
       'romance', 'non-fiction', 'children', 'young-adult', 'comics, graphic',
       'category', 'binary_category'],
      dtype='object')

In [7]:
df_csv["fiction_binary"] = np.where(df_csv['binary_category']=='fiction', 1, 0)
y = df_csv['fiction_binary']

In [8]:
import torch
from PIL import Image
ind = 0
batch_size = 32
book_id , category = df_csv['book_id'] , df_csv['fiction_binary']
input_list = []
for i in book_id:
    im = Image.open(path + str(i))
    im_rgb = im.convert("RGB")
    input_tensor = data_transforms(im_rgb)
    input_list.append(input_tensor)


In [9]:
input_X = torch.stack(input_list).cuda()
input_Y = torch.LongTensor(y).cuda()

In [10]:
print(input_Y.shape)
print(input_X.shape)

torch.Size([5000])
torch.Size([5000, 3, 224, 224])


In [11]:
model = ImageAutoEncoder().cuda()
criterion = nn.MSELoss()

optimizer = torch.optim.Adam(model.parameters() , lr = 0.001)

In [12]:
epoch_count = 100
batch_size = 32
for i in range(0 , epoch_count):
    print("iteration number " , i + 1)
    train_loss = 0
    for j in range(0 , len(input_X) , 32):
        img = input_X[j:j+32]#.unsqueeze(0)
        #print(img.shape)
        img.requires_grad_ = False
        X_predicted = model.forward(img)
        loss = criterion(X_predicted , img)
        train_loss += loss
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print("Loss is " ,train_loss)
    

iteration number  1
Loss is  tensor(213.8831, device='cuda:0', grad_fn=<AddBackward0>)
iteration number  2
Loss is  tensor(147.8370, device='cuda:0', grad_fn=<AddBackward0>)
iteration number  3
Loss is  tensor(132.9646, device='cuda:0', grad_fn=<AddBackward0>)
iteration number  4
Loss is  tensor(124.6258, device='cuda:0', grad_fn=<AddBackward0>)
iteration number  5
Loss is  tensor(120.4789, device='cuda:0', grad_fn=<AddBackward0>)
iteration number  6
Loss is  tensor(116.2557, device='cuda:0', grad_fn=<AddBackward0>)
iteration number  7
Loss is  tensor(113.5704, device='cuda:0', grad_fn=<AddBackward0>)
iteration number  8
Loss is  tensor(110.1972, device='cuda:0', grad_fn=<AddBackward0>)
iteration number  9
Loss is  tensor(107.7115, device='cuda:0', grad_fn=<AddBackward0>)
iteration number  10
Loss is  tensor(105.8790, device='cuda:0', grad_fn=<AddBackward0>)
iteration number  11
Loss is  tensor(104.5052, device='cuda:0', grad_fn=<AddBackward0>)
iteration number  12
Loss is  tensor(103.

Loss is  tensor(72.5551, device='cuda:0', grad_fn=<AddBackward0>)
iteration number  96
Loss is  tensor(72.7032, device='cuda:0', grad_fn=<AddBackward0>)
iteration number  97
Loss is  tensor(73.0023, device='cuda:0', grad_fn=<AddBackward0>)
iteration number  98
Loss is  tensor(73.2439, device='cuda:0', grad_fn=<AddBackward0>)
iteration number  99
Loss is  tensor(73.1914, device='cuda:0', grad_fn=<AddBackward0>)
iteration number  100
Loss is  tensor(73.0806, device='cuda:0', grad_fn=<AddBackward0>)


In [16]:
use_cuda = torch.cuda.is_available()
device = torch.device('cuda:0' if use_cuda else 'cpu')

def get_embeddings(x):
    
      #Encoder
    x = F.relu(model.conv1(x))
    x = model.pool(x)
    x = F.relu(model.conv2(x))
    x = model.pool(x)

    x = torch.flatten(x ,start_dim = 1)
    x = nn.ReLU()(model.linear_1(x))
    x = nn.ReLU()(model.linear_2(x))
    
    return x

embeddings = []
with torch.no_grad():
    for i in book_id:
        im = Image.open(path + str(i))
        im_rgb = im.convert("RGB")
        input_tensor = data_transforms(im_rgb)
        input_tensor = input_tensor.to(device)
        val = get_embeddings(input_tensor.unsqueeze(0))
        embeddings.append({str(i) : val})
        
    

In [17]:
print(len(embeddings))

5000


In [18]:
torch.save(embeddings, "embeddings_conv_encoder_decoder.pkl")