In [43]:
import numpy as np
import pandas as pd
import cv2
from PIL import Image
import torch
import os
from torch import nn
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS
import gensim
import gensim.downloader as api
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
import seaborn as sns
from collections import OrderedDict
import pickle

In [2]:
# Name of the pretrained embedding set fetched through gensim's downloader.
GLOVE_MODEL_NAME = "glove-wiki-gigaword-50"
glove = api.load(GLOVE_MODEL_NAME)

In [32]:
# Prefer the GPU, but fall back to the CPU so the notebook still runs on
# machines without CUDA instead of failing at the first `.to(device)`.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [33]:
class AE_imgs(torch.nn.Module):
    """Autoencoder built from 1-D convolutions with a 20-wide bottleneck.

    The encoder takes 720 input channels, applies three convolutions
    (kernel sizes 64, 8 and 1) and then a linear layer that maps the last
    axis from 410 to 20.  With the default stride and no padding this implies
    an input length of 480.  The decoder mirrors the encoder using a linear
    layer, a 1x1 convolution and two transposed convolutions.
    """

    def __init__(self):
        super().__init__()

        # Layer names are part of the checkpoint format; keep them stable.
        encoder_layers = [
            ("conv1", nn.Conv1d(in_channels=720, out_channels=500, kernel_size=64)),
            ("relu1", nn.ReLU()),
            ("dropout1", nn.Dropout(p=0.1)),
            ("conv2", nn.Conv1d(in_channels=500, out_channels=200, kernel_size=8)),
            ("relu2", nn.ReLU()),
            ("dropout2", nn.Dropout(p=0.1)),
            ("conv3", nn.Conv1d(in_channels=200, out_channels=50, kernel_size=1)),
            ("linear", nn.Linear(in_features=410, out_features=20)),
        ]
        self.encoder = nn.Sequential(OrderedDict(encoder_layers))

        decoder_layers = [
            ("linear", nn.Linear(in_features=20, out_features=410)),
            ("conv2", nn.Conv1d(in_channels=50, out_channels=200, kernel_size=1)),
            ("dropout2", nn.Dropout(p=0.1)),
            ("relu2", nn.ReLU()),
            ("conv3", nn.ConvTranspose1d(in_channels=200, out_channels=500, kernel_size=8)),
            ("dropout1", nn.Dropout(p=0.1)),
            ("relu3", nn.ReLU()),
            ("conv4", nn.ConvTranspose1d(in_channels=500, out_channels=720, kernel_size=64)),
        ]
        self.decoder = nn.Sequential(OrderedDict(decoder_layers))

    def forward(self, x):
        """Run ``x`` through the encoder and decoder and return the reconstruction."""
        return self.decoder(self.encoder(x))
    
class AE_corpus(torch.nn.Module):
    """Fully connected autoencoder from 2400 features down to a 20-wide code.

    The encoder narrows 2400 -> 1000 -> 500 -> 100 -> 50 -> 20 with one
    dropout stage (p=0.2) after the second layer; the decoder widens back in
    the reverse order.  No non-linearities are applied between the layers.
    """

    def __init__(self):
        super().__init__()

        # Layer names are part of the checkpoint format; keep them stable.
        encoder_layers = [
            ("lin1", nn.Linear(in_features=2400, out_features=1000)),
            ("lin2", nn.Linear(in_features=1000, out_features=500)),
            ("dp", nn.Dropout(p=.2)),
            ("lin3", nn.Linear(in_features=500, out_features=100)),
            ("lin4", nn.Linear(in_features=100, out_features=50)),
            ("linear", nn.Linear(in_features=50, out_features=20)),
        ]
        self.encoder = nn.Sequential(OrderedDict(encoder_layers))

        decoder_layers = [
            ("linear", nn.Linear(in_features=20, out_features=50)),
            ("lin4", nn.Linear(in_features=50, out_features=100)),
            ("lin3", nn.Linear(in_features=100, out_features=500)),
            ("dp", nn.Dropout(p=.2)),
            ("lin2", nn.Linear(in_features=500, out_features=1000)),
            ("lin1", nn.Linear(in_features=1000, out_features=2400)),
        ]
        self.decoder = nn.Sequential(OrderedDict(decoder_layers))

    def forward(self, x):
        """Run ``x`` through the encoder and decoder and return the reconstruction."""
        return self.decoder(self.encoder(x))
    
class CLIP(nn.Module):
    """Translator network mapping a (1, 20) input to a (50, 20) output.

    A transposed convolution expands the single input channel to 50 channels
    (length 20 -> 22), then a chain of linear layers reshapes the last axis
    22 -> 20 -> 30 -> 50 -> 20.
    """

    def __init__(self):
        super().__init__()

        # Layer names are part of the checkpoint format; keep them stable.
        # Note: no non-linearities are applied between these layers.
        translator_layers = [
            ("conv1", nn.ConvTranspose1d(in_channels=1, out_channels=50, kernel_size=3)),
            ("dout1", nn.Dropout(p=.1)),
            ("lin1", nn.Linear(in_features=22, out_features=20)),
            ("dout2", nn.Dropout(p=.1)),
            ("lin2", nn.Linear(in_features=20, out_features=30)),
            ("lin3", nn.Linear(in_features=30, out_features=50)),
            ("lin4", nn.Linear(in_features=50, out_features=20)),
        ]
        self.translator = nn.Sequential(OrderedDict(translator_layers))

    def forward(self, x):
        """Return the translator's output for ``x``."""
        return self.translator(x)

In [34]:
# Load the trained image autoencoder.  The checkpoint is expected to be a
# whole pickled module, so the AE_imgs class must already be defined here;
# building a fresh AE_imgs() first would only be thrown away.
# map_location restores GPU-saved weights onto whatever `device` resolves to.
# NOTE(review): torch.load unpickles arbitrary objects -- only load trusted
# checkpoints.  PyTorch >= 2.6 additionally needs weights_only=False for
# whole-module checkpoints.
model_img = torch.load("MLModels/AE_images", map_location=device)
model_img = model_img.to(device)
# This notebook only runs inference: switch the Dropout layers off.
model_img.eval()

# Validation using MSE Loss function
loss_function = torch.nn.MSELoss()

# Adam optimizer with lr = 0.0001, bound to the parameters of the loaded
# model (previously it pointed at a discarded, freshly initialised model).
optimizer = torch.optim.Adam(model_img.parameters(),
                             lr = .0001, weight_decay = .0005)

In [35]:
# Load the trained text autoencoder.  The checkpoint is expected to be a
# whole pickled module, so the AE_corpus class must already be defined here;
# building a fresh AE_corpus() first would only be thrown away.
# map_location restores GPU-saved weights onto whatever `device` resolves to.
# NOTE(review): torch.load unpickles arbitrary objects -- only load trusted
# checkpoints.  PyTorch >= 2.6 additionally needs weights_only=False for
# whole-module checkpoints.
model_corpus = torch.load("MLModels/AE_corpus", map_location=device)
model_corpus = model_corpus.to(device)
# This notebook only runs inference: switch the Dropout layers off.
model_corpus.eval()

# Validation using MSE Loss function
loss_function = torch.nn.MSELoss()

# Adam optimizer with lr = 0.0001, bound to the parameters of the loaded
# model (previously it pointed at a discarded, freshly initialised model).
optimizer = torch.optim.Adam(model_corpus.parameters(),
                             lr = .0001, weight_decay = .0001)

In [36]:
# Load the trained translator network.  The checkpoint is expected to be a
# whole pickled module, so the CLIP class must already be defined here;
# building a fresh CLIP() first would only be thrown away.
# map_location restores GPU-saved weights onto whatever `device` resolves to.
# NOTE(review): torch.load unpickles arbitrary objects -- only load trusted
# checkpoints.  PyTorch >= 2.6 additionally needs weights_only=False for
# whole-module checkpoints.
model_clip = torch.load("MLModels/CLIP", map_location=device)
model_clip = model_clip.to(device)
# This notebook only runs inference: switch the Dropout layers off.
model_clip.eval()

# Validation using MSE Loss function
loss_function = torch.nn.MSELoss()

# Adam optimizer with lr = 0.01, bound to the parameters of the loaded
# model (previously it pointed at a discarded, freshly initialised model).
optimizer = torch.optim.Adam(model_clip.parameters(),
                             lr = .01, weight_decay = .0001)

In [37]:
# Load the pickled vectorizer used by pipline(); the context manager makes
# sure the file handle is closed again.
# NOTE(review): pickle.load can execute arbitrary code -- only use it on
# files from a trusted source.
with open("MLModels/Vectorizer.pkl", "rb") as vectorizer_file:
    cv = pickle.load(vectorizer_file)

In [38]:
def pipline(text, vectorizer=None, embeddings=None):
    """Turn one text description into a tensor of per-vocabulary-term embeddings.

    The text is passed through the vectorizer; for every vocabulary term that
    occurs in it, the matching row of the result is filled with that term's
    embedding vector.  All other rows stay zero.

    Parameters
    ----------
    text : str
        The description to encode.
    vectorizer : optional
        Object with ``transform`` and ``get_feature_names_out`` (or the older
        ``get_feature_names``).  Defaults to the notebook-level ``cv``.
    embeddings : optional
        Object supporting ``word in embeddings`` and ``get_vector(word)``.
        Defaults to the notebook-level ``glove``.

    Returns
    -------
    torch.FloatTensor
        CPU tensor of shape ``(1, n_vocabulary_terms, embedding_dim)``.

    Notes
    -----
    Terms that are missing from ``embeddings`` keep an all-zero row instead of
    raising ``KeyError``.
    """
    vectorizer = cv if vectorizer is None else vectorizer
    embeddings = glove if embeddings is None else embeddings

    # Densify once instead of on every loop iteration.
    counts = vectorizer.transform(np.array([text])).toarray()

    # get_feature_names() was removed in scikit-learn 1.2; prefer the
    # replacement when present and look the vocabulary up only once.
    names_fn = getattr(vectorizer, "get_feature_names_out", None)
    if names_fn is None:
        names_fn = vectorizer.get_feature_names
    vocabulary = names_fn()

    # 50 matches the "glove-wiki-gigaword-50" vectors used by default.
    dim = getattr(embeddings, "vector_size", 50)
    vectors = np.zeros((counts.shape[0], counts.shape[1], dim))

    # np.argwhere yields (row, column) pairs of the non-zero counts.
    for row, col in np.argwhere(counts):
        word = vocabulary[col]
        if word in embeddings:
            vectors[row, col] = embeddings.get_vector(word)

    return torch.from_numpy(vectors).float()

In [41]:
def text_image(vect_ten):
    """Generate an image tensor from an encoded text description.

    The first entry of ``vect_ten`` is flattened to a single row, encoded by
    ``model_corpus.encoder``, translated by ``model_clip`` and finally decoded
    by ``model_img.decoder``.

    Parameters
    ----------
    vect_ten : torch.Tensor
        Output of ``pipline``, already moved to the models' device.

    Returns
    -------
    torch.Tensor
        The output of the image decoder (no autograd graph attached).

    Notes
    -----
    The three models are switched to evaluation mode as a side effect so that
    their Dropout layers do not inject noise into the generated image.
    """
    for model in (model_corpus, model_clip, model_img):
        model.eval()

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        enc = model_corpus.encoder(vect_ten[0].reshape(1, -1))
        translate = model_clip(enc)
        image = model_img.decoder(translate)

    return image

## Start

In [77]:
# Ask for a description, embed it and place the tensor on the compute device.
text = input("Description: ")
vect_ten = pipline(text).to(device)

Description: Painting of the Monalisa in modern art.


In [78]:
# Run the encoded description through text_image() to get the image tensor.
array_image = text_image(vect_ten)

In [79]:
# Bring the generated tensor back to host memory as a NumPy array and wrap
# it in a PIL image.
pixels = array_image.cpu().detach().numpy()
im = Image.fromarray(pixels)

In [80]:
# Convert to RGB first, then write the result to disk as a PNG.
rgb_im = im.convert('RGB')
rgb_im.save("test.png")