In [32]:
import pandas as pd
import numpy as np

In [33]:
# Load the saved file
df = pd.read_parquet('../data/processed_emoji_dataset.parquet')

# Convert lists back to Numpy arrays of float32
df["combined_embedding"] = df["combined_embedding"].apply(lambda x: np.array(x, dtype=np.float32))

In [34]:
df.head()

Unnamed: 0,combined_embedding,image_path
0,"[0.02519471, -0.04989285, 0.04940779, -0.01256...",../data/tensor_images/GoogleEmoji/0023-20e3.pt
1,"[0.02519471, -0.04989285, 0.04940779, -0.01256...",../data/tensor_images/JoyPixelsEmoji/0023-20e3.pt
2,"[0.02519471, -0.04989285, 0.04940779, -0.01256...",../data/tensor_images/OpenMojiEmoji/0023-20e3.pt
3,"[0.030324753, -0.047731012, 0.04585727, -0.008...",../data/tensor_images/GoogleEmoji/002a-20e3.pt
4,"[0.030324753, -0.047731012, 0.04585727, -0.008...",../data/tensor_images/JoyPixelsEmoji/002a-20e3.pt


In [35]:
# Check the dimension of the combined_embedding column
embedding_sample = df["combined_embedding"].iloc[0]
embedding_dim = len(embedding_sample)

print(f"The dimension of the combined_embedding is: {embedding_dim}")

The dimension of the combined_embedding is: 1152


In [47]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from PIL import Image

# Filter out non-existent files from the DataFrame
df = df[df['image_path'].apply(os.path.exists)]

# Define the generator network
class Generator(nn.Module):
    def __init__(self, embedding_dim, noise_dim, image_channels, feature_dim):
        super(Generator, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(embedding_dim + noise_dim, feature_dim),
            nn.ReLU(True),
            nn.Linear(feature_dim, feature_dim * 2),
            nn.ReLU(True),
            nn.Linear(feature_dim * 2, feature_dim * 4),
            nn.ReLU(True),
            nn.Linear(feature_dim * 4, image_channels * 64 * 64),
            nn.Tanh()
        )

    def forward(self, embedding, noise):
        x = torch.cat((embedding, noise), dim=1)
        x = self.model(x)
        x = x.view(x.size(0), 3, 64, 64)
        return x

# Define the discriminator network
class Discriminator(nn.Module):
    def __init__(self, embedding_dim, image_channels, feature_dim):
        super(Discriminator, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(embedding_dim + image_channels * 64 * 64, feature_dim * 4),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(feature_dim * 4, feature_dim * 2),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(feature_dim * 2, feature_dim),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(feature_dim, 1),
            nn.Sigmoid()
        )

    def forward(self, embedding, image):
        image_flat = image.view(image.size(0), -1)
        x = torch.cat((embedding, image_flat), dim=1)
        x = self.model(x)
        return x

# Custom dataset
class CustomDataset(Dataset):
    def __init__(self, dataframe, transform=None):
        self.dataframe = dataframe
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        embedding = self.dataframe.iloc[idx]['combined_embedding']
        image_path = self.dataframe.iloc[idx]['image_path']
        
        image = torch.load(image_path)  # Assuming the image is saved as a tensor file
        image = transforms.ToPILImage()(image)  # Convert tensor to PIL Image
        if self.transform:
            image = self.transform(image)
        return torch.tensor(embedding, dtype=torch.float32), image

# Hyperparameters
embedding_dim = len(df["combined_embedding"].iloc[0])  # Automatically determine the embedding dimension
noise_dim = 100
image_channels = 3
feature_dim = 128
batch_size = 64
lr = 0.0002
num_epochs = 3

# Transformations
transform = transforms.Compose([
    transforms.Resize((64, 64)),  # Ensure all images are resized to 64x64
    transforms.ToTensor(),
    transforms.Normalize([0.5]*3, [0.5]*3)
])

# Create dataset and dataloader
dataset = CustomDataset(df, transform=transform)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Initialize models
generator = Generator(embedding_dim, noise_dim, image_channels, feature_dim)
discriminator = Discriminator(embedding_dim, image_channels, feature_dim)

# Loss and optimizers
criterion = nn.BCELoss()
optimizer_G = optim.Adam(generator.parameters(), lr=lr, betas=(0.5, 0.999))
optimizer_D = optim.Adam(discriminator.parameters(), lr=lr, betas=(0.5, 0.999))

# Training loop
for epoch in range(num_epochs):
    for i, (embeddings, real_images) in enumerate(dataloader):
        batch_size = real_images.size(0)
        
        # Labels
        real_labels = torch.ones(batch_size, 1)
        fake_labels = torch.zeros(batch_size, 1)
        
        # Train Discriminator
        optimizer_D.zero_grad()
        
        # Real images
        outputs = discriminator(embeddings, real_images)
        d_loss_real = criterion(outputs, real_labels)
        
        # Fake images
        noise = torch.randn(batch_size, noise_dim)
        fake_images = generator(embeddings, noise)
        outputs = discriminator(embeddings, fake_images.detach())
        d_loss_fake = criterion(outputs, fake_labels)
        
        # Backprop and optimize
        d_loss = d_loss_real + d_loss_fake
        d_loss.backward()
        optimizer_D.step()
        
        # Train Generator
        optimizer_G.zero_grad()
        
        # Generate fake images
        outputs = discriminator(embeddings, fake_images)
        g_loss = criterion(outputs, real_labels)
        
        # Backprop and optimize
        g_loss.backward()
        optimizer_G.step()
        
        if (i+1) % 100 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(dataloader)}], D Loss: {d_loss.item()}, G Loss: {g_loss.item()}')

Epoch [1/3], Step [100/196], D Loss: 0.08970563858747482, G Loss: 5.17540979385376
Epoch [2/3], Step [100/196], D Loss: 0.933929443359375, G Loss: 11.28445816040039
Epoch [3/3], Step [100/196], D Loss: 0.14226925373077393, G Loss: 6.479770660400391


In [50]:
#Run the generator to generate an emoji based on text "similing text"
text = "smiling text"

from sentence_transformers import SentenceTransformer


# Load SBERT model
sbert_model = SentenceTransformer('all-MiniLM-L6-v2')
sbert_model = sbert_model.to(device)

# Ensure text columns are strings
# final_df["emojipedia_description"] = final_df["emojipedia_description"].fillna("").astype(str)
# final_df["llm_description"] = final_df["llm_description"].fillna("").astype(str)

# Return SBERT embedding for a given text.
def embed_text(text):
    if pd.isna(text) or text.strip() == "":
        return np.zeros(384, dtype=np.float32)  # Return zero vector for missing values (SBERT output size = 384)
    return sbert_model.encode(text).astype(np.float32)


# Embed the text
text_embedding = embed_text(text)

# Generate a random noise vector
noise = torch.randn(1, noise_dim)

# Generate the emoji
generated_emoji = generator(torch.tensor(text_embedding, dtype=torch.float32).unsqueeze(0), noise)
generated_emoji = generated_emoji.squeeze().detach().cpu().numpy()

# Convert the generated emoji to a PIL image
generated_emoji = np.moveaxis(generated_emoji, 0, -1)
generated_emoji = (generated_emoji + 1) / 2  # Denormalize the image
generated_emoji = (generated_emoji * 255).astype(np.uint8)
generated_emoji = Image.fromarray(generated_emoji)

# Display the generated emoji
generated_emoji.show()




RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x484 and 1252x128)