In [12]:
import pandas as pd
from PIL import Image
import os
from torchvision import transforms

# Locations of the label table and of the folder that load_image reads from
csv_file_path = './labels.csv'
images_folder_path = './images'

data = pd.read_csv(csv_file_path)

# Print the first rows so the parsed columns can be checked by eye
print(data.head())

# Function to load an image by file name
def load_image(image_name):
    """Open ``image_name`` from ``images_folder_path`` and return it as an RGB PIL image.

    Args:
        image_name: File name relative to ``images_folder_path``.

    Returns:
        A fully loaded PIL image in mode ``'RGB'``.

    Raises:
        FileNotFoundError: If no file exists at the joined path.
    """
    image_path = os.path.join(images_folder_path, image_name)
    if not os.path.exists(image_path):
        raise FileNotFoundError(f"No image found for {image_name}")
    # Image.open is lazy and keeps the file open; the context manager releases
    # the handle, while convert() returns an independent, fully loaded copy.
    # Forcing RGB keeps every image at 3 channels, which is what the
    # 3-channel Generator/Discriminator in this notebook expect.
    with Image.open(image_path) as img:
        return img.convert('RGB')

# Image pipeline applied by preprocess_image
# NOTE(review): there is no Normalize step here, while Generator ends in
# nn.Tanh — confirm real and generated images are meant to share a value range.
transform = transforms.Compose([
    transforms.Resize((256, 256)),  # Force every image to 256x256, the size the models are built for
    transforms.ToTensor(),          # Convert the image to a tensor
])

# Preprocess a single image
def preprocess_image(image):
    """Run ``image`` through the module-level ``transform`` pipeline and return the result."""
    processed = transform(image)
    return processed

# Example: load and preprocess the image named in the first CSV row
image_name = data.iloc[0]['image_name']
image = load_image(image_name)
preprocessed_image = preprocess_image(image)


   serial_no    image_name                                           text_ocr  \
0          0   image_1.jpg  LOOK THERE MY FRIEND LIGHTYEAR NOW ALL SOHALIK...   
1          1  image_2.jpeg  The best of #10 YearChallenge! Completed in le...   
2          2   image_3.JPG  Sam Thorne @Strippin ( Follow Follow Saw every...   
3          3   image_4.png              10 Year Challenge - Sweet Dee Edition   
4          4   image_5.png  10 YEAR CHALLENGE WITH NO FILTER 47 Hilarious ...   

                                      text_corrected overall_sentiment  
0  LOOK THERE MY FRIEND LIGHTYEAR NOW ALL SOHALIK...     very_positive  
1  The best of #10 YearChallenge! Completed in le...     very_positive  
2  Sam Thorne @Strippin ( Follow Follow Saw every...          positive  
3              10 Year Challenge - Sweet Dee Edition          positive  
4  10 YEAR CHALLENGE WITH NO FILTER 47 Hilarious ...           neutral  


In [14]:
from transformers import BertTokenizer

# Load the pre-trained tokenizer for the 'bert-base-uncased' checkpoint; used by preprocess_text
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Tokenize text
def preprocess_text(text):
    """Tokenize ``text`` with the module-level ``tokenizer``.

    The tokenizer is asked for PyTorch tensors, padded to ``max_length`` and
    truncated, with ``max_length=128``.

    Args:
        text: The text to tokenize.

    Returns:
        A ``(input_ids, attention_mask)`` pair taken from the tokenizer output.
    """
    tokenizer_options = {
        'return_tensors': 'pt',
        'padding': 'max_length',
        'truncation': True,
        'max_length': 128,
    }
    encoded = tokenizer(text, **tokenizer_options)
    ids, mask = encoded.input_ids, encoded.attention_mask
    return ids, mask

# Example: tokenize the OCR text of the first CSV row
text_ocr = data.iloc[0]['text_ocr']
input_ids, attention_mask = preprocess_text(text_ocr)


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]



config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [15]:
import torch
import torch.nn as nn

class Generator(nn.Module):
    """Fully connected generator that maps a conditioning vector to an image.

    The network is three linear layers (hidden widths 256 and 512) with ReLU
    activations and a final Tanh, so every output value lies in [-1, 1].

    Args:
        input_dim: Length of each input vector. Defaults to 128.
        image_size: Height and width of the generated square image. Defaults to 256.
        channels: Number of image channels. Defaults to 3.
    """

    def __init__(self, input_dim=128, image_size=256, channels=3):
        super().__init__()
        # Kept so forward() can reshape without repeating the constants
        self.image_size = image_size
        self.channels = channels
        self.main = nn.Sequential(
            nn.Linear(input_dim, 256),
            nn.ReLU(True),
            nn.Linear(256, 512),
            nn.ReLU(True),
            nn.Linear(512, image_size * image_size * channels),
            nn.Tanh()
        )

    def forward(self, x):
        """Return images of shape ``(batch, channels, image_size, image_size)`` for input ``x``."""
        return self.main(x).view(-1, self.channels, self.image_size, self.image_size)

class Discriminator(nn.Module):
    """Fully connected discriminator that scores images as real or fake.

    The image is flattened and passed through three linear layers (hidden
    widths 512 and 256) with LeakyReLU(0.2) activations and a final Sigmoid,
    so each score lies in [0, 1].

    Args:
        image_size: Height and width of the square input images. Defaults to 256.
        channels: Number of image channels. Defaults to 3.
    """

    def __init__(self, image_size=256, channels=3):
        super().__init__()
        # Number of values in one flattened image
        self.flat_dim = image_size * image_size * channels
        self.main = nn.Sequential(
            nn.Linear(self.flat_dim, 512),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(512, 256),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(256, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        """Return one score per image, shape ``(batch, 1)``."""
        # reshape (unlike view) also accepts non-contiguous inputs such as permuted tensors
        return self.main(x.reshape(-1, self.flat_dim))

# Create one generator and one discriminator instance
generator = Generator()
discriminator = Discriminator()



In [22]:
import pandas as pd
from PIL import Image, ImageFile
import os
from torchvision import transforms
import torch
import torch.nn as nn
import torch.optim as optim
from transformers import BertTokenizer

# Let PIL load image files whose data is truncated
ImageFile.LOAD_TRUNCATED_IMAGES = True

# Locations of the label table and of the folder that load_image reads from
csv_file_path = './labels.csv'
images_folder_path = './images'

data = pd.read_csv(csv_file_path)

# Print the first rows so the parsed columns can be checked by eye
print(data.head())

# Function to load images based on image name
def load_image(image_name):
    """Open ``image_name`` from ``images_folder_path`` and return it as an RGB PIL image.

    Args:
        image_name: File name relative to ``images_folder_path``.

    Returns:
        A fully loaded PIL image in mode ``'RGB'``, or ``None`` when the file
        exists but cannot be opened or decoded (the error is printed).

    Raises:
        FileNotFoundError: If no file exists at the joined path.
    """
    image_path = os.path.join(images_folder_path, image_name)
    if not os.path.exists(image_path):
        raise FileNotFoundError(f"No image found for {image_name}")
    try:
        # The context manager closes the underlying file even when convert()
        # fails part-way; convert() itself returns an independent loaded copy.
        with Image.open(image_path) as img:
            return img.convert('RGB')
    except OSError as e:  # IOError is an alias of OSError, so one name covers both
        print(f"Error loading image {image_name}: {e}")
        return None

# Image pipeline applied by preprocess_image
# NOTE(review): there is no Normalize step here, while Generator ends in
# nn.Tanh — confirm real and generated images are meant to share a value range.
transform = transforms.Compose([
    transforms.Resize((256, 256)),  # Force every image to 256x256, the size the models are built for
    transforms.ToTensor(),          # Convert the image to a tensor
])

# Preprocess a single image
def preprocess_image(image):
    """Apply the module-level ``transform`` pipeline to ``image`` and return its output."""
    image_tensor = transform(image)
    return image_tensor

# Load the pre-trained tokenizer for the 'bert-base-uncased' checkpoint; used by preprocess_text
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Tokenize text
def preprocess_text(text):
    """Tokenize ``text`` and return the token ids and attention mask as float tensors.

    Missing values (``None`` or a float NaN, which is how pandas represents an
    empty CSV cell) are tokenized as the empty string instead of the literal
    words ``"None"`` / ``"nan"``. Any other non-string value is converted with
    ``str()``.

    Args:
        text: The text to tokenize; may be a non-string cell value.

    Returns:
        A ``(input_ids, attention_mask)`` pair, both cast to float.
    """
    import math

    if text is None or (isinstance(text, float) and math.isnan(text)):
        text = ''
    elif not isinstance(text, str):
        text = str(text)
    tokens = tokenizer(text, return_tensors='pt', padding='max_length', truncation=True, max_length=128)
    # Cast to float because the ids are fed straight into nn.Linear layers
    return tokens.input_ids.float(), tokens.attention_mask.float()

# Define the generator and discriminator for the Conditional GAN
class Generator(nn.Module):
    """Fully connected generator that maps a conditioning vector to an image.

    The network is three linear layers (hidden widths 256 and 512) with ReLU
    activations and a final Tanh, so every output value lies in [-1, 1].

    Args:
        input_dim: Length of each input vector. Defaults to 128, the
            ``max_length`` that ``preprocess_text`` pads token ids to.
        image_size: Height and width of the generated square image. Defaults to 256.
        channels: Number of image channels. Defaults to 3.
    """

    def __init__(self, input_dim=128, image_size=256, channels=3):
        super().__init__()
        # Kept so forward() can reshape without repeating the constants
        self.image_size = image_size
        self.channels = channels
        self.main = nn.Sequential(
            nn.Linear(input_dim, 256),
            nn.ReLU(True),
            nn.Linear(256, 512),
            nn.ReLU(True),
            nn.Linear(512, image_size * image_size * channels),
            nn.Tanh()
        )

    def forward(self, x):
        """Return images of shape ``(batch, channels, image_size, image_size)`` for input ``x``."""
        return self.main(x).view(-1, self.channels, self.image_size, self.image_size)

class Discriminator(nn.Module):
    """Fully connected discriminator that scores images as real or fake.

    The image is flattened and passed through three linear layers (hidden
    widths 512 and 256) with LeakyReLU(0.2) activations and a final Sigmoid,
    so each score lies in [0, 1].

    Args:
        image_size: Height and width of the square input images. Defaults to 256.
        channels: Number of image channels. Defaults to 3.
    """

    def __init__(self, image_size=256, channels=3):
        super().__init__()
        # Number of values in one flattened image
        self.flat_dim = image_size * image_size * channels
        self.main = nn.Sequential(
            nn.Linear(self.flat_dim, 512),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(512, 256),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(256, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        """Return one score per image, shape ``(batch, 1)``."""
        # reshape (unlike view) also accepts non-contiguous inputs such as permuted tensors
        return self.main(x.reshape(-1, self.flat_dim))

# Create one generator and one discriminator instance
generator = Generator()
discriminator = Discriminator()

# Binary cross-entropy loss, plus one Adam optimizer per network (both with lr=0.0002)
criterion = nn.BCELoss()
optimizer_g = optim.Adam(generator.parameters(), lr=0.0002)
optimizer_d = optim.Adam(discriminator.parameters(), lr=0.0002)

# Training loop
# NOTE(review): the generator is fed the float-cast token ids directly, and the
# real images come from a transform with no Normalize step while the generator
# ends in Tanh. The logged epoch-0 losses (D ~ 0, G ~ 98) suggest the
# discriminator separates real from fake trivially — revisit input scaling.
num_epochs = 10000
batch_size = 64

for epoch in range(num_epochs):
    # Losses of the last batch trained in this epoch. They stay None when every
    # batch was skipped, so the report below never hits an undefined name or
    # prints stale values from an earlier epoch.
    loss_d_real = loss_d_fake = loss_g = None

    for i in range(0, len(data), batch_size):
        # Get batch data
        batch_data = data.iloc[i:i+batch_size]
        images = []
        texts = []

        for _, row in batch_data.iterrows():
            img = load_image(row['image_name'])
            if img is None:
                # Unreadable image: drop the image and its text together
                continue
            images.append(preprocess_image(img))
            input_ids, _ = preprocess_text(row['text_ocr'])
            texts.append(input_ids)

        if not images:
            continue

        images = torch.stack(images)
        texts = torch.cat(texts)

        # Labels sized to the actual batch, which can be smaller than batch_size
        current_batch = images.size(0)
        real_labels = torch.ones(current_batch, 1)
        fake_labels = torch.zeros(current_batch, 1)

        # Train Discriminator on real images...
        optimizer_d.zero_grad()
        loss_d_real = criterion(discriminator(images), real_labels)
        loss_d_real.backward()

        # ...and on generated ones. No noise vector is sampled: the generator is
        # conditioned on the token ids only. detach() keeps this step from
        # back-propagating into the generator.
        fake_images = generator(texts)
        loss_d_fake = criterion(discriminator(fake_images.detach()), fake_labels)
        loss_d_fake.backward()
        optimizer_d.step()

        # Train Generator: it is rewarded when the discriminator labels fakes as real
        optimizer_g.zero_grad()
        loss_g = criterion(discriminator(fake_images), real_labels)
        loss_g.backward()
        optimizer_g.step()

    if epoch % 1000 == 0 and loss_g is not None:
        # .item() reports plain Python floats instead of graph-attached tensors
        loss_d_total = loss_d_real.item() + loss_d_fake.item()
        print(f'Epoch [{epoch}/{num_epochs}] - Loss D: {loss_d_total}, Loss G: {loss_g.item()}')
        # Save models or generate sample images
        # Save models or generate sample images


   serial_no    image_name                                           text_ocr  \
0          0   image_1.jpg  LOOK THERE MY FRIEND LIGHTYEAR NOW ALL SOHALIK...   
1          1  image_2.jpeg  The best of #10 YearChallenge! Completed in le...   
2          2   image_3.JPG  Sam Thorne @Strippin ( Follow Follow Saw every...   
3          3   image_4.png              10 Year Challenge - Sweet Dee Edition   
4          4   image_5.png  10 YEAR CHALLENGE WITH NO FILTER 47 Hilarious ...   

                                      text_corrected overall_sentiment  
0  LOOK THERE MY FRIEND LIGHTYEAR NOW ALL SOHALIK...     very_positive  
1  The best of #10 YearChallenge! Completed in le...     very_positive  
2  Sam Thorne @Strippin ( Follow Follow Saw every...          positive  
3              10 Year Challenge - Sweet Dee Edition          positive  
4  10 YEAR CHALLENGE WITH NO FILTER 47 Hilarious ...           neutral  




Epoch [0/10000] - Loss D: 3.7906062442061277e-36, Loss G: 98.67425537109375


In [None]:
# Function to generate images from text
def generate_image(text):
    """Generate a PIL image from ``text`` with the module-level ``generator``.

    Args:
        text: Prompt that is tokenized by ``preprocess_text``.

    Returns:
        A PIL image built from the generator output for ``text``.
    """
    input_ids, _ = preprocess_text(text)
    with torch.no_grad():
        # The generator's linear layers need floating-point input; .float() is a
        # no-op when preprocess_text already returned floats.
        generated_image = generator(input_ids.float())
    # The generator ends in Tanh, so values can be negative. ToPILImage scales
    # float tensors by 255 and casts to uint8, which would wrap out-of-range
    # values; clamp to [0, 1] before converting.
    pixels = generated_image.squeeze(0).clamp(0.0, 1.0)
    return transforms.ToPILImage()(pixels)

# Example: generate an image for a sample prompt and display it
new_text = "A beautiful landscape with mountains"
generated_image = generate_image(new_text)
generated_image.show()
