In [1]:
import torch
from torch import nn
from pathlib import Path
from tqdm import tqdm
from torchvision import transforms
import torchvision
from torch.utils.data import DataLoader, Dataset
import os
import numpy as np
import pandas as pd
from skimage import io
from PIL import Image

In [2]:
device = "cuda" if torch.cuda.is_available() else "cpu"
BATCHS = 32

In [3]:
csv_path = Path("results.csv")
image_path = "data/"
data = pd.read_csv(csv_path, sep="|")

In [4]:
data.head(8)

Unnamed: 0,image_name,comment_number,comment
0,1000092795.jpg,0,Two young guys with shaggy hair look at their...
1,1000092795.jpg,1,"Two young , White males are outside near many..."
2,1000092795.jpg,2,Two men in green shirts are standing in a yard .
3,1000092795.jpg,3,A man in a blue shirt standing in a garden .
4,1000092795.jpg,4,Two friends enjoy time spent together .
5,10002456.jpg,0,Several men in hard hats are operating a gian...
6,10002456.jpg,1,Workers look down from up above on a piece of...
7,10002456.jpg,2,Two men working on a machine wearing hard hats .


In [5]:
data.dtypes

image_name         object
 comment_number    object
 comment           object
dtype: object

In [6]:
len(data)

158915

In [7]:
data['image_name'].head(8)

0    1000092795.jpg
1    1000092795.jpg
2    1000092795.jpg
3    1000092795.jpg
4    1000092795.jpg
5      10002456.jpg
6      10002456.jpg
7      10002456.jpg
Name: image_name, dtype: object

In [8]:
data[' comment'].head(8)

0     Two young guys with shaggy hair look at their...
1     Two young , White males are outside near many...
2     Two men in green shirts are standing in a yard .
3         A man in a blue shirt standing in a garden .
4              Two friends enjoy time spent together .
5     Several men in hard hats are operating a gian...
6     Workers look down from up above on a piece of...
7     Two men working on a machine wearing hard hats .
Name:  comment, dtype: object

In [9]:
with Image.open(f"{image_path}{data['image_name'][1000]}") as im:
    im.show()

In [10]:
max(data[' comment'].str.len())

407.0

In [11]:
import nltk

from nltk.tokenize import word_tokenize
all_tokens = []
for text in data[' comment'].astype(str).fillna(''):
    tokens = word_tokenize(text)
    all_tokens.extend(tokens)

In [12]:
tokens = list(set(all_tokens))
len(tokens)

23436

In [13]:
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
integer_encoded = label_encoder.fit_transform(tokens)
integer_encoded

array([11780,  4408, 16051, ..., 15237,  3035, 20969])

In [14]:
decoded_tokens = label_encoder.inverse_transform(integer_encoded)
decoded_tokens

array(['gemmed', 'SquarePants', 'ox', ..., 'mounts', 'Michael', 'swarm'],
      dtype='<U27')

In [15]:
words_to_num = {char: idx for idx, char in enumerate(set(tokens))}
num_to_words = {idx: char for idx, char in enumerate(set(tokens))}

encode = lambda s: [words_to_num[c] for c in word_tokenize(s)]
def encode2(l):

    l=str(l)
    value = [words_to_num[c] for c in word_tokenize(l)]
    while len(value) <82:
        value.append(0)
    return torch.tensor(value, dtype=torch.float32)

    
def decoder(l):
    return ' '.join([num_to_words.get(i,'<UNK>') for i in l])

In [16]:
n=0

for i in data[' comment']:
    # print("String:->",i)
    v=encode2(i)
    if n < len(v):
        n=len(v)

n

82

In [17]:
encode2(data[' comment'][1])

tensor([10469.,  9724.,  3290.,  8510., 22589., 21234.,  4342., 15516., 13888.,
        21463., 22996.,     0.,     0.,     0.,     0.,     0.,     0.,     0.,
            0.,     0.,     0.,     0.,     0.,     0.,     0.,     0.,     0.,
            0.,     0.,     0.,     0.,     0.,     0.,     0.,     0.,     0.,
            0.,     0.,     0.,     0.,     0.,     0.,     0.,     0.,     0.,
            0.,     0.,     0.,     0.,     0.,     0.,     0.,     0.,     0.,
            0.,     0.,     0.,     0.,     0.,     0.,     0.,     0.,     0.,
            0.,     0.,     0.,     0.,     0.,     0.,     0.,     0.,     0.,
            0.,     0.,     0.,     0.,     0.,     0.,     0.,     0.,     0.,
            0.])

In [18]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(data['image_name'], data[' comment'], test_size=0.1)

In [19]:
X_train[0],y_train[0]

('1000092795.jpg',
 ' Two young guys with shaggy hair look at their hands while hanging out in the yard .')

In [20]:
class DatasetsCustom(Dataset):
    def __init__(self, x, y, img_path, transform=None):
        self.transform = transform
        self.X = x.reset_index(drop=True)  # Reset index to avoid indexing issues
        self.y = y.reset_index(drop=True)  # Reset index to avoid indexing issues
        self.img_path = img_path

    def load_image(self, idx):
        image_path = f"{self.img_path}/{self.X.iloc[idx]}"
        image = Image.open(image_path).convert('RGB')  # Ensure image is in RGB format
        return imagez

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        img = self.load_image(idx)
        if self.transform:
            img = self.transform(img)
        output = self.y.iloc[idx]
        output = encode2(output)
        # output = torch.tensor(output, dtype=torch.float32)  # Ensure output is a tensor
        return img, output

In [21]:
transform1 = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(), 
    transforms.RandomRotation(degrees=45),  
    transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.5),
    transforms.ToTensor(),
])

train_datasets = DatasetsCustom(x=X_train,y=y_train,img_path=image_path,transform=transform1)
test_datasets = DatasetsCustom(x=X_test,y=y_test,img_path=image_path,transform=transform1)

In [22]:
len(train_datasets)

143023

In [23]:
train_dataloader = DataLoader(dataset=train_datasets, batch_size=32, num_workers=0, shuffle=True)
test_dataloader = DataLoader(dataset=test_datasets, batch_size=32, num_workers=0, shuffle=False)

In [24]:
train_dataloader

<torch.utils.data.dataloader.DataLoader at 0x70f9ac04b350>

In [25]:
class Model(nn.Module):
    def __init__(self, input_shape: int, hidden_units: int, output_shape: int):
        super(Model, self).__init__()
        self.resnet = torchvision.models.resnet18(pretrained=True)
        self.resnet.fc = nn.Identity()  # Remove the fully connected layer
        self.lstm = nn.LSTM(input_size=input_shape, hidden_size=hidden_units, batch_first=True)
        self.fc = nn.Linear(hidden_units, output_shape)

    def forward(self, x: torch.Tensor):
        batch_size = x.size(0)
        features = self.resnet(x)  # Extract features using ResNet
        features = features.view(batch_size, 1, -1)  # Reshape for LSTM input
        lstm_out, (h_n, c_n) = self.lstm(features)
        lstm_out_last = lstm_out[:, -1, :]  # Get the last output of the LSTM
        output = self.fc(lstm_out_last)
        return output


In [26]:
model=Model(input_shape=512,output_shape=82,hidden_units=10).to(device)



In [None]:
EPOCHS = 10
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

for epoch in range(EPOCHS):
    train_loss = 0
    test_loss = 0
    model.train()
    for batch in tqdm(train_dataloader):
        inputs, labels = batch

        inputs, labels = inputs.to(device), labels.to(device)
        output = model(inputs)
        loss = loss_fn(output, labels)
        train_loss += loss.item()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    model.eval()
    with torch.inference_mode():
        for batch in test_dataloader:
            inputs, labels = batch
            inputs, labels = inputs.to(device), labels.to(device)
            output = model(inputs)
            loss = loss_fn(output, labels)
            test_loss += loss.item()

    print(f"Epoch {epoch}, train loss {train_loss / len(train_dataloader)}, test loss {test_loss / len(test_dataloader)}")
        
    MODEL_PATH = Path("models")
    MODEL_NAME = "model_train.pth"
    MODEL_SAVE_PATH = MODEL_PATH / MODEL_NAME
    
    
    MODEL_PATH.mkdir(parents=True, exist_ok=True)
    #Save
    torch.save(obj=model.state_dict(),f=MODEL_SAVE_PATH)


100%|█████████████████████████████████████| 4470/4470 [6:41:31<00:00,  5.39s/it]


Epoch 0, train loss 552590.890072707, test loss 546420.9652288732


 53%|██████████████████▍                | 2354/4470 [3:30:07<3:30:39,  5.97s/it]