In [None]:
import torch
import torch.nn as nn
import torchvision.models as models
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import torch.optim as optim
import os
import csv
import json

In [None]:
import sys

from google.colab import drive

# Mount Google Drive so files under /content/drive become readable/writable.
drive.mount('/content/drive')

# Put the dataset folder on the import path.
# NOTE(review): presumably this is where the project's model_A module lives -- confirm.
# The membership check keeps the cell idempotent: re-running it no longer
# appends the same directory to sys.path again and again.
_IC_DATASET_DIR = '/content/drive/MyDrive/Colab Notebooks/Data/IC_dataset'
if _IC_DATASET_DIR not in sys.path:
    sys.path.append(_IC_DATASET_DIR)

Mounted at /content/drive


In [None]:
from model_A import ImageCaptioner,EncoderCNN, DecoderRNN
import os
import csv

In [None]:
# Width of the image feature vector.
# NOTE(review): presumably matches the output size of EncoderCNN in model_A -- confirm.
feature_size=1024
# The hidden size is deliberately tied to the feature size (same value).
hidden_size=feature_size
# Step size for the optimiser.
# NOTE(review): no optimiser is constructed in the cells visible here -- confirm where this is consumed.
learning_rate=3e-4

In [None]:
class TrainData(Dataset):
    """Image-captioning training set yielding (CNN feature, caption token ids).

    On construction the dataset:
      * lists ``img_path`` and orders the files by the integer embedded in
        each file name,
      * reads the caption text from the third column of the CSV at
        ``caption_path`` (the first row is treated as a header),
      * builds a vocabulary from the whitespace-separated caption words plus
        the ``<START>`` and ``<EOS>`` markers and writes it to
        ``vocabulary_path`` as JSON,
      * instantiates ``EncoderCNN`` to turn images into feature tensors.

    NOTE(review): image ``idx`` is paired with caption ``idx`` purely by
    position, so the CSV rows are assumed to be in the same numeric order as
    the sorted image files -- confirm against the dataset.

    Args:
        img_path: Directory containing the training images.
        caption_path: CSV file whose third column holds the caption text.
        vocabulary_path: Where to write the index->word vocabulary JSON.
            Defaults to ``DEFAULT_VOCABULARY_PATH``.
    """

    DEFAULT_VOCABULARY_PATH = '/content/drive/MyDrive/Colab Notebooks/Data/IC_dataset/vocabulary.json'

    def __init__(self, img_path, caption_path, vocabulary_path=None):
        if vocabulary_path is None:
            vocabulary_path = self.DEFAULT_VOCABULARY_PATH

        # Order images numerically rather than lexicographically.
        # The slice assumes a 6-character prefix and a 4-character extension
        # around the number (e.g. "xxxxxx123.jpg") -- TODO confirm naming.
        file_list = sorted(os.listdir(img_path), key=lambda name: int(name[6:-4]))
        self.image_path = [os.path.join(img_path, name) for name in file_list]

        # newline='' is what the csv module asks for so that quoted fields
        # containing line breaks are parsed correctly.
        with open(caption_path, mode='r', newline='') as file:
            reader = csv.reader(file)
            next(reader, None)  # drop the header row
            # csv.reader yields [] for blank lines; skip those instead of
            # failing with IndexError on row[2].
            self.captions = [row[2] for row in reader if row]

        self.processor = transforms.Compose([
            transforms.Resize((224, 224)),  # Resize image to match model input size
            transforms.ToTensor(),           # Convert image to tensor
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize image
        ])

        # Vocabulary: sorted unique caption words followed by the two markers.
        unique_words = sorted({word for caption in self.captions for word in caption.split()})
        unique_words += ['<START>', '<EOS>']
        self.vocabulary = dict(enumerate(unique_words))
        # Reverse lookup built once here; previously it was rebuilt from the
        # whole vocabulary on every __getitem__ call.
        self._word_to_index = {word: i for i, word in self.vocabulary.items()}

        with open(vocabulary_path, 'w') as json_file:
            json.dump(self.vocabulary, json_file)

        self.encoder = EncoderCNN()
        # The encoder is only used for gradient-free feature extraction, so
        # switch it to inference mode; any batch-norm / dropout layers inside
        # it would otherwise run in training mode on these single-image batches.
        self.encoder.eval()

    def __len__(self):
        """Return the number of captions (one sample per caption)."""
        return len(self.captions)

    def _encode_caption(self, caption):
        """Convert a caption string into a ``(1, n_words + 2)`` index tensor.

        The sequence is ``<START>``, the caption's whitespace-separated words,
        then ``<EOS>``, each mapped through the vocabulary.
        """
        lookup = self._word_to_index
        caption_idx = [lookup['<START>']]
        caption_idx.extend(lookup[word] for word in caption.split())
        caption_idx.append(lookup['<EOS>'])
        return torch.tensor(caption_idx).unsqueeze(0)

    def __getitem__(self, idx):
        """Return ``{'feature': encoder output, 'caption': token tensor}`` for sample ``idx``."""
        # Image.open is lazy and keeps the file handle open; the context
        # manager closes it once convert() has loaded the pixel data.
        with Image.open(self.image_path[idx]) as raw_image:
            input_image = raw_image.convert('RGB')

        # Add a leading batch dimension of 1 before feeding the encoder.
        input_batch = self.processor(input_image).unsqueeze(0)
        with torch.no_grad():
            output = self.encoder(input_batch)

        return {
            'feature': output,
            'caption': self._encode_caption(self.captions[idx])
        }

In [None]:
# Locations of the training images and of the CSV holding their captions.
img_path=r"/content/drive/MyDrive/Colab Notebooks/Data/IC_dataset/train"
caption_path="/content/drive/MyDrive/Colab Notebooks/Data/IC_dataset/train.csv"

# Build the dataset and keep a handle on its index->word vocabulary.
train_data=TrainData(
    img_path=img_path,
    caption_path=caption_path,
)
voc=train_data.vocabulary

# Smoke test: pull a single sample through the normal indexing protocol.
data=train_data[87]

Downloading: "https://download.pytorch.org/models/densenet121-a639ec97.pth" to /root/.cache/torch/hub/checkpoints/densenet121-a639ec97.pth
100%|██████████| 30.8M/30.8M [00:00<00:00, 138MB/s]


In [None]:
# Run every sample through the dataset once and persist the results to Drive
# as numbered JSON shards (train_data_1.json, train_data_2.json, ...).
SHARD_SIZE = 1000  # a shard is flushed whenever the running index hits a multiple of this
SHARD_DIR = '/content/drive/MyDrive/Colab Notebooks/Data/IC_dataset'

train_dataloader = DataLoader(train_data, batch_size = 1, shuffle = False)
data_dict={}
j=0  # number of shards flushed so far
for i,batch in enumerate(train_dataloader):
  # batch_size is 1, so [0] strips the batch dimension the DataLoader added.
  feature = batch['feature'][0]
  caption = batch['caption'][0]
  print(i ,"  shapes :- ",feature.shape," ",caption.shape)
  # Tensors are not JSON serialisable; store them as nested lists.
  data_dict[i] = {
    'feature': feature.tolist(),
    'caption': caption.tolist()
  }
  if i!=0 and i%SHARD_SIZE==0:
    j=j+1
    # Name the shard with the integer counter. The previous `i/1000` is true
    # division and produced names such as "train_data_1.0.json", which the
    # merge step (it opens "train_data_1.json") could never find.
    with open(f'{SHARD_DIR}/train_data_{j}.json', 'w') as json_file:
      json.dump(data_dict, json_file)
    data_dict={}
# Flush whatever is left (possibly an empty dict) as the final shard.
with open(f'{SHARD_DIR}/train_data_{j+1}.json', 'w') as json_file:
  json.dump(data_dict, json_file)


In [None]:
import json

# Fold the numbered shard files (1 through 6) into a single dictionary.
merged_data = {}
for shard_no in range(1, 7):
    shard_file = f'/content/drive/MyDrive/Colab Notebooks/Data/IC_dataset/train_data_{shard_no}.json'
    with open(shard_file, 'r') as shard_handle:
        data = json.load(shard_handle)  # one shard's contents as a dict
    merged_data.update(data)

# Report how many entries ended up in the merged dictionary.
print(len(merged_data))

# Persist the combined dictionary as one indented JSON file.
with open('/content/drive/MyDrive/Colab Notebooks/Data/IC_dataset/train_data.json', 'w') as merged_file:
    json.dump(merged_data, merged_file, indent=4)