In [1]:
import numpy as np
import pandas as pd
import spacy
from collections import Counter

In [59]:
class Vocabolary:
    """Word-level vocabulary mapping lower-cased spaCy tokens to integer ids.

    Ids 0-3 are reserved for the special tokens ``<PAD>``, ``<SOS>``,
    ``<EOS>`` and ``<UNK>``. Corpus words receive the following ids in the
    order in which each word reaches the frequency threshold.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self, threshold=5):
        """Create the special-token tables and load the spaCy pipeline.

        Args:
            threshold: Minimum number of occurrences (counted within a single
                ``build_vocab`` call) a word needs before it gets its own id.
        """
        # itos: id -> token, stoi: token -> id; kept as exact inverses.
        self.itos = {0: "<PAD>", 1: "<SOS>", 2: "<EOS>", 3: "<UNK>"}
        self.stoi = {token: index for index, token in self.itos.items()}
        self.freq_threshold = threshold
        # Only the pipeline's tokenizer is used by this class.
        self.spacy_eng = spacy.load("en_core_web_lg")

    def __len__(self):
        """Return the number of tokens in the vocabulary (specials included)."""
        return len(self.itos)

    def tokenize(self, text):
        """Split ``text`` with the spaCy tokenizer and lower-case every token.

        Args:
            text: The string to tokenize.

        Returns:
            A list of lower-cased token strings, in order of appearance.
        """
        return [token.text.lower() for token in self.spacy_eng.tokenizer(text)]

    def build_vocab(self, text_list):
        """Add every sufficiently frequent word of ``text_list`` to the vocabulary.

        A word is added the moment its running count reaches
        ``self.freq_threshold``. Counts start from zero on every call.
        A threshold of 0 or less therefore keeps every word.

        Args:
            text_list: Iterable of strings to scan.
        """
        frequencies = Counter()
        for text in text_list:
            for word in self.tokenize(text):
                frequencies[word] += 1
                # Skipping known words keeps repeated calls from assigning a
                # second id to a word that is already in the vocabulary.
                if frequencies[word] >= self.freq_threshold and word not in self.stoi:
                    # Ids are contiguous from 0, so the current size is the
                    # next free id. A fixed start of 4 would overwrite
                    # existing entries on a second call.
                    idx = len(self.itos)
                    self.stoi[word] = idx
                    self.itos[idx] = word

    def string_to_numerical(self, text):
        """Encode ``text`` as a list of token ids.

        Args:
            text: The string to encode.

        Returns:
            One id per token; tokens missing from the vocabulary map to the
            id of ``<UNK>``.
        """
        unk_id = self.stoi['<UNK>']
        return [self.stoi.get(token, unk_id) for token in self.tokenize(text)]

In [None]:
class LoadData:
    """Unimplemented stub; the class body has not been written yet."""
    # TODO: implement. The docstring keeps this cell syntactically valid.

In [3]:
# Read the captions file (comma-separated despite the .txt extension) into a
# DataFrame; the path is relative to the notebook's working directory.
data_df = pd.read_csv('../data/captions.txt')

Unnamed: 0,image,caption
0,1000268201_693b08cb0e.jpg,A child in a pink dress is climbing up a set o...
1,1000268201_693b08cb0e.jpg,A girl going into a wooden building .
2,1000268201_693b08cb0e.jpg,A little girl climbing into a wooden playhouse .
3,1000268201_693b08cb0e.jpg,A little girl climbing the stairs to her playh...
4,1000268201_693b08cb0e.jpg,A little girl in a pink dress going into a woo...


In [60]:
# Pull the 'caption' column out as a plain Python list.
captions = data_df['caption'].tolist()

In [61]:
# Default frequency threshold (5); the constructor also loads the spaCy model.
vocab_builder = Vocabolary()

In [62]:
# Populate the vocabulary in place from all captions (returns nothing).
vocab_builder.build_vocab(captions)

In [63]:
# Encode the first caption; tokens missing from the vocabulary map to the <UNK> id.
vocab_builder.string_to_numerical(captions[0])

[4, 28, 8, 4, 195, 151, 17, 32, 67, 4, 353, 11, 711, 8, 24, 3, 496, 5]

In [64]:
# Show the raw text of the first caption for comparison with its encoding.
captions[0]

'A child in a pink dress is climbing up a set of stairs in an entry way .'

In [68]:
import torch

In [82]:
# The integers 1..9 laid out as a 3x3 grid behind a leading singleton axis.
x = np.arange(1, 10).reshape(1, 3, 3)
y = x  # alias: y is the very same array object as x, not a copy
print(x, x.shape)

[[[1 2 3]
  [4 5 6]
  [7 8 9]]] (1, 3, 3)


In [83]:
# Convert both NumPy arrays to tensors (torch.tensor copies the data).
x_ten, y_ten = (torch.tensor(array) for array in (x, y))
print(x_ten, x_ten.shape)

tensor([[[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]], dtype=torch.int32) torch.Size([1, 3, 3])


In [80]:
# Insert a new axis of size 1 at position 0 of each tensor.
x_ten_sq = torch.unsqueeze(x_ten, 0)
y_ten_sq = torch.unsqueeze(y_ten, 0)
print(x_ten_sq, x_ten_sq.shape)

tensor([[[[1, 2, 3],
          [4, 5, 6],
          [7, 8, 9]]]], dtype=torch.int32) torch.Size([1, 1, 3, 3])


In [81]:
# Join the two tensors along the leading axis (torch.cat defaults to dim=0).
d_cat = torch.cat((x_ten_sq, y_ten_sq))
print(d_cat, d_cat.shape)

tensor([[[[1, 2, 3],
          [4, 5, 6],
          [7, 8, 9]]],


        [[[1, 2, 3],
          [4, 5, 6],
          [7, 8, 9]]]], dtype=torch.int32) torch.Size([2, 1, 3, 3])
