In [1]:
import pandas as pd

import numpy as np

import random
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
import string

In [17]:
# Number of token positions fed to the network; shorter inputs are padded up to
# this length and it is also the model's input width.
max_len = 15

# Upper bound on how many of the most frequent tokens are kept as vocabulary
# candidates. It is rebound later to the actual vocabulary size.
vocab_size = 1_000

In [18]:
# Load the caption table. The preview below shows an `image` file-name column
# and a `caption` text column, with several captions per image.
df = pd.read_csv("flickr1k/captions.csv")

In [19]:
df.head(5)

Unnamed: 0,image,caption
0,1000268201_693b08cb0e.jpg,A child in a pink dress is climbing up a set o...
1,1000268201_693b08cb0e.jpg,A girl going into a wooden building .
2,1000268201_693b08cb0e.jpg,A little girl climbing into a wooden playhouse .
3,1000268201_693b08cb0e.jpg,A little girl climbing the stairs to her playh...
4,1000268201_693b08cb0e.jpg,A little girl in a pink dress going into a woo...


In [20]:
# Lower-case every caption and split it into word/punctuation tokens.
# The column is overwritten in place, so only raw strings are tokenised: values
# that are already token lists pass through unchanged. That makes re-running the
# cell a no-op instead of an AttributeError on `list.lower()`.
df["caption"] = df.caption.apply(
    lambda line: word_tokenize(line.lower()) if isinstance(line, str) else line
)

In [21]:
df.head(5)

Unnamed: 0,image,caption
0,1000268201_693b08cb0e.jpg,"[a, child, in, a, pink, dress, is, climbing, u..."
1,1000268201_693b08cb0e.jpg,"[a, girl, going, into, a, wooden, building, .]"
2,1000268201_693b08cb0e.jpg,"[a, little, girl, climbing, into, a, wooden, p..."
3,1000268201_693b08cb0e.jpg,"[a, little, girl, climbing, the, stairs, to, h..."
4,1000268201_693b08cb0e.jpg,"[a, little, girl, in, a, pink, dress, going, i..."


In [22]:
# Flatten the per-caption token lists into one long list of tokens. Repeats are
# kept so that token frequencies can be counted from it.
words = [token for tokens in df.caption for token in tokens]

In [23]:
len(words)

59960

In [24]:
# Reduce the flat token list to its distinct tokens and the number of times each
# one occurs. NOTE: `words` is rebound here from the token list to the array of
# unique tokens; `counts` is aligned with it position by position.
words, counts = np.unique(words, return_counts=True)

In [25]:
len(words)

3290

In [26]:
# Keep only the `vocab_size` most frequent tokens. argsort orders the counts
# ascending, so the last `vocab_size` positions are the highest counts and the
# most frequent token ends up last.
words = words[counts.argsort()[-vocab_size:]]

In [27]:
# Tokens to exclude from the vocabulary: NLTK's English stop-word list plus
# every ASCII punctuation character.
bad = stopwords.words("english") + list(string.punctuation)

In [28]:
# Drop stop words and punctuation from the vocabulary candidates, keeping the
# remaining tokens in their existing order. Membership is checked against a set
# built once rather than scanning the `bad` list for every candidate.
bad_lookup = set(bad)
words = [word for word in words if word not in bad_lookup]

In [29]:
# Replace the configured cap with the number of tokens that survived filtering.
# NOTE(review): from this cell on `vocab_size` means "actual vocabulary size",
# not "cap on candidates" as in the config cell.
vocab_size = len(words)

In [30]:
vocab_size

927

In [31]:
# Token -> integer id. Ids run from 1 to vocab_size, so 0 is never assigned to
# a token.
vocab = {word: idx for word, idx in zip(words, range(1, vocab_size + 1))}

In [32]:
# Integer id -> token: the inverse of the token-to-id mapping, using the same
# 1-based ids.
rev_vocab = {idx: word for idx, word in zip(range(1, vocab_size + 1), words)}

In [33]:
sent = df.caption[2000]

In [34]:
" ".join(sent[1:-1])

'climber in an orange helmet is ascending attached to a rope whilst climbing a rock face'

In [35]:
" ".join([word for word in sent[1:-1] if word in vocab])

'climber orange helmet attached rope whilst climbing rock face'

In [36]:
# line = random.choice(df.caption)
# line = [vocab[word] for word in line if word in vocab]

# line

In [37]:
def data_gen(lines, batch_size=32):
    """Yield an endless stream of random next-word training batches.

    Every caption is mapped to vocabulary ids via the module-level ``vocab``
    (tokens that are not in ``vocab`` are dropped); captions left with four or
    fewer ids are discarded. Each sample picks one of the remaining captions
    uniformly at random, cuts it at a random position ``index`` and uses the
    ids before the cut as the input and the id at the cut as the target.
    ``index`` may be 0, in which case the input is empty and is padded to all
    zeros.

    Parameters
    ----------
    lines : iterable of list of str
        Tokenised captions, e.g. ``df.caption``. It is iterated by value, so a
        pandas Series works whatever its index looks like.
    batch_size : int, default 32
        Number of samples per yielded batch; must be at least 1.

    Yields
    ------
    list
        ``[X, y]`` where ``X`` is the ``pad_sequences`` output for the input
        prefixes (``maxlen=max_len``) and ``y`` is an array of one-hot targets
        with ``vocab_size + 1`` classes per row.

    Raises
    ------
    ValueError
        On the first ``next()`` call, if ``batch_size`` is smaller than 1 or if
        no caption keeps more than four in-vocabulary tokens (the sampling loop
        could otherwise never produce a batch).
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # Encode and filter once up front instead of on every draw. Sampling
    # uniformly from the usable captions gives the same distribution as drawing
    # from all captions and rejecting the short ones.
    encoded = []
    for tokens in lines:
        ids = [vocab[token] for token in tokens if token in vocab]
        if len(ids) > 4:
            encoded.append(ids)

    if not encoded:
        raise ValueError(
            "no caption has more than 4 in-vocabulary tokens; cannot build batches"
        )

    X, Y = [], []

    while True:
        ids = random.choice(encoded)
        # Cut position: everything before it is context, the id at it is the label.
        index = random.randrange(len(ids))
        X.append(ids[:index])
        Y.append(to_categorical(ids[index], num_classes=vocab_size + 1))

        if len(X) == batch_size:
            X_mod = pad_sequences(X, maxlen=max_len)
            y_mod = np.array(Y)
            X, Y = [], []
            yield [X_mod, y_mod]

    

In [38]:
gen = data_gen(df.caption)

In [39]:
data = next(gen)

In [40]:
data[1].shape

(32, 928)

In [41]:
from tensorflow.keras.layers import Input, LSTM, Dense, Embedding
from tensorflow.keras.models import Model

In [42]:
# Next-word model: a window of max_len token ids goes in, a probability
# distribution over token ids comes out.
in_layer = Input(shape=(max_len,))
# vocab_size+1 rows because vocabulary ids start at 1, which leaves index 0 for
# the zeros that pad_sequences inserts. mask_zero is not set, so the LSTM also
# reads those padding positions.
embedding = Embedding(input_dim=vocab_size+1, output_dim=10)(in_layer)
# Only the LSTM's final output is passed on (return_sequences is not enabled).
rnn = LSTM(units=500)(embedding)
d1 = Dense(units=100, activation="tanh")(rnn)
# NOTE(review): this 10-unit layer sits directly in front of the
# (vocab_size+1)-way softmax, which is a very narrow bottleneck. Confirm it is
# intentional.
d2 = Dense(units=10, activation="tanh")(d1)
# One output per id (including the unused padding id 0), matching the one-hot
# targets, which are built with num_classes=vocab_size+1.
out = Dense(units=vocab_size+1, activation="softmax")(d2)

In [43]:
model = Model(inputs=in_layer, outputs=out)

In [44]:
model.compile(optimizer="rmsprop", loss="categorical_crossentropy")

In [45]:
# Train on a handful of freshly sampled batches, one fit() call per batch.
gen = data_gen(df.caption, batch_size=2000)

n_rounds = 5
for i in range(n_rounds):
    data = next(gen)
    # Each batch is a two-element list: padded inputs, then one-hot targets.
    inputs, targets = data
    model.fit(inputs, targets)


Train on 2000 samples
Train on 2000 samples
Train on 2000 samples
Train on 2000 samples
Train on 2000 samples


In [65]:
line = df.caption[500][:4]


In [66]:
line

['a', 'black', 'and', 'white']

In [67]:
# Map the prompt tokens to vocabulary ids. Tokens that are not in `vocab` are
# silently dropped, so fewer ids than prompt tokens may remain.
# NOTE(review): this rebinds `line` from tokens to ids. Running the cell a
# second time therefore produces an empty list, because the integer ids are not
# keys of `vocab`.
line = [vocab[word] for word in line if word in vocab]

In [68]:
X_test = pad_sequences([line], maxlen=max_len)

In [69]:
model.predict(X_test).argmax(axis=1)

array([926])

In [70]:
rev_vocab[926]

'man'