# On Colab

### Download and install the Necessaries

In [1]:
!pip install transformers -q

[K     |████████████████████████████████| 1.8MB 8.1MB/s 
[K     |████████████████████████████████| 3.2MB 35.9MB/s 
[K     |████████████████████████████████| 890kB 53.5MB/s 
[?25h  Building wheel for sacremoses (setup.py) ... [?25l[?25hdone


In [2]:
!pip install SentencePiece

Collecting SentencePiece
[?25l  Downloading https://files.pythonhosted.org/packages/14/67/e42bd1181472c95c8cda79305df848264f2a7f62740995a46945d9797b67/sentencepiece-0.1.95-cp36-cp36m-manylinux2014_x86_64.whl (1.2MB)
[K     |████████████████████████████████| 1.2MB 8.3MB/s 
[?25hInstalling collected packages: SentencePiece
Successfully installed SentencePiece-0.1.95


In [3]:
from google.colab import drive
drive.mount('/gdrive')

Mounted at /gdrive


In [4]:
with open('/gdrive/My Drive/foo.txt', 'w') as f:
  f.write('Hello Google Drive!')
!cat '/gdrive/My Drive/foo.txt'

Hello Google Drive!

In [5]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


### Import Necessaries

In [6]:
import pandas as pd
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration,Adafactor

### Finetune the Transformer on Food Data (Yummly28k)

In [None]:


train_df = pd.read_json('/gdrive/My Drive/Colab Notebooks/Files/yum28k_cap_eval_result.json')
train_df=train_df.rename(columns={'Actual Caption': 'ctext', 'Generated Caption': 'text'})
train_df=train_df.iloc[  :27000,:]
train_df=train_df.sample(frac = 1)
batch_size=8
num_of_batches=len(train_df)/batch_size

if torch.cuda.is_available():
   dev = torch.device("cuda:0")
   print("Running on the GPU")
else:
   dev = torch.device("cpu")
   print("Running on the CPU")
   
tokenizer = T5Tokenizer.from_pretrained('t5-base')
model = T5ForConditionalGeneration.from_pretrained('t5-base',return_dict=True)#moving the model to GPU
model.to(dev)

optimizer = Adafactor(model.parameters(),lr=1e-3,
                      eps=(1e-30, 1e-3),
                      clip_threshold=1.0,
                      decay_rate=-0.8,
                      beta1=None,
                      weight_decay=0.0,
                      relative_step=False,
                      scale_parameter=False,
                      warmup_init=False)


from IPython.display import HTML, display
def progress(loss,value, max=100):
 return HTML(""" Batch loss :{loss}      <progress    
value='{value}'max='{max}',style='width: 100%'>{value}
      </progress>             
              
              """.format(loss=loss,value=value, max=max))



#Sets the module in training mode
model.train()

num_of_epochs=20

loss_per_10_steps=[]
for epoch in range(1,num_of_epochs+1):
  print('Running epoch: {}'.format(epoch))
  
  running_loss=0

  out = display(progress(1, num_of_batches+1), display_id=True)
  for i in range(int(num_of_batches)):
    inputbatch=[]
    labelbatch=[]
    new_df=train_df[i*batch_size:i*batch_size+batch_size]
    for indx,row in new_df.iterrows():
      input = row['text']+'</s>' 
      labels = row['ctext']+'</s>'   
      inputbatch.append(input)
      labelbatch.append(labels)
    inputbatch=tokenizer.batch_encode_plus(inputbatch,padding=True,max_length=400,return_tensors='pt')["input_ids"]
    labelbatch=tokenizer.batch_encode_plus(labelbatch,padding=True,max_length=400,return_tensors="pt") ["input_ids"]
    inputbatch=inputbatch.to(dev)
    labelbatch=labelbatch.to(dev)

    # clear out the gradients of all Variables 
    optimizer.zero_grad()

    # Forward propogation
    outputs = model(input_ids=inputbatch, labels=labelbatch)
    loss = outputs.loss
    loss_num=loss.item()
    logits = outputs.logits
    running_loss+=loss_num
    if i%10 ==0:      
      loss_per_10_steps.append(loss_num)
    out.update(progress(loss_num,i, num_of_batches+1))

    # calculating the gradients
    loss.backward()

    #updating the params
    optimizer.step()
    
  running_loss=running_loss/int(num_of_batches)
  print('Epoch: {} , Running loss: {}'.format(epoch,running_loss))
 

ValueError: ignored

### Save the model

In [None]:
torch.save(model.state_dict(),'/gdrive/My Drive/Colab Notebooks/models/pytorch_model.bin')

### Load Saved Model

In [7]:
tokenizer = T5Tokenizer.from_pretrained('t5-base')
model =T5ForConditionalGeneration.from_pretrained('/content/drive/My Drive/Colab Notebooks/models/', 
                                                return_dict=True)
def generate(text,model,tokenizer):
   model.eval()
   input_ids = tokenizer.encode("{} </s>".format(text), 
                               return_tensors="pt")  
   outputs = model.generate(input_ids)
   return tokenizer.decode(outputs[0])

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=791656.0, style=ProgressStyle(descripti…




### NLG on Yummly28k

In [None]:
df = pd.read_json (r'/content/drive/MyDrive/Colab Notebooks/Files/yum28k_cap_eval_result.json')

In [None]:
df

Unnamed: 0,Photo_id,Actual Caption,Predicted Caption,Predicted Label,Generated Caption
0,img00001.jpg,<start> mushroom risotto <end>,egg noodles with cranberry rice <end>,Mushroom risotto,egg noodles with cranberry rice <end> Mushroom...
1,img00002.jpg,<start> filipino bbq pork skewers <end>,toasts shells chicken two <end>,Baked potatoes,toasts shells chicken two <end> Baked potatoes
2,img00003.jpg,<start> mushroom and roasted garlic risotto <end>,barbecued almonds a sprouts <end>,Mushroom risotto,barbecued almonds a sprouts <end> Mushroom ris...
3,img00004.jpg,<start> gratin dauphinois scalloped potatoes w...,spaghetti with garlic spread allrecipes <end>,Cheese pasta,spaghetti with garlic spread allrecipes <end> ...
4,img00005.jpg,<start> delicious grilled hamburgers allrecipe...,to smoked with chipotle sparkling <end>,Chickpeas Hamburger,to smoked with chipotle sparkling <end> Chickp...
...,...,...,...,...,...
27633,img27634.jpg,<start> saltimbocca alla romana <end>,praline peas garbanzo <end>,Boiled beef with green sauce,praline peas garbanzo <end> Boiled beef with g...
27634,img27635.jpg,<start> <unk> myrecipes <end>,slowly myrecipes <end>,Carrots soup,slowly myrecipes <end> Carrots soup
27635,img27636.jpg,<start> brioche <end>,onions pies down cake epicurious <end>,Toasted bread with taleggio and gorgonzola cheese,onions pies down cake epicurious <end> Toasted...
27636,img27637.jpg,<start> mexican hot chocolate doughnuts <end>,italian tiramisu <end>,Vegetable barley soup,italian tiramisu <end> Vegetable barley soup


'<pad> <unk> start> tapenade <unk> end></s>'

In [None]:
df["NLG"]=""
for ind in df.index:
  df["NLG"][ind]=generate(str(df["Generated Caption"][ind]),model,tokenizer)
  print(ind)

  f"This sequence already has {self.eos_token}. In future versions this behavior may lead to duplicated eos tokens being added."


0


  f"This sequence already has {self.eos_token}. In future versions this behavior may lead to duplicated eos tokens being added."


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
22638
22639
22640
22641
22642
22643
22644
22645
22646
22647
22648
22649
22650
22651
22652
22653
22654
22655
22656
22657
22658
22659
22660
22661
22662
22663
22664
22665
22666
22667
22668
22669
22670
22671
22672
22673
22674
22675
22676
22677
22678
22679
22680
22681
22682
22683
22684
22685
22686
22687
22688
22689
22690
22691
22692
22693
22694
22695
22696
22697
22698
22699
22700
22701
22702
22703
22704
22705
22706
22707
22708
22709
22710
22711
22712
22713
22714
22715
22716
22717
22718
22719
22720
22721
22722
22723
22724
22725
22726
22727
22728
22729
22730
22731
22732
22733
22734
22735
22736
22737
22738
22739
22740
22741
22742
22743
22744
22745
22746
22747
22748
22749
22750
22751
22752
22753
22754
22755
22756
22757
22758
22759
22760
22761
22762
22763
22764
22765
22766
22767
22768
22769
22770
22771
22772
22773
22774
22775
22776
22777
22778
22779
22780
22781
22782
22783
22784
22785
22786
22787
22788
22789
22790
22791
22792
22793

In [None]:
df

Unnamed: 0,Photo_id,Actual Caption,Predicted Caption,Predicted Label,Generated Caption,NLG
0,img00001.jpg,<start> mushroom risotto <end>,egg noodles with cranberry rice <end>,Mushroom risotto,egg noodles with cranberry rice <end> Mushroom...,<pad> <unk> start> egg noodles with cranberry ...
1,img00002.jpg,<start> filipino bbq pork skewers <end>,toasts shells chicken two <end>,Baked potatoes,toasts shells chicken two <end> Baked potatoes,<pad> <unk> start> filipino bbq pork skewers <...
2,img00003.jpg,<start> mushroom and roasted garlic risotto <end>,barbecued almonds a sprouts <end>,Mushroom risotto,barbecued almonds a sprouts <end> Mushroom ris...,<pad> <unk> start> mushroom and roasted garlic...
3,img00004.jpg,<start> gratin dauphinois scalloped potatoes w...,spaghetti with garlic spread allrecipes <end>,Cheese pasta,spaghetti with garlic spread allrecipes <end> ...,<pad> <unk> start> gratin dauphinois scalloped...
4,img00005.jpg,<start> delicious grilled hamburgers allrecipe...,to smoked with chipotle sparkling <end>,Chickpeas Hamburger,to smoked with chipotle sparkling <end> Chickp...,<pad> <unk> start> delicious grilled hamburger...
...,...,...,...,...,...,...
27633,img27634.jpg,<start> saltimbocca alla romana <end>,praline peas garbanzo <end>,Boiled beef with green sauce,praline peas garbanzo <end> Boiled beef with g...,<pad> <unk> start> saltimbocca alla romana <un...
27634,img27635.jpg,<start> <unk> myrecipes <end>,slowly myrecipes <end>,Carrots soup,slowly myrecipes <end> Carrots soup,<pad> <unk> start> croutons myrecipes <unk> en...
27635,img27636.jpg,<start> brioche <end>,onions pies down cake epicurious <end>,Toasted bread with taleggio and gorgonzola cheese,onions pies down cake epicurious <end> Toasted...,<pad> <unk> start> apple upside down cake epic...
27636,img27637.jpg,<start> mexican hot chocolate doughnuts <end>,italian tiramisu <end>,Vegetable barley soup,italian tiramisu <end> Vegetable barley soup,<pad> <unk> start> italian tiramisu <unk> end>...


In [None]:
#Save as json file
df.to_json(r'/content/drive/MyDrive/Colab Notebooks/Files/nlg_yummly_out.json')

### NLG on Yelp Captioned Images


In [8]:
dfyelpcap = pd.read_json (r'/content/drive/MyDrive/Colab Notebooks/ResultsDMA/yelp_cap_data_result.json')

In [9]:
dfyelpcap

Unnamed: 0,Photo_id,Actual Caption,Predicted Caption,Predicted Label,Generated Caption
0,002gqWfVal1D2DT6oA-bVQ.jpg,<start> 12oz boneless ribeye grilled to perfec...,magazine battered topped <end>,Red wine braised beef with potatoes,magazine battered topped <end> Red wine braise...
1,002GtTSYyQ4swIZyEnfF5A.jpg,<start> the most yummy tarts chantilly guava s...,biscuits egg filet <end>,Carrots with bay leaves,biscuits egg filet <end> Carrots with bay leaves
2,004a5VhdS6w9PaH0QmlX9g.jpg,<start> shrimp <end>,meatballs garlic and green beef <end>,Beef stew,meatballs garlic and green beef <end> Beef stew
3,005Pl2rEa-y9OfU8Nbcy4Q.jpg,<start> shrimp mussels crawfish crab claws pot...,pepperoni with was with grilled green cheesebu...,Pasta with mussels,pepperoni with was with grilled green cheesebu...
4,007FeerwcQQRx7ck_xA0QA.jpg,<start> surf and turf with 1 <unk> lobster and...,chocolate 'n soup bagel and stuffed compliment...,Raw vegetable buffet,chocolate 'n soup bagel and stuffed compliment...
...,...,...,...,...,...
58392,ZZ_WrOChc6eB3yPgWhbuWA.jpg,<start> fresh food everyday <end>,chocolate 19 my <end>,Fish cous cous,chocolate 19 my <end> Fish cous cous
58393,zZWWip7yNQIlLaTbYJC_IA.jpg,<start> creamed mussel <end>,green mcdonald's jerk it <unk> banh cookie bro...,Pasta with mussels,green mcdonald's jerk it <unk> banh cookie bro...
58394,zzXARW5UJhgRDZ20lb1CPg.jpg,<start> arancini classico <end>,burger but best bowl <end>,Chickpeas Hamburger,burger but best bowl <end> Chickpeas Hamburger
58395,zZykbO6ggfCpR5_Z8F5OHw.jpg,<start> khao soi poutine 9 95 <end>,burger tart fried and pepper <end>,Lemon chicken strips,burger tart fried and pepper <end> Lemon chick...


In [11]:
dfyelpcap["Generated Caption"]=""
dfyelpcap["Generated Caption"]=dfyelpcap["Predicted Caption"]+" "+dfyelpcap["Predicted Label"]

dfyelpcap['Generated Caption']=dfyelpcap['Generated Caption'].str.replace('<start>', '')

dfyelpcap['Generated Caption']=dfyelpcap['Generated Caption'].str.replace('<end>', '')

dfyelpcap['Generated Caption']=dfyelpcap['Generated Caption'].str.replace('<unk>', '')

In [12]:
dfyelpcap["NLG"]=""
for ind in dfyelpcap.index:
  dfyelpcap["NLG"][ind]=generate(str(dfyelpcap["Generated Caption"][ind]),model,tokenizer)
  print(ind)

  f"This sequence already has {self.eos_token}. In future versions this behavior may lead to duplicated eos tokens being added."


0


  f"This sequence already has {self.eos_token}. In future versions this behavior may lead to duplicated eos tokens being added."


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269


KeyboardInterrupt: ignored

In [13]:
dfyelpcap

Unnamed: 0,Photo_id,Actual Caption,Predicted Caption,Predicted Label,Generated Caption,NLG
0,002gqWfVal1D2DT6oA-bVQ.jpg,<start> 12oz boneless ribeye grilled to perfec...,magazine battered topped <end>,Red wine braised beef with potatoes,magazine battered topped Red wine braised bee...,<pad> <unk> start> delayed hand topped ribs <u...
1,002GtTSYyQ4swIZyEnfF5A.jpg,<start> the most yummy tarts chantilly guava s...,biscuits egg filet <end>,Carrots with bay leaves,biscuits egg filet Carrots with bay leaves,<pad> <unk> start> fudge egg satay <unk> end></s>
2,004a5VhdS6w9PaH0QmlX9g.jpg,<start> shrimp <end>,meatballs garlic and green beef <end>,Beef stew,meatballs garlic and green beef Beef stew,<pad> <unk> start> beef bourguignonne <unk> en...
3,005Pl2rEa-y9OfU8Nbcy4Q.jpg,<start> shrimp mussels crawfish crab claws pot...,pepperoni with was with grilled green cheesebu...,Pasta with mussels,pepperoni with was with grilled green cheesebu...,<pad> <unk> start> carbonara with pecorino and...
4,007FeerwcQQRx7ck_xA0QA.jpg,<start> surf and turf with 1 <unk> lobster and...,chocolate 'n soup bagel and stuffed compliment...,Raw vegetable buffet,chocolate 'n soup bagel and stuffed compliment...,<pad> <unk> start> chocolate dipped shortcakes...
...,...,...,...,...,...,...
58392,ZZ_WrOChc6eB3yPgWhbuWA.jpg,<start> fresh food everyday <end>,chocolate 19 my <end>,Fish cous cous,chocolate 19 my Fish cous cous,
58393,zZWWip7yNQIlLaTbYJC_IA.jpg,<start> creamed mussel <end>,green mcdonald's jerk it <unk> banh cookie bro...,Pasta with mussels,green mcdonald's jerk it banh cookie broth P...,
58394,zzXARW5UJhgRDZ20lb1CPg.jpg,<start> arancini classico <end>,burger but best bowl <end>,Chickpeas Hamburger,burger but best bowl Chickpeas Hamburger,
58395,zZykbO6ggfCpR5_Z8F5OHw.jpg,<start> khao soi poutine 9 95 <end>,burger tart fried and pepper <end>,Lemon chicken strips,burger tart fried and pepper Lemon chicken st...,


In [14]:
#Save as json file
dfyelpcap.to_json(r'/content/drive/MyDrive/Colab Notebooks/ResultsDMA/nlg_yelp_cap_out.json')

### NLG on Yelp Test Uncaptioned Images

In [15]:
dfyelptest = pd.read_json (r'/content/drive/MyDrive/Colab Notebooks/ResultsDMA/yelp_test_data_result.json')

In [16]:
dfyelptest

Unnamed: 0,Photo_id,Predicted Caption,Predicted Label,Generated Caption
0,003Isvt6NCJ567PdAEwrfA.jpg,chocolate french toast martha stewart <end>,Pork loin with apples,chocolate french toast martha stewart <end> Po...
1,006ZHlSqUxlUDaBN94O7Cw.jpg,strawberry crusted creamy alla apples <end>,Black beans,strawberry crusted creamy alla apples <end> Bl...
2,00aHWjZkA25JE4A_g74clA.jpg,chinese shells sauce winter recipes <end>,Red wine braised beef with potatoes,chinese shells sauce winter recipes <end> Red ...
3,00bbCUC3NZbS22ar-DKVQw.jpg,maple glazed pork blueberry epicurious <end>,Seasoned roasted pork,maple glazed pork blueberry epicurious <end> S...
4,00ezKE1-uUQBkgE_jxuf7g.jpg,barbecued thai <end>,Fish cous cous,barbecued thai <end> Fish cous cous
...,...,...,...,...
60195,Zzw6EWnLt-G05oAZrh0Yyg.jpg,coconut meatball and creme fagioli <end>,Tortellini with cream,coconut meatball and creme fagioli <end> Torte...
60196,zZW8jW-CcHNZmy7A6T1w9w.jpg,beef semifreddo <end>,Steamed green beans,beef semifreddo <end> Steamed green beans
60197,zZyaqif0CC1ymTir1gPx-Q.jpg,french croque roasted garlic recipe for cheesy...,Vegetable strudel,french croque roasted garlic recipe for cheesy...
60198,ZzYwp8nfpMxeeyU3iyAUmw.jpg,pizza glazed pecans my recipes <end>,Grilled scamorza cheese,pizza glazed pecans my recipes <end> Grilled s...


In [17]:
dfyelptest["NLG"]=""
for ind in dfyelptest.index:
  dfyelptest["NLG"][ind]=generate(str(dfyelptest["Generated Caption"][ind]),model,tokenizer)
  print(ind)

  f"This sequence already has {self.eos_token}. In future versions this behavior may lead to duplicated eos tokens being added."


0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27

KeyboardInterrupt: ignored

In [18]:
dfyelptest

Unnamed: 0,Photo_id,Predicted Caption,Predicted Label,Generated Caption,NLG
0,003Isvt6NCJ567PdAEwrfA.jpg,chocolate french toast martha stewart <end>,Pork loin with apples,chocolate french toast martha stewart <end> Po...,<pad> <unk> start> chocolate french toast mart...
1,006ZHlSqUxlUDaBN94O7Cw.jpg,strawberry crusted creamy alla apples <end>,Black beans,strawberry crusted creamy alla apples <end> Bl...,<pad> <unk> start> strawberry blueberry coconu...
2,00aHWjZkA25JE4A_g74clA.jpg,chinese shells sauce winter recipes <end>,Red wine braised beef with potatoes,chinese shells sauce winter recipes <end> Red ...,<pad> <unk> start> chinese soya sauce <unk> en...
3,00bbCUC3NZbS22ar-DKVQw.jpg,maple glazed pork blueberry epicurious <end>,Seasoned roasted pork,maple glazed pork blueberry epicurious <end> S...,<pad> <unk> start> apple glazed pork ribs epic...
4,00ezKE1-uUQBkgE_jxuf7g.jpg,barbecued thai <end>,Fish cous cous,barbecued thai <end> Fish cous cous,<pad> <unk> start> pad thai <unk> end></s>
...,...,...,...,...,...
60195,Zzw6EWnLt-G05oAZrh0Yyg.jpg,coconut meatball and creme fagioli <end>,Tortellini with cream,coconut meatball and creme fagioli <end> Torte...,
60196,zZW8jW-CcHNZmy7A6T1w9w.jpg,beef semifreddo <end>,Steamed green beans,beef semifreddo <end> Steamed green beans,
60197,zZyaqif0CC1ymTir1gPx-Q.jpg,french croque roasted garlic recipe for cheesy...,Vegetable strudel,french croque roasted garlic recipe for cheesy...,
60198,ZzYwp8nfpMxeeyU3iyAUmw.jpg,pizza glazed pecans my recipes <end>,Grilled scamorza cheese,pizza glazed pecans my recipes <end> Grilled s...,


In [19]:
#Save as json file
dfyelptest.to_json(r'/content/drive/MyDrive/Colab Notebooks/ResultsDMA/nlg_yelp_test_out.json')