In [None]:
!pip install transformers
!pip install gradio


In [17]:
import gradio as gr
from io import BytesIO
from transformers import pipeline ,VisionEncoderDecoderModel, ViTFeatureExtractor, AutoTokenizer
import torch
from PIL import Image
import os
import numpy as np
import nltk

In [None]:
# Load the pretrained image-captioning model together with the feature extractor
# and tokenizer published under the same checkpoint name.
model = VisionEncoderDecoderModel.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
feature_extractor = ViTFeatureExtractor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
tokenizer = AutoTokenizer.from_pretrained("nlpconnect/vit-gpt2-image-captioning")

# Prefer the GPU when CUDA is available, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
model.to(device)

# Settings forwarded to model.generate() by predict_step.
max_length = 16
num_beams = 7  # number of candidate sequences kept at each step
num_return_sequences = 4  # number of captions the decoder returns
# Keyword arguments for the model's generate method.
gen_kwargs = dict(
    max_length=max_length,
    num_beams=num_beams,
    num_return_sequences=num_return_sequences,
)

def predict_step(images):
  """Generate captions for `images` and return them as one newline-separated string.

  The input is run through the module-level `feature_extractor`, moved to
  `device`, decoded by `model.generate` using `gen_kwargs`, and converted
  back to text with `tokenizer`.
  """
  encoded = feature_extractor(images=images, return_tensors="pt")
  pixel_values = encoded.pixel_values.to(device)

  generated_ids = model.generate(pixel_values, **gen_kwargs)

  decoded = tokenizer.batch_decode(
      generated_ids,
      skip_special_tokens=True,
      clean_up_tokenization_spaces=True,
  )
  # Trim surrounding whitespace from every caption before joining them.
  return "\n".join(caption.strip() for caption in decoded)


In [None]:
# Fetch the NLTK stopwords corpus; generate_hashtags filters the summary against it.
nltk.download('stopwords')
from nltk.corpus import stopwords

# Summarization pipeline used to condense the captions; no model is named here,
# so the library picks its default for the task.
hashtags = pipeline(task="summarization")

def generate_hashtags(captions):
  """Condense caption text into a space-separated string of hashtag keywords.

  The newline-separated captions are turned into sentences, summarized with the
  module-level `hashtags` pipeline, and the summary is then filtered: leading
  and trailing punctuation is stripped from every token, and English stopwords
  and empty tokens are dropped.

  Args:
    captions: Caption text with one caption per line.

  Returns:
    The surviving keywords joined by single spaces (no leading "#"). An empty
    string is returned when `captions` is empty/blank or nothing survives
    the filtering.
  """
  import string  # local import: only needed for the punctuation table below

  # Nothing to summarize; avoid sending a lone "." to the summarizer.
  if not captions or not captions.strip():
    return ""

  # Little preprocessing before summarizing: one sentence per caption.
  joined_text = captions.replace('\n', '. ').rstrip('. ') + '.'

  # Passing it to the summarizer.
  summary = hashtags(joined_text, max_length=10, min_length=2, do_sample=False)[0]['summary_text']

  # Build the stopword set once instead of re-reading the corpus for every word.
  stop_words = set(stopwords.words('english'))

  keywords = []
  for token in summary.split():
    # Strip edge punctuation so "couch." becomes "couch" and a bare "." vanishes;
    # otherwise it would end up inside a hashtag and also defeat the stopword check.
    word = token.strip(string.punctuation)
    if word and word.lower() not in stop_words:
      keywords.append(word)
  return ' '.join(keywords)

def predict_and_generate(input_image):
    """Caption an image and derive hashtags from the captions.

    Args:
        input_image: The image handed over by the Gradio interface, or None
            when the form is submitted without an image.

    Returns:
        A `(captions, hashtags)` tuple of strings: the newline-separated
        captions from `predict_step` and the keywords from
        `generate_hashtags`, each prefixed with "#" and separated by single
        spaces. Both strings are empty when no image was supplied.
    """
    # Submitting the form without an image yields None; return empty outputs
    # instead of failing inside the feature extractor.
    if input_image is None:
        return ("", "")

    captions = predict_step(input_image)
    keywords = generate_hashtags(captions)

    # Prefix each keyword individually so an empty keyword string gives ""
    # rather than a lone "#", and repeated spaces cannot produce bare "#" tokens.
    hashtag_text = " ".join("#" + keyword for keyword in keywords.split())
    return (captions, hashtag_text)



In [21]:
# Build and launch the demo UI: one image input, two text outputs.
# The top-level gr.Image / gr.Textbox components replace the legacy
# gr.inputs.* / gr.outputs.* namespaces, which are deprecated in Gradio 3 and
# no longer exist in Gradio 4 (what an unpinned `pip install gradio` provides).
input_image = gr.Image()
output_captions = gr.Textbox(label="Captions")
output_hashtags = gr.Textbox(label="Hashtags")
gr.Interface(
    fn=predict_and_generate,
    inputs=input_image,
    outputs=[output_captions, output_hashtags],
).launch(debug=True, share=True)





Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
Running on public URL: https://9f096b3aa44f61f99e.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades (NEW!), check out Spaces: https://huggingface.co/spaces


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://9f096b3aa44f61f99e.gradio.live


