# BLIP-2.3
In this notebook we combine BLIP2 with GPT3.

In [2]:
from utils import *

import requests
from PIL import Image

import os
import torch
from transformers import AutoProcessor, Blip2ForConditionalGeneration

import openai

os.environ["CUDA_VISIBLE_DEVICES"]="0"
device = "cuda" if torch.cuda.is_available() else "cpu"

  from .autonotebook import tqdm as notebook_tqdm


#### Load model and processor
We can instantiate the model and its corresponding processor from the [hub](https://huggingface.co/models?other=blip-2).

In [3]:
name = "Salesforce/blip2-opt-2.7b"
processor = AutoProcessor.from_pretrained(name)
model = Blip2ForConditionalGeneration.from_pretrained(name, device_map="auto", load_in_8bit=True) # load in int8




Welcome to bitsandbytes. For bug reports, please run

python -m bitsandbytes

 and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
bin /home/lcur1642/.conda/envs/dl2/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda113.so
CUDA SETUP: CUDA runtime path found: /home/lcur1642/.conda/envs/dl2/lib/libcudart.so
CUDA SETUP: Highest compute capability among GPUs detected: 7.5
CUDA SETUP: Detected CUDA version 113
CUDA SETUP: Loading binary /home/lcur1642/.conda/envs/dl2/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda113.so...


Either way, this might cause trouble in the future:
If you get `CUDA error: invalid device function` errors, the above might be the cause and the solution is to make sure only one ['libcudart.so', 'libcudart.so.11.0', 'libcudart.so.12.0'] in the paths that we search based on your env.
  warn(msg)
Loading checkpoint shards: 100%|██████████| 2/2 [01:51<00:00, 55.99s/it]


#### Setup GPT3 API

In [4]:
openai_api_key = "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"
openai.api_key = openai_api_key

#### Load Image

In [5]:
url = 'https://storage.googleapis.com/sfr-vision-language-research/LAVIS/assets/merlion.png' 
image = Image.open(requests.get(url, stream=True).raw).convert('RGB')   
# display(image.resize((596, 437)))

### Prompt image

In [9]:
def BLIP2_image_captioning(image, model, processor, device, prompt=None):
    """"
    Takes as input an image, model, processor, device and (optionally) a prompt and
    returns a (prompted) caption for this image, created by BLIP2.
    """

    if prompt:
        inputs = processor(image, text=prompt, return_tensors="pt").to(device, torch.float16)
    else:
        inputs = processor(image, return_tensors="pt").to(device, torch.float16)

    generated_ids = model.generate(**inputs, max_new_tokens=20)
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
    
    return generated_text


def prompt_instruct_gpt(prompt, max_tokens=64, temperature=0, stop=None):
  """
  Helper function for prompting the GPT3 instruct language model
  """

  response = openai.Completion.create(engine="text-davinci-002", prompt=prompt, max_tokens=max_tokens, temperature=temperature, stop=stop)
  return response["choices"][0]["text"].strip()

def GPT3_image_captioning(image, model, processor,device, prompt=None):
    """"
    Takes as input an image, model, processor, device and (optionally) a prompt and
    returns a (prompted) caption for this image, improved by GPT3.
    """
    generated_text = BLIP2_image_captioning(image, model, processor, device, prompt)

    prompt_GPT = f"Instruction: augment or improve the answer. If the given answer is factually wrong, correct it \
    in a similar answer style. \n Context: {prompt}. {generated_text}"

    return prompt_instruct_gpt(prompt_GPT)


def prompt_chat_gpt(prompt, max_tokens=64, temperature=0.4, stop=None):
  """
  Helper function for prompting the GPT3 chat-based language model
  """


  response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=[
        {"role": "system", "content": "You are an intelligent chatbot and rewrite answers of another question answering chatbot in a similar style"},
        {"role": "user", "content": ("User question: 'Can I wear this for my trip to Canada in December?' \
                chatbot answer: yes, it's a nice shirt and shorts, but it's a little too casual for a trip to Canada. ")},
        {"role": "assistant", "content": "No, a shirt and shorts are not suitable for the freezing temperatures in Canada during December."},
        {"role": "user", "content": prompt}
        ],
    max_tokens = 64)

  return response["choices"][0]['message']["content"].strip()




def chatGPT3_image_captioning(image, model, processor,device, prompt=None, example_input = None):
    """"
    Takes as input an image, model, processor, device and (optionally) a prompt and
    returns a (prompted) caption for this image, improved by GPT3.
    """

    if prompt:
      print(prompt)
      generated_text = BLIP2_image_captioning(image, model, processor, device, prompt)
      prompt_GPT = f"User question: {prompt}. Chatbot answer: {generated_text}"
    elif example_input:
      print(example_input)
      prompt_GPT = example_input  

    return prompt_chat_gpt(prompt_GPT)

In [10]:
prompt = "Question: Write a famous qiote said by this person Answer:"
example_input = ("User question: 'Write a famous quote said by this person' \
                chatbot answer: albert einstein - the world is a book, and those who do not travel read \
                only one page. ")
#caption_BLIP = BLIP2_image_captioning(image, model, processor, device, prompt)
#caption_GPT3 = GPT3_image_captioning(image, model, processor, device, prompt)

caption_chatGPT3 = chatGPT3_image_captioning(image, model, processor, device, example_input = example_input)

User question: 'Write a famous quote said by this person'                 chatbot answer: albert einstein - the world is a book, and those who do not travel read                 only one page. 


In [11]:
print(caption_GPT3)

This is Singapore.


In [12]:
print(caption_BLIP)

singapore


In [11]:
print(caption_chatGPT3)

Albert Einstein once said, "Logic will get you from A to B. Imagination will take you everywhere."
