In [1]:
import os
import io
from IPython.display import Image, display, HTML
from PIL import Image
import base64 
import requests, json

from dotenv import load_dotenv, find_dotenv
# read local .env file
# Locate the .env file first, then load its entries into the process environment.
_ = load_dotenv(dotenv_path=find_dotenv())
# Indexing (rather than .get) makes a missing HF_API_KEY fail right here with KeyError.
hf_api_key = os.environ["HF_API_KEY"]

In [2]:
#######################################################
# Here we are going to call multiple endpoints!
# - inputs: The data to be sent to the API.
# - parameters: Additional parameters that can be 
#   included in the API request (optional).
# - ENDPOINT_URL: The URL of the API endpoint 
#   (optional).
# 
# Note:
# - get_completion sends one POST request to whichever 
#   endpoint is passed as ENDPOINT_URL.
# - "Multiple endpoints" means this notebook calls the 
#   same helper with different URLs: a text-to-image 
#   endpoint and an image-to-text endpoint.
#######################################################


In [2]:
import requests
import json

def get_completion(inputs, parameters=None, ENDPOINT_URL="", timeout=120):
    """POST ``inputs`` (and optional ``parameters``) to an inference endpoint.

    Args:
        inputs: JSON-serialisable value sent under the "inputs" key.
        parameters: Optional value sent under the "parameters" key; the key
            is left out of the payload when this is None.
        ENDPOINT_URL: URL the request is posted to.
        timeout: Seconds handed to ``requests.post``; pass None to wait
            without limit.

    Returns:
        The decoded JSON body when the response parses as JSON, otherwise
        the raw response bytes (``response.content``).
    """
    # Bearer token comes from the notebook-level hf_api_key.
    headers = {
        "Authorization": f"Bearer {hf_api_key}",
        "Content-Type": "application/json"
    }

    payload = {"inputs": inputs}
    if parameters is not None:
        payload["parameters"] = parameters

    response = requests.post(
        ENDPOINT_URL,
        headers=headers,
        data=json.dumps(payload),
        timeout=timeout,
    )

    try:
        return response.json()
    except ValueError:
        # ValueError is the common base of json.JSONDecodeError and of the
        # decode errors raised when requests is backed by simplejson, so the
        # binary fallback works regardless of which one response.json() raises.
        return response.content



In [4]:
#######################################################
# Step 1: text-to-image
#######################################################


In [3]:
# text-to-image endpoint URL (KeyError if HF_API_TTI_BASE is not set)
TTI_ENDPOINT = os.environ["HF_API_TTI_BASE"]
# image-to-text endpoint URL (KeyError if HF_API_ITT_BASE is not set)
ITT_ENDPOINT = os.environ["HF_API_ITT_BASE"]

In [6]:
#######################################################
# Step 2: Building your game with `gr.Blocks()`
#######################################################


In [7]:
def image_to_base64_str(pil_image):
    """Save ``pil_image`` as PNG into memory and return the bytes as base64 text.

    Args:
        pil_image: Object exposing ``save(file_obj, format=...)``.

    Returns:
        str: base64 encoding of the PNG bytes.
    """
    with io.BytesIO() as png_buffer:
        pil_image.save(png_buffer, format='PNG')
        png_bytes = png_buffer.getvalue()
    encoded = base64.b64encode(png_bytes)
    return encoded.decode('utf-8')

# def base64_to_pil(img_base64):
#     base64_decoded = base64.b64decode(img_base64)
#     byte_stream = io.BytesIO(base64_decoded)
#     pil_image = Image.open(byte_stream)
#     return pil_image

def base64_to_pil(img_base64):
    """Decode base64 data and return it as a verified PIL image.

    Args:
        img_base64: base64-encoded image data.

    Returns:
        The opened image, or None when the input is not valid base64 or the
        decoded bytes are not recognised as an image (a message is printed).
    """
    # Local import: the notebook's import cell does not bring binascii in.
    import binascii

    try:
        image_bytes = base64.b64decode(img_base64)
    except binascii.Error as e:
        print("Error decoding or identifying image:", e)
        return None

    # Only `Image` is imported from PIL at the top of the notebook, so the
    # exception class is imported here instead of referencing `PIL.`.
    from PIL import UnidentifiedImageError

    try:
        # First open is only for the integrity check.
        Image.open(io.BytesIO(image_bytes)).verify()
        # Open again for actual use, from a fresh stream so no read
        # position is carried over from the verification pass.
        return Image.open(io.BytesIO(image_bytes))
    except UnidentifiedImageError as e:
        print("Error decoding or identifying image:", e)
        return None

# Lesson 2: Image Captioning
def captioner(image):
    """Send ``image`` to the image-to-text endpoint and return its caption.

    The full response is printed before the caption is extracted.
    """
    encoded_image = image_to_base64_str(image)
    response = get_completion(
        inputs=encoded_image,
        parameters=None,
        ENDPOINT_URL=ITT_ENDPOINT,
    )
    print(response)
    first_candidate = response[0]
    return first_candidate['generated_text']

# Lesson 3: Image Generation App
def generate(prompt):
    """Send ``prompt`` to the text-to-image endpoint and decode the reply.

    Returns whatever ``base64_to_pil`` produces for the endpoint's response.
    """
    return base64_to_pil(
        get_completion(inputs=prompt, parameters=None, ENDPOINT_URL=TTI_ENDPOINT)
    )



In [8]:
#######################################################
# Step 4: First attempt, just captioning
#######################################################


In [9]:
import gradio as gr 
# Captioning-only app: upload an image, press the button, read the caption.
with gr.Blocks() as demo:
    gr.Markdown("# Describe-and-Generate game")
    image_upload = gr.Image(label="Your first image", type="pil")
    btn_caption = gr.Button("Generate caption")
    caption = gr.Textbox(label="Generated caption")

    # Button click: uploaded image -> captioner -> caption textbox.
    btn_caption.click(
        fn=captioner,
        inputs=[image_upload],
        outputs=[caption],
    )

gr.close_all()
# Port is read from the PORT1 environment variable.
demo.launch(
    share=True,
    server_port=int(os.environ['PORT1']),
)


Running on local URL:  http://127.0.0.1:7860
Running on public URL: https://71d7001adfa333b37f.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




In [10]:
#######################################################
# Step 5: Let's add generation
#######################################################


In [11]:
# Two-step app: caption the upload, then generate an image from that caption.
with gr.Blocks() as demo:
    gr.Markdown("# Describe-and-Generate game ")
    image_upload = gr.Image(label="Your first image", type="pil")
    btn_caption = gr.Button("Generate caption")
    caption = gr.Textbox(label="Generated caption")
    btn_image = gr.Button("Generate image")
    image_output = gr.Image(label="Generated Image")

    # First button: uploaded image -> captioner -> caption textbox.
    btn_caption.click(
        fn=captioner,
        inputs=[image_upload],
        outputs=[caption],
    )
    # Second button: caption textbox -> generate -> output image.
    btn_image.click(
        fn=generate,
        inputs=[caption],
        outputs=[image_output],
    )

gr.close_all()
# Port is read from the PORT2 environment variable.
demo.launch(
    share=True,
    server_port=int(os.environ['PORT2']),
)

Running on local URL:  http://127.0.0.1:7861
Running on public URL: https://b7ee4d27008fe025b3.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




Traceback (most recent call last):
  File "/Users/jubaidatasnim/.pyenv/versions/3.9.9/lib/python3.9/site-packages/gradio/queueing.py", line 536, in process_events
    response = await route_utils.call_process_api(
  File "/Users/jubaidatasnim/.pyenv/versions/3.9.9/lib/python3.9/site-packages/gradio/route_utils.py", line 322, in call_process_api
    output = await app.get_blocks().process_api(
  File "/Users/jubaidatasnim/.pyenv/versions/3.9.9/lib/python3.9/site-packages/gradio/blocks.py", line 1935, in process_api
    result = await self.call_function(
  File "/Users/jubaidatasnim/.pyenv/versions/3.9.9/lib/python3.9/site-packages/gradio/blocks.py", line 1520, in call_function
    prediction = await anyio.to_thread.run_sync(  # type: ignore
  File "/Users/jubaidatasnim/.pyenv/versions/3.9.9/lib/python3.9/site-packages/anyio/to_thread.py", line 56, in run_sync
    return await get_async_backend().run_sync_in_worker_thread(
  File "/Users/jubaidatasnim/.pyenv/versions/3.9.9/lib/python3.9/

In [12]:
#######################################################
# Step 6: Doing it all at once! 
#        Lesson 2: Image Captioning
#        Lesson 3: Image Generation App
#######################################################


In [13]:
import gradio as gr 

def image_to_base64_str(pil_image):
    """Return the PNG encoding of ``pil_image`` as a base64 string."""
    buffer = io.BytesIO()
    pil_image.save(buffer, format='PNG')
    return base64.b64encode(buffer.getvalue()).decode('utf-8')

def base64_to_pil(img_base64):
    """Decode base64 image data and open it with ``Image.open``.

    No errors are handled here: invalid base64 or unreadable image data
    propagates to the caller.
    """
    raw_bytes = base64.b64decode(img_base64)
    return Image.open(io.BytesIO(raw_bytes))

def captioner(image):
    """Caption ``image`` via the image-to-text endpoint.

    Prints the first element of the response, then returns its
    'generated_text' value.
    """
    encoded_image = image_to_base64_str(image)
    response = get_completion(
        inputs=encoded_image,
        parameters=None,
        ENDPOINT_URL=ITT_ENDPOINT,
    )
    print(response[0])
    return response[0]['generated_text']

# Define the generate function to create an image based on the caption
def generate(caption):
    """Request an image for ``caption`` from the text-to-image endpoint.

    The endpoint's response is passed straight to ``base64_to_pil``.
    """
    return base64_to_pil(
        get_completion(inputs=caption, parameters=None, ENDPOINT_URL=TTI_ENDPOINT)
    )


# Function to generate caption and image based on uploaded image
def caption_and_generate(image):
    """Caption ``image``, then generate a new image from that caption.

    Returns:
        list: ``[caption, generated_image]``, in that order.
    """
    description = captioner(image)
    return [description, generate(description)]

# One-click app: a single button captions the upload and generates an image.
with gr.Blocks() as demo:
    gr.Markdown("# Describe-and-Generate game")
    image_upload = gr.Image(label="Your first image", type="pil")
    btn_all = gr.Button("Caption and generate")
    caption = gr.Textbox(label="Generated caption")
    image_output = gr.Image(label="Generated Image")

    # caption_and_generate returns [caption, image], matching the outputs order.
    btn_all.click(
        fn=caption_and_generate,
        inputs=[image_upload],
        outputs=[caption, image_output],
    )

gr.close_all()
# Port is read from the PORT3 environment variable.
demo.launch(
    share=True,
    server_port=int(os.environ['PORT3']),
)

Running on local URL:  http://127.0.0.1:7863
Running on public URL: https://83c9602de3553a1096.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




Traceback (most recent call last):
  File "/Users/jubaidatasnim/.pyenv/versions/3.9.9/lib/python3.9/site-packages/gradio/queueing.py", line 536, in process_events
    response = await route_utils.call_process_api(
  File "/Users/jubaidatasnim/.pyenv/versions/3.9.9/lib/python3.9/site-packages/gradio/route_utils.py", line 322, in call_process_api
    output = await app.get_blocks().process_api(
  File "/Users/jubaidatasnim/.pyenv/versions/3.9.9/lib/python3.9/site-packages/gradio/blocks.py", line 1935, in process_api
    result = await self.call_function(
  File "/Users/jubaidatasnim/.pyenv/versions/3.9.9/lib/python3.9/site-packages/gradio/blocks.py", line 1520, in call_function
    prediction = await anyio.to_thread.run_sync(  # type: ignore
  File "/Users/jubaidatasnim/.pyenv/versions/3.9.9/lib/python3.9/site-packages/anyio/to_thread.py", line 56, in run_sync
    return await get_async_backend().run_sync_in_worker_thread(
  File "/Users/jubaidatasnim/.pyenv/versions/3.9.9/lib/python3.9/

In [1]:
#test
import os
import io
import base64
import requests
import json
from PIL import Image
from dotenv import load_dotenv, find_dotenv
import gradio as gr

# Load API keys from .env file
_ = load_dotenv(dotenv_path=find_dotenv())
# Each lookup raises KeyError when its variable is missing.
hf_api_key = os.environ["HF_API_KEY"]
# text-to-image endpoint URL
TTI_ENDPOINT = os.environ["HF_API_TTI_BASE"]
# image-to-text endpoint URL
ITT_ENDPOINT = os.environ["HF_API_ITT_BASE"]

# Helper function to make API request and handle JSON/binary responses
def get_completion(inputs, parameters=None, ENDPOINT_URL="", timeout=120):
    """POST ``inputs`` (and optional ``parameters``) to an inference endpoint.

    Args:
        inputs: JSON-serialisable value sent under the "inputs" key.
        parameters: Optional value sent under the "parameters" key; the key
            is left out of the payload when this is None.
        ENDPOINT_URL: URL the request is posted to.
        timeout: Seconds handed to ``requests.post``; pass None to wait
            without limit.

    Returns:
        The decoded JSON body when the response parses as JSON, otherwise
        the raw response bytes (``response.content``).
    """
    headers = {
        "Authorization": f"Bearer {hf_api_key}",
        "Content-Type": "application/json"
    }
    payload = {"inputs": inputs}
    if parameters is not None:
        payload["parameters"] = parameters

    response = requests.post(
        ENDPOINT_URL,
        headers=headers,
        data=json.dumps(payload),
        timeout=timeout,
    )

    try:
        return response.json()
    except ValueError:
        # ValueError is the common base of json.JSONDecodeError and of the
        # decode errors raised when requests is backed by simplejson, so the
        # binary fallback works regardless of which one response.json() raises.
        return response.content

# Function to convert PIL image to base64 string
def image_to_base64_str(pil_image):
    """Encode ``pil_image`` as PNG and return the result as base64 text."""
    png_stream = io.BytesIO()
    pil_image.save(png_stream, format='PNG')
    b64_bytes = base64.b64encode(png_stream.getvalue())
    return str(b64_bytes, 'utf-8')

# Function to convert base64 string to PIL image
def base64_to_pil(img_base64):
    """Decode base64 data and open it as a PIL image.

    Args:
        img_base64: base64-encoded image data.

    Returns:
        The opened image, or None when the input is not valid base64 or the
        decoded bytes are not recognised as an image (a message is printed).
    """
    # Local import: this cell's import block does not bring binascii in.
    import binascii

    try:
        image_bytes = base64.b64decode(img_base64)
    except binascii.Error as e:
        print("Error decoding image:", e)
        return None

    # Only `Image` is imported from PIL in this cell, so the exception class
    # is imported here instead of referencing the undefined name `PIL`.
    from PIL import UnidentifiedImageError

    try:
        return Image.open(io.BytesIO(image_bytes))
    except UnidentifiedImageError as e:
        print("Error decoding image:", e)
        return None

# Image Captioning Function
def captioner(image):
    """Caption ``image`` via the image-to-text endpoint.

    Args:
        image: Image accepted by ``image_to_base64_str``.

    Returns:
        str: the first result's 'generated_text', or the fixed message
        "Error generating caption." when the response does not have the
        expected ``[{'generated_text': ...}, ...]`` form (the response is
        printed in that case).
    """
    base64_image = image_to_base64_str(image)
    result = get_completion(base64_image, None, ITT_ENDPOINT)

    # Guard every step of result[0]['generated_text']: an empty list would
    # raise IndexError and a non-dict first element would not support the
    # key lookup.
    if (
        isinstance(result, list)
        and result
        and isinstance(result[0], dict)
        and 'generated_text' in result[0]
    ):
        return result[0]['generated_text']

    print("Error in captioning response:", result)
    return "Error generating caption."

# Image Generation Function
def generate(prompt):
    """Generate an image for ``prompt`` via the text-to-image endpoint.

    Handles the two response forms ``get_completion`` can hand back:
    raw bytes (opened directly as an image) and a dict carrying the image
    under 'generated_image_base64'.

    Returns:
        The image, or None on any failure (a message is printed). None is
        used instead of an error string because the result is routed to an
        image output, and it matches how ``base64_to_pil`` reports failure.
    """
    output = get_completion(prompt, None, TTI_ENDPOINT)

    if isinstance(output, bytes):
        try:
            # Interpret the binary response directly as an image.
            return Image.open(io.BytesIO(output))
        except OSError as e:
            # OSError is what the original IOError alias names; catching it
            # avoids referencing `PIL`, which this notebook never imports.
            print("Error interpreting binary image data:", e)
            return None

    if isinstance(output, dict):
        if 'generated_image_base64' in output:
            return base64_to_pil(output['generated_image_base64'])
        print("Error in image generation response:", output)
        return None

    print("Error: Unexpected response format.")
    return None




# Gradio interface
def caption_and_generate(image):
    """Run captioning, then image generation, on an uploaded image.

    Returns:
        list: ``[caption, generated_image]``, in that order.
    """
    description = captioner(image)
    return [description, generate(description)]

# One-click app: a single button captions the upload and generates an image.
with gr.Blocks() as demo:
    gr.Markdown("# Describe-and-Generate game")
    image_upload = gr.Image(label="Your first image", type="pil")
    btn_all = gr.Button("Caption and generate")
    caption = gr.Textbox(label="Generated caption")
    image_output = gr.Image(label="Generated Image")

    # caption_and_generate returns [caption, image], matching the outputs order.
    btn_all.click(
        fn=caption_and_generate,
        inputs=[image_upload],
        outputs=[caption, image_output],
    )

# Port is read from the PORT3 environment variable.
demo.launch(
    share=True,
    server_port=int(os.environ['PORT3']),
)


Running on local URL:  http://127.0.0.1:7863
Running on public URL: https://5748e4d091d379c1cc.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




Error: Response is binary but not in base64 format.


In [14]:
# Clean-up cell, run after the demos above have been launched.
# NOTE(review): presumably this stops every Gradio app still running in this
# kernel so the PORT1-PORT3 ports are freed - confirm against the Gradio docs.
gr.close_all()

In [15]:
import os
import requests

# Read the token from the environment, as the rest of the notebook does,
# instead of pasting it into the cell where it would be saved with the file.
HF_API_KEY = os.environ["HF_API_KEY"]
API_URL = "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-base"
headers = {"Authorization": f"Bearer {HF_API_KEY}"}

# Bounded wait so an unreachable API cannot hang the cell.
response = requests.get(API_URL, headers=headers, timeout=30)
# NOTE(review): this treats any HTTP 200 from the model URL as proof that the
# token is accepted - confirm the endpoint actually rejects bad tokens.
if response.status_code == 200:
    print("API Key is valid")
else:
    print(f"API Key issue: {response.text}")


API Key is valid
[{'generated_text': 'the virgin mountains covered in snow'}]
[{'generated_text': 'zion trail in zion canyon'}]
{'generated_text': 'the red rocks of zion zion zion zion zion zion zion zion zion zion zion zion zion zion zion'}
