In [None]:
import requests
import gradio as gr 
from PIL import Image, ImageDraw
import random

# Base URL of the vision resource; request_vision appends the image-analysis path to it.
ENDPOINT = ""
# Subscription key that request_vision sends in the "Ocp-Apim-Subscription-Key" header.
API_KEY = ""
# The bare string below is a no-op note (written in Korean) listing the request options:
#   features                 : tags, read, caption, denseCaptions, smartCrops, objects, people
#   language                 : en, es, ja, pt, zh, ko (ko works for tags only)
#   Content-Type             : application/octet-stream, multipart/form-data, application/json
#   smartcrops-aspect-ratios : smart-crop ratio between 0.5 and 2.0
''' 
가능한 parameter 목록 
features : tags, read, caption, denseCaptions, smartCrops, objects, people
language : en, es, ja, pt, zh, ko(tags만 가능)
Content-Type : application/octet-stream, multipart/form-data, application/json
smartcrops-aspect-ratios : 스마트크롭시 수치 0.5 ~ 2.0
'''
def request_vision(features, image_path, language, smart_crop_ratio=0):
    """Post an image to the image-analysis endpoint and return the parsed JSON.

    Args:
        features: Iterable of feature names; joined with commas into the
            ``features`` query parameter.
        image_path: Path of the image file whose raw bytes form the request body.
        language: Value of the ``language`` query parameter.
        smart_crop_ratio: Aspect ratio for smart cropping. It is sent as
            ``smartcrops-aspect-ratios`` only when it is at least 0.5; ``None``
            or a smaller number leaves the parameter out.

    Returns:
        The decoded JSON body when the service answers with status 200,
        otherwise ``None``.

    Raises:
        OSError: If the image file cannot be read.
        requests.RequestException: On connection problems or when the service
            does not answer within the timeout.
    """
    endpoint = f"{ENDPOINT}/computervision/imageanalysis:analyze"

    params = {
        "api-version": "2024-02-01",
        "features": ','.join(features),
        "language": language,
    }

    # The UI number field yields None while it is empty; comparing None with a
    # float would raise TypeError, so rule it out before the range check.
    if smart_crop_ratio is not None and smart_crop_ratio >= 0.5:
        params["smartcrops-aspect-ratios"] = smart_crop_ratio

    headers = {
        "Ocp-Apim-Subscription-Key": API_KEY,
        "Content-Type": "application/octet-stream",
    }
    with open(image_path, "rb") as image_to_read:
        image_data = image_to_read.read()

    # Without a timeout a stalled connection would block the caller forever.
    response = requests.post(
        endpoint,
        params=params,
        headers=headers,
        data=image_data,
        timeout=30,
    )

    if response.status_code != 200:
        return None
    return response.json()


In [None]:
def caption_result(response_json):
    """Return the image caption as a one-element list of ``(text, confidence)``."""
    caption = response_json['captionResult']
    return [(caption['text'], caption['confidence'])]

def tags_result(response_json):
    """Return every tag in the response as a ``(name, confidence)`` tuple."""
    return [
        (tag['name'], tag['confidence'])
        for tag in response_json['tagsResult']['values']
    ]

def dense_result(response_json):
    """Return each dense caption as ``(text, confidence, boundingBox)``."""
    return [
        (entry['text'], entry['confidence'], entry['boundingBox'])
        for entry in response_json['denseCaptionsResult']['values']
    ]

def smartcrops_result(response_json):
    """Return the first smart-crop suggestion as ``[(aspectRatio, boundingBox)]``.

    Only the first entry of ``smartCropsResult.values`` is used. An empty
    ``values`` list yields an empty result instead of raising ``IndexError``.
    """
    values = response_json['smartCropsResult']['values']
    if not values:
        return []
    first = values[0]
    return [(first['aspectRatio'], first['boundingBox'])]

def object_result(response_json):
    """Return each detected object as ``(name, confidence, boundingBox)``.

    The name and confidence come from the first tag of every object entry.
    """
    detections = []
    for entry in response_json['objectsResult']['values']:
        top_tag = entry['tags'][0]
        detections.append((top_tag['name'], top_tag['confidence'], entry['boundingBox']))
    return detections

def people_result(response_json):
    """Return each detected person as ``(confidence, boundingBox)``."""
    return [
        (person['confidence'], person['boundingBox'])
        for person in response_json['peopleResult']['values']
    ]
    

In [None]:
def random_color():
    """Return a random colour as a tuple of three integers, each in 0..255."""
    return tuple(random.randint(0, 255) for _ in range(3))

In [None]:
def draw_image(polygonlist, image_path):
    """Open the image at ``image_path`` and outline every bounding box on it.

    Each entry of ``polygonlist`` is a mapping with the keys ``x``, ``y``,
    ``w`` and ``h`` (e.g. ``{'x': 0, 'y': 10, 'h': 20, 'w': 30}``). Every box
    is drawn with its own random outline colour and a line width of 10.
    Returns the drawn-on image object.
    """
    image = Image.open(image_path)
    canvas = ImageDraw.Draw(image)

    # Draw one closed four-corner polygon per box.
    for box in polygonlist:
        outline_color = random_color()
        left, top = box['x'], box['y']
        right = left + box['w']
        bottom = top + box['h']
        corners = [(left, top), (right, top), (right, bottom), (left, bottom)]
        canvas.polygon(corners, outline=outline_color, width=10)

    return image

In [None]:

# Crop an image
def crop_image(image_path, image, crop_list):
    """Crop an image to the first box in ``crop_list``.

    When ``image_path`` is truthy the image is opened from that path and the
    ``image`` argument is ignored; otherwise the passed ``image`` is cropped.
    ``crop_list[0]`` is a mapping with the keys ``x``, ``y``, ``w`` and ``h``,
    e.g. ``{'x': 126, 'y': 168, 'w': 2772, 'h': 3696}``.
    """
    if image_path:
        image = Image.open(image_path)

    box = crop_list[0]
    left = box['x']
    right = left + box['w']
    top = box['y']
    bottom = top + box['h']
    return image.crop((left, top, right, bottom))


In [None]:
def features_selects(response_json, selects):
    """Collect the results of the selected features from an analysis response.

    Args:
        response_json: Parsed analysis response, or ``None`` when the request
            failed (``request_vision`` returns ``None`` for non-200 answers).
        selects: Iterable of feature names to extract. Names other than
            caption, tags, denseCaptions, smartCrops, objects and people are
            ignored.

    Returns:
        A tuple ``(results_sum, polygonlist, crop_list)``:
        ``results_sum`` maps each processed feature name to its text results,
        ``polygonlist`` holds the bounding boxes of dense captions, objects
        and people, and ``crop_list`` holds the smart-crop bounding boxes.
        All three are empty when ``response_json`` is ``None``.
    """
    results_sum = dict()
    polygonlist = []
    crop_list = []

    # A failed request leaves nothing to extract; subscripting None would
    # otherwise raise TypeError in the per-feature parsers.
    if response_json is None:
        return results_sum, polygonlist, crop_list

    for select in selects:
        if select == "caption":
            results_sum["caption"] = caption_result(response_json)
        elif select == "tags":
            results_sum["tags"] = tags_result(response_json)
        elif select == "denseCaptions":
            results = dense_result(response_json)  # (text, confidence, boundingbox)
            results_sum["denseCaptions"] = [item[:2] for item in results]
            polygonlist.extend(item[-1] for item in results)
        elif select == "smartCrops":
            results = smartcrops_result(response_json)  # (aspectratio, boundingbox)
            results_sum["smartCrops"] = [item[0] for item in results]
            crop_list.extend(item[-1] for item in results)
        elif select == "objects":
            results = object_result(response_json)  # (name, confidence, boundingbox)
            results_sum["objects"] = [item[:2] for item in results]
            polygonlist.extend(item[-1] for item in results)
        elif select == "people":
            results = people_result(response_json)  # (confidence, boundingbox)
            results_sum["people"] = [item[0] for item in results]
            polygonlist.extend(item[-1] for item in results)

    return results_sum, polygonlist, crop_list


In [None]:
def _format_confidence_lines(entries):
    """Render ``(label, confidence)`` entries as one bullet line each."""
    return ''.join(
        f'- {entry[0]} (신뢰도 {entry[1]:.3f})\n' for entry in entries
    )


def results_refactoring(results_sum):
    """Turn the collected feature results into display strings.

    Args:
        results_sum: Mapping of feature name to a list of result tuples whose
            first two items are a label and a confidence. Only the keys
            ``tags``, ``denseCaptions``, ``objects`` and ``caption`` are read.

    Returns:
        A tuple ``(tags, denseCaptions, objects, caption)`` of strings. Each
        string has one ``- label (신뢰도 0.123)`` line per entry and is empty
        when the feature is absent. Only the first caption entry is rendered.
    """
    tags_text = _format_confidence_lines(results_sum.get('tags', ()))
    dense_text = _format_confidence_lines(results_sum.get('denseCaptions', ()))
    objects_text = _format_confidence_lines(results_sum.get('objects', ()))
    # Slicing keeps just the first caption and tolerates an empty list.
    caption_text = _format_confidence_lines(results_sum.get('caption', ())[:1])
    return tags_text, dense_text, objects_text, caption_text

In [None]:
# Analysis features selectable for each UI language, and the language choices.
features_ko = ["tags"]
features_en = ["tags", "caption", "denseCaptions", "smartCrops", "objects", "people"]
languages = ["en", "ko"]


def _font_stack(generic_family):
    """Return fresh Google-font entries followed by the generic fallback family."""
    web_fonts = [
        gr.themes.GoogleFont(font_name)
        for font_name in ('Gowun Batang', 'IBM Plex Sans KR ', '42dot Sans ')
    ]
    return [*web_fonts, generic_family]


# Text size scale handed to the theme, smallest to largest.
_text_scale = gr.themes.Size(
    xxs="10px",
    xs="12px",
    sm="13px",
    md="15px",
    lg="17px",
    xl="24px",
    xxl="28px",
)

# Per-variable overrides applied on top of the base theme.
_theme_overrides = {
    'body_background_fill': '*background_fill_secondary',
    'body_background_fill_dark': '*neutral_800',
    'body_text_color': '*neutral_700',
    'body_text_size': '*text_md',
    'embed_radius': '*radius_md',
    'block_radius': '*radius_md',
    'block_title_radius': '*radius_md',
    'block_title_text_size': '*text_md',
    'container_radius': '*radius_md',
    'input_text_size': '*text_sm',
    'button_large_text_size': '*text_md',
    'form_gap_width': '0px',
}

theme = gr.themes.Origin(
    primary_hue="rose",
    secondary_hue="red",
    neutral_hue="slate",
    text_size=_text_scale,
    radius_size="lg",
    font=_font_stack('sans-serif'),
    font_mono=_font_stack('monospace'),
).set(**_theme_overrides)

# Gradio UI: option controls and image upload on top, annotated image and
# per-feature text results below. The analysis runs whenever the uploaded
# image changes.
with gr.Blocks(theme=theme) as demo :
    gr.Markdown("## AI vision studio demo", height='40px')
    # Option controls
    with gr.Row() :
        with gr.Column() :
            gr.Markdown("#### 기능 선택", height='25px')
            select_language = gr.Radio(label="언어 선택", choices=languages, value="en")
            languages_detect = gr.Textbox(label=None, show_label=False, value="영어는 모든 기능이 가능합니다. 여러 기능을 선택할 수 있습니다.", visible=True)
            select_features = gr.CheckboxGroup(label="기능 선택", choices=features_en, interactive=True)
            smart_crop_ratio = gr.Number(label="Smartcrop의 원하는 비율을 입력하세요. (0.5 ~ 2.0)", minimum=0.0, maximum=2.0, visible=False, value=None)
            clear_btn = gr.Button("전체 지우기")
        # Image upload
        with gr.Column() :
            gr.Markdown("#### 이미지 업로드", height='25px')
            upload_image = gr.Image(label="이미지 업로드", width="500px", type="filepath")

    gr.Markdown("### 결과 확인", height='35px')
    with gr.Row() :
        # Results
        with gr.Column() :
            gr.Markdown("#### 이미지 바운딩 결과", height='25px')
            output_image = gr.Image(label="결과 이미지", type="pil")
        with gr.Column() :
            gr.Markdown("#### 텍스트 결과", height='25px')
            # Each textbox stays hidden until its feature produces text.
            output_tags = gr.Textbox(label="Tag 결과", visible=False)
            output_dense = gr.Textbox(label="denseCaptions 결과", visible=False)
            output_objects = gr.Textbox(label="Object 결과", visible=False)
            output_caption = gr.Textbox(label="Caption 결과", visible=False)

    def language_selects(select) :
        """Return updates for the feature choices and the hint text of a language."""
        if select == "en" :
            return gr.update(choices=features_en), gr.update(value="영어는 모든 기능이 가능합니다. 여러 기능을 선택할 수 있습니다.", visible=True)
        return gr.update(choices=features_ko), gr.update(value="한국어는 tags 기능만 가능합니다.", visible=True)

    def select_smart_crop(select) :
        """Show the ratio input only while "smartCrops" is among the selected features."""
        # The selection may arrive as None after a reset; treat that as empty.
        return gr.update(visible="smartCrops" in (select or []))

    def textbox_visible_to_none(output_tags, output_dense, output_objects, output_caption) :
        """Return one visibility update per result text: visible only when non-empty."""
        return tuple(
            gr.update(visible=bool(text))
            for text in (output_tags, output_dense, output_objects, output_caption)
        )

    def _image_only_outputs(image) :
        """Build the nine outputs for a state without text results (textboxes hidden)."""
        a, b, c, d = textbox_visible_to_none(None, None, None, None)
        return image, a, None, b, None, c, None, d, None

    def change_image(image_path, selects, language, smart_crop_ratio) :
        """Analyse the uploaded image and build the values for all result outputs.

        Returns nine values matching the ``upload_image.change`` outputs: the
        result image, then a visibility update and the text for each of the
        tags, dense captions, objects and caption textboxes.
        """
        if not image_path :
            # Image removed: clear everything and hide the result textboxes.
            return _image_only_outputs(None)
        if not selects :
            # Nothing to analyse; show the upload unchanged without calling the service.
            return _image_only_outputs(Image.open(image_path))

        # The ratio field yields None while empty; pass 0 so no ratio is sent.
        response_json = request_vision(selects, image_path, language, smart_crop_ratio=smart_crop_ratio or 0)
        if response_json is None :
            # request_vision returns None for any non-200 answer.
            raise gr.Error("이미지 분석 요청에 실패했습니다. ENDPOINT, API_KEY와 선택한 기능을 확인하세요.")

        resultssum, polygonlist, crop_list = features_selects(response_json, selects)
        if polygonlist :
            # Draw the boxes first so a following crop keeps them.
            image = draw_image(polygonlist, image_path)
            if crop_list :
                image = crop_image(None, image, crop_list)
        elif crop_list :
            image = crop_image(image_path, None, crop_list)
        else :
            image = Image.open(image_path)

        tags_text, dense_text, objects_text, caption_text = results_refactoring(resultssum)
        a, b, c, d = textbox_visible_to_none(tags_text, dense_text, objects_text, caption_text)
        return image, a, tags_text, b, dense_text, c, objects_text, d, caption_text

    def clear_total() :
        """Reset the feature selection, both images and the four result texts."""
        return None, None, None, None, None, None, None

    select_language.change(language_selects, inputs=[select_language], outputs=[select_features, languages_detect])
    select_features.change(select_smart_crop, inputs=[select_features], outputs=[smart_crop_ratio])
    # Every textbox is listed twice: once for its visibility update, once for its text.
    upload_image.change(change_image, inputs=[upload_image, select_features, select_language, smart_crop_ratio], outputs=[output_image, output_tags, output_tags, output_dense, output_dense, output_objects, output_objects,  output_caption, output_caption])
    clear_btn.click(clear_total, outputs=[select_features, upload_image, output_image, output_tags, output_dense, output_objects, output_caption])
demo.launch()