In [116]:
from ultralytics import YOLO, RTDETR
from pathlib import Path 
import cv2
from settings import ROOT_DIR
import torch
from dotenv import load_dotenv
import os
import requests
import urllib
from io import BytesIO
from PIL import Image, ImageDraw
import matplotlib.pyplot as plt
import numpy as np
import ipywidgets as widgets

%matplotlib inline

PROJECT_DIR = Path(ROOT_DIR)
os.chdir(PROJECT_DIR)
_ = load_dotenv()

In [2]:
# model_name = 'yolo_tile_best'
# model_path = PROJECT_DIR / 'best_wts' / f'{model_name}.torchscript'
# model = torch.jit.load(model_path)
yolo = YOLO('./best_wts/yolo_tile_best.pt')
detr = RTDETR('./best_wts/detr_full_frz_best.pt')


In [110]:
DEFAULTS = {
    'size': 640,
    'heading': 140,
    'pitch': 10,
    'fov': 50
}

In [114]:
def get_sv_img(
    location: str,  
    key_name: str = 'GOOGLE_KEY',
    size: int = DEFAULTS['size'],
    heading: int = DEFAULTS['heading'],
    pitch: int = DEFAULTS['pitch'],
    fov: int = DEFAULTS['fov']
) -> Image:
    img_params = {
        'location': location,
        'size': f'{size}x{size}',
        'heading': heading,
        'pitch': pitch,
        'fov': fov,
        'key': os.getenv(key_name)
    }
    r = requests.get('https://maps.googleapis.com/maps/api/streetview', params = urllib.parse.urlencode(img_params))
    img = Image.open(BytesIO(r.content))
    return img

def show_img(img, title = None, conv = None) -> None:
    if conv is not None:
        img = cv2.cvtColor(img, conv)
    fig = plt.imshow(img)
    if title is not None:
        fig.title(title)
    fig.tight_layout()
    fig.axis('off')
    # plt.show()
    return fig
    
def label_img(img, model) -> Image:
    pred = model.predict(img, device = 'cpu')[0].plot(labels = False)
    pred = cv2.cvtColor(pred, cv2.COLOR_BGR2RGB)
    return Image.fromarray(pred)

def compare_imgs(img1, img2, size = 640):
    cvs = Image.new('RGB', (size * 2, size))
    cvs.paste(img1, (0, 0))
    cvs.paste(img2, (size, 0))
    # cvs.show()
    return cvs

In [121]:
output = widgets.Output()
lbl_style = {'description_width': '200px'}
layout = widgets.Layout(width='600px')
location = widgets.Text(value='33rd & Loch Raven Baltimore MD',
                        description='Location',
                        layout=layout,
                        style=lbl_style)
size = widgets.IntSlider(value=DEFAULTS['size'],
                         min=100,
                         max=1024,
                         layout=layout,
                         style=lbl_style,
                         description='Image size')
heading = widgets.IntSlider(value=DEFAULTS['heading'],
                            min=0,
                            max=360,
                            layout=layout,
                            style=lbl_style,
                            description='Heading (rotation)')
pitch = widgets.IntSlider(value=DEFAULTS['pitch'],
                          min=0,
                          max=40,
                          layout=layout,
                          style=lbl_style,
                          description='Pitch (tilt)')
fov = widgets.IntSlider(value=DEFAULTS['fov'],
                        min=10,
                        max=120,
                        layout=layout,
                        style=lbl_style,
                        description='Field of view (zoom)')

button = widgets.Button(description='Get image')

display(location, size, heading, pitch, fov, button, output)


def button_click(b):
    img = get_sv_img(location=location.value,
                     size=size.value,
                     heading=heading.value,
                     pitch=pitch.value,
                     fov=fov.value)
    if img is not None:
        detr_pred = label_img(img, detr)
        yolo_pred = label_img(img, yolo)
        with output:
            output.clear_output()
            margin = 20
            cvs = Image.new('RGB', (size.value * 3, size.value))
            # draw = ImageDraw.Draw(cvs)
            # draw.text((0, 0), 'Original image')
            # draw.text((size.value, 0), 'DETR prediction')
            # draw.text((size.value * 2, 0), 'YOLO prediction')
            cvs.paste(img, (0, 0))
            cvs.paste(detr_pred, (size.value, 0))
            cvs.paste(yolo_pred, (size.value * 2, 0))
            print('Original image, DETR prediction, YOLO prediction')
            display(cvs)
            
    return img

sv_img = button.on_click(button_click)


Text(value='33rd & Loch Raven Baltimore MD', description='Location', layout=Layout(width='600px'), style=TextS…

IntSlider(value=640, description='Image size', layout=Layout(width='600px'), max=1024, min=100, style=SliderSt…

IntSlider(value=140, description='Heading (rotation)', layout=Layout(width='600px'), max=360, style=SliderStyl…

IntSlider(value=10, description='Pitch (tilt)', layout=Layout(width='600px'), max=40, style=SliderStyle(descri…

IntSlider(value=50, description='Field of view (zoom)', layout=Layout(width='600px'), max=120, min=10, style=S…

Button(description='Get image', style=ButtonStyle())

Output()


0: 640x640 2 surveillances, 478.0ms
Speed: 1.5ms preprocess, 478.0ms inference, 0.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 surveillance, 269.8ms
Speed: 1.2ms preprocess, 269.8ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 640)
