In [6]:
# Рекомендуемое окружение: Python 3.8+
!pip install torch torchvision transformers pillow numpy ultralytics tqdm
# Если планируете BLIP VQA из репозитория salesforce/BLIP:
!pip install git+https://github.com/salesforce/BLIP.git@main
# (опционально) если хотите yolov8:
!pip install ultralytics


pip is disabled in bash. Please use %pip magic.


Exception: Process exited with code 1

In [7]:
#!/usr/bin/env python3
# multimodel_llava_pipeline.py
# Объединяет: rotation correction (resnet50), object detection (DETR / optional YOLO),
# environment crop (нижняя часть под машиной), BLIP captioning + optional BLIP VQA (если есть).

import os
import sys
import json
from pathlib import Path
from typing import Dict, Any, List, Optional

import numpy as np
from PIL import Image
from tqdm import tqdm

import torch

# Transformers / BLIP
from transformers import BlipProcessor, BlipForConditionalGeneration, DetrImageProcessor, DetrForObjectDetection

# Optional modules
try:
    from ultralytics import YOLO
    _HAS_YOLO = True
except Exception:
    YOLO = None
    _HAS_YOLO = False

# Optional BLIP VQA from local BLIP repo (if installed via pip from github)
try:
    # from models.blip_vqa import blip_vqa  # this will work if BLIP repo is in python path
    from models.blip_vqa import blip_vqa  # type: ignore
    _HAS_BLIP_VQA = True
except Exception:
    blip_vqa = None
    _HAS_BLIP_VQA = False

# Optional rotation model (resnet50)
try:
    import torch.nn as nn
    from torchvision import models, transforms
    _HAS_TORCHVISION = True
except Exception:
    _HAS_TORCHVISION = False

# ========== Настройки (отредактируйте пути) ==========
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Device: {DEVICE}")

IMAGE_FOLDER = "/home/jupyter/project/Ilya/dataset_images"  # <- поменяйте под себя
OUTPUT_JSON = "/home/jupyter/project/Ilya/multimodel_results.json"

# Если локально есть модель поворота (resnet50 trained with 4 classes 0/90/180/270)
ROTATION_MODEL_PATH = "../MODELS/resnet50_rotation_car_99.76.pth"  # если нет — пропустит шаг поворота

# Если у вас есть yolov8 модель (опционально) — укажите путь
YOLO_MODEL_PATH = "../MODELS/yolov8x-oiv7.pt"  # optional

# DETR threshold
DETR_THRESHOLD = 0.5

# Какие COCO классы считаем "машиной"
VEHICLE_CLASSES = {"car", "truck", "bus", "motorcycle", "train", "van", "taxi"}

# Вопрос для VQA (пример)
QUESTION = "Describe only the surroundings near the car, such as buildings, trees, street lights, signs, or pavement. Do not mention the car, vehicle, sky, or clouds."

# ======================================================

# ========== Загрузка моделей трансформеров ==========
print("Loading BLIP captioning...")
blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(DEVICE)

print("Loading DETR (object detection)...")
detr_processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
detr_model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50").to(DEVICE)

# Попытка загрузить YOLO, если есть и файл указан
yolo_model = None
if _HAS_YOLO and YOLO is not None and os.path.exists(YOLO_MODEL_PATH):
    try:
        print("Loading YOLO model (optional)...")
        yolo_model = YOLO(YOLO_MODEL_PATH)
    except Exception as e:
        print("Не удалось загрузить YOLO:", e)
        yolo_model = None
else:
    if _HAS_YOLO:
        print("YOLO установлен, но путь к модели не найден или модель не указана:", YOLO_MODEL_PATH)
    else:
        print("ultralytics (YOLO) не установлен — пропускаем.")

# Попытка загрузить BLIP VQA (опционально)
blip_vqa_model = None
if _HAS_BLIP_VQA and blip_vqa is not None:
    try:
        print("Loading BLIP VQA (optional) from BLIP repo...")
        # пример download URL - если вы хотите использовать готовую pretrained weight, укажите его как в вашем примере
        # model_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_vqa_capfilt_large.pth'
        # model = blip_vqa(pretrained=model_url, image_size=480, vit='base', med_config=...)
        # В этом обобщённом скрипте попытаемся создать модель без weights (пользователь обязан настроить)
        blip_vqa_model = None
        # Если вы хотите использовать BLIP VQA — загрузите вручную модель и замените blip_vqa_model = model
        print("BLIP VQA доступен, но автоматическая загрузка весов не настроена. Если хотите — укажите путь/URL и раскомментируйте загрузку.")
    except Exception as e:
        print("BLIP VQA не удалось инициализировать:", e)
        blip_vqa_model = None
else:
    print("BLIP VQA недоступен (BLIP repo не найден). VQA будет выполнен через BLIP captioning fallback.")

# ========== Rotation model (опционально) ==========
rotation_model = None
rotation_transform = None
angle_values = {0: 0, 1: 90, 2: 180, 3: 270}

if _HAS_TORCHVISION and os.path.exists(ROTATION_MODEL_PATH):
    try:
        print("Loading rotation model (resnet50)...")
        rotation_model = models.resnet50(weights=None)
        num_features = rotation_model.fc.in_features
        rotation_model.fc = nn.Linear(num_features, 4)
        rotation_model.load_state_dict(torch.load(ROTATION_MODEL_PATH, map_location=DEVICE))
        rotation_model = rotation_model.to(DEVICE)
        rotation_model.eval()

        rotation_transform = transforms = __import__("torchvision.transforms", fromlist=["transforms"]).transforms.Compose([
            __import__("torchvision.transforms", fromlist=["transforms"]).transforms.Resize((224, 224)),
            __import__("torchvision.transforms", fromlist=["transforms"]).transforms.ToTensor(),
            __import__("torchvision.transforms", fromlist=["transforms"]).transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                                                           std=[0.229, 0.224, 0.225])
        ])
    except Exception as e:
        print("Не удалось загрузить rotation model:", e)
        rotation_model = None
else:
    print("Rotation model не найден или torchvision отсутствует — шаг поворота пропускается.")

# ========== Функции обработки ==========
def correct_rotation_if_available(pil_image: Image.Image) -> Image.Image:
    """Если модель поворота доступна, предсказывает класс и разворачивает изображение."""
    if rotation_model is None or rotation_transform is None:
        return pil_image
    try:
        input_tensor = rotation_transform(pil_image).unsqueeze(0).to(DEVICE)
        with torch.no_grad():
            out = rotation_model(input_tensor)
            pred = int(torch.argmax(out, dim=1).item())
        angle = angle_values.get(pred, 0)
        if angle != 0:
            return pil_image.rotate(-angle, expand=True)
        return pil_image
    except Exception as e:
        print("Rotation correction failed:", e)
        return pil_image

def detect_vehicles_detr(pil_image: Image.Image, threshold: float = DETR_THRESHOLD) -> List[np.ndarray]:
    """Возвращает список bbox'ов (xmin, ymin, xmax, ymax) обнаруженных как vehicles по DETR."""
    try:
        inputs = detr_processor(images=pil_image, return_tensors="pt").to(DEVICE)
        with torch.no_grad():
            outputs = detr_model(**inputs)
        target_sizes = torch.tensor([pil_image.size[::-1]]).to(DEVICE)
        results = detr_processor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=threshold)[0]
        boxes = []
        for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
            class_name = detr_model.config.id2label[int(label.item())]
            if class_name.lower() in VEHICLE_CLASSES:
                boxes.append(box.cpu().numpy())
        return boxes
    except Exception as e:
        print("DETR detection failed:", e)
        return []

def detect_vehicles_yolo(pil_image: Image.Image) -> List[np.ndarray]:
    """Если есть yolov8, возвращает list of boxes (xmin, ymin, xmax, ymax) для vehicle-like classes."""
    if yolo_model is None:
        return []
    try:
        results = yolo_model(pil_image, verbose=False)
        r = results[0]
        boxes = []
        for box in r.boxes:
            cls_id = int(box.cls)
            cls_name = r.names[cls_id].lower()
            if cls_name in VEHICLE_CLASSES or "car" in cls_name or "vehicle" in cls_name:
                xyxy = box.xyxy.cpu().numpy().tolist()  # [xmin, ymin, xmax, ymax]
                boxes.append(np.array(xyxy))
        return boxes
    except Exception as e:
        print("YOLO detection failed:", e)
        return []

def crop_to_environment_from_boxes(pil_image: Image.Image, vehicle_boxes: List[np.ndarray]) -> Image.Image:
    """Обрезает изображение, оставляя область под самым нижним автомобилем (или нижнюю половину, если нет box)."""
    arr = np.array(pil_image)
    h, w = arr.shape[:2]
    if not vehicle_boxes:
        return Image.fromarray(arr[h//2:, :])

    # Найдём самый нижний y_max
    ymaxs = [box[3] for box in vehicle_boxes]
    max_ymax = max(ymaxs)
    crop_y1 = int(max_ymax)
    if crop_y1 >= h:
        crop_y1 = h // 2
    cropped = arr[crop_y1:, :]
    if cropped.size == 0:
        cropped = arr[h//2:, :]
    return Image.fromarray(cropped)

def generate_captions(env_image: Image.Image) -> Dict[str, str]:
    """Генерирует три варианта подписи через BLIP."""
    try:
        inputs = blip_processor(env_image, return_tensors="pt").to(DEVICE)

        with torch.no_grad():
            outputs = blip_model.generate(**inputs, max_length=50, num_beams=5, early_stopping=True)
            base = blip_processor.decode(outputs[0], skip_special_tokens=True)

            detailed_outputs = blip_model.generate(**inputs, max_length=100, num_beams=7, length_penalty=2.0, early_stopping=True)
            detailed = blip_processor.decode(detailed_outputs[0], skip_special_tokens=True)

            alt_outputs = blip_model.generate(**inputs, max_length=60, do_sample=True, temperature=0.9, top_p=0.9)
            alt = blip_processor.decode(alt_outputs[0], skip_special_tokens=True)

        return {"base": base, "detailed": detailed, "alternative": alt}
    except Exception as e:
        print("BLIP caption failed:", e)
        return {"base": "Ошибка", "detailed": "Ошибка", "alternative": "Ошибка"}

def answer_vqa(env_image: Image.Image, question: str) -> str:
    """Пытается ответить на вопрос. Если BLIP VQA доступен — использует её, иначе выполняет fallback (BLIP caption + prompt)."""
    # 1) Если BLIP VQA модель доступна (пользователь должен настроить загрузку) — вызвать её.
    if blip_vqa_model is not None:
        try:
            # Placeholder: конкретный API зависит от реализации blip_vqa interface
            # Пример: answer = blip_vqa_model(image_tensor, [question], train=False, inference='generate')
            # return answer[0]
            return "VQA через BLIP VQA (локально) — ответ не настроен в примере."
        except Exception as e:
            print("BLIP VQA failed:", e)

    # 2) Fallback: сконкатенировать question с prompt и прогнать BLIP captioning (генеративно)
    try:
        # Для более качественного ответа формируем подсказку: добавим instruction + question
        instruction = f"Answer the question about the environment near the car. Question: {question}"
        # BLIP captioning expects just an image; мы попробуем передать instruction в decoder как prompt (форсируем через input_ids)
        # Простая стратегия: сгенерировать несколько caption'ов и вернуть наиболее подходящий (detailed)
        caps = generate_captions(env_image)
        # Простейший "fake VQA": вернуть detailed caption — пользователь может захотеть более точную реализацию
        return caps["detailed"]
    except Exception as e:
        print("VQA fallback failed:", e)
        return "Ошибка при попытке ответить на VQA."

# ========== Main loop ==========
def process_folder(image_folder: str, output_json: str):
    image_folder = Path(image_folder)
    assert image_folder.exists(), f"Папка не найдена: {image_folder}"

    image_extensions = {'.png', '.jpg', '.jpeg', '.bmp'}
    files = [p for p in sorted(image_folder.iterdir()) if p.suffix.lower() in image_extensions]
    print(f"Найдено изображений: {len(files)}")

    results: Dict[str, Any] = {}

    for i, p in enumerate(tqdm(files, desc="Processing")):
        fname = p.name
        try:
            pil = Image.open(p).convert("RGB")
        except Exception as e:
            print(f"Cannot open {p}: {e}")
            continue

        # 1) rotation
        pil_rot = correct_rotation_if_available(pil)

        # 2) detect vehicles (yolo optional, then DETR fallback)
        boxes = []
        if yolo_model is not None:
            boxes = detect_vehicles_yolo(pil_rot)
        if not boxes:
            boxes = detect_vehicles_detr(pil_rot)

        # 3) crop environment
        env_img = crop_to_environment_from_boxes(pil_rot, boxes)

        # 4) captions
        captions = generate_captions(env_img)

        # 5) vqa
        vqa_answer = answer_vqa(env_img, QUESTION)

        # 6) aggregate
        results[fname] = {
            "captions": captions,
            "vqa_answer": vqa_answer,
            "detected_vehicle_boxes": [box.tolist() for box in boxes],
            "rotated": pil_rot.size if pil_rot is not None else None,
            "env_crop_size": env_img.size
        }

        # краткий прогресс в stdout
        print(f"[{i+1}/{len(files)}] {fname}")
        print("  Base:", captions["base"])
        print("  VQA:", vqa_answer)
        print()

    # сохранить json
    os.makedirs(os.path.dirname(output_json), exist_ok=True)
    with open(output_json, "w", encoding="utf-8") as f:
        json.dump(results, f, ensure_ascii=False, indent=2)

    print("Done. Results saved to:", output_json)
    return results

if __name__ == "__main__":
    process_folder(IMAGE_FOLDER, OUTPUT_JSON)


Device: cuda
Loading BLIP captioning...


'(ReadTimeoutError("HTTPSConnectionPool(host='huggingface.co', port=443): Read timed out. (read timeout=10)"), '(Request ID: 8c545b3d-7987-4e8c-9fdc-1e3dc6616f68)')' thrown while requesting HEAD https://huggingface.co/Salesforce/blip-image-captioning-base/resolve/main/preprocessor_config.json
Retrying in 1s [Retry 1/5].


Loading DETR (object detection)...
YOLO установлен, но путь к модели не найден или модель не указана: ../MODELS/yolov8x-oiv7.pt
BLIP VQA недоступен (BLIP repo не найден). VQA будет выполнен через BLIP captioning fallback.
Loading rotation model (resnet50)...
Найдено изображений: 179


Processing:   1%|          | 1/179 [00:01<04:18,  1.45s/it]

[1/179] img_00000.jpeg
  Base: a white van parked on the side of a road
  VQA: a white van parked on the side of a road



Processing:   1%|          | 2/179 [00:02<03:51,  1.31s/it]

[2/179] img_00001.jpeg
  Base: a bus driving down a street next to a curb
  VQA: a white bus driving down the road



Processing:   2%|▏         | 3/179 [00:03<03:45,  1.28s/it]

[3/179] img_00002.jpeg
  Base: a white van parked on the side of the road
  VQA: a white van parked on the side of the road



Processing:   2%|▏         | 4/179 [00:05<03:53,  1.34s/it]

[4/179] img_00003.jpeg
  Base: a white van parked on the side of the road
  VQA: a white van parked on the side of the road



Processing:   3%|▎         | 5/179 [00:06<03:57,  1.36s/it]

[5/179] img_00004.jpeg
  Base: an abstract image of a tree in the woods
  VQA: a black and white photo of a man in a suit



Processing:   3%|▎         | 6/179 [00:08<03:58,  1.38s/it]

[6/179] img_00005.jpeg
  Base: a car parked on the side of a road
  VQA: a car is parked on the side of the road



Processing:   4%|▍         | 7/179 [00:09<04:11,  1.46s/it]

[7/179] img_00006.jpeg
  Base: a black and white photo of a man in a suit
  VQA: a black and white photo of a man in a suit



Processing:   4%|▍         | 8/179 [00:11<03:58,  1.40s/it]

[8/179] img_00007.jpeg
  Base: a car parked on the side of a road
  VQA: a car parked on the side of a road



Processing:   5%|▌         | 9/179 [00:12<03:59,  1.41s/it]

[9/179] img_00008.jpeg
  Base: a man in a suit and tie walking down a street
  VQA: a man in a suit and tie walking down a street



Processing:   6%|▌         | 10/179 [00:13<03:58,  1.41s/it]

[10/179] img_00010.jpeg
  Base: a black and white photo of a man in a suit
  VQA: a black and white photo of a man in a suit



Processing:   6%|▌         | 11/179 [00:15<03:59,  1.42s/it]

[11/179] img_00011.jpeg
  Base: a blurry image of a tree in the woods
  VQA: a black and white photo of a man in a suit



Processing:   7%|▋         | 12/179 [00:16<03:57,  1.42s/it]

[12/179] img_00012.jpeg
  Base: a person riding a bike down a street
  VQA: a person riding a bike on a city street



Processing:   7%|▋         | 13/179 [00:18<03:57,  1.43s/it]

[13/179] img_00013.jpeg
  Base: a woman in a black dress standing in front of a wall
  VQA: a woman with long hair standing in front of a wall



Processing:   8%|▊         | 14/179 [00:19<04:02,  1.47s/it]

[14/179] img_00014.jpeg
  Base: a person walking down the street with their feet on the ground
  VQA: a person walking down the street with their feet on the ground



Processing:   8%|▊         | 15/179 [00:21<04:13,  1.55s/it]

[15/179] img_00016.jpeg
  Base: a police car is parked on the side of the road
  VQA: a police car is parked on the side of the road



Processing:   9%|▉         | 16/179 [00:22<04:04,  1.50s/it]

[16/179] img_00017.jpeg
  Base: an image of a black background with a white border
  VQA: a close up of a black surface with a white background



Processing:   9%|▉         | 17/179 [00:24<04:03,  1.50s/it]

[17/179] img_00018.jpeg
  Base: a black and white photo of a man in a suit
  VQA: a black and white photo of a man in a suit



Processing:  10%|█         | 18/179 [00:25<03:52,  1.45s/it]

[18/179] img_00019.jpeg
  Base: a car parked on the side of a road
  VQA: a car parked on the side of a road



Processing:  11%|█         | 19/179 [00:27<03:50,  1.44s/it]

[19/179] img_00020.jpeg
  Base: a close up of a black curtain with light coming through it
  VQA: a close up of a black curtain with light coming through



Processing:  11%|█         | 20/179 [00:28<03:50,  1.45s/it]

[20/179] img_00021.jpeg
  Base: a car driving down a street at night
  VQA: an image of a man in a suit and tie



Processing:  12%|█▏        | 21/179 [00:29<03:44,  1.42s/it]

[21/179] img_00022.jpeg
  Base: a black and white background with vertical lines
  VQA: a black and white wallpaper with vertical lines



Processing:  12%|█▏        | 22/179 [00:31<03:48,  1.45s/it]

[22/179] img_00023.jpeg
  Base: a red car parked on the side of a road
  VQA: a red car parked on the side of a road



Processing:  13%|█▎        | 23/179 [00:32<03:45,  1.44s/it]

[23/179] img_00024.jpeg
  Base: a black and white photo of a forest
  VQA: a black and white abstract background with vertical lines



Processing:  13%|█▎        | 24/179 [00:34<03:37,  1.41s/it]

[24/179] img_00025.jpeg
  Base: a car driving down the road in a tunnel
  VQA: a car driving down the road in a tunnel



Processing:  14%|█▍        | 25/179 [00:35<03:41,  1.44s/it]

[25/179] img_00026.jpeg
  Base: a black and white photo of a man in a suit
  VQA: a black and white photo of a man in a suit



Processing:  15%|█▍        | 26/179 [00:37<03:36,  1.41s/it]

[26/179] img_00027.jpeg
  Base: a black and white photo of a man in a suit
  VQA: a black and white photo of a man in a suit



Processing:  15%|█▌        | 27/179 [00:38<03:41,  1.46s/it]

[27/179] img_00030.jpeg
  Base: a person walking down a sidewalk with their feet on the ground
  VQA: a skateboarder rides down the street on his skateboard



Processing:  16%|█▌        | 28/179 [00:39<03:32,  1.41s/it]

[28/179] img_00031.jpeg
  Base: a person riding a skateboard down a street
  VQA: a skateboarder rides down the street in the city



Processing:  16%|█▌        | 29/179 [00:41<03:32,  1.42s/it]

[29/179] img_00032.jpeg
  Base: a man in a suit standing in front of a wall
  VQA: a man in a suit standing in front of a wall



Processing:  17%|█▋        | 30/179 [00:42<03:38,  1.47s/it]

[30/179] img_00033.jpeg
  Base: a man in a suit and tie standing next to a wall
  VQA: a man in a suit and tie standing next to a wall



Processing:  17%|█▋        | 31/179 [00:44<03:39,  1.48s/it]

[31/179] img_00034.jpeg
  Base: a man in a suit standing in front of a wall
  VQA: a man in a suit standing in front of a wall



Processing:  18%|█▊        | 32/179 [00:46<03:43,  1.52s/it]

[32/179] img_00035.jpeg
  Base: a man in a suit and tie standing next to a wall
  VQA: a man in a suit and tie standing next to a wall



Processing:  18%|█▊        | 33/179 [00:47<03:31,  1.45s/it]

[33/179] img_00036.jpeg
  Base: a black and white background with a silver stripe
  VQA: a black and white background with a silver stripe



Processing:  19%|█▉        | 34/179 [00:48<03:28,  1.44s/it]

[34/179] img_00037.jpeg
  Base: a man riding a skateboard down a street
  VQA: the shadow of a person on a skateboard



Processing:  20%|█▉        | 35/179 [00:50<03:22,  1.41s/it]

[35/179] img_00038.jpeg
  Base: a man riding a skateboard down a street
  VQA: a man riding a skateboard down a street



Processing:  20%|██        | 36/179 [00:51<03:30,  1.47s/it]

[36/179] img_00039.jpeg
  Base: an image of a man in the middle of a forest
  VQA: an image of a man in the middle of a forest



Processing:  21%|██        | 37/179 [00:53<03:27,  1.46s/it]

[37/179] img_00040.jpeg
  Base: a close up of a piece of wood on the ground
  VQA: a close up of a piece of wood next to a brick



Processing:  21%|██        | 38/179 [00:54<03:22,  1.43s/it]

[38/179] img_00041.jpeg
  Base: a black and white photo of a man in the woods
  VQA: a black and white photo of a forest with trees



Processing:  22%|██▏       | 39/179 [00:56<03:25,  1.47s/it]

[39/179] img_00042.jpeg
  Base: a man in a suit and tie standing in front of a wall
  VQA: a man in a suit and tie standing in front of a wall



Processing:  22%|██▏       | 40/179 [00:57<03:25,  1.48s/it]

[40/179] img_00043.jpeg
  Base: a close up view of the surface of the moon
  VQA: a close up view of the surface of the moon



Processing:  23%|██▎       | 41/179 [00:58<03:19,  1.44s/it]

[41/179] img_00044.jpeg
  Base: a car parked on the side of the road
  VQA: a car is parked on the side of the road



Processing:  23%|██▎       | 42/179 [01:00<03:20,  1.46s/it]

[42/179] img_00045.jpeg
  Base: a black and white photo of a man in a suit
  VQA: a black and white photo of a man in a suit



Processing:  24%|██▍       | 43/179 [01:01<03:16,  1.45s/it]

[43/179] img_00046.jpeg
  Base: a car is parked on the side of the road
  VQA: a car is parked on the side of the road



Processing:  25%|██▍       | 44/179 [01:03<03:09,  1.41s/it]

[44/179] img_00047.jpeg
  Base: a car is parked on the side of the road
  VQA: a car is parked on the side of the road



Processing:  25%|██▌       | 45/179 [01:04<03:10,  1.42s/it]

[45/179] img_00048.jpeg
  Base: a black and white photo of a man in a suit
  VQA: a black and white photo of a man in a suit



Processing:  26%|██▌       | 46/179 [01:05<03:05,  1.40s/it]

[46/179] img_00049.jpeg
  Base: a cat is sitting on the side of the road
  VQA: a car parked on the side of the road



Processing:  26%|██▋       | 47/179 [01:07<02:59,  1.36s/it]

[47/179] img_00050.jpeg
  Base: an abstract image of trees in the woods
  VQA: an abstract image of trees in the woods



Processing:  27%|██▋       | 48/179 [01:08<02:58,  1.36s/it]

[48/179] img_00052.jpeg
  Base: the shadow of a person standing on the ground
  VQA: the shadow of a person standing on a dirt road



Processing:  27%|██▋       | 49/179 [01:10<02:58,  1.38s/it]

[49/179] img_00053.jpeg
  Base: a car parked on the side of a road at night
  VQA: a car parked on the side of a road at night



Processing:  28%|██▊       | 50/179 [01:11<02:59,  1.39s/it]

[50/179] img_00054.jpeg
  Base: a black and white photo of a man in a suit and tie
  VQA: a brown paper bag on a black background



Processing:  28%|██▊       | 51/179 [01:12<03:00,  1.41s/it]

[51/179] img_00055.jpeg
  Base: a car driving down a street at night
  VQA: a car is parked on the side of the road



Processing:  29%|██▉       | 52/179 [01:14<02:59,  1.41s/it]

[52/179] img_00057.jpeg
  Base: a black and white photo of a woman's face
  VQA: a black and white photo of a woman's face



Processing:  30%|██▉       | 53/179 [01:15<03:00,  1.43s/it]

[53/179] img_00058.jpeg
  Base: a black and white photo of a black and white background
  VQA: a black and white photo of a black and white background



Processing:  30%|███       | 54/179 [01:17<02:58,  1.43s/it]

[54/179] img_00059.jpeg
  Base: a black and white photo of a man in a suit
  VQA: a black and white photo of a man in a suit



Processing:  31%|███       | 55/179 [01:18<02:59,  1.44s/it]

[55/179] img_00062.jpeg
  Base: a black and white photo of a man in a suit
  VQA: a black and white photo of a man in a suit



Processing:  31%|███▏      | 56/179 [01:20<03:00,  1.46s/it]

[56/179] img_00064.jpeg
  Base: a black and white photo of a man in a suit
  VQA: a black and white photo of a man in a suit



Processing:  32%|███▏      | 57/179 [01:21<02:54,  1.43s/it]

[57/179] img_00065.jpeg
  Base: a person is walking down the stairs in the dark
  VQA: a person standing in the middle of a tunnel



Processing:  32%|███▏      | 58/179 [01:22<02:49,  1.40s/it]

[58/179] img_00066.jpeg
  Base: a man riding a bike down a city street
  VQA: a man riding a bike down a street



Processing:  33%|███▎      | 59/179 [01:24<02:47,  1.40s/it]

[59/179] img_00067.jpeg
  Base: an image of a man in a suit and tie
  VQA: a man in a suit and tie walking down the street



Processing:  34%|███▎      | 60/179 [01:25<02:45,  1.39s/it]

[60/179] img_00068.jpeg
  Base: a person riding a motorcycle down a road
  VQA: a person riding a motorcycle down a dirt road



Processing:  34%|███▍      | 61/179 [01:27<02:47,  1.42s/it]

[61/179] img_00069.jpeg
  Base: a black and white photo of a black and white photo
  VQA: a close up of a black and white background



Processing:  35%|███▍      | 62/179 [01:28<02:48,  1.44s/it]

[62/179] img_00070.jpeg
  Base: a close up of a metal tube
  VQA: a close up of a metal tube with a brown background



Processing:  35%|███▌      | 63/179 [01:29<02:40,  1.38s/it]

[63/179] img_00071.jpeg
  Base: a car is submerged in a flooded street
  VQA: a car is submerged in a flooded street



Processing:  36%|███▌      | 64/179 [01:31<02:41,  1.40s/it]

[64/179] img_00073.jpeg
  Base: a police car is parked on the side of the road
  VQA: a police car is parked on the side of the road



Processing:  36%|███▋      | 65/179 [01:32<02:41,  1.41s/it]

[65/179] img_00074.jpeg
  Base: a car is parked on the side of the road
  VQA: a car parked on the side of a road at night



Processing:  37%|███▋      | 66/179 [01:34<02:38,  1.40s/it]

[66/179] img_00076.jpeg
  Base: a car is parked on the side of the road
  VQA: a car is parked on the side of the road



Processing:  37%|███▋      | 67/179 [01:35<02:37,  1.40s/it]

[67/179] img_00078.jpeg
  Base: a man riding a skateboard down a street
  VQA: a person riding on a skateboard down a street



Processing:  38%|███▊      | 68/179 [01:37<02:37,  1.42s/it]

[68/179] img_00079.jpeg
  Base: a car parked on the side of a road
  VQA: a car is parked on the side of the road



Processing:  39%|███▊      | 69/179 [01:38<02:33,  1.40s/it]

[69/179] img_00080.jpeg
  Base: a car parked on the side of a road
  VQA: a truck parked on the side of a road



Processing:  39%|███▉      | 70/179 [01:39<02:36,  1.43s/it]

[70/179] img_00081.jpeg
  Base: a car parked on the side of a road
  VQA: a black car parked on the side of a road



Processing:  40%|███▉      | 71/179 [01:41<02:35,  1.44s/it]

[71/179] img_00082.jpeg
  Base: a car is parked on the side of the road
  VQA: a car is parked on the side of the road



Processing:  40%|████      | 72/179 [01:42<02:33,  1.43s/it]

[72/179] img_00084.jpeg
  Base: a black and white photo of a man in a suit
  VQA: a black and white photo of a man in a suit



Processing:  41%|████      | 73/179 [01:44<02:41,  1.52s/it]

[73/179] img_00085.jpeg
  Base: a dark blue background with horizontal lines
  VQA: a purple and black background with a white border



Processing:  41%|████▏     | 74/179 [01:45<02:34,  1.47s/it]

[74/179] img_00086.jpeg
  Base: an image of the moon in the night sky
  VQA: an image of the moon in the night sky



Processing:  42%|████▏     | 75/179 [01:47<02:28,  1.43s/it]

[75/179] img_00087.jpeg
  Base: the shadow of a person walking down a street
  VQA: the shadow of a person walking down a street



Processing:  42%|████▏     | 76/179 [01:48<02:24,  1.40s/it]

[76/179] img_00088.jpeg
  Base: an asteroid is seen in this image taken by nasa astronauts
  VQA: an asteroid is seen in this image taken from nasa



Processing:  43%|████▎     | 77/179 [01:49<02:24,  1.41s/it]

[77/179] img_00089.jpeg
  Base: a man riding a skateboard down a street
  VQA: a man riding a skateboard down the side of a road



Processing:  44%|████▎     | 78/179 [01:51<02:24,  1.43s/it]

[78/179] img_00090.jpeg
  Base: a black and white photo of a man in a suit
  VQA: a black and white photo of a man in a suit



Processing:  44%|████▍     | 79/179 [01:52<02:24,  1.45s/it]

[79/179] img_00091.jpeg
  Base: a car is parked on the side of the road
  VQA: a car is parked on the side of the road



Processing:  45%|████▍     | 80/179 [01:54<02:20,  1.42s/it]

[80/179] img_00092.jpeg
  Base: a man in a suit and tie standing in a dark room
  VQA: a man in a suit and tie with a tie



Processing:  45%|████▌     | 81/179 [01:55<02:17,  1.41s/it]

[81/179] img_00093.jpeg
  Base: a car is parked on the street at night
  VQA: a car with a license plate on it's back



Processing:  46%|████▌     | 82/179 [01:57<02:15,  1.40s/it]

[82/179] img_00094.jpeg
  Base: a black and white photo of a man in a suit
  VQA: a black and white photo of a man in a suit



Processing:  46%|████▋     | 83/179 [01:58<02:09,  1.35s/it]

[83/179] img_00095.jpeg
  Base: an image of a black and brown background
  VQA: a black and white background with a brown stripe



Processing:  47%|████▋     | 84/179 [01:59<02:09,  1.37s/it]

[84/179] img_00096.jpeg
  Base: a truck is parked on the side of the road
  VQA: a car that is parked on the side of the road



Processing:  47%|████▋     | 85/179 [02:01<02:08,  1.37s/it]

[85/179] img_00097.jpeg
  Base: a car parked on the side of a road
  VQA: a car is parked on the side of the road



Processing:  48%|████▊     | 86/179 [02:02<02:08,  1.38s/it]

[86/179] img_00098.jpeg
  Base: a car is parked on the side of the road
  VQA: a car is parked on the side of the road



Processing:  49%|████▊     | 87/179 [02:03<02:09,  1.41s/it]

[87/179] img_00099.jpeg
  Base: a car parked on the side of the road
  VQA: a car is parked on the side of the road



Processing:  49%|████▉     | 88/179 [02:05<02:09,  1.42s/it]

[88/179] img_00100.jpeg
  Base: a black and white photo of a man in a suit
  VQA: a black and white photo of a man in a suit



Processing:  50%|████▉     | 89/179 [02:06<02:07,  1.42s/it]

[89/179] img_00101.jpeg
  Base: a white car parked on the side of the road
  VQA: a white car parked on the side of the road



Processing:  50%|█████     | 90/179 [02:08<02:02,  1.38s/it]

[90/179] img_00102.jpeg
  Base: the shadow of a person walking on a sidewalk
  VQA: the shadow of a person walking on a sidewalk



Processing:  51%|█████     | 91/179 [02:09<01:58,  1.34s/it]

[91/179] img_00103.jpeg
  Base: a tire on the side of a road
  VQA: a tire on the side of a road



Processing:  51%|█████▏    | 92/179 [02:10<01:55,  1.33s/it]

[92/179] img_00104.jpeg
  Base: a car driving through a tunnel in the rain
  VQA: a car driving through a tunnel in the rain



Processing:  52%|█████▏    | 93/179 [02:12<01:55,  1.35s/it]

[93/179] img_00105.jpeg
  Base: a black cat sitting on the side of a road
  VQA: a black cat sitting on the side of a road



Processing:  53%|█████▎    | 94/179 [02:13<01:56,  1.37s/it]

[94/179] img_00106.jpeg
  Base: a red car parked on the side of a road
  VQA: a red car parked on the side of a road



Processing:  53%|█████▎    | 95/179 [02:14<01:58,  1.41s/it]

[95/179] img_00107.jpeg
  Base: a car driving through a tunnel on a road
  VQA: a car driving through a tunnel on a road



Processing:  54%|█████▎    | 96/179 [02:16<01:58,  1.43s/it]

[96/179] img_00108.jpeg
  Base: a car is parked on the side of the road
  VQA: a car is parked on the side of the road



Processing:  54%|█████▍    | 97/179 [02:17<01:56,  1.42s/it]

[97/179] img_00109.jpeg
  Base: a close up of a black and white carpet
  VQA: a person walking down a sidewalk with a cell phone



Processing:  55%|█████▍    | 98/179 [02:19<01:54,  1.42s/it]

[98/179] img_00110.jpeg
  Base: a close up of a skateboard on the ground
  VQA: a close up of a skateboard on the ground



Processing:  55%|█████▌    | 99/179 [02:20<01:54,  1.43s/it]

[99/179] img_00111.jpeg
  Base: a person walking down a sidewalk with their feet on the ground
  VQA: a person walking down a sidewalk with their feet on the ground



Processing:  56%|█████▌    | 100/179 [02:22<01:51,  1.41s/it]

[100/179] img_00112.jpeg
  Base: a close up of a skateboard on the ground
  VQA: a close up of a skateboard on the ground



Processing:  56%|█████▋    | 101/179 [02:23<01:49,  1.41s/it]

[101/179] img_00113.jpeg
  Base: a car parked on the side of a road
  VQA: a car is parked on the side of the road



Processing:  57%|█████▋    | 102/179 [02:24<01:49,  1.43s/it]

[102/179] img_00114.jpeg
  Base: a skateboarder riding down the street with his board
  VQA: a skateboarder riding down the street with his board



Processing:  58%|█████▊    | 103/179 [02:26<01:47,  1.41s/it]

[103/179] img_00115.jpeg
  Base: a person standing on a sidewalk next to a car
  VQA: a person standing on a sidewalk next to a car



Processing:  58%|█████▊    | 104/179 [02:27<01:45,  1.41s/it]

[104/179] img_00116.jpeg
  Base: two bags sitting on the grass next to each other bags
  VQA: a man sitting in the grass next to a bag



Processing:  59%|█████▊    | 105/179 [02:29<01:42,  1.39s/it]

[105/179] img_00117.jpeg
  Base: an image of a black and white striped background
  VQA: a close up of a black and white striped background



Processing:  59%|█████▉    | 106/179 [02:30<01:41,  1.40s/it]

[106/179] img_00118.jpeg
  Base: an image of a black and white striped background
  VQA: a black and white image of a black and white image



Processing:  60%|█████▉    | 107/179 [02:31<01:39,  1.38s/it]

[107/179] img_00119.jpeg
  Base: a black cat is laying on the ground
  VQA: a black and white cat laying on the ground



Processing:  60%|██████    | 108/179 [02:33<01:41,  1.42s/it]

[108/179] img_00120.jpeg
  Base: a black and white photo of a man in a suit
  VQA: a black and white photo of a person on a skateboard



Processing:  61%|██████    | 109/179 [02:34<01:37,  1.39s/it]

[109/179] img_00121.jpeg
  Base: a man riding a motorcycle down a street
  VQA: a man riding a motorcycle down a street



Processing:  61%|██████▏   | 110/179 [02:36<01:39,  1.44s/it]

[110/179] img_00122.jpeg
  Base: a man in a suit and tie standing in front of a wall
  VQA: a man in a suit and tie standing in front of a wall



Processing:  62%|██████▏   | 111/179 [02:37<01:34,  1.39s/it]

[111/179] img_00123.jpeg
  Base: a water fountain in the middle of a park
  VQA: a water fountain in the middle of a park



Processing:  63%|██████▎   | 112/179 [02:38<01:34,  1.41s/it]

[112/179] img_00124.jpeg
  Base: a man in a suit and tie walking down the street
  VQA: a man in a suit and tie walking down the street



Processing:  63%|██████▎   | 113/179 [02:40<01:31,  1.39s/it]

[113/179] img_00125.jpeg
  Base: a man riding a skateboard down a street
  VQA: a person riding on a skateboard down a street



Processing:  64%|██████▎   | 114/179 [02:41<01:29,  1.38s/it]

[114/179] img_00126.jpeg
  Base: a car driving down a street next to a tall building
  VQA: a car driving through a tunnel on a road



Processing:  64%|██████▍   | 115/179 [02:43<01:29,  1.40s/it]

[115/179] img_00128.jpeg
  Base: a man in a suit standing in front of a door
  VQA: a man in a suit standing in front of a door



Processing:  65%|██████▍   | 116/179 [02:44<01:29,  1.43s/it]

[116/179] img_00129.jpeg
  Base: a car that is upside on the side of the road
  VQA: a car that is upside on the side of the road



Processing:  65%|██████▌   | 117/179 [02:45<01:28,  1.42s/it]

[117/179] img_00130.jpeg
  Base: a white motorcycle parked on the side of a road
  VQA: a motorcycle parked on the side of a road at night



Processing:  66%|██████▌   | 118/179 [02:47<01:29,  1.47s/it]

[118/179] img_00131.jpeg
  Base: a man in a suit and tie standing in a dark room
  VQA: a man in a suit and tie standing in a dark room



Processing:  66%|██████▋   | 119/179 [02:48<01:27,  1.46s/it]

[119/179] img_00132.jpeg
  Base: a car is parked on the side of the road
  VQA: a car is parked on the side of the road



Processing:  67%|██████▋   | 120/179 [02:50<01:25,  1.45s/it]

[120/179] img_00133.jpeg
  Base: a car is parked on the side of the road
  VQA: a car is parked on the side of the road



Processing:  68%|██████▊   | 121/179 [02:51<01:23,  1.44s/it]

[121/179] img_00134.jpeg
  Base: a car is parked on the side of the road
  VQA: a car is parked on the side of the road



Processing:  68%|██████▊   | 122/179 [02:53<01:20,  1.42s/it]

[122/179] img_00135.jpeg
  Base: a car parked on the side of a road
  VQA: a car parked on the side of a road



Processing:  69%|██████▊   | 123/179 [02:54<01:17,  1.39s/it]

[123/179] img_00136.jpeg
  Base: a dark brown metal texture background
  VQA: a dark brown background with some light spots



Processing:  69%|██████▉   | 124/179 [02:56<01:19,  1.44s/it]

[124/179] img_00137.jpeg
  Base: a man in a suit and tie standing in front of a window
  VQA: a man in a suit standing in front of a black background



Processing:  70%|██████▉   | 125/179 [02:57<01:18,  1.45s/it]

[125/179] img_00139.jpeg
  Base: a light shining through a window in a dark room
  VQA: a light shines through a window in a dark room



Processing:  70%|███████   | 126/179 [02:58<01:15,  1.43s/it]

[126/179] img_00140.jpeg
  Base: a heart shaped hole in the wall of a building
  VQA: a heart shaped hole in the wall of a building



Processing:  71%|███████   | 127/179 [03:00<01:14,  1.43s/it]

[127/179] img_00141.jpeg
  Base: a heart shaped shadow on the wall of a building
  VQA: an image of a heart in the middle of a wall



Processing:  72%|███████▏  | 128/179 [03:01<01:11,  1.40s/it]

[128/179] img_00142.jpeg
  Base: a black and white striped background
  VQA: an abstract black and white background with vertical lines



Processing:  72%|███████▏  | 129/179 [03:03<01:09,  1.39s/it]

[129/179] img_00143.jpeg
  Base: an image of the moon in the night sky
  VQA: an image of the moon in the night sky



Processing:  73%|███████▎  | 130/179 [03:04<01:10,  1.44s/it]

[130/179] img_00144.jpeg
  Base: a car is parked on the side of the road
  VQA: a car that is parked on the side of the road



Processing:  73%|███████▎  | 131/179 [03:05<01:08,  1.42s/it]

[131/179] img_00145.jpeg
  Base: a car parked on the side of a road
  VQA: a black car parked on the side of a road



Processing:  74%|███████▎  | 132/179 [03:07<01:09,  1.48s/it]

[132/179] img_00146.jpeg
  Base: a car parked on the side of a road
  VQA: a car that is parked on the side of the road



Processing:  74%|███████▍  | 133/179 [03:09<01:08,  1.50s/it]

[133/179] img_00147.jpeg
  Base: a car is parked on the side of the road
  VQA: a car that is parked on the side of the road



Processing:  75%|███████▍  | 134/179 [03:10<01:05,  1.46s/it]

[134/179] img_00148.jpeg
  Base: a close up of a black and red curtain
  VQA: a black and red striped background with a white stripe



Processing:  75%|███████▌  | 135/179 [03:12<01:13,  1.67s/it]

[135/179] img_00149.jpeg
  Base: a man in a suit and tie walking down a street
  VQA: a man in a suit and tie walking down a street



Processing:  76%|███████▌  | 136/179 [03:14<01:08,  1.60s/it]

[136/179] img_00150.jpeg
  Base: a black and white photo of a man in a suit
  VQA: a black and white photo of a man in a suit



Processing:  77%|███████▋  | 137/179 [03:15<01:05,  1.56s/it]

[137/179] img_00151.jpeg
  Base: a person riding a skateboard down a street
  VQA: a person riding a skateboard down a city street



Processing:  77%|███████▋  | 138/179 [03:16<01:01,  1.51s/it]

[138/179] img_00152.jpeg
  Base: a car parked on the side of a road
  VQA: a black car parked on the side of a road



Processing:  78%|███████▊  | 139/179 [03:18<00:59,  1.50s/it]

[139/179] img_00153.jpeg
  Base: an empty street in the middle of a city
  VQA: an empty street in the middle of a city



Processing:  78%|███████▊  | 140/179 [03:19<00:58,  1.50s/it]

[140/179] img_00154.jpeg
  Base: a black and white photo of a man in a suit
  VQA: a black and white photo of a person in a dark room



Processing:  79%|███████▉  | 141/179 [03:21<00:55,  1.46s/it]

[141/179] img_00155.jpeg
  Base: a black car parked on the side of a road
  VQA: a black car parked on the side of a road



Processing:  79%|███████▉  | 142/179 [03:22<00:53,  1.44s/it]

[142/179] img_00156.jpeg
  Base: a car parked on the side of a road
  VQA: a white van parked on the side of a road



Processing:  80%|███████▉  | 143/179 [03:24<00:51,  1.44s/it]

[143/179] img_00157.jpeg
  Base: a man riding a motorcycle down a city street
  VQA: a man riding a motorcycle down a city street



Processing:  80%|████████  | 144/179 [03:25<00:49,  1.41s/it]

[144/179] img_00160.jpeg
  Base: a red, white and blue striped background
  VQA: a red, white and blue striped background



Processing:  81%|████████  | 145/179 [03:26<00:47,  1.40s/it]

[145/179] img_00161.jpeg
  Base: a black and white photo of a man in a suit
  VQA: an image of a black and white striped wallpaper



Processing:  82%|████████▏ | 146/179 [03:28<00:47,  1.44s/it]

[146/179] img_00162.jpeg
  Base: a black and white photo of a man in a suit
  VQA: a black and white image of a man's face



Processing:  82%|████████▏ | 147/179 [03:29<00:46,  1.45s/it]

[147/179] img_00163.jpeg
  Base: a black and white photo of a man in a suit
  VQA: a black and white photo of a man in a suit



Processing:  83%|████████▎ | 148/179 [03:31<00:44,  1.43s/it]

[148/179] img_00164.jpeg
  Base: an abstract image of trees in the woods
  VQA: an abstract image of a tree in the woods



Processing:  83%|████████▎ | 149/179 [03:32<00:42,  1.42s/it]

[149/179] img_00165.jpeg
  Base: a man is standing in the middle of a road
  VQA: a hole in the ground that has been dug



Processing:  84%|████████▍ | 150/179 [03:34<00:41,  1.44s/it]

[150/179] img_00168.jpeg
  Base: a black and white photo of a man in a suit
  VQA: a black and white photo of a man in a suit



Processing:  84%|████████▍ | 151/179 [03:35<00:40,  1.45s/it]

[151/179] img_00169.jpeg
  Base: a motorcycle parked on the side of the road
  VQA: a car is parked on the side of the road



Processing:  85%|████████▍ | 152/179 [03:37<00:39,  1.48s/it]

[152/179] img_00170.jpeg
  Base: a black and white photograph of a man in a suit
  VQA: a black and white photograph of a man in a suit



Processing:  85%|████████▌ | 153/179 [03:38<00:37,  1.46s/it]

[153/179] img_00171.jpeg
  Base: a car is parked on the side of the road
  VQA: a car is parked on the side of the road



Processing:  86%|████████▌ | 154/179 [03:40<00:36,  1.46s/it]

[154/179] img_00172.jpeg
  Base: a blue car parked on the side of the road
  VQA: a car is parked on the side of the road



Processing:  87%|████████▋ | 155/179 [03:41<00:34,  1.45s/it]

[155/179] img_00173.jpeg
  Base: a person walking down a sidewalk next to a building
  VQA: a person walking down a sidewalk next to a building



Processing:  87%|████████▋ | 156/179 [03:42<00:33,  1.47s/it]

[156/179] img_00174.jpeg
  Base: a car is parked on the side of the road
  VQA: a car is parked on the side of the road



Processing:  88%|████████▊ | 157/179 [03:44<00:32,  1.47s/it]

[157/179] img_00175.jpeg
  Base: a person riding a skateboard down a street
  VQA: a man riding a skateboard down a city street



Processing:  88%|████████▊ | 158/179 [03:45<00:30,  1.46s/it]

[158/179] img_00177.jpeg
  Base: a dog is standing on the sidewalk in the dark
  VQA: a black and white dog laying on the ground



Processing:  89%|████████▉ | 159/179 [03:47<00:29,  1.46s/it]

[159/179] img_00178.jpeg
  Base: a broken umbrella sitting on the side of a road
  VQA: a street sign sitting on the side of a road



Processing:  89%|████████▉ | 160/179 [03:48<00:28,  1.47s/it]

[160/179] img_00179.jpeg
  Base: a close up of a metal tube
  VQA: a close up of a metal tube with a white background



Processing:  90%|████████▉ | 161/179 [03:50<00:26,  1.45s/it]

[161/179] img_00180.jpeg
  Base: a man riding a skateboard down a street
  VQA: a person riding on a skateboard down a street



Processing:  91%|█████████ | 162/179 [03:51<00:24,  1.46s/it]

[162/179] img_00181.jpeg
  Base: a man in a suit and tie walking down the street
  VQA: a man in a suit and tie is walking down the stairs



Processing:  91%|█████████ | 163/179 [03:53<00:23,  1.46s/it]

[163/179] img_00182.jpeg
  Base: a black and white photo of a man in a suit
  VQA: a black and white photo of a man in a suit



Processing:  92%|█████████▏| 164/179 [03:54<00:21,  1.45s/it]

[164/179] img_00183.jpeg
  Base: a man in a suit and tie walking down a street
  VQA: a man in a suit and tie walking down a street



Processing:  92%|█████████▏| 165/179 [03:56<00:20,  1.49s/it]

[165/179] img_00184.jpeg
  Base: a man in a suit and tie standing in front of a curtain
  VQA: a man in a suit and tie standing in front of a curtain



Processing:  93%|█████████▎| 166/179 [03:57<00:19,  1.48s/it]

[166/179] img_00185.jpeg
  Base: a black and white photo of a man in a suit
  VQA: a man in a suit and tie standing on a stage



Processing:  93%|█████████▎| 167/179 [03:59<00:17,  1.47s/it]

[167/179] img_00187.jpeg
  Base: a dark room with a wooden floor and a window
  VQA: a dark room with a wooden floor and a large window



Processing:  94%|█████████▍| 168/179 [04:00<00:15,  1.44s/it]

[168/179] img_00188.jpeg
  Base: a black and white photo of a man in a suit
  VQA: a black and white photo of a man in a suit



Processing:  94%|█████████▍| 169/179 [04:01<00:13,  1.37s/it]

[169/179] img_00189.jpeg
  Base: a man riding a skateboard down a street
  VQA: a man riding a skateboard down a street



Processing:  95%|█████████▍| 170/179 [04:03<00:12,  1.39s/it]

[170/179] img_00190.jpeg
  Base: an image of a black and green striped background
  VQA: a close up of a black and green striped background



Processing:  96%|█████████▌| 171/179 [04:04<00:10,  1.37s/it]

[171/179] img_00191.jpeg
  Base: a person riding a skateboard down a street
  VQA: a person riding a skateboard down a street



Processing:  96%|█████████▌| 172/179 [04:05<00:09,  1.37s/it]

[172/179] img_00192.jpeg
  Base: a motorcycle parked on the side of a road
  VQA: a motorcycle parked on the side of a road



Processing:  97%|█████████▋| 173/179 [04:07<00:08,  1.39s/it]

[173/179] img_00193.jpeg
  Base: a motorcycle parked on the side of a dirt road
  VQA: a motorcycle parked on the side of a dirt road



Processing:  97%|█████████▋| 174/179 [04:08<00:07,  1.40s/it]

[174/179] img_00194.jpeg
  Base: a car is parked on the side of the road
  VQA: a car parked on the side of a road



Processing:  98%|█████████▊| 175/179 [04:10<00:05,  1.38s/it]

[175/179] img_00195.jpeg
  Base: a car is parked in a parking lot
  VQA: a car parked on the side of a road



Processing:  98%|█████████▊| 176/179 [04:11<00:04,  1.38s/it]

[176/179] img_00196.jpeg
  Base: a car driving down a street at night
  VQA: a car driving down a dark street at night



Processing:  99%|█████████▉| 177/179 [04:12<00:02,  1.37s/it]

[177/179] img_00197.jpeg
  Base: a white car parked in a parking lot
  VQA: a white car is parked in a parking lot



Processing:  99%|█████████▉| 178/179 [04:14<00:01,  1.37s/it]

[178/179] img_00198.jpeg
  Base: a black and white photo of a man in a suit
  VQA: a black and white photo of a man in a suit



Processing: 100%|██████████| 179/179 [04:15<00:00,  1.43s/it]

[179/179] img_00199.jpeg
  Base: a man riding a bike down a dirt road
  VQA: a man riding a bike down a dirt road

Done. Results saved to: /home/jupyter/project/Ilya/multimodel_results.json





In [42]:
%pip install timm torchvision

Defaulting to user installation because normal site-packages is not writeable
Collecting triton==2.1.0 (from torch>=1.4->timm)
  Downloading triton-2.1.0-0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.3 kB)
Downloading triton-2.1.0-0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (89.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m89.2/89.2 MB[0m [31m73.6 MB/s[0m  [33m0:00:01[0m6m0:00:01[0m00:01[0m
[0mInstalling collected packages: triton
  Attempting uninstall: triton
    Found existing installation: triton 3.5.0
    Uninstalling triton-3.5.0:
      Successfully uninstalled triton-3.5.0
[0m[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
fastai 2.7.12 requires torch<2.1,>=1.7, but you have torch 2.1.2+cu118 which is incompatible.
torchdata 0.6.1 requires torch==2.0.1, but you have torch 2.1

In [43]:
%pip install --upgrade transformers

Defaulting to user installation because normal site-packages is not writeable
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3 -m pip install --upgrade pip[0m


In [41]:
%python -c "import transformers; print(transformers.__version__)"

UsageError: Line magic function `%python` not found (But cell magic `%%python` exists, did you mean that instead?).


In [44]:
#!/usr/bin/env python3
# llava_integration.py
import os
import json
from pathlib import Path
from PIL import Image
import torch

# --- Настройки ---
IMAGES_DIR = Path("/home/jupyter/project/Ilya/dataset_images")
OUTPUT_DIR = Path("/home/jupyter/project/Ilya/llava_outputs")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Путь к ноутбуку/модулю с run_pipeline
NOTEBOOK_PATH = Path("/home/jupyter/project/github/car-scene-captioning/main.ipynb")
# или если вы скопировали/создали run_pipeline.py рядом - укажите его:
RUNTIME_MODULE_PATH = Path("/home/jupyter/project/github/car-scene-captioning/run_pipeline.py")

# Модель LLaVA — пример идентификатора; поменяйте на нужный
LLAVA_MODEL_ID = "llava-hf/llava-1.5-7b-hf"

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# --- helper: получить run_pipeline из .py или .ipynb ---
def load_run_pipeline():
    # 1) Попробовать импортировать из .py модуля если есть
    if RUNTIME_MODULE_PATH.exists():
        import importlib.util
        spec = importlib.util.spec_from_file_location("run_pipeline_module", str(RUNTIME_MODULE_PATH))
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)
        if hasattr(module, "run_pipeline"):
            return module.run_pipeline

    # 2) Если есть ноутбук, собрать и выполнить код его кодовых ячеек
    if NOTEBOOK_PATH.exists():
        import nbformat
        nb = nbformat.read(str(NOTEBOOK_PATH), as_version=4)
        code_cells = [c.source for c in nb.cells if c.cell_type == "code"]
        code = "\n\n".join(code_cells)
        ns = {}
        # Выполним код ноутбука в изолированном namespace
        exec(compile(code, str(NOTEBOOK_PATH), "exec"), ns)
        if "run_pipeline" in ns:
            return ns["run_pipeline"]

    raise RuntimeError("Не удалось найти функцию run_pipeline ни в run_pipeline.py, ни в main.ipynb. Скопируйте/экспортируйте её в доступный модуль.")

# --- Загрузка LLaVA (inference) ---
def load_llava(model_id=LLAVA_MODEL_ID, device=DEVICE):
    # попытка импортировать типичный API, используемый в примерах LLaVA
    try:
        # некоторые реализации предоставляют классы напрямую
        from llava.modeling import LlavaForConditionalGeneration
        from transformers import AutoProcessor
        model = LlavaForConditionalGeneration.from_pretrained(model_id, torch_dtype=torch.float16, low_cpu_mem_usage=True)
        processor = AutoProcessor.from_pretrained(model_id)
    except Exception:
        # fallback — часто можно импортировать напрямую из transformers or llava-hf wrappers
        try:
            from transformers import AutoProcessor
            from llava import LlavaForConditionalGeneration  # возможные импорты в разных реализациях
            model = LlavaForConditionalGeneration.from_pretrained(model_id, torch_dtype=torch.float16, low_cpu_mem_usage=True)
            processor = AutoProcessor.from_pretrained(model_id)
        except Exception:
            # более общий вариант — использовать AutoModel/AutoTokenizer (пользователь может поправить по своему стеку)
            from transformers import AutoProcessor, AutoModelForCausalLM
            processor = AutoProcessor.from_pretrained(model_id)
            model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, low_cpu_mem_usage=True)

    model.to(device)
    model.eval()
    return processor, model

# --- Формирование промпта: комбинируем результаты вашего pipeline и вопрос для LLaVA ---
def build_prompt(pipeline_result: list):
    # pipeline_result — список строк/словарей, которые возвращает ваш run_pipeline для одного изображения
    # Пример шаблона — вы можете адаптировать под задачу:
    header = "Информация, полученная preprocessing pipeline:\n"
    body_lines = []
    for item in pipeline_result:
        # если это словарь — представим его красиво
        if isinstance(item, dict):
            body_lines.append(json.dumps(item, ensure_ascii=False))
        else:
            body_lines.append(str(item))
    body = "\n".join(body_lines)
    # вопрос/задача для LLaVA
    question = (
        "\n\nЗадача: на основании изображения и информации выше опишите кратко автомобиль, "
        "окрестность и перечислите обнаруженные объекты. Выдайте ответ в двух частях:\n"
        "1) Краткая сводка (1-2 предложения)\n"
        "2) JSON с полями: objects (список), scene (строка), potential_hazards (строка, если есть)\n"
    )
    return header + body + question

# --- Основной run: перебор изображений, запуск pipeline, запуск llava и сохранение результата ---
def main():
    run_pipeline = load_run_pipeline()
    processor, model = load_llava()
    # Список картинок
    image_paths = sorted([p for p in IMAGES_DIR.iterdir() if p.suffix.lower() in [".jpg", ".jpeg", ".png"]])

    batch_paths = [str(p) for p in image_paths]
    # Запускаем ваш pipeline (он сам инициализирует модели внутри себя)
    print("Запуск вашего run_pipeline для всех изображений...")
    pipeline_results = run_pipeline(batch_paths, source=True)  # ожидается list[list[...]]
    print("Pipeline завершён.")

    for idx, img_path in enumerate(image_paths):
        img = Image.open(img_path).convert("RGB")

        # pipeline_results[idx] — список результатов для конкретного изображения
        prompt = build_prompt(pipeline_results[idx])

        # Подготовка входа для LLaVA: processor соединит изображение и текст
        inputs = processor(images=img, text=prompt, return_tensors="pt")
        # Переносим на устройство
        inputs = {k: v.to(DEVICE) for k, v in inputs.items()}

        # Генерация — параметры подберите по ресурсам
        with torch.no_grad():
            gen = model.generate(**inputs, max_new_tokens=256, do_sample=False, num_beams=4)

        # Декодирование — у processor/токенайзера может быть метод decode
        try:
            generated_text = processor.tokenizer.decode(gen[0], skip_special_tokens=True)
        except Exception:
            # fallback если processor не содержит tokenizer
            from transformers import AutoTokenizer
            tok = AutoTokenizer.from_pretrained(LLAVA_MODEL_ID)
            generated_text = tok.decode(gen[0], skip_special_tokens=True)

        # Сохраняем
        out = {
            "image": str(img_path),
            "pipeline": pipeline_results[idx],
            "llava_output": generated_text
        }
        out_path = OUTPUT_DIR / (img_path.stem + "_llava.json")
        with open(out_path, "w", encoding="utf-8") as f:
            json.dump(out, f, ensure_ascii=False, indent=2)

        print(f"Обработано {img_path.name} -> {out_path}")

if __name__ == "__main__":
    main()


init


RuntimeError: Failed to import transformers.models.mask2former.image_processing_mask2former because of the following error (look up to see its traceback):
No module named 'transformers.models.mask2former.image_processing_mask2former'

In [46]:
%pip -q install --upgrade "transformers>=4.35.0" "timm>=0.6.0" "torchvision>=0.15.0"

Traceback (most recent call last):
  File "/kernel/lib/python3.10/site-packages/ml_kernel/_vendor/packaging/requirements.py", line 35, in __init__
    parsed = _parse_requirement(requirement_string)
  File "/kernel/lib/python3.10/site-packages/ml_kernel/_vendor/packaging/_parser.py", line 64, in parse_requirement
    return _parse_requirement(Tokenizer(source, rules=DEFAULT_RULES))
  File "/kernel/lib/python3.10/site-packages/ml_kernel/_vendor/packaging/_parser.py", line 73, in _parse_requirement
    name_token = tokenizer.expect(
  File "/kernel/lib/python3.10/site-packages/ml_kernel/_vendor/packaging/_tokenizer.py", line 140, in expect
    raise self.raise_syntax_error(f"Expected {expected}")
  File "/kernel/lib/python3.10/site-packages/ml_kernel/_vendor/packaging/_tokenizer.py", line 165, in raise_syntax_error
    raise ParserSyntaxError(
ml_kernel._vendor.packaging._tokenizer.ParserSyntaxError: Expected package name at the start of dependency specifier
    "transformers>=4.35.0"
  

In [29]:
# обновить до последней стабильной
%pip install -U transformers

# (опционально) если хотите конкретную более новую версию:
%pip install -U "transformers>=4.39.0"


Defaulting to user installation because normal site-packages is not writeable
Collecting transformers
  Using cached transformers-4.57.1-py3-none-any.whl.metadata (43 kB)
Collecting tokenizers<=0.23.0,>=0.22.0 (from transformers)
  Using cached tokenizers-0.22.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.8 kB)
Using cached transformers-4.57.1-py3-none-any.whl (12.0 MB)
Using cached tokenizers-0.22.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.3 MB)
[0mInstalling collected packages: tokenizers, transformers
[2K  Attempting uninstall: tokenizers
[2K    Found existing installation: tokenizers 0.13.3
[2K    Uninstalling tokenizers-0.13.3:
[2K      Successfully uninstalled tokenizers-0.13.3
[2K  Attempting uninstall: transformers━━━━━━━━━━━━[0m [32m0/2[0m [tokenizers]
[2K    Found existing installation: transformers 4.30.02m0/2[0m [tokenizers]
[2K    Uninstalling transformers-4.30.0:╺[0m[90m━━━━━━━━━━━━━━━━━━━[0m [32m1/2[0m [transfo

Traceback (most recent call last):
  File "/kernel/lib/python3.10/site-packages/ml_kernel/_vendor/packaging/requirements.py", line 35, in __init__
    parsed = _parse_requirement(requirement_string)
  File "/kernel/lib/python3.10/site-packages/ml_kernel/_vendor/packaging/_parser.py", line 64, in parse_requirement
    return _parse_requirement(Tokenizer(source, rules=DEFAULT_RULES))
  File "/kernel/lib/python3.10/site-packages/ml_kernel/_vendor/packaging/_parser.py", line 73, in _parse_requirement
    name_token = tokenizer.expect(
  File "/kernel/lib/python3.10/site-packages/ml_kernel/_vendor/packaging/_tokenizer.py", line 140, in expect
    raise self.raise_syntax_error(f"Expected {expected}")
  File "/kernel/lib/python3.10/site-packages/ml_kernel/_vendor/packaging/_tokenizer.py", line 165, in raise_syntax_error
    raise ParserSyntaxError(
ml_kernel._vendor.packaging._tokenizer.ParserSyntaxError: Expected package name at the start of dependency specifier
    "transformers>=4.39.0"
  

In [37]:
# вариант 1: через "!"
!python3 -m venv ~/venv_autocaption
!source ~/venv_autocaption/bin/activate && pip install --upgrade pip
!source ~/venv_autocaption/bin/activate && pip install torch torchvision pillow numpy


The virtual environment was not created successfully because ensurepip is not
available.  On Debian/Ubuntu systems, you need to install the python3-venv
package using the following command.

    apt install python3.10-venv

You may need to use sudo with that command.  After installing the python3-venv
package, recreate your virtual environment.

Failing command: /home/jupyter/venv_autocaption/bin/python3



Exception: Process exited with code 1

In [30]:
python - <<'PY'
from transformers import Mask2FormerImageProcessor, Mask2FormerForUniversalSegmentation
print("Mask2Former импортируется ✓")
PY


SyntaxError: invalid syntax (763689901.py, line 1)