In [1]:
import asyncio
import ast
import json
import os
from functools import partial
from time import sleep

import dashscope
import requests
from tqdm import tqdm

# Switch to the parent directory so that the `dataset` package and the
# relative `dataset/...` paths used below resolve. The guard keeps the cell
# idempotent: re-running it must not climb one more directory each time.
if not os.path.isdir('dataset'):
    os.chdir('..')

from dataset.src.repository import ClipRepository

In [None]:
class ApiKeys:
    """Endless round-robin iterator over a mutable pool of API keys.

    ``next()`` hands out keys in order and wraps around at the end of the
    pool. A key can be dropped with :meth:`remove`; the rotation order of the
    remaining keys is preserved.
    """

    def __init__(self, items: list[str]):
        # Copy the input so that remove() never mutates the caller's list.
        self.items = list(items)
        # Position of the key that the next call to next() will return.
        self.index = 0

    def __next__(self):
        """Return the key under the cursor and advance the cursor cyclically.

        Raises:
            IndexError: if the pool holds no keys.
        """
        if not self.items:
            raise IndexError('no API keys available')
        value = self.items[self.index]
        self.index = (self.index + 1) % len(self.items)
        return value

    def __bool__(self):
        """True while at least one key is left in the pool."""
        return bool(self.items)

    def __len__(self):
        """Number of keys currently in the pool."""
        return len(self.items)

    def remove(self, item: str):
        """Drop ``item`` from the pool; unknown keys are ignored.

        Raises:
            RuntimeError: if the removal leaves the pool empty.
        """
        try:
            idx = self.items.index(item)
        except ValueError:
            # Already removed (or never present): nothing to do.
            return
        del self.items[idx]
        if not self.items:
            raise RuntimeError('all API keys have been removed')
        # Everything after the removed slot shifted left by one, so a cursor
        # that was past that slot must follow to keep pointing at the same key.
        if idx < self.index:
            self.index -= 1
        # The cursor may now sit one past the end (last element removed).
        self.index %= len(self.items)

    def __iter__(self):
        """The object is its own iterator."""
        return self
    

# Load the list of keys from disk. The context manager closes the file handle
# as soon as the JSON has been parsed instead of leaving it to the garbage
# collector.
with open('dataset/qwen_api_keys.json', encoding='utf-8') as keys_file:
    api_keys = ApiKeys(json.load(keys_file))

In [None]:
# Collect the final (post-redirect) URLs of 100 random 1000x1000 pictures.
MAX_ATTEMPTS = 3

img_urls = []
for i in tqdm(range(100)):
    for _ in range(MAX_ATTEMPTS):
        try:
            response = requests.get(f'https://picsum.photos/1000/1000?random={i}', timeout=10)
            # Do not record the URL of an error page (4xx/5xx).
            response.raise_for_status()
            img_urls.append(response.url)
            break
        except requests.exceptions.SSLError:
            # SSL failures are retried after a short pause.
            sleep(1)
        except requests.exceptions.RequestException as e:
            # Any other request failure is not retried, but it is reported
            # instead of being dropped silently.
            print(f'Failed to load image {i}: {e!r}')
            break
    else:
        # Reached only when every attempt ended in an SSL error.
        print(f'Failed to load image {i} after {MAX_ATTEMPTS} attempts')


100%|██████████| 100/100 [02:40<00:00,  1.61s/it]


In [4]:
# Instruction sent to the model with every image: it asks for 10 short Russian
# captions (3-7 words each) formatted as a Python list of strings.
# The first word previously started with a Latin "C" instead of the Cyrillic
# "С", which produced a mixed-script word in the prompt.
prompt = '''
Создай список из 10 коротких описаний (3–7 слов каждое) на русском языке,
описывающих это изображение. Используй формат Python-списка строк, например:
["Собака бежит по снегу", "Пёс играет на улице", ..., "Активная прогулка в зимнем лесу"].
'''

In [28]:
# At most one in-flight request per API key that was available at start-up.
semaphore = asyncio.Semaphore(len(api_keys))


async def get_label_of_image(img_url) -> None:
    """Ask the vision model to caption ``img_url`` and store the captions.

    The request is retried with the next key from ``api_keys`` until a call
    comes back with status 200. Every key whose call returned another status
    is removed from the pool. The text of the successful reply is parsed as a
    Python literal and passed to ``ClipRepository.add`` together with the URL.

    Raises:
        RuntimeError: propagated from ``api_keys.remove`` when the last key
            has been removed.
        ValueError, SyntaxError: from ``ast.literal_eval`` if the reply text
            is not a valid Python literal.
    """
    loop = asyncio.get_running_loop()
    messages = [{
        'role': 'user',
        'content': [
            {'image': img_url},
            {'text': prompt},
        ]
    }]
    while True:
        async with semaphore:
            current_key = next(api_keys)
            # The key is passed per call rather than through the module-global
            # `dashscope.api_key`: the call runs in a worker thread, and a
            # global could be overwritten by another task before it is read.
            response = await loop.run_in_executor(
                None,
                partial(dashscope.MultiModalConversation.call,
                        model='qwen2.5-vl-72b-instruct',
                        messages=messages,
                        api_key=current_key)
            )
            # Pause without blocking the event loop (time.sleep would stall
            # every other task for the duration).
            await asyncio.sleep(0.5)
            if response['status_code'] == 200:
                await ClipRepository.add(
                    img_url,
                    ast.literal_eval(response.output.choices[0].message.content[0]['text']),
                )
                return
        # The semaphore slot is released before retrying. Retrying while still
        # holding it can deadlock once every slot holder is waiting for a slot.
        api_keys.remove(current_key)
        print(len(api_keys))

In [29]:
tasks = [get_label_of_image(url) for url in img_urls[88:]]
try:
    await asyncio.gather(*tasks)
except RuntimeError:
    print('Все api ключи упали')

37
37
36
36
35
35
34
34
33
33
32
32
31
31
30
30
29
29
