In [None]:
import os
from typing import Optional, Dict, Any

import requests
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams, PointStruct
from sentence_transformers import SentenceTransformer

In [None]:
# Embedding model shared by the indexing step and the search query.
model = SentenceTransformer("Qwen/Qwen3-Embedding-0.6B")

# Connection settings are read from the environment so that credentials are
# never stored in the notebook itself.
#   QDRANT_URL     - optional, falls back to the cluster URL used so far.
#   QDRANT_API_KEY - required; a missing variable raises KeyError here rather
#                    than failing later with an opaque authentication error.
# NOTE(review): the API key that used to be embedded in this cell should be
# treated as compromised and rotated.
client = QdrantClient(
    url=os.environ.get(
        "QDRANT_URL",
        "https://f3e39d2d-4da7-435b-91cf-d02471428bc7.us-west-1-0.aws.cloud.qdrant.io:6333",
    ),
    api_key=os.environ["QDRANT_API_KEY"],
)

In [None]:
# Name of the Qdrant collection that the vacancy vectors are written to and searched in.
COLLECTION_NAME = "hh_vacancies"
# Embedding size reported by the loaded model; used as the vector size when the collection is created.
DIMENSION = model.get_sentence_embedding_dimension()

In [None]:
def safe_get(data: Optional[Dict], *keys: str, default: Any = "") -> Any:
    """Follow ``keys`` through nested dictionaries, falling back to ``default``.

    Args:
        data: The outermost mapping, or ``None``.
        *keys: Keys to look up in order, one nesting level per key.
        default: Value returned whenever the path cannot be resolved.

    Returns:
        The value found at the end of the key path. ``default`` is returned
        when ``data`` is ``None``, when an intermediate value is not a dict,
        when a key is absent, or when a key is present but holds ``None``
        (for example a JSON ``null``). With no keys, ``data`` itself is
        returned.
    """
    if data is None:
        return default

    current = data
    for key in keys:
        if not isinstance(current, dict):
            return default
        current = current.get(key)
        # A missing key and an explicit None are both treated as "no value".
        # Returning right away also prevents the remaining keys from being
        # looked up inside ``default`` when it happens to be a dict.
        if current is None:
            return default
    return current

def prepare_vacancy(vacancy: Dict) -> Optional[Dict]:
    """Flatten a raw vacancy record into the fields needed for indexing.

    Args:
        vacancy: One vacancy dictionary as returned by the vacancies endpoint.

    Returns:
        A dict with the keys ``id``, ``text`` (name and description joined by
        a newline), ``salary``, ``employer``, ``url``, ``experience`` and
        ``employment``; or ``None`` when the record has no usable ``id`` or
        an unexpected error occurs while reading it.
    """

    def field(source: Any, *keys: str, default: Any = "") -> Any:
        # Values that are present but None (JSON null, e.g. an open-ended
        # salary bound) must not leak into the output as the text "None".
        value = safe_get(source, *keys, default=default)
        return default if value is None else value

    try:
        vacancy_id = field(vacancy, "id")
        if not vacancy_id:
            return None

        name = field(vacancy, "name", default="Без названия")
        # Prefer the full description; fall back to the short requirement snippet.
        description = field(vacancy, "description") or field(vacancy, "snippet", "requirement")

        salary = field(vacancy, "salary", default={})
        salary_text = ""
        if salary:
            lower = field(salary, "from", default="?")
            upper = field(salary, "to", default="?")
            currency = field(salary, "currency", default="RUR")
            salary_text = f"{lower}-{upper} {currency}"

        return {
            "id": vacancy_id,
            "text": f"{name}\n{description}",
            "salary": salary_text,
            "employer": field(vacancy, "employer", "name", default="Неизвестно"),
            "url": field(vacancy, "alternate_url", default=""),
            "experience": field(vacancy, "experience", "name", default="Не указан"),
            "employment": field(vacancy, "employment", "name", default="Не указана")
        }
    except Exception:
        # Best effort: a malformed record is skipped by the caller. Only
        # ordinary errors are caught, so KeyboardInterrupt/SystemExit propagate.
        return None

def upload_vacancies(vacancies: list):
    """Embed the given vacancies and upsert them into the Qdrant collection.

    Each vacancy is normalised with ``prepare_vacancy``; records it rejects
    are skipped. The remaining ones are encoded with the module-level
    ``model`` and sent to ``COLLECTION_NAME`` in a single ``upsert`` call
    that waits for completion. Nothing is sent when no point could be built.

    Args:
        vacancies: Raw vacancy dictionaries.

    Returns:
        None.
    """
    points = []
    for vacancy in vacancies:
        prepared = prepare_vacancy(vacancy)
        if not prepared:
            continue

        try:
            embedding = model.encode(prepared["text"])
            points.append(PointStruct(
                # int() raises for a non-numeric id; that record is then
                # reported and skipped below.
                id=int(prepared["id"]),
                vector=embedding.tolist(),
                payload={
                    # The first line of the text is the vacancy name.
                    "title": prepared["text"].split("\n", 1)[0],
                    "salary": prepared["salary"],
                    "employer": prepared["employer"],
                    "url": prepared["url"],
                    "experience": prepared["experience"],
                    "employment": prepared["employment"]
                }
            ))
        except Exception as exc:
            # Keep going on a bad record, but say so. Catching Exception
            # (not everything) lets a kernel interrupt stop the loop.
            print(f"Вакансия {prepared['id']} пропущена: {exc!r}")

    if points:
        client.upsert(
            collection_name=COLLECTION_NAME,
            points=points,
            wait=True
        )

In [None]:
# Fetch a single page (up to 50 items) of "Data Science" vacancies that
# declare a salary.
# NOTE(review): area=1 is presumably Moscow - confirm against the hh.ru areas dictionary.
response = requests.get(
    "https://api.hh.ru/vacancies",
    params={
        "text": "Data Science",
        "area": 1,
        "per_page": 50,
        # requests would serialise Python True as the string "True";
        # send the conventional lowercase boolean literal instead.
        "only_with_salary": "true"
    },
    # requests applies no timeout by default, so a stalled connection
    # would otherwise block the kernel indefinitely.
    timeout=30,
)

if response.status_code == 200:
    vacancies = response.json().get("items", [])

    # Create the collection only when it is actually absent. An explicit
    # existence check keeps authentication or network failures visible
    # instead of misreading them as "collection not found".
    if not client.collection_exists(COLLECTION_NAME):
        client.create_collection(
            collection_name=COLLECTION_NAME,
            vectors_config=VectorParams(
                size=DIMENSION,
                distance=Distance.COSINE
            )
        )

    upload_vacancies(vacancies)

    # Sample resume used as the search query
    test_resume = """
    Опытный Data Scientist с 3 годами опыта.
    Навыки: Python, NumPy, Pandas, Scikit-learn, PyTorch.
    Исследовал большие языковые модели.
    """

    # Find the 5 nearest vacancies
    # NOTE(review): recent qdrant-client releases deprecate `search` in favour
    # of `query_points` - confirm the installed version before migrating.
    results = client.search(
        collection_name=COLLECTION_NAME,
        query_vector=model.encode(test_resume).tolist(),
        limit=5
    )

    # Print the results
    print("\nТоп-5 подходящих вакансий:")
    for i, match in enumerate(results, 1):
        print(f"\n{i}. {match.payload['title']}")
        print(f"   Работодатель: {match.payload['employer']}")
        print(f"   Зарплата: {match.payload['salary']}")
        print(f"   Опыт: {match.payload['experience']}")
        print(f"   Занятость: {match.payload['employment']}")
        print(f"   Ссылка: {match.payload['url']}")
        print(f"   Схожесть: {match.score:.3f}")
else:
    print("Ошибка при получении вакансий с HH.ru")