In [None]:
!pip install aiogram python-dotenv

In [None]:
import asyncio
import html
import json
import logging
import os

import faiss
import nest_asyncio
import numpy as np
from aiogram import Bot, Dispatcher, types
from aiogram.filters import CommandStart, Command
from aiogram.fsm.context import FSMContext
from aiogram.fsm.state import State, StatesGroup
from aiogram.fsm.storage.memory import MemoryStorage
from aiogram.types import BotCommand, ReplyKeyboardRemove
from dotenv import find_dotenv, load_dotenv

In [None]:
# Patch asyncio so nested event loops are allowed (a notebook kernel already runs one)
nest_asyncio.apply()

# Load environment variables from the .env file located by find_dotenv()
load_dotenv(find_dotenv())

# Initialise the bot and the dispatcher.
# The token is read from the BOT_TOKEN environment variable rather than being
# passed as the literal placeholder string "BOT_TOKEN", which is not a valid token.
_bot_token = os.getenv("BOT_TOKEN")
if not _bot_token:
    raise RuntimeError(
        "BOT_TOKEN is not set; define it in the environment or in a .env file"
    )
bot = Bot(token=_bot_token)
storage = MemoryStorage()  # FSM state storage passed to the dispatcher
dp = Dispatcher(storage=storage)

In [None]:
# Data and model placeholders: every name is None here and is rebound by
# initialize_system() through its `global` declaration.
job_data = specialists_data = skill_db = None
normalized_skill_map = original_skill_map_norm = None
model = None  # SentenceTransformer model
faiss_index = None  # FAISS index

# States for the FSM (Finite State Machine)
class Form(StatesGroup):
    """FSM states of the candidate-search dialogue."""

    # Set by the /find handler; the handler registered for this state treats
    # the next message as the vacancy requirements.
    waiting_for_requirements = State()

# Bot commands: registered for private chats in main() and listed by /help
private_commands = [
    BotCommand(command=name, description=label)
    for name, label in (
        ('start', '–°—Ç–∞—Ä—Ç'),
        ('help', '–ü–æ–º–æ—â—å'),
        ('find', '–ù–∞–π—Ç–∏ –∫–∞–Ω–¥–∏–¥–∞—Ç–æ–≤'),
    )
]

async def initialize_system():
    """Fill the module-level data, model and index globals.

    Unpacks the five values returned by ``load_data()``, instantiates a
    ``SentenceTransformer`` from ``SENTENCE_TRANSFORMER_MODEL`` and stores the
    result of ``prepare_faiss_index(specialists_data)`` as the FAISS index.
    """
    global job_data, specialists_data, skill_db
    global normalized_skill_map, original_skill_map_norm
    global model, faiss_index

    # Data first: the index below is built from specialists_data.
    (
        job_data,
        specialists_data,
        skill_db,
        normalized_skill_map,
        original_skill_map_norm,
    ) = load_data()

    # The model is bound before the index is prepared; keep this order.
    model = SentenceTransformer(SENTENCE_TRANSFORMER_MODEL)
    faiss_index = prepare_faiss_index(specialists_data)

# Handler for the /start command
@dp.message(CommandStart())
async def start_cmd(message: types.Message):
    """Reply to /start with the greeting that points the user at /find."""
    greeting = (
        "–ü—Ä–∏–≤–µ—Ç! –Ø –±–æ—Ç –¥–ª—è –ø–æ–∏—Å–∫–∞ –∫–∞–Ω–¥–∏–¥–∞—Ç–æ–≤. "
        "–û—Ç–ø—Ä–∞–≤—å –º–Ω–µ —Ç—Ä–µ–±–æ–≤–∞–Ω–∏—è –≤–∞–∫–∞–Ω—Å–∏–∏, –∏ —è –Ω–∞–π–¥—É –ø–æ–¥—Ö–æ–¥—è—â–∏—Ö —Å–ø–µ—Ü–∏–∞–ª–∏—Å—Ç–æ–≤.\n"
        "–ò—Å–ø–æ–ª—å–∑—É–π –∫–æ–º–∞–Ω–¥—É /find —á—Ç–æ–±—ã –Ω–∞—á–∞—Ç—å –ø–æ–∏—Å–∫."
    )
    await message.answer(greeting)

# Handler for the /help command
@dp.message(Command('help'))
async def help_cmd(message: types.Message):
    """Reply with one "/command - description" line per entry of private_commands."""
    rows = (f"/{cmd.command} - {cmd.description}" for cmd in private_commands)
    listing = "\n".join(rows)
    await message.answer(f"–î–æ—Å—Ç—É–ø–Ω—ã–µ –∫–æ–º–∞–Ω–¥—ã:\n{listing}")

# Handler for the /find command
@dp.message(Command('find'))
async def find_cmd(message: types.Message, state: FSMContext):
    """Move the chat into Form.waiting_for_requirements and ask for the vacancy text."""
    # State is switched first so the user's next message reaches the
    # requirements handler.
    await state.set_state(Form.waiting_for_requirements)

    prompt = (
        "–ü–æ–∂–∞–ª—É–π—Å—Ç–∞, –≤–≤–µ–¥–∏—Ç–µ —Ç—Ä–µ–±–æ–≤–∞–Ω–∏—è –≤–∞–∫–∞–Ω—Å–∏–∏. "
        "–ù–∞–ø—Ä–∏–º–µ—Ä: '–û–ø—ã—Ç —Ä–∞–±–æ—Ç—ã —Å Python, –∑–Ω–∞–Ω–∏–µ SQL.'"
    )
    await message.answer(prompt)

def format_candidate_info(candidate):
    """Render one scored candidate as a Telegram HTML snippet.

    Args:
        candidate: dict providing the keys read below: ``name``,
            ``combined_score``, ``_scores`` (``skill`` and ``faiss``),
            ``matching_skills`` (``direct`` list and ``synonym`` dict) and
            ``missing_skills`` list.

    Returns:
        A newline-terminated, multi-line string. The candidate name and all
        skill labels are HTML-escaped so that ``<``, ``>`` or ``&`` in the
        data cannot break a message sent with ``parse_mode="HTML"``. The
        direct / synonym / missing lines are emitted only when the
        corresponding collection is non-empty.
    """
    def esc(value):
        # quote=False: only <, > and & need escaping in message text.
        return html.escape(str(value), quote=False)

    scores = candidate['_scores']
    matches = candidate['matching_skills']

    parts = [
        f"üë§ <b>{esc(candidate['name'])}</b>\n",
        f"üìä –û–±—â–∏–π –±–∞–ª–ª: {candidate['combined_score']:.2f}\n",
        f"üîç –°–æ–≤–ø–∞–¥–µ–Ω–∏–µ –Ω–∞–≤—ã–∫–æ–≤: {scores['skill']}%\n",
        f"üìà FAISS score: {scores['faiss']:.4f}\n",
    ]

    if matches['direct']:
        direct = ', '.join(esc(skill) for skill in matches['direct'])
        parts.append(f"‚úÖ –ü—Ä—è–º—ã–µ —Å–æ–≤–ø–∞–¥–µ–Ω–∏—è: {direct}\n")

    if matches['synonym']:
        syns = [f"{esc(k)}‚Üí{esc(v)}" for k, v in matches['synonym'].items()]
        parts.append(f"üîÑ –°–∏–Ω–æ–Ω–∏–º—ã: {', '.join(syns)}\n")

    if candidate['missing_skills']:
        missing = ', '.join(esc(skill) for skill in candidate['missing_skills'])
        parts.append(f"‚ùå –û—Ç—Å—É—Ç—Å—Ç–≤—É—é—â–∏–µ –Ω–∞–≤—ã–∫–∏: {missing}\n")

    return "".join(parts)

# Handler for text messages carrying the vacancy requirements
@dp.message(Form.waiting_for_requirements)
async def process_requirements(message: types.Message, state: FSMContext):
    """Rank specialists against the vacancy text sent by the user.

    Runs for messages received while the chat is in
    ``Form.waiting_for_requirements``. The message text is parsed for required
    experience and skills, candidate positions are fetched from the FAISS
    index, each candidate gets a combined score
    (``W_FAISS * faiss + W_SKILL * skill + W_EXPERIENCE * experience``) and
    the five best candidates are sent back as one HTML message.

    Any exception raised while processing is logged with its traceback and
    answered with a generic error message. The FSM state is cleared on every
    exit path.

    Args:
        message: Incoming message; ``message.text`` is used as the requirements.
        state: FSM context of the current chat.
    """
    requirements = message.text

    def display_name(norm_skill):
        # Prefer the 'ru' label from the skill map, fall back to the key itself.
        return original_skill_map_norm.get(norm_skill, {}).get('ru', norm_skill)

    def fmt_s_dict(mapping):
        return {display_name(k): display_name(v) for k, v in mapping.items()}

    def fmt_s_list(names):
        return sorted({display_name(n) for n in names if n})

    try:
        # The globals stay None until initialize_system() has run. Compare with
        # None explicitly: truth-testing arbitrary objects (e.g. NumPy arrays)
        # can raise, and an empty container is not the same as "not loaded".
        if any(part is None for part in (job_data, specialists_data, model, faiss_index)):
            await message.answer("–°–∏—Å—Ç–µ–º–∞ –µ—â–µ –Ω–µ –≥–æ—Ç–æ–≤–∞. –ü–æ–∂–∞–ª—É–π—Å—Ç–∞, –ø–æ–ø—Ä–æ–±—É–π—Ç–µ –ø–æ–∑–∂–µ.")
            return

        # 1. Experience requirements parsed from the free-form text
        required_experience = parse_experience_requirements_from_text(
            requirements, normalized_skill_map, skill_db)

        # 2. Skills extracted from the requirements
        req_skills = extract_job_skills_advanced(
            requirements,
            skill_db,
            normalized_skill_map,
            nlp,
            get_cached_skill_embeddings(tuple(sorted(skill_db.items())), model),
        )

        # 3. Candidate lookup through FAISS
        faiss_scores, faiss_indices = search_candidates_faiss(faiss_index, requirements, k=100)

        # len() instead of `not faiss_indices`: the truth value of a
        # multi-element NumPy array is ambiguous and raises ValueError.
        if faiss_indices is None or len(faiss_indices) == 0:
            await message.answer("–ü–æ–¥—Ö–æ–¥—è—â–∏—Ö –∫–∞–Ω–¥–∏–¥–∞—Ç–æ–≤ –Ω–µ –Ω–∞–π–¥–µ–Ω–æ.")
            return

        # 4. Score every returned candidate
        processed_candidates = []
        for raw_idx, raw_score in zip(faiss_indices, faiss_scores):
            idx = int(raw_idx)
            # Skip positions that do not address a specialist
            # (FAISS itself reports missing neighbours as -1).
            if not 0 <= idx < len(specialists_data):
                continue

            spec = specialists_data[idx]

            # Skill match
            skill_details = calculate_match_details(
                req_skills,
                spec['skills_list_norm'],
                list(required_experience.keys()),
                original_skill_map_norm)

            # Experience match; the met/unmet breakdown is not used here
            exp_score, _met_exp, _unmet_exp = calculate_experience_match(
                required_experience,
                spec['experience_per_skill_months'])

            # Combined score; negative FAISS scores are clamped to zero
            skill_score_norm = skill_details['match_percent'] / 100.0
            safe_f_score = max(0.0, float(raw_score))
            comb_score = (W_FAISS * safe_f_score
                          + W_SKILL * skill_score_norm
                          + W_EXPERIENCE * exp_score)

            processed_candidates.append({
                'id': spec['id'],
                'name': spec['name'],
                '_scores': {
                    'faiss': round(safe_f_score, 4),
                    'skill': skill_details['match_percent'],
                    'experience': exp_score
                },
                'combined_score': round(comb_score, 4),
                'matching_skills': {
                    'direct': fmt_s_list(skill_details['matched_direct']),
                    'synonym': fmt_s_dict(skill_details['matched_synonym']),
                    'semantic': fmt_s_dict(skill_details['matched_semantic'])
                },
                'missing_skills': fmt_s_list(skill_details['missing']),
            })

        # 5. Sort and keep the top five candidates
        top_candidates = sorted(processed_candidates,
                                key=lambda cand: cand['combined_score'],
                                reverse=True)[:5]

        # 6. Build and send the reply
        if not top_candidates:
            await message.answer("–ü–æ–¥—Ö–æ–¥—è—â–∏—Ö –∫–∞–Ω–¥–∏–¥–∞—Ç–æ–≤ –Ω–µ –Ω–∞–π–¥–µ–Ω–æ.")
        else:
            response = "üîç <b>–ù–∞–π–¥–µ–Ω–Ω—ã–µ –∫–∞–Ω–¥–∏–¥–∞—Ç—ã:</b>\n\n" + "".join(
                format_candidate_info(candidate) + "\n" for candidate in top_candidates
            )
            await message.answer(response, parse_mode="HTML")

    except Exception as e:
        # Boundary handler: keep the traceback in the log, tell the user to retry.
        logging.exception(f"–û—à–∏–±–∫–∞ –æ–±—Ä–∞–±–æ—Ç–∫–∏ —Ç—Ä–µ–±–æ–≤–∞–Ω–∏–π: {e}")
        await message.answer("–ü—Ä–æ–∏–∑–æ—à–ª–∞ –æ—à–∏–±–∫–∞ –ø—Ä–∏ –æ–±—Ä–∞–±–æ—Ç–∫–µ –≤–∞—à–∏—Ö —Ç—Ä–µ–±–æ–≤–∞–Ω–∏–π. –ü–æ–∂–∞–ª—É–π—Å—Ç–∞, –ø–æ–ø—Ä–æ–±—É–π—Ç–µ –µ—â–µ —Ä–∞–∑.")
    finally:
        # Always leave the waiting state, even if sending the error reply failed.
        await state.clear()

# Bot start-up
async def main():
    """Initialise the search system, register the commands and start polling."""
    # Data, model and index are loaded before any update is handled.
    await initialize_system()

    # Remove the webhook and discard the updates that are still pending.
    await bot.delete_webhook(drop_pending_updates=True)

    private_chats = types.BotCommandScopeAllPrivateChats()
    await bot.set_my_commands(commands=private_commands, scope=private_chats)

    await dp.start_polling(bot)

# Entry point: run the bot until polling stops. nest_asyncio.apply() is called
# earlier in this file, which is what allows asyncio.run() to be used when an
# event loop is already running (e.g. inside a notebook).
if __name__ == '__main__':
    asyncio.run(main())