# Imports

In [None]:
import random

import numpy as np
import pandas as pd

import torch

# Ask PyTorch's CUDA caching allocator to release the cached memory it is not
# currently using, before the model is loaded further down.
torch.cuda.empty_cache()

def seed_all(seed: int) -> None:
    """Seed every random number generator used in this notebook.

    Seeds NumPy's global generator, PyTorch (CPU, the current CUDA device and
    all CUDA devices) and Python's ``random`` module with the same value, and
    switches cuDNN to deterministic mode with benchmarking turned off.

    Args:
        seed: Value passed unchanged to each seeding function.
    """
    # Applied in this order: NumPy, torch CPU, torch current GPU, torch all GPUs.
    seeders = (
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # Prefer reproducible cuDNN kernels over auto-tuned (benchmarked) ones.
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

    random.seed(seed)

In [None]:
# Fix the random seed: one constant is passed to seed_all so every generator
# starts from the same value.
SEED = 42
seed_all(SEED)

In [None]:
from pathlib import Path

# Root of the data tree, given relative to the current working directory.
DATA_PATH = Path('../../data/')

# Sub-directories of the data tree.
DATA_CACHE = DATA_PATH / 'cache_dir/'
DATA_INPUT = DATA_PATH / 'input/'
DATA_OUTPUT = DATA_PATH / 'output/'
OFFLOAD_DIR = DATA_CACHE / 'offload_weights/'

# Create the whole tree up front; directories that already exist are left alone.
for _directory in (DATA_PATH, DATA_CACHE, DATA_INPUT, DATA_OUTPUT, OFFLOAD_DIR):
    _directory.mkdir(parents=True, exist_ok=True)


import pandas as pd

# Raise pandas' column display width limit to 500 characters.
pd.set_option('display.max_colwidth', 500)

In [None]:
import sys
import os

# Absolute path of the directory two levels above the current working
# directory (presumably the project root -- confirm against the repo layout).
project_path = os.path.abspath(os.path.join(os.getcwd(), "../.."))

# Notebook cells get re-run; only add the entry once so sys.path does not
# accumulate duplicates of the same directory.
if project_path not in sys.path:
    sys.path.append(project_path)

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig

# Model name (Hugging Face Hub identifier of the checkpoint to load)
MODEL_NAME = "deepseek-ai/deepseek-llm-7b-chat"

# Load the tokenizer and the model.
# Every download goes through DATA_CACHE so all artefacts of this checkpoint
# live in the project's cache directory instead of a separate "./cache"
# folder created next to the notebook.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, cache_dir=DATA_CACHE)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.bfloat16,
    device_map="auto",  # automatic placement of weights across available devices
    offload_folder=OFFLOAD_DIR,  # where weights are written if any get offloaded to disk
    cache_dir=DATA_CACHE,
)

# Set pad_token_id: start from the checkpoint's own generation config and
# reuse the end-of-sequence token id as the padding token id.
model.generation_config = GenerationConfig.from_pretrained(
    MODEL_NAME, cache_dir=DATA_CACHE
)
model.generation_config.pad_token_id = model.generation_config.eos_token_id

print("‚úÖ –ú–æ–¥–µ–ª—å —É—Å–ø–µ—à–Ω–æ –∑–∞–≥—Ä—É–∂–µ–Ω–∞!")


In [None]:
# Conversation so far, as a list of {"role": ..., "content": ...} messages
# shared by every ask_model call.
chat_history = []


def ask_model(user_input: str, max_tokens: int = 512, temperature: float = 0.1) -> str:
    """Send one user message to the chat model and return its reply.

    The prompt is built from the whole of ``chat_history`` plus the new user
    message. The user message and the model's reply are appended to
    ``chat_history`` only after generation and decoding succeed, so a failed
    call (out-of-memory, interrupt, ...) leaves the history unchanged and the
    next call does not see a user turn without an answer.

    Args:
        user_input: Text of the user's message.
        max_tokens: Upper bound on the number of newly generated tokens.
        temperature: Accepted for backward compatibility but currently not
            passed to ``model.generate``; the model's generation config
            decides the decoding settings.

    Returns:
        The decoded reply with special tokens removed and surrounding
        whitespace stripped.
    """
    user_message = {"role": "user", "content": user_input}

    # Work on a copy: the shared history is only touched once we have a reply.
    messages = [*chat_history, user_message]

    # Build the input tensor from the full conversation.
    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    )

    # Generate the reply.
    outputs = model.generate(
        input_ids.to(model.device),
        max_new_tokens=max_tokens,
    )

    # The output sequence starts with the prompt tokens; decode only what
    # comes after them.
    prompt_length = input_ids.shape[1]
    response = tokenizer.decode(
        outputs[0][prompt_length:], skip_special_tokens=True
    ).strip()

    # Commit both turns together now that the exchange is complete.
    chat_history.extend(
        [user_message, {"role": "assistant", "content": response}]
    )

    return response


In [None]:
content = '''
–ß—Ç–æ —ç—Ç–æ –∑–∞ –¥–æ–∫—É–º–µ–Ω—Ç? –û–ø—Ä–µ–¥–µ–ª–∏ –ø–æ –∫–æ–Ω—Ç–µ–∫—Å—Ç—É:
'–°–æ–≥–ª–∞—à–µ–Ω–∏–µ –æ —Ä–∞—Å—Ç–æ—Ä–∂–µ–Ω–∏–∏\n–ö–æ–Ω—Ç—Ä–∞–∫—Ç–∞ ‚Ññ –°–¢-1383/22 –æ—Ç 31.01.2022 –≥.\n–Ω–∞ –ø–æ—Å—Ç–∞–≤–∫—É –Ω–∞–±–æ—Ä–æ–≤ –¥–ª—è –∫–∞—Ç–µ—Ç–µ—Ä–∏–∑–∞—Ü–∏–∏ –Ω–∞ 2022 –≥–æ–¥\n\n–ø–æ—Å. –ü–µ—Ä–≤–æ–º–∞–π—Å–∫–æ–µ ¬´30¬ª –¥–µ–∫–∞–±—Ä—è 2022 –≥.\n\n–ì–æ—Å—É–¥–∞—Ä—Å—Ç–≤–µ–Ω–Ω–æ–µ –±—é–¥–∂–µ—Ç–Ω–æ–µ —É—á—Ä–µ–∂–¥–µ–Ω–∏–µ –∑–¥—Ä–∞–≤–æ–æ—Ö—Ä–∞–Ω–µ–Ω–∏—è –õ–µ–Ω–∏–Ω–≥—Ä–∞–¥—Å–∫–æ–π –ª–∞—Å—Ç–∏ ¬´–†–æ—â–∏–Ω—Å–∫–∞—è\n–º–µ–∂—Ä–∞–π–æ–Ω–Ω–∞—è –±–æ–ª—å–Ω–∏—Ü–∞¬ª (–ì–ë–£–ó –õ–û ¬´–†–æ—â–∏–Ω—Å–∫–∞—è –ú–ë¬ª), –∏–º–µ–Ω—É–µ–º–æ–µ –≤ –¥–∞–ª—å–Ω–µ–π—à–µ–º ¬´–ó–∞–∫–∞–∑—á–∏–∫¬ª, –≤ –ª–∏—Ü–µ\n—Ç–ª–∞–≤–Ω–æ–≥–æ –≤—Ä–∞—á–∞ –ö–∞–∑–∞—Ä–æ–≤–∞ –≠—Ä–Ω–µ—Å—Ç–∞ –≠–¥—É–∞—Ä–¥–æ–≤–∏—á–∞, –¥–µ–π—Å—Ç–≤—É—é—â–µ–≥–æ –Ω–∞ –æ—Å–Ω–æ–≤–∞–Ω–∏–∏ –£—Å—Ç–∞–≤–∞, –û–±—â–µ—Å—Ç–≤–æ —Å\n–æ–≥—Ä–∞–Ω–∏—á–µ–Ω–Ω–æ–π –æ—Ç–≤–µ—Ç—Å—Ç–≤–µ–Ω–Ω–æ—Å—Ç—å—é ¬´–ë–æ–ª—é—Å–º–µ–¥¬ª (–û–û–û ¬´–ë–æ–ª—é—Å–º–µ–¥¬ª), –∏–º–µ–Ω—É–µ–º–æ–µ –≤ –¥–∞–ª—å–Ω–µ–π—à–µ–º\n¬´–ü–æ—Å—Ç–∞–≤—â–∏–∫¬ª, –≤ –ª–∏—Ü–µ –≥–µ–Ω–µ—Ä–∞–ª—å–Ω–æ–≥–æ –¥–∏—Ä–µ–∫—Ç–æ—Ä–∞ –ë–µ–ª–æ–≤–æ–π –•—Ä–∏—Å—Ç–∏–Ω—ã –í–∏—Ç–∞–ª—å–µ–≤–Ω—ã, –¥–µ–π—Å—Ç–≤—É—é—â–µ–≥–æ –Ω–∞\n–æ—Å–Ω–æ–≤–∞–Ω–∏–∏ –£—Å—Ç–∞–≤–∞, —Å –¥—Ä—É–≥–æ–π —Å—Ç–æ—Ä–æ–Ω—ã, –∑–¥–µ—Å—å –∏ –¥–∞–ª–µ–µ –∏–º–µ–Ω—É–µ–º—ã–µ ¬´–°—Ç–æ—Ä–æ–Ω—ã¬ª, –∑–∞–∫–ª—é—á–∏–ª–∏ –Ω–∞—Å—Ç–æ—è—â–µ–µ\n–°–æ–≥–ª–∞—à–µ–Ω–∏–µ –∫ –ö–æ–Ω—Ç—Ä–∞–∫—Ç—É ‚Ññ –°–¢-1383/22 –æ—Ç 31.01.2022 –≥. –æ –Ω–∏–∂–µ—Å–ª–µ–¥—É—é—â–µ–º:\n\n1. –†—É–∫–æ–≤–æ–¥—Å—Ç–≤—É—è—Å—å —á–∞—Å—Ç—å—é 8 —Å—Ç–∞—Ç—å–∏ 95 –§–ó ‚Ññ44-–§–ó ¬´–û –∫–æ–Ω—Ç—Ä–∞–∫—Ç–Ω–æ–π —Å–∏—Å—Ç–µ–º–µ –≤ —Å—Ñ–µ—Ä–µ –∑–∞–∫—É–ø–æ–∫ —Ç–æ–≤–∞—Ä–æ–≤, —Ä–∞–±–æ—Ç,\n‚Äò—É—Å–ª—É–≥ –¥–ª—è –æ–±–µ—Å–ø–µ—á–µ–Ω–∏—è –≥–æ—Å—É–¥–∞—Ä—Å—Ç–≤–µ–Ω–Ω—ã—Ö –∏ –º—É–Ω–∏—Ü–∏–ø–∞–ª—å–Ω—ã—Ö –Ω—É–∂–¥¬ª, –ø—É–Ω–∫—Ç–æ–º 1 —Å—Ç–∞—Ç—å–∏ 450 –ì–ö–†–§ –∏ –ø—É–Ω–∫—Ç–∞–º–∏\n11.2, 11.3. –ö–æ–Ω—Ç—Ä–∞–∫—Ç–∞ ‚Ññ –°–¢-1383/22 –æ—Ç 31.01.2022 –≥, –°—Ç–æ—Ä–æ–Ω—ã –¥–æ–≥–æ–≤–æ—Ä–∏–ª–∏—Å—å —Ä–∞—Å—Ç–æ—Ä–≥–Ω—É—Ç—å –ö–æ–Ω—Ç—Ä–∞–∫—Ç\n‚Ññ –°–¢-1383/22 –æ—Ç 31.01.2022 –≥. –ø–æ —Å–æ–≥–ª–∞—à–µ–Ω–∏—é —Å—Ç–æ—Ä–æ–Ω.\n\n2. 
–¶–µ–Ω–∞ –ö–æ–Ω—Ç—Ä–∞–∫—Ç–∞ –Ω–∞ –º–æ–º–µ–Ω—Ç —Ä–∞—Å—Ç–æ—Ä–∂–µ–Ω–∏—è —Å–æ–æ—Ç–≤–µ—Ç—Å—Ç–≤—É–µ—Ç —Å—Ç–æ–∏–º–æ—Å—Ç–∏ —Ñ–∞–∫—Ç–∏—á–µ—Å–∫–∏ –ø–æ—Å—Ç–∞–≤–ª–µ–Ω–Ω–æ–≥–æ —Ç–æ–≤–∞—Ä–∞ –≤\n—Å—É–º–º–µ 108 150,00 —Ä—É–±. (–°—Ç–æ –≤–æ—Å–µ–º—å —Ç—ã—Å—è—á —Å—Ç–æ –ø—è—Ç—å–¥–µ—Å—è—Ç —Ä—É–±–ª–µ–π 00 –∫–æ–ø–µ–µ–∫), –ù–î–° –Ω–µ –æ–±–ª–∞–≥–∞–µ—Ç—Å—è.\n\n3. –û–ø–ª–∞—Ç–∞ –∑–∞ —Ñ–∞–∫—Ç–∏—á–µ—Å–∫–∏ –ø–æ—Å—Ç–∞–≤–ª–µ–Ω–Ω—ã–π —Ç–æ–≤–∞—Ä –ø—Ä–æ–∏–∑–≤–µ–¥–µ–Ω–∞ –ó–∞–∫–∞–∑—á–∏–∫–æ–º –≤ —Å–æ–æ—Ç–≤–µ—Ç—Å—Ç–≤–∏–∏ —Å —É—Å–ª–æ–≤–∏—è–º–∏\n–ö–æ–Ω—Ç—Ä–∞–∫—Ç–∞ ‚Ññ –°–¢-1383/22 –æ—Ç 31.01.2022 –≥.\n\n4. –û–±—è–∑–∞—Ç–µ–ª—å—Å—Ç–≤–∞ –≤ –æ—Å—Ç–∞–≤—à–µ–π—Å—è —á–∞—Å—Ç–∏ –Ω–∞ —Å—É–º–º—É 557 830,00 —Ä—É–±, (–ü—è—Ç—å—Å–æ—Ç –ø—è—Ç—å–¥–µ—Å—è—Ç —Å–µ–º—å —Ç—ã—Å—è—á –≤–æ—Å–µ–º—å—Å–æ—Ç\n—Ç—Ä–∏–¥—Ü–∞—Ç—å —Ä—É–±–ª–µ–π 00 –∫–æ–ø–µ–µ–∫) —Å—á–∏—Ç–∞—é—Ç—Å—è –ø—Ä–µ–∫—Ä–∞—â–µ–Ω–Ω—ã–º–∏, –ø–æ—Å—Ç–∞–≤–∫–µ –∏ –æ–ø–ª–∞—Ç–µ –Ω–µ –ø–æ–¥–ª–µ–∂–∞—Ç.\n\n5. –í—Å–µ –æ—Å—Ç–∞–ª—å–Ω—ã–µ —É—Å–ª–æ–≤–∏—è –ö–æ–Ω—Ç—Ä–∞–∫—Ç–∞ ‚Ññ –°–¢-1383/22 –æ—Ç 31.01.2022 –≥. –Ω–µ –∑–∞—Ç—Ä–æ–Ω—É—Ç—ã–µ –Ω–∞—Å—Ç–æ—è—â–∏–º\n–°–æ–≥–ª–∞—à–µ–Ω–∏–µ–º, –æ—Å—Ç–∞—é—Ç—Å—è –Ω–µ–∏–∑–º–µ–Ω–Ω—ã–º–∏ –∏ –°—Ç–æ—Ä–æ–Ω—ã –ø–æ–¥—Ç–≤–µ—Ä–∂–¥–∞—é—Ç –ø–æ –Ω–∏–º —Å–≤–æ–∏ –æ–±—è–∑–∞—Ç–µ–ª—å—Å—Ç–≤–∞,\n\n6. –ù–∞—Å—Ç–æ—è—â–µ–µ –°–æ–≥–ª–∞—à–µ–Ω–∏–µ —Å–æ—Å—Ç–∞–≤–ª–µ–Ω–æ –≤ –¥–≤—É—Ö –ø–æ–¥–ª–∏–Ω–Ω—ã—Ö —ç–∫–∑–µ–º–ø–ª—è—Ä–∞—Ö, –ø–æ –æ–¥–Ω–æ–º—É —ç–∫–∑–µ–º–ø–ª—è—Ä—É –¥–ª—è –∫–∞–∂–¥–æ–π –∏–∑\n–°—Ç–æ—Ä–æ–Ω, –∏–º–µ—é—â–∏—Ö –æ–¥–∏–Ω–∞–∫–æ–≤—É—é —é—Ä–∏–¥–∏—á–µ—Å–∫—É—é —Å–∏–ª—É, –∏ —è–≤–ª—è–µ—Ç—Å—è –Ω–µ–æ—Ç—ä–µ–º–ª–µ–º–æ–π —á–∞—Å—Ç—å—é –ö–æ–Ω—Ç—Ä–∞–∫—Ç–∞\n‚Ññ –°–¢-1383/22 –æ—Ç 31.01.2022 –≥.\n\n7. –ê–¥—Ä–µ—Å–∞, —Ä–µ–∫–≤–∏–∑–∏—Ç—ã –∏ –ø–æ–¥–ø–∏—Å–∏ —Å—Ç–æ—Ä–æ–Ω:\n\n–ó–∞–∫–∞–∑—á–∏–∫: –ì–ë–£–ó –õ–û ¬´–†–æ—â–∏–Ω—Å–∫–∞—è –ú–ë¬ª –ü–æ—Å—Ç–∞–≤—â–∏–∫: –û–û–û ¬´–ë–æ–ª—é—Å–º–µ–¥¬ª\n–Æ—Ä–∏–¥–∏—á–µ—Å–∫–∏–π –∏ –ø–æ—á—Ç–æ–≤—ã–π –∞–¥—Ä–µ—Å: 188855, –Æ—Ä–∏–¥–∏—á–µ—Å–∫–∏–π –∞–¥—Ä–µ—Å: 194295, –≥ –°–∞–Ω–∫—Ç-–ü–µ—Ç–µ—Ä–±—É—Ä–≥,\n–õ–µ–Ω–∏–Ω–≥—Ä–∞–¥—Å–∫–∞—è –æ–±–ª–∞—Å—Ç—å, –í—ã–±–æ—Ä–≥—Å–∫–∏–π —Ä–∞–π–æ–Ω, —É–ª. 
–ò–≤–∞–Ω–∞ –§–æ–º–∏–Ω–∞, –¥. 7, –∫–æ—Ä–ø. 3, –∫–≤. 37\n\n–ø. –ü–µ—Ä–≤–æ–º–∞–π—Å–∫–æ–µ, —É–ª. –õ–µ–Ω–∏–Ω–∞, –¥.54 ¬´–ê¬ª –ü–æ—á—Ç–æ–≤—ã–π –∞–¥—Ä–µ—Å: 194064, –≥. –°–∞–Ω–∫—Ç-–ü–µ—Ç–µ—Ä–±—É—Ä–≥,\n–¢–µ–ª: 8(81378)68-509, –µ-—Ç–∞–π: –≥—Ç–æ@—Ç—å–æ—Ö.–≥–∏ –ø—Ä. –†–∞–µ–≤—Å–∫–æ–≥–æ, –¥. 14, –∫–æ—Ä–ø. 2, –ª–∏—Ç–µ—Ä –ê, –ø–æ–º.8-–ù,\n–ò–ù–ù 4704047468 –ö–ü–ü 470401001 –∫–æ–º–Ω.4–ë,5–ê,5–ë (–æ–±–æ—Å–æ–±–ª–µ–Ω–Ω–æ–µ –ø–æ–¥—Ä–∞–∑–¥–µ–ª–µ–Ω–∏–µ)\n–û–ì–†–ù 1034700879212 –¢–µ–ª: (812) 703-50-98, –µ-—Ç–∞–π: —Ç@–ë–æ–∞–µ—Ç–µ–¥ –ø\n–ë–∞–Ω–∫–æ–≤—Å–∫–∏–µ —Ä–µ–∫–≤–∏–∑–∏—Ç—ã:\n\n–û—Ç–¥–µ–ª–µ–Ω–∏–µ –õ–µ–Ω–∏–Ω–≥—Ä–∞–¥—Å–∫–æ–µ –ë–∞–Ω–∫–∞ –†–æ—Å—Å–∏–∏// –ò–ù–ù 7802664190 –ö–ü–ò 780201001\n\n–£–§–ö –ø–æ –õ–µ–Ω–∏–Ω–≥—Ä–∞–¥—Å–∫–æ–π –æ–±–ª–∞—Å—Ç–∏ –≥. –°–∞–Ω–∫—Ç-–ü–µ—Ç–µ—Ä–±—É—Ä; –û–ì–†–ù 1187847123947 –æ—Ç 27.04.2018\n\n–ö–æ–º–∏—Ç–µ—Ç —Ñ–∏–Ω–∞–Ω—Å–æ–≤ –õ–µ–Ω–∏–Ω–≥—Ä–∞–¥—Å–∫–æ–π –æ–±–ª–∞—Å—Ç–∏ –ë–∞–Ω–∫–æ–≤—Å–∫–∏–µ —Ä–µ–∫–≤–∏–∑–∏—Ç—ã:\n\n(–ì–ë–£–ó –õ–û ¬´–†–æ—â–∏–Ω—Å–∫–∞—è –ú–ë¬ª –ª/—Å 2245620150) –ü–ê–û ¬´–ë–∞–Ω–∫ ¬´–°–∞–Ω–∫—Ç-–ü–µ—Ç–µ—Ä–±—É—Ä–≥¬ª\n\n–ë–ò–ö 014106101 –ë–ò–ö 044030790\n\n–†/—Å—á 03224643410000004500 –†/—Å—á 40702810490700001210\n\n–ö/—Å—á 40102810745370000006 –ö/—Å—á 30101810900000000790\n\n_ –ì–µ–Ω–µ—Ä–∞–ª—å–Ω—ã–π –¥–∏—Ä–µ–∫—Ç–æ—Ä\n–û–∑¬´–ë–æ–ª—é—Å–º–µ–¥¬ª\n–æ–æ\n\n/ –•.–í. –ë–µ–ª–æ–≤–∞'
'''

In [None]:
# Send the prompt stored in `content` to the model and print its reply.
answer = ask_model(content)
print(f"ü§ñ AI: {answer}")

In [None]:
content = '''
–°–æ—Å—Ç–∞–≤—å —Å–ø–∏—Å–æ–∫ –≤—Å–µ—Ö –≤–∞–∂–Ω—ã—Ö –ø–∞—Ä–∞–º–µ—Ç—Ä–æ–≤
'''

In [None]:
# Send the follow-up prompt; ask_model builds its input from chat_history,
# so the earlier exchange is included in this request.
answer = ask_model(content)
print(f"ü§ñ AI: {answer}")