 # Създаване на локален ChatBot с Llama-3

In [1]:
!pip install torch transformers

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [2]:
!nvidia-smi

Fri Apr 25 11:25:06 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   39C    P8              9W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [3]:
from huggingface_hub import notebook_login

notebook_login() # hf_API_KEY

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [8]:
try:
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline # pipeline in HF is a high-level API that simplifies using pre-trained models for NLP tasks like text generation, translation, and summarization.
except ModuleNotFoundError as e:
    print("Грешка: Необходимите библиотеки не са инсталирани. Моля, инсталирайте:")
    print("!pip install torch")
    print("!pip install transformers")
    raise e

# Модел (примерно Llama-3, ако е изтеглен локално или се използва от HF Hub)
MODEL_NAME = "meta-llama/Llama-3.1-8B"  # meta-llama/Llama-2-7b-chat-hf

def load_model(model_name):
    """Зарежда модела и токенизатора"""
    tokenizer = AutoTokenizer.from_pretrained(model_name) # Loads a pretrained tokenizer for the given model, handling tokenization tasks like converting text to tokens.
    model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto") # Loads a pretrained causal language model (for text generation) based on the given model name, ready for inference or fine-tuning.
    #torch_dtype=torch.float16: Loads the model with 16-bit floating-point precision (half-precision) to save memory and improve performance.
    #device_map="auto": Automatically assigns model layers to available devices (CPU/GPU) for optimized execution.

    return tokenizer, model

def generate_response(prompt, tokenizer, model, max_length=200): # Limits the generated text to 200 tokens
    """Генерира отговор от модела въз основа на подаден prompt"""
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device) # Converts input text to PyTorch tensors ("pt") and moves them to the same device (CPU/GPU) as the model
    output = model.generate(**inputs, max_length=max_length, pad_token_id=tokenizer.eos_token_id) # **inputs: Unpacks the dictionary of tokenized input arguments for model.generate()
    return tokenizer.decode(output[0], skip_special_tokens=True).replace(". ", ".\n") # Added by the tokenizer like the tokens <s> / </s> → Start and end of a sentence, <mask> etc.

# Зареждане на модела и токенизатора
try:
    tokenizer, model = load_model(MODEL_NAME)
except Exception as e:
    print(f"Грешка при зареждане на модела: {e}")
    raise e

# чат
while True: # Inf loop
    user_input = input("Hooman: ")
    if user_input.lower() in ["изход", "exit", "quit"]:
        print("Чатът приключи.")
        break
    try:
        response = generate_response(user_input, tokenizer, model)
        print(f"Bot: {response}")
    except Exception as e:
        print(f"Грешка при генериране на отговор: {e}") # Who discovered electricity?


tokenizer_config.json:   0%|          | 0.00/50.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/73.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/826 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/185 [00:00<?, ?B/s]



Hooman: Who discovered electricity?
Bot: Who discovered electricity? Electricity was discovered by Benjamin Franklin in 1752.
He discovered that lightning is a form of electricity by flying a kite during a thunderstorm and collecting the electricity in a Leyden jar.
He also discovered that electricity could be produced by rubbing certain materials together.
1 Who was the first person to discover electricity?
2 Who discovered electricity and when?
3 Who invented electricity first?
4 Who invented electricity in 1800?
5 Who invented electricity in 1850?
6 Who invented electricity in 1840?
7 Who invented electricity in 1859?
8 Who invented electricity in 1860?
9 Who invented electricity in 1880?
10 Who invented electricity in 1885?
11 Who invented electricity in 1887?
12 Who invented electricity in 1890?
13 Who invented electricity in 1900?
14 Who invented electricity in 1910?
15 Who invented electricity in 1920?
16 Who invented electricity in 1930?
17 Who invented
Hooman: is the earth fla