# Using LLMs

## Transformer

In [None]:
! pip install transformers

**Authentication**

To use transformer models you must use your HuggingFace API key!

In [2]:
from transformers import pipeline

# Text-generation pipeline backed by the Ministral 3B instruct checkpoint.
pipe = pipeline(task="text-generation", model="ministral/Ministral-3b-instruct")

# Conversation in role/content form: a system persona followed by the user's question.
messages = [
    dict(role="system", content="You are a funny standup comedian, ready to make jokes at every moment!"),
    dict(role="user", content="Tell me what is AI and what are it's capabalities?"),
]

# Last expression of the cell, so the pipeline result is displayed as the cell output.
pipe(messages)

config.json:   0%|          | 0.00/619 [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/2.97G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/698M [00:00<?, ?B/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/2.96G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/510 [00:00<?, ?B/s]

Device set to use cuda:0


[{'generated_text': [{'role': 'system',
    'content': 'You are a funny standup comedian, ready to make jokes at every moment!'},
   {'role': 'user',
    'content': "Tell me what is AI and what are it's capabalities?"},
   {'role': 'assistant',
    'content': 'Ah, AI! A man man\'s dream of making a computer...\n\nWhat is it? Well, let me tell you about a man man, Alex.\n\nAlex, who is about to be his best of self and their company.\n\nThey say...\n\n"Artificial intelligence is an algorithm-based virtual intelligence system that can mimic human cognition and decision making."\n\nBut how doesn\'t it perform? Well, Alex says...\n\n"You see, Alex is a chatbot that can answer questions like questions without actually talking. He can also use the AI-based language model to answer questions.\n\nBut we can use this AI to help us make better decisions, like how to make it easier to find a dog doggy, or maybe a joke about pizza."\n\nAnd here\'s what they think about the AI:\n\n"AI is the ultimat

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the tokenizer and the causal language model from the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-4-mini-instruct")
model = AutoModelForCausalLM.from_pretrained("microsoft/Phi-4-mini-instruct")

messages = [
    {"role": "system", "content": "You are a funny standup comedian, ready to make jokes at every moment!"},
    {"role": "user", "content": "Tell me what is AI and what are it's capabalities?"},
]

# Render the conversation with the tokenizer's chat template and tokenize it,
# returning a dict of PyTorch tensors that is moved to the model's device.
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,  # correct keyword spelling; needed together with return_dict / return_tensors
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)

# Skip the prompt positions so only the newly generated tokens are decoded and printed.
print(tokenizer.decode(outputs[0][inputs['input_ids'].shape[-1]:]))

### BERT

In [7]:
import torch
# Use the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [8]:
from transformers import AutoConfig, AutoTokenizer, AutoModel, AutoModelForSequenceClassification

# Checkpoint shared by the config, the tokenizer, and the model loaded below.
model_name = "bert-base-uncased"

tokenizer = AutoTokenizer.from_pretrained(model_name)
config = AutoConfig.from_pretrained(model_name)

# Model loaded through AutoModel; embed_document reads its last_hidden_state.
bert = AutoModel.from_pretrained(model_name)
bert.config.pad_token_id  # bare expression that is not the cell's last line, so nothing is displayed

# Place the model on the device selected in the previous cell.
bert = bert.to(device)

In [9]:
def split_into_chunks(text,max_length=512):
  """Split ``text`` into consecutive pieces of at most ``max_length`` tokens.

  The text is tokenized with the notebook-level ``tokenizer``; every run of
  ``max_length`` tokens is converted back into a string.

  Args:
    text: The text to split.
    max_length: Maximum number of tokens per chunk (default 512).

  Returns:
    A list of chunk strings in document order; empty when the text
    produces no tokens.
  """
  pieces = tokenizer.tokenize(text)
  return [
      tokenizer.convert_tokens_to_string(pieces[start:start + max_length])
      for start in range(0, len(pieces), max_length)
  ]

In [10]:
def embed_document(document):
  """Embed a document by averaging mean-pooled BERT embeddings of its chunks.

  The document is split with ``split_into_chunks``. Each chunk is encoded by
  ``bert``, and its ``last_hidden_state`` is averaged over dim 1. The
  per-chunk vectors are then stacked and averaged over the chunk axis.

  Args:
    document: The text to embed.

  Returns:
    The mean of the stacked chunk embeddings (presumably of shape
    (1, hidden_size), since one sequence is encoded per forward pass --
    TODO confirm).

  Raises:
    ValueError: If the document produces no chunks (e.g. an empty string).
  """
  max_length = 512
  # The tokenizer call below adds special tokens to every chunk. Reserve room
  # for them so a full chunk is not silently cut by truncation at max_length.
  content_length = max_length - tokenizer.num_special_tokens_to_add()
  chunks = split_into_chunks(document, max_length=content_length)
  if not chunks:
    # torch.stack would otherwise fail on an empty list with an unrelated-looking error.
    raise ValueError("Cannot embed a document that contains no tokens.")

  embeddings = []
  with torch.no_grad():  # inference only; no gradients are needed
    for chunk in chunks:
      # truncation=True stays as a safety net in case re-tokenizing the chunk
      # string yields more tokens than the chunk was built from.
      inputs = tokenizer(chunk, return_tensors="pt", truncation=True, padding=True, max_length=max_length)
      inputs = {key: value.to(device) for key, value in inputs.items()}  # Move inputs to the model's device
      output = bert(**inputs)
      embeddings.append(output.last_hidden_state.mean(dim=1))
  return torch.mean(torch.stack(embeddings), dim=0)

## LiteLLM

In [None]:
! pip install litellm

**Authentication**

- You must use your service provider key to access models!
- For free access to LLMs, you can create an account at https://console.groq.com and use a limited number of tokens per day across various models.

In [4]:
from getpass import getpass
import os

from litellm import completion

# Do not hardcode the key in the notebook. Reuse GROQ_API_KEY when it is
# already set in the environment; otherwise prompt for it without echoing.
if not os.environ.get('GROQ_API_KEY'):
    os.environ['GROQ_API_KEY'] = getpass("Groq API key: ")

# Send a single-turn chat request to the Groq-hosted Llama 3 8B model.
response = completion(
    model="groq/llama3-8b-8192",
    messages=[
       {"role": "user", "content": "Hello there!"}
   ],
)

print(response)

ModelResponse(id='chatcmpl-a8911710-b289-4762-af63-09a4295ad52a', created=1754773207, model='llama3-8b-8192', object='chat.completion', system_fingerprint='fp_0fb809dba3', choices=[Choices(finish_reason='stop', index=0, message=Message(content="Hello! It's nice to meet you. Is there something I can help you with, or would you like to chat?", role='assistant', tool_calls=None, function_call=None, provider_specific_fields=None))], usage=Usage(completion_tokens=26, prompt_tokens=13, total_tokens=39, completion_tokens_details=None, prompt_tokens_details=None, queue_time=0.056553721, prompt_time=0.002328818, completion_time=0.021215335, total_time=0.023544153), usage_breakdown=None, x_groq={'id': 'req_01k28a4dz5evfra572p5yek300'}, service_tier='auto')
