In [None]:
# Install the packages this notebook uses into the kernel's environment.
# NOTE(review): no versions are pinned, so a re-run may pull different releases.
%pip install transformers
%pip install torch
%pip install bitsandbytes
%pip install accelerate
# Import PyTorch and ask it to empty its CUDA cache before any model is loaded.
import torch
torch.cuda.empty_cache()

In [None]:


from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

# Hugging Face Hub id of the checkpoint used throughout the notebook.
model_name = "Jacaranda/UlizaLlama"

# Tokenizer shared by every cell below.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Request 4-bit quantization explicitly: a bare BitsAndBytesConfig() leaves both
# `load_in_4bit` and `load_in_8bit` at their default of False.
# Use `load_in_8bit=True` instead to trade memory for precision.
quantization_config = BitsAndBytesConfig(load_in_4bit=True)

# Load the weights exactly once, already quantized. Loading a second,
# full-precision copy first would roughly double peak memory and leave the
# quantized model unused. `device_map="auto"` lets accelerate place the
# weights on the available GPU.
model_quantized = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quantization_config,
    device_map="auto",
    low_cpu_mem_usage=True,
)

# The cells below refer to `model`; bind it to the quantized instance so they
# actually run against the quantized weights.
model = model_quantized

# Print model details
print(model_quantized)




Text Generation

In [None]:


# Swahili story opener ("Once upon a time") for the model to continue.
input_text = "Hapo zamani za kale"
# Put the encoded prompt on the same device as the model instead of hard-coding
# 'cuda'; a hard-coded device fails whenever the model lives elsewhere (e.g. CPU).
inputs = tokenizer(input_text, return_tensors='pt').to(model.device)
# max_length caps prompt + continuation at 50 tokens in total.
outputs = model.generate(**inputs, max_length=50)
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(generated_text)



Translation

In [None]:

input_text = "Hello, how are you?"
# A causal LM only continues its prompt, so the translation request has to be
# spelled out; feeding the bare sentence just produces more English text.
# TODO: align this wording with the checkpoint's documented prompt template.
prompt = f"Translate the following English text to Swahili.\nEnglish: {input_text}\nSwahili:"
# Keep the inputs on the same device as the model's weights.
inputs = tokenizer(prompt, return_tensors='pt').to(model.device)
# max_new_tokens budgets only the generated part; max_length would also count
# the prompt tokens.
outputs = model.generate(**inputs, max_new_tokens=50)
# Drop the echoed prompt so only the model's continuation is reported.
prompt_length = inputs["input_ids"].shape[-1]
translated_text = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
print(translated_text)



Question Answering

In [None]:

# Swahili question: "What is the capital city of Kenya?"
input_text = "Mji mkuu wa Kenya ni upi?"
# Keep the inputs on the same device as the model's weights so generation works
# regardless of where the model was loaded.
inputs = tokenizer(input_text, return_tensors='pt').to(model.device)
# Budget the answer itself; max_length would also count the question's tokens.
outputs = model.generate(**inputs, max_new_tokens=50)
# The decoded text contains the question followed by the model's continuation.
answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(answer)



Summarization

In [None]:



# Placeholder: replace with the document to summarize.
input_text = "Your long text here"
# A causal LM needs an explicit instruction; without one it simply continues
# the document instead of summarizing it.
# TODO: align this wording with the checkpoint's documented prompt template.
prompt = f"Summarize the following text.\nText: {input_text}\nSummary:"
# Keep the inputs on the same device as the model's weights.
inputs = tokenizer(prompt, return_tensors='pt').to(model.device)
# max_length counts the input tokens too, so a genuinely long text would use up
# the whole budget before anything is generated; max_new_tokens limits only
# the summary.
outputs = model.generate(**inputs, max_new_tokens=50)
# Report only the generated summary, not the echoed document.
prompt_length = inputs["input_ids"].shape[-1]
summary = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
print(summary)


Pipelines for Inference

In [None]:
from transformers import pipeline

# The loaded checkpoint is a causal language model. The "translation" and
# "summarization" pipelines are built for sequence-to-sequence models and
# "question-answering" for models with a span-prediction head, so a causal LM
# is not a supported model class for them. Every task is therefore expressed
# as a prompt to a single text-generation pipeline.
text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer)


def run_prompt(prompt, max_new_tokens=50):
    """Return only the text generated in response to `prompt`.

    Args:
        prompt: Full instruction text passed to the text-generation pipeline.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The generated continuation as a string, without the echoed prompt.
    """
    results = text_generator(
        prompt,
        max_new_tokens=max_new_tokens,
        return_full_text=False,  # omit the prompt from the returned text
    )
    return results[0]["generated_text"]


# Text Generation: free-form continuation of a Swahili story opener.
generated_text = text_generator("Hapo zamani za kale", max_length=50)
print(generated_text)

# Translation, phrased as an instruction.
# TODO: align prompt wording with the checkpoint's documented template.
translated_text = run_prompt(
    "Translate the following English text to Swahili.\n"
    "English: Hello, how are you?\n"
    "Swahili:"
)
print(translated_text)

# Question Answering: supply the context, then the question ("Swali"), and let
# the model complete the answer ("Jibu").
answer = run_prompt(
    "Kenya ni nchi katika Afrika Mashariki. Mji mkuu ni Nairobi.\n"
    "Swali: Mji mkuu wa Kenya ni upi?\n"
    "Jibu:"
)
print(answer)

# Summarization, phrased as an instruction; replace the placeholder text.
summary = run_prompt(
    "Summarize the following text.\n"
    "Text: Your long text here\n"
    "Summary:"
)
print(summary)