In [None]:
!pip install transformers torch accelerate bitsandbytes -q

In [None]:
import warnings

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline

# NOTE: this silences every Python warning for the rest of the session,
# including deprecation and generation-related warnings from the libraries below.
warnings.filterwarnings("ignore")

model_id = "mistralai/Mistral-7B-Instruct-v0.2"

# Sampling is enabled in the pipeline below, so seed torch's RNG to make a
# fresh "Restart & Run All" reproduce the same generations.
SEED = 42
torch.manual_seed(SEED)

print(f"Loading model: '{model_id}'...")

# Load the tokenizer that belongs to this checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_id)

# 4-bit quantization settings. They are passed through `quantization_config`
# because the bare `load_in_4bit=True` keyword on `from_pretrained` is deprecated.
quantization_config = BitsAndBytesConfig(load_in_4bit=True)

# Load the model with 4-bit quantized weights.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",          # Let the library place the weights on the available device(s)
    torch_dtype=torch.float16,  # dtype for the parts of the model that are not quantized
    quantization_config=quantization_config,
)

print("Model loaded successfully.")

# Create the text-generation pipeline used by the rest of the notebook.
print("Creating chatbot pipeline...")
chatbot = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,  # Upper bound on generated tokens (sub-word pieces, not words)
    do_sample=True,      # Set explicitly: temperature is ignored under greedy decoding
    temperature=0.7      # Sampling randomness (lower = more deterministic, higher = more varied)
)

print("--- MISTRAL 7B READY! ---")

In [None]:
def ask(question):
    """Send a single-turn question to the chatbot pipeline and return its reply.

    Prints the question and a short progress notice, then runs the question
    through the module-level ``chatbot`` text-generation pipeline.

    Args:
        question: The user's message as plain text.

    Returns:
        The generated reply as a string, with surrounding whitespace stripped.
    """
    print(f"\nYou: {question}")
    print("Just a sec...")

    # Single-turn conversation in the role/content message format.
    messages = [{"role": "user", "content": question}]

    # Hand the messages straight to the pipeline so it applies the chat
    # template and tokenizes in one step; rendering the template to a string
    # first and re-tokenizing it can prepend the BOS token twice.
    # return_full_text=False makes the pipeline return only the newly generated
    # text, so the reply no longer has to be cut out by splitting on "[/INST]"
    # (which would truncate any reply that itself contains that tag).
    result = chatbot(messages, return_full_text=False)

    return result[0]["generated_text"].strip()

# Smoke-test prompts: a trick word problem, plain arithmetic, and a
# constrained-format request.
question_1 = "An eraser and a pen cost $1.10 in total. The eraser costs $1.00 more than the pen. How much does the eraser cost?"
question_2 = "8659-4350=?"
question_3 = "Explain 'Science and Art' course with just four keywords."

# Ask each question in turn. `ask` prints the question itself; the reply is
# printed here right after it so the transcript stays in order.
answers = []
for question in (question_1, question_2, question_3):
    answer = ask(question)
    print(f"Mistral: {answer}")
    answers.append(answer)

# Keep the numbered names bound for any later cell that refers to them.
answer_1, answer_2, answer_3 = answers