In [None]:
# Colab-only setup: mount the user's Google Drive into this runtime at
# /content/drive (prompts for authorization when not already mounted).
from google.colab import drive
drive.mount('/content/drive')
# Root directory on the mounted Drive; the model-loading cell builds the
# checkpoint path from it, so this cell must run first.
base_path = "/content/drive/MyDrive/SmolLM2-1.7B-UltraChat_200k"

In [None]:
%pip install transformers
%pip install peft
%pip install accelerate
%pip install bitsandbytes

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftConfig
import torch

# Directory (under the Drive root set in the first cell) holding the
# fine-tuned model and its tokenizer files.
# NOTE(review): the variable name suggests a PEFT adapter checkpoint; if so,
# loading it through AutoModelForCausalLM relies on transformers' PEFT
# integration and the `peft` package being installed -- confirm.
peft_model_path = f"{base_path}/final_model_1.7B"

# device_map="auto" lets Accelerate place the weights on the available
# device(s) at load time, so the model must NOT be moved again with .to():
# that call is redundant when everything fits on one GPU and is rejected (or
# warned about) when some modules were dispatched or offloaded.
model = AutoModelForCausalLM.from_pretrained(
    peft_model_path,
    device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained(peft_model_path)

# Device that input tensors must be sent to; taken from the loaded model so it
# always agrees with where Accelerate actually put the weights.
device = model.device

In [None]:
# Single-turn chat demo: render one user message with the tokenizer's chat
# template, sample a reply from the model, and print it.
user_message = "How far away is the sun?"
chatml_template = [
    {"role": "user", "content": user_message},
]
# tokenize=False returns the rendered prompt as text so it can be inspected;
# add_generation_prompt=True appends the assistant-turn header so the model
# writes a reply instead of continuing the user turn.
prompt = tokenizer.apply_chat_template(chatml_template, tokenize=False, add_generation_prompt=True)
print(prompt)

# The rendered prompt already contains every special token the template emits,
# so the tokenizer must not add its own on top (add_special_tokens=False).
# Inputs go to the model's own device so this cell works however it was placed.
inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=False).to(model.device)

# Generation below samples (do_sample=True); seed torch so that re-running the
# cell reproduces the same reply.
torch.manual_seed(0)

output = model.generate(
    **inputs,
    max_new_tokens=512,
    temperature=0.2,
    top_p=0.95,
    top_k=50,
    repetition_penalty=1.1,
    do_sample=True,
    # Reuse EOS as the padding id for generation.
    pad_token_id=tokenizer.eos_token_id
)

# For a causal LM, generate() returns the prompt tokens followed by the newly
# generated ones; drop the prompt so `response` holds only the model's reply
# (the prompt itself was already printed above).
prompt_length = inputs["input_ids"].shape[1]
response = tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True)
print(response)