**Mount drive**

In [None]:
# Make Google Drive visible to the Colab runtime so files stored there
# can be read through the local filesystem.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

**Install Unsloth**

In [None]:
# Install Unsloth into the running kernel's environment (%pip targets the
# active kernel, which is why it is used here instead of !pip).
# NOTE(review): the version is unpinned, so a fresh run may pull a different
# release than the one this notebook was written against — consider pinning.
%pip install unsloth

**Import necessary libraries**

In [None]:
from unsloth import FastLanguageModel
from transformers import AutoTokenizer
import torch
from torch.cuda.amp import autocast

**Load the model**

In [None]:
# Load the fine-tuned checkpoint and its tokenizer from Google Drive.
CHECKPOINT_DIR = "/content/drive/MyDrive/checkpoint-729"
MAX_SEQ_LENGTH = 2048

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=CHECKPOINT_DIR,
    max_seq_length=MAX_SEQ_LENGTH,
    dtype=torch.bfloat16,
    load_in_4bit=True,  # load the weights 4-bit quantized
)

# Put the module in eval mode (disables dropout and similar train-only layers).
model.eval()

# Switch Unsloth into its inference mode before calling generate().
# The trailing semicolon keeps the notebook from printing the returned
# object's (very long) repr as the cell output.
FastLanguageModel.for_inference(model);

**System prompt**

In [None]:
# Output-format instruction: asks the model to wrap its reasoning and its
# final answer in <reasoning>...</reasoning> and <answer>...</answer> tags.
SYSTEM_PROMPT = """
Respond in the following format:
<reasoning>
</reasoning>
<answer>
</answer>
"""

# The legal question (in Swedish) followed by English task instructions that
# require a structured, step-by-step answer written entirely in Swedish.
# NOTE(review): the question ends with "?." which looks like a typo.
# NOTE(review): item 2 says "Don't list the relevant section/s ... and describe
# their legal significance", which reads as self-contradictory — confirm
# whether "Don't" is intentional before relying on this prompt.
user_query = """ Vad är avgörande vid vårdnadstvister?. You are a legal expert specializing in the Swedish legal system. Your task is to generate a well thought of response to the given legal question. Your response should also include a detailed chain-of-thought (CoT) annotation that explains step-by-step how you arrived at the answer. Your explanation should include:

1.⁠ ⁠*Identification of Key Legal Concepts:* Begin by identifying and explaining the main legal terms and principles involved.
2.⁠ ⁠*Explanation of Relevant Statutes:* Don't list the relevant section/s (e.g., Section 29 of the Contracts Act) and describe their legal significance.
3.⁠ ⁠*Logical Reasoning Steps:* Outline the reasoning process in a clear, logical progression. Explain how the legal principles connect to the conclusion.
4.⁠ ⁠*Consideration of Exceptions or Nuances:* If applicable, note any potential exceptions or further nuances that could affect the interpretation.
5.⁠ ⁠*Summary Conclusion:* Conclude with a brief summary that ties together the reasoning and justifies the final answer.

Ensure that your response including reasoning as well as your answer, are in Swedish. Your response should be structured, clear, and use numbered or bullet points if necessary. Ensure that your explanation is comprehensive and would be understandable to someone with a legal background.

Do not include any extraneous commentary; focus solely on the reasoning process.

Your entire response must be written in correct, legal Swedish. Use a formal tone appropriate for Swedish legal professionals and legal academics. Do not use English at any point.
"""
# Final prompt: the format instruction and the query joined as plain text,
# separated by a blank line.
# NOTE(review): no chat template is applied here — confirm this matches the
# prompt format the checkpoint was trained with.
input_prompt = f"{SYSTEM_PROMPT}\n\n{user_query}"

**Tokenize**

In [None]:
# Turn the prompt text into PyTorch tensors, truncating anything beyond
# 2048 tokens.
encoded = tokenizer(
    input_prompt,
    return_tensors="pt",
    padding=True,
    truncation=True,
    max_length=2048,
)

# Move every tensor produced by the tokenizer onto the GPU and keep them in
# a plain dict keyed by the tokenizer's field names.
inputs = {}
for field_name, tensor in encoded.items():
    inputs[field_name] = tensor.to("cuda")

**Run generation**

In [None]:
# Generate a completion for the tokenized prompt.
#
# The checkpoint is loaded with dtype=torch.bfloat16, so autocast must use
# the same dtype: running a bf16 model under float16 autocast forces extra
# casts and can overflow, because fp16 has a much smaller dynamic range.
# Fall back to float16 only on GPUs that do not support bf16.
autocast_dtype = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16

# no_grad: inference only, so skip building the autograd graph.
with torch.no_grad(), torch.amp.autocast("cuda", dtype=autocast_dtype):
    outputs = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=2048,
        # Sampling is enabled, but the very low temperature keeps the
        # output close to greedy decoding.
        do_sample=True,
        temperature=0.05,
        top_p=0.95,
        top_k=40,
        repetition_penalty=1.1,
    )

**Decode response and print**

In [None]:
# generate() returns the prompt tokens followed by the newly generated ones.
# Drop the prompt part so `response` holds only the model's answer instead of
# echoing the whole (long) instruction prompt back.
prompt_length = inputs["input_ids"].shape[1]
generated_tokens = outputs[0][prompt_length:]

response = tokenizer.decode(generated_tokens, skip_special_tokens=True)
print(response)