In [2]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, GenerationConfig
from peft import LoraConfig, TaskType

dev = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
gemma1b_gen_config = GenerationConfig.from_pretrained("google/gemma-3-1b-it")
gemma1b_quant_config = BitsAndBytesConfig(load_in_4bit = True, bnb_4bit_compute_dtype = torch.bfloat16)

gemma1b_tokenizer = AutoTokenizer.from_pretrained("google/gemma-3-1b-it")
gemma1b = AutoModelForCausalLM.from_pretrained("google/gemma-3-1b-it", quantization_config = gemma1b_quant_config).to(dev) 

config.json:   0%|          | 0.00/899 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/2.00G [00:00<?, ?B/s]

In [None]:
gemma_gen_config = GenerationConfig.from_pretrained("google/gemma-3-270m")
gemma_quant_config = BitsAndBytesConfig(load_in_4bit = True, bnb_4bit_compute_dtype = torch.bfloat16)

gemma3_270_tokenizer = AutoTokenizer.from_pretrained("google/gemma-3-270m")
gemma3_270 = AutoModelForCausalLM.from_pretrained("google/gemma-3-270m", quantization_config = gemma_quant_config).to(dev)

In [6]:
from datasets import load_dataset
kuro_ds = load_dataset('Kurosawama/Translation_DPO_Llama-3.1-8B', split='train')
text = kuro_ds['chosen'][5][0]['content']

In [4]:
def generation(text_input, model, tokenizer):
    llm_input = tokenizer(text_input, return_tensors = 'pt').to(dev)
    outputs = model.generate(**llm_input, max_new_tokens = 250)
    answer = tokenizer.batch_decode(outputs, skip_special_tokens = True)[0]
    answer = answer[len(llm_input):]
    print(answer)

In [7]:
generation(text, gemma1b, gemma1b_tokenizer)

   Given a problem description and a question, the task is to parse the problem and the question into first-order logic formulars.
    The grammar of the first-order logic formular is defined as follows:
    1) logical conjunction of expr1 and expr2: expr1 ∧ expr2
    2) logical disjunction of expr1 and expr2: expr1 ∨ expr2
    3) logical exclusive disjunction of expr1 and expr2: expr1 ⊕ expr2
    4) logical negation of expr1: ¬expr1
    5) expr1 implies expr2: expr1 → expr2
    6) expr1 if and only if expr2: expr1 ↔ expr2
    7) logical universal quantification: ∀x
    8) logical existential quantification: ∃x
    A detailed example is shown next. Only answer with the premises, omit the explanation.
    --------------
    Problem:
    All people who regularly drink coffee are dependent on caffeine. People either regularly drink coffee or joke about being addicted to caffeine. No one who jokes about being addicted to caffeine is unaware that caffeine is a drug. Rina is either a student

In [4]:
llm_input = gemma3_270_tokenizer(text, return_tensors = 'pt').to(dev)
outputs = gemma3_270.generate(**llm_input, max_new_tokens = 100)
answer = gemma3_270_tokenizer.batch_decode(outputs, skip_special_tokens = True)[0]
answer = answer[len(llm_input):]

RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [None]:
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("google/gemma-3-1b-it")
model = AutoModelForCausalLM.from_pretrained("google/gemma-3-1b-it")
messages = [
    {"role": "user", "content": "Who are you?"},
]
inputs = tokenizer.apply_chat_template(
	messages,
	add_generation_prompt=True,
	tokenize=True,
	return_dict=True,
	return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))

In [8]:
test = """
∀x (Drinks(x) → Dependent(x)) ::: All people who regularly drink coffee are dependent on caffeine.
    ∀x (Drinks(x) ⊕ Jokes(x)) ::: People either regularly drink coffee or joke about being addicted to caffeine.
    ∀x (Jokes(x) → ¬Unaware(x)) ::: No one who jokes about being addicted to caffeine is unaware that caffeine is a drug. 
    (Student(rina) ∧ Unaware(rina)) ⊕ ¬(Student(rina) ∨ Unaware(rina)) ::: Rina is either a student and unaware that caffeine is a drug, or neither a student nor unaware that caffeine is a drug. 
    ¬(Dependent(rina) ∧ Student(rina)) → (Dependent(rina) ∧ Student(rina)) ⊕ ¬(Dependent(rina) ∨ Student(rina)) ::: If Rina is not a person dependent on caffeine and a student, then Rina is either a person dependent on caffeine and a student, or neither a person dependent on caffeine nor a student.
    """
print(len(test))

836
