In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from peft import PeftModel
import numpy as np



In [2]:
import os
import sys

# Resolve the directory the notebook is run from. abspath() resolves the
# relative notebook filename against the current working directory, so this
# relies on the kernel having been started in the notebook's own folder.
current_dir = os.path.dirname(os.path.abspath("gemini2.5.ipynb"))

# The parent (project) directory is where GetXY.py is expected to be found.
parent_dir = os.path.dirname(current_dir)

# Make the parent directory importable. The membership check keeps the cell
# idempotent: re-running it no longer appends duplicate sys.path entries.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

# Evaluation inputs/targets prepared by GetXY.py.
from GetXY import (
    expressions_not_in_x,
    outsideExpr,
    longer_Exps,
    y_test,
    out_y_test,
    long_y_test,
    x_string,
    y,
)

# Alias the imported inputs to the x_* names used by the rest of the notebook.
x_test = expressions_not_in_x
out_x_test = outsideExpr
long_x_test = longer_Exps
print("Successfully imported variables!")

2 - 1 + -2
2543
-1.0

Expressions not in x:
1 - -2 - -3
True
1457
6.0
15
-4.0
[-5.   1.   1.   0.5  0.5  0.5  0.5  0.5  0.5  0.5  0.5  0.5  0.5  0.5
  0.5]
Successfully imported variables!


In [3]:
# Load the tokenizer from the local ./big_output directory (the same path that
# is used as the fine-tuned adapter directory when the model is loaded below).
tokenizer = AutoTokenizer.from_pretrained("./big_output")


In [4]:
# Identifier of the base checkpoint that the adapter is applied on top of.
base_model_id = "google/gemma-3-1b-it"

# Local directory holding the fine-tuned PEFT adapter.
adapter_id = "./big_output"

# Step 1: instantiate the base model in bfloat16 on the CUDA device.
# NOTE(review): trust_remote_code=True permits code shipped with the model
# repository to run locally -- confirm it is actually required here.
model = AutoModelForCausalLM.from_pretrained(
    base_model_id, dtype=torch.bfloat16, device_map="cuda", trust_remote_code=True
)

# Step 2: attach the adapter to the base model, then fold its weights in and
# drop the PEFT wrapper so that `model` is the merged, fine-tuned model.
model = PeftModel.from_pretrained(model, adapter_id).merge_and_unload()

In [5]:
# Ensure a padding token is defined: when the tokenizer has none, reuse its
# end-of-sequence token as the pad token and report that the fallback was used.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    print("added padding")

def encodePreds(x):
    """Convert raw prompts into tokenized model inputs on the model's device.

    Every element of ``x`` is wrapped as a single user message, rendered to
    text with the tokenizer's chat template (with the generation prompt
    appended), tokenized into PyTorch tensors and moved to ``model.device``.

    Args:
        x: Iterable of user prompts (presumably strings -- each element is
            used as the message content).

    Returns:
        list: One tokenizer encoding per element of ``x``, in the same order.
    """
    def _render(user_text):
        # Single-turn conversation containing only the user's message.
        chat = [{"role": "user", "content": user_text}]
        return tokenizer.apply_chat_template(
            chat, tokenize=False, add_generation_prompt=True
        )

    # NOTE(review): the rendered text is tokenized with the tokenizer's default
    # settings; if the chat template already emits a BOS token this may add a
    # second one. Confirm this matches how the adapter was trained before
    # changing it.
    return [
        tokenizer(_render(user_text), return_tensors="pt").to(model.device)
        for user_text in x
    ]


In [6]:
# Tokenize each evaluation input set. The numeric suffix is what ties a set to
# its responsesN / float_predsN / string_predsN results further down:
#   1 -> x_test, 2 -> x_string, 3 -> out_x_test, 4 -> long_x_test
inputs1, inputs2, inputs3, inputs4 = map(
    encodePreds, (x_test, x_string, out_x_test, long_x_test)
)

In [7]:
def decodePreds(output):
    """Decode generated token IDs into cleaned model replies.

    Each sequence is decoded with special tokens kept. Everything up to and
    including the first "<start_of_turn>model" marker (plus its newline) is
    dropped, and the remaining chat markers, newlines and surrounding
    whitespace are stripped from what is left.

    Note that the literal text "model" is also removed from the reply.

    Args:
        output: Batch of token-ID sequences accepted by
            ``tokenizer.batch_decode`` (e.g. the result of ``model.generate``).

    Returns:
        list[str]: One cleaned reply per sequence in ``output``, in order.
        An empty batch yields an empty list.

    Raises:
        IndexError: If a decoded sequence does not contain the model-turn
            marker.
    """
    turn_separator = "<start_of_turn>model\n"
    decoded_preds = tokenizer.batch_decode(output, skip_special_tokens=False)

    responses = []
    for decoded in decoded_preds:
        # Keep only the text that follows the first model-turn marker.
        pred_response = decoded.split(turn_separator, 1)[1]
        clean_pred = (
            pred_response.replace("<end_of_turn>", "")
            .replace("<pad>", "")
            .replace("<start_of_turn>", "")
            .replace("model", "")
            .replace("\n", "")
            .strip()
        )
        # Collect inside the loop so that every sequence of the batch is
        # returned; appending after the loop kept only the last one and
        # failed with NameError on an empty batch.
        responses.append(clean_pred)

    return responses

In [8]:
# Generate a reply for every encoded x_test prompt, one prompt at a time.
responses1 = []
# The loop variable is named `encoded` rather than `input` so that the builtin
# input() is not shadowed for the rest of the notebook session.
for encoded in inputs1:
    # Greedy decoding: sampling is disabled, so the sampling parameters are
    # explicitly unset.
    generated = model.generate(
        **encoded,
        max_new_tokens=20,
        do_sample=False,
        temperature=None,  # Explicitly unset
        top_p=None,        # Explicitly unset
        top_k=None,        # Explicitly unset
    )

    # decodePreds returns a list, so responses1 ends up as a list of lists.
    responses1.append(decodePreds(generated))

In [9]:
# Generate a reply for every encoded x_string prompt, one prompt at a time.
responses2 = []
# The loop variable is named `encoded` rather than `input` so that the builtin
# input() is not shadowed for the rest of the notebook session.
for encoded in inputs2:
    # Greedy decoding: sampling is disabled, so the sampling parameters are
    # explicitly unset.
    generated = model.generate(
        **encoded,
        max_new_tokens=20,
        do_sample=False,
        temperature=None,  # Explicitly unset
        top_p=None,        # Explicitly unset
        top_k=None,        # Explicitly unset
    )
    # decodePreds returns a list, so responses2 ends up as a list of lists.
    responses2.append(decodePreds(generated))

In [10]:
# Generate a reply for every encoded out_x_test prompt, one prompt at a time.
responses3 = []
# The loop variable is named `encoded` rather than `input` so that the builtin
# input() is not shadowed for the rest of the notebook session.
for encoded in inputs3:
    # Greedy decoding: sampling is disabled, so the sampling parameters are
    # explicitly unset.
    generated = model.generate(
        **encoded,
        max_new_tokens=20,
        do_sample=False,
        temperature=None,  # Explicitly unset
        top_p=None,        # Explicitly unset
        top_k=None,        # Explicitly unset
    )
    # decodePreds returns a list, so responses3 ends up as a list of lists.
    responses3.append(decodePreds(generated))

In [11]:
# Generate a reply for every encoded long_x_test prompt, one prompt at a time.
responses4 = []
# The loop variable is named `encoded` rather than `input` so that the builtin
# input() is not shadowed for the rest of the notebook session.
for encoded in inputs4:
    # Greedy decoding: sampling is disabled, so the sampling parameters are
    # explicitly unset.
    generated = model.generate(
        **encoded,
        max_new_tokens=20,
        do_sample=False,
        temperature=None,  # Explicitly unset
        top_p=None,        # Explicitly unset
        top_k=None,        # Explicitly unset
    )
    # decodePreds returns a list, so responses4 ends up as a list of lists.
    responses4.append(decodePreds(generated))

In [12]:
# Spot-check the first five decoded replies for the x_test prompts
# (responses1) and the x_string prompts (responses2).
print(responses1[:5])
print(responses2[:5])

[['2.0'], ['-2.0'], ['4.0'], ['1.0'], ['-11.0']]
[['-1.0'], ['-2.0'], ['-10.0'], ['-7.0'], ['-6.0']]


In [13]:
def safe_float(x, default=0.0):
    """Convert ``x`` to ``float``, returning ``default`` when that fails.

    Args:
        x: Value to convert (for example a decoded model reply).
        default: Value returned when ``float(x)`` raises ``ValueError`` or
            ``TypeError``. Defaults to ``0.0``; passing e.g. ``float("nan")``
            lets callers tell failed parses apart from genuine zeros.

    Returns:
        ``float(x)`` on success, otherwise ``default``.
    """
    try:
        return float(x)
    except (ValueError, TypeError):
        # Unparseable text or an unsupported type: use the caller's fallback.
        return default

def safe_str(x, default=""):
    """Convert ``x`` to ``str``, returning ``default`` when that fails.

    Args:
        x: Value to convert.
        default: Value returned when ``str(x)`` raises ``ValueError`` or
            ``TypeError``. Defaults to the empty string.

    Returns:
        ``str(x)`` on success, otherwise ``default``.
    """
    try:
        return str(x)
    except (ValueError, TypeError):
        # The object's string conversion failed: use the caller's fallback.
        return default

def _parse_responses(responses):
    """Split decoded responses into parallel float and string predictions.

    An entry contributes its first element when it is a non-empty list or
    tuple; any other entry falls back to 0.0 (float) and "" (string).
    """
    as_floats, as_strings = [], []
    for response in responses:
        usable = isinstance(response, (list, tuple)) and len(response) > 0
        as_floats.append(safe_float(response[0]) if usable else 0.0)
        as_strings.append(safe_str(response[0]) if usable else "")
    return as_floats, as_strings


# Numeric and textual views of the model replies for each of the four runs.
float_preds1, string_preds1 = _parse_responses(responses1)
float_preds2, string_preds2 = _parse_responses(responses2)
float_preds3, string_preds3 = _parse_responses(responses3)
float_preds4, string_preds4 = _parse_responses(responses4)

# The same two views of the reference answers (y_test and y).
y_test_float = list(map(safe_float, y_test))
y_test_string = list(map(safe_str, y_test))

y_float = list(map(safe_float, y))
y_string = list(map(safe_str, y))

# The model emits free-form tokens, so a reply is not guaranteed to parse as a number. This is part of the reason why absolute error is an unreliable metric for pretrained models with token outputs.
# The safe_* wrappers are an AI-suggested improvement to my original code that catches such outlying tokens. Replacing an unparseable reply with 0.0 is not the best approach, but it should only happen occasionally, so it won't play a big role in the results.

In [14]:
def _abs_errors_and_accuracy(targets, preds):
    """Return (absolute errors, mean absolute error, exact-match rate).

    A prediction counts as an exact match when its difference from the
    float-converted target is exactly zero.
    """
    abs_errors = []
    exact_matches = 0
    for idx in range(len(targets)):
        delta = float(targets[idx]) - preds[idx]
        if delta == 0:
            exact_matches += 1
        abs_errors.append(abs(delta))
    return abs_errors, np.mean(abs_errors), exact_matches / len(targets)


# Scores for the out_x_test prompts (predictions in float_preds3).
out_diffs, out_mean_diff, out_acc = _abs_errors_and_accuracy(out_y_test, float_preds3)
print(f"out abs error: {out_mean_diff}, accuracy: {out_acc}")

# Scores for the long_x_test prompts (predictions in float_preds4).
long_diffs, long_mean_diff, long_acc = _abs_errors_and_accuracy(long_y_test, float_preds4)
print(f"long abs error: {long_mean_diff}, accuracy: {long_acc}")


out abs error: 2.19990234375, accuracy: 0.63330078125
long abs error: 2.400142857142857, accuracy: 0.39571428571428574


In [15]:
# Mean absolute error between the y_test targets and the x_test predictions.
diffs = [abs(y_test_float[idx] - float_preds1[idx]) for idx in range(len(y_test))]
print(np.mean(diffs))

0.2330130404941661


In [16]:
# Mean absolute error between the y targets and the x_string predictions.
diffs = [abs(y_float[idx] - float_preds2[idx]) for idx in range(len(y_float))]
print(np.mean(diffs))

0.2609123082972867


In [17]:
# Exact string-match rate between the y_test targets and the x_test predictions.
count = sum(
    1
    for idx in range(len(y_test_string))
    if y_test_string[idx] == string_preds1[idx]
)
print(count/len(y_test_string))

0.9341111873713109


In [18]:
# Exact string-match rate between the y targets and the x_string predictions.
count = sum(
    1
    for idx in range(len(y_string))
    if y_string[idx] == string_preds2[idx]
)
print(count/len(y_string))

0.939441604404247
