In [1]:
# Danish Bible                            "bla bla (originally Danish)"
# English Bible "bla bla" -> steering -> "bla bla (but in Danish)"
from huggingface_hub import hf_hub_download
from utils.steering import generate_with_steering
from transformers import AutoModelForCausalLM
import fasttext
import torch
import os
from classes.datahandling import ParallelNSPDataset
from utils.probe_confidence_intervals import model_setup
# Load the GPT-SW3 356M checkpoint; model_setup hands back the model, its tokenizer and a device.
model, tokenizer, device = model_setup("AI-Sweden-Models/gpt-sw3-356m")

# Download the fastText language-identification model file from the Hugging Face Hub and load it.
model_name = "nb-nordic-lid.ftz"
language_prediction_model = fasttext.load_model(hf_hub_download("NbAiLab/nb-nordic-lid", model_name))

# Sanity check of the language classifier on a Swedish sentence.
label, score = language_prediction_model.predict("Harry Potter är en serie fantasyromaner av författaren J.K. Rowling, som började ges", threshold=0.25)
# NOTE(review): only the last expression of a cell is displayed, so this tuple is evaluated but never shown.
label[0].split("__")[-1], score
# Load the Danish-English parallel Bible corpus from the TMX file and display its first entry.
bible_data = ParallelNSPDataset.from_tmx("data/bible-da-en.tmx","da","en")
bible_data[0]

found device: cpu


{'da': ('I Begyndelsen skabte Gud Himmelen og Jorden.',
  'Og Jorden var øde og tom, og der var Mørke over Verdensdybet. Men Guds Ånd svævede over Vandene.'),
 'en': ('In the beginning God created the heavens and the earth.',
  "Now the earth was formless and empty. Darkness was on the surface of the deep. God's Spirit was hovering over the surface of the waters.")}

In [2]:

def load_targeted_steering_vectors(steering_vector_path: str) -> tuple[dict,dict,dict]:
    """Load the per-layer steering vectors stored in a directory.

    Every file whose name starts with ``target_``, ``complement_`` or
    ``combined_`` is loaded with ``torch.load`` and stored under its layer
    index. The layer index is the fifth ``_``-separated field of the file
    name. Files with any other prefix are ignored.

    According to the original author's note, combined = target - complement.

    Args:
        steering_vector_path (str): directory containing the saved vectors,
            with or without a trailing path separator.

    Returns:
        tuple[dict,dict,dict]: ``(target, complement, combined)``, each a dict
        mapping the integer layer index to the object loaded from the file.

    Raises:
        IndexError: a file with a recognised prefix has fewer than five
            ``_``-separated fields.
        ValueError: the layer field of such a file is not an integer.
    """
    vectors_by_kind = {"target": dict(), "complement": dict(), "combined": dict()}
    for file_name in os.listdir(steering_vector_path):
        name_parts = file_name.split("_")
        kind = name_parts[0]
        # Skip unrelated files *before* parsing the layer, so that stray files
        # (README, .DS_Store, ...) cannot crash the loader.
        if kind not in vectors_by_kind:
            continue
        layer = int(name_parts[4])
        # os.path.join works whether or not the directory ends with a separator.
        file_path = os.path.join(steering_vector_path, file_name)
        # NOTE(review): torch.load unpickles the file; only point this at trusted
        # directories, or consider passing weights_only=True.
        vectors_by_kind[kind][layer] = torch.load(file_path)
    return vectors_by_kind["target"], vectors_by_kind["complement"], vectors_by_kind["combined"]

# Directory of the steering-vector run to load; the three returned dicts are keyed by layer index.
steering_vector_path = "steering_vectors/test_run_2/"

target, complement, combined = load_targeted_steering_vectors(steering_vector_path)



  complement[int(layer)] = torch.load(str(steering_vector_path + vector))
  target[int(layer)] = torch.load(str(steering_vector_path +vector))
  combined[int(layer)] = torch.load(str(steering_vector_path +vector))


In [3]:
# Inspect the combined steering vector loaded for layer 15.
combined[15]

tensor([-0.0917,  0.2917, -0.0411,  ...,  0.1085, -0.4137, -0.0248])

In [None]:
# Generates an unsteered continuation of the language_1 prompt and a steered continuation of the language_2 prompt.
def gen_outputs(bible_data:ParallelNSPDataset,
                language_1:str,
                language_2:str,
                bible_index:int,
                layer:int,
                steering_vector:dict[int, torch.Tensor],
                steering_lambda:int,
                model:AutoModelForCausalLM) -> tuple:
    """Generate a plain and a steered continuation for one parallel verse pair.

    The lower-cased first verse of ``language_1`` is continued by the model
    without steering. The lower-cased first verse of ``language_2`` is
    continued via ``generate_with_steering`` with ``steering_vector[layer]``
    applied at ``layer``. To shift the model from e.g. English to Danish, set
    language_1 = "da" and language_2 = "en" and pass the vectors that steer
    towards Danish.

    NOTE: this function relies on the notebook-level ``tokenizer`` variable;
    it is not passed in as an argument.

    Args:
        bible_data (ParallelNSPDataset): dataset with bible data; each entry is
            indexed by language code and yields a (prompt, next verse) pair.
        language_1 (str): the language you want to steer towards
        language_2 (str): the language you steer away from
        bible_index (int): index of a given verse pair in the dataset
        layer (int): layer of the model where the steering vector is inserted
        steering_vector (dict[int, torch.Tensor]): steering vectors indexed by
            layer; only ``steering_vector[layer]`` is used.
        steering_lambda (int): the strength of the steering vector
        model (AutoModelForCausalLM): the model used for both generations

    Returns:
        tuple: ``(language_1 predicted continuation,
        language_2 steered continuation,
        language_1 true next verse,
        language_2 true next verse)``, all strings.
    """
    verse_pair = bible_data[bible_index]

    language_1_prompt = verse_pair[language_1][0].lower()
    language_1_true_bible_verse = verse_pair[language_1][1]

    language_2_prompt = verse_pair[language_2][0].lower()
    language_2_true_bible_verse = verse_pair[language_2][1]

    # Unsteered baseline. The prompt ids must live on the same device as the
    # model, otherwise generate() fails as soon as the model is not on CPU.
    input_ids = tokenizer(language_1_prompt, return_tensors="pt")["input_ids"].to(model.device)
    generated_token_ids = model.generate(inputs=input_ids, max_new_tokens=30, do_sample=True)[0]
    # Keep only the newly generated tokens. Cutting the decoded string by
    # len(prompt) is fragile because decoding the prompt tokens is not
    # guaranteed to reproduce the prompt text character for character.
    new_token_ids = generated_token_ids[input_ids.shape[-1]:]
    language_1_predicted_bible_verse = tokenizer.decode(new_token_ids)

    # Steered generation. generate_with_steering returns decoded text (the first
    # element is used), so the prompt can only be removed by character count here.
    steered_outputs = generate_with_steering(model,tokenizer,layer,language_2_prompt,steering_vector[layer], steering_lambda= steering_lambda)
    language_2_predicted_bible_verse = steered_outputs[0][len(language_2_prompt):]

    return language_1_predicted_bible_verse, language_2_predicted_bible_verse, language_1_true_bible_verse,language_2_true_bible_verse

In [19]:
# Accumulators for the generated and the reference continuations of each language.
danish_predicted_output_list = []
english_predicted_output_list = []
danish_true_label_list = []
# Carries the `_list` suffix like its siblings; the bare name `english_true_label`
# is reused for a single string result further down and would clobber this list.
english_true_label_list = []

In [None]:
# Run one example: verse pair at index 50, steering applied at layer 15 with strength 5,
# using the `combined` vectors. "da" is the language steered towards, "en" the language
# whose prompt is steered.
# NOTE(review): gen_outputs samples (do_sample=True) and no seed is set in the visible cells,
# so the generated text changes between runs.
layer = 15
danish_predicted_output, english_predicted_output, danish_true_label,english_true_label = gen_outputs(bible_data, "da","en",50,layer,combined,5, model)

In [21]:
# Reference Danish next verse taken from the dataset.
danish_true_label

'Så lod Gud HERREN Dvale falde over Adam, og da han var sovet ind, tog han et af hans Ribben og lukkede med Kød i dets Sted;'

In [22]:
# Unsteered model continuation of the Danish prompt.
danish_predicted_output

' I denne synd har hans tjener Adams søn syndet. Jesus Kristus har et stort ansvar for de mennesker, der kommer i hans efterfølgelse, for'

In [23]:
# Reference English next verse taken from the dataset.
english_true_label

'Yahweh God caused a deep sleep to fall on the man, and he slept; and he took one of his ribs, and closed up the flesh in its place.'

In [26]:
# Steered model continuation of the English prompt.
english_predicted_output

'  6:12-13: "Men skal ikke blive til jord, men til himmel, men til støv, og til sten og til støv; men til himmel skal de blive, og til jord skal de blive til støv, og til sten skal de blive til støv, og til støv skal de'