# Imports

In [1]:
!pip install accelerate -q
!pip install bitsandbytes -q
!pip install -q gdown

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m309.4/309.4 kB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m21.3/21.3 MB[0m [31m46.5 MB/s[0m eta [36m0:00:00[0m

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m119.8/119.8 MB[0m [31m7.5 MB/s[0m eta [36m0:00:00[0m

[?25h

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
import pandas as pd
import os
import numpy as np
import torch
import torch.nn as nn
from tqdm import tqdm

In [4]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import BitsAndBytesConfig
import transformers
from safetensors.torch import load_file
from collections import defaultdict

# Config

In [None]:
# Llama checkpoint size. The model-selection cell accepts '7b', '13b', '70b'
# (Llama-2 family) or '8b' (Llama-3 family) and raises for anything else.
model_size = '8b'
# Mutually exclusive filters: the model-selection cell raises if both are True.
# NOTE(review): neither flag is read anywhere else in the visible notebook — confirm they still have an effect.
single_token_only = False
multi_token_only = False
# When True, the dataframe is truncated to its first 5 rows before the experiment runs.
run_test_mode = False

# Source / target languages (must match CSV column names). Every ordered pair
# with input != target is evaluated by get_results.
input_languages = ['English', 'Spanish', 'Chinese', 'Greek', 'Turkish', 'Ukrainian', 'French', 'Russian', 'Korean', 'Hungarian', 'Catalan', 'Persian', 'Dutch', 'Japanese', 'Vietnamese', 'Hebrew', 'Arabic']
target_languages = ['English', 'Spanish', 'Chinese', 'Greek', 'Turkish', 'Ukrainian', 'French', 'Russian', 'Korean', 'Hungarian', 'Catalan', 'Persian', 'Dutch', 'Japanese', 'Vietnamese', 'Hebrew', 'Arabic']

# Alternative input-language subsets — presumably for splitting the full run
# into four separate sessions; uncomment exactly one to use it.
# input_languages = ['English', 'Spanish', 'Chinese', 'Greek']  # 1
# input_languages = ['Turkish', 'Ukrainian', 'French', 'Russian']  # 2
# input_languages = ['Korean', 'Hungarian', 'Catalan', 'Persian']  # 3
# input_languages = ['Dutch', 'Japanese', 'Vietnamese', 'Hebrew', 'Arabic']  # 4


# Llama Wrapper

In [6]:
class AttnWrapper(torch.nn.Module):
    """Wraps a self-attention module to record and optionally modify its output.

    Attributes:
        attn: the wrapped attention module; it is called with the same
            positional and keyword arguments this wrapper receives.
        activations: first element of the most recent (possibly modified) output.
        add_tensor: if not None, added to the first element of the output.
        act_as_identity: if True, the strictly-lower-triangular part of the
            attention mask is additionally masked before calling ``attn``.
    """

    def __init__(self, attn):
        super().__init__()
        self.attn = attn
        self.activations = None
        self.add_tensor = None
        self.act_as_identity = False

    #https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/modeling_llama.py#L422
    def forward(self, *args, **kwargs):
        """Run the wrapped attention and cache ``output[0]`` in ``self.activations``.

        Returns the wrapped module's output tuple, with ``add_tensor`` added to
        its first element when an intervention is set.
        """
        if self.act_as_identity:
            mask = kwargs['attention_mask']
            # Entry [0, 0, 0, 1] is read as the additive "masked" value
            # (assumes a causal 4-D mask with at least two key positions).
            masked_value = mask[0, 0, 0, 1]
            strictly_lower = torch.tril(torch.ones_like(mask), diagonal=-1)
            # Build a NEW mask instead of `+=`: the same mask tensor is handed
            # to the other layers, so an in-place edit would leak into them.
            kwargs['attention_mask'] = mask + masked_value * strictly_lower
        output = self.attn(*args, **kwargs)
        if self.add_tensor is not None:
            output = (output[0] + self.add_tensor,) + output[1:]
        self.activations = output[0]
        return output

    def reset(self):
        """Clear the cached activations and disable every intervention."""
        self.activations = None
        self.add_tensor = None
        self.act_as_identity = False

class MLPWrapper(torch.nn.Module):
    """Gated-MLP wrapper that records hidden activations and supports overrides.

    The wrapped module must expose ``up_proj``, ``gate_proj``, ``act_fn`` and
    ``down_proj``; the forward pass is re-implemented from those pieces so the
    post-activation tensor can be captured.
    """

    def __init__(self, mlp):
        super().__init__()
        self.mlp = mlp
        self.up_proj = mlp.up_proj
        self.gate_proj = mlp.gate_proj
        self.act_fn = mlp.act_fn
        self.down_proj = mlp.down_proj
        # index -> value written into the MLP output on every forward pass
        self.neuron_interventions = {}
        # CPU copy of the most recent act_fn(gate_proj(x)) * up_proj(x)
        self.post_activation = None

    def forward(self, x):
        """Return ``down_proj(act_fn(gate_proj(x)) * up_proj(x))`` with overrides applied."""
        gated = self.act_fn(self.gate_proj(x))
        hidden = gated * self.up_proj(x)
        self.post_activation = hidden.detach().cpu()
        projected = self.down_proj(hidden)
        if self.neuron_interventions:
            print('performing intervention: mlp_neuron_interventions')
            # NOTE(review): the override indexes the last dim of the down_proj
            # output, not the hidden (post-activation) units — confirm intended.
            for idx, value in self.neuron_interventions.items():
                projected[:, :, idx] = value
        return projected

    def reset(self):
        """Drop all registered overrides (the cached post_activation is kept)."""
        self.neuron_interventions = {}

    def freeze_neuron(self, neuron_idx, mean):
        """Register ``mean`` as the value forced at output index ``neuron_idx``."""
        self.neuron_interventions[neuron_idx] = mean

class BlockOutputWrapper(torch.nn.Module):
    """Wraps one decoder block and decodes its intermediate states to vocabulary logits.

    On construction the block's ``self_attn`` and ``mlp`` are replaced by
    ``AttnWrapper`` / ``MLPWrapper``. After each forward pass the wrapper
    exposes, each as ``unembed_matrix(norm(.))``:

    * ``attn_mech_output_unembedded``  - attention output,
    * ``intermediate_res_unembedded``  - attention output + block input,
    * ``mlp_output_unembedded``        - MLP output,
    * ``block_output_unembedded``      - block output,

    plus the raw ``output``, ``output_normalized`` and ``mlp_post_activation``.
    """

    def __init__(self, block, unembed_matrix, norm):
        super().__init__()
        self.block = block
        self.unembed_matrix = unembed_matrix
        self.norm = norm

        self.block.self_attn = AttnWrapper(self.block.self_attn)
        self.block.mlp = MLPWrapper(self.block.mlp)
        self.post_attention_layernorm = self.block.post_attention_layernorm

        self.attn_mech_output_unembedded = None
        self.intermediate_res_unembedded = None
        self.mlp_post_activation = None # these are the hidden neurons of the MLP
        self.mlp_output_unembedded = None
        self.block_output_unembedded = None
        self.add_to_last_tensor = None
        self.output = None
        self.output_normalized = None

    def forward(self, *args, **kwargs):
        """Run the block, apply the optional last-position intervention, cache decodings.

        Returns the block's output tuple (its first element is modified in
        place when ``add_to_last_tensor`` is set).
        """
        output = self.block(*args, **kwargs)
        if self.add_to_last_tensor is not None:
            print('performing intervention: add_to_last_tensor')
            output[0][:, -1, :] += self.add_to_last_tensor
        self.output = output[0]
        self.output_normalized = self.norm(output[0].to(self.norm.weight.device))
        self.block_output_unembedded = self.unembed_matrix(self.output_normalized.to(self.unembed_matrix.weight.device))

        attn_output = self.block.self_attn.activations
        self.attn_mech_output_unembedded = self.unembed_matrix(self.norm(attn_output.to(self.unembed_matrix.weight.device)))

        # The block input is normally the first positional argument; fall back
        # to the keyword form so both calling conventions work.
        block_input = args[0] if args else kwargs['hidden_states']
        # Out-of-place add: `attn_output` is the very tensor cached by the
        # attention wrapper, and `+=` would silently turn
        # get_attn_activations() into "attention output + residual".
        intermediate_res = attn_output + block_input.to(attn_output.device)
        self.intermediate_res_unembedded = self.unembed_matrix(self.norm(intermediate_res))

        # Re-run the (wrapped) MLP on the intermediate residual to capture its output.
        mlp_output = self.block.mlp(self.post_attention_layernorm(intermediate_res.to(self.post_attention_layernorm.weight.device)))
        self.mlp_post_activation = self.block.mlp.post_activation
        self.mlp_output_unembedded = self.unembed_matrix(self.norm(mlp_output.to(self.unembed_matrix.weight.device)))
        return output

    def mlp_freeze_neuron(self, neuron_idx, mean):
        """Forward a neuron override to the wrapped MLP."""
        self.block.mlp.freeze_neuron(neuron_idx, mean)

    def block_add_to_last_tensor(self, tensor):
        """Set the tensor added to the last sequence position of the block output."""
        self.add_to_last_tensor = tensor

    def attn_add_tensor(self, tensor):
        """Set the tensor added to the attention output."""
        self.block.self_attn.add_tensor = tensor

    def reset(self):
        """Remove every intervention on this block, its attention and its MLP."""
        self.block.self_attn.reset()
        self.block.mlp.reset()
        self.add_to_last_tensor = None

    def get_attn_activations(self):
        """Return the attention output cached by the most recent forward pass."""
        return self.block.self_attn.activations

In [7]:
class LlamaHelper:
    """Loads a quantized Llama causal LM and instruments every decoder layer.

    Each layer of ``model.model.layers`` is replaced by a ``BlockOutputWrapper``
    so that, after a forward pass, per-layer latents and their decodings through
    either the output head (``lm_head``) or the transposed input embedding
    (``head_embed``) can be read back.
    """

    def __init__(self, dir, hf_token=None, device=None, load_in_8bit=True, use_embed_head=False):
        """Load tokenizer and model from ``dir`` and wrap all decoder layers.

        Args:
            dir: model id or path handed to ``from_pretrained``.
            hf_token: access token forwarded to ``from_pretrained``.
            device: initial value of ``self.device``; it is overwritten below
                with the device of the model's first parameter.
            load_in_8bit: forwarded to ``BitsAndBytesConfig``.
            use_embed_head: decode latents with the input-embedding matrix
                instead of ``lm_head``.
        """
        quantization_config = BitsAndBytesConfig(load_in_8bit=load_in_8bit)
        if device is None:
            self.device = "cuda" if torch.cuda.is_available() else "cpu"
        else:
            self.device = device
        self.tokenizer = AutoTokenizer.from_pretrained(dir, token=hf_token)
        self.model = AutoModelForCausalLM.from_pretrained(
            dir,
            token=hf_token,
            device_map='auto',
            quantization_config=quantization_config
        )
        # Reflect the head that is actually installed (was hard-coded to True).
        self.use_embed_head = use_embed_head
        # Linear layer whose weight is a copy of the input-embedding matrix.
        W = list(self.model.model.embed_tokens.parameters())[0].detach()
        self.head_embed = torch.nn.Linear(W.size(1), W.size(0), bias=False)
        self.head_embed.to(W.dtype)
        self.norm = self.model.model.norm
        with torch.no_grad():
            self.head_embed.weight.copy_(W)
        self.head_embed.to(self.model.device)
        self.head_unembed = self.model.lm_head
        # Placement is decided by device_map='auto'; remember where inputs must go.
        self.device = next(self.model.parameters()).device
        head = self.head_embed if use_embed_head else self.head_unembed
        for i, layer in enumerate(self.model.model.layers):
            self.model.model.layers[i] = BlockOutputWrapper(layer, head, self.model.model.norm)

    def set_embed_head(self):
        """Decode per-layer latents with the input-embedding matrix."""
        self.use_embed_head = True
        for layer in self.model.model.layers:
            layer.unembed_matrix = self.head_embed

    def set_unembed_head(self):
        """Decode per-layer latents with the model's ``lm_head``."""
        self.use_embed_head = False
        for layer in self.model.model.layers:
            layer.unembed_matrix = self.head_unembed

    def generate_text(self, prompt, max_length=100):
        """Generate with ``model.generate`` and return the decoded first sequence."""
        inputs = self.tokenizer(prompt, return_tensors="pt")
        generate_ids = self.model.generate(inputs.input_ids.to(self.device), max_length=max_length)
        return self.tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]

    def generate_intermediate_text(self, layer_idx, prompt, max_length=100, temperature=1.0):
        """Autoregressively extend ``prompt`` using the decoding of layer ``layer_idx``.

        Runs up to ``max_length`` steps and stops early on the EOS token.
        """
        layer = self.model.model.layers[layer_idx]
        for _ in range(max_length):
            self.get_logits(prompt)
            next_id = self.sample_next_token(layer.block_output_unembedded[:,-1], temperature=temperature)
            # [1:] drops the first encoded id (presumably the BOS token the
            # tokenizer prepends) before re-decoding the extended prompt.
            prompt = self.tokenizer.decode(self.tokenizer.encode(prompt)[1:]+[next_id])
            if next_id == self.tokenizer.eos_token_id:
                break
        return prompt

    def sample_next_token(self, logits, temperature=1.0):
        """Return a token id: argmax when ``temperature == 0``, else a categorical sample."""
        assert temperature >= 0, "temp must be geq 0"
        if temperature == 0:
            return self._sample_greedy(logits)
        return self._sample_basic(logits/temperature)

    def _sample_greedy(self, logits):
        """Index of the largest logit (over the flattened tensor)."""
        return logits.argmax().item()

    def _sample_basic(self, logits):
        """Draw one index from the categorical distribution defined by ``logits``."""
        return torch.distributions.categorical.Categorical(logits=logits).sample().item()

    def get_logits(self, prompt):
        """Run one forward pass without gradients and return the model's logits.

        As a side effect every wrapped layer refreshes its cached latents.
        """
        inputs = self.tokenizer(prompt, return_tensors="pt")
        with torch.no_grad():
            # Move the ids to the model's device explicitly, as generate_text does.
            logits = self.model(inputs.input_ids.to(self.device)).logits
            return logits

    def set_neuron_intervention(self, layer_idx, neuron_idx, mean):
        """Register an MLP override on layer ``layer_idx``."""
        self.model.model.layers[layer_idx].mlp_freeze_neuron(neuron_idx, mean)

    def set_add_attn_output(self, layer, add_output):
        """Add ``add_output`` to the attention output of layer ``layer``."""
        self.model.model.layers[layer].attn_add_tensor(add_output)

    def get_attn_activations(self, layer):
        """Return the cached attention output of layer ``layer``."""
        return self.model.model.layers[layer].get_attn_activations()

    def set_add_to_last_tensor(self, layer, tensor):
        """Add ``tensor`` to the last sequence position of layer ``layer``'s output."""
        print('setting up intervention: add tensor to last soft token')
        self.model.model.layers[layer].block_add_to_last_tensor(tensor)

    def reset_all(self):
        """Remove every intervention from every layer."""
        for layer in self.model.model.layers:
            layer.reset()

    def print_decoded_activations(self, decoded_activations, label, topk=10):
        """Print the ``topk`` most likely tokens at the last position of the first batch item.

        Each entry is ``(token_id, token_text, probability_in_percent)``.
        """
        softmaxed = torch.nn.functional.softmax(decoded_activations[0][-1], dim=-1)
        values, indices = torch.topk(softmaxed, topk)
        probs_percent = [int(v * 100) for v in values.tolist()]
        tokens = self.tokenizer.batch_decode(indices.unsqueeze(-1))
        print(label, list(zip(indices.detach().cpu().numpy().tolist(), tokens, probs_percent)))

    def logits_all_layers(self, text, return_attn_mech=False, return_intermediate_res=False, return_mlp=False, return_block=True):
        """Run ``text`` through the model and collect per-layer decoded logits.

        Returns a dict keyed by 'block_2', 'attn', 'block_1' and/or 'mlp' whose
        values are the per-layer tensors concatenated along dim 0; when exactly
        one kind was requested the tensor itself is returned.
        """
        res = defaultdict(list)
        self.get_logits(text)
        for layer in self.model.model.layers:
            if return_block:
                res['block_2'] += [layer.block_output_unembedded.detach().cpu()]
            if return_attn_mech:
                res['attn'] += [layer.attn_mech_output_unembedded.detach().cpu()]
            if return_intermediate_res:
                res['block_1'] += [layer.intermediate_res_unembedded.detach().cpu()]
            if return_mlp:
                res['mlp'] += [layer.mlp_output_unembedded.detach().cpu()]
        for k, v in res.items():
            res[k] = torch.cat(v, dim=0)
        if len(res) == 1:
            return list(res.values())[0]
        return res

    def latents_all_layers(self, text, return_attn_mech=False, return_intermediate_res=False, return_mlp=False, return_mlp_post_activation=False, return_block=True):
        """Run ``text`` through the model and return per-layer latents on CPU.

        Returns the block outputs (default) or, when ``return_block`` is False
        and ``return_mlp_post_activation`` is True, the MLP post-activations,
        concatenated over layers along dim 0.

        Raises:
            NotImplementedError: if attention / intermediate-residual / MLP
                latents are requested.
        """
        if return_attn_mech or return_intermediate_res or return_mlp:
            # `NotImplemented` is a non-callable sentinel; calling it raised a
            # TypeError. The exception type is NotImplementedError.
            raise NotImplementedError("not implemented")
        self.get_logits(text)
        tensors = []
        if return_block:
            for layer in self.model.model.layers:
                tensors += [layer.output.detach().cpu()]
        elif return_mlp_post_activation:
            for layer in self.model.model.layers:
                tensors += [layer.mlp_post_activation.detach().cpu()]
        return torch.cat(tensors, dim=0)

    def decode_all_layers(self, text, topk=10, print_attn_mech=True, print_intermediate_res=True, print_mlp=True, print_block=True):
        """Print, for every layer, the ``topk`` tokens decoded from each cached state."""
        print('Prompt:', text)
        self.get_logits(text)
        for i, layer in enumerate(self.model.model.layers):
            print(f'Layer {i}: Decoded intermediate outputs')
            if print_attn_mech:
                self.print_decoded_activations(layer.attn_mech_output_unembedded, 'Attention mechanism', topk=topk)
            if print_intermediate_res:
                self.print_decoded_activations(layer.intermediate_res_unembedded, 'Intermediate residual stream', topk=topk)
            if print_mlp:
                self.print_decoded_activations(layer.mlp_output_unembedded, 'MLP output', topk=topk)
            if print_block:
                self.print_decoded_activations(layer.block_output_unembedded, 'Block output', topk=topk)

# Main Process

In [8]:
# random seed
# Seeds NumPy's global RNG and PyTorch's RNG. The few-shot rows are drawn with
# DataFrame.sample() without a random_state, which falls back to NumPy's
# global state, so this call is what makes the sampled prompts repeatable.
seed = 42
np.random.seed(seed)
torch.manual_seed(seed)

<torch._C.Generator at 0x7f7ea0b3e5f0>

In [10]:
# Resolve the Hugging Face checkpoint id (`custom_model`) and the short family
# name (`model_name`, used as the results directory) from `model_size`.
if model_size.lower() in ['7b', '13b', '70b']:
  # The Llama-2 repo ids use a lowercase size suffix.
  custom_model = 'meta-llama/Llama-2-%s-hf'%model_size.lower()
  model_name = 'llama2'
elif model_size.lower() in ['8b']:
  custom_model = 'meta-llama/Meta-Llama-3-%s'%model_size.upper()
  model_name = 'llama3'
else:
  raise ValueError('model_size must match one of Llama-2 or Llama-3 family.')

# SECURITY: access tokens must never be committed to a notebook. Read the token
# from the environment instead; any token that was previously hard-coded here
# should be treated as leaked and revoked.
# None is passed through to from_pretrained unchanged.
# NOTE(review): with None, authentication presumably relies on a cached login or
# Colab secret — confirm for your setup.
hf_token = os.environ.get('HF_TOKEN')

if single_token_only and multi_token_only:
    raise ValueError('single_token_only and multi_token_only cannot be True at the same time')


## Load the translation CSV

In [11]:
# Local cache path for the word-translation table.
file_name = "./translation_inference.csv"

# Fetch the CSV from Google Drive (by file id) only when it is not on disk yet.
if not os.path.exists(file_name):
    !gdown 1Bg9PjAuQyOe61CI54IEGlPYbsWLONSps -O $file_name

df = pd.read_csv(file_name)
# Strip any parenthesised suffix (and surrounding whitespace) from column names.
# NOTE(review): the displayed frame shows some Chinese cells ending in '\r';
# those characters end up verbatim in the prompts — consider stripping them.
df.columns = df.columns.str.replace(r"\s*\(.*\)\s*", "", regex=True)

Downloading...

From: https://drive.google.com/uc?id=1Bg9PjAuQyOe61CI54IEGlPYbsWLONSps

To: /content/translation_inference.csv

100% 35.8k/35.8k [00:00<00:00, 82.6MB/s]


In [12]:
df

Unnamed: 0,English,Spanish,Chinese,Greek,Turkish,Ukrainian,French,Russian,Korean,Hungarian,Catalan,Persian,Dutch,Japanese,Vietnamese,Hebrew,Arabic
0,book,libro,书\r,Βιβλίο,kitap,книга,livre,книга,책,könyv,llibre,کتاب,boek,本,sách,ספר,كتاب
1,cloud,nube,云\r,σύννεφο,bulut,хмара,nuage,облако,구름,felhő,núvol,ابر,wolk,雲,mây,ענן,سحابة
2,bag,bolsa,包\r,τσάντα,çanta,сумка,sac,сумка,가방,táska,bossa,کیف,tas,バッグ,túi,תיק,حقيبة
3,mouth,boca,嘴,στόμα,ağız,рот,bouche,рот,입,száj,boca,دهان,mond,口,miệng,פה,فم
4,soil,suelo,土壤,έδαφος,toprak,ґрунт,sol,почва,흙,talaj,sòl,خاک,grond,土,đất,אדמה,تربة
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
245,telephone,teléfono,电话,τηλέφωνο,telefon,телефон,téléphone,телефон,전화,telefon,telèfon,تلفن,telefoon,電話,điện thoại,טלפון,هاتف
246,tree,árbol,树\r,δέντρο,ağaç,дерево,arbre,дерево,나무,fa,arbre,درخت,boom,木,cây,עץ,شجرة
247,insect,insecto,昆虫,έντομο,böcek,комаха,insecte,насекомое,곤충,rovar,insecte,حشره,insect,昆虫,côn trùng,חרק,حشرة
248,lemon,limón,柠檬,λεμόνι,limon,лимон,citron,лимон,레몬,citrom,llimona,لیمو,citroen,レモン,chanh,לימון,ليمون


## Preparing Llama

In [13]:
# Guard first, then build the instrumented model (8-bit quantized).
if custom_model is None:
    raise ValueError('Please set the custom model')
llama = LlamaHelper(dir=custom_model, load_in_8bit=True, hf_token=hf_token)


The secret `HF_TOKEN` does not exist in your Colab secrets.

To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.

You will be able to reuse this secret in all of your notebooks.

Please note that authentication is recommended but still optional to access public models or datasets.



tokenizer_config.json:   0%|          | 0.00/776 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/609 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

In [14]:
# Final norm followed by the output head: maps a latent to vocabulary logits.
unemb = nn.Sequential(llama.model.model.norm, llama.model.lm_head)
print(unemb)
# prepare for energy plots
# U: first parameter of the output head; weights: first parameter of the final
# norm. Both are copied to CPU as float32.
U = list(unemb[1].parameters())[0].detach().cpu().float()
weights = list(unemb[0].parameters())[0].detach().cpu().float()
print(f'U {U.shape} weights {weights.unsqueeze(0).shape}')
U_weighted = U.clone()
#U_weighted = U_weighted / ((U_weighted**2).mean(dim=1, keepdim=True))**0.5
# Fold the norm's per-dimension scale into every row of U (broadcast over rows).
U_weighted *= weights.unsqueeze(0)
# Scale each row to unit L2 norm.
U_normalized = U_weighted / ((U_weighted**2).sum(dim=1, keepdim=True))**0.5
v = U.shape[0]
# Gram matrix over the second dimension of U_normalized. Its squared Frobenius
# norm equals that of U_normalized @ U_normalized.T, so avgUU is the root mean
# square of all pairwise dot products between the unit-norm rows.
TT = U_normalized.T @ U_normalized
avgUU = (((TT)**2).sum() / v**2)**0.5
print(avgUU.item())

Sequential(

  (0): LlamaRMSNorm()

  (1): Linear(in_features=4096, out_features=32000, bias=False)

)

U torch.Size([32000, 4096]) weights torch.Size([1, 4096])

0.05862168222665787


In [15]:
# Module-level aliases. `tokenizer` is captured as a default argument by several
# helper functions defined below, so this cell must run before they are defined.
tokenizer = llama.tokenizer
model = llama.model

## Prediction

### Get tokenization

In [16]:
def token_prefixes(token_str: str):
    """Return every non-empty prefix of ``token_str``, shortest first."""
    prefixes = []
    for end in range(len(token_str)):
        prefixes.append(token_str[:end + 1])
    return prefixes

def add_spaces(tokens, space_markers=('▁', 'Ġ')):
    """Return word-initial variants of ``tokens`` followed by the tokens themselves.

    A tokenizer vocabulary marks "preceded by a space" with a prefix character
    that depends on the tokenizer family: SentencePiece vocabularies (Llama-2)
    use '▁', byte-level BPE vocabularies (Llama-3) use 'Ġ'. Emitting only '▁'
    means the space-prefixed candidates can never match a Llama-3 vocabulary.

    Args:
        tokens: list of candidate token strings.
        space_markers: prefix characters to try, in output order.

    Returns:
        A new list: for each marker, every token with that marker prepended,
        followed by the unmodified tokens.
    """
    marked = [marker + t for marker in space_markers for t in tokens]
    return marked + list(tokens)

def capitalizations(tokens):
    """Return the distinct entries of ``tokens`` as a list (set iteration order).

    Despite the name, no case variants are generated.
    """
    distinct = set()
    distinct.update(tokens)
    return list(distinct)

def unicode_prefix_tokid(zh_char = "云", tokenizer=tokenizer):
    """Return the id of the byte token '<0xNN>' for the first escaped byte of ``zh_char``.

    The string is UTF-8 encoded and the repr of the resulting bytes object is
    scanned for its first ``\\x`` escape; the two hex digits that follow are
    upper-cased and looked up as ``<0xNN>`` in the vocabulary.

    Returns:
        The token id, or None when the repr contains no ``\\x`` escape or the
        vocabulary has no such byte token.
    """
    # "b'...'" -> "..." : the bytes repr without the b-prefix and quotes.
    escaped = zh_char.encode().__str__()[2:-1]
    if '\\x' not in escaped:
        return None
    first_hex = escaped.split('\\x')[1]
    start_key = '<0x%s>' % first_hex.upper()
    # One vocabulary lookup: get_vocab() was previously called twice per call.
    return tokenizer.get_vocab().get(start_key)

def process_tokens(token_str: str, tokenizer, lang):
    """Collect the ids of all vocabulary tokens that could start ``token_str``.

    Candidates are every prefix of ``token_str``, with and without a leading
    space marker, plus (when available) the byte token returned by
    ``unicode_prefix_tokid``.

    Args:
        token_str: the word whose possible first tokens are wanted.
        tokenizer: object exposing ``get_vocab()``.
        lang: unused; kept for call-site compatibility.

    Returns:
        List of token ids (possibly empty).
    """
    # Fetch the vocabulary once; it was previously rebuilt for every candidate.
    vocab = tokenizer.get_vocab()
    candidates = capitalizations(add_spaces(token_prefixes(token_str)))
    final_tokens = [vocab[tok] for tok in candidates if tok in vocab]
    tokid = unicode_prefix_tokid(token_str, tokenizer)
    if tokid is not None:
        final_tokens.append(tokid)
    return final_tokens

# Inverse vocabulary: token id -> token string.
id2voc = dict((tok_id, voc) for voc, tok_id in tokenizer.get_vocab().items())

def get_tokens(token_ids, id2voc=id2voc):
    """Translate token ids to their vocabulary strings via ``id2voc``."""
    tokens = []
    for tokid in token_ids:
        tokens.append(id2voc[tokid])
    return tokens

def compute_entropy(probas):
    """Shannon entropy in bits along the last dimension of ``probas``.

    Zero-probability entries contribute 0 (the limit of p*log2(p) as p -> 0)
    instead of ``0 * -inf = nan``, which previously turned the whole entropy
    into NaN whenever a softmax output underflowed to exactly 0.
    """
    plogp = torch.where(
        probas > 0,
        probas * torch.log2(probas),
        torch.zeros_like(probas),
    )
    return -plogp.sum(dim=-1)

In [17]:
def sample(df, ind, lang1, lang2, k=5, tokenizer=tokenizer, lang_latent='English'):
    """Generator producing one few-shot translation prompt for row ``ind``.

    The prompt consists of ``k-1`` randomly sampled demonstration rows
    (``lang1: "..." - lang2: "..."``) followed by the query row, left open after
    the opening quote of the ``lang2`` word.

    Args:
        df: frame with columns ``lang1``, ``lang2`` and ``lang_latent``.
        ind: positional index (after ``reset_index``) of the query row.
        lang1, lang2: source / target column names.
        k: total number of prompt lines (demonstrations + query).
        tokenizer: tokenizer used to resolve candidate token ids.
        lang_latent: column holding the latent-language word.

    Yields:
        Exactly one item: ``None`` when either the target or the latent word
        has no candidate token ids, otherwise a dict with keys 'prompt',
        'out_token_id', 'out_token_str', 'latent_token_id', 'latent_token_str'
        and 'in_token_str'.
    """
    df = df.reset_index(drop=True)
    others = df[df.index != ind]
    # Demonstrations first, query row last.
    shots = pd.concat([others.sample(k-1), df[df.index == ind]], axis=0)
    prompt = ""
    for idx, (df_idx, row) in enumerate(shots.iterrows()):
        if idx < k-1:
            prompt += f'{lang1}: "{row[lang1]}" - {lang2}: "{row[lang2]}"\n'
            continue
        prompt += f'{lang1}: "{row[lang1]}" - {lang2}: "'
        in_token_str = row[lang1]
        out_token_str = row[lang2]
        out_token_id = process_tokens(out_token_str, tokenizer, lang2)
        latent_token_str = row[lang_latent]
        latent_token_id = process_tokens(latent_token_str, tokenizer, lang_latent)
        if len(out_token_id) == 0 or len(latent_token_id) == 0:
            yield None
            # Stop here: without this the generator would go on to yield the
            # dict for a sample that was just declared invalid.
            return
        yield {'prompt': prompt,
                'out_token_id': out_token_id,
                'out_token_str': out_token_str,
                'latent_token_id': latent_token_id,
                'latent_token_str': latent_token_str,
                'in_token_str': in_token_str}


### Inference model

In [44]:
def get_model_inference(prepared_dataset):
    """Run every prompt through the model and collect per-layer statistics.

    For each sample the latents of all layers are decoded with ``unemb`` and,
    at the last sequence position, the following are recorded per layer:

    * summed probability of the latent-language token ids,
    * summed probability of the target token ids,
    * entropy of the next-token distribution,
    * "energy": RMS projection of the unit-norm latent onto the rows of
      ``U_normalized``, divided by ``avgUU``.

    Args:
        prepared_dataset: iterable of dicts with keys 'prompt',
            'latent_token_id' and 'out_token_id'.

    Returns:
        Dict of nested lists (sample x layer) under the keys
        'latent_token_probs', 'out_token_probs', 'entropy' and 'energy'.
        All lists are empty when ``prepared_dataset`` is empty.
    """
    latent_token_probs = []
    out_token_probs = []
    entropy = []
    energy = []

    # Inference only: no autograd graph is needed for the unembedding pass.
    with torch.no_grad():
        for d in prepared_dataset:
            latents = llama.latents_all_layers(d['prompt'])
            logits = unemb(latents)
            last = logits[:, -1, :].float().softmax(dim=-1).detach().cpu()
            latent_token_probs += [last[:, torch.tensor(d['latent_token_id'])].sum(dim=-1)]
            out_token_probs += [last[:, torch.tensor(d['out_token_id'])].sum(dim=-1)]
            entropy += [compute_entropy(last)]
            latents_normalized = latents[:, -1, :].float()
            latents_normalized = latents_normalized / (((latents_normalized**2).mean(dim=-1, keepdim=True))**0.5)
            latents_normalized /= (latents_normalized.norm(dim=-1, keepdim=True))
            norm = ((U_normalized @ latents_normalized.T)**2).mean(dim=0)**0.5
            energy += [norm/avgUU]

    # torch.stack raises on an empty list (e.g. no row of a language pair had
    # matching tokens), so return empty results explicitly.
    if not latent_token_probs:
        return {
            "latent_token_probs": [],
            "out_token_probs": [],
            "entropy": [],
            "energy": [],
        }

    return {
        "latent_token_probs": torch.stack(latent_token_probs).tolist(),
        "out_token_probs": torch.stack(out_token_probs).tolist(),
        "entropy": torch.stack(entropy).tolist(),
        "energy": torch.stack(energy).tolist(),
    }


### Run Experiment

In [45]:
import json

def execute_sample(df, input_language, target_language):
    """Build one prompt record per row of ``df``, dropping rows that yield None.

    Rows are processed in order; for each one the first item produced by
    ``sample`` is taken.
    """
    first_items = (
        next(sample(df, row_idx, lang1=input_language, lang2=target_language))
        for row_idx in range(len(df))
    )
    return [item for item in first_items if item is not None]

def get_results(df ,input_languages, target_languages, model_name, latent_lang = 'English'):
    """Run the translation experiment for every ordered language pair.

    For each (input, target) pair with input != target:

    1. build the prompts from the relevant columns of ``df``,
    2. save them to ``<model_name>/tokenized/<input>_to_<target>.json``,
    3. run the model and save the statistics to
       ``<model_name>/<input> to <target>.json``.

    Args:
        df: translation table with one column per language.
        input_languages, target_languages: column names to iterate over.
        model_name: output directory name.
        latent_lang: column used as the latent (pivot) language.
    """
    # Create the output directory explicitly rather than relying on
    # save_df_as_json creating "<model_name>/tokenized" as a side effect.
    os.makedirs(model_name, exist_ok=True)
    tokenized_dir = os.path.join(model_name, "tokenized")

    input_lang_itr = tqdm(input_languages, desc="Input Languages", leave=False, position=0)
    for input_lang in input_lang_itr:
        input_lang_itr.set_postfix(input_lang=input_lang)

        target_lang_itr = tqdm(target_languages, desc="Target Languages", leave=False, position=1)
        for target_lang in target_lang_itr:
            input_lang_itr.set_description(f'({input_lang} => {target_lang})')
            target_lang_itr.set_postfix(target_lang=target_lang)

            if input_lang == target_lang:
                continue

            # Keep only the needed columns; the latent column is added unless
            # it is already one of the pair.
            if latent_lang in [input_lang, target_lang]:
                selected_df = df[[input_lang, target_lang]]
            else:
                selected_df = df[[input_lang, target_lang, latent_lang]]

            target_lang_itr.set_description('(prepare tokenized data)')
            dataset = execute_sample(selected_df, input_lang, target_lang)

            predicted_df = pd.DataFrame(dataset)
            save_df_as_json(predicted_df, input_lang, target_lang, tokenized_dir)

            target_lang_itr.set_description('(inferencing model)')
            results = get_model_inference(dataset)
            result_file_name = f"{input_lang} to {target_lang}.json"
            result_file_path = os.path.join(model_name, result_file_name)
            with open(result_file_path, 'w', encoding='utf-8') as json_file:
                json.dump(results, json_file, indent=4)

def save_df_as_json(df, input_lang, target_lang, model_name):
    """Write ``df`` as a JSON list of records to ``<model_name>/<input>_to_<target>.json``.

    The directory ``model_name`` (including parents) is created when missing.
    Non-ASCII characters are written verbatim in UTF-8.
    """
    os.makedirs(model_name, exist_ok=True)
    destination = os.path.join(model_name, f"{input_lang}_to_{target_lang}.json")
    records = df.to_dict(orient='records')
    with open(destination, 'w', encoding='utf-8') as handle:
        json.dump(records, handle, ensure_ascii=False, indent=4)

In [37]:
# Smoke-test mode: keep only the first 5 rows so the full pipeline runs quickly.
# NOTE: this rebinds `df`; re-run the CSV-loading cell to get the full table back.
if run_test_mode:
    df = df.head(5)

In [46]:
get_results(df, input_languages, target_languages, model_name)

Input Languages:   0%|          | 0/4 [00:00<?, ?it/s, input_lang=English]

Target Languages:   0%|          | 0/3 [00:00<?, ?it/s][A

English => English:   0%|          | 0/4 [00:00<?, ?it/s, input_lang=English]

English => Spanish:   0%|          | 0/4 [00:00<?, ?it/s, input_lang=English]

(prepare tokenized data):   0%|          | 0/3 [00:00<?, ?it/s, target_lang=Spanish][A

(inferencing model):   0%|          | 0/3 [00:01<?, ?it/s, target_lang=Spanish]     [A

(inferencing model):  67%|██████▋   | 2/3 [00:08<00:04,  4.10s/it, target_lang=Spanish][A

English => Chinese:   0%|          | 0/4 [00:08<?, ?it/s, input_lang=English]

(prepare tokenized data):  67%|██████▋   | 2/3 [00:08<00:04,  4.10s/it, target_lang=Chinese][A

(inferencing model):  67%|██████▋   | 2/3 [00:09<00:04,  4.10s/it, target_lang=Chinese]     [A

(inferencing model): 100%|██████████| 3/3 [00:14<00:00,  5.19s/it, target_lang=Chinese][A

English => Chinese:  25%|██▌       | 1/4 [00:14<00:44, 14.95s/it, input