# Jupyter Notebook for Project "Comparison of LLM Prompting Techniques"

In [39]:
import pandas as pd
import mlflow
import mlflow.pyfunc
import sacrebleu
from llama_cpp import Llama
import time


## 1 Data Loading
In the first step, we load the given translations into a pandas DataFrame and display a quick overview of it.

In [40]:
# Load the translation corpus from the pickle file in the working directory.
# NOTE(review): unpickling can execute arbitrary code - only load this file from a trusted source.
data = pd.read_pickle('machine_translation.pkl')
# Bare last expression so the notebook renders the loaded object.
data

Unnamed: 0,complexity,text_german,text_english
0,easy,Felix hat es satt: Ständig ist Mama unterwegs....,Felix is fed up: Mom is always on the go. But ...
1,news_gen,Die rund 1.400 eingesetzten Beamten haben demn...,"The approximately 1,400 deployed officers have..."
2,news_spec,"Der Staatschef hat zugleich aber das Recht, vo...",The head of state also has the right to appoin...
3,pop_science,Dass der Klimawandel die Hitzewellen in Südasi...,There is no question that climate change is in...
4,science,"Der DSA-110, der sich am Owens Valley Radio Ob...","The DSA-110, situated at the Owens Valley Radi..."


In [41]:
# Overview table: the complexity label of each row next to the character
# length of its German and English text.
data_info = pd.DataFrame({
    'complexity': data['complexity'],
    'text_german_length': data['text_german'].str.len(),
    'text_english_length': data['text_english'].str.len(),
})
data_info

Unnamed: 0,complexity,text_german_length,text_english_length
0,easy,485,415
1,news_gen,296,280
2,news_spec,518,484
3,pop_science,542,521
4,science,1003,827


In [42]:
from enum import Enum


class Language(Enum):
    '''
    Languages handled by the notebook.

    Members are used both as the target language of a translation and as the
    language a prompt template is written in; their string values are what is
    logged to MLflow.
    '''
    ENGLISH = 'English'
    GERMAN = 'German'

class Complexity(Enum):
    '''
    Text complexity categories.

    The values mirror the labels shown in the `complexity` column of the loaded data.
    NOTE(review): not referenced by the code visible in this notebook section.
    '''
    EASY = 'easy'
    NEWS_GEN = 'news_gen'
    NEWS_SPEC = 'news_spec'
    POP_SCIENCE = 'pop_science'
    SCIENCE = 'science'

***
## 2 Model Loading
In the second step, we load the language models specified in the task. To do so, we use the `llama-cpp-python` library (further documentation can be found [here](https://github.com/abetlen/llama-cpp-python)) and download the models directly from [Hugging Face](https://huggingface.co/).

Quick overview and installation guide of llama.cpp:
- https://www.datacamp.com/tutorial/llama-cpp-tutorial
- https://christophergs.com/blog/running-open-source-llms-in-python

In [43]:
# Configuration of the models.
# Each key is the short model name used in run and experiment names; the value
# holds the Hugging Face repository ('repo_id') and the GGUF file ('filename')
# that are handed to create_llama_model.
MODELS = {
    'gemma': {
        'repo_id': 'lmstudio-ai/gemma-2b-it-GGUF',
        'filename': 'gemma-2b-it-q8_0.gguf',
    },
    'llama32': {
        'repo_id': 'hugging-quants/Llama-3.2-3B-Instruct-Q8_0-GGUF',
        'filename': 'llama-3.2-3b-instruct-q8_0.gguf',
    },
    'llama31': {
        'repo_id': 'lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF',
        'filename': 'Meta-Llama-3.1-8B-Instruct-Q5_K_M.gguf',
    },
    # Currently disabled (NOTE(review): reason not stated - presumably too large to run locally; confirm).
    # 'aya23': {
    #     'repo_id': 'bartowski/aya-23-35B-GGUF',
    #     'filename': 'aya-23-35B-Q5_K_M.gguf',
    # },
}

In [44]:
def create_llama_model(repo_id, filename, n_ctx=None):
    '''
    Loads a GGUF model via `Llama.from_pretrained`.

    Args:
        repo_id: Hugging Face repository id of the model (String).
        filename: Name of the GGUF file inside the repository (String).
        n_ctx: Context window size; 512 is used when None is given.

    Returns:
        The loaded model, or None if `repo_id` or `filename` is None or if
        loading raised an exception (the error is printed).
    '''
    # Nothing to load without both identifiers
    if repo_id is None or filename is None:
        return None

    # 512 is the default of llama_cpp
    context_size = 512 if n_ctx is None else n_ctx

    try:
        llm = Llama.from_pretrained(
            repo_id=repo_id,
            filename=filename,
            n_ctx=context_size,
            # n_gpu_layers=n_gpu_layers,
            n_threads=5,
            verbose=False,
        )
        print(f"Model {repo_id} erfolgreich geladen mit n_ctx={context_size}")
        return llm
    except Exception as e:
        print(f"Fehler beim Laden von {filename}: {e}")
        return None

***

## 3 Pipeline

### 3.1 Model Interaction

In [45]:
def translate(model, prompt, reference_translation):
    '''
    Sends the prompt to the model and returns the generated text.

    The generation limit is estimated from the tokenized prompt and the
    tokenized reference translation: 1.5 times their combined token count.

    Args:
        model: Model object offering `tokenize(bytes)` and being callable with
            `(prompt, max_tokens=..., echo=...)`.
        prompt: The complete prompt including the text to translate (String).
        reference_translation: The reference translation, only used to estimate
            the output length (String).

    Returns:
        The text of the first completion choice (String).
    '''
    # we estimate the needed max_tokens based on the tokenized prompt and reference_translation
    token_length_ref = len(model.tokenize(reference_translation.encode('utf-8')))
    token_length_prompt = len(model.tokenize(prompt.encode('utf-8')))

    # The model should not need more than 1.5x the combined length. max_tokens is a
    # token count and therefore has to be an integer; (3 * n + 1) // 2 is ceil(1.5 * n),
    # which yields the same effective limit as the previous float value.
    estimated_max_tokens = (3 * (token_length_prompt + token_length_ref) + 1) // 2

    response = model(prompt, max_tokens=estimated_max_tokens, echo=False)
    return response['choices'][0]['text']

### 3.2 Metrics Calculation
[GitHub repository of MetricX](https://github.com/google-research/metricx)

In [46]:
import subprocess
import json
import os


def calculate_metricx_score(source, reference, hypothesis):
    '''
    Calculates the MetricX-score based on source, reference, and hypothesis using metricx24.
    We are currently using the metricx-24-hybrid-large-v2p6-bfloat16 model but there are also other options
        as can be seen here: https://github.com/google-research/metricx

    The score is computed by running `metricx24.predict` as a subprocess of the
    current Python interpreter. Input and output are exchanged through JSONL
    files in a private temporary directory that is removed afterwards.

    Args:
        source: The source text (String).
        reference: The reference translation (String).
        hypothesis: The hypothesis translation (String).

    Returns:
        The calculated score as a float or None in case of an error.
    '''
    import sys
    import tempfile

    model = 'google/metricx-24-hybrid-large-v2p6-bfloat16'
    entry = {'id': '1', 'source': source, 'reference': reference, 'hypothesis': hypothesis}

    # A private directory avoids name collisions between parallel runs and guarantees
    # that both files are cleaned up, whatever happens below.
    with tempfile.TemporaryDirectory(prefix='metricx_') as tmp_dir:
        input_file = os.path.join(tmp_dir, 'temp_input.jsonl')
        output_file = os.path.join(tmp_dir, 'temp_output.jsonl')

        with open(input_file, 'w', encoding='utf-8') as f:
            json.dump(entry, f)
            f.write('\n')

        command = [
            # sys.executable makes sure the kernel's environment (where metricx24 is installed) is used
            sys.executable, '-m', 'metricx24.predict',
            '--tokenizer', 'google/mt5-xl',
            '--model_name_or_path', model,
            '--max_input_length', '1536',
            '--batch_size', '1',
            '--input_file', input_file,
            '--output_file', output_file
        ]

        try:
            # run() drains stdout/stderr while waiting; wait() on undrained pipes can deadlock
            completed = subprocess.run(command, capture_output=True, text=True)
        except OSError as e:
            print(f'Error executing metricx24: {e}')
            return None

        if completed.returncode != 0:
            print(f'Error executing metricx24. Return code: {completed.returncode}')
            if completed.stderr:
                print(completed.stderr)
            return None

        # Read score from the first line of the output file
        try:
            with open(output_file, 'r', encoding='utf-8') as f:
                first_line = f.readline()
        except OSError:
            print('Error parsing the output file.')
            return None

        if not first_line:
            return None  # The output file is empty

        try:
            return float(json.loads(first_line)['prediction'])
        except (json.JSONDecodeError, KeyError, TypeError, ValueError, AttributeError):
            # Covers malformed JSON as well as a missing or non-numeric 'prediction'
            print('Error parsing the output file.')
            return None




In [47]:
from rouge_score import rouge_scorer

def evaluate_translation(source, reference, hypothesis):
    '''
    Scores a single hypothesis translation against its reference.

    Args:
        source: The source text (String).
        reference: The reference translation (String).
        hypothesis: The hypothesis translation (String).

    Returns:
        A dict with the keys 'BLEU', 'chrF', 'rougeL' and 'MetricX'.
        BLEU and chrF are the sacrebleu scores (reported on a 0-100 scale), rougeL is
        the ROUGE-L F-measure multiplied by 100, and MetricX is -1 when no
        MetricX score could be calculated.
    '''
    # sacrebleu works on corpora, so the single sentence pair is wrapped in lists
    bleu = sacrebleu.corpus_bleu([hypothesis], [[reference]])
    chrf = sacrebleu.corpus_chrf([hypothesis], [[reference]])

    # -1 marks a failed MetricX calculation
    metricx = calculate_metricx_score(source, reference, hypothesis)
    metricx = -1 if metricx is None else metricx

    rouge = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True).score(reference, hypothesis)
    rouge_l_percent = rouge['rougeL'].fmeasure * 100

    scores = {}
    scores['BLEU'] = bleu.score
    scores['chrF'] = chrf.score
    scores['rougeL'] = rouge_l_percent
    scores['MetricX'] = metricx
    return scores


In [48]:
#evaluate_translation("Felix hat es satt: Ständig ist Mama unterwegs. Doch warum das so ist, will ihm niemand verraten. Für Felix ist daher klar: Seine Mutter ist eine Geheimagentin. Als er an seinem zehnten Geburtstag einen rätselhaften Brief erhält, scheint sich seine Vermutung zu bestätigen. Zusammen mit seiner besten Freundin Lina macht er sich daran, das Geheimnis um Mamas Arbeit zu lüften. Ehe sie sich versehen, stecken die beiden mitten in ihrem ersten spannenden Fall als angehende Geheimagenten.", "Felix is fed up: Mom is always on the go. But nobody will tell him why that is. For Felix, it's clear: his mother is a secret agent. When he receives a mysterious letter on his tenth birthday, his suspicion seems to be confirmed. Together with his best friend Lina, he sets out to uncover the secret of mom's job. Before they know it, the two are in the middle of their first exciting case as budding secret agents.", "\n\n**English Translation:**\n\nFelix had sat: Mama was constantly on the go. But why this is the case, no one will tell him. Therefore, clear to Felix: his mother is a covert agent. When he receives a cryptic letter on his eleventh birthday, it seems his suspicion is confirmed. Together with his best friend Lina, he starts unraveling the mystery of his mother's job. When they finally manage to solve the case, they stick to their first exciting clue like detectives.")

### 3.3 Logging to MLflow

In [49]:
def log_to_mlflow(experiment_name, metrics, prompt_type, model_name, complexity, target_language, tmp_result,
                  prompt_language):
    '''
    Logs one translation run to MLflow.

    The experiment is created if it does not exist and restored if it was
    deleted. Parameters and metrics are logged to a new run named
    `<model_name>/<complexity>/<prompt_type>`, and the result row is attached
    as the artifact `tmp_results.json`.

    Args:
        experiment_name: Name of the MLflow experiment (String).
        metrics: Mapping of metric name to numeric value.
        prompt_type: Key of the prompt template that was used (String).
        model_name: Short name of the model (String).
        complexity: Complexity label of the translated text (String).
        target_language: Language that was translated into (String).
        tmp_result: DataFrame holding the result row of this run.
        prompt_language: Language the prompt was written in (String).
    '''
    import os
    import tempfile

    experiment = mlflow.get_experiment_by_name(experiment_name)

    if experiment is None:
        mlflow.create_experiment(experiment_name)
    elif experiment.lifecycle_stage == 'deleted':
        # A deleted experiment still occupies its name, so it is restored instead of recreated
        mlflow.tracking.MlflowClient().restore_experiment(experiment.experiment_id)

    mlflow.set_experiment(experiment_name)
    # The context manager ends the run on exit, no explicit end_run() is needed
    with mlflow.start_run(run_name=f'{model_name}/{complexity}/{prompt_type}'):
        mlflow.log_param('model', model_name)
        mlflow.log_param('complexity', complexity)
        mlflow.log_param('prompt_type', prompt_type)
        mlflow.log_param('target_language', target_language)
        mlflow.log_param('prompt_language', prompt_language)
        for key, value in metrics.items():
            mlflow.log_metric(key, value)

        # Write the artifact into a temporary directory so that no stale
        # tmp_results.json is left behind in the working directory.
        with tempfile.TemporaryDirectory() as tmp_dir:
            artifact_file = os.path.join(tmp_dir, 'tmp_results.json')
            tmp_result.to_json(artifact_file, index=False)
            mlflow.log_artifact(artifact_file)


### 3.4 Pipeline Composition

In [50]:
import gc

def run_pipeline(texts):
    '''
    Runs every configured model with every prompt template on every text row.

    For each model in MODELS and each row of `texts`, a model instance is
    created, all German -> English and English -> German prompt templates are
    executed, evaluated and logged to MLflow, and the model is released again.

    Args:
        texts: DataFrame with the columns 'complexity', 'text_german' and 'text_english'.

    Returns:
        A DataFrame with one row per executed run.
    '''
    # Column names match the keys of the rows produced by execute_mlflow_run
    # ('reference_text', not 'reference'), so no always-empty column is created.
    results = pd.DataFrame(
        columns=['model', 'complexity', 'prompt_type', 'prompt', 'source_text', 'hypothesis', 'reference_text',
                 'metrics', 'prompt_language'])
    mlflow.set_tracking_uri(uri='http://127.0.0.1:5000')

    # (templates, target language, source column, reference column) per translation direction
    directions = (
        (PROMPT_TEMPLATES_GERMAN_ENGLISH, Language.ENGLISH, 'text_german', 'text_english'),
        (PROMPT_TEMPLATES_ENGLISH_GERMAN, Language.GERMAN, 'text_english', 'text_german'),
    )

    for model_name, model_config in MODELS.items():
        for _, row in texts.iterrows():
            complexity = row['complexity']
            model = createModel(model_config, row)
            if model is None:
                print(f'Skipping {model_name}/{complexity}: the model could not be loaded.')
                continue

            try:
                for templates, target_language, source_column, reference_column in directions:
                    source_text = row[source_column]
                    reference_text = row[reference_column]
                    # Both texts are required: one is translated, the other one is the reference
                    if pd.isna(source_text) or pd.isna(reference_text):
                        continue
                    for prompt_type, template_data in templates.items():
                        results = execute_mlflow_run(complexity, model, model_name, prompt_type, target_language,
                                                     results, source_text, reference_text, template_data)
            finally:
                # we dont need the model anymore so we delete it (also when a run failed)
                del model
                gc.collect()
    return results


def createModel(model_config, row):
    '''
    Creates a model whose context window is sized for the given text row.

    A first model instance is only used for tokenization: the German and English
    text of the row and the longest prompt template (by character count) are
    tokenized to estimate the number of tokens needed. That instance is released
    and the final model is created with the derived n_ctx.

    Args:
        model_config: Dict with the keys 'repo_id' and 'filename'.
        row: Row with the entries 'text_german' and 'text_english'.

    Returns:
        The model created by create_llama_model, or None if the model could not be loaded.
    '''
    # at first we just use a throwaway model for tokenization
    tokenizer_model = create_llama_model(model_config['repo_id'], model_config['filename'])
    if tokenizer_model is None:
        # create_llama_model already reported the failure; there is nothing to tokenize with
        return None

    combined_text = f"{row['text_german']} {row['text_english']}"
    text_tokens = len(tokenizer_model.tokenize(combined_text.encode('utf-8')))
    # we want to tokenize the longest template
    longest_template = max(
        (t['template'] for d in (PROMPT_TEMPLATES_GERMAN_ENGLISH, PROMPT_TEMPLATES_ENGLISH_GERMAN) for t in d.values()),
        key=len)
    prompt_tokens = len(tokenizer_model.tokenize(longest_template.encode('utf-8')))

    # now we release the throwaway model and create the final model based on the estimated_max_tokens
    del tokenizer_model
    gc.collect()
    estimated_max_tokens = (text_tokens + prompt_tokens) * 2
    # 20 % on top of the estimate as safety margin
    n_ctx = int(estimated_max_tokens * 1.2)
    print(f"estimated_max_tokens: {estimated_max_tokens}; n_ctx: {n_ctx}")
    return create_llama_model(model_config['repo_id'], model_config['filename'], n_ctx=n_ctx)


def execute_mlflow_run(complexity, model, model_name, prompt_type, target_language: Language, results, source_text,
                       reference_text, template_data):
    '''
    Executes one translation run: prompt the model, evaluate, log, collect.

    Args:
        complexity: Complexity label of the text (String).
        model: The loaded model used for the translation.
        model_name: Short name of the model (String).
        prompt_type: Key of the prompt template (String).
        target_language: Language to translate into.
        results: DataFrame with the results collected so far.
        source_text: Text to translate (String).
        reference_text: Reference translation (String).
        template_data: Dict with the keys 'template' and 'prompt_language'.

    Returns:
        A new DataFrame: `results` with the row of this run appended.
    '''
    prompt = template_data['template'].format(text=source_text)
    prompt_language = template_data['prompt_language']

    # Translation, timed
    translation_started = time.time()
    hypothesis = translate(model, prompt, reference_text)
    translation_finished = time.time()
    print('Prompt finished in (seconds): ', round(translation_finished - translation_started, 2))

    # Metric calculation, timed from the end of the translation
    metrics = evaluate_translation(source=source_text, reference=reference_text, hypothesis=hypothesis)
    print('Metric Calculation in (seconds): ', round(time.time() - translation_finished, 2))

    # .value yields the plain string of the enum member
    prompt_language_name = prompt_language.value
    record = dict(
        model=model_name,
        complexity=complexity,
        prompt_type=prompt_type,
        prompt=prompt,
        source_text=source_text,
        hypothesis=hypothesis,
        reference_text=reference_text,
        metrics=metrics,
        prompt_language=prompt_language_name,
    )
    tmp_result = pd.DataFrame([record])

    # MLflow logging: one experiment per model and complexity
    log_to_mlflow(f'{model_name}_{complexity}', metrics, prompt_type, model_name, complexity, target_language.value,
                  tmp_result, prompt_language_name)

    # Append the row of this run to the collected results
    return pd.concat([results, tmp_result], ignore_index=True)

### 3.5 Prompt Composition


In [51]:
# TODO: Add different prompt types (zero-shot, few-shot) and several variations of each


# Prompt templates for the direction English -> German.
# Each key is the prompt type used in run names; 'template' contains the
# placeholder {text} for the text to translate and 'prompt_language' states
# the language the prompt itself is written in.
PROMPT_TEMPLATES_ENGLISH_GERMAN = {
    #'zero_shot_to-german_english_1': {
    #    'template': 'Please translate the following text from English to German: \"{text}\"',
    #    'prompt_language': Language.ENGLISH
    #},
    #'zero_shot_to-german_german_1': {
    #    'template': 'Bitte übersetze diesen Text von Englisch nach Deutsch: \"{text}\"',
    #    'prompt_language': Language.GERMAN
    #},
    #'zero_shot_to-german_english_2': {
    #    'template': 'Please translate the following text from English to German. Do not ask questions, just return the translation. This is the text: \"{text}\"',
    #    'prompt_language': Language.ENGLISH
    #},
    #'zero_shot_to-german_german_2': {
    #    'template': 'Bitte übersetze diesen Text von Englisch nach Deutsch. Stelle keine Gegenfragen, sondern gib einfach die Übersetzung aus. Das ist der Text: \"{text}\"',
    #    'prompt_language': Language.GERMAN
    #},
    'zero_shot_style_to-german_english_1': {
        'template': 'Please translate the following text from English to German. Do not ask questions and do not give further explanations, just return the translation. Keep the original tone of the text. This is the text to translate: \"{text}\"',
        'prompt_language': Language.ENGLISH,
        'prompt_type': 'zero_shot_style',
        # TODO: add a 'complexities' entry once it is decided which text complexities this
        #       prompt applies to (the key was left without a value, which is a syntax error)
    },
    'zero_shot_style_to-german_german_1': {
        'template': 'Bitte übersetze diesen Text von Englisch nach Deutsch. Stelle keine Gegenfragen und nenne keine Begründungen, sondern gib einfach die Übersetzung aus. Behalte den ursprünglichen Ton des Textes bei. Das ist der zu übersetzende Text: \"{text}\"',
        'prompt_language': Language.GERMAN
    },
    # ... further entries
}

# Prompt templates for the direction German -> English.
# Each key is the prompt type used in run names; 'template' contains the
# placeholder {text} for the text to translate and 'prompt_language' states
# the language the prompt itself is written in.
PROMPT_TEMPLATES_GERMAN_ENGLISH = {
    #'zero_shot_to-english_english_1': {
    #    'template': 'Please translate the following text from German to English: \"{text}\"',
    #    'prompt_language': Language.ENGLISH  # corrected: this prompt is written in English
    #},
    #'zero_shot_to-english_german_1': {
    #    'template': 'Bitte übersetze diesen Text von Deutsch nach Englisch: \"{text}\"',
    #    'prompt_language': Language.GERMAN
    #},
    #'zero_shot_to-english_english_2': {
    #    'template': 'Please translate the following text from German to English. Do not ask questions, just return the translation. This is the text: \"{text}\"',
    #   'prompt_language': Language.ENGLISH
    #},
    #'zero_shot_to-english_german_2': {
    #    'template': 'Bitte übersetze diesen Text von Deutsch nach Englisch. Stelle keine Gegenfragen, sondern gib einfach die Übersetzung aus. Das ist der Text: \"{text}\"',
    #    'prompt_language': Language.GERMAN
    #},
    'zero_shot_style_to-english_english_1': {
        'template': 'Please translate the following text from German to English. Do not ask questions and do not give further explanations, just return the translation. Keep the original tone of the text. This is the text to translate: \"{text}\"',
        'prompt_language': Language.ENGLISH
    },
    'zero_shot_style_to-english_german_1': {
        'template': 'Bitte übersetze diesen Text von Deutsch nach Englisch. Stelle keine Gegenfragen und nenne keine Begründungen, sondern gib einfach die Übersetzung aus. Behalte den ursprünglichen Ton des Textes bei. Das ist der zu übersetzende Text: \"{text}\"',
        'prompt_language': Language.GERMAN
    },
    # ... further entries
}

***
## 4 Execute Pipeline

In [52]:
# Run the complete pipeline on the loaded texts (every model, every row, every prompt template).
translation_results = run_pipeline(data)
print('Fertig mit der Pipeline. Speichere Ergebnisse...')
# Persist all collected runs as a semicolon-separated CSV file in the working directory.
translation_results.to_csv('translation_results.csv', sep=';')
print('Pipeline abgeschlossen. Ergebnisse gespeichert.')

llama_init_from_model: n_ctx_per_seq (512) < n_ctx_train (8192) -- the full capacity of the model will not be utilized


Model lmstudio-ai/gemma-2b-it-GGUF erfolgreich geladen mit n_ctx=512
estimated_max_tokens: 516; n_ctx: 619


llama_init_from_model: n_ctx_per_seq (640) < n_ctx_train (8192) -- the full capacity of the model will not be utilized


Model lmstudio-ai/gemma-2b-it-GGUF erfolgreich geladen mit n_ctx=619
Prompt finished in (seconds):  2.32
Metric Calculation in (seconds):  30.37
🏃 View run gemma/easy/zero_shot_style_to-english_english_1 at: http://127.0.0.1:5000/#/experiments/429765178055128713/runs/0fa3f8df0bdd455bb2bd7b3e56bfa7b6
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/429765178055128713
Prompt finished in (seconds):  2.08
Metric Calculation in (seconds):  24.54
🏃 View run gemma/easy/zero_shot_style_to-english_german_1 at: http://127.0.0.1:5000/#/experiments/429765178055128713/runs/faab2b453ac6433aa85a4321bca465e5
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/429765178055128713
Prompt finished in (seconds):  1.66
Metric Calculation in (seconds):  24.32
🏃 View run gemma/easy/zero_shot_style_to-german_english_1 at: http://127.0.0.1:5000/#/experiments/429765178055128713/runs/b2511817a19145848edc27315cc0cfd2
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/429765178055128713
Prompt

llama_init_from_model: n_ctx_per_seq (512) < n_ctx_train (8192) -- the full capacity of the model will not be utilized


Model lmstudio-ai/gemma-2b-it-GGUF erfolgreich geladen mit n_ctx=512
estimated_max_tokens: 346; n_ctx: 415


llama_init_from_model: n_ctx_per_seq (416) < n_ctx_train (8192) -- the full capacity of the model will not be utilized


Model lmstudio-ai/gemma-2b-it-GGUF erfolgreich geladen mit n_ctx=415
Prompt finished in (seconds):  6.01
Metric Calculation in (seconds):  20.38
🏃 View run gemma/news_gen/zero_shot_style_to-english_english_1 at: http://127.0.0.1:5000/#/experiments/391944840061747289/runs/42f698cb4d43403284ed2c935288a2bd
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/391944840061747289
Prompt finished in (seconds):  7.69
Metric Calculation in (seconds):  21.34
🏃 View run gemma/news_gen/zero_shot_style_to-english_german_1 at: http://127.0.0.1:5000/#/experiments/391944840061747289/runs/869f05456b814d0d9dca538fc4ac8b93
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/391944840061747289
Prompt finished in (seconds):  1.11
Metric Calculation in (seconds):  18.81
🏃 View run gemma/news_gen/zero_shot_style_to-german_english_1 at: http://127.0.0.1:5000/#/experiments/391944840061747289/runs/577d80da285940eeaaf01a904d982f1c
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/3919448400617

llama_init_from_model: n_ctx_per_seq (512) < n_ctx_train (8192) -- the full capacity of the model will not be utilized


Model lmstudio-ai/gemma-2b-it-GGUF erfolgreich geladen mit n_ctx=512
estimated_max_tokens: 526; n_ctx: 631


llama_init_from_model: n_ctx_per_seq (640) < n_ctx_train (8192) -- the full capacity of the model will not be utilized


Model lmstudio-ai/gemma-2b-it-GGUF erfolgreich geladen mit n_ctx=631
Prompt finished in (seconds):  2.0
Metric Calculation in (seconds):  22.9
🏃 View run gemma/news_spec/zero_shot_style_to-english_english_1 at: http://127.0.0.1:5000/#/experiments/713590614913207437/runs/91a7fa5d9e4d46389ddca8ded5e32d41
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/713590614913207437
Prompt finished in (seconds):  11.46
Metric Calculation in (seconds):  29.78
🏃 View run gemma/news_spec/zero_shot_style_to-english_german_1 at: http://127.0.0.1:5000/#/experiments/713590614913207437/runs/d59296da393b40a3b2c8e0876ef5fc98
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/713590614913207437
Prompt finished in (seconds):  1.61
Metric Calculation in (seconds):  22.88
🏃 View run gemma/news_spec/zero_shot_style_to-german_english_1 at: http://127.0.0.1:5000/#/experiments/713590614913207437/runs/35ff35647945406a9513feed2eac9a47
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/71359061491

llama_init_from_model: n_ctx_per_seq (512) < n_ctx_train (8192) -- the full capacity of the model will not be utilized


Model lmstudio-ai/gemma-2b-it-GGUF erfolgreich geladen mit n_ctx=512
estimated_max_tokens: 540; n_ctx: 648


llama_init_from_model: n_ctx_per_seq (672) < n_ctx_train (8192) -- the full capacity of the model will not be utilized


Model lmstudio-ai/gemma-2b-it-GGUF erfolgreich geladen mit n_ctx=648
Prompt finished in (seconds):  2.27
Metric Calculation in (seconds):  24.63
🏃 View run gemma/pop_science/zero_shot_style_to-english_english_1 at: http://127.0.0.1:5000/#/experiments/819534162661537410/runs/225e7ad29aff40a0a81aedeeaac42011
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/819534162661537410
Prompt finished in (seconds):  10.64
Metric Calculation in (seconds):  30.27
🏃 View run gemma/pop_science/zero_shot_style_to-english_german_1 at: http://127.0.0.1:5000/#/experiments/819534162661537410/runs/dc75fa7532af45888939b3f7fe190900
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/819534162661537410
Prompt finished in (seconds):  1.61
Metric Calculation in (seconds):  24.21
🏃 View run gemma/pop_science/zero_shot_style_to-german_english_1 at: http://127.0.0.1:5000/#/experiments/819534162661537410/runs/14edbe69f4b84df78383021ce6640142
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/819

llama_init_from_model: n_ctx_per_seq (512) < n_ctx_train (8192) -- the full capacity of the model will not be utilized


Model lmstudio-ai/gemma-2b-it-GGUF erfolgreich geladen mit n_ctx=512
estimated_max_tokens: 1096; n_ctx: 1315


llama_init_from_model: n_ctx_per_seq (1344) < n_ctx_train (8192) -- the full capacity of the model will not be utilized


Model lmstudio-ai/gemma-2b-it-GGUF erfolgreich geladen mit n_ctx=1315
Prompt finished in (seconds):  24.56
Metric Calculation in (seconds):  54.27
🏃 View run gemma/science/zero_shot_style_to-english_english_1 at: http://127.0.0.1:5000/#/experiments/895795565739563094/runs/e507835277204861a33a6cbafaf931a0
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/895795565739563094
Prompt finished in (seconds):  25.66
Metric Calculation in (seconds):  58.45
🏃 View run gemma/science/zero_shot_style_to-english_german_1 at: http://127.0.0.1:5000/#/experiments/895795565739563094/runs/539a58bc48e7416481c4881524cd7b42
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/895795565739563094
Prompt finished in (seconds):  2.91
Metric Calculation in (seconds):  36.72
🏃 View run gemma/science/zero_shot_style_to-german_english_1 at: http://127.0.0.1:5000/#/experiments/895795565739563094/runs/42968316d38642d0880d0eddf9306c63
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/8957955657395

llama_init_from_model: n_ctx_per_seq (512) < n_ctx_train (131072) -- the full capacity of the model will not be utilized


Model hugging-quants/Llama-3.2-3B-Instruct-Q8_0-GGUF erfolgreich geladen mit n_ctx=512
estimated_max_tokens: 596; n_ctx: 715


llama_init_from_model: n_ctx_per_seq (736) < n_ctx_train (131072) -- the full capacity of the model will not be utilized


Model hugging-quants/Llama-3.2-3B-Instruct-Q8_0-GGUF erfolgreich geladen mit n_ctx=715
Prompt finished in (seconds):  56.12
Metric Calculation in (seconds):  56.81
🏃 View run llama32/easy/zero_shot_style_to-english_english_1 at: http://127.0.0.1:5000/#/experiments/170611153105097398/runs/e92b5083e21e4a8881bc8a3ada415151
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/170611153105097398
Prompt finished in (seconds):  18.64
Metric Calculation in (seconds):  31.14
🏃 View run llama32/easy/zero_shot_style_to-english_german_1 at: http://127.0.0.1:5000/#/experiments/170611153105097398/runs/9bfde635327e495cbcad4f0e93c8646a
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/170611153105097398
Prompt finished in (seconds):  24.44
Metric Calculation in (seconds):  33.03
🏃 View run llama32/easy/zero_shot_style_to-german_english_1 at: http://127.0.0.1:5000/#/experiments/170611153105097398/runs/a342602591b54de58e8618d3102e8b6e
🧪 View experiment at: http://127.0.0.1:5000/#/experiment

llama_init_from_model: n_ctx_per_seq (512) < n_ctx_train (131072) -- the full capacity of the model will not be utilized


Model hugging-quants/Llama-3.2-3B-Instruct-Q8_0-GGUF erfolgreich geladen mit n_ctx=512
estimated_max_tokens: 408; n_ctx: 489


llama_init_from_model: n_ctx_per_seq (512) < n_ctx_train (131072) -- the full capacity of the model will not be utilized


Model hugging-quants/Llama-3.2-3B-Instruct-Q8_0-GGUF erfolgreich geladen mit n_ctx=489
Prompt finished in (seconds):  32.75
Metric Calculation in (seconds):  33.79
🏃 View run llama32/news_gen/zero_shot_style_to-english_english_1 at: http://127.0.0.1:5000/#/experiments/379656095678811037/runs/e8226f1799664defac50f97b4c5abaf7
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/379656095678811037
Prompt finished in (seconds):  2.94
Metric Calculation in (seconds):  19.13
🏃 View run llama32/news_gen/zero_shot_style_to-english_german_1 at: http://127.0.0.1:5000/#/experiments/379656095678811037/runs/bf8097d682be4c38a0e9e6c22ae769b6
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/379656095678811037
Prompt finished in (seconds):  12.48
Metric Calculation in (seconds):  22.2
🏃 View run llama32/news_gen/zero_shot_style_to-german_english_1 at: http://127.0.0.1:5000/#/experiments/379656095678811037/runs/5ea2553991ca40e1b28d36d354a9ec93
🧪 View experiment at: http://127.0.0.1:5000/#/

llama_init_from_model: n_ctx_per_seq (512) < n_ctx_train (131072) -- the full capacity of the model will not be utilized


Model hugging-quants/Llama-3.2-3B-Instruct-Q8_0-GGUF erfolgreich geladen mit n_ctx=512
estimated_max_tokens: 610; n_ctx: 732


llama_init_from_model: n_ctx_per_seq (736) < n_ctx_train (131072) -- the full capacity of the model will not be utilized


Model hugging-quants/Llama-3.2-3B-Instruct-Q8_0-GGUF erfolgreich geladen mit n_ctx=732
Prompt finished in (seconds):  13.9
Metric Calculation in (seconds):  29.29
🏃 View run llama32/news_spec/zero_shot_style_to-english_english_1 at: http://127.0.0.1:5000/#/experiments/194158564450517751/runs/01dc60a9d85e4d9993efa5575323caf6
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/194158564450517751
Prompt finished in (seconds):  3.38
Metric Calculation in (seconds):  23.24
🏃 View run llama32/news_spec/zero_shot_style_to-english_german_1 at: http://127.0.0.1:5000/#/experiments/194158564450517751/runs/33c30cfde9f945df8d33db560c60193a
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/194158564450517751
Prompt finished in (seconds):  21.21
Metric Calculation in (seconds):  30.61
🏃 View run llama32/news_spec/zero_shot_style_to-german_english_1 at: http://127.0.0.1:5000/#/experiments/194158564450517751/runs/fcda53b291aa4e51820aee85c430cdcd
🧪 View experiment at: http://127.0.0.1:5000

llama_init_from_model: n_ctx_per_seq (512) < n_ctx_train (131072) -- the full capacity of the model will not be utilized


Model hugging-quants/Llama-3.2-3B-Instruct-Q8_0-GGUF erfolgreich geladen mit n_ctx=512
estimated_max_tokens: 626; n_ctx: 751


llama_init_from_model: n_ctx_per_seq (768) < n_ctx_train (131072) -- the full capacity of the model will not be utilized


Model hugging-quants/Llama-3.2-3B-Instruct-Q8_0-GGUF erfolgreich geladen mit n_ctx=751
Prompt finished in (seconds):  17.74
Metric Calculation in (seconds):  32.45
🏃 View run llama32/pop_science/zero_shot_style_to-english_english_1 at: http://127.0.0.1:5000/#/experiments/553009959054917340/runs/4494700ee99441ae8e1ae84d5c74e8a2
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/553009959054917340
Prompt finished in (seconds):  3.74
Metric Calculation in (seconds):  23.65
🏃 View run llama32/pop_science/zero_shot_style_to-english_german_1 at: http://127.0.0.1:5000/#/experiments/553009959054917340/runs/e8ff4237a62a457fb37e28f3ca7cb74f
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/553009959054917340
Prompt finished in (seconds):  23.43
Metric Calculation in (seconds):  31.03
🏃 View run llama32/pop_science/zero_shot_style_to-german_english_1 at: http://127.0.0.1:5000/#/experiments/553009959054917340/runs/f1218c6a8ffa45ddb9aa354e18bcc60c
🧪 View experiment at: http://127.0.0

llama_init_from_model: n_ctx_per_seq (512) < n_ctx_train (131072) -- the full capacity of the model will not be utilized


Model hugging-quants/Llama-3.2-3B-Instruct-Q8_0-GGUF erfolgreich geladen mit n_ctx=512
estimated_max_tokens: 1164; n_ctx: 1396


llama_init_from_model: n_ctx_per_seq (1408) < n_ctx_train (131072) -- the full capacity of the model will not be utilized


Model hugging-quants/Llama-3.2-3B-Instruct-Q8_0-GGUF erfolgreich geladen mit n_ctx=1396
Prompt finished in (seconds):  114.25
Metric Calculation in (seconds):  150.0
🏃 View run llama32/science/zero_shot_style_to-english_english_1 at: http://127.0.0.1:5000/#/experiments/248431796146488211/runs/140a2e48eb1143b49114c7bb70056cb7
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/248431796146488211
Prompt finished in (seconds):  109.12
Metric Calculation in (seconds):  122.62
🏃 View run llama32/science/zero_shot_style_to-english_german_1 at: http://127.0.0.1:5000/#/experiments/248431796146488211/runs/76644faa6aaf4cfe950a64e807832e2c
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/248431796146488211
Prompt finished in (seconds):  41.68
Metric Calculation in (seconds):  56.97
🏃 View run llama32/science/zero_shot_style_to-german_english_1 at: http://127.0.0.1:5000/#/experiments/248431796146488211/runs/45ba785f49f74748ab60e194f16ff747
🧪 View experiment at: http://127.0.0.1:5000

llama_init_from_model: n_ctx_per_seq (512) < n_ctx_train (131072) -- the full capacity of the model will not be utilized


Model lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF erfolgreich geladen mit n_ctx=512
estimated_max_tokens: 596; n_ctx: 715


llama_init_from_model: n_ctx_per_seq (736) < n_ctx_train (131072) -- the full capacity of the model will not be utilized


Model lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF erfolgreich geladen mit n_ctx=715
Prompt finished in (seconds):  84.41
Metric Calculation in (seconds):  59.38
🏃 View run llama31/easy/zero_shot_style_to-english_english_1 at: http://127.0.0.1:5000/#/experiments/496790586212187793/runs/2edf7db1d86e44fb8fc44f950964097e
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/496790586212187793
Prompt finished in (seconds):  91.7
Metric Calculation in (seconds):  51.36
🏃 View run llama31/easy/zero_shot_style_to-english_german_1 at: http://127.0.0.1:5000/#/experiments/496790586212187793/runs/4280014ec7b14281970ff8c5a559e4c0
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/496790586212187793
Prompt finished in (seconds):  81.5
Metric Calculation in (seconds):  46.45
🏃 View run llama31/easy/zero_shot_style_to-german_english_1 at: http://127.0.0.1:5000/#/experiments/496790586212187793/runs/05dabcb90e9c430cbf29a43c4eb10411
🧪 View experiment at: http://127.0.0.1:5000/#/experime

llama_init_from_model: n_ctx_per_seq (512) < n_ctx_train (131072) -- the full capacity of the model will not be utilized


Model lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF erfolgreich geladen mit n_ctx=512
estimated_max_tokens: 408; n_ctx: 489


llama_init_from_model: n_ctx_per_seq (512) < n_ctx_train (131072) -- the full capacity of the model will not be utilized


Model lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF erfolgreich geladen mit n_ctx=489
Prompt finished in (seconds):  55.87
Metric Calculation in (seconds):  33.39
🏃 View run llama31/news_gen/zero_shot_style_to-english_english_1 at: http://127.0.0.1:5000/#/experiments/378317835055917962/runs/ccccd2d9a57f4574ba47a07eb4cfac6e
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/378317835055917962
Prompt finished in (seconds):  62.37
Metric Calculation in (seconds):  32.58
🏃 View run llama31/news_gen/zero_shot_style_to-english_german_1 at: http://127.0.0.1:5000/#/experiments/378317835055917962/runs/f0390d6185254ca388bdcd84cfa26cb7
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/378317835055917962
Prompt finished in (seconds):  53.42
Metric Calculation in (seconds):  35.1
🏃 View run llama31/news_gen/zero_shot_style_to-german_english_1 at: http://127.0.0.1:5000/#/experiments/378317835055917962/runs/66c96a59bba64779b2154257329820e4
🧪 View experiment at: http://127.0.0.1:50

llama_init_from_model: n_ctx_per_seq (512) < n_ctx_train (131072) -- the full capacity of the model will not be utilized


Model lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF erfolgreich geladen mit n_ctx=512
estimated_max_tokens: 610; n_ctx: 732


llama_init_from_model: n_ctx_per_seq (736) < n_ctx_train (131072) -- the full capacity of the model will not be utilized


Model lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF erfolgreich geladen mit n_ctx=732
Prompt finished in (seconds):  86.81
Metric Calculation in (seconds):  50.86
🏃 View run llama31/news_spec/zero_shot_style_to-english_english_1 at: http://127.0.0.1:5000/#/experiments/278262578605400461/runs/b173184e911d4f078dfa2e5299dad028
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/278262578605400461
Prompt finished in (seconds):  93.76
Metric Calculation in (seconds):  57.5
🏃 View run llama31/news_spec/zero_shot_style_to-english_german_1 at: http://127.0.0.1:5000/#/experiments/278262578605400461/runs/ada534674e2b42b5a5c20a2feecef0cd
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/278262578605400461
Prompt finished in (seconds):  83.69
Metric Calculation in (seconds):  47.38
🏃 View run llama31/news_spec/zero_shot_style_to-german_english_1 at: http://127.0.0.1:5000/#/experiments/278262578605400461/runs/67fca6f091a0493e981811f34da63839
🧪 View experiment at: http://127.0.0.1

llama_init_from_model: n_ctx_per_seq (512) < n_ctx_train (131072) -- the full capacity of the model will not be utilized


Model lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF erfolgreich geladen mit n_ctx=512
estimated_max_tokens: 626; n_ctx: 751


llama_init_from_model: n_ctx_per_seq (768) < n_ctx_train (131072) -- the full capacity of the model will not be utilized


Model lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF erfolgreich geladen mit n_ctx=751
Prompt finished in (seconds):  88.53
Metric Calculation in (seconds):  62.14
🏃 View run llama31/pop_science/zero_shot_style_to-english_english_1 at: http://127.0.0.1:5000/#/experiments/358136329887025901/runs/6bc0cec13faa41789b9bf49039e7c62a
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/358136329887025901
Prompt finished in (seconds):  95.96
Metric Calculation in (seconds):  55.99
🏃 View run llama31/pop_science/zero_shot_style_to-english_german_1 at: http://127.0.0.1:5000/#/experiments/358136329887025901/runs/317effe294644aeeb400b173e48b30a6
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/358136329887025901
Prompt finished in (seconds):  85.49
Metric Calculation in (seconds):  47.3
🏃 View run llama31/pop_science/zero_shot_style_to-german_english_1 at: http://127.0.0.1:5000/#/experiments/358136329887025901/runs/61cf9fa9f2ef40eb84eeced6809585cd
🧪 View experiment at: http://127

llama_init_from_model: n_ctx_per_seq (512) < n_ctx_train (131072) -- the full capacity of the model will not be utilized


Model lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF erfolgreich geladen mit n_ctx=512
estimated_max_tokens: 1164; n_ctx: 1396


llama_init_from_model: n_ctx_per_seq (1408) < n_ctx_train (131072) -- the full capacity of the model will not be utilized


Model lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF erfolgreich geladen mit n_ctx=1396
Prompt finished in (seconds):  175.31
Metric Calculation in (seconds):  54.11
🏃 View run llama31/science/zero_shot_style_to-english_english_1 at: http://127.0.0.1:5000/#/experiments/958464439758456234/runs/7f1ce09c80d941bc9018e6ee070a4c8a
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/958464439758456234
Prompt finished in (seconds):  182.64
Metric Calculation in (seconds):  144.84
🏃 View run llama31/science/zero_shot_style_to-english_german_1 at: http://127.0.0.1:5000/#/experiments/958464439758456234/runs/7b8f356d2d6d48b4ac49e70bffbc5fbf
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/958464439758456234
Prompt finished in (seconds):  166.72
Metric Calculation in (seconds):  126.74
🏃 View run llama31/science/zero_shot_style_to-german_english_1 at: http://127.0.0.1:5000/#/experiments/958464439758456234/runs/1987b9432aec458d8ddeca08695e72cf
🧪 View experiment at: http://127.0.0.