In [59]:
import torch
from langchain import PromptTemplate
from transformers import BitsAndBytesConfig, AutoModelForCausalLM, AutoTokenizer, GenerationConfig, pipeline 
import os 
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
from typing import Dict
from class_extender import ClassExtender
import time

os.environ["HF_TOKEN"]='your_huggingface_API_key'

# /home/tpllmws23/Chatbot-LLama-Pruefungsamt/Chatbot-Benni/finetune/convert/model/raftv2

BASE_MODEL_ID =  "/home/tpllmws23/llms/raftv2" # "mistralai/Mistral-7B-v0.3"

#"../../../llms/mistral-7b-instruct-v0.2.Q4_K_M.gguf"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

# Initialize tokenizer
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID, use_fast=True)
#tokenizer.pad_token = tokenizer.eos_token
#tokenizer.pad_token = tokenizer.unk_token

# Initialize language model
start_time = time.time()
model = AutoModelForCausalLM.from_pretrained(BASE_MODEL_ID, torch_dtype=torch.bfloat16,
trust_remote_code=True, device_map="auto",
quantization_config=bnb_config)
load_time = time.time() - start_time

generation_config = GenerationConfig.from_pretrained(BASE_MODEL_ID)
generation_config.max_new_tokens = 1024 # maximum number of new tokens that can be generated by the model
generation_config.temperature = 0.0001 # randomness of the generated tex
generation_config.top_p = 0 # diversity of the generated text
generation_config.do_sample = True 
generation_config.repetition_penalty = 1.2

#generation_config.use_cache=True,
#generation_config.num_return_sequences=1,

generation_config.output_logits=True,
generation_config.output_scores=True,
generation_config.output_hidden_states=True,
generation_config.return_dict_in_generate=True,
print(generation_config)

'''
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    return_full_text=True,
    generation_config=generation_config,
)
'''


from transformers import TextClassificationPipeline
from transformers import Pipeline
'''
class TextClassificationFullOutputsPipeline(TextClassificationPipeline):
    def postprocess(self, model_outputs):
        return model_outputs
'''



def postprocess(self, model_outputs, **postprocess_parameters: Dict):
    clean_up_tokenization_spaces=True
    input_ids = model_outputs["input_ids"]
    prompt_text = model_outputs["prompt_text"]
    generated_sequence = model_outputs["generated_sequence"][0]
    records = []
    for sequence in generated_sequence:
        # Decode text
        text = self.tokenizer.decode(
            sequence,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=clean_up_tokenization_spaces,
        )

        # Remove PADDING prompt of the sequence if XLNet or Transfo-XL model is used
        if input_ids is None:
            prompt_length = 0
        else:
            prompt_length = len(
                self.tokenizer.decode(
                    input_ids[0],
                    skip_special_tokens=True,
                    clean_up_tokenization_spaces=clean_up_tokenization_spaces,
                )
            )

        all_text = text[prompt_length:]
        if isinstance(prompt_text, str):
            all_text = prompt_text + all_text
            

        record = {"generated_text": all_text, "model_outputs": model_outputs["output"], "input_ids": input_ids}
    records.append(record)
    return records

pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        return_full_text=True,
        generation_config=generation_config,
    )

with ClassExtender(type(pipe), postprocess), ClassExtender(TextClassificationPipeline, postprocess):

    

    #pipe.postprocess = postprocess_new


    llm = HuggingFacePipeline(pipeline=pipe)

    def create_eval_prompt(query: str, context: str):
        system_prompt = "You are a smart helpful assistant for the HTWG Konstanz. Answer the following question based only on the provided context. It is mandatory to answer in GERMAN:\n\n"
        return f"[INST]{system_prompt}Context: {context}\n\nQuestion: {query}[/INST]"
    
    data = [{"page_content": "\n\u00a7 4 Sprachkenntnisse\n(1) 1Neben den allgemeinen Zugangsvoraussetzungen (\u00a7 59 LHG) sind f\u00fcr die in \u00a7 1 Abs. 1\nS. 1 genannten Studieng\u00e4nge deutsche Sprach kenntnisse nachzuweisen. 2Diese k\u00f6nnen\ndurch eine deutsche Hochschulzugangsberechtigung (u. a. erfolgreich abgeschlossenes\ngrundst\u00e4ndiges Hochschulstudium) nachgewiesen werden. 3Ferner kann der\nSprachnachweis durch die Vorlage eines der folgenden Dokumente erbracht werden:\n1. Feststellungspr\u00fcfung f\u00fcr ein Bachelorstudium durch Vorlage der Zugangsberechtigung\ndes Studienkollegs an der Hochschule Konstanz,\n2. Test Deutsch als Fremdsprache (TestDaF), sofern im Durchschnitt mindestens die\nStufe TDN 4 erreicht wurde,   Seite 5 von 43 3. Deutsche Sprachpr\u00fcfung f\u00fcr den Hochschulzugang (DSH), sofern die DSH mit\nmindestens der Stufe DSH -2 abgeschlossen wurde,\n4. \u201eTelc Deutsch C1 Hochschule\u201c\noder eine \u00e4quivalente Sprachpr\u00fcfung gem\u00e4\u00df der Rahmenordnung \u00fcber Deutsche\nSprachpr\u00fcfungen f\u00fcr das Studium an deutschen Hochschulen (RO -DT). 4Auf den Nachweis\neiner deutschen Sprachpr\u00fcfung kann bei Bewerber innen und Bewerbern im besonders\nbegr\u00fcndeten Einzelfall verzichtet werden, insbesondere wenn sie die deutsche\nStaatsangeh\u00f6rigkeit besitzen.\n(2) 1Sprachnachweise f\u00fcr den gew\u00e4hl ten Studiengang, die durch die Bewerberin oder den\nBewerber bis zum Bewerbungsschluss nicht vorgelegt werden k\u00f6nnen, k\u00f6nnen bis zum\nVorlesungsbeginn des Semesters gem\u00e4\u00df Terminplan der Hochschule Konstanz, f\u00fcr das  der\nAntrag auf Zulassung gestellt wurde, nachgereicht werden. 2Die Zulassung erfolgt in diesem\nFall gem \u00e4\u00df \u00a7 6  Abs. 5  unter Vorbehalt .\n(3) 1F\u00fcr Zeitstudierende gelten die Regelungen in \u00a7 10 Zulassungs - und\nImmatrikulationsordnung (ZIO) der Hochschule Konstanz.", "metadata": {"file_path": "/home/tpllmws23/Chatbot-LLama-Pruefungsamt/main_data_filtered/119_ZuSMa_Senat_18012022.pdf"}, "type": "Document"}, {"page_content": "\n\u00a7 21b Mechatronik (MME) Berufsbegleitendes Studium\n(1) Studiengangspezifische Zugangsvoraussetzungen gem\u00e4\u00df \u00a7 5  Abs. 1\nZugangsvoraussetzungen f\u00fcr den Masterstudiengang Mechatronik sind:\n1. Ein mit der Note 2,9 oder besser abgeschlossenes grundst\u00e4ndiges Hochschulstudium\ngem\u00e4\u00df \u00a7 5 Abs. 1 Nr. 1 in einem Studiengang der Fachrichtungen Systemtechnik,\nMaschinenbau, Elektrotechnik, Fahrzeugtechnik, Mechatronik, Feinwerktechnik oder einer\nverwandten Fachrichtung.\n2. Englischkenntnisse, \u00e4quivalent z u Niveau- Stufe B1 des Europ\u00e4ischen Referenzrahmens\nf\u00fcr das Lernen und Lehren von Fremdsprachen. Als \u00e4quivalent zu einem Zertifikat \u00fcber die\nNiveau -Stufe B1 gelten insbesondere folgende Nachweise:\nI. das Schulabschlusszeugnis, aus dem der Besuch des Englischunterrichts bis zum\nErreichen des mittleren Bildungsabschlusses (10. Klasse) bzw. bis zum Erreichen\nder Fachhochschulreife hervorgeht oder\nII. ein Notenspiegel, aus dem die bestandene Pr\u00fcfungsleistung \u00fcber eine\nLehrveranstaltung im Rahmen des grundst\u00e4ndigen Studiums hervorgeht, die die\nenglische Sprache zum Inhalt hatte oder\nIII. eine Bescheinigung \u00fcber den mindestens sechsmonatigen Aufenthalt an einer Schule, Hochschule oder anderen Bildungsinstitution mit Englisch als\nUnterrichtssprache oder\nIV. eine Bescheinigung \u00fcber den Aufenthalt im englischsprachigen Ausland, der einen Zeitraum von mindestens sechs Monaten bzw. einem Studiensemester umfasst.\nDie Vorlage anderer geeigneter Nachweise ist m\u00f6glich.\n(2) Auswahlkriterien nach \u00a7 9 Abs. 2\n1. Ergebnis eines Auswahlgespr\u00e4chs\nNicht zutreffend.\n2. Leistungen, die mit der Abschlusspr\u00fcfung des grundst\u00e4ndigen Studiums nach Abs. 1\ni. V. m. \u00a7 5 Abs. 1 Nr. 1 nachgewiesen sind\nDie Durchschnittsnote der Abschlusspr\u00fcfung des grundst\u00e4ndigen Hochschulstudiums nach\nAbs. 1 bildet die Teilnote 1 als Basis zur Bestimmung der Auswahlnote.  Abweichend von Satz\n1 bildet in den F\u00e4llen des \u00a7 3 Abs. 2 Nr. 1 Satz 2 die Durchschnittsnote nach \u00a7 3 Abs. 2 Nr. 1 Satz 3 die Teilnote 1. Bei ausl\u00e4ndischen Bildungsnachweisen ist die Durchschnittsnote nach\ndeutsc her Deutung als Teilnote 1 zu ber\u00fccksichtigen.\nZus\u00e4tzlich werden die Einzelnoten folgender F\u00e4cher der Abschlusspr\u00fcfung des grundst\u00e4ndigen Hochschulstudiums, die \u00fcber die Eignung f\u00fcr den gew\u00e4hlten Studiengang\nbesonderen Aufschluss geben, f\u00fcr die Auswahl herangezogen:\n- Technische Mechanik (Dynamik),\n- Elektrotechnik,\n- Messtechnik,\n- Regelungstechnik,\n- Elektrische Antriebe.\nDabei wird eine Note zwischen 1,0 und 1,7 in einem der o. g. F\u00e4cher jeweils mit dem Wert 0,1\nbewertet. Die kumulierte Gesamtzahl bildet die Teil note 2.", "metadata": {"file_path": "/home/tpllmws23/Chatbot-LLama-Pruefungsamt/main_data_filtered/119_ZuSMa_Senat_18012022.pdf"}, "type": "Document"}, {"page_content": "\n\u00a7 21a Mechatronik (MME) Vollzeitstudium\n(1) Studiengangspezifische Zugangsvoraussetzungen gem\u00e4\u00df \u00a7 5  Abs. 1\nZugangsvoraussetzungen f\u00fcr den Masterstudiengang Mechatronik sind:\n1. Ein mit der Note 2,9 oder besser abgeschlossenes grundst\u00e4ndiges Hochschulstudium\ngem\u00e4\u00df \u00a7 5 Abs. 1 Nr. 1 in einem Studiengang der Fachrichtungen Maschinenbau,\nElektrotechnik, Fahrzeugtechnik, Mechatronik, Feinwerktechnik oder einer verwandten\nFachrichtung.\n2. Englischkenntnisse, \u00e4quivalent zu Niveau- Stufe B1 des Europ\u00e4ischen Referenzrahmens\nf\u00fcr das Lernen und Lehren von Fremdsprachen. Als \u00e4quivalent zu einem Zertifikat \u00fcber die\nNiveau -Stufe B1 gelten insbesondere folgende Nachweise:\nI. das Schulabschlusszeugnis, aus dem der Besuch des Englischunterrichts bis zum\nErreichen des mittleren Bildungsabschlusses (10. Klass e) bzw. bis zum Erreichen\nder Fachhochschulreife hervorgeht oder\nII. ein Notenspiegel, aus dem die bestandene Pr\u00fcfungsleistung \u00fcber eine\nLehrveranstaltung im Rahmen des grundst\u00e4ndigen Studiums hervorgeht, die die\nenglische Sprache zum Inhalt hatte oder\nIII. eine Bescheinigung \u00fcber den mindestens sechsmonatigen Aufenthalt an einer Schule, Hochschule oder anderen Bildungsinstitution mit Englisch als\nUnterrichtssprache oder\nIV. eine Bescheinigung \u00fcber den Aufenthalt im englischsprachigen Ausland, der einen Zeitraum von mindestens sechs Monaten bzw. einem Studiensemester umfasst.\nDie Vorlage anderer geeigneter Nachweise ist m\u00f6glich.\n(2) Auswahlkriterien nach \u00a7 9 Abs. 2\n1. Ergebnis eines Auswahlgespr\u00e4chs\nNicht zutreffend.\n2. Leistungen, die mit der Abschlusspr\u00fcfung des grundst\u00e4ndigen Studiums nach Abs. 1\ni. V. m. \u00a7 5 Abs. 1 Nr. 1 nachgewiesen sind\nDie Durchschnittsnote der Abschlusspr\u00fcfung des grundst\u00e4ndigen Hochschulstudiums nach\nAbs. 1 bildet die Teilnote 1 als Basis zur Bestimmung der Auswahlnote. Abweichend von Satz\n1 bildet in den F\u00e4llen des \u00a7 3 Abs. 2 Nr. 1 Satz 2 die Durchschnittsnote nach \u00a7 3 Abs. 2 Nr. 1\nSatz 3 die Teilnote 1. Bei ausl\u00e4ndischen Bildungsnachweisen ist die Durchschnittsnote nach\ndeutscher Deutung als Teilnote 1 zu ber\u00fccksichtigen.\nZus\u00e4tzlich werden die Einzelnoten folgender F\u00e4cher der Abschlusspr\u00fcfung des grundst\u00e4ndigen Hochschulstudiums, die \u00fcber die Eignung f\u00fcr den gew\u00e4hlten Studiengang\nbesonderen Aufschluss geben, f\u00fcr die Auswahl herangezogen:\n- Technische Mechanik (Dynamik),\n- Elektrotechnik,\n- Messt echnik,\n- Regelungstechnik,\n- Elektrische Antriebe.\nDabei wird eine Note zwischen 1,0 und 1,7 in einem der o. g. F\u00e4cher jeweils mit dem Wert 0,1\nbewertet. Die kumulierte Gesamtzahl bildet die Teilnote 2.", "metadata": {"file_path": "/home/tpllmws23/Chatbot-LLama-Pruefungsamt/main_data_filtered/119_ZuSMa_Senat_18012022.pdf"}, "type": "Document"}, {"page_content": "\n\u00a7 9 Zugangs - und Auswahlkriterien in den Masterstudieng\u00e4ngen\n(1)  1Im Besonderen Teil (\u00a7\u00a7 12 -26) dieser Satzung k\u00f6nnen ein oder mehrere der in Absatz 2\ngenannten Auswahlkriterien als weitere Zugangskriterien festgelegt werden. 2N\u00e4heres\nregelt der B esondere Teil f\u00fcr den jeweiligen Studiengang (\u00a7\u00a7 12 -26).\n(2)  1F\u00fcr die Bildung der Ranglisten f\u00fcr das erste Fachsemester in den Masterstudieng\u00e4ngen\nwird, neben dem Ergebnis des fachlich einschl\u00e4gigen Hochschulabschlusses oder des\ngleichwertigen Abschlusses,  mindestens eines der folgenden  Auswahlkriterien\nber\u00fccksichtigt:\n1. Leistungen, die in dem Studium erbracht wurden, das Voraussetzung f\u00fcr den Zugang\nzu dem Masterstudiengang ist ,   Seite 8 von 43 2. Englischkenntnisse , n\u00e4heres regelt der Besondere Teil f\u00fcr den jeweiligen Studiengang\n(\u00a7\u00a7 12 -26),\n3. Berufst\u00e4tigkeit und Qualifikationen:\na) Art einer abgeschlossenen Berufsausbildung oder einer Berufst\u00e4tigkeit in einem\nanerkannten Ausbildungsberuf  oder eine andere einschl\u00e4gige Berufst\u00e4tigkeit , die \u00fcber\ndie fachspezifische Eignung Auskunft gibt, jeweils  einzeln und in Kombination, und\nb) Qualifikation en, die \u00fcber die fachspezifische Leistung Auskunft geben, jeweils einzeln\noder in Kombination,\n4. das Ergebnis eines fachspezifischen Studieneignungstests ,\n5. das Ergebnis des Auswahlgespr\u00e4chs/anderen m\u00fcndlichen Verfahrens  gem\u00e4\u00df \u00a7 9a ,\n6. ein Motivationsschreiben,\n7. eine schriftliche Abhandlung (Essay).\n2N\u00e4heres sowie die Gewichtung regelt der B esondere Teil f\u00fcr den jeweiligen Studiengang (\u00a7\u00a7\n12-26).\n(2) 1Die Auswahl f\u00fcr h\u00f6here Fachsemester erfolgt gem\u00e4\u00df \u00a7 7 HZG i. V. m. \u00a7 32 HZVO.", "metadata": {"file_path": "/home/tpllmws23/Chatbot-LLama-Pruefungsamt/main_data_filtered/119_ZuSMa_Senat_18012022.pdf"}, "type": "Document"}]
    context = [entry['page_content'] for entry in data]
    #eval_prompt = create_eval_prompt("""Welche Dokumente kÃ¶nnen als Nachweis fÃ¼r deutsche Sprachkenntnisse akzeptiert werden?#""", str(context))
    eval_prompt = create_eval_prompt("""Was sind Zugangsvoraussetzungen f\u00fcr den Masterstudiengang Mechatronik?""", str(context))
    

    from typing import (
    Optional,

    )
    from langchain_core.runnables import RunnableConfig, ensure_config
    


    '''
    from transformers.pipelines.text_generation import ReturnType
    def postprocess(self, model_outputs, return_type=ReturnType.FULL_TEXT, clean_up_tokenization_spaces=True):
        return model_outputs
    
    from transformers import TextClassificationPipeline
    from transformers import Pipeline


    with ClassExtender(Pipeline, postprocess), ClassExtender(TextClassificationPipeline, postprocess):
        output = llm.invoke(eval_prompt)
    '''

    from typing import Any, List, Optional

    from langchain_core.callbacks import CallbackManagerForLLMRun
    from langchain_core.outputs import Generation, LLMResult
    def _generate(
    self,
    prompts: List[str],
    stop: Optional[List[str]] = None,
    run_manager: Optional[CallbackManagerForLLMRun] = None,
    **kwargs: Any,
) -> LLMResult:
        # List to hold all results
        text_generations: List[str] = []
        model_outputs: List[Any] = []
        pipeline_kwargs = kwargs.get("pipeline_kwargs", {})

        for i in range(0, len(prompts), self.batch_size):
            batch_prompts = prompts[i : i + self.batch_size]

            # Process batch of prompts
            responses = self.pipeline(
                batch_prompts,
                **pipeline_kwargs,
            )

            # Process each response in the batch
            for j, response in enumerate(responses):
                if isinstance(response, list):
                    # if model returns multiple generations, pick the top one
                    response = response[0]

                if self.pipeline.task == "text-generation":
                    text = response["generated_text"]
                    model_output = {
                        "logits": response["model_outputs"]["logits"],
                        "sequences": response["model_outputs"]["sequences"],
                        "input_ids": response["input_ids"],
                        "scores": response["model_outputs"]["scores"],
                    }
                elif self.pipeline.task == "text2text-generation":
                    text = response["generated_text"]
                elif self.pipeline.task == "summarization":
                    text = response["summary_text"]
                elif self.pipeline.task in "translation":
                    text = response["translation_text"]
                else:
                    raise ValueError(
                        f"Got invalid task {self.pipeline.task}, "
                        f"currently only {VALID_TASKS} are supported"
                    )

                # Append the processed text to results
                text_generations.append(text)
                if model_output is not None: model_outputs.append(model_output)

        return LLMResult(
            generations=[[Generation(text=text)] for text in text_generations],
            llm_output={
                "model_outputs": model_outputs
            }
        )
    
    def invoke(self, input: str) -> LLMResult:
        return (
            self.generate_prompt(
                [self._convert_input(input)],
            )
        )
    

    start_time = time.time()
    input_ids = tokenizer(eval_prompt, return_tensors="pt").input_ids
    with torch.no_grad():
        output = model(input_ids)
    prompt_eval_time = time.time() - start_time


    output: LLMResult = LLMResult(generations=[], llm_output={})
    
    start_time = time.time()
    with ClassExtender(type(llm), invoke), ClassExtender(type(llm), _generate):
        output = llm.invoke(eval_prompt) # type: ignore
    eval_time = time.time() - start_time


    

    if output.llm_output is None:
        print("MISSING LLM Outpout") 
        #return
    print(output.llm_output["model_outputs"])
    logits = output.llm_output["model_outputs"][0]["logits"]
    sequences = output.llm_output["model_outputs"][0]["sequences"]
    input_ids = output.llm_output["model_outputs"][0]["input_ids"]

    num_prompt_tokens = len(input_ids[0])
    num_generated_tokens = len(sequences[-1]) - num_prompt_tokens
    total_tokens = num_prompt_tokens + num_generated_tokens
    tokens_per_second = total_tokens / (prompt_eval_time + eval_time)

    print(f"Load time: {load_time * 1000:.2f} ms")
    print(f"Prompt eval time: {prompt_eval_time * 1000:.2f} ms / {num_prompt_tokens} tokens "
        f"({prompt_eval_time / num_prompt_tokens * 1000:.2f} ms per token, "
        f"{num_prompt_tokens / prompt_eval_time:.2f} tokens per second)")
    print(f"Eval time: {eval_time * 1000:.2f} ms / {num_generated_tokens} tokens "
        f"({eval_time / num_generated_tokens * 1000:.2f} ms per token, "
        f"{num_generated_tokens / eval_time:.2f} tokens per second)")
    print(f"Total time: {(prompt_eval_time + eval_time) * 1000:.2f} ms / {total_tokens} tokens")

    

Loading checkpoint shards: 100%|██████████| 3/3 [00:06<00:00,  2.01s/it]


GenerationConfig {
  "bos_token_id": 1,
  "do_sample": true,
  "eos_token_id": 2,
  "max_new_tokens": 1024,
  "output_hidden_states": [
    true
  ],
  "output_logits": [
    true
  ],
  "output_scores": [
    true
  ],
  "repetition_penalty": 1.2,
  "return_dict_in_generate": [
    true
  ],
  "temperature": 0.0001,
  "top_p": 0
}

[{'logits': (tensor([[-422.0000,    0.4844,    2.5625,  ...,   -5.5000,   -4.0000,
           -3.5625]]), tensor([[-302.0000,   -6.4062,   -0.3145,  ...,   -5.9688,   -4.7188,
           -6.5312]]), tensor([[-239.0000,   -8.5000,    3.4531,  ...,   -7.2812,   -8.3125,
           -6.4062]]), tensor([[-258.0000,   -8.3125,    4.4688,  ...,   -7.0625,   -7.7188,
           -7.4688]]), tensor([[-274.0000,   -7.5312,    2.7344,  ...,   -7.6875,   -7.5625,
           -7.2500]]), tensor([[-298.0000,   -8.1250,    3.2031,  ...,   -6.8438,   -8.0625,
           -7.5625]]), tensor([[-213.0000,   -9.5625,    3.4688,  ...,   -6.5312,   -8.6875,
           -7.6250]]), t

In [56]:
output.generations[0][0].text

"[INST]You are a smart helpful assistant for the HTWG Konstanz. Answer the following question based only on the provided context. It is mandatory to answer in GERMAN:\n\nContext: ['\\n§ 4 Sprachkenntnisse\\n(1) 1Neben den allgemeinen Zugangsvoraussetzungen (§ 59 LHG) sind für die in § 1 Abs. 1\\nS. 1 genannten Studiengänge deutsche Sprach kenntnisse nachzuweisen. 2Diese können\\ndurch eine deutsche Hochschulzugangsberechtigung (u. a. erfolgreich abgeschlossenes\\ngrundständiges Hochschulstudium) nachgewiesen werden. 3Ferner kann der\\nSprachnachweis durch die Vorlage eines der folgenden Dokumente erbracht werden:\\n1. Feststellungsprüfung für ein Bachelorstudium durch Vorlage der Zugangsberechtigung\\ndes Studienkollegs an der Hochschule Konstanz,\\n2. Test Deutsch als Fremdsprache (TestDaF), sofern im Durchschnitt mindestens die\\nStufe TDN 4 erreicht wurde,   Seite 5 von 43 3. Deutsche Sprachprüfung für den Hochschulzugang (DSH), sofern die DSH mit\\nmindestens der Stufe DSH -2 abges

In [60]:
scores = output.llm_output["model_outputs"][0]["scores"]
scores

(tensor([[-inf, -inf, -inf,  ..., -inf, -inf, -inf]]),
 tensor([[-inf, -inf, -inf,  ..., -inf, -inf, -inf]]),
 tensor([[-inf, -inf, -inf,  ..., -inf, -inf, -inf]]),
 tensor([[-inf, -inf, -inf,  ..., -inf, -inf, -inf]]),
 tensor([[-inf, -inf, -inf,  ..., -inf, -inf, -inf]]),
 tensor([[-inf, -inf, -inf,  ..., -inf, -inf, -inf]]),
 tensor([[-inf, -inf, -inf,  ..., -inf, -inf, -inf]]),
 tensor([[-inf, -inf, -inf,  ..., -inf, -inf, -inf]]),
 tensor([[-inf, -inf, -inf,  ..., -inf, -inf, -inf]]),
 tensor([[-inf, -inf, -inf,  ..., -inf, -inf, -inf]]),
 tensor([[-inf, -inf, -inf,  ..., -inf, -inf, -inf]]),
 tensor([[-inf, -inf, -inf,  ..., -inf, -inf, -inf]]),
 tensor([[-inf, -inf, -inf,  ..., -inf, -inf, -inf]]),
 tensor([[-inf, -inf, -inf,  ..., -inf, -inf, -inf]]),
 tensor([[-inf, -inf, -inf,  ..., -inf, -inf, -inf]]),
 tensor([[-inf, -inf, -inf,  ..., -inf, -inf, -inf]]),
 tensor([[-inf, -inf, -inf,  ..., -inf, -inf, -inf]]),
 tensor([[-inf, -inf, -inf,  ..., -inf, -inf, -inf]]),
 tensor([[

In [39]:
torch.stack(list(scores), dim=0).shape

torch.Size([649, 1, 32768])

In [61]:
import torch
import torch.nn.functional as F


# Assuming `logits` is a tensor of shape (sequence_length, vocab_size)
log_probs = F.log_softmax(torch.stack(list(logits), dim=0).squeeze(), dim=-1)
#probs = F.softmax(torch.stack(list(logits), dim=0).squeeze(), dim=-1)
#log_probs2 = torch.log(probs)
nll = F.nll_loss(log_probs, sequences[-1][input_ids.shape[1]:], reduction='none') # mask away prompt
average_nll = nll.mean()
# Perplexity is the exponentiation of the average NLL
perplexity = torch.exp(-average_nll)
print("PERPLEXITY: ", perplexity)

print("\n\nLOGPROBS: ", log_probs)

def score_probs(log_probs, sequence_ids):
    sequence_tokens = [tokenizer.decode(id) for id in sequence_ids]
    token_logprobs = []
    for k in range(1, sequence_ids.shape[0]):
        token_logprobs.append(log_probs[k-1, sequence_ids[k]])
    return sequence_tokens, token_logprobs


result = score_probs(log_probs, sequences[-1][input_ids.shape[1]:] )

sequence_ids = sequences[-1][input_ids.shape[1]:]

sequence_tokens = [tokenizer.decode(id) for id in sequence_ids]

print("SCORE: ", result)

print("NLL: ", nll)



PERPLEXITY:  tensor(0.7297)


LOGPROBS:  tensor([[-4.3830e+02, -1.5816e+01, -1.3738e+01,  ..., -2.1800e+01,
         -2.0300e+01, -1.9863e+01],
        [-3.2001e+02, -2.4421e+01, -1.8329e+01,  ..., -2.3983e+01,
         -2.2733e+01, -2.4546e+01],
        [-2.6047e+02, -2.9974e+01, -1.8020e+01,  ..., -2.8755e+01,
         -2.9786e+01, -2.7880e+01],
        ...,
        [-2.1625e+02, -3.3063e+01, -1.1688e+01,  ..., -3.2126e+01,
         -3.0876e+01, -2.9876e+01],
        [-2.7100e+02, -2.8563e+01, -7.3759e+00,  ..., -3.0813e+01,
         -2.9876e+01, -2.7001e+01],
        [-5.1752e+02, -2.2567e+01, -1.9638e-02,  ..., -2.4957e+01,
         -2.5363e+01, -2.5395e+01]])
SCORE:  (['To', 'answer', 'this', 'question', ',', 'we', 'need', 'to', 'identify', 'the', 'requirements', 'or', 'pr', 'ere', 'quis', 'ites', 'mentioned', 'in', 'the', 'context', 'for', 'admission', 'into', 'the', 'Master', "'", 's', 'program', 'in', 'Me', 'chat', 'ron', 'ics', '.', 'The', 'relevant', 'information', 'can', 'be

In [62]:
torch.exp(average_nll)

tensor(1.3704)

In [5]:

print(torch.stack(list(scores), dim=0).shape)
print(sequences[-1][input_ids.shape[1]:].shape)


result = score_probs(torch.stack(list(scores), dim=0).squeeze(), sequences[-1][input_ids.shape[1]:] )
for tok, log_prob in zip(result[0], result[1]):

    # | token | token string | logits | probability

    print(f"| {tok:8s} | {log_prob:.3f} | {torch.exp(-log_prob):.2%}")

torch.Size([649, 1, 32768])
torch.Size([649])
| To       | 4.948 | 0.71%
| answer   | 4.719 | 0.89%
| this     | 8.281 | 0.03%
| question | 8.750 | 0.02%
| ,        | 12.250 | 0.00%
| we       | 5.562 | 0.38%
| need     | 5.417 | 0.44%
| to       | 10.125 | 0.00%
| identify | 5.130 | 0.59%
| the      | 11.062 | 0.00%
| documents | 3.297 | 3.70%
| that     | 4.906 | 0.74%
| can      | 6.406 | 0.17%
| be       | 5.750 | 0.32%
| accepted | 6.781 | 0.11%
| as       | 8.562 | 0.02%
| proof    | 4.156 | 1.57%
| of       | 8.812 | 0.01%
| German   | 13.125 | 0.00%
| language | 14.688 | 0.00%
| skills   | -0.097 | 110.15%
| mentioned | 11.458 | 0.00%
| in       | 7.188 | 0.08%
| the      | 6.719 | 0.12%
| context  | 2.513 | 8.10%
| .        | 4.750 | 0.87%
| The      | 1.977 | 13.85%
| relevant | 11.688 | 0.00%
| information | 3.542 | 2.90%
| is       | 6.406 | 0.17%
| found    | 12.188 | 0.00%
| in       | 6.510 | 0.15%
| the      | 10.562 | 0.00%
| sentence | 2.148 | 11.67%
| :        | 10.2

In [42]:
score_probs(torch.stack(list(scores), dim=0).squeeze(), sequences[-1][input_ids.shape[1]:] )[1]

[tensor(4.9479),
 tensor(4.7188),
 tensor(8.2812),
 tensor(8.7500),
 tensor(12.2500),
 tensor(5.5625),
 tensor(5.4167),
 tensor(10.1250),
 tensor(5.1302),
 tensor(11.0625),
 tensor(3.2969),
 tensor(4.9062),
 tensor(6.4062),
 tensor(5.7500),
 tensor(6.7812),
 tensor(8.5625),
 tensor(4.1562),
 tensor(8.8125),
 tensor(13.1250),
 tensor(14.6875),
 tensor(-0.0967),
 tensor(11.4583),
 tensor(7.1875),
 tensor(6.7188),
 tensor(2.5130),
 tensor(4.7500),
 tensor(1.9766),
 tensor(11.6875),
 tensor(3.5417),
 tensor(6.4062),
 tensor(12.1875),
 tensor(6.5104),
 tensor(10.5625),
 tensor(2.1484),
 tensor(10.2500),
 tensor(4.1146),
 tensor(5.5312),
 tensor(6.8750),
 tensor(4.1875),
 tensor(3.5938),
 tensor(9.4271),
 tensor(3.0729),
 tensor(6.5365),
 tensor(-0.9703),
 tensor(3.5677),
 tensor(2.5651),
 tensor(0.8984),
 tensor(6.8229),
 tensor(9.0625),
 tensor(1.9661),
 tensor(2.2266),
 tensor(3.6979),
 tensor(2.3828),
 tensor(7.5521),
 tensor(1.7057),
 tensor(0.2588),
 tensor(4.5573),
 tensor(5.3385),
 t

In [43]:
score_probs(log_probs, sequences[-1][input_ids.shape[1]:] )[1]


[tensor(-8.6498),
 tensor(-12.4211),
 tensor(-10.8303),
 tensor(-10.2502),
 tensor(-6.7545),
 tensor(-14.4436),
 tensor(-14.8937),
 tensor(-11.8751),
 tensor(-12.7680),
 tensor(-8.3280),
 tensor(-12.4922),
 tensor(-14.8517),
 tensor(-12.2119),
 tensor(-14.9990),
 tensor(-14.1860),
 tensor(-12.9380),
 tensor(-17.8222),
 tensor(-12.1459),
 tensor(-8.0014),
 tensor(-8.8141),
 tensor(-22.2299),
 tensor(-4.8453),
 tensor(-12.5011),
 tensor(-10.9393),
 tensor(-14.9311),
 tensor(-14.2549),
 tensor(-15.1632),
 tensor(-6.6746),
 tensor(-16.2790),
 tensor(-14.2631),
 tensor(-4.3065),
 tensor(-13.6988),
 tensor(-7.8448),
 tensor(-14.6947),
 tensor(-7.8094),
 tensor(-10.1294),
 tensor(-16.7325),
 tensor(-15.6250),
 tensor(-27.8126),
 tensor(-20.9063),
 tensor(-2.1949),
 tensor(-13.1855),
 tensor(-13.1616),
 tensor(-27.1836),
 tensor(-17.9704),
 tensor(-22.1720),
 tensor(-19.1727),
 tensor(-16.1875),
 tensor(-11.1255),
 tensor(-22.7657),
 tensor(-21.9532),
 tensor(-17.9376),
 tensor(-16.0168),
 ten

In [44]:
logits[-1].shape
vocab_size = 32768

In [45]:
scores_test = torch.stack(scores).reshape(len(scores), -1).transpose(0, 1)
print(scores_test.shape)
torch.stack(list(scores), dim=0).squeeze().shape

torch.Size([32768, 649])


torch.Size([649, 32768])

In [46]:
scores_test = scores_test.reshape(-1, vocab_size, scores_test.shape[-1])
print(scores_test.shape)
scores_test

torch.Size([1, 32768, 649])


tensor([[[-548.0000, -318.0000, -256.0000,  ..., -166.0000, -101.0000,
          -484.0000],
         [   1.6953,   -6.2188,   -8.0625,  ...,   -8.5625,   -6.5312,
            -3.1094],
         [   4.3125,    1.1641,    4.5000,  ...,   10.6250,   12.0000,
            18.2500],
         ...,
         [  -4.4375,   -6.3125,   -7.1562,  ...,   -3.9531,   -7.0000,
            -6.1875],
         [  -4.3750,   -4.5312,   -8.4375,  ...,   -7.0625,   -5.7812,
            -6.0000],
         [  -4.5938,   -7.0938,   -6.7188,  ...,   -7.7188,   -4.3438,
            -5.4375]]])

In [47]:
scores_test = torch.nn.functional.log_softmax(scores_test, dim=1)
scores_test

tensor([[[-5.6254e+02, -3.3249e+02, -2.7494e+02,  ..., -1.8518e+02,
          -1.1956e+02, -5.0225e+02],
         [-1.2847e+01, -2.0710e+01, -2.7003e+01,  ..., -2.7743e+01,
          -2.5087e+01, -2.1364e+01],
         [-1.0230e+01, -1.3327e+01, -1.4441e+01,  ..., -8.5556e+00,
          -6.5557e+00, -4.3070e-03],
         ...,
         [-1.8980e+01, -2.0804e+01, -2.6097e+01,  ..., -2.3134e+01,
          -2.5556e+01, -2.4442e+01],
         [-1.8917e+01, -1.9022e+01, -2.7378e+01,  ..., -2.6243e+01,
          -2.4337e+01, -2.4254e+01],
         [-1.9136e+01, -2.1585e+01, -2.5660e+01,  ..., -2.6899e+01,
          -2.2899e+01, -2.3692e+01]]])

In [48]:
scores_test = scores_test.reshape(-1, scores_test.shape[-1])
print(scores_test.shape)
scores_test

torch.Size([32768, 649])


tensor([[-5.6254e+02, -3.3249e+02, -2.7494e+02,  ..., -1.8518e+02,
         -1.1956e+02, -5.0225e+02],
        [-1.2847e+01, -2.0710e+01, -2.7003e+01,  ..., -2.7743e+01,
         -2.5087e+01, -2.1364e+01],
        [-1.0230e+01, -1.3327e+01, -1.4441e+01,  ..., -8.5556e+00,
         -6.5557e+00, -4.3070e-03],
        ...,
        [-1.8980e+01, -2.0804e+01, -2.6097e+01,  ..., -2.3134e+01,
         -2.5556e+01, -2.4442e+01],
        [-1.8917e+01, -1.9022e+01, -2.7378e+01,  ..., -2.6243e+01,
         -2.4337e+01, -2.4254e+01],
        [-1.9136e+01, -2.1585e+01, -2.5660e+01,  ..., -2.6899e+01,
         -2.2899e+01, -2.3692e+01]])

In [49]:
indices = sequences[:,input_ids.shape[1]:]

# 8. Compute scores
transition_scores_test_from_scores = scores_test.gather(0, indices)

In [50]:
transition_scores_test

NameError: name 'transition_scores_test' is not defined

In [51]:
import numpy as np

generated_tokens = sequences[-1][input_ids.shape[1]:]

for tok, score in zip(generated_tokens, transition_scores_test_from_scores[0]):

    # | token | token string | logits | probability

    print(f"| {tok:5d} | {tokenizer.decode(tok):8s} | {score.numpy():.3f} | {np.exp(score.numpy()):.2%}")

|  2559 | To       | -0.292 | 74.65%
|  5140 | answer   | -0.220 | 80.23%
|  1224 | this     | -0.191 | 82.63%
|  3764 | question | -0.005 | 99.51%
| 29493 | ,        | -0.048 | 95.35%
|  1246 | we       | -0.005 | 99.52%
|  1695 | need     | -0.018 | 98.17%
|  1066 | to       | -0.004 | 99.63%
|  9819 | identify | -0.424 | 65.43%
|  1040 | the      | -0.295 | 74.44%
| 10949 | documents | -0.595 | 55.16%
|  1137 | that     | -0.633 | 53.11%
|  1309 | can      | -0.039 | 96.14%
|  1115 | be       | -0.124 | 88.34%
|  9776 | accepted | -0.467 | 62.71%
|  1158 | as       | -0.000 | 99.99%
|  7935 | proof    | -0.080 | 92.29%
|  1070 | of       | -0.004 | 99.57%
|  6335 | German   | -0.001 | 99.88%
|  4610 | language | -0.002 | 99.84%
|  7034 | skills   | -0.133 | 87.53%
|  7851 | mentioned | -0.497 | 60.83%
|  1065 | in       | -0.027 | 97.30%
|  1040 | the      | -0.030 | 97.04%
|  3526 | context  | -0.498 | 60.80%
| 29491 | .        | -0.027 | 97.38%
|  1183 | The      | -0.032 | 96.87%

In [52]:
compute_perplexity(transition_scores_test_from_scores[0])

1.1054807077175697

In [63]:
from multiprocessing import process
from transformers.generation.logits_process import RepetitionPenaltyLogitsProcessor, TemperatureLogitsWarper

processor = RepetitionPenaltyLogitsProcessor(1.2)
warper = TemperatureLogitsWarper(generation_config.temperature)

logits_shifted = torch.stack(list(logits), dim=0)[:-1, :, :].squeeze()
sequence_ids_shifted = sequence_ids.unsqueeze(0)[:, 1:]

logits_unshifted = torch.stack(list(logits), dim=0).squeeze()
sequence_ids_unshifted = sequence_ids.unsqueeze(0)[:, :]


scores_self = processor(input_ids, logits_unshifted)
print(scores_self.shape)
scores_self

torch.Size([312, 32768])


tensor([[-4.2200e+02,  4.8438e-01,  2.5625e+00,  ..., -5.5000e+00,
         -4.0000e+00, -3.5625e+00],
        [-3.0200e+02, -6.4062e+00, -3.1445e-01,  ..., -5.9688e+00,
         -4.7188e+00, -6.5312e+00],
        [-2.3900e+02, -8.5000e+00,  3.4531e+00,  ..., -7.2812e+00,
         -8.3125e+00, -6.4062e+00],
        ...,
        [-1.9300e+02, -9.8125e+00,  1.1562e+01,  ..., -8.8750e+00,
         -7.6250e+00, -6.6250e+00],
        [-2.4900e+02, -6.5625e+00,  1.4625e+01,  ..., -8.8125e+00,
         -7.8750e+00, -5.0000e+00],
        [-4.9800e+02, -3.0469e+00,  1.9500e+01,  ..., -5.4375e+00,
         -5.8438e+00, -5.8750e+00]])

In [40]:
min(50, scores_self.size(-1))
top_k = 50
indices_to_remove = scores_self < torch.topk(scores_self, top_k)[0][..., -1, None]
indices_to_remove
scores_processed_top_k = scores_self.masked_fill(indices_to_remove, -float("Inf"))
torch.max(scores_processed_top_k [0])

tensor(142500.)

In [43]:
top_p = 0.0

sorted_logits, sorted_indices = torch.sort(scores_processed_top_k, descending=False)
cumulative_probs = sorted_logits.softmax(dim=-1).cumsum(dim=-1)

# Remove tokens with cumulative top_p above the threshold (token with 0 are kept)
sorted_indices_to_remove = cumulative_probs <= (1 - top_p)
# Keep at least min_tokens_to_keep
sorted_indices_to_remove[..., -1 :] = 0

# scatter sorted tensors to original indexing
indices_to_remove = sorted_indices_to_remove.scatter(1, sorted_indices, sorted_indices_to_remove)
scores_processed_top_p = scores_processed_top_k.masked_fill(indices_to_remove, -float("Inf"))
print(torch.max(scores_processed_top_k [0]))
scores_processed_top_p 



tensor(142500.)


In [45]:
probs = F.softmax(scores_processed_top_p , dim=-1)
next_tokens = torch.multinomial(probs, num_samples=1).squeeze(1)
next_tokens

tensor([ 2559,  5140,  1224,  3764, 29493,  1246,  1695,  1066,  9819,  1040,
        10949,  1137,  1309,  1115,  9776,  1158,  7935,  1070,  6335,  4610,
         7034,  7851,  1065,  1040,  3526, 29491,  1183,  9366,  2639,  1309,
         2187,  1065,  1040, 13039, 29515,  1299,  2125, 29498, 15075,  1832,
         1113, 29533,  1031,  1847, 16325,  1659, 16714,  1363, 29479,  1363,
         1537,  1046,  7099,  1970, 12743, 21305, 13299,  1659, 16048,  9099,
         1152,  1261,  2088, 29474,  2002,  2241,  5523,  7214,  5593,  1299,
         1184, 29498, 15075,  1832,  1619, 13039, 13593,  1137,  1504,  1228,
         3320, 10949,  1137,  1309,  1115,  1158,  7935,  1070,  6335,  4610,
         7034, 10143, 29491,  1183, 13039,  1070,  1935, 10949,  1117,  2846,
         1158,  1040, 15003, 24516, 29491,  1183,   781, 29557,  2019, 10622,
         1493, 10438,  1183, 10949,  1137,  1309,  1115,  9776,  1158,  7935,
         1070,  6335,  4610,  7034,  3792, 29515, 29473, 29508, 

In [49]:
torch.max(probs[1])

tensor(1.)

In [38]:
torch.max(scores_processed[0]).item()

142500.0

In [17]:
i = 0
res = []
for row in logits_unshifted:
    #print(row.nelement())
    #print(i)
    #scores_self[i] = processor(input_ids, row.unsqueeze(0).to(torch.float))
    tmp = row.unsqueeze(0).clone().type(torch.FloatTensor)

    res.append(processor(input_ids, tmp))
    scores_self[i] = processor(input_ids, tmp) / generation_config.temperature
    i += 1

In [19]:
scores_self

tensor([[-5480000.0000,    16953.1250,    43125.0000,  ...,
           -44375.0000,   -43750.0000,   -45937.5000],
        [-3180000.0000,   -62187.5000,    11640.6250,  ...,
           -63125.0000,   -45312.5000,   -70937.5000],
        [-2560000.0000,   -80625.0000,    45000.0000,  ...,
           -71562.5000,   -84375.0000,   -67187.5000],
        ...,
        [-2860000.0000,   -73750.0000,    51875.0000,  ...,
           -47812.5000,   -57500.0000,   -60937.5000],
        [-2840000.0000,   -74062.5000,   105000.0000,  ...,
           -77500.0000,   -69062.5000,   -61875.0000],
        [-4820000.0000,   -23125.0000,   168750.0000,  ...,
           -64062.5000,   -61562.5000,   -51250.0000]])

In [31]:
res

[tensor([[-548.0000,    1.6953,    4.3125,  ...,   -4.4375,   -4.3750,
            -4.5938]]),
 tensor([[-318.0000,   -6.2188,    1.1641,  ...,   -6.3125,   -4.5312,
            -7.0938]]),
 tensor([[-256.0000,   -8.0625,    4.5000,  ...,   -7.1562,   -8.4375,
            -6.7188]]),
 tensor([[-274.0000,   -7.8125,    5.8125,  ...,   -6.8750,   -7.9062,
            -7.7500]]),
 tensor([[-310.0000,   -7.1875,    3.8750,  ...,   -7.7812,   -7.6875,
            -7.4062]]),
 tensor([[-304.0000,   -7.7500,    3.7969,  ...,   -6.7812,   -8.4375,
            -7.5312]]),
 tensor([[-228.0000,   -9.2500,    4.1250,  ...,   -6.5000,   -8.8125,
            -7.9688]]),
 tensor([[-280.0000,   -9.0625,    5.5000,  ...,   -7.4062,   -9.1250,
            -9.6875]]),
 tensor([[-193.0000,   -8.1875,    3.1406,  ...,   -4.9062,   -6.5625,
            -7.3750]]),
 tensor([[-310.0000,   -6.9688,    4.4062,  ...,   -6.6875,   -6.7812,
            -6.2812]]),
 tensor([[-302.0000,   -6.7500,    2.2031,  ...,  

In [29]:
torch.stack(list(scores), dim=0).squeeze()

tensor([[-548.0000,    1.6953,    4.3125,  ...,   -4.4375,   -4.3750,
           -4.5938],
        [-318.0000,   -6.2188,    1.1641,  ...,   -6.3125,   -4.5312,
           -7.0938],
        [-256.0000,   -8.0625,    4.5000,  ...,   -7.1562,   -8.4375,
           -6.7188],
        ...,
        [-166.0000,   -8.5625,   10.6250,  ...,   -3.9531,   -7.0625,
           -7.7188],
        [-101.0000,   -6.5312,   12.0000,  ...,   -7.0000,   -5.7812,
           -4.3438],
        [-484.0000,   -3.1094,   18.2500,  ...,   -6.1875,   -6.0000,
           -5.4375]])

In [46]:
i = 0
x = torch.stack(list(scores), dim=0).squeeze()
for row in scores_self:
    j = 0
    for element in row:
        if element != x[i][j] :
            print("NOT EQUAL: ", j)
            print(element.item() )
            print(x[i][j].item() )
        j+=1
    i+=1

NOT EQUAL:  1053
0.8593749403953552
0.859375
NOT EQUAL:  1065
1.7578123807907104
1.7578125
NOT EQUAL:  1102
1.8749998807907104
1.875
NOT EQUAL:  1108
1.7968748807907104
1.796875
NOT EQUAL:  1155
1.9531248807907104
1.953125
NOT EQUAL:  1216
0.1147460862994194
0.11474609375
NOT EQUAL:  1217
0.02746581844985485
0.0274658203125
NOT EQUAL:  1328
6.874999523162842
6.875
NOT EQUAL:  1347
1.8749998807907104
1.875
NOT EQUAL:  1429
6.249999523162842
6.25
NOT EQUAL:  1805
0.8984374403953552
0.8984375
NOT EQUAL:  1970
1.5820311307907104
1.58203125
NOT EQUAL:  2288
0.3857421576976776
0.3857421875
NOT EQUAL:  2391
0.2490234225988388
0.2490234375
NOT EQUAL:  3973
1.6210936307907104
1.62109375
NOT EQUAL:  7590
0.8105468153953552
0.810546875
NOT EQUAL:  8130
0.7910155653953552
0.791015625
NOT EQUAL:  9622
1.6796873807907104
1.6796875
NOT EQUAL:  9799
0.4687499701976776
0.46875
NOT EQUAL:  12066
1.8359373807907104
1.8359375
NOT EQUAL:  15833
0.8789061903953552
0.87890625
NOT EQUAL:  21014
0.062255855649

Exception ignored in: <bound method IPythonKernel._clean_thread_parent_frames of <ipykernel.ipkernel.IPythonKernel object at 0x7f198c036190>>
Traceback (most recent call last):
  File "/home/tpllmws23/environments/rag/lib/python3.11/site-packages/ipykernel/ipkernel.py", line 775, in _clean_thread_parent_frames
    def _clean_thread_parent_frames(

KeyboardInterrupt: 


NOT EQUAL:  1058
3.359374761581421
3.359375
NOT EQUAL:  1117
3.828124761581421
3.828125
NOT EQUAL:  1124
3.281249761581421
3.28125
NOT EQUAL:  1131
1.5820311307907104
1.58203125
NOT EQUAL:  1133
3.203124761581421
3.203125
NOT EQUAL:  1152
3.359374761581421
3.359375
NOT EQUAL:  1164
6.171874523162842
6.171875
NOT EQUAL:  1195
3.242187261581421
3.2421875
NOT EQUAL:  1224
3.984375
3.3203125
NOT EQUAL:  1228
3.749999761581421
3.75
NOT EQUAL:  1246
4.4375
3.6979165077209473
NOT EQUAL:  1429
1.9140623807907104
1.9140625
NOT EQUAL:  1454
1.9531248807907104
1.953125
NOT EQUAL:  1695
7.0
5.8333330154418945
NOT EQUAL:  2256
1.9921873807907104
1.9921875
NOT EQUAL:  2559
6.03125
5.026041507720947
NOT EQUAL:  2851
1.6015623807907104
1.6015625
NOT EQUAL:  3041
0.7617186903953552
0.76171875
NOT EQUAL:  3064
1.7968748807907104
1.796875
NOT EQUAL:  3526
6.718749523162842
6.71875
NOT EQUAL:  3764
0.4589843451976776
0.458984375
NOT EQUAL:  3923
1.9531248807907104
1.953125
NOT EQUAL:  29474
1.796874880790

KeyboardInterrupt: 

In [25]:
torch.stack(list(scores), dim=0).shape

torch.Size([649, 1, 32768])

In [7]:
eq = torch.eq(scores_self, torch.stack(list(scores), dim=0).squeeze())
torch.any(eq, 0).shape

torch.Size([32768])

In [8]:
torch.equal(scores_self, torch.stack(list(scores), dim=0).squeeze())

False

In [28]:
torch.allclose(scores_self, torch.stack(list(scores), dim=0).squeeze())

False

In [9]:
print(torch.sum(torch.eq(scores_self, torch.stack(list(scores), dim=0).squeeze())).item()/scores_self.nelement())

1.0172526041666666e-05


In [190]:
torch.stack(list(scores), dim=0).squeeze()

tensor([[-548.0000,    1.6953,    4.3125,  ...,   -4.4375,   -4.3750,
           -4.5938],
        [-318.0000,   -6.2188,    1.1641,  ...,   -6.3125,   -4.5312,
           -7.0938],
        [-256.0000,   -8.0625,    4.5000,  ...,   -7.1562,   -8.4375,
           -6.7188],
        ...,
        [-166.0000,   -8.5625,   10.6250,  ...,   -3.9531,   -7.0625,
           -7.7188],
        [-101.0000,   -6.5312,   12.0000,  ...,   -7.0000,   -5.7812,
           -4.3438],
        [-484.0000,   -3.1094,   18.2500,  ...,   -6.1875,   -6.0000,
           -5.4375]])

In [65]:
vocab_size = 32768
scores_transition = scores_self.transpose(0, 1)
print(scores_transition.shape)
print(torch.stack(list(logits), dim=0).squeeze().shape)
scores_transition = scores_transition.reshape(-1, vocab_size, scores_transition.shape[-1])
print(scores_transition.shape)
scores_transition = torch.nn.functional.log_softmax(scores_transition, dim=1)

print(scores_transition.shape)
print(sequence_ids_shifted.shape)
print(sequences[:,input_ids.shape[1]:].shape)
indices = sequence_ids_shifted

# 8. Compute scores
transition_scores_self = scores_transition.squeeze().gather(0, indices)
transition_scores_self 

torch.Size([32768, 249])
torch.Size([249, 32768])
torch.Size([1, 32768, 249])
torch.Size([1, 32768, 249])
torch.Size([1, 248])
torch.Size([1, 249])


tensor([[ -9.5944, -12.4211, -10.8303, -10.2502,  -6.7545, -14.4436, -14.8937,
         -11.8751, -12.7679,  -8.3280, -12.4921, -14.8516, -12.2119, -14.9990,
         -14.1860, -12.9380, -17.8222, -12.1459,  -8.0014,  -8.8141, -22.2299,
          -4.8453, -12.5011, -10.9393, -14.9311, -14.2549, -15.1632,  -6.6746,
         -16.2790, -14.2631,  -4.3065, -13.6988,  -7.8448, -14.6946,  -7.8094,
         -10.1293, -16.7325, -15.6250, -27.8126, -20.9063,  -2.1949, -13.1855,
         -13.1616, -27.1836, -17.9704, -22.1720, -19.1727, -16.1875, -11.1255,
         -22.7657, -21.9532, -17.9376, -16.0168, -12.3128, -18.8283, -22.4396,
         -15.1592, -14.7189, -22.0314, -22.5156, -18.6406, -23.1250, -22.5247,
         -10.5006, -21.2423, -17.4688, -21.7188, -16.2507, -13.3658, -15.9087,
         -18.0314, -14.7500, -24.8750, -20.2813, -14.8928,  -5.4550, -12.6192,
         -10.7569, -14.1770, -15.8129,  -7.1367, -15.5044, -11.3808, -11.5004,
         -14.5989, -17.1250, -18.5330, -14.5287,  -5

In [53]:
print(scores[12][0][1695])
print(scores_self[12][1695])

tensor(4.4271)
tensor(5.3125)


In [64]:
import math

def compute_perplexity(log_probs):
    total_log_prob = 0
    for log_prob in log_probs:
        total_log_prob += log_prob
    perplexity = math.exp(-total_log_prob / len(log_probs))
    return perplexity

In [69]:
transition_scores = model.compute_transition_scores(sequences, torch.unbind(scores_self.unsqueeze(1), dim=0), normalize_logits=True)
import numpy as np

generated_tokens = sequences[-1][input_ids.shape[1]:][1:]


log_probs = transition_scores[0][:-1]


print("PERPLEXITY: ", compute_perplexity(log_probs))
 
for tok, score in zip(generated_tokens, log_probs):

    # | token | token string | logits | probability

    print(f"| {tok:5d} | {tokenizer.decode(tok):8s} | {score.numpy():.3f} | {np.exp(score.numpy()):.2%}")

PERPLEXITY:  1.371638369396272
|  5140 | answer   | -0.168 | 84.49%
|  1224 | this     | -0.015 | 98.54%
|  3764 | question | -1.224 | 29.42%
| 29493 | ,        | -0.000 | 99.99%
|  1246 | we       | -0.003 | 99.70%
|  1695 | need     | -0.002 | 99.79%
|  1066 | to       | -0.012 | 98.85%
|  9819 | identify | -0.000 | 99.97%
|  1040 | the      | -0.464 | 62.85%
|  9064 | requirements | -0.038 | 96.28%
|  1210 | or       | -1.039 | 35.39%
|  1492 | pr       | -2.470 | 8.46%
|  1165 | ere      | -0.620 | 53.77%
| 11993 | quis     | -0.000 | 100.00%
|  4155 | ites     | -0.000 | 99.99%
|  7851 | mentioned | -0.000 | 99.99%
|  1065 | in       | -2.054 | 12.82%
|  1040 | the      | -0.063 | 93.91%
|  3526 | context  | -0.000 | 99.96%
|  1122 | for      | -0.079 | 92.40%
| 24256 | admission | -0.143 | 86.64%
|  1546 | into     | -1.099 | 33.31%
|  1040 | the      | -0.704 | 49.48%
| 10129 | Master   | -0.007 | 99.29%
| 29510 | '        | -0.454 | 63.51%
| 29481 | s        | -0.012 | 98.81%
|

In [5]:
transition_scores = model.compute_transition_scores(sequences, scores, normalize_logits=True)
import numpy as np

generated_tokens = sequences[-1][input_ids.shape[1]:]

print("PERPLEXITY: ", compute_perplexity(transition_scores[0]))

for tok, score in zip(generated_tokens, transition_scores[0]):

    # | token | token string | logits | probability

    print(f"| {tok:5d} | {tokenizer.decode(tok):8s} | {score.numpy():.3f} | {np.exp(score.numpy()):.2%}")

PERPLEXITY:  1.9461695291850882
|  1183 | The      | -3.542 | 2.89%
|  3526 | context  | -1.157 | 31.44%
|  6080 | provides | -0.329 | 71.98%
|  3624 | several  | -2.505 | 8.17%
|  3645 | options  | -0.345 | 70.85%
|  1137 | that     | -1.684 | 18.56%
|  1309 | can      | -0.010 | 98.98%
|  7799 | serve    | -1.920 | 14.66%
|  1158 | as       | -0.000 | 100.00%
|  7935 | proof    | -0.255 | 77.52%
|  1070 | of       | -0.013 | 98.75%
|  6335 | German   | -0.004 | 99.58%
|  4610 | language | -0.001 | 99.89%
|  7034 | skills   | -0.186 | 83.02%
| 29491 | .        | -0.338 | 71.34%
|  3725 | These    | -0.599 | 54.92%
|  3792 | include  | -0.038 | 96.32%
| 29515 | :        | -0.182 | 83.35%
|  1027 |          | -3.716 | 2.43%
|   781 | 
        | -0.212 | 80.88%
| 29508 | 1        | -1.278 | 27.85%
| 29491 | .        | -0.010 | 99.01%
|  1098 | A        | -0.080 | 92.27%
| 16511 | certificate | -0.474 | 62.23%
|  8870 | showing  | -2.526 | 8.00%
|  6821 | successful | -1.172 | 30.99%
| 15

In [66]:
import numpy as np

generated_tokens = sequences[-1][input_ids.shape[1]:]

print("PERPLEXITY: ", compute_perplexity(transition_scores_self[0]))
 
for tok, score in zip(generated_tokens, transition_scores_self[0]):

    # | token | token string | logits | probability

    print(f"| {tok:5d} | {tokenizer.decode(tok):8s} | {score.numpy():.3f} | {np.exp(score.numpy()):.2%}")

PERPLEXITY:  798828.3533445142
|  2559 | To       | -9.594 | 0.01%
|  5140 | answer   | -12.421 | 0.00%
|  1224 | this     | -10.830 | 0.00%
|  3764 | question | -10.250 | 0.00%
| 29493 | ,        | -6.755 | 0.12%
|  1246 | we       | -14.444 | 0.00%
|  1695 | need     | -14.894 | 0.00%
|  1066 | to       | -11.875 | 0.00%
|  9819 | identify | -12.768 | 0.00%
|  1040 | the      | -8.328 | 0.02%
| 10949 | documents | -12.492 | 0.00%
|  1137 | that     | -14.852 | 0.00%
|  1309 | can      | -12.212 | 0.00%
|  1115 | be       | -14.999 | 0.00%
|  9776 | accepted | -14.186 | 0.00%
|  1158 | as       | -12.938 | 0.00%
|  7935 | proof    | -17.822 | 0.00%
|  1070 | of       | -12.146 | 0.00%
|  6335 | German   | -8.001 | 0.03%
|  4610 | language | -8.814 | 0.01%
|  7034 | skills   | -22.230 | 0.00%
|  7851 | mentioned | -4.845 | 0.79%
|  1065 | in       | -12.501 | 0.00%
|  1040 | the      | -10.939 | 0.00%
|  3526 | context  | -14.931 | 0.00%
| 29491 | .        | -14.255 | 0.00%
|  1183 | T

In [116]:
torch.unbind(scores_self.unsqueeze(1), dim=0)

(tensor([[-548.0000,    1.6953,    3.5937,  ...,   -4.4375,   -4.3750,
            -4.5938]]),
 tensor([[-318.0000,   -6.2188,    1.1641,  ...,   -6.3125,   -4.5312,
            -7.0938]]),
 tensor([[-256.0000,   -8.0625,    4.5000,  ...,   -7.1562,   -8.4375,
            -6.7188]]),
 tensor([[-274.0000,   -7.8125,    5.8125,  ...,   -6.8750,   -7.9062,
            -7.7500]]),
 tensor([[-310.0000,   -7.1875,    3.8750,  ...,   -7.7812,   -7.6875,
            -7.4062]]),
 tensor([[-304.0000,   -7.7500,    3.7969,  ...,   -6.7812,   -8.4375,
            -7.5312]]),
 tensor([[-228.0000,   -9.2500,    4.1250,  ...,   -6.5000,   -8.8125,
            -7.9688]]),
 tensor([[-280.0000,   -9.0625,    5.5000,  ...,   -7.4062,   -9.1250,
            -9.6875]]),
 tensor([[-193.0000,   -8.1875,    3.1406,  ...,   -4.9062,   -6.5625,
            -7.3750]]),
 tensor([[-310.0000,   -6.9688,    4.4062,  ...,   -6.6875,   -6.7812,
            -6.2812]]),
 tensor([[-302.0000,   -6.7500,    2.2031,  ...,  

AttributeError: 'tuple' object has no attribute 'shape'

In [65]:
xd = score_probs(torch.stack(list(logits), dim=0).squeeze(), sequences[-1][input_ids.shape[1]:] )[1]

In [62]:
print(sequences[-1][input_ids.shape[1]:].shape)

torch.Size([649])


In [66]:
torch.stack(list(logits), dim=0).squeeze().shape

print(sequence_ids[1:].shape)
torch.tensor(xd).shape

torch.Size([648])


torch.Size([648])

In [80]:
print(sequence_ids[1:].shape)
print(torch.tensor(xd).shape)
print(sequence_ids.shape)
print(torch.stack(list(logits), dim=0).squeeze().shape)

logits2= torch.stack(list(logits), dim=0)[:-1, :, :] # match shift -> no probability for first token
sequence_ids2 = sequence_ids.unsqueeze(0)[:, 1:] # shift away first token as probs[0] -> token[1]
gen_probs = torch.gather(logits2, 2, sequence_ids2[:,None,:]).squeeze(1)


print(logits2.shape)
print(sequence_ids2[:,None,:].shape)

torch.Size([648])
torch.Size([648])
torch.Size([649])
torch.Size([649, 32768])
torch.Size([648, 1, 32768])
torch.Size([1, 1, 648])


torch.Size([1, 648])

In [92]:
transition_scores_test_from_scores

tensor([[-2.9233e-01, -2.2030e-01, -1.9076e-01, -4.8652e-03, -4.7661e-02,
         -4.7935e-03, -1.8475e-02, -3.6574e-03, -4.2415e-01, -2.9515e-01,
         -5.9499e-01, -6.3272e-01, -3.9393e-02, -1.2393e-01, -4.6663e-01,
         -8.8926e-05, -8.0210e-02, -4.2637e-03, -1.1828e-03, -1.5914e-03,
         -1.3316e-01, -4.9716e-01, -2.7382e-02, -3.0030e-02, -4.9766e-01,
         -2.6540e-02, -3.1793e-02, -2.1462e-01, -2.8960e-02, -9.7140e-02,
         -2.4637e-02, -3.4244e-01, -4.4660e-01, -6.2563e-01, -4.0899e-01,
         -6.9131e-01, -7.2264e-02, -6.9141e-06, -4.3391e-05, -9.5367e-06,
         -7.9477e-01, -7.1876e-01, -5.0048e-02, -1.8202e-04, -2.0821e-02,
         -7.2953e-04, -6.9936e-03, -4.6123e-04, -9.9790e-03, -4.2978e-04,
         -8.0065e-04, -1.6554e-03, -2.3759e-02, -4.3002e-03, -3.2238e-03,
         -8.2221e-03, -9.9501e-03, -1.8577e-03, -1.1165e-02, -1.9167e-04,
         -5.4523e-04, -1.8070e-04, -4.2325e-02, -3.9372e-03, -1.9007e-03,
         -5.4226e-04, -1.6731e-03, -1.

In [94]:
print(sequence_ids.shape)
print(torch.stack(list(logits), dim=0).squeeze().shape)

logits_shifted = torch.stack(list(logits), dim=0)[:-1, :, :].squeeze()
sequence_ids_shifted = sequence_ids.unsqueeze(0)[:, 1:]

print(logits_shifted.shape)
print(sequence_ids_shifted.shape)

score1 = torch.gather(logits_shifted, 1, sequence_ids_shifted  )
score1

torch.Size([649])
torch.Size([649, 32768])
torch.Size([648, 32768])
torch.Size([1, 648])


tensor([[ 5.9375e+00,  3.1250e+00, -9.1797e-01, -5.3906e-01, -1.0000e+00,
          6.6406e-01,  9.5000e+00,  1.1484e+00,  3.2656e+00, -1.9688e+00,
          5.6250e-01, -8.2031e-01,  4.1406e-01,  1.7578e+00,  1.1641e+00,
         -2.8906e+00,  1.4141e+00,  3.7344e+00,  3.4375e-01, -3.1406e+00,
          3.4961e-01,  2.1094e+00,  3.2656e+00,  4.0625e+00,  1.5078e+00,
          1.1000e+01, -6.6016e-01, -1.4453e+00,  6.1328e-01, -1.9375e+00,
          2.1094e+00,  3.2656e+00,  2.6562e+00,  4.2500e+00,  5.5938e+00,
          2.9531e+00,  1.6328e+00,  1.1641e+00,  3.4375e+00,  3.0625e+00,
         -1.7891e+00,  1.3733e-02,  5.1514e-02,  3.1836e-01,  2.5312e+00,
          1.4688e+00, -2.1094e+00,  1.8984e+00, -2.1094e+00, -5.8750e+00,
         -3.1406e+00, -5.9326e-02,  1.8984e+00,  2.2812e+00, -1.7266e+00,
          2.4062e+00,  2.5312e+00,  1.5625e+00, -1.6016e+00,  4.4688e+00,
         -1.5547e+00, -1.1641e+00, -2.1875e+00,  1.3516e+00,  1.6309e-01,
         -1.4453e-01,  1.6094e+00,  1.

In [137]:
print(transition_scores_self.shape)
print(transition_scores_test_from_scores.shape)
torch.eq(transition_scores_self, transition_scores_test_from_scores[:,1:])

torch.Size([1, 648])
torch.Size([1, 649])


tensor([[False, False, False, False, False, False, False, False, False, False,
         False, False, False, False, False, False, False, False, False, False,
         False, False, False, False, False, False, False, False, False, False,
         False, False, False, False, False, False, False, False, False, False,
         False, False, False, False, False, False, False, False, False, False,
         False, False, False, False, False, False, False, False, False, False,
         False, False, False, False, False, False, False, False, False, False,
         False, False, False, False, False, False, False, False, False, False,
         False, False, False, False, False, False, False, False, False, False,
         False, False, False, False, False, False, False, False, False, False,
         False, False, False, False, False, False, False, False, False, False,
         False, False, False, False, False, False, False, False, False, False,
         False, False, False, False, False, False, F

In [166]:
torch.eq(torch.stack(list(logits), dim=0).squeeze(), torch.stack(list(scores), dim=0).squeeze())

tensor([[True, True, True,  ..., True, True, True],
        [True, True, True,  ..., True, True, True],
        [True, True, True,  ..., True, True, True],
        ...,
        [True, True, True,  ..., True, True, True],
        [True, True, True,  ..., True, True, True],
        [True, True, True,  ..., True, True, True]])


In [186]:
torch.eq(scores_self, torch.stack(list(scores), dim=0).squeeze())

tensor([[True, True, True,  ..., True, True, True],
        [True, True, True,  ..., True, True, True],
        [True, True, True,  ..., True, True, True],
        ...,
        [True, True, True,  ..., True, True, True],
        [True, True, True,  ..., True, True, True],
        [True, True, True,  ..., True, True, True]])

In [172]:
probs2 = torch.stack(list(logits), dim=0)[:-1, :, :] # match shift -> no probability for first token
sequence_ids2 = sequence_ids.unsqueeze(0)[:, 1:] # shift away first token as probs[0] -> token[1]
gen_probs = torch.gather(probs2, 2, sequence_ids2[:,None,:]).squeeze(1)
print(gen_probs.shape)
print(sequence_ids2.shape)

torch.Size([1, 648])
torch.Size([1, 648])


In [187]:
torch.max(torch.where(torch.eq(scores_self, torch.stack(list(scores), dim=0).squeeze()) != True, 1.0, 0.0))

tensor(1.)

In [167]:
import numpy as np
np.savetxt('my_file.txt', torch.eq(torch.stack(list(logits), dim=0).squeeze(), torch.stack(list(scores), dim=0).squeeze()).numpy())

In [193]:
print(scores_self.shape)
print(torch.stack(list(scores), dim=0).squeeze().shape)
torch.eq(scores_self, torch.stack(list(scores), dim=0).squeeze())

torch.Size([649, 32768])
torch.Size([649, 32768])


tensor([[True, True, True,  ..., True, True, True],
        [True, True, True,  ..., True, True, True],
        [True, True, True,  ..., True, True, True],
        ...,
        [True, True, True,  ..., True, True, True],
        [True, True, True,  ..., True, True, True],
        [True, True, True,  ..., True, True, True]])

In [192]:
eqTest = torch.eq(scores_self, torch.stack(list(scores), dim=0).squeeze())
x = torch.where(eqTest != True)
x

(tensor([  0,   0,   0,  ..., 648, 648, 648]),
 tensor([ 1053,  1065,  1102,  ..., 30132, 30240, 30430]))

In [197]:
eqTest[0].nonzero().shape

torch.Size([32736, 1])

In [69]:
transition_scores = model.compute_transition_scores(sequences, scores, normalize_logits=True)
import numpy as np

generated_tokens = sequences[-1][input_ids.shape[1]:]

for tok, score in zip(generated_tokens, transition_scores[0]):

    # | token | token string | logits | probability

    print(f"| {tok:5d} | {tokenizer.decode(tok):8s} | {score.numpy():.3f} | {np.exp(score.numpy()):.2%}")

|  2559 | To       | -0.300 | 74.07%
|  5140 | answer   | -0.239 | 78.78%
|  1224 | this     | -0.194 | 82.33%
|  3764 | question | -0.005 | 99.52%
| 29493 | ,        | -0.049 | 95.19%
|  1246 | we       | -0.005 | 99.51%
|  1695 | need     | -0.018 | 98.17%
|  1066 | to       | -0.004 | 99.63%
|  9819 | identify | -0.425 | 65.37%
|  1040 | the      | -0.338 | 71.35%
| 10949 | documents | -0.606 | 54.53%
|  1137 | that     | -0.588 | 55.52%
|  1309 | can      | -0.039 | 96.13%
|  1115 | be       | -0.138 | 87.10%
|  9776 | accepted | -0.466 | 62.73%
|  1158 | as       | -0.000 | 99.99%
|  7935 | proof    | -0.080 | 92.29%
|  1070 | of       | -0.004 | 99.57%
|  6335 | German   | -0.001 | 99.88%
|  4610 | language | -0.002 | 99.84%
|  7034 | skills   | -0.133 | 87.55%
|  7851 | mentioned | -0.497 | 60.86%
|  1065 | in       | -0.027 | 97.30%
|  1040 | the      | -0.030 | 97.04%
|  3526 | context  | -0.510 | 60.03%
| 29491 | .        | -0.026 | 97.44%
|  1183 | The      | -0.027 | 97.29%

In [86]:
min = []
for row in log_probs:
    #print(row)
    print(torch.max(row))
    min.append(torch.max(row).item())

tensor(-0.3527)
tensor(-0.0166)
tensor(-0.1428)
tensor(-0.0002)
tensor(-0.0046)
tensor(-0.0062)
tensor(-0.0187)
tensor(-0.0001)
tensor(-0.4252)
tensor(-0.0183)
tensor(-0.6131)
tensor(-0.5886)
tensor(-0.4932)
tensor(-0.1382)
tensor(-0.4669)
tensor(-0.0004)
tensor(-0.1034)
tensor(-0.0834)
tensor(-0.0014)
tensor(-0.0016)
tensor(-0.1330)
tensor(-0.4697)
tensor(-0.0011)
tensor(-0.0018)
tensor(-0.0726)
tensor(-0.0049)
tensor(-0.1181)
tensor(-0.2368)
tensor(-0.0291)
tensor(-0.0443)
tensor(-0.0577)
tensor(-0.0113)
tensor(-0.0308)
tensor(-0.5229)
tensor(-0.0572)
tensor(-1.2470)
tensor(-0.0122)
tensor(-8.5830e-06)
tensor(-7.9509e-05)
tensor(-4.7206e-05)
tensor(-1.0492)
tensor(-0.3668)
tensor(-0.0058)
tensor(-1.1444e-05)
tensor(-0.0018)
tensor(-9.5244e-05)
tensor(-0.0009)
tensor(-3.9457e-05)
tensor(-0.0005)
tensor(-2.7537e-05)
tensor(-8.9165e-05)
tensor(-0.0001)
tensor(-0.0012)
tensor(-0.0003)
tensor(-0.0002)
tensor(-0.0002)
tensor(-0.0028)
tensor(-0.0001)
tensor(-0.0002)
tensor(-2.1338e-05)
tens

In [91]:
transition_scores

tensor([[-3.0015e-01, -2.3851e-01, -1.9447e-01, -4.8571e-03, -4.9283e-02,
         -4.8999e-03, -1.8478e-02, -3.6629e-03, -4.2517e-01, -3.3750e-01,
         -6.0647e-01, -5.8844e-01, -3.9480e-02, -1.3814e-01, -4.6625e-01,
         -8.8688e-05, -8.0214e-02, -4.2657e-03, -1.1571e-03, -1.5651e-03,
         -1.3294e-01, -4.9665e-01, -2.7347e-02, -3.0029e-02, -5.1036e-01,
         -2.5977e-02, -2.7480e-02, -2.1359e-01, -2.9048e-02, -9.7126e-02,
         -2.4876e-02, -3.4356e-01, -4.3006e-01, -6.2311e-01, -4.0414e-01,
         -7.0115e-01, -6.8293e-02, -6.9141e-06, -4.9113e-05, -1.0490e-05,
         -8.2086e-01, -7.0521e-01, -5.5858e-02, -1.9548e-04, -2.2055e-02,
         -7.3442e-04, -7.1356e-03, -4.7625e-04, -1.0028e-02, -4.2811e-04,
         -7.7575e-04, -1.6602e-03, -2.4087e-02, -4.6682e-03, -3.2293e-03,
         -8.5410e-03, -9.3506e-03, -1.8923e-03, -1.1185e-02, -1.9453e-04,
         -5.5286e-04, -1.8166e-04, -4.0535e-02, -3.9632e-03, -1.7122e-03,
         -4.8626e-04, -1.7307e-03, -1.

In [103]:
len(min)

for tok, log_prob in zip(generated_tokens, min):

    # | token | token string | logits | probability

    print(f"| {tok:5d} | {tokenizer.decode(tok):8s} | {log_prob:.3f} | {math.exp(log_prob):.2%}")


|  2559 | To       | -0.353 | 70.28%
|  5140 | answer   | -0.017 | 98.35%
|  1224 | this     | -0.143 | 86.69%
|  3764 | question | -0.000 | 99.98%
| 29493 | ,        | -0.005 | 99.54%
|  1246 | we       | -0.006 | 99.38%
|  1695 | need     | -0.019 | 98.15%
|  1066 | to       | -0.000 | 99.99%
|  9819 | identify | -0.425 | 65.36%
|  1040 | the      | -0.018 | 98.19%
| 10949 | documents | -0.613 | 54.17%
|  1137 | that     | -0.589 | 55.51%
|  1309 | can      | -0.493 | 61.06%
|  1115 | be       | -0.138 | 87.09%
|  9776 | accepted | -0.467 | 62.70%
|  1158 | as       | -0.000 | 99.96%
|  7935 | proof    | -0.103 | 90.17%
|  1070 | of       | -0.083 | 92.00%
|  6335 | German   | -0.001 | 99.86%
|  4610 | language | -0.002 | 99.84%
|  7034 | skills   | -0.133 | 87.55%
|  7851 | mentioned | -0.470 | 62.52%
|  1065 | in       | -0.001 | 99.89%
|  1040 | the      | -0.002 | 99.82%
|  3526 | context  | -0.073 | 93.00%
| 29491 | .        | -0.005 | 99.51%
|  1183 | The      | -0.118 | 88.86%

In [104]:
torch.stack(list(scores), dim=0).squeeze().shape

torch.Size([647, 32768])

In [105]:
torch.stack(list(logits), dim=0).squeeze().shape

torch.Size([647, 32768])

In [108]:
logits

(tensor([[-552.0000,    1.7266,    4.4375,  ...,   -4.3438,   -4.4062,
            -4.6875]]),
 tensor([[-318.0000,   -6.2188,    1.1641,  ...,   -6.3125,   -4.5000,
            -7.1875]]),
 tensor([[-258.0000,   -8.0625,    4.5000,  ...,   -7.1250,   -8.4375,
            -6.7500]]),
 tensor([[-274.0000,   -7.8750,    5.8750,  ...,   -6.8438,   -7.9062,
            -7.7812]]),
 tensor([[-312.0000,   -7.1875,    3.9375,  ...,   -7.7500,   -7.6875,
            -7.4062]]),
 tensor([[-304.0000,   -7.7812,    3.8281,  ...,   -6.7812,   -8.4375,
            -7.5312]]),
 tensor([[-228.0000,   -9.2500,    4.1562,  ...,   -6.5000,   -8.8125,
            -8.0000]]),
 tensor([[-282.0000,   -9.0000,    5.5625,  ...,   -7.4062,   -9.0625,
            -9.6875]]),
 tensor([[-195.0000,   -8.2500,    3.2031,  ...,   -4.9062,   -6.6250,
            -7.4062]]),
 tensor([[-312.0000,   -6.9375,    4.4062,  ...,   -6.6562,   -6.7812,
            -6.2812]]),
 tensor([[-302.0000,   -6.7500,    2.2031,  ...,  

In [144]:
compute_perplexity(transition_scores[-1])

1.1451389024693226

In [127]:
torch.max(torch.stack(list(scores), dim=0).squeeze())

tensor(32.)

In [20]:
result = score_probs(probs, sequences[-1][input_ids.shape[1]:] )
for tok, log_prob in zip(result[0], result[1]):

    # | token | token string | logits | probability

    print(f"| {tok:8s} | {log_prob:.3f} | {log_prob:.2%}")

| To       | 0.000 | 0.02%
| answer   | 0.000 | 0.00%
| this     | 0.000 | 0.00%
| question | 0.000 | 0.00%
| ,        | 0.001 | 0.12%
| we       | 0.000 | 0.00%
| need     | 0.000 | 0.00%
| to       | 0.000 | 0.00%
| identify | 0.000 | 0.00%
| the      | 0.000 | 0.03%
| documents | 0.000 | 0.00%
| that     | 0.000 | 0.00%
| can      | 0.000 | 0.00%
| be       | 0.000 | 0.00%
| accepted | 0.000 | 0.00%
| as       | 0.000 | 0.00%
| proof    | 0.000 | 0.00%
| of       | 0.000 | 0.00%
| German   | 0.000 | 0.03%
| language | 0.000 | 0.01%
| skills   | 0.000 | 0.00%
| mentioned | 0.007 | 0.74%
| in       | 0.000 | 0.00%
| the      | 0.000 | 0.00%
| context  | 0.000 | 0.00%
| .        | 0.000 | 0.00%
| The      | 0.000 | 0.00%
| relevant | 0.001 | 0.12%
| information | 0.000 | 0.00%
| is       | 0.000 | 0.00%
| found    | 0.013 | 1.35%
| in       | 0.000 | 0.00%
| the      | 0.000 | 0.04%
| sentence | 0.000 | 0.00%
| :        | 0.000 | 0.04%
| ##       | 0.000 | 0.00%
| begin    | 0.000 | 0.

In [76]:
result = score_probs(log_probs, sequences[-1][input_ids.shape[1]:] )
for tok, log_prob in zip(result[0], result[1]):

    # | token | token string | logits | probability

    print(f"| {tok:8s} | {log_prob:.3f} | {torch.exp(log_prob):.2%}")

| To       | -8.478 | 0.02%
| answer   | -12.235 | 0.00%
| this     | -10.705 | 0.00%
| question | -10.125 | 0.00%
| ,        | -6.692 | 0.12%
| we       | -14.444 | 0.00%
| need     | -14.894 | 0.00%
| to       | -11.875 | 0.00%
| identify | -12.769 | 0.00%
| the      | -8.268 | 0.03%
| documents | -12.457 | 0.00%
| that     | -14.807 | 0.00%
| can      | -12.212 | 0.00%
| be       | -14.826 | 0.00%
| accepted | -14.186 | 0.00%
| as       | -13.063 | 0.00%
| proof    | -17.822 | 0.00%
| of       | -12.146 | 0.00%
| German   | -8.001 | 0.03%
| language | -8.814 | 0.01%
| skills   | -22.261 | 0.00%
| mentioned | -4.907 | 0.74%
| in       | -12.564 | 0.00%
| the      | -10.877 | 0.00%
| context  | -14.916 | 0.00%
| .        | -14.255 | 0.00%
| The      | -15.251 | 0.00%
| relevant | -6.737 | 0.12%
| information | -16.279 | 0.00%
| is       | -14.326 | 0.00%
| found    | -4.308 | 1.35%
| in       | -13.730 | 0.00%
| the      | -7.843 | 0.04%
| sentence | -14.726 | 0.00%
| :        | -7.74

In [22]:
for tok, log_prob in zip(sequence_tokens, nll):

    # | token | token string | logits | probability

    print(f"| {tok:8s} | {-log_prob:.3f} | {torch.exp(-log_prob):.2%}")

| To       | -0.353 | 70.28%
| answer   | -0.017 | 98.35%
| this     | -2.018 | 13.29%
| question | -0.000 | 99.98%
| ,        | -0.005 | 99.54%
| we       | -0.006 | 99.38%
| need     | -0.019 | 98.15%
| to       | -0.000 | 99.99%
| identify | -0.425 | 65.36%
| the      | -0.018 | 98.19%
| documents | -0.613 | 54.17%
| that     | -0.589 | 55.51%
| can      | -0.493 | 61.06%
| be       | -0.138 | 87.09%
| accepted | -0.467 | 62.70%
| as       | -0.000 | 99.96%
| proof    | -0.103 | 90.17%
| of       | -0.083 | 92.00%
| German   | -0.001 | 99.86%
| language | -0.002 | 99.84%
| skills   | -0.133 | 87.55%
| mentioned | -1.470 | 23.00%
| in       | -0.001 | 99.89%
| the      | -0.002 | 99.82%
| context  | -0.073 | 93.00%
| .        | -0.005 | 99.51%
| The      | -0.118 | 88.86%
| relevant | -1.612 | 19.95%
| information | -0.029 | 97.13%
| is       | -0.044 | 95.67%
| found    | -0.058 | 94.40%
| in       | -0.011 | 98.87%
| the      | -0.031 | 96.96%
| sentence | -1.460 | 23.22%
| :      

In [77]:
def to_tokens_and_logprobs(model, tokenizer, input_texts):
    input_ids = tokenizer(input_texts, padding=True, return_tensors="pt").input_ids
    outputs = model(input_ids)
    probs = torch.log_softmax(outputs.logits, dim=-1).detach()

    print(outputs.logits.shape)
    print(input_ids.shape)
    print(outputs.keys())

    # collect the probability of the generated token -- probability at index 0 corresponds to the token at index 1
    probs = probs[:, :-1, :]
    input_ids = input_ids[:, 1:]
    gen_probs = torch.gather(probs, 2, input_ids[:, :, None]).squeeze(-1)

    print(gen_probs.shape)

    batch = []
    for input_sentence, input_probs in zip(input_ids, gen_probs):
        text_sequence = []
        print(input_sentence.shape)
        print(input_probs.shape)
        for token, p in zip(input_sentence, input_probs):
            if token not in tokenizer.all_special_ids:
                print(token.shape)
                print(p.shape)
                text_sequence.append((tokenizer.decode(token), p.item()))
        batch.append(text_sequence)
    return batch

In [78]:
from pprint import pprint
input_texts = ["One plus one is two", "Good morning", "Hello, how are you?"]

batch = to_tokens_and_logprobs(model, tokenizer, input_texts)
pprint(batch)

torch.Size([3, 7, 32768])
torch.Size([3, 7])
odict_keys(['logits', 'past_key_values'])
torch.Size([3, 6])
torch.Size([6])
torch.Size([6])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([6])
torch.Size([6])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([6])
torch.Size([6])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
torch.Size([])
[[('One', -7.680675506591797),
  ('plus', -15.00305461883545),
  ('one', -1.3036733865737915),
  ('is', -1.628778338432312),
  ('two', -0.6116618514060974)],
 [('Good', -11.739301681518555), ('morning', -5.576571464538574)],
 [('Hello', -9.746404647827148),
  (',', -1.3198721408843994),
  ('how', -5.69814395904541),
  ('are', -15.18750286102295),
  ('you', -0.04158167168498039),
  ('?', -0.338634818792343

In [60]:
sequences.shape

torch.Size([1, 3412])

In [67]:
def to_tokens_and_logprobs(sequence_ids, logits):
    probs = torch.log_softmax(torch.stack(list(logits), dim=0), dim=-1).detach()

    # collect the probability of the generated token -- probability at index 0 corresponds to the token at index 1
    probs = probs[:-1, :, :] # match shift -> no probability for first token
    sequence_ids = sequence_ids.unsqueeze(0)[:, 1:] # shift away first token as probs[0] -> token[1]
    gen_probs = torch.gather(probs, 2, sequence_ids[:,None,:]).squeeze(1)
    print(gen_probs.shape)
    print(sequence_ids.shape)
    batch = []
    for generation, probs in zip(sequence_ids, gen_probs):
        text_sequence = []
        #print(generation.shape)
        #print(probs.shape)
        for token, p in zip(generation, probs):
            if token not in tokenizer.all_special_ids:
                #print(token.shape)
                #print(p.shape)
                text_sequence.append((tokenizer.decode(token.item()), p.item()))
        batch.append(text_sequence)
    return batch

In [68]:
result = to_tokens_and_logprobs(sequences[-1][input_ids.shape[1]:], logits)
result

torch.Size([1, 248])
torch.Size([1, 248])


[[('answer', -8.649835586547852),
  ('this', -11.462335586547852),
  ('question', -15.505304336547852),
  (',', -15.126398086547852),
  ('we', -15.587335586547852),
  ('need', -13.923273086547852),
  ('to', -5.087335586547852),
  ('identify', -13.438898086547852),
  ('the', -11.321710586547852),
  ('documents', -16.55608558654785),
  ('that', -14.024835586547852),
  ('can', -15.407648086547852),
  ('be', -14.173273086547852),
  ('accepted', -12.829523086547852),
  ('as', -13.423273086547852),
  ('proof', -17.47796058654785),
  ('of', -13.173273086547852),
  ('German', -10.852960586547852),
  ('language', -14.243585586547852),
  ('skills', -17.72796058654785),
  ('mentioned', -14.237726211547852),
  ('in', -12.477960586547852),
  ('the', -11.321710586547852),
  ('context', -10.524835586547852),
  ('.', -13.079523086547852),
  ('The', -3.5873355865478516),
  ('relevant', -15.247491836547852),
  ('information', -16.03264808654785),
  ('is', -13.974054336547852),
  ('found', -16.5248355865

In [25]:
print(torch.stack(list(logits), dim=0).shape)
print(input_ids.shape)
print(sequences.shape)
sequences[-1][input_ids.shape[1]:].shape

torch.Size([647, 1, 32768])
torch.Size([1, 3163])
torch.Size([1, 3810])


torch.Size([647])

In [26]:
sequences[:, 1:].shape

torch.Size([1, 3809])

In [27]:
import torch
import torch.nn.functional as F

probs = F.log_softmax(torch.stack(list(logits), dim=0), dim=-1)
# collect the probability of the generated token -- probability at index 0 corresponds to the token at index 1
#probs = probs[:, :-1, :]
sequence_ids = sequences[-1][input_ids.shape[1]:]
gen_probs = torch.gather(probs.squeeze(), 0, sequence_ids.unsqueeze(0)).squeeze(-1)



RuntimeError: index 2559 is out of bounds for dimension 0 with size 647

In [12]:
probs.shape # seq_index, batch_idx, vocab_idx

torch.Size([249, 1, 32768])

In [20]:
probs.squeeze().shape

torch.Size([249, 32768])

In [28]:
sequence_ids.unsqueeze(0)[:, 1:].shape

p = probs[:-1, :, :]
s = sequence_ids.unsqueeze(0)[:, 1:]

gen_probs = torch.gather(p, 2, s[:,None,:]).squeeze(-1)

In [29]:
print(p.shape)
print(s.shape)

torch.Size([646, 1, 32768])
torch.Size([1, 646])


In [30]:
print(gen_probs.shape)
print(s.shape)

print(gen_probs.squeeze().shape)
print(s.squeeze().shape)

torch.Size([1, 1, 646])
torch.Size([1, 646])
torch.Size([646])
torch.Size([646])


In [31]:
text_sequence = []
for token, prob in zip(s.squeeze(), gen_probs.squeeze()):
    #print(token, prob)
    if token not in tokenizer.all_special_ids:
        text_sequence.append((tokenizer.decode(token.item()), prob.item()))
text_sequence

[('answer', -8.477746963500977),
 ('this', -11.227746963500977),
 ('question', -15.243371963500977),
 (',', -15.005090713500977),
 ('we', -15.298059463500977),
 ('need', -13.762903213500977),
 ('to', -5.040246963500977),
 ('identify', -13.298059463500977),
 ('the', -11.196496963500977),
 ('documents', -16.430871963500977),
 ('that', -13.817590713500977),
 ('can', -15.227746963500977),
 ('be', -14.007043838500977),
 ('accepted', -12.610559463500977),
 ('as', -13.204309463500977),
 ('proof', -17.212121963500977),
 ('of', -13.157434463500977),
 ('German', -10.618371963500977),
 ('language', -13.983606338500977),
 ('skills', -17.587121963500977),
 ('mentioned', -14.124231338500977),
 ('in', -12.368371963500977),
 ('the', -11.196496963500977),
 ('context', -10.321496963500977),
 ('.', -12.923059463500977),
 ('The', -3.5402472019195557),
 ('relevant', -15.067590713500977),
 ('information', -15.798059463500977),
 ('is', -13.848840713500977),
 ('found', -16.321496963500977),
 ('in', -12.368371

In [32]:
import math
for tok, log_prob in text_sequence:

    # | token | token string | logits | probability

    print(f"| {tok:8s} | {log_prob:.3f} | {math.exp(log_prob):.2%}")

| answer   | -8.478 | 0.02%
| this     | -11.228 | 0.00%
| question | -15.243 | 0.00%
| ,        | -15.005 | 0.00%
| we       | -15.298 | 0.00%
| need     | -13.763 | 0.00%
| to       | -5.040 | 0.65%
| identify | -13.298 | 0.00%
| the      | -11.196 | 0.00%
| documents | -16.431 | 0.00%
| that     | -13.818 | 0.00%
| can      | -15.228 | 0.00%
| be       | -14.007 | 0.00%
| accepted | -12.611 | 0.00%
| as       | -13.204 | 0.00%
| proof    | -17.212 | 0.00%
| of       | -13.157 | 0.00%
| German   | -10.618 | 0.00%
| language | -13.984 | 0.00%
| skills   | -17.587 | 0.00%
| mentioned | -14.124 | 0.00%
| in       | -12.368 | 0.00%
| the      | -11.196 | 0.00%
| context  | -10.321 | 0.00%
| .        | -12.923 | 0.00%
| The      | -3.540 | 2.90%
| relevant | -15.068 | 0.00%
| information | -15.798 | 0.00%
| is       | -13.849 | 0.00%
| found    | -16.321 | 0.00%
| in       | -12.368 | 0.00%
| the      | -11.196 | 0.00%
| sentence | -11.696 | 0.00%
| :        | -10.353 | 0.00%
| ##       |

In [36]:
s[:,:,None].shape

torch.Size([1, 248, 1])

In [32]:
probs[:-1, :, :].shape

torch.Size([248, 1, 32768])

In [25]:
sequence_ids.unsqueeze(0).shape # batch_idx, seq_idx

torch.Size([1, 249])

In [21]:
log_probs

tensor([[-5.6642e+02, -1.2689e+01, -9.9777e+00,  ..., -1.8759e+01,
         -1.8821e+01, -1.9103e+01],
        [-3.3502e+02, -2.3235e+01, -1.5853e+01,  ..., -2.3329e+01,
         -2.1517e+01, -2.4204e+01],
        [-2.7864e+02, -2.8705e+01, -1.6143e+01,  ..., -2.7768e+01,
         -2.9080e+01, -2.7393e+01],
        ...,
        [-3.0652e+02, -2.5896e+01, -1.3303e+01,  ..., -2.3303e+01,
         -2.4271e+01, -2.4615e+01],
        [-3.0300e+02, -2.8409e+01, -1.0502e+01,  ..., -2.8659e+01,
         -2.7909e+01, -2.7127e+01],
        [-4.9932e+02, -1.9697e+01, -4.4663e-01,  ..., -2.3790e+01,
         -2.3478e+01, -2.2509e+01]])