In [1]:
!bash /home/azureuser/cloudfiles/code/blobfuse/blobfuse_raadsinformatie.sh

In [2]:
import sys
sys.path.append("..")

# Select where to run notebook: "azure" or "local"
my_run = "azure"

# import my_secrets as sc
# import settings as st

if my_run == "azure":
    import config_azure as cf
elif my_run == "local":
    import config as cf


import os
if my_run == "azure":
    if not os.path.exists(cf.HUGGING_CACHE):
        os.mkdir(cf.HUGGING_CACHE)
    os.environ["TRANSFORMERS_CACHE"] = cf.HUGGING_CACHE

# set-up environment - GEITje-7b-chat InContextLearning:
# - install blobfuse -> sudo apt-get install blobfuse
# - pip install transformers
# - pip install torch
# - pip install accelerate
# - pip install jupyter
# - pip install ipywidgets

## Notebook overview
- Goal: Run experiment for InContext Learning GEITje
- Trial run model -> prompt GEITje using, example prompt
- Zeroshot prompts
- Fewshot prompts

Load data and functions:
- data is already split
- text is already converted to tokens using model tokenizer 

In [3]:
import pandas as pd
# df = pd.read_pickle(f"{cf.output_path}/txtfiles_tokenizer.pkl")

import sys
sys.path.append('../scripts/') 
import prompt_template as pt
import prediction_helperfunctions as ph
import truncation as tf


In [4]:
import torch
torch.cuda.empty_cache()

#### Trial run Models 
Code to run the models with a simple prompt.

In [None]:
from transformers import pipeline, Conversation

chatbot_geitje = pipeline(task='conversational', model='Rijgersberg/GEITje-7B-chat-v2',
                   device_map='auto', model_kwargs={'offload_buffers':True})


chatbot_llama = pipeline(task='conversational', model='meta-llama/Llama-2-7b-chat-hf',
                   device_map='auto', model_kwargs={'offload_buffers':True})

chatbot_mistral = pipeline(task='conversational', model='mistralai/Mistral-7B-Instruct-v0.2',
                   device_map='auto', model_kwargs={'offload_buffers':True})

## EXAMPLE PROMPT
# print(chatbot(
    # Conversation('Welk woord hoort er niet in dit rijtje thuis: "auto, vliegtuig, geitje, bus"?')
# ))

#### Experiment functions
Prompt GEITje for each document and save the prediction, return response, response time and the prompt version

Code structure:
- 2 functions/cells:
- predictions_incontextlearning -> given a df with docs that need to be predicted, prompt the model
- run the experiment -> built in failsaves (df run in parts, with saves in between)

In [5]:
import time
import os
import pandas as pd
from bm25 import BM25


""" Given a dataframe with txt, return a df with predictions """
# docs_df = dataframe with the documents that need to be predicted
# text_column = name of the column that includes the input_text. Can be different based on the text representation method. 
# prompt_function = prompt template 
# train_df = dataframe with docs, which can be used as examples/training data/context data
# num_examples = number of examples in the prompt

def predictions_incontextlearning(chatbot, docs_df, text_column, prompt_function, train_df, num_examples):
    results_df = pd.DataFrame(columns = ['id', 'path', 'text_column', 'prompt_function', 'response', 'prediction', 'label', 'runtime', 'date', 'prompt'])


    if prompt_function == pt.fewshot_prompt_bm25:
        BM25_model = BM25()
        BM25_model.fit(train_df[text_column])
    
    # elif prompt_function == fewshot_prompt_bm25:
    #     BM25_model = BM25()
    #     BM25_model.fit(train_df[text_column])

    # prompt each document
    for index, row in docs_df.iterrows():
        if (index + 1) % 200 == 0:
            print(f"Iteration {index +1}/{len(docs_df)} completed.")

        start_time = time.time()

        # get the prompt, with the doc filled in
        txt = row[text_column]

        # each prompt function takes different arguments
        # simple function is zeroshot+simple instruction
        # if prompt_function == pt.simple_prompt:
        #     prompt = prompt_function(txt)

        # elif prompt_function == pt.simple_prompt_v2:
        #     prompt = prompt_function(txt)

        # elif prompt_function == pt.OldSimple_prompt:
        #     prompt = prompt_function(txt)

        if prompt_function == pt.zeroshot_prompt_mistral_llama:
            prompt = prompt_function(txt)

        # elif prompt_function == pt.geitje_simple_prompt:
        #     prompt = prompt_function(txt)
      
        # select fewshot examples using bm25
        elif prompt_function == pt.fewshot_prompt_bm25:
            prompt = prompt_function(txt, train_df, num_examples, text_column, BM25_model)
        
        # elif prompt_function == pt.OldFewshot_prompt_bm25:
        #     prompt = prompt_function(txt, train_df, num_examples, text_column, BM25_model)

        # elif prompt_function == fewshot_prompt_bm25:
        #     prompt = prompt_function(txt, train_df, num_examples, text_column, BM25_model)

        else:
            raise ValueError("Prompt function not recognised. Check if prompt function is in prompt_template.py and included in the options above.")

        # prompt and get the response
        # print(prompt)
        converse = chatbot(Conversation(prompt))
        response = converse[1]['content']
        print("label: ", row['label'].lower())
        print("response: ", response)

        # extract prediction from response
        prediction = ph.get_prediction_from_response(response)
        print("prediction:", prediction)

        # save results in dataframe
        results_df.loc[len(results_df)] = {
            'id': row['id'],
            'path' : row['path'],
            'text_column' : docs_df.iloc[0]['trunc_col'],
            'prompt_function': ph.get_promptfunction_name(prompt_function),
            'response':response,
            'prediction':prediction,
            'label':row['label'].lower(),
            'runtime':time.time()-start_time,
            'date': ph.get_datetime(),
            'prompt':prompt
        }
    return results_df



In [6]:
import os
import time
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score

"""
Function to run GEITje In-Context Learning experiment. 
The function allows to resume experiment, if run_id matches.
"""
# df = dataframe with all docs that need to have a prediction (docs still need to be predict + already predicted)
# run_id = unqiue for each experiment. 
# prompt_function = which prompt from prompt_template.py to use
# text_col = colum in df where the text is. (Needs to be already truncated)
# split_col = column with the dataset split. Either '2split' (train and test)or '4split'(train, test, dev and val)
# subset_train = indicates which subset to use as training. either 'train' or 'dev'
# subset_test = indicates which subset to use for testing. either 'test' or 'val'
# label_col = column with the true label
# prediction_path = path to file where predictions need to be saved.
# overview_path = path to file where results of each run need to be saved.
# model_name = name of the model. string.
# num_exmples = number of exaples given to prompt. zero in case of zeroshot. 

def run_experiment(chatbot, df, run_id, prompt_function, text_col, split_col, subset_train, subset_test, label_col, prediction_path, overview_path, model_name, num_examples=0):
    start_time = time.time()
    test_df = df.loc[df[split_col]==subset_test]
    train_df = df.loc[df[split_col]==subset_train]
    
    # get rows of df that still need to be predicted for the specific run_id
    to_predict, previous_predictions = ph.get_rows_to_predict(test_df, prediction_path, run_id)

    # devide to_predict into subsection of 50 predictions at a time. 
    # Allows to rerun without problem. And save subsections of 50 predictions.
    step_range = list(range(0, len(to_predict), 10))

    for i in range(len(step_range)):
        try:
            sub_to_predict = to_predict.iloc[step_range[i]:step_range[i+1]]
            print(f'Starting...{step_range[i]}:{step_range[i+1]} out of {len(to_predict)}')
        except Exception as e:
            sub_to_predict = to_predict[step_range[i]:]
            print(f'Starting...last {len(sub_to_predict)} docs')

        # prompt geitje
        predictions = predictions_incontextlearning(chatbot, sub_to_predict, text_col, prompt_function, train_df, num_examples)

        # save info
        predictions['run_id'] = run_id
        predictions['train_set'] = subset_train
        predictions['test_set'] = subset_test
        predictions['shots'] = num_examples

        # save new combinations in file
        print("Dont interrupt, saving predictions...")
        ph.combine_and_save_df(predictions, prediction_path)

        # if previous predictions, combine previous with new predictions, to get update classification report
        try:
            predictions = pd.concat([predictions, previous_predictions])

            # set previous predictions to all predictions made until now. Necessary for next loop
            previous_predictions = predictions
        except Exception as e:
            # set previous predictions to all predictions made until now. Necessary for next loop
            previous_predictions = predictions

        # save results in overview file
        date = ph.get_datetime()
        y_test = predictions['label']
        y_pred = predictions['prediction']
        report = classification_report(y_test, y_pred)

        overview = pd.DataFrame(
            [{
                'model':model_name,
                'run_id':run_id,
                'date': date,
                'train_set': subset_train,
                'test_set': subset_test,
                'train_set_support':len(df.loc[df[split_col]==subset_train]),
                'test_set_support':len(predictions),
                'split_col':split_col,
                'text_col':df.iloc[0]['trunc_col'],
                'runtime':sum(predictions['runtime']),
                'accuracy': accuracy_score(y_test, y_pred),
                'macro_avg_precision': precision_score(y_test, y_pred, average='macro'),
                'macro_avg_recall': recall_score(y_test, y_pred, average='macro'),
                'macro_avg_f1': f1_score(y_test, y_pred, average='macro'),
                'classification_report':report
            }   ]
        )
        # remove previous results of run_id, replace with new/updated results
        ph.replace_and_save_df(overview, overview_path, run_id)
        print("Saving done! Interrupting is allowed.")



Set up variables that are the same for each model

In [7]:
#set  variables, same for each model
TRAIN_SET = 'dev' # must be dev or train
TEST_SET = 'val' # must be val or test
SPLIT_COLUMN = '4split' #must be either 2split or 4split. 2split = data split into train and test. 4split = data split into train, test, dev and val. 
LABEL_COLUMN = 'label'
TEXT_COLUMN = 'trunc_txt'

In [8]:
txt = pd.read_pickle(f"{cf.output_path}/txtfiles_tokenizer.pkl")

### GEITje

#### Load model - In-context learning

In [9]:
from transformers import pipeline, Conversation

chatbot_geitje = pipeline(task='conversational', model='Rijgersberg/GEITje-7B-chat-v2',
                   device_map='cpu', model_kwargs={'offload_buffers':True})

MODEL_NAME = 'GEITje-7B-chat-v2'
SUBFOLDER = 'in_context'
SHORT_ID = 'IC'



Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

#### Load model - Finetuning

In [None]:
from transformers import pipeline, Conversation

chatbot_geitje = pipeline(task='conversational', model='FemkeBakker/GEITjeSmallData200Tokens',
                   device_map='cpu', model_kwargs={'offload_buffers':True})

MODEL_NAME = 'GEITjeSmallData200Tokens'
SUBFOLDER = 'finetuning'
SHORT_ID = 'FT'

#### Run experiment

In [None]:

# PREDICTION_PATH = f"{cf.output_path}/predictions/trialfewShotGeitjepredictions.pkl"
# OVERVIEW_PATH = f"{cf.output_path}/overview/trialfewShotGeitjepredictions.pkl"


PROMPT = pt.zeroshot_prompt_mistral_llama
PROMPT_NAME = ph.get_promptfunction_name(PROMPT)
TOKENS_COL = 'LlamaTokens' # column with text split using tokenizer of either mistral (MistralTokens) or Llama (LlamaTokens). Using Llama, because Llama split into more tokens. 
FRONT_THRESHOLD = 200
BACK_THRESHOLD = 0

# if PROMPT == pt.simple_prompt or PROMPT==pt.simple_prompt_v2 or PROMPT==pt.OldSimple_prompt or PROMPT==pt.geitje_simple_prompt or PROMPT==pt.zeroshot_prompt_mistral_llama:
if PROMPT==pt.zeroshot_prompt_mistral_llama:
    NUMBER_EXAMPLES = 0
elif PROMPT == pt.fewshot_prompt_bm25 or PROMPT==pt.OldFewshot_prompt_bm25:
    NUMBER_EXAMPLES = 2

    
OVERVIEW_PATH = f"{cf.output_path}/predictionsVal/{SUBFOLDER}/GEITje/{PROMPT_NAME}/overview.pkl"
PREDICTION_PATH = f"{cf.output_path}/predictionsVal/{SUBFOLDER}/GEITje/{PROMPT_NAME}/predictions.pkl"

run_id = f'{SHORT_ID}_{MODEL_NAME}{PROMPT_NAME}{TOKENS_COL}{FRONT_THRESHOLD}_{BACK_THRESHOLD}{TRAIN_SET}{TEST_SET}_numEx{NUMBER_EXAMPLES}'
print (run_id)

# small = txt.iloc[16:22]
# small['4split']=['val', 'dev', 'dev', 'dev', 'dev', 'dev']


IC_GEITje-7B-chat-v2zeroshot_prompt_mistral_llamaLlamaTokens200_0devval_numEx0


In [None]:
# ----- EXPERIMENT --------

# add new column with truncated text -> new dataframe with column + new column name
trunc_df = tf.add_truncation_column(txt,'text', TOKENS_COL, FRONT_THRESHOLD, BACK_THRESHOLD)


# if new run MAKE SURE RUN_ID IS UNIQUE, if want to resume run, pass in that run_id
run_experiment(chatbot_geitje, trunc_df, run_id, PROMPT, TEXT_COLUMN, SPLIT_COLUMN, TRAIN_SET, TEST_SET, LABEL_COLUMN, PREDICTION_PATH, OVERVIEW_PATH, MODEL_NAME, NUMBER_EXAMPLES)


Starting...0:10 out of 209
label:  actualiteit
response:  {
    "categorie": "Raadsagenda supplement 3, woensdag 22 en donderdag 23 januari 2020"
}
prediction: agenda
label:  agenda
response:  {
    "categorie": "Agenda"
}
prediction: agenda
label:  motie
response:  De categorie van het document is 'Motie'.
prediction: NoPredictionFormat
label:  onderzoeksrapport
response:  {
    "categorie": "Raadsnotulen"
}
prediction: raadsnotulen
label:  raadsnotulen
response:  {
  "categorie": "Raadsnotulen"
}
prediction: raadsnotulen
label:  schriftelijke vraag
response:  {
    "categorie": "Agenda"
}
prediction: agenda
label:  schriftelijke vraag
response:  {
    "categorie": "Memo"
}
prediction: NoPredictionInOutput
label:  voordracht
response:  {
    "categorie": "Voordracht"
}
prediction: voordracht
label:  agenda
response:  {
    "categorie": "Agenda"
}
prediction: agenda
label:  agenda
response:  {
    "categorie": "Raadsnotulen"
}
prediction: raadsnotulen
Dont interrupt, saving predictions

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


label:  motie
response:  {
    "categorie": "Motie"
}
prediction: motie
label:  besluit
response:  {
    "categorie": "Besluit"
}
prediction: besluit
label:  onderzoeksrapport
response:  {
    "categorie": "Factsheet"
}
prediction: factsheet
label:  factsheet
response:  {
    "categorie": "Factsheet"
}
prediction: factsheet
label:  raadsadres
response:  {
    "categorie": "Raadsadres"
}
prediction: raadsadres
label:  brief
response:  {
    "categorie": "Besluit"
}
prediction: besluit
label:  schriftelijke vraag
response:  {
    "categorie": "Schriftelijke Vraag"
}
prediction: schriftelijke vraag
label:  onderzoeksrapport
response:  {
    "categorie": "Raadsnotulen"
}
prediction: raadsnotulen
label:  actualiteit
response:  {
    "categorie": "Raadsagenda supplement 3, woensdag 18 en donderdag 19 juli 2018"
}
prediction: agenda
label:  voordracht
response:  {
    "categorie": "Voordracht"
}
prediction: voordracht
Dont interrupt, saving predictions...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Starting...20:30 out of 209
label:  brief
response:  {
    "categorie": "Raadsinformatiebrief"
}
prediction: brief
label:  raadsadres
response:  {
    "categorie": "Raadsnotulen"
}
prediction: raadsnotulen
label:  schriftelijke vraag
response:  {
    "categorie": "Schriftelijke Vraag"
}
prediction: schriftelijke vraag
label:  schriftelijke vraag
response:  {
    "categorie": "Schriftelijke Vraag"
}
prediction: schriftelijke vraag
label:  voordracht
response:  {
    "categorie": "Voordracht"
}
prediction: voordracht
label:  voordracht
response:  {
    "categorie": "Voordracht"
}
prediction: voordracht
label:  raadsadres
response:  {
    "categorie": "Raadsadres"
}
prediction: raadsadres
label:  schriftelijke vraag
response:  {
    "categorie": "Schriftelijke Vraag"
}
prediction: schriftelijke vraag
label:  agenda
response:  {
    "categorie": "Agenda"
}
prediction: agenda
label:  actualiteit
response:  {
    "categorie": "Brief"
}
prediction: brief


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Starting...30:40 out of 209
label:  motie
response:  Het document is een motie.
prediction: NoPredictionFormat
label:  raadsadres
response:  {
    "categorie": "Brief"
}
prediction: brief
label:  brief
response:  {
    "categorie": "Raadsnotulen"
}
prediction: raadsnotulen
label:  actualiteit
response:  {
    "categorie": "Actualiteit"
}
prediction: actualiteit
label:  onderzoeksrapport
response:  {
    "categorie": "Besluit"
}
prediction: besluit
label:  agenda
response:  {
    "categorie": "Agenda"
}
prediction: agenda
label:  onderzoeksrapport
response:  {
    "categorie": "Raadsnotulen"
}
prediction: raadsnotulen
label:  voordracht
response:  {
    "categorie": "Voordracht"
}
prediction: voordracht
label:  schriftelijke vraag
response:  {
    "categorie": "Schriftelijke Vraag"
}
prediction: schriftelijke vraag
label:  actualiteit
response:  {
    "categorie": "Schriftelijke Vraag"
}
prediction: schriftelijke vraag
Dont interrupt, saving predict

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Starting...40:50 out of 209
label:  raadsadres
response:  {
    "categorie": "Brief"
}
prediction: brief
label:  raadsadres
response:  {
    "categorie": "Raadsadres"
}
prediction: raadsadres
label:  agenda
response:  {
    "categorie": "Agenda"
}
prediction: agenda
label:  voordracht
response:  {
    "categorie": "Voordracht"
}
prediction: voordracht
label:  schriftelijke vraag
response:  {
    "categorie": "Schriftelijke Vraag"
}
prediction: schriftelijke vraag
label:  brief
response:  {
    "categorie": "Raadsinformatiebrief"
}
prediction: brief
label:  agenda
response:  Het document is een 'Agenda' en het is gecategoriseerd als 'Agenda'.
prediction: NoPredictionFormat
label:  voordracht
response:  {
    "categorie": "Voordracht"
}
prediction: voordracht
label:  motie
response:  {
    "categorie": "Motie"
}
prediction: motie
label:  motie
response:  De categorie van het document is 'Motie'.
prediction: NoPredictionFormat
Dont interrupt, saving p

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Starting...50:60 out of 209
label:  raadsadres
response:  {
    "categorie": "Raadsnotulen"
}
prediction: raadsnotulen
label:  raadsnotulen
response:  {
    "categorie": "Raadsnotulen"
}
prediction: raadsnotulen
label:  motie
response:  {
    "categorie": "Motie"
}
prediction: motie
label:  agenda
response:  {
    "categorie": "Besluit"
}
prediction: besluit
label:  raadsadres
response:  {
    "categorie": "Raadsadres"
}
prediction: raadsadres
label:  actualiteit
response:  {
    "categorie": "Raadsnotulen"
}
prediction: raadsnotulen
label:  onderzoeksrapport
response:  {
    "categorie": "Raadsnotulen"
}
prediction: raadsnotulen
label:  actualiteit
response:  De output moet een JSON-object zijn met de volgende sleutels:

- 'categorie': 'Interpellatie'
prediction: NoPredictionFormat
label:  raadsnotulen
response:  {
    "categorie": "Raadsnotulen"
}
prediction: raadsnotulen
label:  motie
response:  {
    "categorie": "Motie"
}
prediction: motie
Don

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Starting...60:70 out of 209
label:  schriftelijke vraag
response:  {
    "categorie": "Schriftelijke Vraag"
}
prediction: schriftelijke vraag
label:  onderzoeksrapport
response:  {
    "categorie": "Raadsnotulen"
}
prediction: raadsnotulen
label:  onderzoeksrapport
response:  {
  "categorie": "Raadsnotulen"
}
prediction: raadsnotulen
label:  voordracht
response:  {
    "categorie": "Voordracht"
}
prediction: voordracht
label:  voordracht
response:  {
    "categorie": "Voordracht"
}
prediction: voordracht
label:  agenda
response:  Het document is een besluitvormend AB-agenda.
prediction: NoPredictionFormat
label:  besluit
response:  {
    "categorie": "Besluit"
}
prediction: besluit
label:  voordracht
response:  {
    "categorie": "Voordracht"
}
prediction: voordracht
label:  onderzoeksrapport
response:  {
    "categorie": "Raadsnotulen"
}
prediction: raadsnotulen
label:  agenda
response:  {
    "categorie": "Agenda"
}
prediction: agenda
Dont interr

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Starting...70:80 out of 209
label:  onderzoeksrapport
response:  {
    "categorie": "Plan van Scholen"
}
prediction: NoPredictionInOutput
label:  brief
response:  {
    "categorie": "Raadsinformatiebrief"
}
prediction: brief
label:  brief
response:  {
    "categorie": "Raadsinformatiebrief"
}
prediction: brief
label:  onderzoeksrapport
response:  {
  "categorie": "Raadsnotulen"
}
prediction: raadsnotulen
label:  motie
response:  {
    "categorie": "Motie"
}
prediction: motie
label:  voordracht
response:  {
    "categorie": "Voordracht"
}
prediction: voordracht
label:  agenda
response:  {
    "categorie": "Agenda"
}
prediction: agenda
label:  voordracht
response:  {
    "categorie": "Voordracht"
}
prediction: voordracht
label:  schriftelijke vraag
response:  {
    "categorie": "Schriftelijke Vraag"
}
prediction: schriftelijke vraag
label:  raadsadres
response:  {
    "categorie": "Raadsadres"
}
prediction: raadsadres
Dont interrupt, saving predictio

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Starting...80:90 out of 209
label:  brief
response:  De output moet in JSON-formaat zijn en de categorie van het document bevatten.
prediction: NoPredictionFormat
label:  brief
response:  {
    "categorie": "Besluit"
}
prediction: besluit
label:  schriftelijke vraag
response:  {
    "categorie": "Schriftelijke Vraag"
}
prediction: schriftelijke vraag
label:  besluit
response:  {
    "categorie": "Agenda"
}
prediction: agenda
label:  motie
response:  De categorie van het document is 'Motie'.
prediction: NoPredictionFormat
label:  schriftelijke vraag
response:  {
    "categorie": "Schriftelijke Vraag"
}
prediction: schriftelijke vraag
label:  onderzoeksrapport
response:  {
    "categorie": "Raadsnotulen"
}
prediction: raadsnotulen
label:  onderzoeksrapport
response:  {
    "categorie": "Raadsnotulen"
}
prediction: raadsnotulen
label:  raadsadres
response:  {
    "categorie": "Brief"
}
prediction: brief
label:  actualiteit
response:  {
    "categorie"

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Starting...90:100 out of 209
label:  agenda
response:  Het document is een Agenda voor de Raadscommissie voor Zorg en Welzijn, Ouderen en Sport en Recreatie van de Gemeente Amsterdam. De categorie van het document is 'Agenda'.
prediction: NoPredictionFormat
label:  onderzoeksrapport
response:  {
    "categorie": "Raadsnotulen"
}
prediction: raadsnotulen
label:  factsheet
response:  {
    "categorie": "Factsheet"
}
prediction: factsheet
label:  brief
response:  {
    "categorie": "Brief"
}
prediction: brief
label:  raadsadres
response:  {
    "categorie": "Raadsadres"
}
prediction: raadsadres
label:  besluit
response:  {
    "categorie": "Besluit"
}
prediction: besluit
label:  onderzoeksrapport
response:  {
    "categorie": "Raadsnotulen"
}
prediction: raadsnotulen
label:  actualiteit
response:  {
    "categorie": "Actualiteit"
}
prediction: actualiteit
label:  raadsadres
response:  Het document is een 'Brief'.
prediction: NoPredictionFormat
label: 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Starting...100:110 out of 209
label:  motie
response:  {
    "categorie": "Amendement"
}
prediction: NoPredictionInOutput
label:  voordracht
response:  {
    "categorie": "Voordracht"
}
prediction: voordracht
label:  brief
response:  {
    "categorie": "Brief"
}
prediction: brief
label:  actualiteit
response:  {
    "categorie": "Actualiteit"
}
prediction: actualiteit
label:  schriftelijke vraag
response:  {
    "categorie": "Schriftelijke Vraag"
}
prediction: schriftelijke vraag
label:  motie
response:  Motie
prediction: NoPredictionFormat
label:  voordracht
response:  Het document is een voordracht voor de Commissie AZ van 13 januari 2022.
prediction: NoPredictionFormat
label:  schriftelijke vraag
response:  {
    "categorie": "Schriftelijke Vraag"
}
prediction: schriftelijke vraag
label:  brief
response:  {
    "categorie": "Brief"
}
prediction: brief
label:  onderzoeksrapport
response:  {
    "categorie": "Raadsnotulen"
}
prediction: raadsnotul

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Starting...110:120 out of 209
label:  schriftelijke vraag
response:  {
    "categorie": "Schriftelijke Vraag"
}
prediction: schriftelijke vraag
label:  motie
response:  {
    "categorie": "Motie"
}
prediction: motie
label:  raadsadres
response:  {
    "categorie": "Brief"
}
prediction: brief
label:  motie
response:  De categorie van het document is 'Motie'.
prediction: NoPredictionFormat
label:  raadsadres
response:  {
    "categorie": "Brief"
}
prediction: brief
label:  agenda
response:  {
    "categorie": "Agenda"
}
prediction: agenda
label:  brief
response:  {
    "categorie": "Actualiteit"
}
prediction: actualiteit
label:  schriftelijke vraag
response:  {
    "categorie": "Schriftelijke Vraag"
}
prediction: schriftelijke vraag
label:  raadsadres
response:  Raadsadres
prediction: NoPredictionFormat
label:  raadsadres
response:  {
    "categorie": "Raadsadres"
}
prediction: raadsadres
Dont interrupt, saving predictions...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Starting...120:130 out of 209
label:  voordracht
response:  {
    "categorie": "Voordracht"
}
prediction: voordracht
label:  schriftelijke vraag
response:  {
    "categorie": "Schriftelijke Vraag"
}
prediction: schriftelijke vraag
label:  voordracht
response:  {
    "categorie": "Voordracht"
}
prediction: voordracht
label:  voordracht
response:  {
    "categorie": "Voordracht"
}
prediction: voordracht
label:  raadsadres
response:  {
    "categorie": "Raadsadres"
}
prediction: raadsadres
label:  agenda
response:  {
    "categorie": "Agenda"
}
prediction: agenda
label:  motie
response:  {
    "categorie": "Brief"
}
prediction: brief
label:  schriftelijke vraag
response:  {
    "categorie": "Schriftelijke Vraag"
}
prediction: schriftelijke vraag
label:  motie
response:  {
    "categorie": "Raadsinformatiebrief"
}
prediction: brief
label:  schriftelijke vraag
response:  {
    "categorie": "Schriftelijke Vraag"
}
prediction: schriftelijke vraag
Dont int

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Starting...130:140 out of 209
label:  motie
response:  De categorie van het document is 'Motie'.
prediction: NoPredictionFormat
label:  agenda
response:  {
    "categorie": "Raadsnotulen"
}
prediction: raadsnotulen


KeyboardInterrupt: 

In [None]:
pred = pd.read_pickle(OVERVIEW_PATH)
# pred_run = pred.loc[pred['run_id']==f'{PROMPT_NAME}{TOKENS_COL}{FRONT_THRESHOLD}_{BACK_THRESHOLD}']
display(pred)

In [None]:
print(OVERVIEW_PATH)

### Llama


#### Load model - In Context

In [None]:
from transformers import pipeline, Conversation

chatbot_llama = pipeline(task='conversational', model='meta-llama/Llama-2-7b-chat-hf',
                   device_map='cpu', model_kwargs={'offload_buffers':True})
# load llama using cpu, else will give cuda out of memory error when running fewshot bm25 prompt.

MODEL_NAME = 'Llama-2-7b-chat-hf'
SUBFOLDER = 'in_context'
SHORT_ID = 'IC'

#### Load model - finetuning

In [None]:
from transformers import pipeline, Conversation

# chatbot_llama = pipeline(task='conversational', model='FemkeBakker/LlamaSmallData200Tokens',
#                    device_map='cpu', model_kwargs={'offload_buffers':True})

MODEL_NAME = 'LlamaSmallData200Tokens'
SUBFOLDER = 'finetuning'
SHORT_ID = 'FT'

#### Run experiment

In [None]:


PROMPT = pt.fewshot_prompt_bm25
PROMPT_NAME = ph.get_promptfunction_name(PROMPT)
TOKENS_COL = 'LlamaTokens' # column with text split using tokenizer of either mistral (MistralTokens) or Llama (LlamaTokens). Using Llama, because Llama split into more tokens. 
FRONT_THRESHOLD = 200
BACK_THRESHOLD = 0

if PROMPT == pt.simple_prompt or PROMPT==pt.simple_prompt_v2 or PROMPT==pt.OldSimple_prompt:
    NUMBER_EXAMPLES = 0
elif PROMPT == pt.fewshot_prompt_bm25:
    NUMBER_EXAMPLES = 2
    
OVERVIEW_PATH = f"{cf.output_path}/predictionsVal/{SUBFOLDER}/Llama/{PROMPT_NAME}/overview.pkl"
PREDICTION_PATH = f"{cf.output_path}/predictionsVal/{SUBFOLDER}/Llama/{PROMPT_NAME}/predictions.pkl"

run_id = f'{SHORT_ID}_{MODEL_NAME}{PROMPT_NAME}{TOKENS_COL}{FRONT_THRESHOLD}_{BACK_THRESHOLD}{TRAIN_SET}{TEST_SET}_numEx{NUMBER_EXAMPLES}'
print(run_id)
# small = txt.iloc[16:22]
# small['4split']=['val', 'dev', 'dev', 'dev', 'dev', 'dev']


In [None]:
# add new column with truncated text -> new dataframe with column + new column name
trunc_df = tf.add_truncation_column(txt,'text', TOKENS_COL, FRONT_THRESHOLD, BACK_THRESHOLD)

# if new run MAKE SURE RUN_ID IS UNIQUE, if want to resume run, pass in that run_id
run_experiment(chatbot_llama, trunc_df, run_id, PROMPT, TEXT_COLUMN, SPLIT_COLUMN, TRAIN_SET, TEST_SET, LABEL_COLUMN, PREDICTION_PATH, OVERVIEW_PATH, MODEL_NAME, NUMBER_EXAMPLES)


In [None]:
pred = pd.read_pickle(OVERVIEW_PATH)
display(pred)
print(pred.iloc[0]['run_id'])
print(OVERVIEW_PATH)

### Mistral

#### Load model - in context

In [None]:
from transformers import pipeline, Conversation

chatbot_mistral = pipeline(task='conversational', model='mistralai/Mistral-7B-Instruct-v0.2',
                   device_map='cpu', model_kwargs={'offload_buffers':True})

MODEL_NAME = 'Mistral-7B-Instruct-v0.2'
SUBFOLDER = 'in_context'
SHORT_ID = 'IC'


#### Load model - finetuning

In [None]:
from transformers import pipeline, Conversation

chatbot_mistral = pipeline(task='conversational', model='FemkeBakker/MistralSmallData200Tokens',
                   device_map='cpu', model_kwargs={'offload_buffers':True})

MODEL_NAME = 'MistralSmallData200Tokens'
SUBFOLDER = 'finetuning'
SHORT_ID = 'FT'

#### Run experiment

In [None]:

PROMPT = pt.simple_prompt_v2
PROMPT_NAME = ph.get_promptfunction_name(PROMPT)
TOKENS_COL = 'LlamaTokens' # column with text split using tokenizer of either mistral (MistralTokens) or Llama (LlamaTokens). Using Llama, because Llama split into more tokens. 
FRONT_THRESHOLD = 200
BACK_THRESHOLD = 0

if PROMPT == pt.simple_prompt or PROMPT==pt.simple_prompt_v2:
    NUMBER_EXAMPLES = 0
elif PROMPT == pt.fewshot_prompt_bm25:
    NUMBER_EXAMPLES = 2
    
OVERVIEW_PATH = f"{cf.output_path}/predictionsVal/{SUBFOLDER}/Mistral/{PROMPT_NAME}/overview.pkl"
PREDICTION_PATH = f"{cf.output_path}/predictionsVal/{SUBFOLDER}/Mistral/{PROMPT_NAME}/predictions.pkl"

run_id = f'{SHORT_ID}_{MODEL_NAME}{PROMPT_NAME}{TOKENS_COL}{FRONT_THRESHOLD}_{BACK_THRESHOLD}{TRAIN_SET}{TEST_SET}_numEx{NUMBER_EXAMPLES}'
print(run_id)
# small = txt.iloc[16:22]
# small['4split']=['val', 'dev', 'dev', 'dev', 'dev', 'dev']


In [None]:
# run experiment

# add new column with truncated text -> new dataframe with column + new column name
trunc_df = tf.add_truncation_column(txt,'text', TOKENS_COL, FRONT_THRESHOLD, BACK_THRESHOLD)

# if new run MAKE SURE RUN_ID IS UNIQUE, if want to resume run, pass in that run_id
run_experiment(chatbot_mistral, trunc_df, run_id, PROMPT, TEXT_COLUMN, SPLIT_COLUMN, TRAIN_SET, TEST_SET, LABEL_COLUMN, PREDICTION_PATH, OVERVIEW_PATH, MODEL_NAME, NUMBER_EXAMPLES)


In [None]:
pred = pd.read_pickle(OVERVIEW_PATH)
display(pred)


## Gibberish

### Fewshot Experiment

In [None]:
txt = pd.read_pickle(f"{cf.output_path}/txtfiles_tokenizer.pkl")

#set  variables, same for each model
TRAIN_SET = 'dev' # must be dev or train
TEST_SET = 'val' # must be val or test
SPLIT_COLUMN = '4split' #must be either 2split or 4split. 2split = data split into train and test. 4split = data split into train, test, dev and val. 
LABEL_COLUMN = 'label'
PREDICTION_PATH = f"{cf.output_path}/predictions/fewShotGeitjepredictions.pkl"
OVERVIEW_PATH = f"{cf.output_path}/overview/fewShotGeitjepredictions.pkl"
# PREDICTION_PATH = f"{cf.output_path}/predictions/trialfewShotGeitjepredictions.pkl"
# OVERVIEW_PATH = f"{cf.output_path}/overview/trialfewShotGeitjepredictions.pkl"
MODEL_NAME = 'GEITje-7B-chat-v2'
TEXT_COLUMN = 'trunc_txt'

In [None]:
# txt = pd.read_pickle(f"{cf.output_path}/txtfiles_tokenizer.pkl")

#set  variables, same for each model
TRAIN_SET = 'dev' # must be dev or train
TEST_SET = 'val' # must be val or test
SPLIT_COLUMN = '4split' #must be either 2split or 4split. 2split = data split into train and test. 4split = data split into train, test, dev and val. 
LABEL_COLUMN = 'label'
PREDICTION_PATH = f"{cf.output_path}/predictions/fewShotLlamapredictions.pkl"
OVERVIEW_PATH = f"{cf.output_path}/overview/fewShotLlamapredictions.pkl"
# PREDICTION_PATH = f"{cf.output_path}/predictions/trialfewShotLlamapredictions.pkl"
# OVERVIEW_PATH = f"{cf.output_path}/overview/trialfewShotLlamapredictions.pkl"
MODEL_NAME = 'Llama-2-7b-chat-hf'
TEXT_COLUMN = 'trunc_txt'

In [None]:
# ----- EXPERIMENT: ?? --------

# run experiment
PROMPT = pt.fewshot_prompt_bm25
PROMPT_NAME = ph.get_promptfunction_name(PROMPT)
TOKENS_COL = 'LlamaTokens'
FRONT_THRESHOLD = 200
BACK_THRESHOLD = 0
NUMBER_EXAMPLES = 2
# small = txt.iloc[0:5]
# small['4split']='val'

# add new column with truncated text -> new dataframe with column + new column name
trunc_df = tf.add_truncation_column(txt,'text', TOKENS_COL, FRONT_THRESHOLD, BACK_THRESHOLD)

# if new run MAKE SURE RUN_ID IS UNIQUE, if want to resume run, pass in that run_id
run_experiment(trunc_df, f'{PROMPT_NAME}{TOKENS_COL}{FRONT_THRESHOLD}_{BACK_THRESHOLD}', PROMPT, TEXT_COLUMN, SPLIT_COLUMN, TRAIN_SET, TEST_SET, LABEL_COLUMN, PREDICTION_PATH, OVERVIEW_PATH, MODEL_NAME, NUMBER_EXAMPLES)


In [None]:
pred = pd.read_pickle(OVERVIEW_PATH)
# pred_run = pred.loc[pred['run_id']==f'{PROMPT_NAME}{TOKENS_COL}{FRONT_THRESHOLD}_{BACK_THRESHOLD}']
# print(sum(pred_run['runtime']))
# pred['runtime'] = sum(pred_run['runtime'])
display(pred.head())

In [None]:
pred = pd.read_pickle(PREDICTION_PATH)
pred_run = pred.loc[pred['run_id']==f'{PROMPT_NAME}{TOKENS_COL}{FRONT_THRESHOLD}_{BACK_THRESHOLD}']
print(sum(pred_run['runtime']))

## End notebook

In [None]:
# def get_class_list():
#     return ['Voordracht', 'Besluit', 'Schriftelijke Vragen', 'Brief', 'Raadsadres', 'Onderzoeksrapport', 'Termijnagenda', 'Raadsnotulen', 'Agenda', 'Motie', 'Actualiteit', 'Factsheets']

# def fewshot_prompt_examples(doc, train_df, num_examples, text_column):
#     examples = train_df.sample(n=num_examples)

#     prompt = f"""
#     Het is jouw taak om een document te categoriseren in één van de categoriën.
#     Eerst krijg je een lijst met mogelijke categoriën, daarna {num_examples} voorbeelden van documenten en tot slot het document dat gecategoriseerd moet worden. 
    
#     Categoriën: {get_class_list()}
#     """

#     for index, row in examples.iterrows():
#         mini_prompt = f"""
#     Dit is een voorbeeld document de categorie {row['label']}:
#         {row[text_column]}
#         """

#         prompt += mini_prompt

#     doc_prompt = f"""
#     Categoriseer dit document:
#         {doc}
#     """

#     prompt += doc_prompt
#     return prompt

In [None]:
# def simple_prompt(doc,train_df, num_examples, text_column):
#     prompt = f"""
#     Classificeer het document in één van de categoriën.
#     Houd het kort, geef enkel de naam van de categorie als response.
    
#     Categoriën: {get_class_list()}
    
#     Document: 
#     {doc}
    
#     """
#     return prompt

In [None]:
# import time
# import os
# import pandas as pd


# """ Given a dataframe with txt, return a df with predictions """
# # docs_df = dataframe with the documents that need to be predicted
# # text_column = name of the column that includes the input_text. Can be different based on the text representation method. 
# # prompt_function = prompt template 

# def predictions_incontextlearning(docs_df, text_column, prompt_function, train_df, num_examples):
#     results_df = pd.DataFrame(columns = ['id', 'path', 'text_column', 'prompt_function', 'response', 'prediction', 'label', 'runtime', 'date', 'prompt'])
    
#     # prompt each document
#     for index, row in docs_df.iterrows():
#         if (index + 1) % 200 == 0:
#             print(f"Iteration {index +1}/{len(docs_df)} completed.")

#         start_time = time.time()

#         # get the prompt, with the doc filled in
#         txt = row[text_column]

#         # always give these as input, however not every template uses all of them
#         prompt = prompt_function(txt, train_df, num_examples, text_column)

#         # prompt and get the response
#         converse = chatbot(Conversation(prompt))
#         response = converse[1]['content']

#         # extract prediction from response
#         prediction = ph.get_prediction_from_response(response)

#         # save results in dataframe
#         results_df.loc[len(results_df)] = {
#             'id': row['id'],
#             'path' : row['path'],
#             'text_column' : text_column,
#             'prompt_function': ph.get_promptfunction_name(prompt_function),
#             'response':response,
#             'prediction':prediction,
#             'label':row['label'].lower(),
#             'runtime':time.time()-start_time,
#             'date': ph.get_datetime(),
#             'prompt':prompt
#         }
#     return results_df



In [None]:
# import os
# import time
# from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score

# """
# Function to run GEITje In-Context Learning experiment. 
# The function allows to resume experiment, if run_id matches.
# """
# # df = dataframe with all docs that need to have a prediction (docs still need to be predict + already predicted)
# # run_id = unqiue for each experiment. 
# # prompt_function = which prompt from prompt_template.py to use
# # text_col = colum in df where the text is. (Needs to be already truncated)
# # split_col = column with the dataset split. Either '2split' (train and test)or '4split'(train, test, dev and val)
# # subset_train = indicates which subset to use as training. either 'train' or 'dev'
# # subset_test = indicates which subset to use for testing. either 'test' or 'val'
# # label_col = column with the true label
# # prediction_path = path to file where predictions need to be saved.
# # overview_path = path to file where results of each run need to be saved.
# # model_name = name of the model. string.
# # num_exmples = number of exaples given to prompt. zero in case of zeroshot. 

# def run_experiment(df, run_id, prompt_function, text_col, split_col, subset_train, subset_test, label_col, prediction_path, overview_path, model_name, num_examples=0):
#     print(num_examples)
#     start_time = time.time()
#     test_df = df.loc[df[split_col]==subset_test]
#     train_df = df.loc[df[split_col]==subset_train]
    
#     # get rows of df that still need to be predicted for the specific run_id
#     to_predict, previous_predictions = ph.get_rows_to_predict(test_df, prediction_path, run_id)

#     # devide to_predict into subsection of 50 predictions at a time. 
#     # Allows to rerun without problem. 
#     step_range = list(range(0, len(to_predict), 3))

#     for i in range(len(step_range)):
#         try:
#             sub_to_predict = to_predict.iloc[step_range[i]:step_range[i+1]]
#             print(f'Starting...{step_range[i]}:{step_range[i+1]} out of {len(to_predict)}')
#         except Exception as e:
#             sub_to_predict = to_predict[step_range[i]:]
#             print(f'Starting...last {len(sub_to_predict)} docs')

#         # prompt geitje
#         predictions = predictions_incontextlearning(sub_to_predict, text_col, prompt_function, train_df, num_examples)

#         # save info
#         predictions['run_id'] = run_id
#         predictions['train_set'] = subset_train
#         predictions['test_set'] = subset_test
#         predictions['shots'] = num_examples

#         # save new combinations in file
#         ph.combine_and_save_df(predictions, prediction_path)

#         # if previous predictions, combine previous with new predictions, to get update classification report
#         try:
#             predictions = pd.concat([predictions, previous_predictions])

#             # set previous predictions to all predictions made until now. Necessary for next loop
#             previous_predictions = predictions
#         except Exception as e:
#             # set previous predictions to all predictions made until now. Necessary for next loop
#             previous_predictions = predictions

#         # save results in overview file
#         date = ph.get_datetime()
#         y_test = predictions['label']
#         y_pred = predictions['prediction']
#         report = classification_report(y_test, y_pred)

#         overview = pd.DataFrame(
#             [{
#                 'model':model_name,
#                 'run_id':run_id,
#                 'date': date,
#                 'train_set': subset_train,
#                 'test_set': subset_test,
#                 'train_set_support':len(df.loc[df[split_col]==subset_train]),
#                 'test_set_support':len(predictions),
#                 'split_col':split_col,
#                 'text_col':text_col,
#                 'runtime':time.time()-start_time,
#                 'accuracy': accuracy_score(y_test, y_pred),
#                 'macro_avg_precision': precision_score(y_test, y_pred, average='macro'),
#                 'macro_avg_recall': recall_score(y_test, y_pred, average='macro'),
#                 'macro_avg_f1': f1_score(y_test, y_pred, average='macro'),
#                 'classification_report':report
#             }   ]
#         )
#         # remove previous results of run_id, replace with new/updated results
#         ph.replace_and_save_df(overview, overview_path, run_id)

txt = pd.read_pickle(f"{cf.output_path}/txtfiles_tokenizer.pkl")

#set  variables, same for each model
TRAIN_SET = 'dev' # must be dev or train
TEST_SET = 'val' # must be val or test
SPLIT_COLUMN = '4split' #must be either 2split or 4split. 2split = data split into train and test. 4split = data split into train, test, dev and val. 
LABEL_COLUMN = 'label'
PREDICTION_PATH = f"{cf.output_path}/predictions/tryoutGeitjepredictions.pkl"
OVERVIEW_PATH = f"{cf.output_path}/overview/tryoutGeitjepredictions.pkl"
MODEL_NAME = 'GEITje-7B-chat-v2'
TEXT_COLUMN = 'trunc_txt'

p_path = f"{cf.output_path}/predictions/tryoutGeitjepredictions.pkl"
o_path = f"{cf.output_path}/overview/tryoutGeitjeoverview.pkl"
run_experiment(small, 'tryout_zeroshot', pt.simple_prompt, 'trunc_txt', '4split', 'dev', 'val', 'label', p_path, o_path, 'GEITje-7B-chat-v2', 0)

In [None]:
yeet = pd.read_pickle(p_path)
yeet  = yeet.loc[yeet['run_id']=='tryout_zeroshot']
display(yeet)

In [None]:
print(yeet.iloc[0]['prompt'])

In [None]:
txt = pd.read_pickle(f"{cf.output_path}/txtfiles_tokenizer.pkl")

small = txt.iloc[0:5]
small['4split']=['val', 'dev', 'val', 'dev', 'dev']

# add new column with truncated text -> new dataframe with column + new column name
trunc_df = tf.add_truncation_column(small,'text', 'LlamaTokens', 200, 200)


### GIbberish

In [None]:
# import time
# import os
# import pandas as pd


# """ Given a dataframe with txt, return a df with predictions """
# # docs_df = dataframe with the documents that need to be predicted
# # text_column = name of the column that includes the input_text. Can be different based on the text representation method. 
# # prompt_function = prompt template -> ONLY prompt templates that take doc as input (ZERO SHOT)

# def zero_shot_predictions_incontextlearning(docs_df, text_column, prompt_function):
#     results_df = pd.DataFrame(columns = ['id', 'path', 'text_column', 'prompt_function', 'response', 'prediction', 'label', 'runtime', 'date'])
    
#     # prompt each document
#     for index, row in docs_df.iterrows():
#         if (index + 1) % 200 == 0:
#             print(f"Iteration {index +1}/{len(docs_df)} completed.")

#         start_time = time.time()

#         # get the prompt, with the doc filled in
#         txt = row[text_column]
#         prompt = prompt_function(txt)

#         # prompt and get the response
#         converse = chatbot(Conversation(prompt))
#         response = converse[1]['content']

#         # extract prediction from response
#         prediction = ph.get_prediction_from_response(response)

#         # save results in dataframe
#         results_df.loc[len(results_df)] = {
#             'id': row['id'],
#             'path' : row['path'],
#             'text_column' : text_column,
#             'prompt_function': ph.get_promptfunction_name(prompt_function),
#             'response':response,
#             'prediction':prediction,
#             'label':row['label'].lower(),
#             'runtime':time.time()-start_time,
#             'date': ph.get_datetime()
#         }
#     return results_df


#  import os
# import time
# from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score

# """
# Function to run GEITje ZEROSHOT experiment. 
# The function allows to resume experiment, if run_id matches.
# """
# # df = dataframe with all docs that need to have a prediction (docs still need to be predict + already predicted)
# # run_id = unqiue for each experiment. 
# # prompt_function = which prompt from prompt_template.py to use
# # text_col = colum in df where the text is. (Needs to be already truncated)
# # split_col = column with the dataset split. Either '2split' (train and test)or '4split'(train, test, dev and val)
# # subset_train = indicates which subset to use as training. either 'train' or 'dev'
# # subset_test = indicates which subset to use for testing. either 'test' or 'val'
# # label_col = column with the true label
# # prediction_path = path to file where predictions need to be saved.
# # overview_path = path to file where results of each run need to be saved.
# # model_name = name of the model. string.
# # num_exmples = number of exaples given to prompt. zero inn case of zeroshot. 

# def run_experiment(df, run_id, prompt_function, text_col, split_col, subset_train, subset_test, label_col, prediction_path, overview_path, model_name, num_examples=0):
#     start_time = time.time()
#     test_df = df.loc[df[split_col]==subset_test]
    
#     # get rows of df that still need to be predicted for the specific run_id
#     to_predict, previous_predictions = ph.get_rows_to_predict(test_df, prediction_path, run_id)

#     # devide to_predict into subsection of 50 predictions at a time. 
#     # Allows to rerun without problem. 
#     step_range = list(range(0, len(to_predict), 3))

#     for i in range(len(step_range)):
#         try:
#             sub_to_predict = to_predict.iloc[step_range[i]:step_range[i+1]]
#             print(f'Starting...{step_range[i]}:{step_range[i+1]} out of {len(to_predict)}')
#         except Exception as e:
#             sub_to_predict = to_predict[step_range[i]:]
#             print(f'Starting...last {len(sub_to_predict)} docs')

#         # prompt geitje
#         predictions = zero_shot_predictions_incontextlearning(sub_to_predict, text_col, prompt_function)

#         # save info
#         predictions['run_id'] = run_id
#         predictions['train_set'] = subset_train
#         predictions['test_set'] = subset_test
#         predictions['shots'] = num_examples

#         # save new combinations in file
#         ph.combine_and_save_df(predictions, prediction_path)

#         # if previous predictions, combine previous with new predictions, to get update classification report
#         try:
#             predictions = pd.concat([predictions, previous_predictions])

#             # set previous predictions to all predictions made until now. Necessary for next loop
#             previous_predictions = predictions
#         except Exception as e:
#             # set previous predictions to all predictions made until now. Necessary for next loop
#             previous_predictions = predictions

#         # save results in overview file
#         date = ph.get_datetime()
#         y_test = predictions['label']
#         y_pred = predictions['prediction']
#         report = classification_report(y_test, y_pred)

#         overview = pd.DataFrame(
#             [{
#                 'model':model_name,
#                 'run_id':run_id,
#                 'date': date,
#                 'train_set': subset_train,
#                 'test_set': subset_test,
#                 'train_set_support':len(df.loc[df[split_col]==subset_train]),
#                 'test_set_support':len(predictions),
#                 'split_col':split_col,
#                 'text_col':text_col,
#                 'runtime':time.time()-start_time,
#                 'accuracy': accuracy_score(y_test, y_pred),
#                 'macro_avg_precision': precision_score(y_test, y_pred, average='macro'),
#                 'macro_avg_recall': recall_score(y_test, y_pred, average='macro'),
#                 'macro_avg_f1': f1_score(y_test, y_pred, average='macro'),
#                 'classification_report':report
#             }   ]
#         )
#         # remove previous results of run_id, replace with new/updated results
#         ph.replace_and_save_df(overview, overview_path, run_id)
 
# # p_path = f"{cf.output_path}/predictions/tryoutGeitjepredictions.pkl"
# # o_path = f"{cf.output_path}/overview/tryoutGeitjeoverview.pkl"
# # run_experiment(txt.iloc[25:30], 'tryout', pt.simple_prompt, trunc_col, '4split', 'dev', 'val', 'label', p_path, o_path, 'GEITje-7B-chat-v2')

In [None]:
import pandas as pd
df = pd.read_pickle(f"{cf.output_path}/txtfiles.pkl")
df = df.loc[df['set']=='val']
df['text_trunc_100'] = df['tokens'].apply(text_truncation,100)
df['text_trunc_1000'] = df['tokens'].apply(text_truncation,1000)

In [None]:
path = f"{cf.output_path}/predictions/ICgeitje_predictions.pkl"
resume_predictions(df, path, 2)


# dummy code

In [None]:
def run_in_subsections(df, path, set_run_id):

    iterations = list(range(0, len(df)+50, 50))
    for i in range(len(iterations)):
        try:
            subdf = df.iloc[iterations[i]:iterations[i+1]]

        except IndexError:
            subdf = df.iloc[iterations[i]:]

        # if set_run_id == 'new' and iterations[i]==0:
        #     run_prediction(subdf, 'text_trunc_100', pt.simple_prompt, 'new', path, 'val')
        # else:
        #     run_prediction(subdf, 'text_trunc_100', pt.simple_prompt, 'previous', path, 'val')



path = f"{cf.output_path}/predictions/ICgeitje_predictions_tryout.pkl"
run_in_subsections(df, path)

In [None]:
def run_in_subsections(df, path):
    subdf = df.iloc[0:50]
    run_prediction(subdf, 'text_trunc_100', pt.simple_prompt, 'new', path, 'val')

    iterations = list(range(50, len(df)+50, 50))
    for i in range(len(iterations)):
        if i < len(iterations)-2:
            subdf = df.iloc[iterations[i]:iterations[i+1]]
            print("\n", "iterations", iterations[i], iterations[i+1], "\n")
            run_prediction(subdf, 'text_trunc_100', pt.simple_prompt, 'previous', path, 'val')

        elif i < len(iterations)-1:
            subdf = df.iloc[iterations[i]:]
            print("\n", "iterations", iterations[i], '\n')
            run_prediction(subdf, 'text_trunc_100', pt.simple_prompt, 'previous', path, 'val')

path = f"{cf.output_path}/predictions/ICgeitje_predictions_tryout.pkl"
run_in_subsections(df, path)

In [None]:
yeet = pd.read_pickle(f"{cf.output_path}/predictions/ICgeitje_predictions.pkl")
display(yeet)

yeet = pd.read_pickle(f"{cf.output_path}/overview_results.pkl")
display(yeet)

In [None]:
import time
import sys
sys.path.append('../scripts/') 
import prompt_template as pt
import prediction_helperfunctions as ph


""" Given a dataframe with txt, return a df with predictions """
# docs_df = dataframe with the documents that need to be predicted
# text_column = name of the column that includes the input_text. Can be different based on the text representation method. 
# prompt_function = prompt template -> ONLY prompt templates that take doc as input (ZERO SHOT)

def zero_shot_predictions_incontextlearning(docs_df, text_column, prompt_function):
    results_df = pd.DataFrame(columns = ['id', 'path', 'text_column', 'prompt_function', 'response', 'prediction', 'label', 'runtime', 'date'])
    
    # prompt each document
    for index, row in docs_df.iterrows():
        if (index + 1) % 200 == 0:
            print(f"Iteration {index +1}/{len(docs_df)} completed.")

        start_time = time.time()

        # get the prompt, with the doc filled in
        txt = row[text_column]
        prompt = prompt_function(txt)

        # prompt and get the response
        converse = chatbot(Conversation(prompt))
        response = converse[1]['content']

        # extract prediction from response
        prediction = ph.get_prediction_from_response(response)

        # save results in dataframe
        results_df.loc[len(results_df)] = {
            'id': row['id'],
            'path' : row['path'],
            'text_column' : text_column,
            'prompt_function': ph.get_promptfunction_name(prompt_function),
            'response':response,
            'prediction':prediction,
            'label':row['label'].lower(),
            'runtime':time.time()-start_time,
            'date': ph.get_datetime()
        }
    return results_df

# """ Run a prediction function -> can be ZeroShot or FewShot """
# def run_prediction(docs_df, text_column, prompt_function, subset=None, learning='ZeroShot'):
#     if learning == 'ZeroShot':
#         # get the predictions
#         res = zero_shot_predictions_incontextlearning(docs_df, text_column, prompt_function)

#         # INSERT ELSE STATEMENT HERE FOR FEWSHOT

#         # get run_id
#         path = f"{cf.output_path}/predictions/ICgeitje_predictions.pkl"
#         res['run_id'], predictions_df = ph.get_runid(path)

#         # combine earlier predictions with new ones
#         all_predictions = pd.concat([predictions_df, res])

#         # save predictions
#         all_predictions.to_pickle(path)

#         # save the evaluation metrics for each run
#         ph.update_overview_results(res, 'Rijgersberg/GEITje-7B-chat-v2')
#         return res
# gestart om 10.15/
# res = run_prediction(df, 'text_trunc_100', pt.simple_prompt, 'val')
# display(res)


In [None]:
yeet = pd.read_pickle(f"{cf.output_path}/overview_results.pkl")
display(yeet)

yeet = pd.read_pickle(f"{cf.output_path}/predictions/ICgeitje_predictions.pkl")
display(yeet)


#### Tryout GEITje
Load chatbot

In [None]:
from transformers import pipeline, Conversation

chatbot = pipeline(task='conversational', model='Rijgersberg/GEITje-7B-chat-v2',
                   device_map='auto')

## simple query
print(chatbot(
    Conversation("Hallo, ik ben Bram. Ik wil vanavond graag een film kijken. Heb je enkele suggesties?")
))

In [None]:
from transformers import pipeline, Conversation

# load_in_8bit: lower precision but saves a lot of GPU memory
# device_map=auto: loads the model across multiple GPUs
# chatbot = pipeline("conversational", model="BramVanroy/GEITje-7B-ultra",  model_kwargs={"load_in_8bit": True}, device_map="auto")
chatbot = pipeline("conversational", model="BramVanroy/GEITje-7B-ultra",  device_map="auto")

# start_messages = [
#     # {"role": "system", "content": "Je bent een grappige chatbot die Bert heet. Je maakt vaak mopjes."},
#     {"role": "user", "content": "Hallo, ik ben Bram. Ik wil vanavond graag een film kijken. Heb je enkele suggesties?"}
# ]
# conversation = Conversation(start_messages)
# conversation = chatbot(conversation)
# response = conversation.messages[-1]["content"]
# print(response)


In [None]:
txt = df.iloc[0]['text']
prompt = f"""

Classificeer de gegeven tekst in 1 van de categoriën.
Geef als reactie enkel de naam van de categorie
Categorieën: ['Voordracht', 'Besluit', 'Schriftelijke Vragen', 'Brief', 'Raadsadres', 'Onderzoeksrapport', 'Termijnagenda', 'Raadsnotulen', 'Agenda', 'Motie', 'Actualiteit', 'Factsheets']
Tekst: 

{txt}

""" 

start_messages = [
    {"role": "system", "content": "Jouw enige taak is om teksten te classificeren. Je geeft geen uitleg voor je keuzes."},
    {"role": "user", "content": prompt}
]

In [None]:
chatbot(Conversation(start_messages))

In [None]:
%pip install accelerate

In [None]:
df = pd.read_pickle(f"{cf.output_path}/txtfiles.pkl")


In [None]:
display(df)

In [None]:
text = df.loc[df['clean_tokens_count'].idxmax()]['text']
print(df.loc[df['clean_tokens_count'].idxmax()]['clean_tokens_count'])

print(pt.simple_prompt(text))

In [None]:
print(chatbot(
    Conversation(pt.simple_prompt(text))
    ))

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


device = 'cuda' if torch.cuda.is_available() else 'cpu'

model_name = 'Rijgersberg/GEITje-7B-chat-v2'
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16,
                                             low_cpu_mem_usage=True, attn_implementation='eager',
                                             device_map=device)
tokenizer = AutoTokenizer.from_pretrained(model_name)

def generate(conversation, temperature=0.2, top_k=50, max_new_tokens=1_000):
    tokenized = tokenizer.apply_chat_template(conversation, add_generation_prompt=True,
                                              return_tensors='pt').to(device)
    outputs = model.generate(tokenized, do_sample=True, temperature=temperature,
                             top_k=top_k, max_new_tokens=max_new_tokens)

    return tokenizer.decode(outputs[0], skip_special_tokens=True)

conversation = [
    {
        'role': 'user',
        'content': 'Welk woord hoort er niet in dit rijtje thuis: "auto, vliegtuig, geitje, bus"?'
    }
]
print(generate(conversation))
# <|user|>
# Welk woord hoort er niet in dit rijtje thuis: "auto, vliegtuig, geitje, bus"? 
# <|assistant|>
# Het woord dat niet op zijn plaats staat is 'geit'. Een geit zou niet tussen een lijst van vervoersmiddelen moeten staan. Het past beter bij een boerderijthema of dierenlijst.

### BACK-UP CODE

In [None]:
from collections import Counter
import re
import time
import datetime
import pytz
import os
from sklearn.metrics import classification_report


""" Given the string response, extract the prediction """
def get_prediction_from_response(response):
    # get a list of the possible classes
    classes_list = pt.get_class_list()

    predictions = [True if category.lower() in response.lower() else False for category in classes_list]

    # check if multiple classes were named, this is a prediction error
    if Counter(predictions)[True] > 1:
        return "PredictionError"

    # check if exactly one class is named, this is the prediction
    elif Counter(predictions)[True] == 1:
        prediction = [category.lower() for category in classes_list if category.lower() in response.lower()]
        return prediction[0]

    # if no class is named, then this is a no prediction error
    else:
        return 'NoPrediction'

""" Extract the promptfunction name """
def get_promptfunction_name(prompt_function):
    string = f"{prompt_function}"
    match = re.search(r'<function\s+(\w+)', string)
    if match:
        function_name = match.group(1)
        return function_name
    else:
        return f"{prompt_function}"
    
""" Get the current time in the Netherlands """
def get_datetime():
    current_datetime_utc = datetime.datetime.now(pytz.utc)

    # Convert UTC time to Dutch time (CET)
    dutch_timezone = pytz.timezone('Europe/Amsterdam')
    current_datetime_dutch = current_datetime_utc.astimezone(dutch_timezone)
    return current_datetime_dutch
        
""" Get the new runid """
def get_runid(path):

    # if not first run, set runid to most recent run+1
    if os.path.exists(path):
        df = pd.read_pickle(path)
        return max(df['run_id'])+1, df

    # if first run, set runid to 0
    else:
        return 0, pd.DataFrame()
    
""" Save evaluation metrics of a run """
def update_overview_results(df, model_name, subset=None):
    # df= dataframe with predictions for each do, one row per doc/prediction
    # model_name = string with the name of the model
    # subset = can be train, val, or test, or left open
 
    # get evalaution scores
    evaluation_dict = classification_report(df['label'], df['prediction'], output_dict=True)
    evaluation = pd.DataFrame(evaluation_dict).transpose()
    
    new_row = {
        # stuff about the run
        'run_id':df.iloc[0]['run_id'],
        'model':model_name,
        'prompt_function':df.iloc[0]['prompt_function'],
        'text_column':df.iloc[0]['text_column'],
        'date': get_datetime(),
        'runtime':sum(df['runtime']),
        'set':subset,
        'support':evaluation.iloc[-1]['support'],

        # evaluation
        'accuracy': evaluation_dict['accuracy'],

        'recall_weighted_avg':evaluation.loc[evaluation.index=='weighted avg']['recall'].values[0],
        'precision_weighted_avg': evaluation.loc[evaluation.index=='weighted avg']['precision'].values[0],
        'f1_weighted_avg': evaluation.loc[evaluation.index=='weighted avg']['f1-score'].values[0],

        'recall_macro_avg':evaluation.loc[evaluation.index=='macro avg']['recall'].values[0],
        'precision_macro_avg': evaluation.loc[evaluation.index=='macro avg']['precision'].values[0],
        'f1_macro_avg': evaluation.loc[evaluation.index=='macro avg']['f1-score'].values[0],


        'recall_classes': dict(zip(evaluation.index[0:-3], evaluation['recall'][0:-3])),
        'precision_classes': dict(zip(evaluation.index[0:-3], evaluation['precision'][0:-3])),
        'f1_classes': dict(zip(evaluation.index[0:-3], evaluation['f1-score'][0:-3])),
        'support_classes': dict(zip(evaluation.index[0:-3], evaluation['support'][0:-3])),

        # docs that were predicted
        'doc_paths':list(df['path'].values)
        
    }

    # create a new dataframe with the evaluation, each run has one row
    results = pd.DataFrame(columns=new_row.keys())
    results.loc[len(results)] = new_row
   
    # if not the first run, get results from previous runs
    path = f"{cf.output_path}/overview_results.pkl"
    if os.path.exists(path):
        earlier_results = pd.read_pickle(path)

        # combine evaluation of previous runs with current run
        results = pd.concat([earlier_results, results])

    # save to overview_results.pkl
    results.to_pickle(path)
   


In [None]:
# update_overview_results(res, 'geitje')

In [None]:
# yeet = pd.read_pickle(f"{cf.output_path}/overview_results.pkl")
# display(yeet)

# yeet = pd.read_pickle(f"{cf.output_path}/predictions/ICgeitje_predictions.pkl")
# display(yeet)


In [None]:
import time
import datetime


""" Given a dataframe with txt, return a df with predictions """
# docs_df = dataframe with the documents that need to be predicted
# text_column = name of the column that includes the input_text. Can be different based on the text representation method. 
# prompt_function = prompt template -> ONLY prompt templates that take doc as input (ZERO SHOT)

def zero_shot_predictions_incontextlearning(docs_df, text_column, prompt_function):
    results_df = pd.DataFrame(columns = ['id', 'path', 'text_column', 'prompt_function', 'response', 'prediction', 'label', 'runtime', 'date'])
    
    # prompt each document
    for index, row in docs_df.iterrows():
        start_time = time.time()

        # get the prompt, with the doc filled in
        txt = row[text_column]
        prompt = prompt_function(txt)

        # prompt and get the response
        converse = chatbot(Conversation(prompt))
        response = converse[1]['content']

        # extract prediction from response
        prediction = get_prediction_from_response(response)

        # save results in dataframe
        results_df.loc[len(results_df)] = {
            'id': row['id'],
            'path' : row['path'],
            'text_column' : text_column,
            'prompt_function': get_promptfunction_name(prompt_function),
            'response':response,
            'prediction':prediction,
            'label':row['label'].lower(),
            'runtime':time.time()-start_time,
            'date': get_datetime()
        }
    return results_df

""" Run a prediction function -> can be ZeroShot or FewShot """
def run_prediction(docs_df, text_column, prompt_function, subset=None, learning='ZeroShot'):
    if learning == 'ZeroShot':
        # get the predictions
        res = zero_shot_predictions_incontextlearning(docs_df, text_column, prompt_function)

        # INSERT ELSE STATEMENT HERE FOR FEWSHOT

        # get run_id
        path = f"{cf.output_path}/predictions/ICgeitje_predictions.pkl"
        res['run_id'], predictions_df = get_runid(path)

        # combine earlier predictions with new ones
        all_predictions = pd.concat([predictions_df, res])

        # save predictions
        all_predictions.to_pickle(path)

        # save the evaluation metrics for each run
        update_overview_results(res, 'Rijgersberg/GEITje-7B-chat-v2')
        return res

res = run_prediction(df, 'text_trunc', pt.simple_prompt)
display(res)
