In [2]:
!bash /home/azureuser/cloudfiles/code/blobfuse/blobfuse_raadsinformatie.sh

In [3]:
import sys
sys.path.append("..")

# Select where to run notebook: "azure" or "local"
my_run = "azure"

# import my_secrets as sc
# import settings as st

if my_run == "azure":
    import config_azure as cf
elif my_run == "local":
    import config as cf


import os
if my_run == "azure":
    if not os.path.exists(cf.HUGGING_CACHE):
        os.mkdir(cf.HUGGING_CACHE)
    os.environ["TRANSFORMERS_CACHE"] = cf.HUGGING_CACHE

# set-up environment - GEITje-7b-chat InContextLearning:
# - install blobfuse -> sudo apt-get install blobfuse
# - pip install transformers
# - pip install torch
# - pip install accelerate
# - pip install jupyter
# - pip install ipywidgets

## Notebook overview
- Goal: Run experiment for InContext Learning GEITje
- Trial run model -> prompt GEITje using, example prompt
- Zeroshot prompts
- Fewshot prompts

Load data and functions:
- data is already split
- text is already converted to tokens using model tokenizer 

In [4]:
import pandas as pd
# df = pd.read_pickle(f"{cf.output_path}/txtfiles_tokenizer.pkl")

import sys
sys.path.append('../scripts/') 
import prompt_template as pt
import prediction_helperfunctions as ph
import truncation as tf


In [5]:
import torch
torch.cuda.empty_cache()

#### Trial run Models 
Code to run the models with a simple prompt.

In [None]:
from transformers import pipeline, Conversation

chatbot_geitje = pipeline(task='conversational', model='Rijgersberg/GEITje-7B-chat-v2',
                   device_map='auto', model_kwargs={'offload_buffers':True})


chatbot_llama = pipeline(task='conversational', model='meta-llama/Llama-2-7b-chat-hf',
                   device_map='auto', model_kwargs={'offload_buffers':True})

chatbot_mistral = pipeline(task='conversational', model='mistralai/Mistral-7B-Instruct-v0.2',
                   device_map='auto', model_kwargs={'offload_buffers':True})

## EXAMPLE PROMPT
# print(chatbot(
    # Conversation('Welk woord hoort er niet in dit rijtje thuis: "auto, vliegtuig, geitje, bus"?')
# ))

#### Experiment functions
Prompt GEITje for each document and save the prediction, return response, response time and the prompt version

Code structure:
- 2 functions/cells:
- predictions_incontextlearning -> given a df with docs that need to be predicted, prompt the model
- run the experiment -> built in failsaves (df run in parts, with saves in between)

In [6]:
import time
import os
import pandas as pd
from bm25 import BM25


""" Given a dataframe with txt, return a df with predictions """
# docs_df = dataframe with the documents that need to be predicted
# text_column = name of the column that includes the input_text. Can be different based on the text representation method. 
# prompt_function = prompt template 
# train_df = dataframe with docs, which can be used as examples/training data/context data
# num_examples = number of examples in the prompt

def predictions_incontextlearning(chatbot, docs_df, text_column, prompt_function, train_df, num_examples):
    results_df = pd.DataFrame(columns = ['id', 'path', 'text_column', 'prompt_function', 'response', 'prediction', 'label', 'runtime', 'date', 'prompt'])


    if prompt_function == pt.fewshot_prompt_bm25:
        BM25_model = BM25()
        BM25_model.fit(train_df[text_column])
   

    # prompt each document
    for index, row in docs_df.iterrows():
        # if (index + 1) % 200 == 0:
        #     print(f"Iteration {index +1}/{len(docs_df)} completed.")

        start_time = time.time()

        # get the prompt, with the doc filled in
        txt = row[text_column]

        # each prompt function takes different arguments
        # zeroshot prompt for geitje
        if prompt_function == pt.zeroshot_prompt_geitje:
            prompt = prompt_function(txt)

        # zeroshot function for mistral and llama
        elif prompt_function == pt.zeroshot_prompt_mistral_llama:
            prompt = prompt_function(txt)

        # select fewshot examples using bm25, fewshot is the same for all models
        elif prompt_function == pt.fewshot_prompt_bm25:
            prompt = prompt_function(txt, train_df, num_examples, text_column, BM25_model)

        else:
            raise ValueError("Prompt function not recognised. Check if prompt function is in prompt_template.py and included in the options above.")

        # prompt and get the response
        # print(prompt)
        converse = chatbot(Conversation(prompt))
        response = converse[1]['content']
        print("label: ", row['label'].lower())
        print("response: ", response)

        # extract prediction from response
        prediction = ph.get_prediction_from_response(response)
        print("prediction:", prediction)

        # save results in dataframe
        results_df.loc[len(results_df)] = {
            'id': row['id'],
            'path' : row['path'],
            'text_column' : docs_df.iloc[0]['trunc_col'],
            'prompt_function': ph.get_promptfunction_name(prompt_function),
            'response':response,
            'prediction':prediction,
            'label':row['label'].lower(),
            'runtime':time.time()-start_time,
            'date': ph.get_datetime(),
            'prompt':prompt
        }
    return results_df



In [17]:
import os
import time
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score

"""
Function to run GEITje In-Context Learning experiment. 
The function allows to resume experiment, if run_id matches.
"""
# df = dataframe with all docs that need to have a prediction (docs still need to be predict + already predicted)
# run_id = unqiue for each experiment. 
# prompt_function = which prompt from prompt_template.py to use
# text_col = colum in df where the text is. (Needs to be already truncated)
# split_col = column with the dataset split. Either '2split' (train and test)or '4split'(train, test, dev and val)
# subset_train = indicates which subset to use as training. either 'train' or 'dev'
# subset_test = indicates which subset to use for testing. either 'test' or 'val'
# label_col = column with the true label
# prediction_path = path to file where predictions need to be saved.
# overview_path = path to file where results of each run need to be saved.
# model_name = name of the model. string.
# num_exmples = number of exaples given to prompt. zero in case of zeroshot. 

def run_experiment(chatbot, df, run_id, prompt_function, text_col, split_col, subset_train, subset_test, label_col, prediction_path, overview_path, model_name, num_examples=0):
    test_df = df.loc[df[split_col]==subset_test]
    train_df = df.loc[df[split_col]==subset_train]
    
    # get rows of df that still need to be predicted for the specific run_id
    to_predict, previous_predictions = ph.get_rows_to_predict(test_df, prediction_path, run_id)

    # devide to_predict into subsection of 50 predictions at a time. 
    # Allows to rerun without problem. And save subsections of 50 predictions.
    step_range = list(range(0, len(to_predict), 10))

    for i in range(len(step_range)):
        try:
            sub_to_predict = to_predict.iloc[step_range[i]:step_range[i+1]]
            print(f'Starting...{step_range[i]}:{step_range[i+1]} out of {len(to_predict)}')
        except Exception as e:
            sub_to_predict = to_predict[step_range[i]:]
            print(f'Starting...last {len(sub_to_predict)} docs')

        # prompt geitje
        predictions = predictions_incontextlearning(chatbot, sub_to_predict, text_col, prompt_function, train_df, num_examples)

        # save info
        predictions['run_id'] = run_id
        predictions['train_set'] = subset_train
        predictions['test_set'] = subset_test
        predictions['shots'] = num_examples

        # save new combinations in file
        print("Dont interrupt, saving predictions...")
        ph.combine_and_save_df(predictions, prediction_path)

        # if previous predictions, combine previous with new predictions, to get update classification report
        try:
            predictions = pd.concat([predictions, previous_predictions])

            # set previous predictions to all predictions made until now. Necessary for next loop
            previous_predictions = predictions
        except Exception as e:
            # set previous predictions to all predictions made until now. Necessary for next loop
            previous_predictions = predictions

        # save results in overview file
        date = ph.get_datetime()
        y_test = predictions['label']
        y_pred = predictions['prediction']

        # change error predictions to one error
        # error_names = ['NoPredictionInOutput', 'MultiplePredictionErrorInFormatting','NoPredictionFormat', 'MultiplePredictionErrorInOutput']
        # y_pred = ['OutputError' if x in error_names else x for x in y_pred]

        report = classification_report(y_test, y_pred)

        overview = pd.DataFrame(
            [{
                'model':model_name,
                'run_id':run_id,
                'date': date,
                'train_set': subset_train,
                'test_set': subset_test,
                'train_set_support':len(df.loc[df[split_col]==subset_train]),
                'test_set_support':len(predictions),
                'split_col':split_col,
                'text_col':df.iloc[0]['trunc_col'],
                'runtime':sum(predictions['runtime']),
                'accuracy': accuracy_score(y_test, y_pred),
                'macro_avg_precision': precision_score(y_test, y_pred, average='macro'),
                'macro_avg_recall': recall_score(y_test, y_pred, average='macro'),
                'macro_avg_f1': f1_score(y_test, y_pred, average='macro'),
                'classification_report':report
            }   ]
        )
        # remove previous results of run_id, replace with new/updated results
        ph.replace_and_save_df(overview, overview_path, run_id)
        print("Saving done! Interrupting is allowed.")



Set up variables that are the same for each model

In [8]:
#set  variables, same for each model
TRAIN_SET = 'train' # must be dev or train
TEST_SET = 'test' # must be val or test
SPLIT_COLUMN = 'balanced_split' #must be either 2split or 4split. 2split = data split into train and test. 4split = data split into train, test, dev and val. 
LABEL_COLUMN = 'label'
TEXT_COLUMN = 'trunc_txt'


In [9]:
txt = pd.read_pickle(f"{cf.output_path}/txtfiles_tokenizer.pkl")

### GEITje

In [None]:
SHORT_MODEL_NAME = 'GEITje'
PROMPT = pt.zeroshot_prompt_geitje
PROMPT_NAME = ph.get_promptfunction_name(PROMPT)
TOKENS_COL = 'LlamaTokens' # column with text split using tokenizer of either mistral (MistralTokens) or Llama (LlamaTokens). Using Llama, because Llama split into more tokens. 
FRONT_THRESHOLD = 100
BACK_THRESHOLD = 100

if PROMPT==pt.zeroshot_prompt_geitje:
    NUMBER_EXAMPLES = 0
elif PROMPT == pt.fewshot_prompt_bm25:
    NUMBER_EXAMPLES = 2



#### Load model - In-context learning
Note - ONLY load one model: either in-context or fine-tuning

In [None]:
from transformers import pipeline, Conversation

chatbot_geitje = pipeline(task='conversational', model='Rijgersberg/GEITje-7B-chat-v2',
                    device_map='cpu', model_kwargs={'offload_buffers':True})

MODEL_NAME = 'GEITje-7B-chat-v2'
SUBFOLDER = 'in_context'
SHORT_ID = 'IC'



#### Load model - Finetuning

In [None]:
from transformers import pipeline, Conversation

chatbot_geitje = pipeline(task='conversational', model='FemkeBakker/GEITjeSmallData200Tokens',
                   device_map='cpu', model_kwargs={'offload_buffers':True})

MODEL_NAME = 'GEITjeSmallData200Tokens'
SUBFOLDER = 'finetuning'
SHORT_ID = 'FT'

#### Set-up paths to save predictions

In [None]:
import os

if SPLIT_COLUMN == '4split' or SPLIT_COLUMN == '2split':
    OVERVIEW_PATH = f"{cf.output_path}/predictionsVal/{SUBFOLDER}/{SHORT_MODEL_NAME}/{PROMPT_NAME}/overview.pkl"
    PREDICTION_PATH = f"{cf.output_path}/predictionsVal/{SUBFOLDER}/{SHORT_MODEL_NAME}/{PROMPT_NAME}/predictions.pkl"
    
elif SPLIT_COLUMN == 'balanced_split':
    if SUBFOLDER == 'finetuning':
        OVERVIEW_PATH = f"{cf.output_path}/predictionsFinal/{SUBFOLDER}/overview.pkl"
        PREDICTION_PATH = f"{cf.output_path}/predictionsFinal/{SUBFOLDER}/{SHORT_MODEL_NAME}First{FRONT_THRESHOLD}Last{BACK_THRESHOLD}Predictions.pkl"

    elif SUBFOLDER == 'in_context':
        OVERVIEW_PATH = f"{cf.output_path}/predictionsFinal/{SUBFOLDER}/{SHORT_MODEL_NAME}/overview.pkl"
        PREDICTION_PATH = f"{cf.output_path}/predictionsFinal/{SUBFOLDER}/{SHORT_MODEL_NAME}/{PROMPT_NAME}/First{FRONT_THRESHOLD}Last{BACK_THRESHOLD}Predictions.pkl"

print(OVERVIEW_PATH)
print(PREDICTION_PATH)

if not os.path.isdir(os.path.dirname(os.path.abspath(OVERVIEW_PATH))):
    raise ValueError("Folder to OVERVIEW_PATH does not exist") 
if not os.path.isdir(os.path.dirname(os.path.abspath(PREDICTION_PATH))):
    raise ValueError("Folder to PREDICTION_PATH does not exist") 

run_id = f'{SHORT_ID}_{MODEL_NAME}{PROMPT_NAME}{TOKENS_COL}{FRONT_THRESHOLD}_{BACK_THRESHOLD}{TRAIN_SET}{TEST_SET}_numEx{NUMBER_EXAMPLES}'
print ('\n', run_id)


#### Run experiment

In [None]:
# ----- EXPERIMENT --------

# add new column with truncated text -> new dataframe with column + new column name
trunc_df = tf.add_truncation_column(txt,'text', TOKENS_COL, FRONT_THRESHOLD, BACK_THRESHOLD)


# if new run MAKE SURE RUN_ID IS UNIQUE, if want to resume run, pass in that run_id
run_experiment(chatbot_geitje, trunc_df, run_id, PROMPT, TEXT_COLUMN, SPLIT_COLUMN, TRAIN_SET, TEST_SET, LABEL_COLUMN, PREDICTION_PATH, OVERVIEW_PATH, MODEL_NAME, NUMBER_EXAMPLES)


In [None]:
pred = pd.read_pickle(OVERVIEW_PATH)
# pred_run = pred.loc[pred['run_id']==f'{PROMPT_NAME}{TOKENS_COL}{FRONT_THRESHOLD}_{BACK_THRESHOLD}']
display(pred)

### Llama


In [10]:
SHORT_MODEL_NAME = 'Llama'
PROMPT = pt.zeroshot_prompt_mistral_llama
PROMPT_NAME = ph.get_promptfunction_name(PROMPT)
TOKENS_COL = 'LlamaTokens' # column with text split using tokenizer of either mistral (MistralTokens) or Llama (LlamaTokens). Using Llama, because Llama split into more tokens. 
FRONT_THRESHOLD = 100
BACK_THRESHOLD = 100

if PROMPT==pt.zeroshot_prompt_mistral_llama:
    NUMBER_EXAMPLES = 0
elif PROMPT == pt.fewshot_prompt_bm25:
    NUMBER_EXAMPLES = 2



#### Load model - In-context learning
Note - ONLY load one model: either in-context or fine-tuning

In [11]:
from transformers import pipeline, Conversation

chatbot_llama = pipeline(task='conversational', model='meta-llama/Llama-2-7b-chat-hf',
                   device_map='cpu', model_kwargs={'offload_buffers':True})
# load llama using cpu, else will give cuda out of memory error when running fewshot bm25 prompt.

MODEL_NAME = 'Llama-2-7b-chat-hf'
SUBFOLDER = 'in_context'
SHORT_ID = 'IC'



Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

#### Load model - finetuning

In [None]:
from transformers import pipeline, Conversation

# chatbot_llama = pipeline(task='conversational', model='FemkeBakker/LlamaSmallData200Tokens',
#                    device_map='cpu', model_kwargs={'offload_buffers':True})

MODEL_NAME = 'LlamaSmallData200Tokens'
SUBFOLDER = 'finetuning'
SHORT_ID = 'FT'

#### Set-up paths to save predictions

In [12]:
import os

if SPLIT_COLUMN == '4split' or SPLIT_COLUMN == '2split':
    OVERVIEW_PATH = f"{cf.output_path}/predictionsVal/{SUBFOLDER}/{SHORT_MODEL_NAME}/{PROMPT_NAME}/overview.pkl"
    PREDICTION_PATH = f"{cf.output_path}/predictionsVal/{SUBFOLDER}/{SHORT_MODEL_NAME}/{PROMPT_NAME}/predictions.pkl"
    
elif SPLIT_COLUMN == 'balanced_split':
    if SUBFOLDER == 'finetuning':
        OVERVIEW_PATH = f"{cf.output_path}/predictionsFinal/{SUBFOLDER}/overview.pkl"
        PREDICTION_PATH = f"{cf.output_path}/predictionsFinal/{SUBFOLDER}/{SHORT_MODEL_NAME}First{FRONT_THRESHOLD}Last{BACK_THRESHOLD}Predictions.pkl"

    elif SUBFOLDER == 'in_context':
        OVERVIEW_PATH = f"{cf.output_path}/predictionsFinal/{SUBFOLDER}/{SHORT_MODEL_NAME}/overview.pkl"
        PREDICTION_PATH = f"{cf.output_path}/predictionsFinal/{SUBFOLDER}/{SHORT_MODEL_NAME}/{PROMPT_NAME}/First{FRONT_THRESHOLD}Last{BACK_THRESHOLD}Predictions.pkl"

print(OVERVIEW_PATH)
print(PREDICTION_PATH)

if not os.path.isdir(os.path.dirname(os.path.abspath(OVERVIEW_PATH))):
    raise ValueError("Folder to OVERVIEW_PATH does not exist") 
if not os.path.isdir(os.path.dirname(os.path.abspath(PREDICTION_PATH))):
    raise ValueError("Folder to PREDICTION_PATH does not exist") 

run_id = f'{SHORT_ID}_{MODEL_NAME}{PROMPT_NAME}{TOKENS_COL}{FRONT_THRESHOLD}_{BACK_THRESHOLD}{TRAIN_SET}{TEST_SET}_numEx{NUMBER_EXAMPLES}'
print ('\n', run_id)

/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/predictionsFinal/in_context/Llama/overview.pkl
/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/predictionsFinal/in_context/Llama/zeroshot_prompt_mistral_llama/First100Last100Predictions.pkl

 IC_Llama-2-7b-chat-hfzeroshot_prompt_mistral_llamaLlamaTokens100_100traintest_numEx0


#### Run experiment

In [14]:
# add new column with truncated text -> new dataframe with column + new column name
trunc_df = tf.add_truncation_column(txt,'text', TOKENS_COL, FRONT_THRESHOLD, BACK_THRESHOLD)

# if new run MAKE SURE RUN_ID IS UNIQUE, if want to resume run, pass in that run_id
run_experiment(chatbot_llama, trunc_df, run_id, PROMPT, TEXT_COLUMN, SPLIT_COLUMN, TRAIN_SET, TEST_SET, LABEL_COLUMN, PREDICTION_PATH, OVERVIEW_PATH, MODEL_NAME, NUMBER_EXAMPLES)


Run-id already known, resuming predictions...
Starting...0:10 out of 220
label:  agenda
response:   Sure! Based on the content of the document you provided, I would classify it as a 'Agenda' document.

Here is the output in JSON format:

{
"categorie": "Agenda"
}

The reason for this classification is that the document contains a list of items to be discussed during a meeting, including the date, time, location, and contact information for the meeting. This is a common characteristic of agenda documents.
prediction: agenda
label:  raadsnotulen
response:   Sure, I can classify the document for you. Based on the content you provided, I would classify the document as a 'Raadsnotulen'.

Here is the output in the form of a JSON file:

{
"categorie": "Raadsnotulen"
}

The reason for this classification is that the document appears to be a meeting minutes or agenda for a municipal council, with details of the meeting date, time, location, and attendees. The document also includes a list of am

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Starting...10:20 out of 220
label:  raadsnotulen
response:   Sure, here is the classification of the document:

{
"categorie": "Besluit"
}

Explanation:

The document is a municipal council meeting minutes from the year 2019, specifically from the 18th of September 2019. The document contains a record of the decisions made during the meeting, including the voting results and the chairperson's remarks. Therefore, the most appropriate category for this document is "Besluit".
prediction: besluit
label:  schriftelijke vraag
response:   Sure, I can classify the document for you. Based on the content you provided, I would classify the document as:

{'categorie': 'Besluit'}

The document appears to be a decision or resolution made by the Gemeenteraad (City Council) of Amsterdam, as it mentions the date of indention (29 November 2018), the date of approval (13 January 2020), and the publication date (14 January 2020). The document also refers to the mayor 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Starting...20:30 out of 220
label:  besluit
response:   Sure! Based on the content of the document you provided, I would classify it as a 'Besluit' (Dutch for 'Decision').

Here is the output in the form of a JSON file:

{
"categorie": "Besluit"
}

The reason for this classification is that the document contains a formal decision made by the Stadsdeelraad (District Council) of Amsterdam, specifically the decision regarding the onttrekking aan het openbaar verkeer (withdrawal from public traffic) of the Sint Pieterspoort between Rokin and Nes, and the bestemmen (designation) of the area for sloopwerkzaamheden (demolition works) at the beginning of the project. The document is signed by the Voorzitter (Chair) of the Stadsdeelraad and the Griffier (Clerk), which further supports the classification of this document as a formal decision.
prediction: besluit
label:  raadsnotulen
response:   Sure, I can classify the document for you. Based on the content 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Starting...30:40 out of 220
label:  raadsnotulen
response:   Sure, I can classify the document for you. Based on the content you provided, I would categorize the document as:

{'categorie': 'Raadsnotulen'}

The document appears to be a meeting agenda for the Gemeente Amsterdam, with details of the items to be discussed and the date and time of the meeting. It includes a motion put forward by three council members regarding the development of a new sanitation system in Buiksloterham, and knowledge sharing on the latest developments in this area. Therefore, it falls under the category of 'Raadsnotulen'.
prediction: raadsnotulen
label:  besluit
response:   Sure! Based on the content of the document you provided, I would classify it as a 'Voordracht' (Dutch for 'Speech').

Here is the output in the form of a JSON file:

{
"categorie": "Voordracht"
}

The reason for this classification is that the document contains a speech or presentation, as evident f

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Starting...40:50 out of 220
label:  besluit
response:   Sure! Based on the content of the document you provided, I would classify it as a 'Vergadering Bestuurscommissie'.

Here is the output in JSON format:

{
"categorie": "Vergadering Bestuurscommissie"
}

Note that this category is based on the content of the document, and it may not be accurate if the document is not actually a meeting agenda.
prediction: NoPredictionInOutput
label:  raadsnotulen
response:   Sure, I can classify the document for you. Based on the content provided, I would categorize the document as:

{'categorie': 'Raadsnotulen'}

This document appears to be a copy of the agenda and minutes of a meeting of the Gemeenteraad (City Council) of Amsterdam, held on January 23, 2019. It includes details of the items discussed and voted on during the meeting, as well as any relevant motions or decisions made. Therefore, it falls under the category of 'Raadsnotulen' (City Council minutes

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Starting...50:60 out of 220
label:  onderzoeksrapport
response:   Based on the content of the document you provided, I would classify it as a 'Besluit'.

Here is the output in the form of a JSON file:

{
"categorie": "Besluit"
}

The reason for this classification is that the document appears to be a decision or a policy statement, as it contains information about the activities and achievements of the Buurtteam Amsterdam in 2022, and it is addressed to a wide audience, including Amsterdammers and professionals. The use of formal language and the inclusion of contact information for more information also suggest that this is a formal document.
prediction: besluit
label:  factsheet
response:   Na het classificeer het document op basis van de categorieën die u hebt voorgelegd, blijkt dat het document onderdeel is van de categorie ['Besluit'].

Hier is de output in de vorm van een JSON-file:

{
"categorie": "Besluit"
}

Deze output betekent dat het do

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Starting...60:70 out of 220
label:  factsheet
response:   Based on the content of the document you provided, I would classify it as a 'Factsheet'.

Here's the output in JSON format:

{
"categorie": "Factsheet"
}
prediction: factsheet
label:  agenda
response:   Sure, I can classify the document for you. Based on the content you provided, I would classify this document as ['Voordracht'].

Here's the output in JSON format:

{
"categorie": "Voordracht"
}

The reason for this classification is that the document appears to be a call to action from the chairman of the Stadsdeelraad, inviting members to attend a meeting to discuss and make decisions on various agenda points. This is a common characteristic of a voordracht, which is a formal invitation or call to action issued by a meeting chairman or other authority figure.
prediction: voordracht
label:  actualiteit
response:   Sure! Based on the content of the document you provided, I would classify it as

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Starting...70:80 out of 220
label:  brief
response:   Based on the content of the document, I would classify it under the category of "Raadsnotulen".

Here is the output in JSON format:

{
"categorie": "Raadsnotulen"
}
prediction: raadsnotulen
label:  raadsadres
response:   Het document belongs to the category ['Voordracht'].

Here is the output in JSON format:

{
"categorie": "Voordracht"
}
prediction: voordracht
label:  factsheet
response:   After analyzing the document provided, I can confidently classify it into the category of 'Onderzoeksrapport'.

Here is the output in the form of a JSON file:

{
"categorie": "Onderzoeksrapport"
}

The document is an in-depth analysis of the Armoedemonitor 2021, specifically focusing on the city districts of Amsterdam. It presents a detailed report on the findings, including tables and figures, and provides information on how to request an application. Therefore, it falls under the category of an research rep

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Starting...80:90 out of 220
label:  raadsadres
response:   Sure! Based on the content of the document you provided, I would classify it as a 'Besluit'.

Here's the output in JSON format:

{
"categorie": "Besluit"
}

The reason for this classification is that the document appears to be a formal decision or resolution made by the College of B&W (B&W stands for "Branche & Wijk", which is a Dutch term for "Sector & Neighborhood") regarding the normalization of the prostitution sector in Amsterdam. The document includes the names and contact information of the authors, as well as a statement of the desired outcome of the decision. All of these elements suggest that the document is a formal, official document rather than a personal letter or a research report.
prediction: besluit
label:  actualiteit
response:   Based on the content of the document you provided, I would classify it as a 'Brief'.

Here is the output in the form of a JSON file:

{
"categori

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Starting...90:100 out of 220
label:  schriftelijke vraag
response:   Based on the content of the document provided, I would classify it as a 'Schriftelijke Vraag'.

Here is the output in the form of a JSON file:

{
"categorie": "Schriftelijke Vraag"
}
prediction: schriftelijke vraag
label:  onderzoeksrapport
response:   Based on the content of the document you provided, I would classify it as a 'Besluit'.

Here is the output in the form of a JSON file:

{
"categorie": "Besluit"
}

The reason for this classification is that the document appears to be a decision or recommendation made by a group of people, as it mentions a "samenwerking" (collaboration) of "kernpartners" (core partners) and outlines a plan for "veiligheid en zorg" (safety and care) in Amsterdam-Amstelland. The document also includes specific numbers and statistics related to the topic, which is consistent with the format of a decision-making document.
prediction: besluit
label:  besl

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Starting...100:110 out of 220
label:  agenda
response:   Het document Termijnagenda stadsdeelcommissie Noord kan worden gecategoriseerd als 'Agenda'.

Hier is de output in de vorm van een JSON file:

{
"categorie": "Agenda"
}
prediction: agenda
label:  voordracht
response:   Based on the content of the document provided, I would classify it as a 'Voordracht' (Dutch for 'Speech').

Here is the output in JSON format:

{
"categorie": "Voordracht"
}
prediction: voordracht
label:  raadsnotulen
response:   Based on the content of the document you provided, I would classify it as a 'Raadsnotulen'.

Here is the output in JSON format:

{
"categorie": "Raadsnotulen"
}
prediction: raadsnotulen
label:  motie
response:   Sure, I can classify the document for you. Based on the content you provided, the document belongs to the category "Motie".

Here is the output in JSON format:

{
"categorie": "Motie"
}

The reason for this classification is that the document i

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Starting...110:120 out of 220
label:  motie
response:   Sure, I can classify the document for you. Based on the content provided, I would classify the document as ['Besluit'].

Here's the output in JSON format:

{
"categorie": "Besluit"
}

The reason for this classification is that the document appears to be a formal decision or resolution made by the Amsterdam City Council, with a specific date and agenda item. The document includes the names of the council members who proposed the amendment, as well as the status of the amendment (in this case, "Aangenomen" or "Adopted").
prediction: besluit
label:  agenda
response:   Sure, I can classify the document for you. Based on the content of the document, I would classify it under the category of 'Agenda'.

Here is the output in JSON format:

{
"categorie": "Agenda"
}
prediction: agenda
label:  onderzoeksrapport
response:   Based on the content of the document, the most appropriate category for classific

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Starting...120:130 out of 220
label:  factsheet
response:   Based on the content of the document, I would classify it as a 'Raadsnotulen'.

Here is the output in the form of a JSON file:

{
"categorie": "Raadsnotulen"
}
prediction: raadsnotulen
label:  actualiteit
response:   Het document kan worden gecategoriseerd als:

{'categorie': 'Agenda'}
prediction: agenda
label:  voordracht
response:   Sure, I can classify the document for you. Based on the content of the document you provided, I would categorize it as a 'Voordracht' (Dutch for 'Speech').

Here's the output in JSON format:

{
"categorie": "Voordracht"
}

The reason for this categorization is that the document appears to be a speech or presentation given to the Raadscommissie voor Woningbouw, Volkshuisvesting en Monumenten in Amsterdam on October 31, 2023. The document contains a list of topics to be discussed, including "Portefeuille Volkshuisvesting" and "Onderwijs," which suggests that th

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Starting...130:140 out of 220
label:  actualiteit
response:   Based on the content of the document you provided, I would classify it as a 'Besluit' (Dutch for 'Decision').

Here is the output in JSON format:

{
"categorie": "Besluit"
}

The reason for this classification is that the document appears to be a formal decision made by the College of Burgomaster and Aldermen of Amsterdam, with a formal structure and language typical of Dutch government documents. The document includes the names of the decision-makers, their contact information, and the date of the decision, which are all consistent with the format of a Dutch government decision.
prediction: besluit
label:  brief
response:   Based on the content of the document you provided, I would classify it as a ['Besluit'] (Decision).

Here's the output in JSON format:

{
"categorie": "Besluit"
}

The document contains information related to a decision or proposal for a decision regarding water-rela

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Starting...140:150 out of 220
label:  raadsnotulen
response:   Sure! Based on the content of the document you provided, I would classify it as a 'Raadsnotulen'.

Here is the output in JSON format:

{
"categorie": "Raadsnotulen"
}

The reason for this classification is that the document appears to be a record of a public meeting of the Gemeente Amsterdam's Gemeenteraad, including the topics discussed and the decisions made during the meeting. This is consistent with the definition of a 'Raadsnotulen', which is a document that records the proceedings of a municipal council meeting in the Netherlands.
prediction: raadsnotulen
label:  besluit
response:   Sure, I can classify the document for you. Based on the content provided, the document belongs to the category ['Voordracht']:

{
"categorie": "Voordracht"
}

The document is a voordracht (a presentation or proposal) for the Bestuurscommissie Oost (Executive Committee) of the municipality of Amsterdam,

Bad pipe message: %s [b'\xf8\xad\xec\xba\x1c\xbb\xd6\xa6\x7fo4Q\x0f\\\x91\xb52m \x1a3\xaf\xd0\xa2\xd8\xf8"\xcd>\x86\xc0\xa9|M\xb7=\\\xd4\xe9\xed0,\xde\xe1\x82\xde\xec\x18.:\xa2\x00\x08\x13\x02\x13\x03\x13\x01\x00\xff\x01\x00\x00\x8f\x00\x00\x00\x0e\x00\x0c\x00\x00\t127.0.0.1\x00\x0b\x00\x04\x03\x00\x01\x02\x00\n\x00\x0c\x00\n\x00\x1d\x00\x17\x00\x1e\x00\x19\x00\x18\x00#\x00\x00\x00\x16\x00\x00\x00\x17\x00\x00\x00\r\x00\x1e\x00\x1c\x04\x03\x05\x03\x06\x03\x08\x07\x08\x08\x08\t\x08\n\x08\x0b\x08\x04\x08\x05\x08\x06\x04\x01\x05\x01\x06\x01\x00+\x00\x03\x02\x03\x04\x00-\x00\x02\x01\x01\x003\x00&\x00$\x00\x1d\x00 Z\xf9P\xae\x10\xec\xe9\n\x10\xc3E\xf8\xebX\xdf\x8d\xc4\xca\x87j`\x04Q', b'']
Bad pipe message: %s [b'x\xcfIc\x0b<\xb5\x9d\x87\xa5', b'\xca\xce\x08\xbd\xba"\x00\x00|\xc0,\xc00\x00\xa3\x00\x9f\xcc\xa9\xcc\xa8\xcc\xaa\xc0\xaf\xc0\xad\xc0\xa3\xc0\x9f\xc0]\xc0a\xc0W\xc0S\xc0+\xc0/\x00\xa2\x00\x9e\xc0\xae\xc0\xac\xc0\xa2\xc0\x9e\xc0\\\xc0`\xc0V\xc0R\xc0$\xc0(\x00k\x00j\xc0#\xc0\'\x00g\x0

label:  schriftelijke vraag
response:   Sure, I can classify the document for you. Based on the provided text, I would categorize it as:

{'categorie': 'Schriftelijke Vraag'}

This document appears to be a written question submitted to the Gemeenteraad (Municipal Council) of Amsterdam, specifically to the mayor and aldermen. The document is addressed to the minister and seeks to draw attention to the issue of better European regulation to protect the housing stock and quality of life. Therefore, it falls under the category of 'Schriftelijke Vraag' or 'Written Question'.
prediction: schriftelijke vraag
label:  motie
response:   Sure, I can classify the document for you. Based on the content you provided, I would classify the document as ['Motie'].

Here is the output in the form of a JSON file:

{
"categorie": "Motie"
}

Explanation:

The document is a motion put forward by a member of the city council, J.F. Bloemberg-Issa, regarding the inclusion of volkstuinen (people's gardens) in th

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Starting...150:160 out of 220
label:  voordracht
response:   Sure, I can classify the document for you. Based on the content of the document you provided, I would classify it under the category of "Voordracht" (Dutch for "Speech").

Here is the output in the form of a JSON file:

{
"categorie": "Voordracht"
}

The reason for this classification is that the document appears to be a speech or presentation given to the Gemeente Raadscommissie voor Verkeer, Vervoer en Luchtkwaliteit (City Council Committee for Traffic, Transportation, and Air Quality) on July 6, 2023. The document contains information on the agenda, including a report on the traffic and transportation portfolio, and is likely intended for the committee's consideration and decision-making.
prediction: voordracht
label:  besluit
response:   Based on the content of the document you provided, I would classify it as a 'Besluit'.

Here is the output in the form of a JSON file:

{
"categorie"

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Starting...160:170 out of 220
label:  onderzoeksrapport
response:   Based on the content of the document, I would classify it under the category of "Agenda".

Here is the output in JSON format:

{
"categorie": "Agenda"
}
prediction: agenda
label:  brief
response:   Based on the content of the document you provided, I would classify it as a 'Brief'.

Here is the output in JSON format:

{
"categorie": "Brief"
}
prediction: brief
label:  actualiteit
response:   Based on the content of the document you provided, I would classify it as a 'Gemeenteblad'.

Here is the output in the form of a JSON file:

{
"categorie": "Gemeenteblad"
}

Explanation:

* 'Gemeenteblad' is a category that refers to a document that is related to the agenda and proceedings of a municipal council, such as the Gemeenteblad of Amsterdam.
* The document you provided contains information about the agenda and proceedings of the Gemeenteraad of Amsterdam, including the date, time, and

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Starting...170:180 out of 220
label:  raadsnotulen
response:   Sure, I can classify the document for you. Based on the content provided, I would classify the document as a 'Raadsnotulen'.

Here is the output in JSON format:

{
"categorie": "Raadsnotulen"
}

The reason for this classification is that the document appears to be a record of a municipal council meeting in Amsterdam, with agenda items related to issues such as asbest and gas and electrical installations in various buildings, as well as a letter from a city councillor regarding the Bestuurlijk Arrangement Antilliaanse Risicojongeren. These are all topics that are typically addressed in a municipal council meeting, and are therefore classified as 'Raadsnotulen'.
prediction: raadsnotulen
label:  raadsnotulen
response:   Sure, I can classify the document for you. Based on the provided information, the document belongs to the category "Raadsnotulen" with a score of 100.

Here is the output i

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Starting...180:190 out of 220
label:  brief
response:   Based on the content of the document you provided, I would classify it as a ['Besluit'].

Here is the JSON output:

{
"categorie": "Besluit"
}

The reason for this classification is that the document appears to be a formal decision or recommendation made by the College of Burgemeester and Wethouders of the City of Amsterdam, addressed to the members of the municipal council. The document includes a formal introduction, a list of attachments, and a signature block, which are all typical features of a municipal council decision.
prediction: besluit
label:  onderzoeksrapport
response:   Based on the content of the document, I would classify it as a ['Voordracht'].

Here's the output in JSON format:

{
"categorie": "Voordracht"
}
prediction: voordracht
label:  factsheet
response:   Sure! Based on the content of the document you provided, I would classify it under the category of "Raadsnotulen".

H

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Starting...190:200 out of 220
label:  voordracht
response:   Sure, I can classify the document for you. Based on the provided text, I would categorize the document as ['Voordracht'].

Here's the output in JSON format:

{
"categorie": "Voordracht"
}
prediction: voordracht
label:  onderzoeksrapport
response:   Based on the content of the document you provided, I would classify it as a 'Besluit'.

Here is the output in the form of a JSON file:

{
"categorie": "Besluit"
}

Explanation:

The document appears to be a decision or policy document, outlining the requirements and procedures for conducting ecological and landscape assessments in the context of the Nature Conservation Act. The document includes specific requirements for the assessments, such as the scope of the assessment, the methods to be used, and the reporting requirements. It also includes contact information for Datura Molecular Solutions BV, a company that appears to be involved in the 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Starting...200:210 out of 220
label:  motie
response:   Sure! Based on the content of the document you provided, I would classify it as a 'Besluit'.

Here is the output in the form of a JSON file:

{
"categorie": "Besluit"
}

The reason for this classification is that the document appears to be a formal decision or resolution made by the Gemeenteraad of Amsterdam, outlining a specific goal or objective for the municipality. The document includes language such as "Besluit", "Gemeenteraad", "Jaar 2015", "Afdeling 1", and "Nummer 1051", which are all typical elements of a formal decision-making process. Additionally, the document includes a specific goal or objective for the municipality, which is a key component of a besluit.
prediction: besluit
label:  agenda
response:   Based on the content of the document you provided, I would classify it as an 'Agenda'.

Here is the output in the form of a JSON file:

{
"categorie": "Agenda"
}
prediction: agenda


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Starting...last 10 docs
label:  besluit
response:   Sure, I can classify the document for you. Based on the content provided, I would categorize the document as a 'Besluit' (Decision).

Here is the output in the form of a JSON file:

{
"categorie": "Besluit"
}

The reason for this classification is that the document contains a decision or recommendation, as evident from the content of the document. The document mentions a meeting of the Bestuurscommissie (Executive Committee) on May 27, 2015, and includes a deco voordracht (presentation) and a besluit (decision) regarding the execution agenda for mobility, as well as an advice on the matter. This suggests that the document is a formal decision or recommendation made by the Executive Committee, which is consistent with the 'Besluit' category.
prediction: besluit
label:  motie
response:   Sure, I can classify the document for you. Based on the content provided, I would classify the document as:

{'ca

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.


In [24]:
pred = pd.read_pickle(OVERVIEW_PATH)
display(pred)

Unnamed: 0,model,run_id,date,train_set,test_set,train_set_support,test_set_support,split_col,text_col,runtime,accuracy,macro_avg_precision,macro_avg_recall,macro_avg_f1,classification_report
0,Llama-2-7b-chat-hf,IC_Llama-2-7b-chat-hfzeroshot_prompt_mistral_l...,2024-05-22 21:29:48.627017+02:00,train,test,9900,1100,balanced_split,TruncationLlamaTokensFront100Back0,140223.734147,0.422727,0.491578,0.31,0.287423,precision...
0,Llama-2-7b-chat-hf,IC_Llama-2-7b-chat-hfzeroshot_prompt_mistral_l...,2024-05-27 17:53:30.065278+02:00,train,test,9900,1100,balanced_split,TruncationLlamaTokensFront100Back100,164152.727557,0.425455,0.543238,0.334286,0.321521,precision ...
0,Llama-2-7b-chat-hf,IC_Llama-2-7b-chat-hfzeroshot_prompt_mistral_l...,2024-05-26 21:20:27.966620+02:00,train,test,9900,1100,balanced_split,TruncationLlamaTokensFront200Back0,132040.014386,0.474545,0.537874,0.372857,0.34995,precision ...


### Mistral

In [None]:
SHORT_MODEL_NAME = 'Mistral'
PROMPT = pt.zeroshot_prompt_mistral_llama
PROMPT_NAME = ph.get_promptfunction_name(PROMPT)
TOKENS_COL = 'LlamaTokens' # column with text split using tokenizer of either mistral (MistralTokens) or Llama (LlamaTokens). Using Llama, because Llama split into more tokens. 
FRONT_THRESHOLD = 100
BACK_THRESHOLD = 0

if PROMPT==pt.zeroshot_prompt_mistral_llama:
    NUMBER_EXAMPLES = 0
elif PROMPT == pt.fewshot_prompt_bm25:
    NUMBER_EXAMPLES = 2



#### Load model - In-context learning
Note - ONLY load one model: either in-context or fine-tuning

In [None]:
from transformers import pipeline, Conversation

chatbot_mistral = pipeline(task='conversational', model='mistralai/Mistral-7B-Instruct-v0.2',
                   device_map='cpu', model_kwargs={'offload_buffers':True})

MODEL_NAME = 'Mistral-7B-Instruct-v0.2'
SUBFOLDER = 'in_context'
SHORT_ID = 'IC'


#### Load model - finetuning

In [None]:
from transformers import pipeline, Conversation

chatbot_mistral = pipeline(task='conversational', model='FemkeBakker/MistralSmallData200Tokens',
                   device_map='cpu', model_kwargs={'offload_buffers':True})

MODEL_NAME = 'MistralSmallData200Tokens'
SUBFOLDER = 'finetuning'
SHORT_ID = 'FT'

#### Set-up paths to save predictions

In [None]:
import os

if SPLIT_COLUMN == '4split' or SPLIT_COLUMN == '2split':
    OVERVIEW_PATH = f"{cf.output_path}/predictionsVal/{SUBFOLDER}/{SHORT_MODEL_NAME}/{PROMPT_NAME}/overview.pkl"
    PREDICTION_PATH = f"{cf.output_path}/predictionsVal/{SUBFOLDER}/{SHORT_MODEL_NAME}/{PROMPT_NAME}/predictions.pkl"
    
elif SPLIT_COLUMN == 'balanced_split':
    if SUBFOLDER == 'finetuning':
        OVERVIEW_PATH = f"{cf.output_path}/predictionsFinal/{SUBFOLDER}/overview.pkl"
        PREDICTION_PATH = f"{cf.output_path}/predictionsFinal/{SUBFOLDER}/{SHORT_MODEL_NAME}First{FRONT_THRESHOLD}Last{BACK_THRESHOLD}Predictions.pkl"

    elif SUBFOLDER == 'in_context':
        OVERVIEW_PATH = f"{cf.output_path}/predictionsFinal/{SUBFOLDER}/{SHORT_MODEL_NAME}/overview.pkl"
        PREDICTION_PATH = f"{cf.output_path}/predictionsFinal/{SUBFOLDER}/{SHORT_MODEL_NAME}/{PROMPT_NAME}/First{FRONT_THRESHOLD}Last{BACK_THRESHOLD}Predictions.pkl"

print(OVERVIEW_PATH)
print(PREDICTION_PATH)

if not os.path.isdir(os.path.dirname(os.path.abspath(OVERVIEW_PATH))):
    raise ValueError("Folder to OVERVIEW_PATH does not exist") 
if not os.path.isdir(os.path.dirname(os.path.abspath(PREDICTION_PATH))):
    raise ValueError("Folder to PREDICTION_PATH does not exist") 

run_id = f'{SHORT_ID}_{MODEL_NAME}{PROMPT_NAME}{TOKENS_COL}{FRONT_THRESHOLD}_{BACK_THRESHOLD}{TRAIN_SET}{TEST_SET}_numEx{NUMBER_EXAMPLES}'
print ('\n', run_id)

#### Run experiment

In [None]:
# run experiment

# add new column with truncated text -> new dataframe with column + new column name
trunc_df = tf.add_truncation_column(txt,'text', TOKENS_COL, FRONT_THRESHOLD, BACK_THRESHOLD)

# if new run MAKE SURE RUN_ID IS UNIQUE, if want to resume run, pass in that run_id
run_experiment(chatbot_mistral, trunc_df, run_id, PROMPT, TEXT_COLUMN, SPLIT_COLUMN, TRAIN_SET, TEST_SET, LABEL_COLUMN, PREDICTION_PATH, OVERVIEW_PATH, MODEL_NAME, NUMBER_EXAMPLES)


In [None]:
pred = pd.read_pickle(OVERVIEW_PATH)
display(pred)
