In [1]:
!bash /home/azureuser/cloudfiles/code/blobfuse/blobfuse_raadsinformatie.sh

In [3]:
import sys
sys.path.append("..")

# Select where to run notebook: "azure" or "local"
my_run = "azure"

# import my_secrets as sc
# import settings as st

if my_run == "azure":
    import config_azure as cf
elif my_run == "local":
    import config as cf


import os
if my_run == "azure":
    if not os.path.exists(cf.HUGGING_CACHE):
        os.mkdir(cf.HUGGING_CACHE)
    os.environ["TRANSFORMERS_CACHE"] = cf.HUGGING_CACHE

# set-up environment - GEITje-7b-chat InContextLearning:
# - install blobfuse -> sudo apt-get install blobfuse
# - pip install transformers
# - pip install torch
# - pip install accelerate
# - pip install jupyter
# - pip install ipywidgets

import pandas as pd

## Notebook overview
- Goal: Run experiment for InContext Learning GEITje
- Trial run model -> prompt GEITje using, example prompt
- Zeroshot prompts
- Fewshot prompts

Load data and functions:
- data is already split
- text is already converted to tokens using model tokenizer 

In [3]:
import pandas as pd
# df = pd.read_pickle(f"{cf.output_path}/txtfiles_tokenizer.pkl")

import sys
sys.path.append('../scripts/') 
import prompt_template as pt
import prediction_helperfunctions as ph
import truncation as tf


In [4]:
import torch
torch.cuda.empty_cache()

In [5]:
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

#### Trial run Models 
Code to run the models with a simple prompt.

In [None]:
from transformers import pipeline, Conversation

chatbot_geitje = pipeline(task='conversational', model='Rijgersberg/GEITje-7B-chat-v2',
                   device_map='auto', model_kwargs={'offload_buffers':True})


chatbot_llama = pipeline(task='conversational', model='meta-llama/Llama-2-7b-chat-hf',
                   device_map='auto', model_kwargs={'offload_buffers':True})

chatbot_mistral = pipeline(task='conversational', model='mistralai/Mistral-7B-Instruct-v0.2',
                   device_map='auto', model_kwargs={'offload_buffers':True})

## EXAMPLE PROMPT
# print(chatbot(
    # Conversation('Welk woord hoort er niet in dit rijtje thuis: "auto, vliegtuig, geitje, bus"?')
# ))

#### Experiment functions
Prompt GEITje for each document and save the prediction, return response, response time and the prompt version

Code structure:
- 2 functions/cells:
- predictions_incontextlearning -> given a df with docs that need to be predicted, prompt the model
- run the experiment -> built in failsaves (df run in parts, with saves in between)

In [6]:
import time
import os
import pandas as pd
from bm25 import BM25


""" Given a dataframe with txt, return a df with predictions """
# docs_df = dataframe with the documents that need to be predicted
# text_column = name of the column that includes the input_text. Can be different based on the text representation method. 
# prompt_function = prompt template 
# train_df = dataframe with docs, which can be used as examples/training data/context data
# num_examples = number of examples in the prompt

def predictions_incontextlearning(chatbot, docs_df, text_column, prompt_function, train_df, num_examples):
    results_df = pd.DataFrame(columns = ['id', 'path', 'text_column', 'prompt_function', 'response', 'prediction', 'label', 'runtime', 'date', 'prompt'])


    if prompt_function == pt.fewshot_prompt_with_template or prompt_function == pt.fewshot_prompt_no_template:
        BM25_model = BM25()
        BM25_model.fit(train_df[text_column])
   

    # prompt each document
    for index, row in docs_df.iterrows():
        # if (index + 1) % 200 == 0:
        #     print(f"Iteration {index +1}/{len(docs_df)} completed.")

        start_time = time.time()

        # get the prompt, with the doc filled in
        txt = row[text_column]

        # each prompt function takes different arguments
        # zeroshot prompt for geitje
        if prompt_function == pt.zeroshot_prompt_geitje:
            prompt = prompt_function(txt)

        # zeroshot function for mistral and llama
        elif prompt_function == pt.zeroshot_prompt_mistral_llama:
            prompt = prompt_function(txt)

        # select fewshot examples using bm25, fewshot is the same for all models
        # elif prompt_function == pt.fewshot_prompt_bm25:
        #     prompt = prompt_function(txt, train_df, num_examples, text_column, BM25_model)
        
        elif prompt_function == pt.fewshot_prompt_no_template:
            prompt = prompt_function(txt, train_df, num_examples, text_column, BM25_model)

        elif prompt_function == pt.fewshot_prompt_with_template:
            prompt = prompt_function(txt, train_df, num_examples, text_column, BM25_model)

        else:
            raise ValueError("Prompt function not recognised. Check if prompt function is in prompt_template.py and included in the options above.")

        # prompt and get the response
        # print(prompt)
        converse = chatbot(Conversation(prompt))
        response = converse[1]['content']
        print("label: ", row['label'].lower())
        print("response: ", response)

        # extract prediction from response
        prediction = ph.get_prediction_from_response(response)
        print("prediction:", prediction)

        # save results in dataframe
        results_df.loc[len(results_df)] = {
            'id': row['id'],
            'path' : row['path'],
            'text_column' : docs_df.iloc[0]['trunc_col'],
            'prompt_function': ph.get_promptfunction_name(prompt_function),
            'response':response,
            'prediction':prediction,
            'label':row['label'].lower(),
            'runtime':time.time()-start_time,
            'date': ph.get_datetime(),
            'prompt':prompt
        }
    return results_df



In [7]:
import os
import time
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score

"""
Function to run GEITje In-Context Learning experiment. 
The function allows to resume experiment, if run_id matches.
"""
# df = dataframe with all docs that need to have a prediction (docs still need to be predict + already predicted)
# run_id = unqiue for each experiment. 
# prompt_function = which prompt from prompt_template.py to use
# text_col = colum in df where the text is. (Needs to be already truncated)
# split_col = column with the dataset split. Either '2split' (train and test)or '4split'(train, test, dev and val)
# subset_train = indicates which subset to use as training. either 'train' or 'dev'
# subset_test = indicates which subset to use for testing. either 'test' or 'val'
# label_col = column with the true label
# prediction_path = path to file where predictions need to be saved.
# overview_path = path to file where results of each run need to be saved.
# model_name = name of the model. string.
# num_exmples = number of exaples given to prompt. zero in case of zeroshot. 

def run_experiment(chatbot, df, run_id, prompt_function, text_col, split_col, subset_train, subset_test, label_col, prediction_path, overview_path, model_name, num_examples=0):
    test_df = df.loc[df[split_col]==subset_test]
    train_df = df.loc[df[split_col]==subset_train]
    
    # get rows of df that still need to be predicted for the specific run_id
    to_predict, previous_predictions = ph.get_rows_to_predict(test_df, prediction_path, run_id)

    # devide to_predict into subsection of 50 predictions at a time. 
    # Allows to rerun without problem. And save subsections of 50 predictions.
    step_range = list(range(0, len(to_predict), 10))

    for i in range(len(step_range)):
        try:
            sub_to_predict = to_predict.iloc[step_range[i]:step_range[i+1]]
            print(f'Starting...{step_range[i]}:{step_range[i+1]} out of {len(to_predict)}')
        except Exception as e:
            sub_to_predict = to_predict[step_range[i]:]
            print(f'Starting...last {len(sub_to_predict)} docs')

        # prompt geitje
        predictions = predictions_incontextlearning(chatbot, sub_to_predict, text_col, prompt_function, train_df, num_examples)

        # save info
        predictions['run_id'] = run_id
        predictions['train_set'] = subset_train
        predictions['test_set'] = subset_test
        predictions['shots'] = num_examples

        # save new combinations in file
        print("Dont interrupt, saving predictions...")
        ph.combine_and_save_df(predictions, prediction_path)

        # if previous predictions, combine previous with new predictions, to get update classification report
        try:
            predictions = pd.concat([predictions, previous_predictions])

            # set previous predictions to all predictions made until now. Necessary for next loop
            previous_predictions = predictions
        except Exception as e:
            # set previous predictions to all predictions made until now. Necessary for next loop
            previous_predictions = predictions

        # save results in overview file
        date = ph.get_datetime()
        y_test = predictions['label']
        y_pred = predictions['prediction']

        # change error predictions to one error
        # error_names = ['NoPredictionInOutput', 'MultiplePredictionErrorInFormatting','NoPredictionFormat', 'MultiplePredictionErrorInOutput']
        # y_pred = ['OutputError' if x in error_names else x for x in y_pred]

        report = classification_report(y_test, y_pred)

        overview = pd.DataFrame(
            [{
                'model':model_name,
                'run_id':run_id,
                'date': date,
                'train_set': subset_train,
                'test_set': subset_test,
                'train_set_support':len(df.loc[df[split_col]==subset_train]),
                'test_set_support':len(predictions),
                'split_col':split_col,
                'text_col':df.iloc[0]['trunc_col'],
                'runtime':sum(predictions['runtime']),
                'accuracy': accuracy_score(y_test, y_pred),
                'macro_avg_precision': precision_score(y_test, y_pred, average='macro'),
                'macro_avg_recall': recall_score(y_test, y_pred, average='macro'),
                'macro_avg_f1': f1_score(y_test, y_pred, average='macro'),
                'weighted_avg_precision': precision_score(y_test, y_pred, average='weighted'),
                'weighted_avg_recall': recall_score(y_test, y_pred, average='weighted'),
                'weighted_avg_f1': f1_score(y_test, y_pred, average='weighted'),
                'classification_report':report
            }   ]
        )
        # remove previous results of run_id, replace with new/updated results
        ph.replace_and_save_df(overview, overview_path, run_id)
        print("Saving done! Interrupting is allowed.")
        print("Accuracy: ", accuracy_score(y_test, y_pred))



Set up variables that are the same for each model

In [8]:
#set  variables, same for each model
TRAIN_SET = 'train' # must be dev or train
TEST_SET = 'test' # must be val or test
SPLIT_COLUMN = 'balanced_split' #must be either 2split or 4split. 2split = data split into train and test. 4split = data split into train, test, dev and val. 
LABEL_COLUMN = 'label'
TEXT_COLUMN = 'trunc_txt'


In [9]:
txt = pd.read_pickle(f"{cf.output_path}/txtfiles_tokenizer.pkl")

### GEITje

In [None]:
SHORT_MODEL_NAME = 'GEITje'
PROMPT = pt.zeroshot_prompt_geitje
PROMPT_NAME = ph.get_promptfunction_name(PROMPT)
TOKENS_COL = 'LlamaTokens' # column with text split using tokenizer of either mistral (MistralTokens) or Llama (LlamaTokens). Using Llama, because Llama split into more tokens. 
FRONT_THRESHOLD = 200
BACK_THRESHOLD = 0

if PROMPT==pt.zeroshot_prompt_geitje:
    NUMBER_EXAMPLES = 0
elif PROMPT == pt.fewshot_prompt_no_template or PROMPT == pt.fewshot_prompt_with_template:
    NUMBER_EXAMPLES = 2



#### Load model - In-context learning
Note - ONLY load one model: either in-context or fine-tuning

In [None]:
from transformers import pipeline, Conversation

chatbot_geitje = pipeline(task='conversational', model='Rijgersberg/GEITje-7B-chat-v2',
                    device_map='cpu', model_kwargs={'offload_buffers':True})

MODEL_NAME = 'GEITje-7B-chat-v2'
SUBFOLDER = 'in_context'
SHORT_ID = 'IC'



#### Load model - Finetuning

In [None]:
from transformers import pipeline, Conversation

chatbot_geitje = pipeline(task='conversational', model='FemkeBakker/AmsterdamDocClassificationGEITje200T1Epochs',
                   device_map='cpu', model_kwargs={'offload_buffers':True})

MODEL_NAME = 'AmsterdamDocClassificationGEITje200T1Epochs'
SUBFOLDER = 'finetuning'
SHORT_ID = 'FT'
EPOCHS = 1

#### Set-up paths to save predictions

In [None]:
import os

if SPLIT_COLUMN == '4split' or SPLIT_COLUMN == '2split':
    OVERVIEW_PATH = f"{cf.output_path}/predictionsVal/{SUBFOLDER}/{SHORT_MODEL_NAME}/{PROMPT_NAME}/overview.pkl"
    PREDICTION_PATH = f"{cf.output_path}/predictionsVal/{SUBFOLDER}/{SHORT_MODEL_NAME}/{PROMPT_NAME}/predictions.pkl"
    
elif SPLIT_COLUMN == 'balanced_split':
    if SUBFOLDER == 'finetuning':
        OVERVIEW_PATH = f"{cf.output_path}/predictionsFinal/{SUBFOLDER}/{EPOCHS}epochs/overview.pkl"
        PREDICTION_PATH = f"{cf.output_path}/predictionsFinal/{SUBFOLDER}/{EPOCHS}epochs/{SHORT_MODEL_NAME}First{FRONT_THRESHOLD}Last{BACK_THRESHOLD}Predictions.pkl"

    elif SUBFOLDER == 'in_context':
        OVERVIEW_PATH = f"{cf.output_path}/predictionsFinal/{SUBFOLDER}/{SHORT_MODEL_NAME}/overview.pkl"
        PREDICTION_PATH = f"{cf.output_path}/predictionsFinal/{SUBFOLDER}/{SHORT_MODEL_NAME}/{PROMPT_NAME}/First{FRONT_THRESHOLD}Last{BACK_THRESHOLD}Predictions.pkl"

print(OVERVIEW_PATH)
print(PREDICTION_PATH)

if not os.path.isdir(os.path.dirname(os.path.abspath(OVERVIEW_PATH))):
    raise ValueError("Folder to OVERVIEW_PATH does not exist") 
if not os.path.isdir(os.path.dirname(os.path.abspath(PREDICTION_PATH))):
    raise ValueError("Folder to PREDICTION_PATH does not exist") 

run_id = f'{SHORT_ID}_{MODEL_NAME}{PROMPT_NAME}{TOKENS_COL}{FRONT_THRESHOLD}_{BACK_THRESHOLD}{TRAIN_SET}{TEST_SET}_numEx{NUMBER_EXAMPLES}'
print ('\n', run_id)


#### Run experiment

In [None]:
# ----- EXPERIMENT --------
# add new column with truncated text -> new dataframe with column + new column name
trunc_df = tf.add_truncation_column(txt,'text', TOKENS_COL, FRONT_THRESHOLD, BACK_THRESHOLD)


# if new run MAKE SURE RUN_ID IS UNIQUE, if want to resume run, pass in that run_id
run_experiment(chatbot_geitje, trunc_df, run_id, PROMPT, TEXT_COLUMN, SPLIT_COLUMN, TRAIN_SET, TEST_SET, LABEL_COLUMN, PREDICTION_PATH, OVERVIEW_PATH, MODEL_NAME, NUMBER_EXAMPLES)


In [None]:
pred = pd.read_pickle(OVERVIEW_PATH)
display(pred)

### Llama


In [20]:
SHORT_MODEL_NAME = 'Llama'
PROMPT = pt.fewshot_prompt_with_template
PROMPT_NAME = ph.get_promptfunction_name(PROMPT)
TOKENS_COL = 'LlamaTokens' # column with text split using tokenizer of either mistral (MistralTokens) or Llama (LlamaTokens). Using Llama, because Llama split into more tokens. 
FRONT_THRESHOLD = 200
BACK_THRESHOLD = 0

if PROMPT==pt.zeroshot_prompt_mistral_llama:
    NUMBER_EXAMPLES = 0
elif PROMPT == pt.fewshot_prompt_with_template or PROMPT == pt.fewshot_prompt_no_template:
    NUMBER_EXAMPLES = 2



#### Load model - In-context learning
Note - ONLY load one model: either in-context or fine-tuning

In [21]:
from transformers import pipeline, Conversation

chatbot_llama = pipeline(task='conversational', model='meta-llama/Llama-2-7b-chat-hf',
                   device_map='cpu', model_kwargs={'offload_buffers':True})
# load llama using cpu, else will give cuda out of memory error when running fewshot bm25 prompt.

MODEL_NAME = 'Llama-2-7b-chat-hf'
SUBFOLDER = 'in_context'
SHORT_ID = 'IC'



Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

#### Load model - finetuning

In [10]:
from transformers import pipeline, Conversation

chatbot_llama = pipeline(task='conversational', model='FemkeBakker/AmsterdamDocClassificationLlama200T3Epochs',
                   device_map='cpu', model_kwargs={'offload_buffers':True})

MODEL_NAME = 'AmsterdamDocClassificationLlama200T1Epochs'
SUBFOLDER = 'finetuning'
SHORT_ID = 'FT'
EPOCHS = 3



config.json:   0%|          | 0.00/732 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/3.59G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/183 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.76k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/438 [00:00<?, ?B/s]

#### Set-up paths to save predictions

In [22]:
import os

if SPLIT_COLUMN == '4split' or SPLIT_COLUMN == '2split':
    OVERVIEW_PATH = f"{cf.output_path}/predictionsVal/{SUBFOLDER}/{SHORT_MODEL_NAME}/{PROMPT_NAME}/overview.pkl"
    PREDICTION_PATH = f"{cf.output_path}/predictionsVal/{SUBFOLDER}/{SHORT_MODEL_NAME}/{PROMPT_NAME}/predictions.pkl"
    
elif SPLIT_COLUMN == 'balanced_split':
    if SUBFOLDER == 'finetuning':
        OVERVIEW_PATH = f"{cf.output_path}/predictionsFinal/{SUBFOLDER}/{EPOCHS}epochs/overview.pkl"
        PREDICTION_PATH = f"{cf.output_path}/predictionsFinal/{SUBFOLDER}/{EPOCHS}epochs/{SHORT_MODEL_NAME}First{FRONT_THRESHOLD}Last{BACK_THRESHOLD}Predictions.pkl"

    elif SUBFOLDER == 'in_context':
        OVERVIEW_PATH = f"{cf.output_path}/predictionsFinal/{SUBFOLDER}/{SHORT_MODEL_NAME}/overview.pkl"
        PREDICTION_PATH = f"{cf.output_path}/predictionsFinal/{SUBFOLDER}/{SHORT_MODEL_NAME}/{PROMPT_NAME}/First{FRONT_THRESHOLD}Last{BACK_THRESHOLD}Predictions.pkl"

print(OVERVIEW_PATH)
print(PREDICTION_PATH)

if not os.path.isdir(os.path.dirname(os.path.abspath(OVERVIEW_PATH))):
    raise ValueError("Folder to OVERVIEW_PATH does not exist") 
if not os.path.isdir(os.path.dirname(os.path.abspath(PREDICTION_PATH))):
    raise ValueError("Folder to PREDICTION_PATH does not exist") 

run_id = f'{SHORT_ID}_{MODEL_NAME}{PROMPT_NAME}{TOKENS_COL}{FRONT_THRESHOLD}_{BACK_THRESHOLD}{TRAIN_SET}{TEST_SET}_numEx{NUMBER_EXAMPLES}'
print ('\n', run_id)


/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/predictionsFinal/in_context/Llama/overview.pkl
/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/predictionsFinal/in_context/Llama/fewshot_prompt_with_template/First200Last0Predictions.pkl

 IC_Llama-2-7b-chat-hffewshot_prompt_with_templateLlamaTokens200_0traintest_numEx2


#### Run experiment

In [24]:
# add new column with truncated text -> new dataframe with column + new column name
trunc_df = tf.add_truncation_column(txt,'text', TOKENS_COL, FRONT_THRESHOLD, BACK_THRESHOLD)

# if new run MAKE SURE RUN_ID IS UNIQUE, if want to resume run, pass in that run_id
run_experiment(chatbot_llama, trunc_df, run_id, PROMPT, TEXT_COLUMN, SPLIT_COLUMN, TRAIN_SET, TEST_SET, LABEL_COLUMN, PREDICTION_PATH, OVERVIEW_PATH, MODEL_NAME, NUMBER_EXAMPLES)


Run-id already known, resuming predictions...
Starting...0:10 out of 930
label:  factsheet
response:   Based on the provided examples and the document that needs to be categorized, I would classify it under the category of "Onderzoeksrapport".

Here is the output in JSON format:

{
"categorie": "Onderzoeksrapport"
}

Explanation:

The document provides an overview of the development of the social assistance budget in Amsterdam, including the number of recipients and the amount of outlays. It also presents data on the number of applications and the proportion of unprocessed applications. The document is written in a formal and structured tone, indicating that it is a research report rather than a brief or a letter. Additionally, the document includes tables and figures, which are commonly used in research reports to present data and analysis.
prediction: onderzoeksrapport
label:  agenda
response:   Sure, I can help you classify the document. Based on the provided examples and the conten

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Accuracy:  0.7833333333333333
Starting...10:20 out of 930
label:  raadsadres
response:   Based on the provided examples and the category list, I would categorize the third document as:

{'categorie': 'Raadsadres'}

The document is a formal letter addressed to the Gemeente Amsterdam, requesting attention for a specific issue regarding the protection and support of the green edges in the Burgwallengebied. It contains the name, email address, and address of the sender, as well as a brief message. The document is written in a formal and professional tone, and it appears to be a standard raadsadres (council address) format.
prediction: raadsadres
label:  voordracht
response:   Sure! Based on the provided documents and the category options you provided, I can categorize the document VN2023-023417 Raadscommissie voor Ruimtelijke Ordening en Grond en Ontwikkeling RO. Here is the output in JSON format:

{
"categorie": "Voordracht"
}

Explanation:

Based on 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Accuracy:  0.7894736842105263
Starting...20:30 out of 930
label:  factsheet
response:   Sure! Based on the provided examples and the information provided in the document, I would categorize the document as:

{'categorie': 'Factsheet'}

The document provides a summary of the Regiobeeld Amsterdam Vitaal & Gezond, including its content and focus areas, and provides information on the preventive health care in the city of Amsterdam. It is a factual document that provides a brief overview of the topic, making it a good fit for the 'Factsheet' category.
prediction: factsheet
label:  brief
response:   Sure, I can help you with that! Based on the provided examples and the information provided in the question, I would categorize the third document as follows:

{ 'categorie': 'Brief' }

The document appears to be a brief informing the members of the city council of Amsterdam about an errata note regarding the answer to the city center development project Osd

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Accuracy:  0.79
Starting...30:40 out of 930
label:  raadsadres
response:   Sure, I can help you categorize the document! Based on the provided examples and the list of categories, I would categorize the given document as:

{'categorie': 'Raadsadres'}

The document appears to be a letter addressed to the municipal council, requesting them to take action regarding the problem of Zwarte Piet. It mentions that there is a growing understanding among Amsterdammers that it is not appropriate to present Zwarte Piet as he has been depicted for generations, and that there is a need to address this issue. The document also mentions that 6% of the population wants to abolish Zwarte Piet completely, while 8% wants to keep him but in a different form.

Therefore, based on the content and tone of the document, it seems to fit best into the category of a 'Raadsadres' (Council Address).
prediction: raadsadres
label:  motie
response:   Sure, I can help you categoriz

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Accuracy:  0.780952380952381
Starting...40:50 out of 930
label:  schriftelijke vraag
response:   Sure! Based on the provided examples and the document that needs to be categorized, I can classify it as follows:

Document: x Gemeente Amsterdam R
Gemeenteraad
% Gemeenteblad
% Schriftelijke vragen
Jaar 2019
Afdeling 1
Nummer 427
Datum indiening 7 maart 2019
Datum akkoord 17 april 2019
Publicatiedatum 18 april 2019
Onderwerp
Beantwoording schriftelijke vragen van het lid El Ksaihi inzake de oproep van ziekenhuizen voor een rookvrije omgeving rondom ziekenhuizen.
Aan de gemeenteraad
Toelichting door vragenstelster: De Amsterdamse nieuwszender AT5 berichtte op 7 maart 2019 dat ziekenhuizen wethouder Kukenheim oproepen.

Output: {'categorie': 'Schriftelijke Vraag'}
prediction: schriftelijke vraag


In [23]:
pred = pd.read_pickle(OVERVIEW_PATH)
display(pred)

Unnamed: 0,model,run_id,date,train_set,test_set,train_set_support,test_set_support,split_col,text_col,runtime,accuracy,macro_avg_precision,macro_avg_recall,macro_avg_f1,classification_report,weighted_avg_precision,weighted_avg_recall,weighted_avg_f1
0,Llama-2-7b-chat-hf,IC_Llama-2-7b-chat-hfzeroshot_prompt_mistral_l...,2024-05-22 21:29:48.627017+02:00,train,test,9900,1100,balanced_split,TruncationLlamaTokensFront100Back0,140223.734147,0.422727,0.491578,0.31,0.287423,precision...,0.670333,0.422727,0.39194
0,Llama-2-7b-chat-hf,IC_Llama-2-7b-chat-hfzeroshot_prompt_mistral_l...,2024-05-27 17:53:30.065278+02:00,train,test,9900,1100,balanced_split,TruncationLlamaTokensFront100Back100,164152.727557,0.425455,0.543238,0.334286,0.321521,precision ...,0.691394,0.425455,0.409208
0,Llama-2-7b-chat-hf,IC_Llama-2-7b-chat-hfzeroshot_prompt_mistral_l...,2024-05-26 21:20:27.966620+02:00,train,test,9900,1100,balanced_split,TruncationLlamaTokensFront200Back0,132040.014386,0.474545,0.537874,0.372857,0.34995,precision ...,0.684567,0.474545,0.44539
0,Llama-2-7b-chat-hf,IC_Llama-2-7b-chat-hffewshot_prompt_with_templ...,2024-06-04 06:20:53.503020+02:00,train,test,9900,170,balanced_split,TruncationLlamaTokensFront200Back0,38531.901579,0.776471,0.706896,0.634042,0.650321,precision...,0.909212,0.776471,0.809418


### Mistral

In [14]:
SHORT_MODEL_NAME = 'Mistral'
PROMPT = pt.zeroshot_prompt_mistral_llama
PROMPT_NAME = ph.get_promptfunction_name(PROMPT)
TOKENS_COL = 'LlamaTokens' # column with text split using tokenizer of either mistral (MistralTokens) or Llama (LlamaTokens). Using Llama, because Llama split into more tokens. 
FRONT_THRESHOLD = 200
BACK_THRESHOLD = 0

if PROMPT==pt.zeroshot_prompt_mistral_llama:
    NUMBER_EXAMPLES = 0
elif PROMPT == pt.fewshot_prompt_with_template or PROMPT == pt.fewshot_prompt_no_template:
    NUMBER_EXAMPLES = 2



#### Load model - In-context learning
Note - ONLY load one model: either in-context or fine-tuning

In [10]:
from transformers import pipeline, Conversation

chatbot_mistral = pipeline(task='conversational', model='mistralai/Mistral-7B-Instruct-v0.2',
                   device_map='cpu', model_kwargs={'offload_buffers':True})

MODEL_NAME = 'Mistral-7B-Instruct-v0.2'
SUBFOLDER = 'in_context'
SHORT_ID = 'IC'




Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

#### Load model - finetuning

In [18]:
from transformers import pipeline, Conversation

chatbot_mistral = pipeline(task='conversational', model='FemkeBakker/AmsterdamDocClassificationMistral200T2Epochs',
                   device_map='cpu', model_kwargs={'offload_buffers':True})

MODEL_NAME = 'AmsterdamDocClassificationMistral200T2Epochs'
SUBFOLDER = 'finetuning'
SHORT_ID = 'FT'
EPOCHS = 2



Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

#### Set-up paths to save predictions

In [16]:
import os

if SPLIT_COLUMN == '4split' or SPLIT_COLUMN == '2split':
    OVERVIEW_PATH = f"{cf.output_path}/predictionsVal/{SUBFOLDER}/{SHORT_MODEL_NAME}/{PROMPT_NAME}/overview.pkl"
    PREDICTION_PATH = f"{cf.output_path}/predictionsVal/{SUBFOLDER}/{SHORT_MODEL_NAME}/{PROMPT_NAME}/predictions.pkl"
    
elif SPLIT_COLUMN == 'balanced_split':
    if SUBFOLDER == 'finetuning':
        OVERVIEW_PATH = f"{cf.output_path}/predictionsFinal/{SUBFOLDER}/{EPOCHS}epochs/overview.pkl"
        PREDICTION_PATH = f"{cf.output_path}/predictionsFinal/{SUBFOLDER}/{EPOCHS}epochs/{SHORT_MODEL_NAME}First{FRONT_THRESHOLD}Last{BACK_THRESHOLD}Predictions.pkl"

    elif SUBFOLDER == 'in_context':
        OVERVIEW_PATH = f"{cf.output_path}/predictionsFinal/{SUBFOLDER}/{SHORT_MODEL_NAME}/overview.pkl"
        PREDICTION_PATH = f"{cf.output_path}/predictionsFinal/{SUBFOLDER}/{SHORT_MODEL_NAME}/{PROMPT_NAME}/First{FRONT_THRESHOLD}Last{BACK_THRESHOLD}Predictions.pkl"

print(OVERVIEW_PATH)
print(PREDICTION_PATH)

if not os.path.isdir(os.path.dirname(os.path.abspath(OVERVIEW_PATH))):
    raise ValueError("Folder to OVERVIEW_PATH does not exist") 
if not os.path.isdir(os.path.dirname(os.path.abspath(PREDICTION_PATH))):
    raise ValueError("Folder to PREDICTION_PATH does not exist") 

run_id = f'{SHORT_ID}_{MODEL_NAME}{PROMPT_NAME}{TOKENS_COL}{FRONT_THRESHOLD}_{BACK_THRESHOLD}{TRAIN_SET}{TEST_SET}_numEx{NUMBER_EXAMPLES}'
print ('\n', run_id)


/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/predictionsFinal/finetuning/2epochs/overview.pkl
/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/predictionsFinal/finetuning/2epochs/MistralFirst200Last0Predictions.pkl

 FT_AmsterdamDocClassificationMistral200T2Epochszeroshot_prompt_mistral_llamaLlamaTokens200_0traintest_numEx0


#### Run experiment

In [19]:
# run experiment

# add new column with truncated text -> new dataframe with column + new column name
trunc_df = tf.add_truncation_column(txt,'text', TOKENS_COL, FRONT_THRESHOLD, BACK_THRESHOLD)

# if new run MAKE SURE RUN_ID IS UNIQUE, if want to resume run, pass in that run_id
run_experiment(chatbot_mistral, trunc_df, run_id, PROMPT, TEXT_COLUMN, SPLIT_COLUMN, TRAIN_SET, TEST_SET, LABEL_COLUMN, PREDICTION_PATH, OVERVIEW_PATH, MODEL_NAME, NUMBER_EXAMPLES)


Run-id already known, resuming predictions...
Starting...0:10 out of 160
label:  factsheet
response:  "factsheet": Factsheet}
prediction: NoPredictionFormat
label:  agenda
response:  "agenda": Actualiteit, Raadsadres, Schriftelijke Vraag, Besluit, Voordracht, Motie, Factsheet, Onderzoeksrapport, Raadsnotulen, Agenda
"actualiteit": 1
"raadsadres": 2
"schriftelijke vragen": 3
"besluit": 4
"voordracht": 5
"motie": 6
"factsheet": 7
"onderzoeksrapport": 8
"raadsnotulen": 9
"agenda": 10
}
}
"agenda": {
"type": Agenda,
"punten": [
{
"nummer": 1,
"onderwerp": Opening en mededelingen
},
{
"nummer": 1a,
"onderwerp": Vaststelling van de agenda
},
{
"nummer": 2,
"onderwerp": Actualiteit inzake het verzoek van het Dagelijks Bestuur aan de
Stadsdeelraad om het
prediction: MultiplePredictionErrorInFormatting
label:  actualiteit
response:  'categorie': Actualiteit}
prediction: NoPredictionFormat
label:  voordracht
response:  "categorie": Voordracht}
prediction: NoPredictionFormat
label:  besluit
respo

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Accuracy:  0.05263157894736842
Starting...10:20 out of 160
label:  brief
response:  "categorie": Brief}
prediction: NoPredictionFormat
label:  raadsadres
response:  'categorie': Raadsadres}
prediction: NoPredictionFormat
label:  factsheet
response:  "categorie": Onderzoeksrapport}
prediction: NoPredictionFormat
label:  schriftelijke vraag
response:  'categorie': Schriftelijke Vraag}
prediction: NoPredictionFormat
label:  brief
response:  "Brief"
}
prediction: NoPredictionFormat
label:  schriftelijke vraag
response:  'categorie': Onderzoeksrapport}
prediction: NoPredictionFormat
label:  raadsnotulen
response:  "raadsnotulen": {
"categorie": Raadsnotulen
}
prediction: raadsnotulen
label:  agenda
response:  "agenda": Agenda, woensdag 18 november 2015
"categorie": Agenda}
prediction: NoPredictionFormat
label:  raadsadres
response:  'categorie': Raadsadres}
prediction: NoPredictionFormat
label:  brief
response:  "categorie": Brief}
prediction: NoPredict

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Accuracy:  0.053125
Starting...20:30 out of 160
label:  raadsadres
response:  'categorie': Raadsadres}
prediction: NoPredictionFormat
label:  actualiteit
response:  "categorie": Raadsadres}
prediction: NoPredictionFormat
label:  agenda
response:  "agenda": {
"categorie": Agenda
}
}
prediction: agenda
label:  factsheet
response:  "categorie": Onderzoeksrapport}
prediction: NoPredictionFormat
label:  agenda
response:  "agenda": Agenda, 5 juli 2023

"categorie": Agenda}
prediction: NoPredictionFormat
label:  voordracht
response:  "categorie": Voordracht}
prediction: NoPredictionFormat
label:  voordracht
response:  "categorie": Voordracht}
prediction: NoPredictionFormat
label:  motie
response:  'categorie': Motie}
prediction: NoPredictionFormat
label:  onderzoeksrapport
response:  "categorie": Onderzoeksrapport}
prediction: NoPredictionFormat
label:  besluit
response:  "categorie": Besluit}
prediction: NoPredictionFormat
Dont interrupt, saving predicti

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Accuracy:  0.05360824742268041
Starting...30:40 out of 160
label:  schriftelijke vraag
response:  'categorie': Schriftelijke Vraag}
prediction: NoPredictionFormat
label:  onderzoeksrapport
response:  "categorie": Onderzoeksrapport}
prediction: NoPredictionFormat
label:  besluit
response:  'categorie': Besluit}
prediction: NoPredictionFormat
label:  besluit
response:  'categorie': Besluit}
prediction: NoPredictionFormat
label:  factsheet
response:  "categorie": Factsheet}
prediction: NoPredictionFormat
label:  raadsadres
response:  'categorie': Raadsadres}
prediction: NoPredictionFormat
label:  actualiteit
response:  "actualiteit": {
"categorie": Actualiteit
}
}
prediction: actualiteit
label:  motie
response:  'categorie': Motie}
prediction: NoPredictionFormat
label:  motie
response:  "categorie": Motie}
prediction: NoPredictionFormat
label:  besluit
response:  'categorie': Besluit}
prediction: NoPredictionFormat
Dont interrupt, saving predictions..

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Accuracy:  0.05408163265306123
Starting...40:50 out of 160
label:  agenda
response:  "agenda": Agenda
} "type": Agenda
} "items": [
{
"date": "6-jan-2021",
"time": "19:30",
"locatie": "Virtueel",
"name": "Presentatie Aanpak bijplaatsingen",
"description": "Presentatie Aanpak bijplaatsingen"
},
{
"date": "6-jan-2021",
"time": "20:00",
"locatie": "Virtueel",
"name": "Agenderingsvoorstel ‘gekapte bomen terug in Noord’",
"description": "Agenderingsvoorstel ‘gekapte bomen terug in Noord’ (uitgesteld in SDC 2 december 2020)"
},
{
"date": "6-jan-2021",
"time": "20:30",
"locatie": "Virtueel",
"name": "Adviesaanvraag Buiteneiland",
"description": "
prediction: MultiplePredictionErrorInFormatting
label:  voordracht
response:  'categorie': Voordracht}
prediction: NoPredictionFormat
label:  raadsnotulen
response:  "raadsnotulen": {
"vergaderdatum": "10 maart 2021",
"publicatiedatum": "24 maart 2021",
"openingstijd": "13:00 uur",
"mededelingen": "De voorzitter 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Accuracy:  0.05353535353535353
Starting...50:60 out of 160
label:  motie
response:  "categorie": Motie}
prediction: NoPredictionFormat
label:  agenda
response:  "agenda": Agenda

"type": Agenda
}
prediction: NoPredictionFormat
label:  onderzoeksrapport
response:  "categorie": Onderzoeksrapport}
prediction: NoPredictionFormat
label:  raadsadres
response:  'categorie': Raadsadres}
prediction: NoPredictionFormat
label:  raadsnotulen
response:  "categorie": Raadsnotulen}
prediction: NoPredictionFormat
label:  motie
response:  'categorie': Motie}
prediction: NoPredictionFormat
label:  agenda
response:  "agenda": {
"categorie": Agenda
}
}
prediction: agenda
label:  besluit
response:  "categorie": Besluit
}
prediction: NoPredictionFormat
label:  onderzoeksrapport
response:  "categorie": Onderzoeksrapport}
prediction: NoPredictionFormat
label:  schriftelijke vraag
response:  'categorie': Schriftelijke Vraag}
prediction: NoPredictionFormat
Dont interrupt, s

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Accuracy:  0.054
Starting...60:70 out of 160
label:  factsheet
response:  "categorie": Onderzoeksrapport}
prediction: NoPredictionFormat
label:  actualiteit
response:  'categorie': Actualiteit}
prediction: NoPredictionFormat
label:  voordracht
response:  "categorie": Voordracht}
prediction: NoPredictionFormat
label:  factsheet
response:  "factsheet": Factsheet bevolkingscijfers Noord A'dam, juli 2017
}
prediction: NoPredictionFormat
label:  agenda
response:  "agenda": {
"categorie": Agenda
} }
prediction: agenda
label:  voordracht
response:  'categorie': Voordracht}
prediction: NoPredictionFormat
label:  besluit
response:  "categorie": Besluit}
prediction: NoPredictionFormat
label:  schriftelijke vraag
response:  'categorie': Schriftelijke Vraag}
prediction: NoPredictionFormat
label:  onderzoeksrapport
response:  "categorie": Onderzoeksrapport}
prediction: NoPredictionFormat
label:  onderzoeksrapport
response:  "categorie": Onderzoeksrapport}
predi

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Accuracy:  0.054455445544554455
Starting...70:80 out of 160
label:  actualiteit
response:  'categorie': Actualiteit}
prediction: NoPredictionFormat
label:  brief
response:  'categorie': Brief}
prediction: NoPredictionFormat
label:  motie
response:  'categorie': Motie}
prediction: NoPredictionFormat
label:  motie
response:  'categorie': Motie}
prediction: NoPredictionFormat
label:  schriftelijke vraag
response:  'categorie': Schriftelijke Vraag}
prediction: NoPredictionFormat
label:  schriftelijke vraag
response:  'categorie': Schriftelijke Vraag}
prediction: NoPredictionFormat
label:  brief
response:  "categorie": Brief}
prediction: NoPredictionFormat
label:  onderzoeksrapport
response:  "categorie": Onderzoeksrapport}
prediction: NoPredictionFormat
label:  motie
response:  'categorie': Motie}
prediction: NoPredictionFormat
label:  raadsnotulen
response:  "categorie": Raadsnotulen }
prediction: NoPredictionFormat
Dont interrupt, saving predictions.

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Accuracy:  0.05392156862745098
Starting...80:90 out of 160
label:  raadsnotulen
response:  "raadsnotulen": {
"categorie": Raadsnotulen
}
prediction: raadsnotulen
label:  besluit
response:  'categorie': Besluit}
prediction: NoPredictionFormat
label:  factsheet
response:  "factsheet": Factsheet
}
}
prediction: NoPredictionFormat
label:  motie
response:  'categorie': Motie}
prediction: NoPredictionFormat


Bad pipe message: %s [b"\xe6\xdb/\xba\x97?T\x92\xd5\xe9\xc5\xad\xe0nedx\xf6\x00\x00|\xc0,\xc00\x00\xa3\x00\x9f\xcc\xa9\xcc\xa8\xcc\xaa\xc0\xaf\xc0\xad\xc0\xa3\xc0\x9f\xc0]\xc0a\xc0W\xc0S\xc0+\xc0/\x00\xa2\x00\x9e\xc0\xae\xc0\xac\xc0\xa2\xc0\x9e\xc0\\\xc0`\xc0V\xc0R\xc0$\xc0(\x00k\x00j\xc0#\xc0'\x00g\x00@\xc0\n\xc0\x14\x009\x008\xc0\t\xc0\x13\x003\x002\x00\x9d\xc0\xa1\xc0\x9d\xc0Q\x00\x9c\xc0\xa0\xc0\x9c\xc0P\x00=\x00<\x005\x00/\x00\x9a\x00\x99\xc0"]
Bad pipe message: %s [b'\x11\x00\x96\x00\x05\x00']
Bad pipe message: %s [b'\xc0\x1f08\x81\x1d\xf0\x13\x95\xf0\xde\x97<\x90DTTs\x00\x00\xa2\xc0\x14\xc0\n\x009\x008\x007\x006\x00\x88\x00\x87\x00\x86\x00\x85\xc0\x19\x00:']
Bad pipe message: %s [b'\x9c)\xf6\xcf\xe0\x97\xd9\x98\x92\x14\x1e\xc3\xe2\xfd\xf3\xc7\x01\x8c\x00\x00\xa2\xc0\x14\xc0\n\x009\x008\x007\x006\x00\x88\x00\x87\x00\x86\x00\x85\xc0\x19\x00:\x00\x89\xc0\x0f\xc0\x05\x005\x00\x84\xc0\x13\xc0\t\x003\x002\x001\x000\x00\x9a\x00\x99\x00\x98\x00\x97\x00E\x00D\x00C\x00B\xc0\x18\x004\x00\x

label:  agenda
response:  "agenda": {
"type": "Agenda",
"commissie": "BWK",
"ruimte": "Ruimte voor de commissie",
"start": "09.00",
"end": "12.30",
"blokA": {
"nummer": 1,
"start": "09.00",
"end": "12.30",
"onderwerp": "Openbare vergadering van de Raadscommissie BWK",
"tekst": "U wordt uitgenodigd voor de openbare vergadering van de Raadscommissie BWK. De
vergadering vindt plaats in de Boekmanzaal, stadhuis. De commissie vergadert van 09.00
tot 12.30 uur. De vergadering is openbaar en vrij te bezoeken. De commissie vergadert
als volgt:
- Opening
- Mededelingen
- Vaststellen agenda
- _Inspreekhalfuur
prediction: NoPredictionFormat
label:  actualiteit
response:  "categorie": Actualiteit}
prediction: NoPredictionFormat
label:  besluit
response:  'categorie': Besluit}
prediction: NoPredictionFormat
label:  motie
response:  'categorie': Motie}
prediction: NoPredictionFormat
label:  schriftelijke vraag
response:  'categorie': Schriftelijke Vraag}
prediction: NoPredictionFormat
label:  motie


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Accuracy:  0.05436893203883495
Starting...90:100 out of 160
label:  voordracht
response:  "categorie": Voordracht}
prediction: NoPredictionFormat
label:  besluit
response:  "categorie": Besluit}
prediction: NoPredictionFormat
label:  brief
response:  "categorie": Brief}
prediction: NoPredictionFormat
label:  schriftelijke vraag
response:  'categorie': Schriftelijke Vraag}
prediction: NoPredictionFormat
label:  raadsnotulen
response:  "raadsnotulen": {
"categorie": Raadsnotulen
}
prediction: raadsnotulen
label:  raadsadres
response:  'categorie': Raadsadres}
prediction: NoPredictionFormat
label:  raadsnotulen
response:  "categorie": Raadsnotulen }
prediction: NoPredictionFormat
label:  agenda
response:  'agenda': Agenda}
prediction: NoPredictionFormat
label:  besluit
response:  "categorie": Besluit |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Accuracy:  0.05480769230769231
Starting...100:110 out of 160
label:  onderzoeksrapport
response:  "categorie": Onderzoeksrapport}
prediction: NoPredictionFormat
label:  brief
response:  "categorie": Brief}
prediction: NoPredictionFormat
label:  actualiteit
response:  'categorie': Actualiteit}
prediction: NoPredictionFormat
label:  agenda
response:  "agenda": Agenda
} "programma": Programma
} "soort": Actualiteit
} "datum in Cie": 13-Mei-15
} "opmerkingen t.b.v. de commissie": Extra voorbereidingstijd nodig voor kwalificatie activiteiten
} "datum in Cie": 13-Mei-15
} "opmerkingen t.b.v. de commissie": Opdracht tot uitvoering
} "programma": Programma
} "soort": Besluit
} "datum in Cie": 13-Mei-15
} "opmerkingen t.b.v. de commissie": Nvt.
} "programma": Programma
} "soort": Schriftelijke Vraag
} "datum in Cie": 13-Mei-15
} "opmerkingen t.b.v. de commissie": Nvt.
} "programma": Programma
} "soort": Besluit
} "datum
prediction: NoPredictionFormat
label:

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Accuracy:  0.05523809523809524
Starting...110:120 out of 160
label:  raadsnotulen
response:  "Raadsnotulen": {
"categorie": Raadsnotulen
}
prediction: raadsnotulen
label:  raadsnotulen
response:  "categorie": Raadsnotulen }
prediction: NoPredictionFormat
label:  raadsadres
response:  "categorie": Raadsadres}
prediction: NoPredictionFormat
label:  factsheet
response:  "categorie": Onderzoeksrapport}
prediction: NoPredictionFormat
label:  schriftelijke vraag
response:  'categorie': Schriftelijke Vraag}
prediction: NoPredictionFormat
label:  actualiteit
response:  'categorie': Actualiteit}
prediction: NoPredictionFormat
label:  besluit
response:  "categorie": Besluit}
prediction: NoPredictionFormat
label:  raadsnotulen
response:  "raadsnotulen": {
"categorie": Raadsnotulen
}
prediction: raadsnotulen
label:  agenda
response:  "agenda": Agenda,
"commissie": Commissie,
"datum": 2011-09-06,
"titel": Termijnagenda Commissie Samenleving tbv 6 september 2011

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Accuracy:  0.05660377358490566
Starting...120:130 out of 160
label:  brief
response:  "categorie": Brief}
prediction: NoPredictionFormat
label:  onderzoeksrapport
response:  "categorie": Onderzoeksrapport}
prediction: NoPredictionFormat
label:  factsheet
response:  "categorie": Onderzoeksrapport}
prediction: NoPredictionFormat
label:  brief
response:  "categorie": Brief}
prediction: NoPredictionFormat
label:  voordracht
response:  "categorie": Voordracht}
prediction: NoPredictionFormat
label:  actualiteit
response:  'categorie': Actualiteit}
prediction: NoPredictionFormat
label:  motie
response:  "categorie": Motie}
prediction: NoPredictionFormat
label:  onderzoeksrapport
response:  "categorie": Onderzoeksrapport}
prediction: NoPredictionFormat
label:  raadsnotulen
response:  "raadsnotulen": {
"categorie": Raadsnotulen
}
prediction: raadsnotulen
label:  motie
response:  'categorie': Motie}
prediction: NoPredictionFormat
Dont interrupt, saving predi

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Accuracy:  0.05700934579439252
Starting...130:140 out of 160
label:  voordracht
response:  "categorie": Voordracht}
prediction: NoPredictionFormat
label:  onderzoeksrapport
response:  "categorie": Onderzoeksrapport}
prediction: NoPredictionFormat
label:  raadsadres
response:  'categorie': Raadsadres}
prediction: NoPredictionFormat
label:  factsheet
response:  "categorie": Onderzoeksrapport}
prediction: NoPredictionFormat
label:  factsheet
response:  "factsheet": Factsheet Economische ontwikkelingen Amsterdam, augustus 2020

"indicatoren":

* _ Gross Domestic Product (GDP) per kwartaal
* _ Beroepsbeschikkingen
* _ Consumentenvertrouwen
* _ Zelfstandigenindex
* _ Woningbouw
* _ Erfpacht
* _ Leningen
* _ Inkomens
* _ Inflatie
* _ Bedrijfsruimte
* _ Toerisme
* _ Bezoekersaantallen
* _ Overnachtingsbezoek
* _ Conferentiebranche
* _ Deelnemingen
* _ Personeel en organisatie
* _ Dienstverlening
* _ ICT
* _ Duurzaamheid

"alternatieve indicatoren":

* _ Go

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Accuracy:  0.05740740740740741
Starting...140:150 out of 160
label:  motie
response:  'categorie': Motie}
prediction: NoPredictionFormat
label:  agenda
response:  "agenda": Agenda

"commissie": Commissie

"griffie": Griffie

"mw": Mevrouw

"postbus": Postbus

"raad": Raad

"telefoon": Telefoon

"wmo": Wmo

"zw": Zorg

"zuid": Zuid

"raadscommissie": {

"welzijn": {

"zorg": {

"onderwijs": {

"sport": {

"kunst en cultuur": {

"maatschappelijke dienstverlening": {

"voorzitter": {

"griffier": {

"contactpersoon": {

"raadscommissie": {

"welzijn": {

"zorg": {

"onderwijs": {

"sport": {

"kunst en cultuur": {

"maatschappelijke dienstverlening": {

"raadscommissie": {

"
prediction: NoPredictionFormat
label:  onderzoeksrapport
response:  "categorie": Onderzoeksrapport}
prediction: NoPredictionFormat
label:  brief
response:  ' categorie': Brief}
prediction: NoPredictionFormat
label:  voordracht
response:  "categorie": Voordracht}
prediction: NoPre

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Accuracy:  0.05779816513761468
Starting...last 10 docs
label:  besluit
response:  ' categorie': Besluit}
prediction: NoPredictionFormat
label:  motie
response:  'categorie': Motie}
prediction: NoPredictionFormat
label:  brief
response:  "categorie": Brief}
prediction: NoPredictionFormat
label:  voordracht
response:  "categorie": Voordracht}
prediction: NoPredictionFormat
label:  schriftelijke vraag
response:  "categorie": Actualiteit}
prediction: NoPredictionFormat
label:  agenda
response:  "agenda": Agenda voor de vergadering van de stadsdeelraad Zuidoost op 30 mei 2006

"1": Opening en Mededelingen

"1a": Vaststelling van de agenda

"2": Verslag vergadering 11 april 2006

"3": Toezeggingen en termijnagenda

"4": Vragenhalfuur publiek

"5": Actualiteiten

"6": Stem van Zuidoost

"7": Vragen aan het DB

"8": Kijk op de wijk

"9": Sluiting

"10": Termijnagenda (t.b.v. raadsgriffie)

"11": Voorjaarsnota 2006 (t.b.v. raadsgriffie)

"12": Voorjaarsnota

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Accuracy:  0.057272727272727274


In [17]:
pred = pd.read_pickle(OVERVIEW_PATH)
display(pred)


Unnamed: 0,model,run_id,date,train_set,test_set,train_set_support,test_set_support,split_col,text_col,runtime,accuracy,macro_avg_precision,macro_avg_recall,macro_avg_f1,weighted_avg_precision,weighted_avg_recall,weighted_avg_f1,classification_report
0,AmsterdamDocClassificationGEITje200T,FT_AmsterdamDocClassificationGEITje200Tzerosho...,2024-05-31 17:00:16.806328+02:00,train,test,9900,1100,balanced_split,TruncationLlamaTokensFront200Back0,23948.996279,0.898182,0.92641,0.898182,0.891699,0.92641,0.898182,0.891699,precision recall f1-s...
1,AmsterdamDocClassificationMistral200T,OLDFT_AmsterdamDocClassificationMistral200Tzer...,2024-05-31 18:41:44.412997+02:00,train,test,9900,200,balanced_split,TruncationLlamaTokensFront200Back0,4586.422368,0.0,0.0,0.0,0.0,0.0,0.0,0.0,precision recall f1-s...
2,AmsterdamDocClassificationLlama200T,FT_AmsterdamDocClassificationLlama200Tzeroshot...,2024-05-31 16:53:49.451759+02:00,train,test,9900,1100,balanced_split,TruncationLlamaTokensFront200Back0,21965.290097,0.864545,0.83216,0.7925,0.780662,0.907811,0.864545,0.851631,precision recall f1-...
0,AmsterdamDocClassificationMistral200T,FT_AmsterdamDocClassificationMistral200Tzerosh...,2024-06-04 16:09:46.493691+02:00,train,test,9900,80,balanced_split,TruncationLlamaTokensFront200Back0,4198.556982,0.0375,0.083333,0.035714,0.05,0.0875,0.0375,0.0525,precision recall f1-s...
0,AmsterdamDocClassificationMistral200T2Epochs,FT_AmsterdamDocClassificationMistral200T2Epoch...,2024-06-05 07:31:54.724291+02:00,train,test,9900,940,balanced_split,TruncationLlamaTokensFront200Back0,49201.589993,0.052128,0.32684,0.042827,0.065659,0.411573,0.052128,0.080087,precision...
