In [None]:
!bash /home/azureuser/cloudfiles/code/blobfuse/blobfuse_raadsinformatie.sh

In [16]:
import sys
sys.path.append("..")

# Select where to run notebook: "azure" or "local"
my_run = "azure"

# import my_secrets as sc
# import settings as st

if my_run == "azure":
    import config_azure as cf
elif my_run == "local":
    import config as cf


import os
if my_run == "azure":
    if not os.path.exists(cf.HUGGING_CACHE):
        os.mkdir(cf.HUGGING_CACHE)
    os.environ["TRANSFORMERS_CACHE"] = cf.HUGGING_CACHE

# set-up environment - GEITje-7b-chat InContextLearning:
# - install blobfuse -> sudo apt-get install blobfuse
# - pip install transformers
# - pip install torch
# - pip install accelerate
# - pip install jupyter
# - pip install ipywidgets

## Notebook overview
- Goal: Run experiment for InContext Learning GEITje
- Trial run model -> prompt GEITje using, example prompt
- Zeroshot prompts
- Fewshot prompts

Load data and functions:
- data is already split
- text is already converted to tokens using model tokenizer 

In [17]:
import pandas as pd
# df = pd.read_pickle(f"{cf.output_path}/txtfiles_tokenizer.pkl")

import sys
sys.path.append('../scripts/') 
import prompt_template as pt
import prediction_helperfunctions as ph
import truncation as tf


In [18]:
import torch
torch.cuda.empty_cache()

#### Trial run Models 
Code to run the models with a simple prompt.

In [None]:
from transformers import pipeline, Conversation

chatbot_geitje = pipeline(task='conversational', model='Rijgersberg/GEITje-7B-chat-v2',
                   device_map='auto', model_kwargs={'offload_buffers':True})


chatbot_llama = pipeline(task='conversational', model='meta-llama/Llama-2-7b-chat-hf',
                   device_map='auto', model_kwargs={'offload_buffers':True})

chatbot_mistral = pipeline(task='conversational', model='mistralai/Mistral-7B-Instruct-v0.2',
                   device_map='auto', model_kwargs={'offload_buffers':True})

## EXAMPLE PROMPT
# print(chatbot(
    # Conversation('Welk woord hoort er niet in dit rijtje thuis: "auto, vliegtuig, geitje, bus"?')
# ))

#### Experiment functions
Prompt GEITje for each document and save the prediction, return response, response time and the prompt version

Code structure:
- 2 functions/cells:
- predictions_incontextlearning -> given a df with docs that need to be predicted, prompt the model
- run the experiment -> built in failsaves (df run in parts, with saves in between)

In [19]:
import time
import os
import pandas as pd
from bm25 import BM25


""" Given a dataframe with txt, return a df with predictions """
# docs_df = dataframe with the documents that need to be predicted
# text_column = name of the column that includes the input_text. Can be different based on the text representation method. 
# prompt_function = prompt template 
# train_df = dataframe with docs, which can be used as examples/training data/context data
# num_examples = number of examples in the prompt

def predictions_incontextlearning(chatbot, docs_df, text_column, prompt_function, train_df, num_examples):
    results_df = pd.DataFrame(columns = ['id', 'path', 'text_column', 'prompt_function', 'response', 'prediction', 'label', 'runtime', 'date', 'prompt'])


    if prompt_function == pt.fewshot_prompt_bm25:
        BM25_model = BM25()
        BM25_model.fit(train_df[text_column])
   

    # prompt each document
    for index, row in docs_df.iterrows():
        # if (index + 1) % 200 == 0:
        #     print(f"Iteration {index +1}/{len(docs_df)} completed.")

        start_time = time.time()

        # get the prompt, with the doc filled in
        txt = row[text_column]

        # each prompt function takes different arguments
        # zeroshot prompt for geitje
        if prompt_function == pt.zeroshot_prompt_geitje:
            prompt = prompt_function(txt)

        # zeroshot function for mistral and llama
        elif prompt_function == pt.zeroshot_prompt_mistral_llama:
            prompt = prompt_function(txt)

        # select fewshot examples using bm25, fewshot is the same for all models
        elif prompt_function == pt.fewshot_prompt_bm25:
            prompt = prompt_function(txt, train_df, num_examples, text_column, BM25_model)

        else:
            raise ValueError("Prompt function not recognised. Check if prompt function is in prompt_template.py and included in the options above.")

        # prompt and get the response
        # print(prompt)
        converse = chatbot(Conversation(prompt))
        response = converse[1]['content']
        print("label: ", row['label'].lower())
        print("response: ", response)

        # extract prediction from response
        prediction = ph.get_prediction_from_response(response)
        print("prediction:", prediction)

        # save results in dataframe
        results_df.loc[len(results_df)] = {
            'id': row['id'],
            'path' : row['path'],
            'text_column' : docs_df.iloc[0]['trunc_col'],
            'prompt_function': ph.get_promptfunction_name(prompt_function),
            'response':response,
            'prediction':prediction,
            'label':row['label'].lower(),
            'runtime':time.time()-start_time,
            'date': ph.get_datetime(),
            'prompt':prompt
        }
    return results_df



In [20]:
import os
import time
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score

"""
Function to run GEITje In-Context Learning experiment. 
The function allows to resume experiment, if run_id matches.
"""
# df = dataframe with all docs that need to have a prediction (docs still need to be predict + already predicted)
# run_id = unqiue for each experiment. 
# prompt_function = which prompt from prompt_template.py to use
# text_col = colum in df where the text is. (Needs to be already truncated)
# split_col = column with the dataset split. Either '2split' (train and test)or '4split'(train, test, dev and val)
# subset_train = indicates which subset to use as training. either 'train' or 'dev'
# subset_test = indicates which subset to use for testing. either 'test' or 'val'
# label_col = column with the true label
# prediction_path = path to file where predictions need to be saved.
# overview_path = path to file where results of each run need to be saved.
# model_name = name of the model. string.
# num_exmples = number of exaples given to prompt. zero in case of zeroshot. 

def run_experiment(chatbot, df, run_id, prompt_function, text_col, split_col, subset_train, subset_test, label_col, prediction_path, overview_path, model_name, num_examples=0):
    test_df = df.loc[df[split_col]==subset_test]
    train_df = df.loc[df[split_col]==subset_train]
    
    # get rows of df that still need to be predicted for the specific run_id
    to_predict, previous_predictions = ph.get_rows_to_predict(test_df, prediction_path, run_id)

    # devide to_predict into subsection of 50 predictions at a time. 
    # Allows to rerun without problem. And save subsections of 50 predictions.
    step_range = list(range(0, len(to_predict), 10))

    for i in range(len(step_range)):
        try:
            sub_to_predict = to_predict.iloc[step_range[i]:step_range[i+1]]
            print(f'Starting...{step_range[i]}:{step_range[i+1]} out of {len(to_predict)}')
        except Exception as e:
            sub_to_predict = to_predict[step_range[i]:]
            print(f'Starting...last {len(sub_to_predict)} docs')

        # prompt geitje
        predictions = predictions_incontextlearning(chatbot, sub_to_predict, text_col, prompt_function, train_df, num_examples)

        # save info
        predictions['run_id'] = run_id
        predictions['train_set'] = subset_train
        predictions['test_set'] = subset_test
        predictions['shots'] = num_examples

        # save new combinations in file
        print("Dont interrupt, saving predictions...")
        ph.combine_and_save_df(predictions, prediction_path)

        # if previous predictions, combine previous with new predictions, to get update classification report
        try:
            predictions = pd.concat([predictions, previous_predictions])

            # set previous predictions to all predictions made until now. Necessary for next loop
            previous_predictions = predictions
        except Exception as e:
            # set previous predictions to all predictions made until now. Necessary for next loop
            previous_predictions = predictions

        # save results in overview file
        date = ph.get_datetime()
        y_test = predictions['label']
        y_pred = predictions['prediction']
        report = classification_report(y_test, y_pred)

        overview = pd.DataFrame(
            [{
                'model':model_name,
                'run_id':run_id,
                'date': date,
                'train_set': subset_train,
                'test_set': subset_test,
                'train_set_support':len(df.loc[df[split_col]==subset_train]),
                'test_set_support':len(predictions),
                'split_col':split_col,
                'text_col':df.iloc[0]['trunc_col'],
                'runtime':sum(predictions['runtime']),
                'accuracy': accuracy_score(y_test, y_pred),
                'macro_avg_precision': precision_score(y_test, y_pred, average='macro'),
                'macro_avg_recall': recall_score(y_test, y_pred, average='macro'),
                'macro_avg_f1': f1_score(y_test, y_pred, average='macro'),
                'classification_report':report
            }   ]
        )
        # remove previous results of run_id, replace with new/updated results
        ph.replace_and_save_df(overview, overview_path, run_id)
        print("Saving done! Interrupting is allowed.")



Set up variables that are the same for each model

In [21]:
#set  variables, same for each model
TRAIN_SET = 'train' # must be dev or train
TEST_SET = 'test' # must be val or test
SPLIT_COLUMN = 'balanced_split' #must be either 2split or 4split. 2split = data split into train and test. 4split = data split into train, test, dev and val. 
LABEL_COLUMN = 'label'
TEXT_COLUMN = 'trunc_txt'


In [22]:
txt = pd.read_pickle(f"{cf.output_path}/txtfiles_tokenizer.pkl")

### GEITje

In [None]:
SHORT_MODEL_NAME = 'GEITje'
PROMPT = pt.zeroshot_prompt_geitje
PROMPT_NAME = ph.get_promptfunction_name(PROMPT)
TOKENS_COL = 'LlamaTokens' # column with text split using tokenizer of either mistral (MistralTokens) or Llama (LlamaTokens). Using Llama, because Llama split into more tokens. 
FRONT_THRESHOLD = 100
BACK_THRESHOLD = 100

if PROMPT==pt.zeroshot_prompt_geitje:
    NUMBER_EXAMPLES = 0
elif PROMPT == pt.fewshot_prompt_bm25:
    NUMBER_EXAMPLES = 2



#### Load model - In-context learning
Note - ONLY load one model: either in-context or fine-tuning

In [None]:
from transformers import pipeline, Conversation

chatbot_geitje = pipeline(task='conversational', model='Rijgersberg/GEITje-7B-chat-v2',
                    device_map='cpu', model_kwargs={'offload_buffers':True})

MODEL_NAME = 'GEITje-7B-chat-v2'
SUBFOLDER = 'in_context'
SHORT_ID = 'IC'



#### Load model - Finetuning

In [None]:
from transformers import pipeline, Conversation

chatbot_geitje = pipeline(task='conversational', model='FemkeBakker/GEITjeSmallData200Tokens',
                   device_map='cpu', model_kwargs={'offload_buffers':True})

MODEL_NAME = 'GEITjeSmallData200Tokens'
SUBFOLDER = 'finetuning'
SHORT_ID = 'FT'

#### Set-up paths to save predictions

In [None]:
import os

if SPLIT_COLUMN == '4split' or SPLIT_COLUMN == '2split':
    OVERVIEW_PATH = f"{cf.output_path}/predictionsVal/{SUBFOLDER}/{SHORT_MODEL_NAME}/{PROMPT_NAME}/overview.pkl"
    PREDICTION_PATH = f"{cf.output_path}/predictionsVal/{SUBFOLDER}/{SHORT_MODEL_NAME}/{PROMPT_NAME}/predictions.pkl"
    
elif SPLIT_COLUMN == 'balanced_split':
    if SUBFOLDER == 'finetuning':
        OVERVIEW_PATH = f"{cf.output_path}/predictionsFinal/{SUBFOLDER}/overview.pkl"
        PREDICTION_PATH = f"{cf.output_path}/predictionsFinal/{SUBFOLDER}/{SHORT_MODEL_NAME}First{FRONT_THRESHOLD}Last{BACK_THRESHOLD}Predictions.pkl"

    elif SUBFOLDER == 'in_context':
        OVERVIEW_PATH = f"{cf.output_path}/predictionsFinal/{SUBFOLDER}/{SHORT_MODEL_NAME}/overview.pkl"
        PREDICTION_PATH = f"{cf.output_path}/predictionsFinal/{SUBFOLDER}/{SHORT_MODEL_NAME}/{PROMPT_NAME}/First{FRONT_THRESHOLD}Last{BACK_THRESHOLD}Predictions.pkl"

print(OVERVIEW_PATH)
print(PREDICTION_PATH)

if not os.path.isdir(os.path.dirname(os.path.abspath(OVERVIEW_PATH))):
    raise ValueError("Folder to OVERVIEW_PATH does not exist") 
if not os.path.isdir(os.path.dirname(os.path.abspath(PREDICTION_PATH))):
    raise ValueError("Folder to PREDICTION_PATH does not exist") 

run_id = f'{SHORT_ID}_{MODEL_NAME}{PROMPT_NAME}{TOKENS_COL}{FRONT_THRESHOLD}_{BACK_THRESHOLD}{TRAIN_SET}{TEST_SET}_numEx{NUMBER_EXAMPLES}'
print ('\n', run_id)


#### Run experiment

In [None]:
# ----- EXPERIMENT --------

# add new column with truncated text -> new dataframe with column + new column name
trunc_df = tf.add_truncation_column(txt,'text', TOKENS_COL, FRONT_THRESHOLD, BACK_THRESHOLD)


# if new run MAKE SURE RUN_ID IS UNIQUE, if want to resume run, pass in that run_id
run_experiment(chatbot_geitje, trunc_df, run_id, PROMPT, TEXT_COLUMN, SPLIT_COLUMN, TRAIN_SET, TEST_SET, LABEL_COLUMN, PREDICTION_PATH, OVERVIEW_PATH, MODEL_NAME, NUMBER_EXAMPLES)


In [None]:
pred = pd.read_pickle(OVERVIEW_PATH)
# pred_run = pred.loc[pred['run_id']==f'{PROMPT_NAME}{TOKENS_COL}{FRONT_THRESHOLD}_{BACK_THRESHOLD}']
display(pred)

### Llama


In [23]:
SHORT_MODEL_NAME = 'Llama'
PROMPT = pt.zeroshot_prompt_mistral_llama
PROMPT_NAME = ph.get_promptfunction_name(PROMPT)
TOKENS_COL = 'LlamaTokens' # column with text split using tokenizer of either mistral (MistralTokens) or Llama (LlamaTokens). Using Llama, because Llama split into more tokens. 
FRONT_THRESHOLD = 100
BACK_THRESHOLD = 100

if PROMPT==pt.zeroshot_prompt_mistral_llama:
    NUMBER_EXAMPLES = 0
elif PROMPT == pt.fewshot_prompt_bm25:
    NUMBER_EXAMPLES = 2



#### Load model - In-context learning
Note - ONLY load one model: either in-context or fine-tuning

In [24]:
from transformers import pipeline, Conversation

chatbot_llama = pipeline(task='conversational', model='meta-llama/Llama-2-7b-chat-hf',
                   device_map='cpu', model_kwargs={'offload_buffers':True})
# load llama using cpu, else will give cuda out of memory error when running fewshot bm25 prompt.

MODEL_NAME = 'Llama-2-7b-chat-hf'
SUBFOLDER = 'in_context'
SHORT_ID = 'IC'



Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

#### Load model - finetuning

In [None]:
from transformers import pipeline, Conversation

# chatbot_llama = pipeline(task='conversational', model='FemkeBakker/LlamaSmallData200Tokens',
#                    device_map='cpu', model_kwargs={'offload_buffers':True})

MODEL_NAME = 'LlamaSmallData200Tokens'
SUBFOLDER = 'finetuning'
SHORT_ID = 'FT'

#### Set-up paths to save predictions

In [25]:
import os

if SPLIT_COLUMN == '4split' or SPLIT_COLUMN == '2split':
    OVERVIEW_PATH = f"{cf.output_path}/predictionsVal/{SUBFOLDER}/{SHORT_MODEL_NAME}/{PROMPT_NAME}/overview.pkl"
    PREDICTION_PATH = f"{cf.output_path}/predictionsVal/{SUBFOLDER}/{SHORT_MODEL_NAME}/{PROMPT_NAME}/predictions.pkl"
    
elif SPLIT_COLUMN == 'balanced_split':
    if SUBFOLDER == 'finetuning':
        OVERVIEW_PATH = f"{cf.output_path}/predictionsFinal/{SUBFOLDER}/overview.pkl"
        PREDICTION_PATH = f"{cf.output_path}/predictionsFinal/{SUBFOLDER}/{SHORT_MODEL_NAME}First{FRONT_THRESHOLD}Last{BACK_THRESHOLD}Predictions.pkl"

    elif SUBFOLDER == 'in_context':
        OVERVIEW_PATH = f"{cf.output_path}/predictionsFinal/{SUBFOLDER}/{SHORT_MODEL_NAME}/overview.pkl"
        PREDICTION_PATH = f"{cf.output_path}/predictionsFinal/{SUBFOLDER}/{SHORT_MODEL_NAME}/{PROMPT_NAME}/First{FRONT_THRESHOLD}Last{BACK_THRESHOLD}Predictions.pkl"

print(OVERVIEW_PATH)
print(PREDICTION_PATH)

if not os.path.isdir(os.path.dirname(os.path.abspath(OVERVIEW_PATH))):
    raise ValueError("Folder to OVERVIEW_PATH does not exist") 
if not os.path.isdir(os.path.dirname(os.path.abspath(PREDICTION_PATH))):
    raise ValueError("Folder to PREDICTION_PATH does not exist") 

run_id = f'{SHORT_ID}_{MODEL_NAME}{PROMPT_NAME}{TOKENS_COL}{FRONT_THRESHOLD}_{BACK_THRESHOLD}{TRAIN_SET}{TEST_SET}_numEx{NUMBER_EXAMPLES}'
print ('\n', run_id)

/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/predictionsFinal/in_context/Llama/overview.pkl
/home/azureuser/cloudfiles/code/blobfuse/raadsinformatie/processed_data/woo_document_classification/predictionsFinal/in_context/Llama/zeroshot_prompt_mistral_llama/First100Last100Predictions.pkl

 IC_Llama-2-7b-chat-hfzeroshot_prompt_mistral_llamaLlamaTokens100_100traintest_numEx0


#### Run experiment

In [28]:
# add new column with truncated text -> new dataframe with column + new column name
trunc_df = tf.add_truncation_column(txt,'text', TOKENS_COL, FRONT_THRESHOLD, BACK_THRESHOLD)

# if new run MAKE SURE RUN_ID IS UNIQUE, if want to resume run, pass in that run_id
run_experiment(chatbot_llama, trunc_df, run_id, PROMPT, TEXT_COLUMN, SPLIT_COLUMN, TRAIN_SET, TEST_SET, LABEL_COLUMN, PREDICTION_PATH, OVERVIEW_PATH, MODEL_NAME, NUMBER_EXAMPLES)


Run-id already known, resuming predictions...
Starting...0:10 out of 700
label:  brief
response:   Sure, I can classify the document for you. Based on the content provided, I would categorize the 4 Gemeente Raadsinformatiebrief as follows:

{'categorie': 'Brief'}

The document appears to be a formal letter addressed to the members of the city council of Amsterdam, providing information on a specific topic related to the city's transportation and air quality. The content includes details on a route description, which suggests that the document is intended to inform and provide guidance to the council members on a particular issue. Therefore, the most appropriate category for this document is 'Brief'.
prediction: brief
label:  actualiteit
response:   Sure, I can classify the document for you. Based on the content provided, I would categorize the document as:

{'categorie': 'Agenda'}

The document appears to be an agenda for a municipal council meeting, outlining the items to be discussed

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Starting...10:20 out of 700
label:  factsheet
response:   Based on the content of the document you provided, I would classify it as a 'Factsheet'.

Here is the output in the form of a JSON file:

{
"categorie": "Factsheet"
}

The reason for this classification is that the document appears to be providing information on a specific topic (cultural diversity in Amsterdam) in a concise and informative manner, which is a common purpose of factsheets. The document includes data and statistics related to the topic, and provides a brief overview of the information, which is also consistent with the format of a factsheet.
prediction: factsheet
label:  actualiteit
response:   Sure! Based on the content of the document you provided, I would classify it as a 'Raadsvergadering' document. Here is the output in the form of a JSON file:

{
"categorie": "Raadsvergadering"
}

The reason for this classification is that the document appears to be a agenda and minutes 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Starting...20:30 out of 700
label:  agenda
response:   Sure, I can classify the document for you. Based on the content of the document, I would classify it under the category of 'Agenda'.

Here is the output in the form of a JSON file:

{
"categorie": "Agenda"
}

The reason for this classification is that the document appears to be an agenda for a meeting of the Raadscommissie voor Ruimtelijke Ordening, Grondzaken, Waterbeheer en ICT in Amsterdam, with the date of the meeting being December 12, 2007. The document includes information on the topics to be discussed during the meeting, as well as instructions for speakers and attendees.
prediction: agenda
label:  factsheet
response:   Based on the content of the document you provided, I would classify it as a 'Voorlopig Rapport' (Dutch for 'Interim Report').

Here is the output in JSON format:

{
"categorie": "Voorlopig Rapport"
}

The reason for this classification is that the document appears to be 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Starting...30:40 out of 700
label:  agenda
response:   Based on the content of the document provided, I would classify it as a 'Agenda'.

Here is the output in JSON format:

{
"categorie": "Agenda"
}
prediction: agenda
label:  voordracht
response:   Based on the content of the document you provided, I would classify it as a 'Voordracht' (Dutch for 'Speech').

Here is the output in the form of a JSON file:

{
"categorie": "Voordracht"
}

The reason for this classification is that the document contains a speech or presentation for the Gemeente Raadscommissie voor Ruimtelijke Ordening en Grond en Ontwikkeling (City Council Committee for Spatial Planning and Land Development) on the topic of sustainability in Amsterdam. The document includes the name of the speaker (K. Vreeker), the date and time of the speech, and the agenda point number. All of these elements suggest that the document is a speech or presentation, rather than another type of document 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Starting...40:50 out of 700
label:  raadsnotulen
response:   Na analyse van het document, het categoriseert het als:

{'categorie': 'Raadsnotulen'}
prediction: raadsnotulen
label:  brief
response:   Sure! Based on the content of the document you provided, I would classify it as a 'Raadsnotulen'.

Here is the output in JSON format:

{
"categorie": "Raadsnotulen"
}

The reason for this classification is that the document appears to be a summary of a meeting of the municipal council of Amsterdam, with information on the agenda items discussed and the decisions made. This is a common type of document used in local government to keep council members informed about the progress of various projects and initiatives.
prediction: raadsnotulen
label:  besluit
response:   Based on the content of the document, I would classify it as a 'Voordracht'.

Here is the output in JSON format:

{
"categorie": "Voordracht"
}
prediction: voordracht
label:  voordracht
respo

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Starting...50:60 out of 700
label:  raadsadres
response:   Het document belongs to the category: ['Voordracht'].

Here is the output in JSON format:

{
"categorie": "Voordracht"
}
prediction: voordracht
label:  raadsadres
response:   Sure! Based on the content of the document you provided, I would classify it as a 'Besluit'.

Here is the output in JSON format:

{
"categorie": "Besluit"
}

The reason for this classification is that the document appears to be a formal decision or recommendation, with a clear structure and language that is typical of official documents. The use of formal language, the inclusion of contact information, and the focus on establishing a better understanding between different parties all suggest that this is a document with a formal purpose.
prediction: besluit
label:  voordracht
response:   Sure, I can classify the document for you. Based on the content of the document you provided, I would categorize it as a 'Voordracht'

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Starting...60:70 out of 700
label:  motie
response:   Based on the content of the document provided, I would classify it as a 'Motie'.

Here is the output in the form of a JSON file:

{
"categorie": "Motie"
}
prediction: motie
label:  raadsnotulen
response:   Sure, I can classify the document for you. Based on the content provided, I would categorize the document as ['Raadsnotulen']:

{
"categorie": "Raadsnotulen"
}

The document appears to be a meeting minutes of the Gemeente Amsterdam, specifically the year 2020, and the agenda item is related to information regarding the recovery phase of the COVID-19 pandemic. The document includes mentions of assembly dates, attendance, and collected letters from the mayor and city secretary, which are all typical elements found in raadsnotulen.
prediction: raadsnotulen
label:  raadsadres
response:   Sure, I can help you with that! Based on the content of the document you provided, I would classify it as a 'Br

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Starting...70:80 out of 700
label:  actualiteit
response:   Based on the content of the document provided, I would classify it as a 'Besluit'.

Here is the output in the form of a JSON file:

{
"categorie": "Besluit"
}

The reason for this classification is that the document appears to be a proposal or recommendation for action, rather than a statement of opinion or a summary of information. The document includes specific details about the topic being addressed, such as the reason for urgency and the proposed course of action, which are typical features of a besluit.
prediction: besluit
label:  raadsnotulen
response:   Sure! Based on the content of the document you provided, I would classify it as a 'Raadsnotulen' document.

Here is the output in the form of a JSON file:

{
"categorie": "Raadsnotulen"
}

Explanation:

The document is a meeting minutes of the Gemeente Amsterdam council meeting held on January 23, 2020. It includes the list of attend

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Starting...80:90 out of 700
label:  brief
response:   Sure, I can classify the document for you. Based on the content you provided, I would categorize the document as:

{'categorie': 'Raadsnotulen'}

This document appears to be a council agenda or meeting notes, as it contains information about the topics to be discussed and decided upon by the council members of Amsterdam. The document mentions the portfolio holder for the topic of "Verkeer en Openbare Ruimte" (Traffic and Public Space), and includes a brief description of the alternative options for creating additional parking facilities for visitors, which may conflict with the municipality's autoless ambitions. The document also includes a route description for visitors to find the meeting location. Therefore, it falls under the category of "Raadsnotulen" (Council Meeting Notes).
prediction: raadsnotulen
label:  onderzoeksrapport
response:   Based on the text you provided, I would classify the 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Starting...90:100 out of 700
label:  onderzoeksrapport
response:   Based on the content of the document, I would classify it as a 'Besluit'.

Here is the JSON output:

{
"categorie": "Besluit"
}
prediction: besluit
label:  voordracht
response:   Based on the content of the document provided, I would classify it as a 'Voordracht' (Dutch for 'Speech').

Here is the output in the form of a JSON file:

{
"categorie": "Voordracht"
}

The document contains a speech given to the Commissie RO (Ruimtelijke Ordening) of the Gemeente Amsterdam on August 31, 2022, regarding the agenda point 2 of the meeting. The speech is addressed to the members of the commissie and contains information on the topic of "Ruimte en Duurzaamheid" (Dutch for 'Space and Sustainability'). Therefore, it falls under the category of a 'Voordracht', which is a type of document that is used to present information or arguments in a formal setting, such as a meeting or a conference.
predi

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Starting...100:110 out of 700
label:  brief
response:   Sure! Based on the content of the document you provided, I would classify it as a 'Raadsnotulen'.

Here is the output in JSON format:

{
"categorie": "Raadsnotulen"
}

The reason for this classification is that the document appears to be a formal notice or agenda for a meeting of the Amsterdam City Council, with information on an update on school assignments and a reference to a forthcoming report in February 2023. The document includes the names of the council members and the wethouder (mayor) responsible for the topic, as well as contact information for the office of OJZ bestuurszaken. The tone and format of the document suggest that it is a formal, official document rather than a personal communication or a informational piece.
prediction: raadsnotulen
label:  raadsadres
response:   Sure, I can classify the document you provided and give you the output in JSON format.

After analyzing the c

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Starting...110:120 out of 700
label:  voordracht
response:   Sure! Based on the content of the document you provided, I would classify it as a "Voordracht" (Dutch for "Presentation"). Therefore, the output would be:

{"categorie": "Voordracht"}

Here's why:

* The document is titled "VN2023-009414 Raadscommissie voor Ruimtelijke Ordening en Grond en Ontwikkeling van 12 april 2023".
* The content of the document is a presentation or speech given to the Raadcommissie voor Ruimtelijke Ordening en Grond en Ontwikkeling (Committee for Spatial Planning and Ground and Development) of Amsterdam.
* The document includes information on the agenda point "Ceuvel", which suggests that the presentation is related to a specific topic or issue being discussed by the committee.

I hope this helps! Let me know if you have any other questions.
prediction: voordracht
label:  motie
response:   Sure! Based on the content of the document you provided, I would classify it

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Starting...120:130 out of 700
label:  onderzoeksrapport
response:   Based on the text you provided, I would classify the document as a 'Besluit'.

Here is the output in the form of a JSON file:

{
"categorie": "Besluit"
}
prediction: besluit
label:  factsheet
response:   Based on the content of the document, I would classify it as a ['Voordracht'].

Here is the JSON output:

{
"categorie": "Voordracht"
}
prediction: voordracht
label:  factsheet
response:   After analyzing the document, I can categorize it as a ['Besluit'].

The document contains a decision or recommendation made by the buurtteam of Amsterdam regarding the Life Project in the Zuidoost area. The document outlines the team's findings and experiences with the project, including the results of 3 observations and the team's conclusions.

Therefore, the category for this document is:

{'categorie': 'Besluit'}
prediction: besluit
label:  onderzoeksrapport
response:   Based on the content o

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Starting...130:140 out of 700
label:  raadsadres
response:   Sure, I can classify the document and provide the output in JSON format.

After analyzing the document, I have determined that it belongs to the category: ['Brief'].

Here is the output in JSON format:

{
"categorie": "Brief"
}

The reason for this classification is that the document is a formal letter addressed to the mayor and councilors of Amsterdam, with a clear subject and a formal tone. It contains information about a route description and provides contact details for more information. All of these characteristics are typical of a business letter or brief.
prediction: brief
label:  factsheet
response:   Based on the content of the document you provided, I would classify it under the category of "Onderzoeksrapport".

Here is the output in the form of a JSON file:

{
"categorie": "Onderzoeksrapport"
}
prediction: onderzoeksrapport
label:  factsheet
response:   Based on the content of 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Starting...140:150 out of 700
label:  voordracht
response:   Based on the content of the document you provided, I would classify it as a "Voordracht" (Dutch for "Motion").

Here is the output in JSON format:

{
"categorie": "Voordracht"
}

The document contains a motion for the Tijdelijke Algemene Raadscommissie of Amsterdam, with the title "Voordracht voor de Tijdelijke Algemene Raadscommissie van o7 april 2022". The document includes the agenda point number, the date, and the name of the responsible officer. Based on this information, it is clear that the document is a formal motion or proposal to be discussed and voted on by the city council.
prediction: voordracht
label:  besluit
response:   Sure, I can classify the document for you. Based on the content you provided, I would classify this document as ['Besluit'].

Here is the output in JSON format:

{
"categorie": "Besluit"
}
prediction: besluit
label:  onderzoeksrapport
response:   Based on t

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Starting...150:160 out of 700
label:  motie
response:   Het document kan worden gecategoriseerd als:

{'categorie': 'Voordracht'}

De tekst van het document bevat elementen van een voordracht, zoals de formulering van een standpunt en de uittreksel van een besluit.
prediction: voordracht
label:  raadsadres
response:   {'categorie': 'Voordracht'}
prediction: voordracht
label:  motie
response:   Based on the content of the document, I would classify it under the category of "Motie".

Here is the output in the form of a JSON file:

{
"categorie": "Motie"
}
prediction: motie
label:  motie
response:   Sure, I can classify the document for you. Based on the content provided, I would classify the document as a 'Motie' (Motion) with a score of 100%.

Here is the output in JSON format:

{
"categorie": "Motie"
}

The reason for this classification is that the document contains a formal motion proposed by a member of the Gemeenteraad (City Council) of Amsterd

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Starting...160:170 out of 700
label:  voordracht
response:   Based on the content of the document you provided, I would classify it as a 'Voordracht' (Dutch for 'Motion').

Here is the output in JSON format:

{
"categorie": "Voordracht"
}

The document contains a motion or proposal for discussion and decision-making by the Raad Commissie, which is a typical characteristic of a Voordracht document.
prediction: voordracht
label:  actualiteit
response:   Sure, I can classify the document for you. Based on the content provided, I would categorize the document as a ['Mondelinge vragen'] (Dutch for "Oral questions").

Here is the output in JSON format:

{
"categorie": "Mondelinge vragen"
}
prediction: NoPredictionInOutput
label:  raadsnotulen
response:   Sure, I can classify the document for you. Based on the content provided, I would classify the document as:

{
"categorie": "Raadsnotulen"
}

The document appears to be a meeting agenda for the Gemeenter

Bad pipe message: %s [b'\xab\tm\xcfD\x7fU2Y\xec\x12\x90\xc1\x1dr8\x8a\xcb \xcf\xcbqS8\r\xec\xf8[]\xec\x07\x8b\xf9+\x14\x11\x1b<\xda\xc8\x92\xde\xb6\xc0\x1bF[\xce\x97']
Bad pipe message: %s [b'\x00\x08\x13\x02\x13\x03\x13\x01\x00\xff\x01\x00\x00\x8f\x00\x00\x00\x0e\x00\x0c\x00\x00\t127.0.0.1\x00\x0b\x00\x04\x03\x00\x01', b'\n']
Bad pipe message: %s [b"Req\x93\xfd1\xdd*\x0b\xa1\x90\xdd\x1dJ\xb6`\x83>\x00\x00|\xc0,\xc00\x00\xa3\x00\x9f\xcc\xa9\xcc\xa8\xcc\xaa\xc0\xaf\xc0\xad\xc0\xa3\xc0\x9f\xc0]\xc0a\xc0W\xc0S\xc0+\xc0/\x00\xa2\x00\x9e\xc0\xae\xc0\xac\xc0\xa2\xc0\x9e\xc0\\\xc0`\xc0V\xc0R\xc0$\xc0(\x00k\x00j\xc0#\xc0'\x00g\x00@\xc0\n\xc0\x14\x009\x008\xc0\t\xc0\x13\x003\x002\x00\x9d\xc0\xa1\xc0\x9d\xc0Q\x00\x9c\xc0\xa0\xc0\x9c\xc0P\x00=\x00<\x005\x00/\x00\x9a\x00\x99\xc0\x07\xc0\x11\x00\x96\x00\x05\x00\xff\x01\x00\x00j\x00\x00\x00\x0e\x00\x0c\x00\x00\t127.0.0.1\x00\x0b\x00\x04\x03\x00\x01\x02\x00\n\x00\x0c\x00\n\x00\x1d\x00\x17\x00\x1e\x00\x19\x00\x18\x00#\x00\x00\x00\x16\x00\x00\x00\x17\x

label:  factsheet
response:   Based on the content of the document you provided, I would classify it as a 'Besluit'.

Here is the output in the form of a JSON file:

{
"categorie": "Besluit"
}

The reason for this classification is that the document appears to be a decision or policy document, as it contains information about the distribution of a budget and the allocation of funds to different areas. The use of terms like "tent rent", "EO", "ET", and "k TR Kit mi" suggest that the document is related to financial or administrative matters, which are typical of a 'Besluit' category.
prediction: besluit
label:  actualiteit
response:   Sure, I can classify the document for you. Based on the content provided, I would classify the document as a 'Raadscommissie Verklarende Nota' (Raadscommissie Explanatory Note).

Here is the output in JSON format:

{
"categorie": "Raadscommissie Verklarende Nota"
}

The reason for this classification is that the document appears to be a preparatory note fo

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saving done! Interrupting is allowed.
Starting...170:180 out of 700
label:  onderzoeksrapport
response:   Based on the content of the document you provided, I would classify it as a 'Raadsnotulen'.

Here is the output in JSON format:

{
"categorie": "Raadsnotulen"
}

The reason for this classification is that the document appears to be a meeting minutes or agenda for a council or municipal meeting, with details of items discussed, decisions made, and next steps. The document includes references to previous meetings and actions, and is written in a formal and structured manner, which are all characteristics of a raadsnotulen.
prediction: raadsnotulen
label:  schriftelijke vraag
response:   {'categorie': 'Schriftelijke Vraag'}
prediction: schriftelijke vraag
label:  raadsadres
response:   Based on the content of the document, I would classify it as a 'Brief'.

Here is the output in JSON format:

{
"categorie": "Brief"
}
prediction: brief
label:  schriftelijke vraag
response:   Sure, I ca

KeyboardInterrupt: 

In [29]:
pred = pd.read_pickle(OVERVIEW_PATH)
display(pred)

Unnamed: 0,model,run_id,date,train_set,test_set,train_set_support,test_set_support,split_col,text_col,runtime,accuracy,macro_avg_precision,macro_avg_recall,macro_avg_f1,classification_report
0,Llama-2-7b-chat-hf,IC_Llama-2-7b-chat-hfzeroshot_prompt_mistral_l...,2024-05-22 21:29:48.627017+02:00,train,test,9900,1100,balanced_split,TruncationLlamaTokensFront100Back0,140223.734147,0.422727,0.491578,0.31,0.287423,precision...
0,Llama-2-7b-chat-hf,IC_Llama-2-7b-chat-hfzeroshot_prompt_mistral_l...,2024-05-25 00:52:02.538722+02:00,train,test,9900,570,balanced_split,TruncationLlamaTokensFront100Back100,85781.196154,0.401754,0.534931,0.325793,0.305298,precision ...


### Mistral

In [None]:
SHORT_MODEL_NAME = 'Mistral'
PROMPT = pt.zeroshot_prompt_mistral_llama
PROMPT_NAME = ph.get_promptfunction_name(PROMPT)
TOKENS_COL = 'LlamaTokens' # column with text split using tokenizer of either mistral (MistralTokens) or Llama (LlamaTokens). Using Llama, because Llama split into more tokens. 
FRONT_THRESHOLD = 100
BACK_THRESHOLD = 0

if PROMPT==pt.zeroshot_prompt_mistral_llama:
    NUMBER_EXAMPLES = 0
elif PROMPT == pt.fewshot_prompt_bm25:
    NUMBER_EXAMPLES = 2



#### Load model - In-context learning
Note - ONLY load one model: either in-context or fine-tuning

In [None]:
from transformers import pipeline, Conversation

chatbot_mistral = pipeline(task='conversational', model='mistralai/Mistral-7B-Instruct-v0.2',
                   device_map='cpu', model_kwargs={'offload_buffers':True})

MODEL_NAME = 'Mistral-7B-Instruct-v0.2'
SUBFOLDER = 'in_context'
SHORT_ID = 'IC'


#### Load model - finetuning

In [None]:
from transformers import pipeline, Conversation

chatbot_mistral = pipeline(task='conversational', model='FemkeBakker/MistralSmallData200Tokens',
                   device_map='cpu', model_kwargs={'offload_buffers':True})

MODEL_NAME = 'MistralSmallData200Tokens'
SUBFOLDER = 'finetuning'
SHORT_ID = 'FT'

#### Set-up paths to save predictions

In [None]:
import os

if SPLIT_COLUMN == '4split' or SPLIT_COLUMN == '2split':
    OVERVIEW_PATH = f"{cf.output_path}/predictionsVal/{SUBFOLDER}/{SHORT_MODEL_NAME}/{PROMPT_NAME}/overview.pkl"
    PREDICTION_PATH = f"{cf.output_path}/predictionsVal/{SUBFOLDER}/{SHORT_MODEL_NAME}/{PROMPT_NAME}/predictions.pkl"
    
elif SPLIT_COLUMN == 'balanced_split':
    if SUBFOLDER == 'finetuning':
        OVERVIEW_PATH = f"{cf.output_path}/predictionsFinal/{SUBFOLDER}/overview.pkl"
        PREDICTION_PATH = f"{cf.output_path}/predictionsFinal/{SUBFOLDER}/{SHORT_MODEL_NAME}First{FRONT_THRESHOLD}Last{BACK_THRESHOLD}Predictions.pkl"

    elif SUBFOLDER == 'in_context':
        OVERVIEW_PATH = f"{cf.output_path}/predictionsFinal/{SUBFOLDER}/{SHORT_MODEL_NAME}/overview.pkl"
        PREDICTION_PATH = f"{cf.output_path}/predictionsFinal/{SUBFOLDER}/{SHORT_MODEL_NAME}/{PROMPT_NAME}/First{FRONT_THRESHOLD}Last{BACK_THRESHOLD}Predictions.pkl"

print(OVERVIEW_PATH)
print(PREDICTION_PATH)

if not os.path.isdir(os.path.dirname(os.path.abspath(OVERVIEW_PATH))):
    raise ValueError("Folder to OVERVIEW_PATH does not exist") 
if not os.path.isdir(os.path.dirname(os.path.abspath(PREDICTION_PATH))):
    raise ValueError("Folder to PREDICTION_PATH does not exist") 

run_id = f'{SHORT_ID}_{MODEL_NAME}{PROMPT_NAME}{TOKENS_COL}{FRONT_THRESHOLD}_{BACK_THRESHOLD}{TRAIN_SET}{TEST_SET}_numEx{NUMBER_EXAMPLES}'
print ('\n', run_id)

#### Run experiment

In [None]:
# run experiment

# add new column with truncated text -> new dataframe with column + new column name
trunc_df = tf.add_truncation_column(txt,'text', TOKENS_COL, FRONT_THRESHOLD, BACK_THRESHOLD)

# if new run MAKE SURE RUN_ID IS UNIQUE, if want to resume run, pass in that run_id
run_experiment(chatbot_mistral, trunc_df, run_id, PROMPT, TEXT_COLUMN, SPLIT_COLUMN, TRAIN_SET, TEST_SET, LABEL_COLUMN, PREDICTION_PATH, OVERVIEW_PATH, MODEL_NAME, NUMBER_EXAMPLES)


In [None]:
pred = pd.read_pickle(OVERVIEW_PATH)
display(pred)
