In [1]:
# Runs the blobfuse_raadsinformatie.sh script through bash (presumably mounts the Azure blob storage; confirm).
# NOTE(review): the path is absolute and Azure-specific, and this cell is not guarded by the my_run switch set in the next cell.
!bash /home/azureuser/cloudfiles/code/blobfuse/blobfuse_raadsinformatie.sh


In [12]:
import os
import sys

import pandas as pd

# Make the parent directory importable so the project-level modules below resolve.
sys.path.append("..")

import my_secrets as sc
import settings as st

# Select where to run notebook: "azure" or "local"
my_run = "local"

# Load the configuration module that matches the selected environment.
if my_run == "azure":
    import config_azure as cf

    # Point the TRANSFORMERS_CACHE environment variable at the configured cache folder.
    # makedirs(exist_ok=True) avoids the check-then-create race and also creates missing parents.
    os.makedirs(cf.HUGGING_CACHE, exist_ok=True)
    os.environ["TRANSFORMERS_CACHE"] = cf.HUGGING_CACHE
elif my_run == "local":
    import config as cf
else:
    # Fail here with a clear message instead of a NameError on the first use of `cf` further down.
    raise ValueError(f'my_run must be "azure" or "local", got {my_run!r}')

## Notebook overview
This notebook creates predictions for the baseline models. In total, five models are tried out.
- Training function: given a baseline model, returns its scores.
- Load data: load all the documents and set the parameters.
- Save predictions.


*Previous notebook: GetPredictions*

*Next notebook: plot*

### Load file with training function


In [13]:
import sys
sys.path.append('../src/') 
import baseline as bf

### Load data

In [14]:
import pandas as pd

# Load the pickled DataFrame (one document per row) from the configured output folder.
# NOTE(review): unpickling can execute arbitrary code; only load files produced by this project.
df = pd.read_pickle(f"{cf.output_path}/txtfiles.pkl")

In [15]:
from collections import Counter
sys.path.append('../src/') 
import baseline as bf
from truncation import add_truncation_column

# Set the variables; they are the same for each model.

# Column in which the data split is stored. Must be either 2split, 4split or balanced_split.
# 2split = data split into train and test. 4split = data split into train, test, dev and val.
SPLIT_COLUMN = 'balanced_split'
print('Distribution of sets: ', Counter(df[SPLIT_COLUMN]))
TRAIN_SET = 'train' # must be dev or train
TEST_SET = 'test' # must be val or test
# How SPLIT_COLUMN, TRAIN_SET and TEST_SET work together:
# every row's value in SPLIT_COLUMN names the split the row belongs to, in this case 'train', 'test', 'dev' or 'val'.
# TRAIN_SET indicates which rows are selected by filtering on the split column.
# Thus if TRAIN_SET = 'train', all rows where the split column is 'train' are selected as the training set.
# The same goes for TEST_SET.


TEXT_COLUMN = 'text' # column that holds the text
LABEL_COLUMN = 'label' # column with the truth label
DATAFRAME = df.copy() # DataFrame in which each row is a document
FOLDER = f"{cf.output_path}/predictionsFinal/baselines" # folder where each individual prediction is saved
OVERVIEW_PATH = f"{cf.output_path}/predictionsFinal/baselines/overview.pkl" # file where the scores and extra data about each run are saved

# Needed for the truncation experiment on the baselines.
TRUNC_COLUMN = 'trunc_txt' # column with the truncated text
TOKENS_COL = 'LlamaTokens' # column with the text split into tokens by the model tokenizer; here Llama, could also be MistralTokens
# Combinations of front and back truncation thresholds.
# The first value in each tuple is the first N tokens, the second value is the last N tokens.
THRESHOLD_COMBINATIONS =[(100,0), (200,0), (100,100)]

Distribution of sets:  Counter({'train': 39, 'val': 5, 'test': 5})


In [16]:
# Function to run the baseline on each truncation threshold

def run_truncation_on_baselines(baseline_function, model_name, predictions_path):
    """Run one baseline once for every entry of THRESHOLD_COMBINATIONS.

    For each (front, back) threshold pair, a DataFrame with a truncated-text
    column is obtained from add_truncation_column, and bf.run_baseline is
    called on it with TRUNC_COLUMN as the text column.

    Args:
        baseline_function: Model object passed through to bf.run_baseline.
        model_name: Model name; also used as the prefix of each run id.
        predictions_path: Predictions path passed through to bf.run_baseline.

    Returns:
        None. Results of bf.run_baseline are not collected here.
    """
    for combination in THRESHOLD_COMBINATIONS:
        # First value: number of leading tokens; second value: number of trailing tokens.
        n_front, n_back = combination[0], combination[1]

        # Identifier that tells the truncation runs apart.
        run_label = f"{model_name}_first{n_front}_last{n_back}"

        # DataFrame that carries the truncated-text column for this threshold pair.
        truncated_df = add_truncation_column(
            DATAFRAME, TEXT_COLUMN, TOKENS_COL, n_front, n_back
        )

        # Train and get predictions on the truncated text.
        bf.run_baseline(
            baseline_function,
            model_name,
            truncated_df,
            SPLIT_COLUMN,
            TRAIN_SET,
            TEST_SET,
            TRUNC_COLUMN,
            LABEL_COLUMN,
            predictions_path,
            OVERVIEW_PATH,
            run_label,
        )

### Baselines

##### Baseline 1: Linear SVM + tf-idf

In [17]:
from sklearn.svm import LinearSVC

# Baseline 1: LinearSVC, first on the full text and then on every truncation setting.
baseline_function = LinearSVC()
model_name = 'LinearSVC'
predictions_path = f"{FOLDER}/{model_name}predictions.pkl"
run_id = f"{model_name}_fulltext"

# Show the overview file and the predictions file used by this run.
print(OVERVIEW_PATH, predictions_path, sep="\n")

# Full-text run: the untruncated TEXT_COLUMN is used as input.
linear_svm = bf.run_baseline(
    baseline_function,
    model_name,
    DATAFRAME,
    SPLIT_COLUMN,
    TRAIN_SET,
    TEST_SET,
    TEXT_COLUMN,
    LABEL_COLUMN,
    predictions_path,
    OVERVIEW_PATH,
    run_id,
)

# Truncation runs: one per entry of THRESHOLD_COMBINATIONS.
run_truncation_on_baselines(baseline_function, model_name, predictions_path)

../local_data/predictionsFinal/baselines/overview.pkl
../local_data/predictionsFinal/baselines/LinearSVCpredictions.pkl
                     precision    recall  f1-score   support

             agenda       0.50      1.00      0.67         1
              motie       1.00      1.00      1.00         1
         raadsadres       0.00      0.00      0.00         1
schriftelijke vraag       1.00      1.00      1.00         1
         voordracht       1.00      1.00      1.00         1

           accuracy                           0.80         5
          macro avg       0.70      0.80      0.73         5
       weighted avg       0.70      0.80      0.73         5



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


                     precision    recall  f1-score   support

             agenda       1.00      1.00      1.00         1
              motie       1.00      1.00      1.00         1
         raadsadres       0.00      0.00      0.00         1
schriftelijke vraag       1.00      1.00      1.00         1
         voordracht       0.50      1.00      0.67         1

           accuracy                           0.80         5
          macro avg       0.70      0.80      0.73         5
       weighted avg       0.70      0.80      0.73         5



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


                     precision    recall  f1-score   support

             agenda       1.00      1.00      1.00         1
              motie       1.00      1.00      1.00         1
         raadsadres       0.00      0.00      0.00         1
schriftelijke vraag       1.00      1.00      1.00         1
         voordracht       0.50      1.00      0.67         1

           accuracy                           0.80         5
          macro avg       0.70      0.80      0.73         5
       weighted avg       0.70      0.80      0.73         5

                     precision    recall  f1-score   support

             agenda       1.00      1.00      1.00         1
              motie       1.00      1.00      1.00         1
         raadsadres       0.00      0.00      0.00         1
schriftelijke vraag       1.00      1.00      1.00         1
         voordracht       0.50      1.00      0.67         1

           accuracy                           0.80         5
          macro avg

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


##### Baseline 2: Naive Bayes + tf-idf

In [18]:
from sklearn.naive_bayes import MultinomialNB

# Baseline 2: MultinomialNB, first on the full text and then on every truncation setting.
baseline_function = MultinomialNB()
model_name = 'MultinomialNB'
predictions_path = f"{FOLDER}/{model_name}predictions.pkl"
run_id = f"{model_name}_fulltext"

# Show the overview file and the predictions file used by this run.
print(OVERVIEW_PATH, predictions_path, sep="\n")

# Full-text run: the untruncated TEXT_COLUMN is used as input.
naive_bayes = bf.run_baseline(
    baseline_function,
    model_name,
    DATAFRAME,
    SPLIT_COLUMN,
    TRAIN_SET,
    TEST_SET,
    TEXT_COLUMN,
    LABEL_COLUMN,
    predictions_path,
    OVERVIEW_PATH,
    run_id,
)

# Truncation runs: one per entry of THRESHOLD_COMBINATIONS.
run_truncation_on_baselines(baseline_function, model_name, predictions_path)

../local_data/predictionsFinal/baselines/overview.pkl
../local_data/predictionsFinal/baselines/MultinomialNBpredictions.pkl
                     precision    recall  f1-score   support

             agenda       0.50      1.00      0.67         1
              motie       1.00      1.00      1.00         1
         raadsadres       0.00      0.00      0.00         1
schriftelijke vraag       1.00      1.00      1.00         1
         voordracht       1.00      1.00      1.00         1

           accuracy                           0.80         5
          macro avg       0.70      0.80      0.73         5
       weighted avg       0.70      0.80      0.73         5



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


                     precision    recall  f1-score   support

             agenda       1.00      1.00      1.00         1
              motie       1.00      1.00      1.00         1
         raadsadres       0.00      0.00      0.00         1
schriftelijke vraag       1.00      1.00      1.00         1
         voordracht       0.50      1.00      0.67         1

           accuracy                           0.80         5
          macro avg       0.70      0.80      0.73         5
       weighted avg       0.70      0.80      0.73         5

                     precision    recall  f1-score   support

             agenda       1.00      1.00      1.00         1
              motie       1.00      1.00      1.00         1
         raadsadres       0.00      0.00      0.00         1
schriftelijke vraag       1.00      1.00      1.00         1
         voordracht       0.50      1.00      0.67         1

           accuracy                           0.80         5
          macro avg

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


                     precision    recall  f1-score   support

             agenda       1.00      1.00      1.00         1
              motie       1.00      1.00      1.00         1
         raadsadres       0.00      0.00      0.00         1
schriftelijke vraag       1.00      1.00      1.00         1
         voordracht       0.50      1.00      0.67         1

           accuracy                           0.80         5
          macro avg       0.70      0.80      0.73         5
       weighted avg       0.70      0.80      0.73         5



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


##### Baseline 3: Logistic Regression + tf-idf

In [19]:
from sklearn.linear_model import LogisticRegression

# Baseline 3: LogisticRegression, first on the full text and then on every truncation setting.
baseline_function = LogisticRegression()
model_name = 'LogisticRegression'
predictions_path = f"{FOLDER}/{model_name}predictions.pkl"
run_id = f"{model_name}_fulltext"

# Show the overview file and the predictions file used by this run.
print(OVERVIEW_PATH, predictions_path, sep="\n")

# Full-text run: the untruncated TEXT_COLUMN is used as input.
log_reg = bf.run_baseline(
    baseline_function,
    model_name,
    DATAFRAME,
    SPLIT_COLUMN,
    TRAIN_SET,
    TEST_SET,
    TEXT_COLUMN,
    LABEL_COLUMN,
    predictions_path,
    OVERVIEW_PATH,
    run_id,
)

# Truncation runs: one per entry of THRESHOLD_COMBINATIONS.
run_truncation_on_baselines(baseline_function, model_name, predictions_path)

../local_data/predictionsFinal/baselines/overview.pkl
../local_data/predictionsFinal/baselines/LogisticRegressionpredictions.pkl


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


                     precision    recall  f1-score   support

             agenda       0.50      1.00      0.67         1
              motie       1.00      1.00      1.00         1
         raadsadres       0.00      0.00      0.00         1
schriftelijke vraag       1.00      1.00      1.00         1
         voordracht       1.00      1.00      1.00         1

           accuracy                           0.80         5
          macro avg       0.70      0.80      0.73         5
       weighted avg       0.70      0.80      0.73         5



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


                     precision    recall  f1-score   support

             agenda       1.00      1.00      1.00         1
              motie       1.00      1.00      1.00         1
         raadsadres       0.00      0.00      0.00         1
schriftelijke vraag       1.00      1.00      1.00         1
         voordracht       0.50      1.00      0.67         1

           accuracy                           0.80         5
          macro avg       0.70      0.80      0.73         5
       weighted avg       0.70      0.80      0.73         5



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


                     precision    recall  f1-score   support

             agenda       1.00      1.00      1.00         1
              motie       1.00      1.00      1.00         1
         raadsadres       0.00      0.00      0.00         1
schriftelijke vraag       1.00      1.00      1.00         1
         voordracht       0.50      1.00      0.67         1

           accuracy                           0.80         5
          macro avg       0.70      0.80      0.73         5
       weighted avg       0.70      0.80      0.73         5

                     precision    recall  f1-score   support

             agenda       1.00      1.00      1.00         1
              motie       1.00      1.00      1.00         1
         raadsadres       0.00      0.00      0.00         1
schriftelijke vraag       1.00      1.00      1.00         1
         voordracht       0.50      1.00      0.67         1

           accuracy                           0.80         5
          macro avg

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


##### Baseline 4: k Nearest Neighbors + tf-idf

In [20]:
from sklearn.neighbors import KNeighborsClassifier

# Baseline 4: KNeighborsClassifier, first on the full text and then on every truncation setting.
baseline_function = KNeighborsClassifier()
model_name = 'KNeighborsClassifier'
predictions_path = f"{FOLDER}/{model_name}predictions.pkl"
run_id = f"{model_name}_fulltext"

# Show the overview file and the predictions file used by this run.
print(OVERVIEW_PATH, predictions_path, sep="\n")

# Full-text run: the untruncated TEXT_COLUMN is used as input.
knn = bf.run_baseline(
    baseline_function,
    model_name,
    DATAFRAME,
    SPLIT_COLUMN,
    TRAIN_SET,
    TEST_SET,
    TEXT_COLUMN,
    LABEL_COLUMN,
    predictions_path,
    OVERVIEW_PATH,
    run_id,
)

# Truncation runs: one per entry of THRESHOLD_COMBINATIONS.
run_truncation_on_baselines(baseline_function, model_name, predictions_path)

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


../local_data/predictionsFinal/baselines/overview.pkl
../local_data/predictionsFinal/baselines/KNeighborsClassifierpredictions.pkl
                     precision    recall  f1-score   support

             agenda       0.50      1.00      0.67         1
              motie       0.00      0.00      0.00         1
         raadsadres       1.00      1.00      1.00         1
schriftelijke vraag       1.00      1.00      1.00         1
         voordracht       1.00      1.00      1.00         1

           accuracy                           0.80         5
          macro avg       0.70      0.80      0.73         5
       weighted avg       0.70      0.80      0.73         5



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


                     precision    recall  f1-score   support

             agenda       1.00      1.00      1.00         1
              motie       1.00      1.00      1.00         1
         raadsadres       1.00      1.00      1.00         1
schriftelijke vraag       1.00      1.00      1.00         1
         voordracht       1.00      1.00      1.00         1

           accuracy                           1.00         5
          macro avg       1.00      1.00      1.00         5
       weighted avg       1.00      1.00      1.00         5



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


                     precision    recall  f1-score   support

             agenda       1.00      1.00      1.00         1
              motie       1.00      1.00      1.00         1
         raadsadres       0.00      0.00      0.00         1
schriftelijke vraag       1.00      1.00      1.00         1
         voordracht       0.50      1.00      0.67         1

           accuracy                           0.80         5
          macro avg       0.70      0.80      0.73         5
       weighted avg       0.70      0.80      0.73         5

                     precision    recall  f1-score   support

             agenda       1.00      1.00      1.00         1
              motie       1.00      1.00      1.00         1
         raadsadres       0.00      0.00      0.00         1
schriftelijke vraag       1.00      1.00      1.00         1
         voordracht       0.50      1.00      0.67         1

           accuracy                           0.80         5
          macro avg

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


##### Baseline 5: RandomForest + tf-idf

In [21]:
from sklearn.ensemble import RandomForestClassifier

# Baseline 5: RandomForestClassifier, first on the full text and then on every truncation setting.
model_name = 'RandomForestClassifier'
# A random forest is stochastic (bootstrap samples and feature sub-sampling); a fixed seed
# keeps the reported scores reproducible across "Restart Kernel -> Run All".
baseline_function = RandomForestClassifier(random_state=42)
run_id = f"{model_name}_fulltext"
predictions_path = f"{FOLDER}/{model_name}predictions.pkl"

# Show the overview file and the predictions file used by this run.
print(OVERVIEW_PATH)
print(predictions_path)

# Full-text run: the untruncated TEXT_COLUMN is used as input.
random_forest = bf.run_baseline(
    baseline_function,
    model_name,
    DATAFRAME,
    SPLIT_COLUMN,
    TRAIN_SET,
    TEST_SET,
    TEXT_COLUMN,
    LABEL_COLUMN,
    predictions_path,
    OVERVIEW_PATH,
    run_id,
)

# Truncation runs: one per entry of THRESHOLD_COMBINATIONS.
run_truncation_on_baselines(baseline_function, model_name, predictions_path)

../local_data/predictionsFinal/baselines/overview.pkl
../local_data/predictionsFinal/baselines/RandomForestClassifierpredictions.pkl
                     precision    recall  f1-score   support

             agenda       0.50      1.00      0.67         1
              motie       1.00      1.00      1.00         1
         raadsadres       0.00      0.00      0.00         1
schriftelijke vraag       1.00      1.00      1.00         1
         voordracht       1.00      1.00      1.00         1

           accuracy                           0.80         5
          macro avg       0.70      0.80      0.73         5
       weighted avg       0.70      0.80      0.73         5



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


                     precision    recall  f1-score   support

             agenda       1.00      1.00      1.00         1
              motie       1.00      1.00      1.00         1
         raadsadres       0.00      0.00      0.00         1
schriftelijke vraag       1.00      1.00      1.00         1
         voordracht       0.50      1.00      0.67         1

           accuracy                           0.80         5
          macro avg       0.70      0.80      0.73         5
       weighted avg       0.70      0.80      0.73         5



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


                     precision    recall  f1-score   support

             agenda       0.50      1.00      0.67         1
              motie       1.00      1.00      1.00         1
         raadsadres       0.00      0.00      0.00         1
schriftelijke vraag       1.00      1.00      1.00         1
         voordracht       1.00      1.00      1.00         1

           accuracy                           0.80         5
          macro avg       0.70      0.80      0.73         5
       weighted avg       0.70      0.80      0.73         5



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


                     precision    recall  f1-score   support

             agenda       1.00      1.00      1.00         1
              motie       1.00      1.00      1.00         1
         raadsadres       0.00      0.00      0.00         1
schriftelijke vraag       1.00      1.00      1.00         1
         voordracht       0.50      1.00      0.67         1

           accuracy                           0.80         5
          macro avg       0.70      0.80      0.73         5
       weighted avg       0.70      0.80      0.73         5



### Overview of all runs

In [22]:
# Load the overview file (scores and extra data about each run) and render it.
# NOTE(review): unpickling can execute arbitrary code; only load files produced by this project.
overview = pd.read_pickle(OVERVIEW_PATH)
display(overview)

Unnamed: 0,model,date,run_id,train_set,test_set,train_set_support,test_set_support,split_col,text_col,runtime,accuracy,macro_avg_precision,macro_avg_recall,macro_avg_f1,classification_report,weighted_avg_precision,weighted_avg_recall,weighted_avg_f1
0,LinearSVC,2024-07-09 14:19:27.394186+02:00,LinearSVC_fulltext,train,test,39,5,balanced_split,text,0.160124,0.8,0.7,0.8,0.733333,precision recall f1-s...,,,
0,LinearSVC,2024-07-09 14:19:27.816805+02:00,LinearSVC_first100_last0,train,test,39,5,balanced_split,TruncationLlamaTokensFront100Back0,0.128362,0.8,0.7,0.8,0.733333,precision recall f1-s...,0.7,0.8,0.733333
0,LinearSVC,2024-07-09 14:19:28.030720+02:00,LinearSVC_first200_last0,train,test,39,5,balanced_split,TruncationLlamaTokensFront200Back0,0.131622,0.8,0.7,0.8,0.733333,precision recall f1-s...,0.7,0.8,0.733333
0,LinearSVC,2024-07-09 14:19:28.259214+02:00,LinearSVC_first100_last100,train,test,39,5,balanced_split,TruncationLlamaTokensFront100Back100,0.101406,0.8,0.7,0.8,0.733333,precision recall f1-s...,0.7,0.8,0.733333
0,MultinomialNB,2024-07-09 14:19:28.482457+02:00,MultinomialNB_fulltext,train,test,39,5,balanced_split,TruncationLlamaTokensFront100Back100,0.135828,0.8,0.7,0.8,0.733333,precision recall f1-s...,0.7,0.8,0.733333
0,MultinomialNB,2024-07-09 14:19:28.702312+02:00,MultinomialNB_first100_last0,train,test,39,5,balanced_split,TruncationLlamaTokensFront100Back0,0.09193,0.8,0.7,0.8,0.733333,precision recall f1-s...,0.7,0.8,0.733333
0,MultinomialNB,2024-07-09 14:19:28.862797+02:00,MultinomialNB_first200_last0,train,test,39,5,balanced_split,TruncationLlamaTokensFront200Back0,0.089648,0.8,0.7,0.8,0.733333,precision recall f1-s...,0.7,0.8,0.733333
0,MultinomialNB,2024-07-09 14:19:29.076918+02:00,MultinomialNB_first100_last100,train,test,39,5,balanced_split,TruncationLlamaTokensFront100Back100,0.101284,0.8,0.7,0.8,0.733333,precision recall f1-s...,0.7,0.8,0.733333
0,LogisticRegression,2024-07-09 14:19:30.748370+02:00,LogisticRegression_fulltext,train,test,39,5,balanced_split,TruncationLlamaTokensFront100Back100,1.669312,0.8,0.7,0.8,0.733333,precision recall f1-s...,0.7,0.8,0.733333
0,LogisticRegression,2024-07-09 14:19:31.013191+02:00,LogisticRegression_first100_last0,train,test,39,5,balanced_split,TruncationLlamaTokensFront100Back0,0.123145,0.8,0.7,0.8,0.733333,precision recall f1-s...,0.7,0.8,0.733333
