prefix_ratio = len(prefix) / min(pred, gt) 

is_prefix = is_prefix(pred, gt) | is_prefix(gt, pred)

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from io import StringIO
from ipywidgets import interact, widgets
from xlwings.utils import chunk
import numpy as np
from pathlib import Path

from plotter import read_results, merge_df, get_group_columns, filter_target_columns, plot_dropdown, plot_dropdown_with_group_by, make_interaction

%matplotlib inline

In [23]:
base_path = Path("/mnt/data/kolomyttseva/long-contex-eval/output/rag_results/all_files")

paths_list = [
    "results_embed_full_file.jsonl", # full_file
    "results_embed_python_chunk_score.jsonl", # completion_last_chunk_size + fixed_line
    "results_fixed_line.jsonl",
    "results_langchain.jsonl",
    "results_scorers_splitters1_python_chunk_score.jsonl",
    "results_scorers_splitters1_kotlin_chunk_score.jsonl",
    "results_py_modules_python_draco.jsonl",
    "results_py_full_file_python_draco.jsonl",
    "results_multi_python_multi_score.jsonl",
    "completion_last_chunk_size.jsonl",
]

paths_list_only_py = [
    "results_chunk_completion_file_only_py_python_chunk_score.jsonl", # completion_last_chunk_size + only_python
    # "results_embed_full_file_only_py_python_chunk_score.jsonl", # only_python + full_file
]

In [None]:
'''
results_embed_full_file.jsonl
    - languages: [python, kotlin]
    - composer: chunk_score
    - chunker: full_file
    - scorer: dense
    - splitter: None
    - _allowed_extensions: [".md", ".txt", ".rst"]

results_embed_full_file_only_py_python_chunk_score.jsonl
    - languages: python
    - composer: chunk_score
    - chunker: full_file
    - scorer: dense
    - splitter: None
    - _allowed_extensions: []

results_embed_python_chunk_score.jsonl
    - languages: python
    - composer: chunk_score
    - chunker: fixed_line
    - scorer: dense
    - splitter: None
    - chunk_completion_file = True
    - completion_last_chunk_size = [8, 16, 32, 64, 128]
    - _allowed_extensions: [".md", ".txt", ".rst"]

-------------------------------------------------------------------------    

results_scorers_splitters1_python_chunk_score.jsonl
    - languages: python
    - composer: chunk_score
    - chunker: full_file
    - scorer: [bm25, iou]
    - splitter: [word_splitter, line_splitter, word_tokenizer]
    - _allowed_extensions: [".md", ".txt", ".rst"]

results_scorers_splitters1_kotlin_chunk_score.jsonl
    - languages: kotlin
    - composer: chunk_score
    - chunker: full_file
    - scorer: [bm25, iou]
    - splitter: [word_splitter, line_splitter, word_tokenizer]
    - _allowed_extensions: [".md", ".txt", ".rst"]

results_fixed_line.jsonl
    - languages: [python, kotlin]
    - composer: chunk_score
    - chunker: fixed_line
    - scorer: bm25
    - splitter: word_splitter
    - chunk_lines_size = [8, 16, 32, 64, 128]
    - chunk_completion_file = [True, False]
    - completion_last_chunk_size = 32
    - _allowed_extensions: [".md", ".txt", ".rst"]

results_langchain.jsonl
    - languages: [python, kotlin]
    - composer: chunk_score
    - chunker: langchain
    - scorer: bm25
    - splitter: word_splitter
    - chunk_lines_size = [8, 16, 32, 64, 128]
    - chunk_completion_file = True
    - completion_last_chunk_size = 32
    - _allowed_extensions: [".md", ".txt", ".rst"]

completion_last_chunk_size.jsonl
    - languages: python
    - composer: chunk_score
    - chunker: fixed_line
    - scorer: bm25
    - splitter: word_splitter
    - chunk_lines_size = [8, 16, 32, 64, 128]
    - chunk_completion_file = True
    - completion_last_chunk_size = [8, 16, 32, 64, 128]
    - _allowed_extensions: [".md", ".txt", ".rst"]

results_chunk_completion_file_only_py_python_chunk_score.jsonl
    - languages: python
    - composer: chunk_score
    - chunker: fixed_line
    - scorer: bm25
    - splitter: word_splitter
    - chunk_lines_size = [8, 16, 32, 64, 128]
    - chunk_completion_file = True
    - completion_last_chunk_size = [8, 16, 32, 64, 128]
    - _allowed_extensions: []

-------------------------------------------------------------------------    

results_py_modules_python_draco.jsonl
    - languages: python
    - composer: draco
    - draco_use_full_file = False
    - _allowed_extensions: []
    
results_py_full_file_python_draco.jsonl
    - languages: python
    - composer: draco
    - draco_use_full_file = True
    - _allowed_extensions: []

-------------------------------------------------------------------------    

results_multi_python_multi_score.jsonl
    - languages: python
    - composer: multi_score
    - composers_list: [["chunk_score", "draco"], ["draco", "chunk_score"]]
    - draco_use_full_file = True
    - chunker: fixed_line
    - scorer: bm25
    - splitter: word_splitter
    - chunk_lines_size = [8, 16, 32, 64, 128]
    - chunk_completion_file = True
    - completion_last_chunk_size = 32
    - _allowed_extensions: [".md", ".txt", ".rst"]
'''

'\nresults_embed_full_file.jsonl\n    - languages: [python, kotlin]\n    - composer: chunk_score\n    - chunker: full_file\n    - scorer: dense\n    - splitter: None\n    - _allowed_extensions: [".md", ".txt", ".rst"]\n\nresults_embed_full_file_only_py_python_chunk_score.jsonl\n    - languages: python\n    - composer: chunk_score\n    - chunker: full_file\n    - scorer: dense\n    - splitter: None\n    - _allowed_extensions: []\n\nresults_embed_python_chunk_score.jsonl\n    - languages: python\n    - composer: chunk_score\n    - chunker: fixed_line\n    - scorer: dense\n    - splitter: None\n    - chunk_completion_file = True\n    - completion_last_chunk_size = [8, 16, 32, 64, 128]\n    - _allowed_extensions: [".md", ".txt", ".rst"]\n\n-------------------------------------------------------------------------    \n\nresults_scorers_splitters1_python_chunk_score.jsonl\n    - languages: python\n    - composer: chunk_score\n    - chunker: full_file\n    - scorer: [bm25, iou]\n    - splitte

In [25]:
df = merge_df(base_path, paths_list)

# Drop all columns for which we don't want aggregation
group_columns = get_group_columns(df)

results = read_results(df, group_columns)

In [26]:
df_only_py = merge_df(base_path, paths_list_only_py)

# Drop all columns for which we don't want aggregation
group_columns_only_py = get_group_columns(df_only_py)

results_only_py = read_results(df_only_py, group_columns_only_py)

# Analyse chunk_score

## _allowed_extensions: [".md", ".txt", ".rst"]

### Analyze scorers and splitters

chunk_score:
- chunker: full_file
- scorer: iou/bm25/dense
- splitter: line_splitter/word_splitter/model_tokenizer

In [27]:
full_file_results_without_embeds = results[(results["composer"] == "chunk_score") & (results["chunker"] == "full_file") & (results["scorer"] != "dense")]
full_file_results_with_embeds = results[(results["composer"] == "chunk_score") & (results["chunker"] == "full_file")]

#### Analyse scorers for each splitter

In [28]:
# plot_params = {
#     'plot_by': 'scorer',
# }
# make_interaction(results=full_file_results, 
#                  group_columns=group_columns, 
#                  dropdown=plot_dropdown, 
#                  plot_params=plot_params,
#                  metrics=['em', 'is_prefix', 'prefix_ratio'],
#                  delete_columns=["embed_model"],
#                  )

In [29]:
dropdown_params = {
    'plot_by': 'scorer',
    'group_by': 'splitter',
    'metric' : ['em', 'is_prefix', 'prefix_ratio']
}

plot_params = {
    'title': 'Full file chunker',
    # 'name_by': ['chunker', 'scorer']
}

make_interaction(results=full_file_results_without_embeds, 
                 group_columns=group_columns, 
                 dropdown=plot_dropdown_with_group_by,
                 dropdown_params=dropdown_params,
                 plot_params=plot_params,
                 delete_columns=["embed_model"],
                 )

interactive(children=(Dropdown(description='category', options=('infile', 'inproject'), value='infile'), Dropd…

#### Analyse scorers for each splitter DENSE

In [30]:
dropdown_params = {
    'plot_by': 'scorer',
    'group_by': 'splitter',
    'metric' : ['em', 'is_prefix', 'prefix_ratio']
}

plot_params = {
    'title': 'Full file chunker',
}

make_interaction(results=full_file_results_with_embeds, 
                 group_columns=group_columns, 
                 dropdown=plot_dropdown_with_group_by,
                 dropdown_params=dropdown_params,
                 plot_params=plot_params,
                 delete_columns=["embed_model"],
                 )

interactive(children=(Dropdown(description='category', options=('infile', 'inproject'), value='infile'), Dropd…

#### Analyse splitters for each scorer

In [31]:
dropdown_params = {
    'plot_by': 'splitter',
    'group_by': 'scorer',
    'metric' : ['em', 'is_prefix', 'prefix_ratio']
}

plot_params = {
    'title': 'Full file chunker',
}

make_interaction(results=full_file_results_without_embeds, 
                 group_columns=group_columns, 
                 dropdown=plot_dropdown_with_group_by,
                 dropdown_params=dropdown_params,
                 plot_params=plot_params,
                 delete_columns=["embed_model"],
                 )

interactive(children=(Dropdown(description='category', options=('infile', 'inproject'), value='infile'), Dropd…

### Analyse chunk_completion_file

chunk_score:
- chunker: fixed_line
- scorer: bm25
- splitter: word_splitter
- chunk_lines_size = [8, 16, 32, 64, 128]
- chunk_completion_file = [True, False]
- completion_last_chunk_size = 32

In [32]:
chunk_completion_file_results = results[(results["composer"] == "chunk_score") & (results["chunker"] == "fixed_line") & (results["completion_last_chunk_size"] == 32) & (results["scorer"] != "dense")]

list_columns = [col for col in chunk_completion_file_results.columns if isinstance(chunk_completion_file_results[col].iloc[0], list)]
drop_duplicates_columns = [col for col in chunk_completion_file_results.columns if col not in list_columns]
chunk_completion_file_results.drop_duplicates(subset=drop_duplicates_columns, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chunk_completion_file_results.drop_duplicates(subset=drop_duplicates_columns, inplace=True)


In [33]:
dropdown_params = {
    'plot_by': 'chunk_lines_size',
    'group_by': 'chunk_completion_file',
    'metric' : ['em', 'is_prefix', 'prefix_ratio']
}

plot_params = {
    'title': 'Full file chunker',
}

make_interaction(results=chunk_completion_file_results, 
                 group_columns=group_columns, 
                 dropdown=plot_dropdown_with_group_by,
                 dropdown_params=dropdown_params,
                 plot_params=plot_params,
                 delete_columns=["embed_model"],
                 )

interactive(children=(Dropdown(description='category', options=('infile', 'inproject'), value='infile'), Dropd…

### Analyse completion_last_chunk_size

chunk_score:
- chunker: fixed_line
- scorer: [bm25, dense]
- splitter: [word_splitter, None]
- chunk_lines_size = [8, 16, 32, 64, 128]
- chunk_completion_file = True
- completion_last_chunk_size = [8, 16, 32, 64, 128]


In [44]:
completion_last_chunk_size_file_results = results[(results["language"] == "python") & (results["composer"] == "chunk_score") & (results["chunker"] == "fixed_line") & (results["chunk_completion_file"] == True)]

list_columns = [col for col in completion_last_chunk_size_file_results.columns if isinstance(completion_last_chunk_size_file_results[col].iloc[0], list)] + ['embed_model']
drop_duplicates_columns = [col for col in completion_last_chunk_size_file_results.columns if col not in list_columns]
completion_last_chunk_size_file_results.drop_duplicates(subset=drop_duplicates_columns, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  completion_last_chunk_size_file_results.drop_duplicates(subset=drop_duplicates_columns, inplace=True)


In [None]:
dropdown_params = {
    'plot_by': 'chunk_lines_size',
    'group_by': 'completion_last_chunk_size',
    'metric' : ['em', 'is_prefix', 'prefix_ratio']
}

plot_params = {
    'title': 'Full file chunker',
}

make_interaction(results=completion_last_chunk_size_file_results, 
                 group_columns=group_columns, 
                 dropdown=plot_dropdown_with_group_by,
                 dropdown_params=dropdown_params,
                 plot_params=plot_params,
                 delete_columns=["embed_model"],
                 )

interactive(children=(Dropdown(description='category', options=('infile', 'inproject'), value='infile'), Dropd…

### Analyse chunkers

chunk_score:
- chunker: full_file/fixed_line/langchain
- scorer: bm25
- splitter: word_splitter

setup only for fixed_line/langchain:
- chunk_lines_size = [8, 16, 32, 64, 128]
- chunk_completion_file = True
- completion_last_chunk_size = 32

In [5]:
bm25_word_splitter_results = results[(results["composer"] == "chunk_score")
                                     & (results["scorer"] == "bm25") 
                                     & (results["splitter"] == "word_splitter") 
                                     & (results["chunk_completion_file"] == True)]

bm25_word_splitter_results["chunker"] = bm25_word_splitter_results["chunker"].apply(lambda x: " full_file" if x == "full_file" else x)

fixed_line_results = bm25_word_splitter_results[(bm25_word_splitter_results["chunker"] == " full_file") | (bm25_word_splitter_results["chunker"] == "fixed_line")]
langchain_results = bm25_word_splitter_results[(bm25_word_splitter_results["chunker"] == " full_file") | (bm25_word_splitter_results["chunker"] == "langchain")]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bm25_word_splitter_results["chunker"] = bm25_word_splitter_results["chunker"].apply(lambda x: " full_file" if x == "full_file" else x)


#### Full_file VS Fixed_Line

In [6]:
dropdown_params = {
    'plot_by': 'chunk_lines_size',
    'metric' : ['em', 'is_prefix', 'prefix_ratio']
}

plot_params = {
    'title': 'Full file chunker',
    'name_by': ['chunker', 'chunk_lines_size']
}

make_interaction(results=fixed_line_results, 
                 group_columns=group_columns, 
                 dropdown=plot_dropdown,
                 dropdown_params=dropdown_params,
                 plot_params=plot_params,
                 delete_columns=["embed_model", "chunker"],
                 )

interactive(children=(Dropdown(description='category', options=('infile', 'inproject'), value='infile'), Dropd…

#### Full_file VS Langchain

In [7]:
dropdown_params = {
    'plot_by': 'chunk_lines_size',
    'metric' : ['em', 'is_prefix', 'prefix_ratio']
}

plot_params = {
    'title': 'Full file chunker',
    'name_by': ['chunker', 'chunk_lines_size']
}

make_interaction(results=langchain_results, 
                 group_columns=group_columns, 
                 dropdown=plot_dropdown,
                 dropdown_params=dropdown_params,
                 plot_params=plot_params,
                 delete_columns=["embed_model", "chunker"],
                 )

interactive(children=(Dropdown(description='category', options=('infile', 'inproject'), value='infile'), Dropd…

## _allowed_extensions: [] ONLY_PY

### Analyse completion_last_chunk_size

chunk_score:
- chunker: fixed_line
- scorer: bm25
- splitter: word_splitter
- chunk_lines_size = [8, 16, 32, 64, 128]
- chunk_completion_file = True
- completion_last_chunk_size = [8, 16, 32, 64, 128]


In [None]:
dropdown_params = {
    'plot_by': 'chunk_lines_size',
    'group_by': 'completion_last_chunk_size',
    'metric' : ['em', 'is_prefix', 'prefix_ratio']
}

plot_params = {
    'title': 'Full file chunker',
}

make_interaction(results=results_only_py, 
                 group_columns=group_columns, 
                 dropdown=plot_dropdown_with_group_by,
                 dropdown_params=dropdown_params,
                 plot_params=plot_params,
                 delete_columns=["embed_model"],
                 )

interactive(children=(Dropdown(description='category', options=('infile', 'inproject'), value='infile'), Text(…

# Analyze draco

In [8]:
draco_results = results[(results["composer"] == "draco")]
draco_results['draco_use_full_file'] = [False, False, True, True]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  draco_results['draco_use_full_file'] = [False, False, True, True]


In [9]:
dropdown_params = {
    'plot_by': 'draco_use_full_file',
    'metric' : ['em', 'is_prefix', 'prefix_ratio']
}

plot_params = {
    'title': 'Full file chunker',
}

make_interaction(results=draco_results, 
                 group_columns=group_columns, 
                 dropdown=plot_dropdown,
                 dropdown_params=dropdown_params,
                 plot_params=plot_params,
                 delete_columns=["embed_model", "chunker"],
                 )

interactive(children=(Dropdown(description='category', options=('infile', 'inproject'), value='infile'), Text(…

# Analyze multi_score

- draco_use_full_file: True

In [10]:
multi_results = results[(results["composer"].str.contains("multi"))]

In [11]:
dropdown_params = {
    'plot_by': 'chunker',
    'group_by': "composer",
    'metric' : ['em', 'is_prefix', 'prefix_ratio']
}

plot_params = {
    'title': 'Full file chunker',
}

make_interaction(results=multi_results, 
                 group_columns=group_columns, 
                 dropdown=plot_dropdown_with_group_by,
                 dropdown_params=dropdown_params,
                 plot_params=plot_params,
                 delete_columns=["embed_model"],
                 )

interactive(children=(Dropdown(description='category', options=('infile', 'inproject'), value='infile'), Text(…

# Analyse multi_score vs chunk_score

In [15]:
bm25_word_splitter_results = results[(results["language"] == "python") 
                                     & (results["composer"] == "chunk_score")
                                     & (results["scorer"] == "bm25") 
                                     & (results["splitter"] == "word_splitter") 
                                     & (results["chunk_completion_file"] == True)]

# bm25_word_splitter_results["chunker"] = bm25_word_splitter_results["chunker"].apply(lambda x: " full_file" if x == "full_file" else x)
fixed_line_results = bm25_word_splitter_results[(bm25_word_splitter_results["chunker"] == "fixed_line")]

multi_results = results[(results["composer"] == "multi_score: ['draco', 'chunk_score']") & (results["chunker"] == "fixed_line")]
multi_results["composer"] = multi_results["composer"].apply(lambda x: ' multi')

fixed_line_vs_multi_results = pd.concat([fixed_line_results, multi_results])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  multi_results["composer"] = multi_results["composer"].apply(lambda x: ' multi')


In [17]:
dropdown_params = {
    'plot_by': 'chunk_lines_size',
    'metric' : ['em', 'is_prefix', 'prefix_ratio']
}

plot_params = {
    'title': 'Full file chunker',
    'name_by': ['composer', 'chunk_lines_size']
}

make_interaction(results=fixed_line_vs_multi_results, 
                 group_columns=group_columns, 
                 dropdown=plot_dropdown,
                 dropdown_params=dropdown_params,
                 plot_params=plot_params,
                 delete_columns=["embed_model", "composer"],
                 )

interactive(children=(Dropdown(description='category', options=('infile', 'inproject'), value='infile'), Text(…