In [22]:
import pandas as pd
import matplotlib.pyplot as plt
from io import StringIO
from ipywidgets import interact, widgets
from xlwings.utils import chunk
import numpy as np

In [47]:
def read_results(file, group_columns):
    """Load a results file and collapse every line into per-configuration curves.

    Each non-blank line of ``file`` is parsed on its own with ``pd.read_json``.
    The value at ``scores["exact_match_valid"]["mean"]`` is copied into a new
    ``em`` column, then the rows are grouped by ``group_columns`` so that
    ``context_len_config`` and ``em`` become lists (one list per group).

    Args:
        file: Path of the results file (anything ``open`` accepts), holding
            one JSON document per line.
        group_columns: Column name(s) to group by.

    Returns:
        pandas.DataFrame: the grouped frames of all lines concatenated with a
        fresh integer index.

    Raises:
        ValueError: if the file contains no non-blank lines.
    """
    grouped_frames = []

    # A distinct handle name keeps the `file` argument from being shadowed.
    with open(file, 'r') as handle:
        for line in handle:
            payload = line.strip()
            if not payload:
                # Blank lines (e.g. a trailing empty line) are not valid JSON.
                continue
            df = pd.read_json(StringIO(payload))
            df["em"] = df["scores"].apply(lambda x: x["exact_match_valid"]["mean"])
            grouped = df.groupby(group_columns).agg({
                'context_len_config': list,
                'em': list,
            }).reset_index()
            grouped_frames.append(grouped)

    if not grouped_frames:
        # pd.concat([]) would raise a ValueError too, but without naming the file.
        raise ValueError(f"No results found in {file}")

    return pd.concat(grouped_frames, ignore_index=True)

In [48]:
def filter_target_columns(results, group_columns, delete_columns=None):
    """Collect the columns that take more than one value in ``results``.

    Args:
        results: DataFrame to inspect.
        group_columns: Candidate column names.
        delete_columns: Optional column names to leave out of the result.
            Names that were never selected (single-valued columns) are
            ignored instead of raising ``KeyError``.

    Returns:
        dict: column name -> sorted list of its unique values, for every
        candidate column with more than one unique value that is not listed
        in ``delete_columns``.
    """
    target_columns = {}

    for column in group_columns:
        column_values = results[column].unique().tolist()
        if len(column_values) > 1:
            target_columns[column] = sorted(column_values)

    # pop() with a default tolerates columns that were filtered out above.
    for del_col in delete_columns or ():
        target_columns.pop(del_col, None)
    return target_columns

In [49]:
def plot_dropdown(plot_by, fontsize=11, **kwargs):
    """Plot EM against context length for the rows of ``results`` matching ``kwargs``.

    Reads the module-level ``results`` DataFrame. Every keyword argument is
    treated as ``column=value`` and the frame is narrowed to the rows where
    all of them hold. Each remaining row is drawn as one line labelled
    ``"<chunker>_<value of plot_by>"``.

    Args:
        plot_by: Name of the column whose value is appended to the line label.
        fontsize: Font size for axis labels, tick labels and the title.
        **kwargs: ``column=value`` filters applied to ``results``.
    """
    # Compare values directly instead of interpolating them into a query
    # string: this works for any value type, for values containing quotes,
    # and when no filter is given at all.
    filtered_df = results
    for key, value in kwargs.items():
        filtered_df = filtered_df[filtered_df[key] == value]

    fig, ax = plt.subplots(figsize=(6, 4))
    for _, row in filtered_df.iterrows():
        name = f"{row['chunker']}_{row[plot_by]}"
        ax.plot(row['context_len_config'], row['em'], label=name)
    if not filtered_df.empty:
        # Without any line there is nothing to put in a legend.
        ax.legend()

    ax.set_xlabel('Context length', fontsize=fontsize)
    ax.set_ylabel('EM', fontsize=fontsize)
    ax.tick_params(axis='both', which='major', labelsize=fontsize)
    ax.grid(True)
    ax.set_ylim(0.1, 0.63)

    title = ', '.join([f'{key} = {value}' for key, value in kwargs.items()])
    ax.set_title(f"EM for {title}", fontsize=fontsize)
    plt.show()

In [29]:
def plot_dropdown_with_group_by(plot_by, group_by, fontsize=11, **kwargs):
    """Draw one EM-vs-context-length panel per unique value of ``group_by``.

    Reads the module-level ``results`` DataFrame. Every keyword argument is
    treated as ``column=value``; each panel shows the rows that satisfy all of
    them and whose ``group_by`` column equals that panel's group value. Each
    row is drawn as one line labelled ``"<chunker>_<value of plot_by>"``.

    Args:
        plot_by: Name of the column whose value is appended to the line label.
        group_by: Name of the column whose unique values define the panels.
        fontsize: Font size for axis labels, tick labels and panel titles.
        **kwargs: ``column=value`` filters applied to ``results``.
    """
    unique_groups = results[group_by].unique()
    if len(unique_groups) == 0:
        # plt.subplots(1, 0) would raise; there is nothing to draw.
        return

    # Apply the filters shared by all panels once. A filter on the group_by
    # column itself is skipped because the per-panel group value replaces it.
    base_df = results
    for key, value in kwargs.items():
        if key != group_by:
            base_df = base_df[base_df[key] == value]

    # squeeze=False always yields a 2-D array of axes, also for one panel.
    fig, axes = plt.subplots(
        1, len(unique_groups), figsize=(6 * len(unique_groups), 4), squeeze=False
    )

    for ax, group_value in zip(axes[0], unique_groups):
        filtered_df = base_df[base_df[group_by] == group_value]

        for _, row in filtered_df.iterrows():
            name = f"{row['chunker']}_{row[plot_by]}"
            ax.plot(row['context_len_config'], row['em'], label=name)
        if not filtered_df.empty:
            # Without any line there is nothing to put in a legend.
            ax.legend()

        ax.set_xlabel('Context length', fontsize=fontsize)
        ax.set_ylabel('EM', fontsize=fontsize)
        ax.tick_params(axis='both', which='major', labelsize=fontsize)
        ax.grid(True)
        ax.set_ylim(0.1, 0.63)
        ax.set_title(f"{group_by} = {group_value}", fontsize=fontsize)

    plt.tight_layout()
    plt.show()

### Analyze scorers and splitters

chunk_score:
- chunker: full_file
- scorer: iou/bm25/dense
- splitter: line_splitter/word_splitter/model_tokenizer

In [30]:
from pathlib import Path

# Root directory of the RAG evaluation outputs.
# Alternative location: Path("/mnt/data/kolomyttseva/long-contex-eval/output/rag/")
base_path = Path("/mnt/data/galimzyanov/long-context-eval/output/rag/")

results_full_file_old = base_path.joinpath("results_all_python_chunk_score.jsonl")
results_full_file_new_with_embed = base_path.joinpath(
    "new", "results_embed_full_file_python_chunk_score.jsonl"
)

df = pd.read_json(results_full_file_old, orient="records", lines=True)

# Every column except the ones dropped here is used as a grouping key.
group_columns = list(
    df.columns.drop([
        'context_len_config',
        'count',
        'context_len_mean',
        'time_gen_per_item',
        'scores',
        'time_data_load_per_item',
    ])
)

#### Analyse scorers for each splitter

In [31]:
# Old full-file results: keep only the rows with n_grams_max == 1.
df_full_file = read_results(results_full_file_old, group_columns)
df_full_file = df_full_file.loc[df_full_file['n_grams_max'].eq(1)]

# Results from the `new/` embed file, taken as they are.
df_full_file_embed = read_results(results_full_file_new_with_embed, group_columns)

results = pd.concat((df_full_file, df_full_file_embed), ignore_index=True)
target_columns = filter_target_columns(
    results,
    group_columns,
    delete_columns=["scorer"],
)

# Arguments passed to the plotting function in addition to target_columns.
additional_params = {'plot_by': 'scorer'}

# target_columns first; additional_params wins on a key clash.
all_params = dict(target_columns)
all_params.update(additional_params)

interact(plot_dropdown, **all_params)

interactive(children=(Text(value='scorer', description='plot_by'), IntSlider(value=11, description='fontsize',…

<function __main__.plot_dropdown(plot_by, fontsize=11, **kwargs)>

In [32]:
target_columns = filter_target_columns(
    results,
    group_columns,
    delete_columns=["scorer", "splitter"],
)

# Arguments passed to the plotting function in addition to target_columns.
additional_params = {'plot_by': 'scorer', 'group_by': 'splitter'}

# target_columns first; additional_params wins on a key clash.
all_params = dict(target_columns)
all_params.update(additional_params)
interact(plot_dropdown_with_group_by, **all_params)

interactive(children=(Text(value='scorer', description='plot_by'), Text(value='splitter', description='group_b…

<function __main__.plot_dropdown_with_group_by(plot_by, group_by, fontsize=11, **kwargs)>

#### Analyse splitters for each scorer

In [33]:
# Old full-file results: keep only the rows with n_grams_max == 1.
df_full_file = read_results(results_full_file_old, group_columns)
df_full_file = df_full_file.loc[df_full_file['n_grams_max'].eq(1)]

# Results from the `new/` embed file, taken as they are.
df_full_file_embed = read_results(results_full_file_new_with_embed, group_columns)

results = pd.concat((df_full_file, df_full_file_embed), ignore_index=True)
target_columns = filter_target_columns(
    results,
    group_columns,
    delete_columns=["splitter"],
)

# Arguments passed to the plotting function in addition to target_columns.
additional_params = {'plot_by': 'splitter'}

# target_columns first; additional_params wins on a key clash.
all_params = dict(target_columns)
all_params.update(additional_params)

interact(plot_dropdown, **all_params)

interactive(children=(Text(value='splitter', description='plot_by'), IntSlider(value=11, description='fontsize…

<function __main__.plot_dropdown(plot_by, fontsize=11, **kwargs)>

In [34]:
target_columns = filter_target_columns(
    results,
    group_columns,
    delete_columns=["scorer", "splitter"],
)

# Arguments passed to the plotting function in addition to target_columns.
additional_params = {'plot_by': 'splitter', 'group_by': 'scorer'}

# target_columns first; additional_params wins on a key clash.
all_params = dict(target_columns)
all_params.update(additional_params)
interact(plot_dropdown_with_group_by, **all_params)

interactive(children=(Text(value='splitter', description='plot_by'), Text(value='scorer', description='group_b…

<function __main__.plot_dropdown_with_group_by(plot_by, group_by, fontsize=11, **kwargs)>

### Analyse completion_last_chunk_size

chunk_score:
- chunker: fixed_line
- scorer: bm25
- splitter: word_splitter
- chunk_lines_size = [8, 16, 32, 64, 128]
- chunk_completion_file = True
- completion_last_chunk_size = [8, 16, 32, 64, 128]

In [35]:
from pathlib import Path

# Directory holding the results file read below.
base_path = Path("/mnt/data/galimzyanov/long-context-eval/output/rag/new")

path = base_path.joinpath("results_python_chunk_score.jsonl")
df = pd.read_json(path, orient="records", lines=True)

# Every column except the ones dropped here is used as a grouping key.
group_columns = list(
    df.columns.drop([
        'context_len_config',
        'count',
        'context_len_mean',
        'time_gen_per_item',
        'scores',
        'time_data_load_per_item',
        'stride',
    ])
)

In [36]:
# Results file under the current base_path.
path = base_path.joinpath("results_python_chunk_score.jsonl")
results = read_results(path, group_columns)

# Only chunk_lines_size is considered as a dropdown candidate here.
target_columns = filter_target_columns(results, ["chunk_lines_size"])

# Arguments passed to the plotting function in addition to target_columns.
additional_params = {'plot_by': 'chunk_lines_size'}

# target_columns first; additional_params wins on a key clash.
all_params = dict(target_columns)
all_params.update(additional_params)

interact(plot_dropdown, **all_params)

interactive(children=(Text(value='chunk_lines_size', description='plot_by'), IntSlider(value=11, description='…

<function __main__.plot_dropdown(plot_by, fontsize=11, **kwargs)>

In [37]:
target_columns = filter_target_columns(
    results,
    group_columns,
    delete_columns=["chunk_lines_size", "completion_last_chunk_size"],
)

# Arguments passed to the plotting function in addition to target_columns.
additional_params = {'plot_by': 'chunk_lines_size', 'group_by': 'completion_last_chunk_size'}

# target_columns first; additional_params wins on a key clash.
all_params = dict(target_columns)
all_params.update(additional_params)
interact(plot_dropdown_with_group_by, **all_params)

interactive(children=(Text(value='chunk_lines_size', description='plot_by'), Text(value='completion_last_chunk…

<function __main__.plot_dropdown_with_group_by(plot_by, group_by, fontsize=11, **kwargs)>

### Analyse chunk_completion_file

chunk_score:
- chunker: fixed_line
- scorer: bm25
- splitter: word_splitter
- chunk_lines_size = [8, 16, 32, 64, 128]
- chunk_completion_file = [True, False]
- completion_last_chunk_size = 32

In [38]:
from pathlib import Path

# Root directory of the RAG evaluation outputs.
# Alternative location: Path("/mnt/data/galimzyanov/long-context-eval/output/rag/new")
base_path = Path("/mnt/data/kolomyttseva/long-contex-eval/output/rag/")
results_fixed_line = base_path.joinpath("results_fixed_python_chunk_score.jsonl")

df = pd.read_json(results_fixed_line, orient="records", lines=True)

# Every column except the ones dropped here is used as a grouping key.
group_columns = list(
    df.columns.drop([
        'context_len_config',
        'count',
        'context_len_mean',
        'time_gen_per_item',
        'scores',
        'time_data_load_per_item',
        'stride',
    ])
)

In [39]:
# Fixed-line results file under the current base_path.
results_fixed_line = base_path.joinpath("results_fixed_python_chunk_score.jsonl")

results = read_results(results_fixed_line, group_columns)

# Only chunk_lines_size is considered as a dropdown candidate here.
target_columns = filter_target_columns(results, ["chunk_lines_size"])

# Arguments passed to the plotting function in addition to target_columns.
additional_params = {'plot_by': 'chunk_lines_size'}

# target_columns first; additional_params wins on a key clash.
all_params = dict(target_columns)
all_params.update(additional_params)

interact(plot_dropdown, **all_params)

interactive(children=(Text(value='chunk_lines_size', description='plot_by'), IntSlider(value=11, description='…

<function __main__.plot_dropdown(plot_by, fontsize=11, **kwargs)>

In [40]:
target_columns = filter_target_columns(
    results,
    group_columns,
    delete_columns=["chunk_lines_size", "chunk_completion_file"],
)

# Arguments passed to the plotting function in addition to target_columns.
additional_params = {'plot_by': 'chunk_lines_size', 'group_by': 'chunk_completion_file'}

# target_columns first; additional_params wins on a key clash.
all_params = dict(target_columns)
all_params.update(additional_params)
interact(plot_dropdown_with_group_by, **all_params)

interactive(children=(Text(value='chunk_lines_size', description='plot_by'), Text(value='chunk_completion_file…

<function __main__.plot_dropdown_with_group_by(plot_by, group_by, fontsize=11, **kwargs)>

### Analyse chunkers

chunk_score:
- chunker: full_file/fixed_line/langchain
- scorer: bm25
- splitter: word_splitter

setup only for fixed_line/langchain:
- chunk_lines_size = [8, 16, 32, 64, 128]
- chunk_completion_file = True
- completion_last_chunk_size = 32

In [44]:
from pathlib import Path

# Root directory of the RAG evaluation outputs.
# Alternative location: Path("/mnt/data/galimzyanov/long-context-eval/output/rag/new")
base_path = Path("/mnt/data/kolomyttseva/long-contex-eval/output/rag/")
results_full_file = base_path.joinpath("results_all_python_chunk_score.jsonl")

df = pd.read_json(results_full_file, orient="records", lines=True)

# Every column except the ones dropped here is used as a grouping key.
group_columns = list(
    df.columns.drop([
        'context_len_config',
        'count',
        'context_len_mean',
        'time_gen_per_item',
        'scores',
        'time_data_load_per_item',
        'stride',
    ])
)

#### Full_file VS Fixed_Line

In [51]:
from pathlib import Path

# Root directory of the RAG evaluation outputs.
# Alternative location: Path("/mnt/data/galimzyanov/long-context-eval/output/rag/new")
base_path = Path("/mnt/data/kolomyttseva/long-contex-eval/output/rag/")

results_fixed_line = base_path.joinpath("results_fixed_python_chunk_score.jsonl")
results_full_file = base_path.joinpath("results_all_python_chunk_score.jsonl")

# Fixed-line results: keep only the rows where chunk_completion_file is True.
df_fixed_line = read_results(results_fixed_line, group_columns)
df_fixed_line = df_fixed_line.loc[df_fixed_line['chunk_completion_file'].eq(True)]

df_full_file = read_results(results_full_file, group_columns)

results = pd.concat((df_full_file, df_fixed_line), ignore_index=True)
target_columns = filter_target_columns(
    results,
    group_columns,
    delete_columns=["chunk_lines_size", "chunk_completion_file", "chunker"],
)

# Arguments passed to the plotting function in addition to target_columns.
additional_params = {'plot_by': 'chunk_lines_size'}

# target_columns first; additional_params wins on a key clash.
all_params = dict(target_columns)
all_params.update(additional_params)

interact(plot_dropdown, **all_params)

interactive(children=(Text(value='chunk_lines_size', description='plot_by'), IntSlider(value=11, description='…

<function __main__.plot_dropdown(plot_by, fontsize=11, **kwargs)>

#### Full_file VS Langchain

In [None]:
from pathlib import Path

# Root directory of the RAG evaluation outputs.
# Alternative location: Path("/mnt/data/galimzyanov/long-context-eval/output/rag/new")
base_path = Path("/mnt/data/kolomyttseva/long-contex-eval/output/rag/")

results_full_file = base_path.joinpath("results_all_python_chunk_score.jsonl")
results_langchain = base_path.joinpath("results_langchain_python_chunk_score.jsonl")

df_full_file = read_results(results_full_file, group_columns)
df_langchain = read_results(results_langchain, group_columns)

results = pd.concat((df_full_file, df_langchain), ignore_index=True)
target_columns = filter_target_columns(
    results,
    group_columns,
    delete_columns=["chunk_lines_size", "chunk_completion_file", "chunker"],
)

# Arguments passed to the plotting function in addition to target_columns.
additional_params = {'plot_by': 'chunk_lines_size'}

# target_columns first; additional_params wins on a key clash.
all_params = dict(target_columns)
all_params.update(additional_params)

interact(plot_dropdown, **all_params)

interactive(children=(Text(value='chunk_lines_size', description='plot_by'), IntSlider(value=11, description='…

<function __main__.plot_dropdown(plot_by, fontsize=11, **kwargs)>