In [1]:
# %%capture
# %pip install datasets
# %pip install transformers

## Libraries and Dependencies

In [2]:
import pandas as pd
import numpy as np
import torch
import nltk
from nltk.tokenize import sent_tokenize
from datasets import load_dataset
from tqdm.notebook import tqdm

# The models the authors used:
from transformers import BertForMaskedLM, BertTokenizer
from transformers import AlbertForMaskedLM, AlbertTokenizer

nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Liora\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [3]:
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
DEVICE

device(type='cuda')

## Algorithm Implementation

In [4]:
def mask_sentence(sentence, mask_token, i, M, L_min):
    return [mask_token
            if (j - i) % M == 0
            and (len(sentence[j]) >= L_min
                 or sentence[j].startswith('##')
                 or sentence[min(j+1, len(sentence)-1)].startswith('##'))
            else sentence[j]
            for j in range(len(sentence))]

In [5]:
def BLANC_help(sentence, translation, model, tokenizer, M=6, L_min=4, sep='[SEP]', device='cpu'):
    """
    Calculates BLANC score between a given sentence and its translation using a specified model.

    Parameters:
    - sentence (List[str]): A tokenized sentence.
    - translation (List[str]): The tokenized translation.
    - model: BERT-type model
    - tokenizer: The tokenizer associated with the model used.
    - M (int): Parameter M for the algorithm (default is 6).
    - L_min (int): Minimum length requirement for masked words (default is 4).
    - sep (str): Separator between the inference help (filler/summary) and a sentence from the text (default is '[SEP]').

    Returns:
    - float: BLANC score for the given sentence and its translation.
    """

    filler = ['.'] * len(translation)
    S = [[0, 0], [0, 0]]

    for i in range(M):
        masked_sentence = mask_sentence(sentence, tokenizer.mask_token, i, M, L_min)
        print(masked_sentence)

        input_base = filler + [sep] + masked_sentence
        input_help = translation + [sep] + masked_sentence

        tokenized_input_base = torch.tensor(tokenizer.convert_tokens_to_ids(input_base)).to(device) # Shape: [sequence_length]
        tokenized_input_help = torch.tensor(tokenizer.convert_tokens_to_ids(input_help)).to(device) # Shape: [sequence_length]

        out_base = model(input_ids=tokenized_input_base.unsqueeze(0)).logits  # Shape: [1, sequence_length, model_vocab_size]
        out_help = model(input_ids=tokenized_input_help.unsqueeze(0)).logits  # Shape: [1, sequence_length, model_vocab_size]

        out_base = torch.argmax(out_base.squeeze(0), dim=-1)  # Shape: [sequence_length]
        out_help = torch.argmax(out_help.squeeze(0), dim=-1)  # Shape: [sequence_length]

        masked_tokens = [idx for idx, word in enumerate(masked_sentence) if word == tokenizer.mask_token]

        for j in masked_tokens:
            idx = len(translation + [sep]) + j
            predicted_word_base = tokenizer.convert_ids_to_tokens(out_base[idx].item())
            predicted_word_help = tokenizer.convert_ids_to_tokens(out_help[idx].item())

            print(f'predicted_word_base[{idx - len(translation + [sep])}]: {predicted_word_base}')
            print(f'predicted_word_help[{idx - len(translation + [sep])}]: {predicted_word_help}')
            print(f'sentence[{j}]: {sentence[j]}')

            k = int(predicted_word_base == sentence[j])
            m = int(predicted_word_help == sentence[j])
            S[k][m] += 1


    B = (S[0][1] - S[1][0]) / (S[0][0] + S[1][1] + S[0][1] + S[1][0])

    return B

## Datasets

In [6]:
news_commentary_ds = load_dataset('news_commentary', 'en-fr', split='train')
news_commentary_ds

Dataset({
    features: ['id', 'translation'],
    num_rows: 209479
})

In [7]:
parallel_df = pd.DataFrame(news_commentary_ds['translation'][:300])
parallel_df

Unnamed: 0,en,fr
0,"$10,000 Gold?",L’or à 10.000 dollars l’once ?
1,SAN FRANCISCO – It has never been easy to have...,SAN FRANCISCO – Il n’a jamais été facile d’avo...
2,"Lately, with gold prices up more than 300% ove...","Et aujourd’hui, alors que le cours de l’or a a..."
3,"Just last December, fellow economists Martin F...","En décembre dernier, mes collègues économistes..."
4,Wouldn’t you know it?,Mais devinez ce qui s’est passé ?
...,...,...
295,Although Abdullah is usually referred to in th...,Bien qu'Abdallah soit généralement considéré à...
296,"The Sudairis, it seems, have apparently left t...",Ils semblent avoir laissé leur demi-frère se c...
297,For although Crown Prince Abdullah has his own...,Même si le prince héritier Abdallah bénéficie ...
298,The idea of normalizing relations with Israel ...,L'idée d'une normalisation des relations avec ...


## Model and Tokenizer

In [8]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case = True)
model = BertForMaskedLM.from_pretrained('bert-base-uncased').to(DEVICE)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMaskedLM: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


## Preprocessing

In [9]:
sentences = [tokenizer.tokenize(sentence)
             for sentence in parallel_df['en']]  # (List[List[str]])

translations = [tokenizer.tokenize(translation)
                for translation in parallel_df['fr']] # (List[List[str]])

In [10]:
import random
random.seed(42)

def random_words_translation(translation):
    random_translation = ""
    translation = translation.split()
    for _ in range(len(translation)):
        random_translation += random.choice(translation) + ' '
    return random_translation

random_translations = [random_words_translation(translation) for translation in parallel_df['fr'].unique()]
tokenized_random_translations = [tokenizer.tokenize(translation) for translation in random_translations]


In [10]:
# print(f'longest sentence: {max([len(sent) for sent in sentences])}')
# print(f'longest translation: {max([len(sent) for sent in translations])}')

## Running the Program

In [9]:
BLANC_help(translations[0], sentences[0], model, tokenizer, device=DEVICE)

0.0

In [11]:
%%time
scores = [BLANC_help(translation, sentence, model, tokenizer, device=DEVICE)
          for translation, sentence in zip(translations, sentences)]

['l', '’', 'or', 'a', '10', '.', '000', 'dollars', 'l', '’', 'once', '?']
['l', '’', 'or', 'a', '10', '.', '000', '[MASK]', 'l', '’', 'once', '?']
predicted_word_base[7]: -
predicted_word_help[7]: gold
sentence[7]: dollars
['l', '’', 'or', 'a', '10', '.', '000', 'dollars', 'l', '’', 'once', '?']
['l', '’', 'or', 'a', '10', '.', '000', 'dollars', 'l', '’', 'once', '?']
['l', '’', 'or', 'a', '10', '.', '000', 'dollars', 'l', '’', '[MASK]', '?']
predicted_word_base[10]: ##y
predicted_word_help[10]: ##y
sentence[10]: once
['l', '’', 'or', 'a', '10', '.', '000', 'dollars', 'l', '’', 'once', '?']
['san', 'francisco', '–', 'il', 'n', '’', 'a', 'jam', '##ais', 'et', '##e', 'fa', '[MASK]', '##le', 'd', '’', 'av', '##oir', 'une', 'discussion', 'ratio', '##nne', '##lle', 'sur', 'la', 'vale', '##ur', 'du', 'metal', 'ja', '[MASK]', '.']
predicted_word_base[12]: ##ci
predicted_word_help[12]: ##ci
sentence[12]: ##ci
predicted_word_base[30]: ##une
predicted_word_help[30]: ##une
sentence[30]: ##une
['s

In [12]:
scores

[0.0,
 0.05263157894736842,
 0.037037037037037035,
 0.03508771929824561,
 0.0,
 0.0,
 0.0,
 -0.05555555555555555,
 0.046511627906976744,
 0.0,
 0.06666666666666667,
 0.05,
 0.0,
 0.09523809523809523,
 0.043478260869565216,
 0.0,
 0.0,
 0.0,
 -0.041666666666666664,
 0.0,
 0.0,
 0.10606060606060606,
 -0.045454545454545456,
 0.09090909090909091,
 0.047619047619047616,
 0.0,
 0.058823529411764705,
 0.05405405405405406,
 0.06896551724137931,
 -0.029411764705882353,
 -0.05128205128205128,
 0.027777777777777776,
 -0.04,
 0.0,
 0.0,
 -0.07692307692307693,
 -0.047619047619047616,
 0.0,
 -0.029411764705882353,
 0.0,
 -0.07142857142857142,
 0.034482758620689655,
 0.025,
 -0.03225806451612903,
 0.05555555555555555,
 0.0,
 0.0,
 0.06666666666666667,
 0.023809523809523808,
 0.05,
 0.0,
 -0.041666666666666664,
 -0.03571428571428571,
 0.1111111111111111,
 0.0,
 0.0,
 0.07142857142857142,
 0.023255813953488372,
 0.0,
 0.0392156862745098,
 0.09090909090909091,
 0.02040816326530612,
 0.01818181818181818,

In [18]:
def study_results(index = 253, model = model, tokenizer = tokenizer):
    print("translation : ", parallel_df['fr'].iloc[index])
    print("sentence : ", parallel_df['en'].iloc[index])
    print("tokenization of trans. : ", translations[index])
    print("tokenization of sentence : ",sentences[index])
    print('\n')
    BLANC_help(translations[index], sentences[index], model, tokenizer, device=DEVICE)

In [14]:
#Bad results 1
study_results(71)

translation :  Quand les riches deviennent moins riches, les pauvres deviennent encore plus pauvres.
sentence :  When the rich get less rich, the poor get poorer.
tokenization of trans. :  ['quan', '##d', 'les', 'riches', 'devi', '##enne', '##nt', 'moi', '##ns', 'riches', ',', 'les', 'pau', '##vres', 'devi', '##enne', '##nt', 'encore', 'plus', 'pau', '##vres', '.']
tokenization of sentence :  ['when', 'the', 'rich', 'get', 'less', 'rich', ',', 'the', 'poor', 'get', 'poorer', '.']


['[MASK]', '##d', 'les', 'riches', 'devi', '##enne', '[MASK]', 'moi', '##ns', 'riches', ',', 'les', '[MASK]', '##vres', 'devi', '##enne', '##nt', 'encore', '[MASK]', 'pau', '##vres', '.']
predicted_word_base[0]: quan
predicted_word_help[0]: quan
sentence[0]: quan
predicted_word_base[6]: ##nt
predicted_word_help[6]: ##nt
sentence[6]: ##nt
predicted_word_base[12]: pau
predicted_word_help[12]: pau
sentence[12]: pau
predicted_word_base[18]: les
predicted_word_help[18]: les
sentence[18]: plus
['quan', '[MASK]', '

In [15]:
#Bad results 2
print(scores.index(-0.25))
study_results(156)

156
translation :  Le 11 septembre en perspective
sentence :  9/11 in Perspective
tokenization of trans. :  ['le', '11', 'sept', '##em', '##bre', 'en', 'perspective']
tokenization of sentence :  ['9', '/', '11', 'in', 'perspective']


['le', '11', 'sept', '##em', '##bre', 'en', '[MASK]']
predicted_word_base[6]: france
predicted_word_help[6]: france
sentence[6]: perspective
['le', '11', 'sept', '##em', '##bre', 'en', 'perspective']
['le', '11', '[MASK]', '##em', '##bre', 'en', 'perspective']
predicted_word_base[2]: sept
predicted_word_help[2]: nov
sentence[2]: sept
['le', '11', 'sept', '[MASK]', '##bre', 'en', 'perspective']
predicted_word_base[3]: ##em
predicted_word_help[3]: ##em
sentence[3]: ##em
['le', '11', 'sept', '##em', '[MASK]', 'en', 'perspective']
predicted_word_base[4]: ##bre
predicted_word_help[4]: ##bre
sentence[4]: ##bre
['le', '11', 'sept', '##em', '##bre', 'en', 'perspective']


In [43]:
#Good results 1
print(np.argmax(scores), max(scores))
study_results(253)

253 0.3333333333333333
translation :  Abba Eban : La Voix d\u0027Israel
sentence :  Abba Eban: The Voice of Israel
tokenization of trans. :  ['ab', '##ba', 'e', '##ban', ':', 'la', 'vo', '##ix', 'd', '\\', 'u', '##00', '##27', '##is', '##rae', '##l']
tokenization of sentence :  ['ab', '##ba', 'e', '##ban', ':', 'the', 'voice', 'of', 'israel']


['[MASK]', '##ba', 'e', '##ban', ':', 'la', '[MASK]', '##ix', 'd', '\\', 'u', '##00', '[MASK]', '##is', '##rae', '##l']
predicted_word_base[0]: e
predicted_word_help[0]: ab
sentence[0]: ab
predicted_word_base[6]: vo
predicted_word_help[6]: vo
sentence[6]: vo
predicted_word_base[12]: ##0
predicted_word_help[12]: ##0
sentence[12]: ##27
['ab', '[MASK]', 'e', '##ban', ':', 'la', 'vo', '[MASK]', 'd', '\\', 'u', '##00', '##27', '[MASK]', '##rae', '##l']
predicted_word_base[1]: -
predicted_word_help[1]: ##ba
sentence[1]: ##ba
predicted_word_base[7]: ##ix
predicted_word_help[7]: ##ix
sentence[7]: ##ix
predicted_word_base[13]: ##z
predicted_word_help[13]

0.3333333333333333

In [42]:
# Good result 2
print(scores.index(0.25))
study_results(90)

90
translation :  Il s’agira d’une caractéristique du consensus de 1945.
sentence :  It was part of the 1945 consensus.
tokenization of trans. :  ['il', 's', '’', 'ag', '##ira', 'd', '’', 'une', 'cara', '##cter', '##ist', '##ique', 'du', 'consensus', 'de', '1945', '.']
tokenization of sentence :  ['it', 'was', 'part', 'of', 'the', '1945', 'consensus', '.']


['il', 's', '’', 'ag', '##ira', 'd', '’', 'une', 'cara', '##cter', '##ist', '##ique', 'du', 'consensus', 'de', '1945', '.']
['il', 's', '’', 'ag', '##ira', 'd', '’', 'une', 'cara', '##cter', '##ist', '##ique', 'du', '[MASK]', 'de', '1945', '.']
predicted_word_base[13]: year
predicted_word_help[13]: year
sentence[13]: consensus
['il', 's', '’', 'ag', '##ira', 'd', '’', 'une', '[MASK]', '##cter', '##ist', '##ique', 'du', 'consensus', 'de', '1945', '.']
predicted_word_base[8]: ba
predicted_word_help[8]: cara
sentence[8]: cara
['il', 's', '’', '[MASK]', '##ira', 'd', '’', 'une', 'cara', '[MASK]', '##ist', '##ique', 'du', 'consensus', '

0.25

In [45]:
# Null result
print(scores.index(0.0))
study_results(0)

0
translation :  L’or à 10.000 dollars l’once ?
sentence :  $10,000 Gold?
tokenization of trans. :  ['l', '’', 'or', 'a', '10', '.', '000', 'dollars', 'l', '’', 'once', '?']
tokenization of sentence :  ['$', '10', ',', '000', 'gold', '?']


['l', '’', 'or', 'a', '10', '.', '000', 'dollars', 'l', '’', 'once', '?']
['l', '’', 'or', 'a', '10', '.', '000', '[MASK]', 'l', '’', 'once', '?']
predicted_word_base[7]: -
predicted_word_help[7]: gold
sentence[7]: dollars
['l', '’', 'or', 'a', '10', '.', '000', 'dollars', 'l', '’', 'once', '?']
['l', '’', 'or', 'a', '10', '.', '000', 'dollars', 'l', '’', 'once', '?']
['l', '’', 'or', 'a', '10', '.', '000', 'dollars', 'l', '’', '[MASK]', '?']
predicted_word_base[10]: ##y
predicted_word_help[10]: ##y
sentence[10]: once
['l', '’', 'or', 'a', '10', '.', '000', 'dollars', 'l', '’', 'once', '?']


0.0

In [19]:
# Low score 1
study_results(2)

translation :  Et aujourd’hui, alors que le cours de l’or a augmenté de 300 pour cent au cours de la dernière décennie, c’est plus difficile que jamais.
sentence :  Lately, with gold prices up more than 300% over the last decade, it is harder than ever.
tokenization of trans. :  ['et', 'au', '##jou', '##rd', '’', 'hui', ',', 'al', '##ors', 'que', 'le', 'co', '##urs', 'de', 'l', '’', 'or', 'a', 'aug', '##ment', '##e', 'de', '300', 'pour', 'cent', 'au', 'co', '##urs', 'de', 'la', 'der', '##nier', '##e', 'dec', '##en', '##nie', ',', 'c', '’', 'est', 'plus', 'di', '##ffi', '##ci', '##le', 'que', 'jam', '##ais', '.']
tokenization of sentence :  ['lately', ',', 'with', 'gold', 'prices', 'up', 'more', 'than', '300', '%', 'over', 'the', 'last', 'decade', ',', 'it', 'is', 'harder', 'than', 'ever', '.']


['et', 'au', '##jou', '##rd', '’', 'hui', ',', 'al', '##ors', 'que', 'le', 'co', '[MASK]', 'de', 'l', '’', 'or', 'a', '[MASK]', '##ment', '##e', 'de', '300', 'pour', '[MASK]', 'au', 'co', '##ur

In [50]:
# Low score 2
study_results(1)

translation :  SAN FRANCISCO – Il n’a jamais été facile d’avoir une discussion rationnelle sur la valeur du métal jaune.
sentence :  SAN FRANCISCO – It has never been easy to have a rational conversation about the value of gold.
tokenization of trans. :  ['san', 'francisco', '–', 'il', 'n', '’', 'a', 'jam', '##ais', 'et', '##e', 'fa', '##ci', '##le', 'd', '’', 'av', '##oir', 'une', 'discussion', 'ratio', '##nne', '##lle', 'sur', 'la', 'vale', '##ur', 'du', 'metal', 'ja', '##une', '.']
tokenization of sentence :  ['san', 'francisco', '–', 'it', 'has', 'never', 'been', 'easy', 'to', 'have', 'a', 'rational', 'conversation', 'about', 'the', 'value', 'of', 'gold', '.']


['san', 'francisco', '–', 'il', 'n', '’', 'a', 'jam', '##ais', 'et', '##e', 'fa', '[MASK]', '##le', 'd', '’', 'av', '##oir', 'une', 'discussion', 'ratio', '##nne', '##lle', 'sur', 'la', 'vale', '##ur', 'du', 'metal', 'ja', '[MASK]', '.']
predicted_word_base[12]: ##ci
predicted_word_help[12]: ##ci
sentence[12]: ##ci
predicte

0.05263157894736842

In [20]:
model2 = BertForMaskedLM.from_pretrained('bert-base-multilingual-uncased').to(DEVICE)
tokenizer2 = BertTokenizer.from_pretrained('bert-base-multilingual-uncased')

Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertForMaskedLM: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [21]:
multi_scores = [BLANC_help(translation, sentence, model2, tokenizer2, device = DEVICE)
          for translation, sentence in zip(translations, sentences)]

['l', '’', 'or', 'a', '10', '.', '000', 'dollars', 'l', '’', 'once', '?']
['l', '’', 'or', 'a', '10', '.', '000', '[MASK]', 'l', '’', 'once', '?']
predicted_word_base[7]: .
predicted_word_help[7]: gold
sentence[7]: dollars
['l', '’', 'or', 'a', '10', '.', '000', 'dollars', 'l', '’', 'once', '?']
['l', '’', 'or', 'a', '10', '.', '000', 'dollars', 'l', '’', 'once', '?']
['l', '’', 'or', 'a', '10', '.', '000', 'dollars', 'l', '’', '[MASK]', '?']
predicted_word_base[10]: .
predicted_word_help[10]: gold
sentence[10]: once
['l', '’', 'or', 'a', '10', '.', '000', 'dollars', 'l', '’', 'once', '?']
['san', 'francisco', '–', 'il', 'n', '’', 'a', 'jam', '##ais', 'et', '##e', 'fa', '[MASK]', '##le', 'd', '’', 'av', '##oir', 'une', 'discussion', 'ratio', '##nne', '##lle', 'sur', 'la', 'vale', '##ur', 'du', 'metal', 'ja', '[MASK]', '.']
predicted_word_base[12]: ##uvel
predicted_word_help[12]: ##uvel
sentence[12]: ##ci
predicted_word_base[30]: .
predicted_word_help[30]: ##lon
sentence[30]: ##une
['sa

In [22]:
for i in range(len(multi_scores)):
    print(f"{i} : {multi_scores[i]}, {scores[i]}, {multi_scores[i] == scores[i]}")

0 : 0.0, 0.0, True
1 : 0.05263157894736842, 0.05263157894736842, True
2 : 0.037037037037037035, 0.037037037037037035, True
3 : 0.15789473684210525, 0.03508771929824561, False
4 : 0.0, 0.0, True
5 : -0.034482758620689655, 0.0, False
6 : -0.037037037037037035, 0.0, False
7 : -0.05555555555555555, -0.05555555555555555, True
8 : 0.023255813953488372, 0.046511627906976744, False
9 : 0.09090909090909091, 0.0, False
10 : 0.0, 0.06666666666666667, False
11 : 0.0, 0.05, False
12 : 0.030303030303030304, 0.0, False
13 : 0.047619047619047616, 0.09523809523809523, False
14 : 0.0, 0.043478260869565216, False
15 : 0.0, 0.0, True
16 : 0.07692307692307693, 0.0, False
17 : 0.09090909090909091, 0.0, False
18 : 0.0, -0.041666666666666664, False
19 : 0.043478260869565216, 0.0, False
20 : 0.03225806451612903, 0.0, False
21 : 0.06060606060606061, 0.10606060606060606, False
22 : 0.13636363636363635, -0.045454545454545456, False
23 : 0.0, 0.09090909090909091, False
24 : 0.0, 0.047619047619047616, False
25 : -0

In [None]:
# significant improvement on low scores
study_results(3)
print('\n')
study_results(3, model2, tokenizer2)

In [14]:
data = {}
data['BLANC_help_300_translation'] = scores

import json

def add_results_to_json(new_data, file_path = "./results.json"):
    try:
        with open(file_path, 'r') as json_file:
            existing_data = json.load(json_file)
    except FileNotFoundError:
        existing_data = {}

    for key, value in new_data.items():
        existing_data[key] = value

    with open(file_path, 'w') as json_file:
        json.dump(existing_data, json_file, indent=2)

    print(f"Data has been added to {file_path}")

add_results_to_json(data)

Data has been added to ./results.json


In [None]:
import json

def change_key_name(json_file_path, old_key, new_key):
    try:
        # Load existing data from the JSON file
        with open(json_file_path, 'r') as json_file:
            existing_data = json.load(json_file)
    except FileNotFoundError:
        print(f"The file {json_file_path} does not exist.")
        return

    # Check if the old key exists in the data
    if old_key in existing_data:
        # Create a new key with the desired name
        existing_data[new_key] = existing_data.pop(old_key)

        # Write the updated data back to the JSON file
        with open(json_file_path, 'w') as json_file:
            json.dump(existing_data, json_file, indent=2)

        print(f"The key '{old_key}' has been changed to '{new_key}' in {json_file_path}")
    else:
        print(f"The key '{old_key}' does not exist in the data.")

# Example usage:
json_file_path = "./results.json"
old_key_name = "BLANC_help_unbatched_300"
new_key_name = "BLANC_help_300"

change_key_name(json_file_path, old_key_name, new_key_name)


In [51]:
import json

def remove_key_from_json(file_path, key_to_remove):
    try:
        # Load existing data from the JSON file
        with open(file_path, 'r') as json_file:
            existing_data = json.load(json_file)
    except FileNotFoundError:
        print(f"The file {file_path} does not exist.")
        return

    # Check if the key exists in the data
    if key_to_remove in existing_data:
        # Remove the key
        del existing_data[key_to_remove]

        # Write the updated data back to the JSON file
        with open(file_path, 'w') as json_file:
            json.dump(existing_data, json_file, indent=2)

        print(f"The key '{key_to_remove}' has been removed from {file_path}")
    else:
        print(f"The key '{key_to_remove}' does not exist in the data.")

# Example usage:
json_file_path = "./results.json"
key_to_remove = "BLANC_help_300_translation"

remove_key_from_json(json_file_path, key_to_remove)


The key 'BLANC_help_300_translation' has been removed from ./results.json
