<a href="https://colab.research.google.com/github/Emili0-0/bibcheck/blob/main/bib.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Abbreviates journal names.** Upload journallist.txt and the bib file, e.g., mybib.bib. It will generate a new file mybib_modified.bib containing the abbreviated journal names.

In [None]:
import re
import os

# Function to add periods to abbreviations (except for single letters at the end)
def add_periods_to_abbreviation(abbreviation):
    """Return *abbreviation* with a period appended to each of its words.

    The string is split on whitespace and re-joined with single spaces.
    Every word receives a trailing period except:

    * a single-character word in the last position (e.g. the series
      letter in "Phys Rev A"), which is left bare, and
    * a word that already ends in a period, so input that is already
      punctuated is not turned into "Phys..".

    Args:
        abbreviation: Abbreviated journal title, e.g. "Phys Rev A".

    Returns:
        The punctuated title, e.g. "Phys. Rev. A". An empty or
        whitespace-only input yields "".
    """
    words = abbreviation.split()
    last_index = len(words) - 1
    processed_words = []

    for i, word in enumerate(words):
        is_trailing_letter = len(word) == 1 and i == last_index
        if is_trailing_letter or word.endswith("."):
            # Trailing single letter, or already punctuated: keep as is
            processed_words.append(word)
        else:
            processed_words.append(word + ".")

    return " ".join(processed_words)

# --- Collect and validate the two input paths ---

# Keep prompting until the user names an existing file ending in ".bib"
while True:
    bib_file = input("Enter the path to the .bib file (e.g., Qsex.bib): ").strip()
    if not bib_file.endswith(".bib"):
        print("❌ Please provide a valid .bib file (must end with '.bib').")
        continue
    if os.path.isfile(bib_file):
        break
    print(f"❌ The file '{bib_file}' does not exist.")

# Keep prompting until the user names an existing journal list file
while True:
    journal_list_file = input("Enter the path to the journal list file (e.g., journallist.txt): ").strip()
    if os.path.isfile(journal_list_file):
        break
    print(f"❌ The file '{journal_list_file}' does not exist.")

# --- Build the lookup table: lower-cased full name -> abbreviation ---
journal_map = {}

with open(journal_list_file, "r", encoding="utf-8") as file:
    for line in file:
        # Only lines of the form "<full name> --> <abbreviation>" are used;
        # lines with no separator, or with more than one, are skipped.
        parts = line.strip().split("-->")
        if len(parts) != 2:
            continue
        full_name, short_name = parts[0].strip(), parts[1].strip()
        # Keys lose a trailing period and are lower-cased so that the
        # lookup is case-insensitive.
        key = full_name.rstrip(".").lower()
        # Periods are added to the abbreviation (except for a single
        # letter at the end).
        journal_map[key] = add_periods_to_abbreviation(short_name)

# --- Load the bibliography and prepare the replacement state ---
with open(bib_file, "r", encoding="utf-8") as file:
    bib_content = file.read()

# Running count of substitutions performed
replacements_made = 0

# Lower-cased journal names encountered in the .bib file
encountered_journals = set()

# Matches `journal = {...}` fields, case-insensitively; the braces must not
# contain a nested closing brace
journal_pattern = re.compile(r'journal\s*=\s*\{([^}]+)\}', re.IGNORECASE)

# Function to replace journal names (case-insensitive)
def replace_journal(match):
    """Substitution callback for ``journal_pattern.sub``.

    Records the lower-cased journal name in ``encountered_journals``.
    If that name is a key of ``journal_map``, increments
    ``replacements_made``, prints a confirmation line and returns a
    rewritten ``journal={...}`` field; otherwise returns the matched
    text unchanged.
    """
    global replacements_made

    found_name = match.group(1).strip()
    lookup_key = found_name.lower()  # case-insensitive lookup
    encountered_journals.add(lookup_key)

    # Guard clause: journals missing from the list are left as written
    if lookup_key not in journal_map:
        return match.group(0)

    abbreviation = journal_map[lookup_key]
    replacements_made += 1
    print(f"✅ Replaced '{found_name}' → '{abbreviation}'")
    return f'journal={{{abbreviation}}}'

# Perform replacements in a single pass
new_bib_content = journal_pattern.sub(replace_journal, bib_content)

# Journals seen in the .bib file that have no entry in the journal list
not_found_journals = encountered_journals.difference(journal_map)

# Build "<name>_modified.bib" by swapping only the trailing ".bib" extension.
# str.replace would rewrite every ".bib" occurrence in the path (including
# directory names), and would return the input path itself, causing the input
# to be overwritten, if no ".bib" were present.
if bib_file.endswith(".bib"):
    output_stem = bib_file[:-len(".bib")]
else:
    output_stem = bib_file
output_file = output_stem + "_modified.bib"

# Save the modified .bib file
with open(output_file, "w", encoding="utf-8") as file:
    file.write(new_bib_content)

if replacements_made > 0:
    print(f"✅ Replacement done! {replacements_made} replacements made. Check '{output_file}'.")
else:
    print("❌ No replacements were made. Double-check the input files.")

# Print journals that were not found in the journal list, sorted so the
# report is the same from run to run (set iteration order is arbitrary)
if not_found_journals:
    print("\n❌ The following journals were not found in the journal list:")
    for journal in sorted(not_found_journals):
        print(f" - {journal}")
else:
    print("\n✅ All journals in the .bib file were found in the journal list.")

Enter the path to the .bib file (e.g., Qsex.bib): EdgeDetection.bib
Enter the path to the journal list file (e.g., journallist.txt): journallist.txt
✅ Replaced 'Nature' → 'Nature.'
✅ Replaced 'Science' → 'Science.'
✅ Replaced 'Nature' → 'Nature.'
✅ Replaced 'SIAM Review' → 'SIAM. Rev.'
✅ Replacement done! 4 replacements made. Check 'EdgeDetection_modified.bib'.

❌ The following journals were not found in the journal list:
 - ieee t. infor. theory
 - rep. prog. phys.
 - phys. rev. a
 - ieee pami
 - phys. rev. x
 - ieee trans. pattern anal. mach. intell.
 - quantum
 - j elec electron syst
 - rev. mod. phys.
 - pattern recognit.
 - nat. commun.
 - mach. learn.: sci. technol.
 - j. stat. phys.
 - phys. rev. lett.
 - j. mod. opt.
 - epl
 - comput. sci. rev.


In [None]:
import re
import os

def extract_bibunit_environments(input_file):
    r"""Split a LaTeX file containing two bibunit environments into two files.

    Everything before the first ``\begin{bibunit}`` is treated as a shared
    preamble. Two files are written next to *input_file*:

    * ``<base>_main.tex``  -- preamble + first bibunit body + ``\end{document}``
    * ``<base>_suppl.tex`` -- preamble + second bibunit body + ``\end{document}``,
      with ``\externaldocument[][nocite]{<base>_main}`` inserted after
      ``\begin{document}`` and ``\usepackage{xr-hyper}`` inserted after
      ``\usepackage{hyperref}``.

    In each bibunit body the ``\begin{bibunit}`` / ``\end{bibunit}`` markers
    are removed, ``\putbib`` becomes ``\bibliography{...}`` using the argument
    of ``\defaultbibliography{...}`` (when the file has one), and the optional
    ``[style]`` of ``\begin{bibunit}`` becomes a ``\bibliographystyle{style}``
    line after each ``\bibliography{...}``.

    Args:
        input_file: Path of the .tex file to split.

    Raises:
        ValueError: If fewer than two bibunit environments are found.
    """
    # Read the content of the input file
    with open(input_file, 'r', encoding='utf-8') as file:
        content = file.read()

    # Use regex to find all bibunit environments
    bibunits = re.findall(r'\\begin\{bibunit\}.*?\\end\{bibunit\}', content, re.DOTALL)

    # Debug output: dumps the raw bibunit environments that were found
    print(bibunits)

    # Ensure there are at least two bibunit environments
    if len(bibunits) < 2:
        raise ValueError("The file must contain at least two bibunit environments.")

    # Extract the preamble (everything before the first bibunit environment)
    preamble_end = content.find(r'\begin{bibunit}')
    preamble = content[:preamble_end]

    # Remove the hyperlinks and references section from the preamble
    hyperlinks_section = r'% ------  HYPERLINKS AND REFERENCES ------\s*\\makeatletter.*?\\makeatother'
    preamble = re.sub(hyperlinks_section, '', preamble, flags=re.DOTALL)

    # The default bibliography is a property of the whole file, so look it up
    # once; None means the file has no \defaultbibliography{...}
    default_bib_match = re.search(r'\\defaultbibliography\{(.*?)\}', content)
    default_bib_content = default_bib_match.group(1) if default_bib_match else None

    # Process each bibunit environment
    for i, bibunit in enumerate(bibunits):

        # Remove \end{bibunit}
        bibunit = bibunit.replace(r'\end{bibunit}', '')

        # Replace \putbib with \bibliography{...}. Plain string replacement
        # inserts the file's text verbatim (no regex-template escapes).
        if default_bib_content is not None:
            bibunit = bibunit.replace(r'\putbib', r'\bibliography{' + default_bib_content + '}')

        # Extract the bibunit style (content of \begin{bibunit}[...])
        bibunit_style_match = re.search(r'\\begin\{bibunit\}\[(.*?)\]', bibunit)
        if bibunit_style_match:
            bibunit_style = bibunit_style_match.group(1)

            def add_style(bib_match, style=bibunit_style):
                # Without a default bibliography, keep the command as written
                # instead of failing on an undefined name
                bib_command = bib_match.group(0)
                if default_bib_content is not None:
                    bib_command = r'\bibliography{' + default_bib_content + '}'
                return bib_command + '\n' + r'\bibliographystyle{' + style + '}'

            # Add \bibliographystyle{...} after \bibliography{...}
            bibunit = re.sub(r'\\bibliography\{.*?\}', add_style, bibunit)

        # Remove \begin{bibunit}, with or without the optional [...] argument,
        # so no unmatched environment opener is left in the output
        bibunit = re.sub(r'\\begin\{bibunit\}(?:\[.*?\])?', '', bibunit)

        # Save the processed bibunit
        bibunits[i] = bibunit

    # Remove \defaultbibliography{...}
    preamble = re.sub(r'\\defaultbibliography\{.*?\}', '', preamble, flags=re.DOTALL)

    # Get the base name of the input file (without extension)
    base_name = os.path.splitext(input_file)[0]

    # Write <base>_main.tex (preamble + first bibunit environment + \end{document})
    with open(f'{base_name}_main.tex', 'w', encoding='utf-8') as file:
        file.write(preamble + bibunits[0] + r'\end{document}')

    # Add \externaldocument[][nocite]{<base>_main} to <base>_suppl.tex
    suppl_content = preamble + bibunits[1] + r'\end{document}'
    suppl_content = suppl_content.replace(r'\begin{document}', r'\begin{document}' + '\n' + r'\externaldocument[][nocite]{' + base_name + '_main}')

    # Add \usepackage{xr-hyper} right after \usepackage{hyperref} in <base>_suppl.tex
    suppl_content = suppl_content.replace(r'\usepackage{hyperref}', r'\usepackage{hyperref}' + '\n' + r'\usepackage{xr-hyper}')

    # Write <base>_suppl.tex (preamble + second bibunit environment + \end{document})
    with open(f'{base_name}_suppl.tex', 'w', encoding='utf-8') as file:
        file.write(suppl_content)

    print(f"Files {base_name}_main.tex and {base_name}_suppl.tex have been created successfully.")


# Ask the user for the input file; surrounding whitespace is stripped (as the
# other prompts in this notebook do) so a stray space in the typed name does
# not lead to a file-not-found error
input_file = input("Enter the name of the .tex file to split (e.g., myfile.tex): ").strip()

# Split the file into <base>_main.tex and <base>_suppl.tex
extract_bibunit_environments(input_file)

Enter the name of the .tex file to split (e.g., myfile.tex): MyPRL_multbib.tex
['\\begin{bibunit}[prsty]\n%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n%\n%                                            MAIN TEXT START\n%\n%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n\n\\title{{\\fontfamily{ptm}\\selectfont The title of my PRL}}\n\\author{Author1$^{1}$, Author2$^{1}$, Author3$^{1}$, Auther4$^{1,2}$, and Author5$^{1,3}$}\n\n\\affiliation{$^{1}$AffiliationA\n}\n\\affiliation{$^{2}$AffiliationB\n}\n\\affiliation{$^{3}$AffiliationC\n}\n\n\\begin{abstract}  \n\\ipsum[1]\n\\end{abstract}  \n\n\\pacs{03.67.-a, 03.65.Ta,42.50.-p }\n\\maketitle\n\n\\phantomsection\n\\textit{Introduction}.\nMy first citation~\\cite{citation6}  \n\nMy second citation~\\cite{citation2} \n%\n\\begin{equation}\nE=mc^2\n\\label{eq1}\n\\end{equation}\n%\nMy third citation~\\cite{citation3}. \n\\ipsum[2]\n%\n\\begin{equation}\nE^2-c^2|\\bolds

**Splits a document with two bibunits** into two files: the main text and the supplemental material.

In [None]:
import re
import os

def extract_bibunit_environments(input_file):
    r"""Split a LaTeX file containing two bibunit environments into two files.

    Everything before the first ``\begin{bibunit}`` is treated as a shared
    preamble. Two files are written next to *input_file*:

    * ``<base>_main.tex``  -- preamble + first bibunit body + ``\end{document}``
    * ``<base>_suppl.tex`` -- preamble + second bibunit body + ``\end{document}``,
      with ``\externaldocument[][nocite]{<base>_main}`` inserted after
      ``\begin{document}`` and ``\usepackage{xr-hyper}`` inserted after
      ``\usepackage{hyperref}``.

    In each bibunit body the ``\begin{bibunit}`` / ``\end{bibunit}`` markers
    are removed, ``\putbib`` becomes ``\bibliography{...}`` using the argument
    of ``\defaultbibliography{...}`` (when the file has one), and the optional
    ``[style]`` of ``\begin{bibunit}`` becomes a ``\bibliographystyle{style}``
    line after each ``\bibliography{...}``. The ``% !TEX TS-program = multbib``
    directive is stripped from both the preamble and the bibunit bodies.

    Args:
        input_file: Path of the .tex file to split.

    Raises:
        ValueError: If fewer than two bibunit environments are found.
    """
    # Read the content of the input file
    with open(input_file, 'r', encoding='utf-8') as file:
        content = file.read()

    # Use regex to find all bibunit environments
    bibunits = re.findall(r'\\begin\{bibunit\}.*?\\end\{bibunit\}', content, re.DOTALL)

    # Ensure there are at least two bibunit environments
    if len(bibunits) < 2:
        raise ValueError("The file must contain at least two bibunit environments.")

    # Extract the preamble (everything before the first bibunit environment)
    preamble_end = content.find(r'\begin{bibunit}')
    preamble = content[:preamble_end]

    # Remove the hyperlinks and references section from the preamble
    hyperlinks_section = r'% ------  HYPERLINKS AND REFERENCES ------\s*\\makeatletter.*?\\makeatother'
    preamble = re.sub(hyperlinks_section, '', preamble, flags=re.DOTALL)

    # Remove \defaultbibliography{...} and the multbib directive from the
    # preamble (done once; it does not depend on the individual bibunits)
    preamble = re.sub(r'\\defaultbibliography\{.*?\}', '', preamble, flags=re.DOTALL)
    preamble = preamble.replace('% !TEX TS-program = multbib', '')

    # The default bibliography is a property of the whole file, so look it up
    # once; None means the file has no \defaultbibliography{...}
    default_bib_match = re.search(r'\\defaultbibliography\{(.*?)\}', content)
    default_bib_content = default_bib_match.group(1) if default_bib_match else None

    # Process each bibunit environment
    for i, bibunit in enumerate(bibunits):

        # Remove \end{bibunit}
        bibunit = bibunit.replace(r'\end{bibunit}', '')

        # Replace \putbib with \bibliography{...}. Plain string replacement
        # inserts the file's text verbatim (no regex-template escapes).
        if default_bib_content is not None:
            bibunit = bibunit.replace(r'\putbib', r'\bibliography{' + default_bib_content + '}')

        # Extract the bibunit style (content of \begin{bibunit}[...])
        bibunit_style_match = re.search(r'\\begin\{bibunit\}\[(.*?)\]', bibunit)
        if bibunit_style_match:
            bibunit_style = bibunit_style_match.group(1)

            def add_style(bib_match, style=bibunit_style):
                # Without a default bibliography, keep the command as written
                # instead of failing on an undefined name
                bib_command = bib_match.group(0)
                if default_bib_content is not None:
                    bib_command = r'\bibliography{' + default_bib_content + '}'
                return bib_command + '\n' + r'\bibliographystyle{' + style + '}'

            # Add \bibliographystyle{...} after \bibliography{...}
            bibunit = re.sub(r'\\bibliography\{.*?\}', add_style, bibunit)

        # Remove \begin{bibunit}, with or without the optional [...] argument,
        # so no unmatched environment opener is left in the output
        bibunit = re.sub(r'\\begin\{bibunit\}(?:\[.*?\])?', '', bibunit)

        # Remove % !TEX TS-program = multbib
        bibunit = bibunit.replace('% !TEX TS-program = multbib', '')

        # Save the processed bibunit
        bibunits[i] = bibunit

    # Get the base name of the input file (without extension)
    base_name = os.path.splitext(input_file)[0]

    # Write <base>_main.tex (preamble + first bibunit environment + \end{document})
    with open(f'{base_name}_main.tex', 'w', encoding='utf-8') as file:
        file.write(preamble + bibunits[0] + r'\end{document}')

    # Add \externaldocument[][nocite]{<base>_main} to <base>_suppl.tex
    suppl_content = preamble + bibunits[1] + r'\end{document}'
    suppl_content = suppl_content.replace(r'\begin{document}', r'\begin{document}' + '\n' + r'\externaldocument[][nocite]{' + base_name + '_main}')

    # Add \usepackage{xr-hyper} right after \usepackage{hyperref} in <base>_suppl.tex
    suppl_content = suppl_content.replace(r'\usepackage{hyperref}', r'\usepackage{hyperref}' + '\n' + r'\usepackage{xr-hyper}')

    # Write <base>_suppl.tex (preamble + second bibunit environment + \end{document})
    with open(f'{base_name}_suppl.tex', 'w', encoding='utf-8') as file:
        file.write(suppl_content)

    print(f"Files {base_name}_main.tex and {base_name}_suppl.tex have been created successfully.")


# Ask the user for the input file; surrounding whitespace is stripped (as the
# other prompts in this notebook do) so a stray space in the typed name does
# not lead to a file-not-found error
input_file = input("Enter the name of the .tex file to split (e.g., myfile.tex): ").strip()

# Split the file into <base>_main.tex and <base>_suppl.tex
extract_bibunit_environments(input_file)

Enter the name of the .tex file to split (e.g., myfile.tex): MyPRL_multbib.tex
Files MyPRL_multbib_main.tex and MyPRL_multbib_suppl.tex have been created successfully.


New abbreviation file

In [None]:
import re
import os

# --- Collect and validate the two input paths ---

# Keep prompting until the user names an existing file ending in ".bib"
while True:
    bib_file = input("Enter the path to the .bib file (e.g., Qsex.bib): ").strip()
    if not bib_file.endswith(".bib"):
        print("❌ Please provide a valid .bib file (must end with '.bib').")
        continue
    if os.path.isfile(bib_file):
        break
    print(f"❌ The file '{bib_file}' does not exist.")

# Keep prompting until the user names an existing journal list file
while True:
    journal_list_file = input("Enter the path to the journal list file (e.g., journallist.txt): ").strip()
    if os.path.isfile(journal_list_file):
        break
    print(f"❌ The file '{journal_list_file}' does not exist.")

# --- Build the lookup table: lower-cased full name -> abbreviation ---
journal_map = {}

with open(journal_list_file, "r", encoding="utf-8") as file:
    for line in file:
        # Only lines of the form "<full name> --> <abbreviation>" are used;
        # lines with no separator, or with more than one, are skipped.
        parts = line.strip().split("-->")
        if len(parts) != 2:
            continue
        full_name, short_name = parts[0].strip(), parts[1].strip()
        # Keys lose a trailing period and are lower-cased so that the
        # lookup is case-insensitive. The abbreviation is stored exactly
        # as written in the list (no periods are added in this cell).
        journal_map[full_name.rstrip(".").lower()] = short_name

# --- Load the bibliography and prepare the replacement state ---
with open(bib_file, "r", encoding="utf-8") as file:
    bib_content = file.read()

# Running count of substitutions performed
replacements_made = 0

# Lower-cased journal names encountered in the .bib file
encountered_journals = set()

# Matches `journal = {...}` fields, case-insensitively; the braces must not
# contain a nested closing brace
journal_pattern = re.compile(r'journal\s*=\s*\{([^}]+)\}', re.IGNORECASE)

# Function to replace journal names (case-insensitive)
def replace_journal(match):
    """Substitution callback for ``journal_pattern.sub``.

    Records the lower-cased journal name in ``encountered_journals``.
    If that name is a key of ``journal_map``, increments
    ``replacements_made``, prints a confirmation line and returns a
    rewritten ``journal={...}`` field; otherwise returns the matched
    text unchanged.
    """
    global replacements_made

    found_name = match.group(1).strip()
    lookup_key = found_name.lower()  # case-insensitive lookup
    encountered_journals.add(lookup_key)

    # Guard clause: journals missing from the list are left as written
    if lookup_key not in journal_map:
        return match.group(0)

    abbreviation = journal_map[lookup_key]
    replacements_made += 1
    print(f"✅ Replaced '{found_name}' → '{abbreviation}'")
    return f'journal={{{abbreviation}}}'

# Perform replacements in a single pass
new_bib_content = journal_pattern.sub(replace_journal, bib_content)

# Journals seen in the .bib file that have no entry in the journal list
not_found_journals = encountered_journals.difference(journal_map)

# Build "<name>_modified.bib" by swapping only the trailing ".bib" extension.
# str.replace would rewrite every ".bib" occurrence in the path (including
# directory names), and would return the input path itself, causing the input
# to be overwritten, if no ".bib" were present.
if bib_file.endswith(".bib"):
    output_stem = bib_file[:-len(".bib")]
else:
    output_stem = bib_file
output_file = output_stem + "_modified.bib"

# Save the modified .bib file
with open(output_file, "w", encoding="utf-8") as file:
    file.write(new_bib_content)

if replacements_made > 0:
    print(f"✅ Replacement done! {replacements_made} replacements made. Check '{output_file}'.")
else:
    print("❌ No replacements were made. Double-check the input files.")

# Print journals that were not found in the journal list, sorted so the
# report is the same from run to run (set iteration order is arbitrary)
if not_found_journals:
    print("\n❌ The following journals were not found in the journal list:")
    for journal in sorted(not_found_journals):
        print(f" - {journal}")
else:
    print("\n✅ All journals in the .bib file were found in the journal list.")

Enter the path to the .bib file (e.g., Qsex.bib): Qsex.bib
Enter the path to the journal list file (e.g., journallist.txt): journal_abbreviations_geology_physics.txt
✅ Replaced 'Physical Review A' → 'Phys. Rev. A'
✅ Replaced 'Journal of Physics A: Mathematical and Theoretical' → 'J. Phys. A: Math. Theor.'
✅ Replaced 'Physical Review A' → 'Phys. Rev. A'
✅ Replaced 'Physical Review A' → 'Phys. Rev. A'
✅ Replaced 'Nature Physics' → 'Nat. Phys.'
✅ Replaced 'Physical Review Letters' → 'Phys. Rev. Lett.'
✅ Replaced 'Physical Review Letters' → 'Phys. Rev. Lett.'
✅ Replaced 'Physical Review A' → 'Phys. Rev. A'
✅ Replaced 'Physical Review Research' → 'Phys. Rev. Research'
✅ Replaced 'Physical Review A' → 'Phys. Rev. A'
✅ Replaced 'Physical Review A' → 'Phys. Rev. A'
✅ Replaced 'Letters in Mathematical Physics' → 'Lett. Math. Phys.'
✅ Replaced 'Physical Review Research' → 'Phys. Rev. Research'
✅ Replaced 'Contemporary Physics' → 'Contemp. Phys.'
✅ Replaced 'Advances in Optics and Photonics' → 'A