In [1]:
import pandas as pd
from pybtex.database.input.bibtex import Parser
import re
import PyPDF2
import tabula

In [2]:
# Define relative file paths
pdf_path_abb = "../data/raw/Journal-Abbreviation.pdf"
if_path = "../data/raw/ImpactFactor2024.xlsx"
sheet_name = "2024最新完整版IF"

# Load Impact Factor data from the Excel file
if_2024 = pd.read_excel(if_path, sheet_name=sheet_name)

# Display first rows for initial verification
if_2024.head()


Unnamed: 0,Name,Abbr Name,ISSN,EISSN,JIF,JIF5Years,Category
0,CA-A CANCER JOURNAL FOR CLINICIANS,CA-CANCER J CLIN,0007-9235,1542-4863,503.1,297.0,ONCOLOGY|Q1|1/322
1,NATURE REVIEWS DRUG DISCOVERY,NAT REV DRUG DISCOV,1474-1776,1474-1784,122.7,114.9,PHARMACOLOGY & PHARMACY|Q1|1/354
2,LANCET,LANCET,0140-6736,1474-547X,98.4,106.9,"MEDICINE, GENERAL & INTERNAL|Q1|1/325"
3,NEW ENGLAND JOURNAL OF MEDICINE,NEW ENGL J MED,0028-4793,1533-4406,96.2,94.3,"MEDICINE, GENERAL & INTERNAL|Q1|2/325"
4,BMJ-British Medical Journal,BMJ-BRIT MED J,0959-535X,1756-1833,93.6,69.9,"MEDICINE, GENERAL & INTERNAL|Q1|3/325"


In [3]:
# Ensure the 'Category' column exists before splitting
if 'Category' in if_2024.columns:
    # Split the 'Category' column into three new columns
    if_2024[['Domain', 'Area', 'Rank']] = if_2024['Category'].str.split('|', expand=True)
else:
    print("Warning: 'Category' column not found in the DataFrame.")
# Display the original and newly created columns to verify the split
if_2024[['Category', 'Domain', 'Area', 'Rank']].head()
# Drop the original 'Category' column from the DataFrame
if_2024 = if_2024.drop(columns=['Category'], errors='ignore')

# Display the first few rows to confirm removal
if_2024.head()


Unnamed: 0,Name,Abbr Name,ISSN,EISSN,JIF,JIF5Years,Domain,Area,Rank
0,CA-A CANCER JOURNAL FOR CLINICIANS,CA-CANCER J CLIN,0007-9235,1542-4863,503.1,297.0,ONCOLOGY,Q1,1/322
1,NATURE REVIEWS DRUG DISCOVERY,NAT REV DRUG DISCOV,1474-1776,1474-1784,122.7,114.9,PHARMACOLOGY & PHARMACY,Q1,1/354
2,LANCET,LANCET,0140-6736,1474-547X,98.4,106.9,"MEDICINE, GENERAL & INTERNAL",Q1,1/325
3,NEW ENGLAND JOURNAL OF MEDICINE,NEW ENGL J MED,0028-4793,1533-4406,96.2,94.3,"MEDICINE, GENERAL & INTERNAL",Q1,2/325
4,BMJ-British Medical Journal,BMJ-BRIT MED J,0959-535X,1756-1833,93.6,69.9,"MEDICINE, GENERAL & INTERNAL",Q1,3/325


In [10]:
# Define the path to your text file
text_file_path = "ABB.csv"
abb = pd.read_csv(text_file_path)

In [11]:
abb.head()

Unnamed: 0,Journal Name,ISO 4 abbreviation
0,2D Materials,2D Mater.
1,3 Biotech,3 Biotech
2,3D Printing and Additive Manufacturing,3D Print. Addit. Manuf.
3,3D Printing in Medicine,3D Print. Med.
4,Astronomy and Astrophysics,A & A


In [12]:
# Function to split the text into a list of tuples containing the journal name and abbreviation
def split_journal_text(text):
    # Split the text by lines and remove the first three lines which are headers
    lines = text.strip().split('\n')[3:]
    # Initialize list to hold journal data
    journal_data = []
    # Pattern to match the journal names and abbreviations
    # This pattern assumes that the abbreviation is always uppercase letters followed by a period
    pattern = re.compile(r'^(.+?)\s+([A-Z]+\s?[A-Z]*\.)(?=\s|$)')
    for line in lines:
        match = pattern.search(line)
        if match:
            # Append the journal name and abbreviation as a tuple
            journal_data.append((match.group(1).strip(), match.group(2).strip()))
    return journal_data

# Split the text and create a DataFrame

journal_data = split_journal_text(pdf_text_path_abb_content_str)
journal_df = pd.DataFrame(journal_data, columns=['Journal Name', 'Abbreviation'])

# Display the DataFrame
print(journal_df)

NameError: name 'pdf_text_path_abb_content_str' is not defined

In [61]:
# Function to normalize journal names
def normalize_journal_name(name):
    if isinstance(name, str):  # Check if the value is a string
        # Convert to lowercase
        name = name.lower()
        # Remove 'the ' if it's at the start
        if name.startswith('the '):
            name = name[4:]
        # Replace hyphens with spaces or remove them
        name = name.replace('-', ' ')
        name = name.replace(':', ' ')
        name = re.sub(r'\s+', ' ', name)  # Replace multiple spaces with a single space
        # Replace '&' with 'and'
        name = name.replace('&', 'and')
        return name.strip()
    return name  # Return as-is if not a string

In [62]:
if_2024.head()

Unnamed: 0,Name,Abbr Name,ISSN,EISSN,JIF,JIF5Years,Domain,Area,Rank,Normalized Journal
0,ca-a cancer journal for clinicians,CA-CANCER J CLIN,0007-9235,1542-4863,503.1,297.0,ONCOLOGY,Q1,1/322,ca a cancer journal for clinicians
1,nature reviews drug discovery,NAT REV DRUG DISCOV,1474-1776,1474-1784,122.7,114.9,PHARMACOLOGY & PHARMACY,Q1,1/354,nature reviews drug discovery
2,lancet,LANCET,0140-6736,1474-547X,98.4,106.9,"MEDICINE, GENERAL & INTERNAL",Q1,1/325,lancet
3,new england journal of medicine,NEW ENGL J MED,0028-4793,1533-4406,96.2,94.3,"MEDICINE, GENERAL & INTERNAL",Q1,2/325,new england journal of medicine
4,bmj-british medical journal,BMJ-BRIT MED J,0959-535X,1756-1833,93.6,69.9,"MEDICINE, GENERAL & INTERNAL",Q1,3/325,bmj british medical journal


In [63]:
# Convert all journal names to lowercase for case-insensitive comparison

if_2024['Name'] = if_2024['Name'].str.lower()
# Apply the normalization function to the 'Journal' column of both DataFrames
if_2024['Normalized Journal'] = if_2024['Name'].apply(normalize_journal_name)

# Create a dictionary from impact_df for easy lookup
impact_dict = if_2024.set_index('Normalized Journal')['JIF'].to_dict()


impact_factors = {
    'materials science and engineering c': 8.3,
    'applied surface science advances': 6.2,
    'european biophysics journal': 2.0,
    'nanotechnologies in russia': 0.626
}

merged_impact_factors = {**impact_factors, **impact_dict}

In [64]:
type(merged_impact_factors)

dict

In [65]:
abb.head()

Unnamed: 0,Journal Name,ISO 4 abbreviation
0,2D Materials,2D Mater.
1,3 Biotech,3 Biotech
2,3D Printing and Additive Manufacturing,3D Print. Addit. Manuf.
3,3D Printing in Medicine,3D Print. Med.
4,Astronomy and Astrophysics,A & A


In [66]:
abb['Normalized Journal'] = abb['Journal Name'].apply(normalize_journal_name)

In [68]:
# Prepare lists to collect the required data
entries_list = []

# Process each entry in the .bib file
for key, entry in bib_data.entries.items():
    # Each entry is a dictionary with values that are pybtex objects; we convert them to strings here
    authors = [str(person) for person in entry.persons.get('author', [])]
    title = entry.fields.get('title', '')
    journal = entry.fields.get('journal', '')
    volume = entry.fields.get('volume', '')
    year = entry.fields.get('year', '')
    pages = entry.fields.get('pages', '')
    doi = entry.fields.get('doi', '')
    note = entry.fields.get('note', '')

    # Detect if the entry has a cover page or impact factor mentioned in the note
    has_cover = 'cover' in note.lower()
    impact_factor = ''
    if 'IF' in note:
        try:
            # Attempt to extract the impact factor value
            impact_factor = note.split('IF')[1].strip().strip(')').strip()
        except IndexError:
            impact_factor = ''

    # Append to the entries list
    entries_list.append({
        'Authors': authors,
        'Title': title,
        'Journal': journal,
        'Volume': volume,
        'Year': year,
        'Pages': pages,
        'DOI': doi,
        'Has Cover': has_cover,
        'Impact Factor': impact_factor
    })

# Convert the list to a DataFrame
bib_df = pd.DataFrame(entries_list)

bib_df.head(10)


Unnamed: 0,Authors,Title,Journal,Volume,Year,Pages,DOI,Has Cover,Impact Factor
0,"[Chatzakou, Marianna, Huang, Junqing, Parakhon...",{Performing Particle Image Segmentation on an ...,,,2024,295--304,10.1007/978-3-031-42539-4_33,False,
1,"[Eftekhari, Karaneh, Parakhonskiy, Bogdan V., ...",{Advances in Nanoarchitectonics: A Review of “...,Materials,17.0,2024,1051,10.3390/ma17051051,False,
2,"[Huang, Yanqi, Skirtach, Andre G., Parakhonski...","{Systematic study of stability, loading effici...",Ceramics International,50.0,2024,7469--7479,10.1016/j.ceramint.2023.12.044,False,
3,"[Liu, Yin, Li, Jie, Parakhonskiy, Bogdan V., H...",{Labelling of micro- and nanoplastics for envi...,Journal of Hazardous Materials,462.0,2024,132785,10.1016/j.jhazmat.2023.132785,False,
4,"[Eftekhari, Karaneh, {Van der Meeren}, Louis, ...",{PM2.5 and PM10 adsorption onto filters and su...,Colloids and Surfaces A: Physicochemical and E...,680.0,2024,132617,10.1016/j.colsurfa.2023.132617,False,
5,"[Cao, Lin, Verduijn, Joost, {Van der Meeren}, ...",{Alginate-CaCO3 hybrid colloidal hydrogel with...,International Journal of Biological Macromolec...,259.0,2024,129069,10.1016/j.ijbiomac.2023.129069,False,
6,"[Huang, Yanqi, Spiegeleer, Bart De, Parakhonsk...",{Machine learning insights into CaCO3 phase tr...,Ceramics International,50.0,2024,23284--23295,10.1016/j.ceramint.2024.04.052,False,
7,"[Cao, Lin, Li, Jie, Parakhonskiy, Bogdan, Skir...",{Intestinal-specific oral delivery of lactofer...,Food Chemistry,451.0,2024,139205,10.1016/j.foodchem.2024.139205,False,
8,"[Eftekhari, Karaneh, Danglad-Flores, Jose Ang{...",{Calcium carbonate particle synthesis in a con...,Materials Chemistry and Physics,310.0,2023,128462,10.1016/j.matchemphys.2023.128462,False,
9,"[Garello, Francesca, Svenskaya, Yulia, Parakho...",{On the Road to Precision Medicine: Magnetic S...,Pharmaceutics,15.0,2023,1812,10.3390/pharmaceutics15071812,False,


In [69]:
bib_df['Title']

0      {Performing Particle Image Segmentation on an ...
1      {Advances in Nanoarchitectonics: A Review of “...
2      {Systematic study of stability, loading effici...
3      {Labelling of micro- and nanoplastics for envi...
4      {PM2.5 and PM10 adsorption onto filters and su...
                             ...                        
104    {Polyelectrolyte microcapsules with a shell co...
105    {Permeability adjustment of polyelectrolyte mi...
106    {A study of Laser Irradiation Influence on the...
107    {Preparation of polyelectrolyte microcapsules ...
108    {Formation of Langmuir-Blodgett superlattices ...
Name: Title, Length: 109, dtype: object

In [70]:
# Function to detect Cyrillic text
def is_cyrillic(text):
    return bool(re.search('[\u0400-\u04FF]', text))

# Apply the function to the Authors column to create a mask of rows with Cyrillic text
cyrillic_mask = bib_df['Title'].apply(lambda x: any(is_cyrillic(author) for author in x))

# Invert the mask to filter out the rows with Cyrillic text
bib_df = bib_df[~cyrillic_mask].copy()

In [71]:
bib_df.head()

Unnamed: 0,Authors,Title,Journal,Volume,Year,Pages,DOI,Has Cover,Impact Factor
0,"[Chatzakou, Marianna, Huang, Junqing, Parakhon...",{Performing Particle Image Segmentation on an ...,,,2024,295--304,10.1007/978-3-031-42539-4_33,False,
1,"[Eftekhari, Karaneh, Parakhonskiy, Bogdan V., ...",{Advances in Nanoarchitectonics: A Review of “...,Materials,17.0,2024,1051,10.3390/ma17051051,False,
2,"[Huang, Yanqi, Skirtach, Andre G., Parakhonski...","{Systematic study of stability, loading effici...",Ceramics International,50.0,2024,7469--7479,10.1016/j.ceramint.2023.12.044,False,
3,"[Liu, Yin, Li, Jie, Parakhonskiy, Bogdan V., H...",{Labelling of micro- and nanoplastics for envi...,Journal of Hazardous Materials,462.0,2024,132785,10.1016/j.jhazmat.2023.132785,False,
4,"[Eftekhari, Karaneh, {Van der Meeren}, Louis, ...",{PM2.5 and PM10 adsorption onto filters and su...,Colloids and Surfaces A: Physicochemical and E...,680.0,2024,132617,10.1016/j.colsurfa.2023.132617,False,


In [72]:
bib_df['Normalized Journal'] = bib_df['Journal'].apply(normalize_journal_name)

In [73]:
# Map the Impact Factor to bib_df_no_cyrillic using the normalized names
bib_df['Impact Factor'] = bib_df['Normalized Journal'].map(merged_impact_factors)

# Check which journals don't have an IF after normalization
journals_without_if = bib_df[bib_df['Impact Factor'].isnull()]


In [77]:
def check_author_position(authors_list, name_variations):
    # Check if any of the name variations is present as the first author
    first_author_match = any(variation in authors_list[0] for variation in name_variations)
    # Check if any of the name variations is present as the last author
    last_author_match = any(variation in authors_list[-1] for variation in name_variations)
    return first_author_match, last_author_match

# List of all variations of the name
name_variations = [
    'Parakhonskiy, Bogdan V.',
    'Parakhonskiy, Bogdan',
    'Parakhonskiy, B.V.',
    'Parakhonskiy, B.',
    'Parakhonskiy, B. V.',
    'Parakhonskiy, B',
    'Bogdan V. Parakhonskiy',
    'Parakhonsky, B',
    'Parakhonskiy, Bogdan V'
    # Add any other variations if needed
]

# Applying the check_author_position function to each row in the dataframe
bib_df['Parakhonskiy_first_author'], bib_df['Parakhonskiy_last_author'] = zip(
    *bib_df['Authors'].apply(lambda x: check_author_position(x, name_variations))
)

In [78]:
def check_name_variations_absent(authors_list, name_variations):
    # Check if any of the name variations is present in the authors list
    if not any(variation in authors_list for variation in name_variations):
        return authors_list  # Return the authors list if none of the variations are present
    return None  # Return None if any variation is present



# Apply the function to the dataframe and get the rows where the name variation is not present
absent_authors_lists = bib_df['Authors'].apply(lambda x: check_name_variations_absent(x, name_variations))

# Filter out the None values to get only the rows where the name variation is absent
absent_authors_df = bib_df[absent_authors_lists.notnull()]

In [79]:
absent_authors_df['Authors']

45     [Ivanov, Aleksei, Kurtukova, Mariya, Kozadayev...
108    [Bukreeva, T. V., Dembo, K. A., Myagkov, I. V....
Name: Authors, dtype: object

In [None]:

# Display the updated DataFrame
bib_df[['Authors', 'Parakhonskiy_first_author', 'Parakhonskiy_last_author']]

In [None]:
bib_df.head()

In [None]:
# Convert the 'Year' column to integers
bib_df['Year'] = bib_df['Year'].astype(int)

In [None]:
# Convert 'Year' column to integers
bib_df['Year'] = pd.to_numeric(bib_df['Year'], errors='coerce')

In [None]:
bib_df['Parakhonskiy_last_author'][5]

In [None]:
absent_authors_df

In [None]:
journals_without_if

In [None]:
# Function to calculate the desired statistics
def calculate_author_statistics(df, current_year, years_window):
    # Filter records with DOI
    with_doi = df.dropna(subset=['DOI'])
    
    # Total amount of publications with DOI
    total_with_doi = with_doi.shape[0]
    
    # Total amount of publications with first author
    first_author_count = with_doi['Parakhonskiy_first_author'].sum()
    
    # Total amount of publications with last author
    last_author_count = with_doi['Parakhonskiy_last_author'].sum()
    first_last = first_author_count+last_author_count
    
    # Calculate the percentage of publications where the author is first or last author
    first_author_percent = round((first_author_count / total_with_doi) * 100, 1) if total_with_doi else 0
    last_author_percent = round((last_author_count / total_with_doi) * 100, 1) if total_with_doi else 0
    first_last_percentage = round((first_last/total_with_doi)*100,1) if total_with_doi else 0
    # Now, calculate statistics for the last 'years_window' years
    
    
    recent_df = with_doi[with_doi['Year'] >= (current_year - years_window + 1)]

    
    # Total amount of recent publications with DOI
    recent_total_with_doi = recent_df.shape[0]
    
    # Recent total amount of publications with first author
    recent_first_author_count = recent_df['Parakhonskiy_first_author'].sum()
    
    # Recent total amount of publications with last author
    recent_last_author_count = recent_df['Parakhonskiy_last_author'].sum()
    recent_first_last_author_count = recent_first_author_count+recent_last_author_count
    # Recent percentage of first author publications
    recent_first_author_percent = round((recent_first_author_count / recent_total_with_doi) * 100,1) if recent_total_with_doi else 0
    
    # Recent percentage of last author publications
    recent_last_author_percent = round((recent_last_author_count / recent_total_with_doi) * 100,1) if recent_total_with_doi else 0
    recent_first_last_percentage = round((recent_first_last_author_count/total_with_doi)*100,1) if total_with_doi else 0
    
    # Get counts of publications per journal and their total impact factors
    journal_stats = df.groupby('Normalized Journal').agg({
        'DOI': 'count',
        'Impact Factor': 'first'  # Assuming the impact factor is the same for all rows of the same journal
    }).rename(columns={'DOI': 'count'}).reset_index()

    # Sort the journals by Impact Factor and get the top 5
    top_journals = journal_stats.sort_values(by='Impact Factor', ascending=False).head(5)

    # Filter for publications where Parakhonskiy is first or last author
    parakhonskiy_first_last_df = df[ (df['Parakhonskiy_first_author'] == True) | (df['Parakhonskiy_last_author'] == True) ]
    
     # Get counts of publications per journal where Parakhonskiy is first or last author and their total impact factors
    parakhonskiy_journal_stats = parakhonskiy_first_last_df.groupby('Normalized Journal').agg({
        'DOI': 'count',
        'Impact Factor': 'first'  # Assuming the impact factor is the same for all rows of the same journal
    }).rename(columns={'DOI': 'count'}).reset_index()
    
    
    # Sort the journals by count of publications where Parakhonskiy is first or last author and get the top 5
    top_parakhonskiy_journals = parakhonskiy_journal_stats.sort_values(by='Impact Factor', ascending=False).head(5)
    
    # Compile statistics into a dictionary
    statistics = {
        'total_with_doi': total_with_doi,
        'first_author_count': first_author_count,
        'last_author_count': last_author_count,
        'first_last' : first_last,
        'first_author_percent': first_author_percent,
        'last_author_percent': last_author_percent,
        'first_last_percentage':first_last_percentage,
        'recent_total_with_doi': recent_total_with_doi,
        'recent_first_author_count': int(recent_first_author_count),
        'recent_last_author_count': int(recent_last_author_count),
        'recent_first_last_author_count' : int(recent_first_last_author_count),
        'recent_first_author_percent': recent_first_author_percent,
        'recent_last_author_percent': recent_last_author_percent,
        'recent_first_last_percentage' : recent_first_last_percentage,
        'top_journals': top_journals.to_dict(orient='records'),  # Convert the top 5 journals to a list of dictionaries
        'top_parakhonskiy_journals': top_parakhonskiy_journals.to_dict(orient='records')  # Convert the top 5 journals to a list of dictionaries
    }
    
    return statistics

# Assuming the current year is 2023 and we want statistics for the last 5 years
current_year = 2023
years_window = 5

# Calculate statistics
author_stats = calculate_author_statistics(bib_df, current_year, years_window)
author_stats

In [None]:
def create_author_summary(author_stats, current_year, years_window):
    # Construct the high impact journals string without the year
    high_impact_string = ', '.join([
        f"{journal['Normalized Journal'].title()} (IF {journal['Impact Factor']})"
        for journal in author_stats['top_journals']
    ])

    # Construct the Parakhonskiy journals string
    parakhonskiy_journal_string = ', '.join([
        f"{journal['Normalized Journal'].title()} (IF {journal['Impact Factor']}, {journal['count']} publications)"
        for journal in author_stats['top_parakhonskiy_journals']
    ])

    # Construct the summary string
    summary = (
        f"I have authored {author_stats['total_with_doi']} publications "
        f"({author_stats['recent_total_with_doi']} since {current_year - years_window + 1}), "
        f"including papers in such high impact journals as {high_impact_string}. "
        f"Among these, I am the first or last author on {author_stats['first_last']} papers, "
        f"with {author_stats['recent_first_last_author_count']} of those since {current_year - years_window + 1}. "
        f"I have served as the first author on {author_stats['first_author_percent']:.1f}% of my publications and as the last author on "
        f"{author_stats['last_author_percent']:.1f}%. In the past {years_window} years, "
        f"I have been the first or last author on {author_stats['recent_first_last_author_count']} publications, "
        f"{author_stats['recent_last_author_count']} of which list me as the last author. "
        f"Significant contributions include {parakhonskiy_journal_string}."
    )
    return summary

# You would then call this function with your author_stats dictionary:
author_summary = create_author_summary(author_stats, current_year, years_window)






In [None]:
# Display with increased font size
display(HTML(f"<div style='font-size: 1.25em;'>{author_summary}</div>"))

In [None]:
def create_author_summary(author_stats, current_year, years_window):
     # Check if 'top_journals' exists in author_stats and construct the high impact journals string
    if 'top_journals' in author_stats and author_stats['top_journals']:
        high_impact_string = ', '.join([
            f"{journal.get('name', 'Unknown Journal')} {journal.get('year', 'Unknown Year')} (IF {journal.get('impact_factor', 'N/A')})"
            for journal in author_stats['top_journals']
        ])
    else:
        high_impact_string = 'N/A'

    # Check if 'top_parakhonskiy_journals' exists in author_stats and construct the parakhonskiy journals string
    if 'top_parakhonskiy_journals' in author_stats and author_stats['top_parakhonskiy_journals']:
        parakhonskiy_journal_string = ', '.join([
            f"{journal.get('name', 'Unknown Journal')} (IF {journal.get('impact_factor', 'N/A')}, {journal.get('count', 'N/A')} publications {journal.get('year_range', 'Unknown Year Range')})"
            for journal in author_stats['top_parakhonskiy_journals']
        ])
    else:
        parakhonskiy_journal_string = 'N/A'

    # Construct the summary string
    summary = (
        f"I have authored {author_stats['total_with_doi']} publications "
        f"({author_stats['recent_total_with_doi']} since {current_year - years_window + 1}), "
        f"including papers in such high impact journals as {high_impact_string}. "
        f"Among these, I am the first or last author on {author_stats['first_author_count'] + author_stats['last_author_count']} papers, "
        f"with {author_stats['recent_first_author_count'] + author_stats['recent_last_author_count']} of those since {current_year - years_window + 1}. "
        f"I have served as the first author on {author_stats['first_author_percent']:.1f}% of my publications and as the last author on "
        f"{author_stats['last_author_percent']:.1f}%. In the past {years_window} years, "
        f"I have been the first or last author on {author_stats['recent_first_author_count'] + author_stats['recent_last_author_count']} publications, "
        f"{author_stats['recent_last_author_count']} of which list me as the last author. "
        f"Significant contributions include {parakhonskiy_journal_string}."
    )
    return summary

# You would then call this function with your author_stats dictionary:
author_summary = create_author_summary(author_stats, current_year, years_window)


In [None]:
author_summary

In [None]:
# Filter the DataFrame to show records where 'Parakhonskiy_first_author' or 'Parakhonskiy_last_author' is True
parakhonskiy_author_records = bib_df[
    (bib_df['Parakhonskiy_first_author']) | (bib_df['Parakhonskiy_last_author'])
]

parakhonskiy_author_records.head(30)