In [None]:
import pandas as pd

This function takes the path to a PubMed text file and returns all articles that have entries in ALL of the columns 'PMID', 'TI', 'TA', 'JID', 'DP' and 'EDAT', and that MUST also have information in the 'DEP' column.

In [None]:
def extract_articles_with_DEP(pubmed_text_file_path):
    """Return the PubMed records that carry every required field, DEP included.

    The file is expected to be a PubMed export in the tagged text format:
    records separated by a blank line, each field starting with an upper-case
    tag of two to four letters followed by ``-``, and long values wrapped
    onto indented continuation lines.

    Args:
        pubmed_text_file_path: Path to the UTF-8 encoded export file.

    Returns:
        A ``pandas.DataFrame`` with the columns ``PMID``, ``TI``, ``TA``,
        ``JID``, ``DP``, ``EDAT`` and ``DEP`` (values are kept as strings),
        one row per record that contains all seven tags. Records missing any
        of them are dropped; an empty file yields an empty DataFrame.
    """
    required_tags = ['PMID', 'TI', 'TA', 'JID', 'DP', 'EDAT', 'DEP']

    with open(pubmed_text_file_path, 'r', encoding='utf-8') as file:
        text_file = file.read()

    rows = []
    for entry in text_file.split('\n\n'):
        record = {}
        # Reset per record so a continuation line can never attach to a tag
        # that was seen in a previous record (or to no tag at all).
        last_tag = None
        for line in entry.strip().split('\n'):
            if not line.strip():
                continue

            head, hyphen, rest = line.partition('-')
            tag = head.strip()
            # Only an unindented, short, upper-case tag starts a new field.
            # Splitting on any hyphen would turn wrapped text such as
            # "      follow-up study" into a bogus field and truncate the
            # value it actually belongs to.
            is_field_line = (
                hyphen == '-'
                and not line[0].isspace()
                and 2 <= len(tag) <= 4
                and tag.isalpha()
                and tag.isupper()
            )

            if is_field_line:
                # A repeated tag overwrites the earlier value (last one wins).
                record[tag] = rest.strip()
                last_tag = tag
            elif last_tag is not None:
                # Wrapped value: glue it onto the field it continues.
                record[last_tag] += ' ' + line.strip()

        if all(tag in record for tag in required_tags):
            rows.append({tag: record[tag] for tag in required_tags})

    # Passing `columns` keeps the column set and order even when no record qualified.
    articles_with_DEP = pd.DataFrame(rows, columns=required_tags)

    return articles_with_DEP

This other function takes the path to a PubMed text file and returns articles that have entries in ALL of the columns 'PMID', 'TI', 'TA', 'JID', 'DP' and 'EDAT', REGARDLESS of whether they have DEP information or not.
'DEP' information is NOT ALWAYS present; therefore, the length of the dataframe returned by this function will usually be LONGER than that of the previous one, where DEP information must be present.

In [None]:
def extract_articles_REGARDLESS_of_DEP(pubmed_text_file_path):
    """Return the PubMed records that carry every required field; DEP is optional.

    The file is expected to be a PubMed export in the tagged text format:
    records separated by a blank line, each field starting with an upper-case
    tag of two to four letters followed by ``-``, and long values wrapped
    onto indented continuation lines.

    Args:
        pubmed_text_file_path: Path to the UTF-8 encoded export file.

    Returns:
        A ``pandas.DataFrame`` with the columns ``PMID``, ``TI``, ``TA``,
        ``JID``, ``DP`` and ``EDAT`` (values are kept as strings), one row
        per record that contains all six tags, whether or not it also has a
        ``DEP`` field. Records missing any of the six are dropped; an empty
        file yields an empty DataFrame.
    """
    required_tags = ['PMID', 'TI', 'TA', 'JID', 'DP', 'EDAT']

    with open(pubmed_text_file_path, 'r', encoding='utf-8') as file:
        text_file = file.read()

    rows = []
    for entry in text_file.split('\n\n'):
        record = {}
        # Reset per record so a continuation line can never attach to a tag
        # that was seen in a previous record (or to no tag at all).
        last_tag = None
        for line in entry.strip().split('\n'):
            if not line.strip():
                continue

            head, hyphen, rest = line.partition('-')
            tag = head.strip()
            # Only an unindented, short, upper-case tag starts a new field.
            # Splitting on any hyphen would turn wrapped text such as
            # "      follow-up study" into a bogus field and truncate the
            # value it actually belongs to.
            is_field_line = (
                hyphen == '-'
                and not line[0].isspace()
                and 2 <= len(tag) <= 4
                and tag.isalpha()
                and tag.isupper()
            )

            if is_field_line:
                # A repeated tag overwrites the earlier value (last one wins).
                record[tag] = rest.strip()
                last_tag = tag
            elif last_tag is not None:
                # Wrapped value: glue it onto the field it continues.
                record[last_tag] += ' ' + line.strip()

        if all(tag in record for tag in required_tags):
            rows.append({tag: record[tag] for tag in required_tags})

    # Passing `columns` keeps the column set and order even when no record qualified.
    articles_REGARDLESS_of_DEP = pd.DataFrame(rows, columns=required_tags)

    return articles_REGARDLESS_of_DEP

To use the functions, pass the PubMed file path as the argument.

The attached sample file can be used to test the functions. The file was obtained on 10/11/2024 using the PubMed query:

"naloxone"[MeSH Terms] OR "naloxone"[All Fields] AND ("social stigma"[MeSH Terms] OR ("social"[All Fields] AND "stigma"[All Fields]) OR "social stigma"[All Fields] OR "stigma"[All Fields] OR "stigmas"[All Fields] OR "stigma s"[All Fields]) AND 2000/01/01:2023/12/31[Date - Publication]

In [None]:
# Colab-specific import; this cell presumably only works inside a Google Colab runtime.
from google.colab import files
# Ask for the file(s) to upload; the cell output reports the name each upload was saved under.
uploaded = files.upload()

Saving pubmed_naloxonestigma2020_2023.txt to pubmed_naloxonestigma2020_2023.txt


In [None]:
# Parse the uploaded sample file, keeping only records that also have DEP;
# the returned DataFrame is shown as the cell output.
extract_articles_with_DEP('/content/pubmed_naloxonestigma2020_2023.txt')


Unnamed: 0,PMID,TI,TA,JID,DP,EDAT,DEP
0,37563039,How to save a life: Public awareness of a nati...,Int J Drug Policy,9014759,2024 Sep,2023/08/11 00:42,20230808
1,36154722,Caring for People Who Use Drugs: Best Practice...,Health Promot Pract,100890609,2024 Sep,2022/09/27 06:00,20220926
2,37326129,Patient perspectives on emergency department i...,Acad Emerg Med,9418450,2024 May,2023/06/16 13:10,20230705
3,38103835,Willingness to utilize a mobile treatment unit...,J Subst Use Addict Treat,9918541186406676,2024 Apr,2023/12/17 09:42,20231214
4,38142801,Differences in perceptions of community stigma...,J Subst Use Addict Treat,9918541186406676,2024 Mar,2023/12/25 00:42,20231222
...,...,...,...,...,...,...,...
168,27323868,A new era of addiction treatment amplifies the...,Int J Obes (Lond),101256108,2016 Sep,2016/06/22 06:00,20160621
169,24997702,Opioid substitution treatment in New Zealand: ...,N Z Med J,0401067,2014 Jul 4,2014/07/07 06:00,20140704
170,24938376,Pharmacies as providers of expanded health ser...,BMC Health Serv Res,101088677,2014 Jun 17,2014/06/19 06:00,20140617
171,24741316,Utilizing buprenorphine-naloxone to treat illi...,Neuropsychiatr Dis Treat,101240304,2014,2014/04/18 06:00,20140407


In [None]:
# Parse the same sample file without requiring DEP;
# the returned DataFrame is shown as the cell output.
extract_articles_REGARDLESS_of_DEP('/content/pubmed_naloxonestigma2020_2023.txt')

Unnamed: 0,PMID,TI,TA,JID,DP,EDAT
0,37563039,How to save a life: Public awareness of a nati...,Int J Drug Policy,9014759,2024 Sep,2023/08/11 00:42
1,36154722,Caring for People Who Use Drugs: Best Practice...,Health Promot Pract,100890609,2024 Sep,2022/09/27 06:00
2,37326129,Patient perspectives on emergency department i...,Acad Emerg Med,9418450,2024 May,2023/06/16 13:10
3,38103835,Willingness to utilize a mobile treatment unit...,J Subst Use Addict Treat,9918541186406676,2024 Apr,2023/12/17 09:42
4,38142801,Differences in perceptions of community stigma...,J Subst Use Addict Treat,9918541186406676,2024 Mar,2023/12/25 00:42
...,...,...,...,...,...,...
194,24997702,Opioid substitution treatment in New Zealand: ...,N Z Med J,0401067,2014 Jul 4,2014/07/07 06:00
195,24938376,Pharmacies as providers of expanded health ser...,BMC Health Serv Res,101088677,2014 Jun 17,2014/06/19 06:00
196,24741316,Utilizing buprenorphine-naloxone to treat illi...,Neuropsychiatr Dis Treat,101240304,2014,2014/04/18 06:00
197,20958853,Buprenorphine-based regimens and methadone for...,Am J Addict,9208821,2010 Nov-Dec,2010/10/21 06:00
