# Title
[]()

In [6]:

# import pandas as pd
import sys
sys.path.append(r"C:\Users\silvh\OneDrive\lighthouse\custom_python")
from silvhua import *

# From previous note book `2023-07-05 citation API`

In [3]:
import requests
import json

def search_article(title):
    base_url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi'
    params = {
        'db': 'pubmed',
        'term': title,
        'field': 'title',
        'retmax': 5,
        'retmode': 'json'
    }

    response = requests.get(base_url, params=params)
    data = response.json()

    try:
        id_list = data['esearchresult']['idlist']
        if id_list:
            for article_id in id_list:
                result = retrieve_citation(article_id).decode('utf-8')
                if title.lower() in result.lower():
                    print(f'Match found for {title}: PMID = {article_id}.')
                    return result
            print('Article title not found in PMIDs.')
            return id_list        
    except:
        print('Article not found.')
        return None
    
def retrieve_citation(article_id):
    base_url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi'
    params = {
        'db': 'pubmed',
        'id': article_id
    }

    response = requests.get(base_url, params=params)
    return response.content


# iteration = 3.1
# # # Example usage
# article_title = 'Daily Energy Expenditure through the Human Life Course'
## reference pubmed page: https://pubmed.ncbi.nlm.nih.gov/34385400/

# search_dict[iteration] =  search_article(article_title)
# search_dict[iteration]

# citation_dict[iteration] = retrieve_citation('34385400')
# citation_dict[iteration].content


# Load previously saved citation data

In [8]:
filename = 'pubmed_result_2023-07-09_1151.sav'
path = '../output/'
search_dict = {}
search_dict[0] = loadpickle(filename, path)
print(search_dict[0])

	Time completed: 2023-07-10 17:07:11.150296
<?xml version="1.0" ?>
<!DOCTYPE PubmedArticleSet PUBLIC "-//NLM//DTD PubMedArticle, 1st January 2023//EN" "https://dtd.nlm.nih.gov/ncbi/pubmed/out/pubmed_230101.dtd">
<PubmedArticleSet>
<PubmedArticle><MedlineCitation Status="MEDLINE" Owner="NLM"><PMID Version="1">34385400</PMID><DateCompleted><Year>2021</Year><Month>08</Month><Day>20</Day></DateCompleted><DateRevised><Year>2022</Year><Month>08</Month><Day>14</Day></DateRevised><Article PubModel="Print"><Journal><ISSN IssnType="Electronic">1095-9203</ISSN><JournalIssue CitedMedium="Internet"><Volume>373</Volume><Issue>6556</Issue><PubDate><Year>2021</Year><Month>Aug</Month><Day>13</Day></PubDate></JournalIssue><Title>Science (New York, N.Y.)</Title><ISOAbbreviation>Science</ISOAbbreviation></Journal><ArticleTitle>Daily energy expenditure through the human life course.</ArticleTitle><Pagination><StartPage>808</StartPage><EndPage>812</EndPage><MedlinePgn>808-812</MedlinePgn></Pagination><ELoca

In [9]:
from pprint import pprint
pprint(search_dict[0])

('<?xml version="1.0" ?>\n'
 '<!DOCTYPE PubmedArticleSet PUBLIC "-//NLM//DTD PubMedArticle, 1st January '
 '2023//EN" "https://dtd.nlm.nih.gov/ncbi/pubmed/out/pubmed_230101.dtd">\n'
 '<PubmedArticleSet>\n'
 '<PubmedArticle><MedlineCitation Status="MEDLINE" Owner="NLM"><PMID '
 'Version="1">34385400</PMID><DateCompleted><Year>2021</Year><Month>08</Month><Day>20</Day></DateCompleted><DateRevised><Year>2022</Year><Month>08</Month><Day>14</Day></DateRevised><Article '
 'PubModel="Print"><Journal><ISSN '
 'IssnType="Electronic">1095-9203</ISSN><JournalIssue '
 'CitedMedium="Internet"><Volume>373</Volume><Issue>6556</Issue><PubDate><Year>2021</Year><Month>Aug</Month><Day>13</Day></PubDate></JournalIssue><Title>Science '
 '(New York, '
 'N.Y.)</Title><ISOAbbreviation>Science</ISOAbbreviation></Journal><ArticleTitle>Daily '
 'energy expenditure through the human life '
 'course.</ArticleTitle><Pagination><StartPage>808</StartPage><EndPage>812</EndPage><MedlinePgn>808-812</MedlinePgn></Paginati

# Extract key details

Here is an example of a record from the PubMed API ('https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi'), which has been converted to a string.

Write a function in Python that extracts the following details:
- Authors using APA citation format. The number of authors is variable.
- Publication year.
- ArticleTitle.
- Journal title.
- Journal volume.
- Journal issue.
- Start page.
- End page.
- ELocationID.

In [10]:
import re

def extract_pubmed_details(record_string):
    # Extract authors using APA citation format
    authors = re.findall(r'<Author ValidYN="Y".*?><LastName>(.*?)</LastName><ForeName>(.*?)</ForeName>', record_string)
    formatted_authors = ', '.join(['{} {}'.format(author[1], author[0]) for author in authors])

    # Extract publication year
    publication_year = re.search(r'<Year>(\d{4})</Year>', record_string)
    publication_year = publication_year.group(1) if publication_year else '[]'

    # Extract article title
    article_title = re.search(r'<ArticleTitle>(.*?)</ArticleTitle>', record_string)
    article_title = article_title.group(1) if article_title else '[]'

    # Extract journal title
    journal_title = re.search(r'<Title>(.*?)</Title>', record_string)
    journal_title = journal_title.group(1) if journal_title else '[]'

    # Extract journal volume
    journal_volume = re.search(r'<Volume>(.*?)</Volume>', record_string)
    journal_volume = journal_volume.group(1) if journal_volume else '[]'

    # Extract journal issue
    journal_issue = re.search(r'<Issue>(.*?)</Issue>', record_string)
    journal_issue = journal_issue.group(1) if journal_issue else '[]'

    # Extract start page
    start_page = re.search(r'<StartPage>(.*?)</StartPage>', record_string)
    start_page = start_page.group(1) if start_page else '[]'

    # Extract end page
    end_page = re.search(r'<EndPage>(.*?)</EndPage>', record_string)
    end_page = end_page.group(1) if end_page else '[]'

    # Extract ELocationID
    elocation_id = re.search(r'<ELocationID.*?>(.*?)</ELocationID>', record_string)
    elocation_id = elocation_id.group(1) if elocation_id else '[]'

    return {
        'Authors': formatted_authors,
        'Publication Year': publication_year,
        'Article Title': article_title,
        'Journal Title': journal_title,
        'Journal Volume': journal_volume,
        'Journal Issue': journal_issue,
        'Start Page': start_page,
        'End Page': end_page,
        'ELocationID': elocation_id
    }


record_string = search_dict[0]
details = extract_pubmed_details(record_string)
print(details)


{'Authors': 'Herman Pontzer, Yosuke Yamada, Hiroyuki Sagayama, Philip N Ainslie, Lene F Andersen, Liam J Anderson, Lenore Arab, Issaad Baddou, Kweku Bedu-Addo, Ellen E Blaak, Stephane Blanc, Alberto G Bonomi, Carlijn V C Bouten, Pascal Bovet, Maciej S Buchowski, Nancy F Butte, Stefan G Camps, Graeme L Close, Jamie A Cooper, Richard Cooper, Sai Krupa Das, Lara R Dugas, Ulf Ekelund, Sonja Entringer, Terrence Forrester, Barry W Fudge, Annelies H Goris, Michael Gurven, Catherine Hambly, Asmaa El Hamdouchi, Marjije B Hoos, Sumei Hu, Noorjehan Joonas, Annemiek M Joosen, Peter Katzmarzyk, Kitty P Kempen, Misaka Kimura, William E Kraus, Robert F Kushner, Estelle V Lambert, William R Leonard, Nader Lessan, Corby Martin, Anine C Medin, Erwin P Meijer, James C Morehen, James P Morton, Marian L Neuhouser, Teresa A Nicklas, Robert M Ojiambo, Kirsi H Pietil&#xe4;inen, Yannis P Pitsiladis, Jacob Plange-Rhule, Guy Plasqui, Ross L Prentice, Roberto A Rabinovich, Susan B Racette, David A Raichlen, Eric 

In [11]:
details

{'Authors': 'Herman Pontzer, Yosuke Yamada, Hiroyuki Sagayama, Philip N Ainslie, Lene F Andersen, Liam J Anderson, Lenore Arab, Issaad Baddou, Kweku Bedu-Addo, Ellen E Blaak, Stephane Blanc, Alberto G Bonomi, Carlijn V C Bouten, Pascal Bovet, Maciej S Buchowski, Nancy F Butte, Stefan G Camps, Graeme L Close, Jamie A Cooper, Richard Cooper, Sai Krupa Das, Lara R Dugas, Ulf Ekelund, Sonja Entringer, Terrence Forrester, Barry W Fudge, Annelies H Goris, Michael Gurven, Catherine Hambly, Asmaa El Hamdouchi, Marjije B Hoos, Sumei Hu, Noorjehan Joonas, Annemiek M Joosen, Peter Katzmarzyk, Kitty P Kempen, Misaka Kimura, William E Kraus, Robert F Kushner, Estelle V Lambert, William R Leonard, Nader Lessan, Corby Martin, Anine C Medin, Erwin P Meijer, James C Morehen, James P Morton, Marian L Neuhouser, Teresa A Nicklas, Robert M Ojiambo, Kirsi H Pietil&#xe4;inen, Yannis P Pitsiladis, Jacob Plange-Rhule, Guy Plasqui, Ross L Prentice, Roberto A Rabinovich, Susan B Racette, David A Raichlen, Eric 

## Try with another journal title

In [12]:
iteration = 1
# # Example usage
article_title = 'High doses of anti-inflammatory drugs compromise muscle strength and hypertrophic adaptations to resistance training in young adults'
# reference pubmed page: https://pubmed.ncbi.nlm.nih.gov/28834248/

search_dict[iteration] =  search_article(article_title)
search_dict[iteration]

Match found for High doses of anti-inflammatory drugs compromise muscle strength and hypertrophic adaptations to resistance training in young adults: PMID = 28834248.


'<?xml version="1.0" ?>\n<!DOCTYPE PubmedArticleSet PUBLIC "-//NLM//DTD PubMedArticle, 1st January 2023//EN" "https://dtd.nlm.nih.gov/ncbi/pubmed/out/pubmed_230101.dtd">\n<PubmedArticleSet>\n<PubmedArticle><MedlineCitation Status="MEDLINE" Owner="NLM"><PMID Version="1">28834248</PMID><DateCompleted><Year>2019</Year><Month>07</Month><Day>16</Day></DateCompleted><DateRevised><Year>2019</Year><Month>07</Month><Day>16</Day></DateRevised><Article PubModel="Print-Electronic"><Journal><ISSN IssnType="Electronic">1748-1716</ISSN><JournalIssue CitedMedium="Internet"><Volume>222</Volume><Issue>2</Issue><PubDate><Year>2018</Year><Month>Feb</Month></PubDate></JournalIssue><Title>Acta physiologica (Oxford, England)</Title><ISOAbbreviation>Acta Physiol (Oxf)</ISOAbbreviation></Journal><ArticleTitle>High doses of anti-inflammatory drugs compromise muscle strength and hypertrophic adaptations to resistance training in young adults.</ArticleTitle><ELocationID EIdType="doi" ValidYN="Y">10.1111/apha.1294

### Extracted details
- Wrong year extracted
- This pubmed record has no page numbers.

In [15]:
details_dict = dict()
record_string = search_dict[iteration]
details_dict[iteration] = extract_pubmed_details(record_string)
details_dict[iteration]

{'Authors': 'M Lilja, M Mandi&#x107;, W Apr&#xf3;, M Melin, K Olsson, S Rosenborg, T Gustafsson, T R Lundberg',
 'Publication Year': '2019',
 'Article Title': 'High doses of anti-inflammatory drugs compromise muscle strength and hypertrophic adaptations to resistance training in young adults.',
 'Journal Title': 'Acta physiologica (Oxford, England)',
 'Journal Volume': '222',
 'Journal Issue': '2',
 'Start Page': '[]',
 'End Page': '[]',
 'ELocationID': '10.1111/apha.12948'}

# Update parsing

In [16]:
import requests
import json
import re

def search_article(title):
    base_url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi'
    params = {
        'db': 'pubmed',
        'term': title,
        'field': 'title',
        'retmax': 5,
        'retmode': 'json'
    }

    response = requests.get(base_url, params=params)
    data = response.json()

    try:
        id_list = data['esearchresult']['idlist']
        if id_list:
            for article_id in id_list:
                result = retrieve_citation(article_id).decode('utf-8')
                if title.lower() in result.lower():
                    print(f'Match found for {title}: PMID = {article_id}.')
                    return result
            print('Article title not found in PMIDs.')
            return id_list        
    except:
        print('Article not found.')
        return None
    
def retrieve_citation(article_id):
    base_url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi'
    params = {
        'db': 'pubmed',
        'id': article_id
    }

    response = requests.get(base_url, params=params)
    return response.content

def extract_pubmed_details(record_string):
    # Extract authors using APA citation format
    authors = re.findall(r'<Author ValidYN="Y".*?><LastName>(.*?)</LastName><ForeName>(.*?)</ForeName>', record_string)
    formatted_authors = ', '.join(['{} {}'.format(author[1], author[0]) for author in authors])

    # Extract publication year
    publication_year = re.search(r'<PubDate><Year>(\d{4})</Year>', record_string)
    publication_year = publication_year.group(1) if publication_year else '[]'

    # Extract article title
    article_title = re.search(r'<ArticleTitle>(.*?)</ArticleTitle>', record_string)
    article_title = article_title.group(1) if article_title else '[]'

    # Extract journal title
    journal_title = re.search(r'<Title>(.*?)</Title>', record_string)
    journal_title = journal_title.group(1) if journal_title else '[]'

    # Extract journal volume
    journal_volume = re.search(r'<Volume>(.*?)</Volume>', record_string)
    journal_volume = journal_volume.group(1) if journal_volume else '[]'

    # Extract journal issue
    journal_issue = re.search(r'<Issue>(.*?)</Issue>', record_string)
    journal_issue = journal_issue.group(1) if journal_issue else '[]'

    # Extract start page
    start_page = re.search(r'<StartPage>(.*?)</StartPage>', record_string)
    start_page = start_page.group(1) if start_page else '[]'

    # Extract end page
    end_page = re.search(r'<EndPage>(.*?)</EndPage>', record_string)
    end_page = end_page.group(1) if end_page else '[]'

    # Extract ELocationID
    elocation_id = re.search(r'<ELocationID.*?>(.*?)</ELocationID>', record_string)
    elocation_id = elocation_id.group(1) if elocation_id else '[]'

    return {
        'Authors': formatted_authors,
        'Publication Year': publication_year,
        'Article Title': article_title,
        'Journal Title': journal_title,
        'Journal Volume': journal_volume,
        'Journal Issue': journal_issue,
        'Start Page': start_page,
        'End Page': end_page,
        'ELocationID': elocation_id
    }



# iteration = 3.2
# # # Example usage
# article_title = 'High doses of anti-inflammatory drugs compromise muscle strength and hypertrophic adaptations to resistance training in young adults'
# # reference pubmed page: https://pubmed.ncbi.nlm.nih.gov/28834248/

# search_dict[iteration] =  search_article(article_title)
# search_dict[iteration]


record_string = search_dict[iteration]
details = extract_pubmed_details(record_string)
details


{'Authors': 'M Lilja, M Mandi&#x107;, W Apr&#xf3;, M Melin, K Olsson, S Rosenborg, T Gustafsson, T R Lundberg',
 'Publication Year': '2018',
 'Article Title': 'High doses of anti-inflammatory drugs compromise muscle strength and hypertrophic adaptations to resistance training in young adults.',
 'Journal Title': 'Acta physiologica (Oxford, England)',
 'Journal Volume': '222',
 'Journal Issue': '2',
 'Start Page': '[]',
 'End Page': '[]',
 'ELocationID': '10.1111/apha.12948'}

In [17]:
extract_pubmed_details(search_dict[0])

{'Authors': 'Herman Pontzer, Yosuke Yamada, Hiroyuki Sagayama, Philip N Ainslie, Lene F Andersen, Liam J Anderson, Lenore Arab, Issaad Baddou, Kweku Bedu-Addo, Ellen E Blaak, Stephane Blanc, Alberto G Bonomi, Carlijn V C Bouten, Pascal Bovet, Maciej S Buchowski, Nancy F Butte, Stefan G Camps, Graeme L Close, Jamie A Cooper, Richard Cooper, Sai Krupa Das, Lara R Dugas, Ulf Ekelund, Sonja Entringer, Terrence Forrester, Barry W Fudge, Annelies H Goris, Michael Gurven, Catherine Hambly, Asmaa El Hamdouchi, Marjije B Hoos, Sumei Hu, Noorjehan Joonas, Annemiek M Joosen, Peter Katzmarzyk, Kitty P Kempen, Misaka Kimura, William E Kraus, Robert F Kushner, Estelle V Lambert, William R Leonard, Nader Lessan, Corby Martin, Anine C Medin, Erwin P Meijer, James C Morehen, James P Morton, Marian L Neuhouser, Teresa A Nicklas, Robert M Ojiambo, Kirsi H Pietil&#xe4;inen, Yannis P Pitsiladis, Jacob Plange-Rhule, Guy Plasqui, Ross L Prentice, Roberto A Rabinovich, Susan B Racette, David A Raichlen, Eric 

## Save & reload record 

In [25]:
search_dict.keys()

dict_keys([0, 1])

In [28]:
description = 'pubmed_results'
save_output(search_dict, description=description, pickle_path=path)

File saved:  ../output//pubmed_results_2023-07-10_2251.sav
	Time completed: 2023-07-10 22:51:38.240507
	Object saved as pickle
	Unable to save CSV


In [None]:
import os
# os.environ['api_ncbi']
os.environ.get('api_ncbi')

In [8]:
path = '../output/'
search_dict = loadpickle('pubmed_results_2023-07-10_2251.sav', path)

	Time completed: 2023-07-10 22:56:25.641937
Dictionary keys: [0, 1]


In [None]:
api_key = os.getenv('api_ncbi')
api_key

# Confirm with third article

In [12]:
import requests
import json
import re

def search_article(title):
    base_url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi'
    params = {
        'db': 'pubmed',
        'term': title,
        'field': 'title',
        'retmax': 5,
        'retmode': 'json'
    }

    response = requests.get(base_url, params=params)
    data = response.json()

    try:
        id_list = data['esearchresult']['idlist']
        if id_list:
            for article_id in id_list:
                result = retrieve_citation(article_id).decode('utf-8')
                if title.lower() in result.lower():
                    print(f'Match found for {title}: PMID = {article_id}.')
                    return result
            print('Article title not found in PMIDs.')
            return id_list        
    except:
        print('Article not found.')
        return None
    
def retrieve_citation(article_id):
    base_url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi'
    params = {
        'db': 'pubmed',
        'id': article_id
    }

    response = requests.get(base_url, params=params)
    return response.content

def extract_pubmed_details(record_string):
    # Extract authors using APA citation format
    authors = re.findall(r'<Author ValidYN="Y".*?><LastName>(.*?)</LastName><ForeName>(.*?)</ForeName>', record_string)
    formatted_authors = ', '.join(['{} {}'.format(author[1], author[0]) for author in authors])

    # Extract publication year
    publication_year = re.search(r'<PubDate><Year>(\d{4})</Year>', record_string)
    publication_year = publication_year.group(1) if publication_year else '[]'

    # Extract article title
    article_title = re.search(r'<ArticleTitle>(.*?)</ArticleTitle>', record_string)
    article_title = article_title.group(1) if article_title else '[]'

    # Extract journal title
    journal_title = re.search(r'<Title>(.*?)</Title>', record_string)
    journal_title = journal_title.group(1) if journal_title else '[]'

    # Extract journal volume
    journal_volume = re.search(r'<Volume>(.*?)</Volume>', record_string)
    journal_volume = journal_volume.group(1) if journal_volume else '[]'

    # Extract journal issue
    journal_issue = re.search(r'<Issue>(.*?)</Issue>', record_string)
    journal_issue = journal_issue.group(1) if journal_issue else '[]'

    # Extract start page
    start_page = re.search(r'<StartPage>(.*?)</StartPage>', record_string)
    start_page = start_page.group(1) if start_page else '[]'

    # Extract end page
    end_page = re.search(r'<EndPage>(.*?)</EndPage>', record_string)
    end_page = end_page.group(1) if end_page else '[]'

    # Extract ELocationID
    elocation_id = re.search(r'<ELocationID.*?>(.*?)</ELocationID>', record_string)
    elocation_id = elocation_id.group(1) if elocation_id else '[]'

    return {
        'Authors': formatted_authors,
        'Publication Year': publication_year,
        'Article Title': article_title,
        'Journal Title': journal_title,
        'Journal Volume': journal_volume,
        'Journal Issue': journal_issue,
        'Start Page': start_page,
        'End Page': end_page,
        'ELocationID': elocation_id
    }



iteration = 2
# # Example usage
article_title = 'Comparisons in the Recovery Response From Resistance Exercise Between Young and Middle-Aged Men'
# reference pubmed page: https://pubmed.ncbi.nlm.nih.gov/28859014/

search_dict[iteration] =  search_article(article_title)
# search_dict[iteration]


record_string = search_dict[iteration]
details = extract_pubmed_details(record_string)
details


Match found for Comparisons in the Recovery Response From Resistance Exercise Between Young and Middle-Aged Men: PMID = 28859014.


{'Authors': 'Joseph A Gordon, Jay R Hoffman, Eliott Arroyo, Alyssa N Varanoske, Nicholas A Coker, Yftach Gepner, Adam J Wells, Jeffrey R Stout, David H Fukuda',
 'Publication Year': '2017',
 'Article Title': 'Comparisons in the Recovery Response From Resistance Exercise Between Young and Middle-Aged Men.',
 'Journal Title': 'Journal of strength and conditioning research',
 'Journal Volume': '31',
 'Journal Issue': '12',
 'Start Page': '3454',
 'End Page': '3462',
 'ELocationID': '10.1519/JSC.0000000000002219'}

# Iteration 2: Update API request function to include API key

In [14]:
import requests
import json
import re
api_key = os.getenv('api_ncbi')

def search_article(title):
    base_url = f'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi&api_key={api_key}'
    params = {
        'db': 'pubmed',
        'term': title,
        'field': 'title',
        'retmax': 5,
        'retmode': 'json'
    }

    response = requests.get(base_url, params=params)
    data = response.json()

    try:
        id_list = data['esearchresult']['idlist']
        if id_list:
            for article_id in id_list:
                result = retrieve_citation(article_id).decode('utf-8')
                if title.lower() in result.lower():
                    print(f'Match found for {title}: PMID = {article_id}.')
                    return result
            print('Article title not found in PMIDs.')
            return id_list        
    except:
        print('Article not found.')
        return None
    
def retrieve_citation(article_id):
    base_url = f'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi={api_key}'
    params = {
        'db': 'pubmed',
        'id': article_id
    }

    response = requests.get(base_url, params=params)
    return response.content

def extract_pubmed_details(record_string):
    # Extract authors using APA citation format
    authors = re.findall(r'<Author ValidYN="Y".*?><LastName>(.*?)</LastName><ForeName>(.*?)</ForeName>', record_string)
    formatted_authors = ', '.join(['{} {}'.format(author[1], author[0]) for author in authors])

    # Extract publication year
    publication_year = re.search(r'<PubDate><Year>(\d{4})</Year>', record_string)
    publication_year = publication_year.group(1) if publication_year else '[]'

    # Extract article title
    article_title = re.search(r'<ArticleTitle>(.*?)</ArticleTitle>', record_string)
    article_title = article_title.group(1) if article_title else '[]'

    # Extract journal title
    journal_title = re.search(r'<Title>(.*?)</Title>', record_string)
    journal_title = journal_title.group(1) if journal_title else '[]'

    # Extract journal volume
    journal_volume = re.search(r'<Volume>(.*?)</Volume>', record_string)
    journal_volume = journal_volume.group(1) if journal_volume else '[]'

    # Extract journal issue
    journal_issue = re.search(r'<Issue>(.*?)</Issue>', record_string)
    journal_issue = journal_issue.group(1) if journal_issue else '[]'

    # Extract start page
    start_page = re.search(r'<StartPage>(.*?)</StartPage>', record_string)
    start_page = start_page.group(1) if start_page else '[]'

    # Extract end page
    end_page = re.search(r'<EndPage>(.*?)</EndPage>', record_string)
    end_page = end_page.group(1) if end_page else '[]'

    # Extract ELocationID
    elocation_id = re.search(r'<ELocationID.*?>(.*?)</ELocationID>', record_string)
    elocation_id = elocation_id.group(1) if elocation_id else '[]'

    return {
        'Authors': formatted_authors,
        'Publication Year': publication_year,
        'Article Title': article_title,
        'Journal Title': journal_title,
        'Journal Volume': journal_volume,
        'Journal Issue': journal_issue,
        'Start Page': start_page,
        'End Page': end_page,
        'ELocationID': elocation_id
    }



iteration = 3
# # Example usage
article_title = 'Comparisons in the Recovery Response From Resistance Exercise Between Young and Middle-Aged Men'
# reference pubmed page: https://pubmed.ncbi.nlm.nih.gov/28859014/

search_dict[iteration] =  search_article(article_title)

search_dict[iteration]


# record_string = search_dict[iteration]
# details = extract_pubmed_details(record_string)
# details


Article title not found in PMIDs.


['28859014']

In [15]:
retrieve_citation(search_dict[iteration][0])

b'{"error":"Invalid eutil name \'efetch=8ec7a7fcee0a7d679d8bffc053f45a491d09\'","api-key":"2001:569:51f7:5c00:c91c:a69b:d3f1:eb7a","type":"ip",\n"status":"ok"}'

## 2.1 

In [16]:
import requests
import json
import re
api_key = os.getenv('api_ncbi')

def search_article(title):
    base_url = f'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi&api_key={api_key}'
    params = {
        'db': 'pubmed',
        'term': title,
        'field': 'title',
        'retmax': 5,
        'retmode': 'json'
    }

    response = requests.get(base_url, params=params)
    data = response.json()

    try:
        id_list = data['esearchresult']['idlist']
        if id_list:
            for article_id in id_list:
                result = retrieve_citation(article_id).decode('utf-8')
                if title.lower() in result.lower():
                    print(f'Match found for {title}: PMID = {article_id}.')
                    return result
            print('Article title not found in PMIDs.')
            return id_list        
    except:
        print('Article not found.')
        return None
    
def retrieve_citation(article_id):
    base_url = f'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi&api_key={api_key}'
    params = {
        'db': 'pubmed',
        'id': article_id
    }

    response = requests.get(base_url, params=params)
    return response.content

def extract_pubmed_details(record_string):
    # Extract authors using APA citation format
    authors = re.findall(r'<Author ValidYN="Y".*?><LastName>(.*?)</LastName><ForeName>(.*?)</ForeName>', record_string)
    formatted_authors = ', '.join(['{} {}'.format(author[1], author[0]) for author in authors])

    # Extract publication year
    publication_year = re.search(r'<PubDate><Year>(\d{4})</Year>', record_string)
    publication_year = publication_year.group(1) if publication_year else '[]'

    # Extract article title
    article_title = re.search(r'<ArticleTitle>(.*?)</ArticleTitle>', record_string)
    article_title = article_title.group(1) if article_title else '[]'

    # Extract journal title
    journal_title = re.search(r'<Title>(.*?)</Title>', record_string)
    journal_title = journal_title.group(1) if journal_title else '[]'

    # Extract journal volume
    journal_volume = re.search(r'<Volume>(.*?)</Volume>', record_string)
    journal_volume = journal_volume.group(1) if journal_volume else '[]'

    # Extract journal issue
    journal_issue = re.search(r'<Issue>(.*?)</Issue>', record_string)
    journal_issue = journal_issue.group(1) if journal_issue else '[]'

    # Extract start page
    start_page = re.search(r'<StartPage>(.*?)</StartPage>', record_string)
    start_page = start_page.group(1) if start_page else '[]'

    # Extract end page
    end_page = re.search(r'<EndPage>(.*?)</EndPage>', record_string)
    end_page = end_page.group(1) if end_page else '[]'

    # Extract ELocationID
    elocation_id = re.search(r'<ELocationID.*?>(.*?)</ELocationID>', record_string)
    elocation_id = elocation_id.group(1) if elocation_id else '[]'

    return {
        'Authors': formatted_authors,
        'Publication Year': publication_year,
        'Article Title': article_title,
        'Journal Title': journal_title,
        'Journal Volume': journal_volume,
        'Journal Issue': journal_issue,
        'Start Page': start_page,
        'End Page': end_page,
        'ELocationID': elocation_id
    }



iteration = 3
# # Example usage
article_title = 'Comparisons in the Recovery Response From Resistance Exercise Between Young and Middle-Aged Men'
# reference pubmed page: https://pubmed.ncbi.nlm.nih.gov/28859014/

search_dict[iteration] =  search_article(article_title)

# search_dict[iteration]


record_string = search_dict[iteration]
details = extract_pubmed_details(record_string)
details


Match found for Comparisons in the Recovery Response From Resistance Exercise Between Young and Middle-Aged Men: PMID = 28859014.


{'Authors': 'Joseph A Gordon, Jay R Hoffman, Eliott Arroyo, Alyssa N Varanoske, Nicholas A Coker, Yftach Gepner, Adam J Wells, Jeffrey R Stout, David H Fukuda',
 'Publication Year': '2017',
 'Article Title': 'Comparisons in the Recovery Response From Resistance Exercise Between Young and Middle-Aged Men.',
 'Journal Title': 'Journal of strength and conditioning research',
 'Journal Volume': '31',
 'Journal Issue': '12',
 'Start Page': '3454',
 'End Page': '3462',
 'ELocationID': '10.1519/JSC.0000000000002219'}

## 2.2

In [17]:
import requests
import json
import re
api_key = os.getenv('api_ncbi')

def search_article(title, api_key):
    """
    Search for article title in PubMed database.

    Parameters
    """
    base_url = f'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi&api_key={api_key}'
    params = {
        'db': 'pubmed',
        'term': title,
        'field': 'title',
        'retmax': 5,
        'retmode': 'json'
    }

    response = requests.get(base_url, params=params)
    data = response.json()

    try:
        id_list = data['esearchresult']['idlist']
        if id_list:
            for article_id in id_list:
                result = retrieve_citation(article_id).decode('utf-8')
                if title.lower() in result.lower():
                    print(f'Match found for {title}: PMID = {article_id}.')
                    return result
            print('Article title not found in PMIDs.')
            return id_list        
    except:
        print('Article not found.')
        return None
    
def retrieve_citation(article_id):
    base_url = f'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi&api_key={api_key}'
    params = {
        'db': 'pubmed',
        'id': article_id
    }

    response = requests.get(base_url, params=params)
    return response.content

def extract_pubmed_details(record_string):
    # Extract authors using APA citation format
    authors = re.findall(r'<Author ValidYN="Y".*?><LastName>(.*?)</LastName><ForeName>(.*?)</ForeName>', record_string)
    formatted_authors = ', '.join(['{} {}'.format(author[1], author[0]) for author in authors])

    # Extract publication year
    publication_year = re.search(r'<PubDate><Year>(\d{4})</Year>', record_string)
    publication_year = publication_year.group(1) if publication_year else '[]'

    # Extract article title
    article_title = re.search(r'<ArticleTitle>(.*?)</ArticleTitle>', record_string)
    article_title = article_title.group(1) if article_title else '[]'

    # Extract journal title
    journal_title = re.search(r'<Title>(.*?)</Title>', record_string)
    journal_title = journal_title.group(1) if journal_title else '[]'

    # Extract journal volume
    journal_volume = re.search(r'<Volume>(.*?)</Volume>', record_string)
    journal_volume = journal_volume.group(1) if journal_volume else '[]'

    # Extract journal issue
    journal_issue = re.search(r'<Issue>(.*?)</Issue>', record_string)
    journal_issue = journal_issue.group(1) if journal_issue else '[]'

    # Extract start page
    start_page = re.search(r'<StartPage>(.*?)</StartPage>', record_string)
    start_page = start_page.group(1) if start_page else '[]'

    # Extract end page
    end_page = re.search(r'<EndPage>(.*?)</EndPage>', record_string)
    end_page = end_page.group(1) if end_page else '[]'

    # Extract ELocationID
    doi = re.search(r'<ELocationID.*?EIdType="doi".*?>(.*?)</ELocationID>', record_string)
    doi = doi.group(1) if doi else '[]'

    return {
        'Authors': formatted_authors,
        'Publication Year': publication_year,
        'Article Title': article_title,
        'Journal Title': journal_title,
        'Journal Volume': journal_volume,
        'Journal Issue': journal_issue,
        'Start Page': start_page,
        'End Page': end_page,
        'doi': doi
    }



iteration = 3
# # Example usage
article_title = 'Comparisons in the Recovery Response From Resistance Exercise Between Young and Middle-Aged Men'
# reference pubmed page: https://pubmed.ncbi.nlm.nih.gov/28859014/

# search_dict[iteration] =  search_article(article_title)

# search_dict[iteration]


record_string = search_dict[iteration]
details = extract_pubmed_details(record_string)
details


{'Authors': 'Joseph A Gordon, Jay R Hoffman, Eliott Arroyo, Alyssa N Varanoske, Nicholas A Coker, Yftach Gepner, Adam J Wells, Jeffrey R Stout, David H Fukuda',
 'Publication Year': '2017',
 'Article Title': 'Comparisons in the Recovery Response From Resistance Exercise Between Young and Middle-Aged Men.',
 'Journal Title': 'Journal of strength and conditioning research',
 'Journal Volume': '31',
 'Journal Issue': '12',
 'Start Page': '3454',
 'End Page': '3462',
 'doi': '10.1519/JSC.0000000000002219'}

## 2.3 make api key optional

In [38]:
import requests
import json
import re
api_key = os.getenv('api_ncbi')

def search_article(title, api_key):
    """
    Search for article title in PubMed database.

    Parameters:
    - title (str): article title
    - api_key (str): NCBI API key

    Returns:
    response (str): Article metadata from PubMed database if present. Otherwise, returns list of PMIDs.
    """
    base_url = f'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi'
    if api_key:
        base_url += f'&api_key={api_key}'
    params = {
        'db': 'pubmed',
        'term': title,
        'field': 'title',
        'retmax': 5,
        'retmode': 'json'
    }

    response = requests.get(base_url, params=params)
    data = response.json()

    try:
        id_list = data['esearchresult']['idlist']
        if id_list:
            for article_id in id_list:
                result = retrieve_citation(article_id, api_key).decode('utf-8')
                # if title.lower() in result.lower():
                if f'<articletitle>{title.lower().strip()}' in result.lower():
                # print(f'>{title.lower().strip()}')
                # if f'>{title.lower().strip()}' in result.lower():
                    print(f'Match found for {title}: PMID = {article_id}.')
                    return result
            print('Article title not found in PMIDs.')
            return id_list        
    except:
        print('Article not found.')
        return None
    
def retrieve_citation(article_id, api_key):
    base_url = f'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi'
    if api_key:
        base_url += f'&api_key={api_key}'
    params = {
        'db': 'pubmed',
        'id': article_id
    }

    response = requests.get(base_url, params=params)
    return response.content

def extract_pubmed_details(record_string):
    # Extract authors using APA citation format
    authors = re.findall(r'<Author ValidYN="Y".*?><LastName>(.*?)</LastName><ForeName>(.*?)</ForeName>', record_string)
    formatted_authors = ', '.join(['{} {}'.format(author[1], author[0]) for author in authors])

    # Extract publication year
    publication_year = re.search(r'<PubDate><Year>(\d{4})</Year>', record_string)
    publication_year = publication_year.group(1) if publication_year else '[]'

    # Extract article title
    article_title = re.search(r'<ArticleTitle>(.*?)</ArticleTitle>', record_string)
    article_title = article_title.group(1) if article_title else '[]'

    # Extract journal title
    journal_title = re.search(r'<Title>(.*?)</Title>', record_string)
    journal_title = journal_title.group(1) if journal_title else '[]'

    # Extract journal volume
    journal_volume = re.search(r'<Volume>(.*?)</Volume>', record_string)
    journal_volume = journal_volume.group(1) if journal_volume else '[]'

    # Extract journal issue
    journal_issue = re.search(r'<Issue>(.*?)</Issue>', record_string)
    journal_issue = journal_issue.group(1) if journal_issue else '[]'

    # Extract start page
    start_page = re.search(r'<StartPage>(.*?)</StartPage>', record_string)
    start_page = start_page.group(1) if start_page else '[]'

    # Extract end page
    end_page = re.search(r'<EndPage>(.*?)</EndPage>', record_string)
    end_page = end_page.group(1) if end_page else '[]'

    # Extract ELocationID
    doi = re.search(r'<ELocationID.*?EIdType="doi".*?>(.*?)</ELocationID>', record_string)
    doi = doi.group(1) if doi else '[]'

    return {
        'Authors': formatted_authors,
        'Publication Year': publication_year,
        'Article Title': article_title,
        'Journal Title': journal_title,
        'Journal Volume': journal_volume,
        'Journal Issue': journal_issue,
        'Start Page': start_page,
        'End Page': end_page,
        'doi': doi
    }



iteration = 4
# # Example usage
article_title = 'Recovery From Eccentric Squat Exercise in Resistance-Trained Young and Master Athletes With Similar Maximum Strength: Combining Cold Water Immersion and Compression'
# reference pubmed page: https://pubmed.ncbi.nlm.nih.gov/34566669/

search_dict[iteration] =  search_article(article_title, api_key)

# search_dict[iteration]


record_string = search_dict[iteration]
details = extract_pubmed_details(record_string)
details


Match found for Recovery From Eccentric Squat Exercise in Resistance-Trained Young and Master Athletes With Similar Maximum Strength: Combining Cold Water Immersion and Compression: PMID = 34566669.


{'Authors': 'Julian Schmidt, Alexander Ferrauti, Michael Kellmann, Florian Beaudouin, Mark Pfeiffer, Nicola Reiner Volk, Jan Martin Wambach, Oliver Bruder, Thimo Wiewelhove',
 'Publication Year': '2021',
 'Article Title': 'Recovery From Eccentric Squat Exercise in Resistance-Trained Young and Master Athletes With Similar Maximum Strength: Combining Cold Water Immersion and Compression.',
 'Journal Title': 'Frontiers in physiology',
 'Journal Volume': '12',
 'Journal Issue': '[]',
 'Start Page': '665204',
 'End Page': '[]',
 'doi': '10.3389/fphys.2021.665204'}

In [39]:
search_dict[iteration] =  search_article(article_title, api_key=None)

# search_dict[iteration]


record_string = search_dict[iteration]
details = extract_pubmed_details(record_string)
details

Match found for Recovery From Eccentric Squat Exercise in Resistance-Trained Young and Master Athletes With Similar Maximum Strength: Combining Cold Water Immersion and Compression: PMID = 34566669.


{'Authors': 'Julian Schmidt, Alexander Ferrauti, Michael Kellmann, Florian Beaudouin, Mark Pfeiffer, Nicola Reiner Volk, Jan Martin Wambach, Oliver Bruder, Thimo Wiewelhove',
 'Publication Year': '2021',
 'Article Title': 'Recovery From Eccentric Squat Exercise in Resistance-Trained Young and Master Athletes With Similar Maximum Strength: Combining Cold Water Immersion and Compression.',
 'Journal Title': 'Frontiers in physiology',
 'Journal Volume': '12',
 'Journal Issue': '[]',
 'Start Page': '665204',
 'End Page': '[]',
 'doi': '10.3389/fphys.2021.665204'}

## 2.4

In [41]:
import requests
import json
import re
api_key = os.getenv('api_ncbi')

def search_article(title, api_key):
    """
    Search for article title in PubMed database.

    Parameters:
    - title (str): article title
    - api_key (str): NCBI API key

    Returns:
    response (str): Article metadata from PubMed database if present. Otherwise, returns list of PMIDs.
    """
    base_url = f'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi'
    if api_key:
        base_url += f'&api_key={api_key}'
    params = {
        'db': 'pubmed',
        'term': title,
        'field': 'title',
        'retmax': 5,
        'retmode': 'json'
    }

    response = requests.get(base_url, params=params)
    data = response.json()

    try:
        id_list = data['esearchresult']['idlist']
        if id_list:
            for article_id in id_list:
                result = retrieve_citation(article_id, api_key).decode('utf-8')
                # if title.lower() in result.lower():
                if f'<articletitle>{title.lower().strip()}' in result.lower():
                # print(f'>{title.lower().strip()}')
                # if f'>{title.lower().strip()}' in result.lower():
                    print(f'Match found for {title}: PMID = {article_id}.')
                    return result
            print('Article title not found in PMIDs.')
            return id_list        
    except:
        print('Article not found.')
        return None
    
def retrieve_citation(article_id, api_key):
    base_url = f'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi'
    if api_key:
        base_url += f'&api_key={api_key}'
    params = {
        'db': 'pubmed',
        'id': article_id
    }

    response = requests.get(base_url, params=params)
    return response.content

def extract_pubmed_details(record_string):
    # Extract authors using APA citation format
    authors = re.findall(r'<Author ValidYN="Y".*?><LastName>(.*?)</LastName><ForeName>(.*?)</ForeName>', record_string)
    formatted_authors = ', '.join(['{} {}'.format(author[1], author[0]) for author in authors])

    # Extract publication year
    publication_year = re.search(r'<PubDate><Year>(\d{4})</Year>', record_string)
    publication_year = publication_year.group(1) if publication_year else ''

    # Extract article title
    article_title = re.search(r'<ArticleTitle>(.*?)</ArticleTitle>', record_string)
    article_title = article_title.group(1) if article_title else ''

    # Extract journal title
    journal_title = re.search(r'<Title>(.*?)</Title>', record_string)
    journal_title = journal_title.group(1) if journal_title else ''

    # Extract journal volume
    journal_volume = re.search(r'<Volume>(.*?)</Volume>', record_string)
    journal_volume = journal_volume.group(1) if journal_volume else ''

    # Extract journal issue
    journal_issue = re.search(r'<Issue>(.*?)</Issue>', record_string)
    journal_issue = journal_issue.group(1) if journal_issue else ''

    # Extract start page
    start_page = re.search(r'<StartPage>(.*?)</StartPage>', record_string)
    start_page = start_page.group(1) if start_page else ''

    # Extract end page
    end_page = re.search(r'<EndPage>(.*?)</EndPage>', record_string)
    end_page = end_page.group(1) if end_page else ''

    # Extract ELocationID
    doi = re.search(r'<ELocationID.*?EIdType="doi".*?>(.*?)</ELocationID>', record_string)
    doi = doi.group(1) if doi else ''

    abstract = re.search(r'<AbstractText.*?>(.*?)</AbstractText>', record_string)
    abstract = abstract.group(1) if abstract else ''

    return {
        'Authors': formatted_authors,
        'Publication Year': publication_year,
        'Article Title': article_title,
        'Journal Title': journal_title,
        'Journal Volume': journal_volume,
        'Journal Issue': journal_issue,
        'Start Page': start_page,
        'End Page': end_page,
        'doi': doi,
        'Abstract': abstract
    }



iteration = 4
# # Example usage
article_title = 'Recovery From Eccentric Squat Exercise in Resistance-Trained Young and Master Athletes With Similar Maximum Strength: Combining Cold Water Immersion and Compression'
# reference pubmed page: https://pubmed.ncbi.nlm.nih.gov/34566669/

# search_dict[iteration] =  search_article(article_title, api_key)

# search_dict[iteration]


record_string = search_dict[iteration]
details = extract_pubmed_details(record_string)
details


{'Authors': 'Julian Schmidt, Alexander Ferrauti, Michael Kellmann, Florian Beaudouin, Mark Pfeiffer, Nicola Reiner Volk, Jan Martin Wambach, Oliver Bruder, Thimo Wiewelhove',
 'Publication Year': '2021',
 'Article Title': 'Recovery From Eccentric Squat Exercise in Resistance-Trained Young and Master Athletes With Similar Maximum Strength: Combining Cold Water Immersion and Compression.',
 'Journal Title': 'Frontiers in physiology',
 'Journal Volume': '12',
 'Journal Issue': '',
 'Start Page': '665204',
 'End Page': '',
 'doi': '10.3389/fphys.2021.665204',
 'Abstract': 'The aim of this study was to investigate whether recovery from eccentric squat exercise varies depending on age and to assess whether the use of a mixed-method recovery (MMR) consisting of cold water immersion and compression tights benefits recovery. Sixteen healthy and resistance-trained young (age, 22.1&#xb1;2.1years; <i>N</i>=8) and master male athletes (age, 52.4&#xb1;3.5years; <i>N</i>=8), who had a similar half sq

In [54]:
search_dict[iteration]

'<?xml version="1.0" ?>\n<!DOCTYPE PubmedArticleSet PUBLIC "-//NLM//DTD PubMedArticle, 1st January 2023//EN" "https://dtd.nlm.nih.gov/ncbi/pubmed/out/pubmed_230101.dtd">\n<PubmedArticleSet>\n<PubmedArticle><MedlineCitation Status="PubMed-not-MEDLINE" Owner="NLM"><PMID Version="1">34566669</PMID><DateRevised><Year>2021</Year><Month>09</Month><Day>29</Day></DateRevised><Article PubModel="Electronic-eCollection"><Journal><ISSN IssnType="Print">1664-042X</ISSN><JournalIssue CitedMedium="Print"><Volume>12</Volume><PubDate><Year>2021</Year></PubDate></JournalIssue><Title>Frontiers in physiology</Title><ISOAbbreviation>Front Physiol</ISOAbbreviation></Journal><ArticleTitle>Recovery From Eccentric Squat Exercise in Resistance-Trained Young and Master Athletes With Similar Maximum Strength: Combining Cold Water Immersion and Compression.</ArticleTitle><Pagination><StartPage>665204</StartPage><MedlinePgn>665204</MedlinePgn></Pagination><ELocationID EIdType="pii" ValidYN="Y">665204</ELocationID><

## 2.5

In [67]:
import requests
import json
import re
api_key = os.getenv('api_ncbi')

def search_article(title, api_key):
    """
    Search for article title in PubMed database.

    Parameters:
    - title (str): article title
    - api_key (str): NCBI API key

    Returns:
    response (str): Article metadata from PubMed database if present. Otherwise, returns list of PMIDs.
    """
    base_url = f'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi'
    if api_key:
        base_url += f'&api_key={api_key}'
    params = {
        'db': 'pubmed',
        'term': title,
        'field': 'title',
        'retmax': 5,
        'retmode': 'json'
    }

    response = requests.get(base_url, params=params)
    data = response.json()

    try:
        id_list = data['esearchresult']['idlist']
        if id_list:
            for article_id in id_list:
                result = retrieve_citation(article_id, api_key).decode('utf-8')
                # if title.lower() in result.lower():
                if f'<articletitle>{title.lower().strip()}' in result.lower():
                # print(f'>{title.lower().strip()}')
                # if f'>{title.lower().strip()}' in result.lower():
                    print(f'Match found for {title}: PMID = {article_id}.')
                    return result
            print('Article title not found in PMIDs.')
            return id_list        
    except:
        print('Article not found.')
        return None
    
def retrieve_citation(article_id, api_key):
    base_url = f'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi'
    if api_key:
        base_url += f'&api_key={api_key}'
    params = {
        'db': 'pubmed',
        'id': article_id
    }

    response = requests.get(base_url, params=params)
    return response.content

def extract_pubmed_details(record_string):
    # Extract authors using APA citation format
    authors = re.findall(r'<Author ValidYN="Y".*?><LastName>(.*?)</LastName><ForeName>(.*?)</ForeName>', record_string)
    formatted_authors = ', '.join(['{} {}'.format(author[1], author[0]) for author in authors])

    # Extract publication year
    publication_year = re.search(r'<PubDate><Year>(\d{4})</Year>', record_string)
    publication_year = publication_year.group(1) if publication_year else ''
    publication_month = re.search(r'<PubDate>.*?<Month>(Aug)</Month>.*?</PubDate>', record_string)
    publication_month = publication_month.group(1) if publication_month else ''

    # Extract article title
    article_title = re.search(r'<ArticleTitle>(.*?)</ArticleTitle>', record_string)
    article_title = article_title.group(1) if article_title else ''

    # Extract journal title
    journal_title = re.search(r'<Title>(.*?)</Title>', record_string)
    journal_title = journal_title.group(1) if journal_title else ''

    # Extract journal volume
    journal_volume = re.search(r'<Volume>(.*?)</Volume>', record_string)
    journal_volume = journal_volume.group(1) if journal_volume else ''

    # Extract journal issue
    journal_issue = re.search(r'<Issue>(.*?)</Issue>', record_string)
    journal_issue = journal_issue.group(1) if journal_issue else ''

    # Extract start page
    start_page = re.search(r'<StartPage>(.*?)</StartPage>', record_string)
    start_page = start_page.group(1) if start_page else ''

    # Extract end page
    end_page = re.search(r'<EndPage>(.*?)</EndPage>', record_string)
    end_page = end_page.group(1) if end_page else ''

    # Extract ELocationID
    doi = re.search(r'<ELocationID.*?EIdType="doi".*?>(.*?)</ELocationID>', record_string)
    doi = doi.group(1) if doi else ''

    abstract = re.search(r'<AbstractText.*?>(.*?)</AbstractText>', record_string)
    abstract = abstract.group(1) if abstract else ''

    return {
        'title': article_title,
        'abstract': abstract,
        'publication': journal_title,
        'authors': formatted_authors,
        'year': publication_year,
        'month': publication_month,
        'pub_volume': journal_volume,
        'pub_issue': journal_issue,
        'start_page': start_page,
        'end_page': end_page,
        'doi': doi,
    }



iteration = 4
# # Example usage
article_title = 'Recovery From Eccentric Squat Exercise in Resistance-Trained Young and Master Athletes With Similar Maximum Strength: Combining Cold Water Immersion and Compression'
# reference pubmed page: https://pubmed.ncbi.nlm.nih.gov/34566669/

# search_dict[iteration] =  search_article(article_title, api_key)

# search_dict[iteration]


record_string = search_dict[iteration]
details = extract_pubmed_details(record_string)
details


{'title': 'Recovery From Eccentric Squat Exercise in Resistance-Trained Young and Master Athletes With Similar Maximum Strength: Combining Cold Water Immersion and Compression.',
 'abstract': 'The aim of this study was to investigate whether recovery from eccentric squat exercise varies depending on age and to assess whether the use of a mixed-method recovery (MMR) consisting of cold water immersion and compression tights benefits recovery. Sixteen healthy and resistance-trained young (age, 22.1&#xb1;2.1years; <i>N</i>=8) and master male athletes (age, 52.4&#xb1;3.5years; <i>N</i>=8), who had a similar half squat 1-repetition maximum relative to body weight, completed two identical squat exercise training sessions, separated by a 2-week washout period. Training sessions were followed by either MMR or passive recovery (PR). Internal training loads [heart rate and blood lactate concentration (BLa)] were recorded during and after squat sessions. Furthermore, maximal voluntary isometric co

In [68]:
record_string = search_dict[0]
details = extract_pubmed_details(record_string)
details

{'title': 'Daily energy expenditure through the human life course.',
 'abstract': 'Total daily energy expenditure ("total expenditure") reflects daily energy needs and is a critical variable in human health and physiology, but its trajectory over the life course is poorly studied. We analyzed a large, diverse database of total expenditure measured by the doubly labeled water method for males and females aged 8 days to 95 years. Total expenditure increased with fat-free mass in a power-law manner, with four distinct life stages. Fat-free mass-adjusted expenditure accelerates rapidly in neonates to ~50% above adult values at ~1 year; declines slowly to adult levels by ~20 years; remains stable in adulthood (20 to 60 years), even during pregnancy; then declines in older adults. These changes shed light on human development and aging and should help shape nutrition and health strategies across the life span.',
 'publication': 'Science (New York, N.Y.)',
 'authors': 'Herman Pontzer, Yosuke 

In [71]:
record_string = search_dict[3]
details = extract_pubmed_details(record_string)
details

{'title': 'Comparisons in the Recovery Response From Resistance Exercise Between Young and Middle-Aged Men.',
 'abstract': 'Gordon, JA III, Hoffman, JR, Arroyo, E, Varanoske, AN, Coker, NA, Gepner, Y, Wells, AJ, Stout, JR, and Fukuda, DH. Comparisons in the recovery response from resistance exercise between young and middle-aged men. J Strength Cond Res 31(12): 3454-3462, 2017-The purpose of this study was to compare the effects of a bout of high-volume isokinetic resistance exercise protocol (HVP) on lower-body strength and markers of inflammation and muscle damage during recovery between young and middle-aged adult men. Nineteen recreationally trained men were classified as either a young adult (YA: 21.8 &#xb1; 2.0 years; 90.7 &#xb1; 11.6 kg) or a middle-aged adult (MA: 47.0 &#xb1; 4.4 years; 96.0 &#xb1; 21.5 kg) group. The HVP consisted of 8 sets of 10 repetitions, with 1 minute of rest between each set, performed on an isokinetic dynamometer at 60&#xb0;&#xb7;s. Maximal voluntary is

# *End of Page*