In [1]:
import pandas as pd
import numpy as np
import importlib
import seaborn as sns
from matplotlib import pyplot
import time

In [5]:
str_database_name = 'mBase_29jun.xlsx'
str_version_db_name = 'mBase_29jun'
csv_output_file_name = str_version_db_name + "_abstract.csv"
print(csv_output_file_name)

milk_df = pd.read_excel(str_database_name,sheet_name='Source')
milk_df = milk_df.dropna(subset=['Source_DOI'])

mBase_29jun_abstract.csv


In [6]:
milk_df

Unnamed: 0,Source,Title,Source_DOI,PublDate,InPutDate,InputBy,Finished,is_useful,is_supplementary,Comment / Summary,Milk
0,Andres_12,Developmental Status of 1-year-old Infants Fed...,https://doi.org/10.1542/peds.2011-3121,2012.0,2020.0,Chyanne,1.0,0.0,0.0,No Human Milk Composition - Only states that i...,
1,Calvo-Castro_19,"Pharmacokinetics of Vitamin E, γ-Oryzanol, and...",https://doi.org/10.1007/s00394-018-1770-6,2019.0,2020.0,Chyanne,1.0,0.0,0.0,No Human Milk Composition - Only compositions ...,
2,Dandrifosse_00,Are milk polyamines preventive agents against ...,https://doi.org/10.1017/s0029665100000100,2000.0,2020.0,Chyanne,1.0,0.0,0.0,Review (descriptive) about spermine and sperm...,HumanMilk
3,Deep_12,Angiopreventive Efficacy of Pure Flavonolignan...,https://doi.org/10.1371/journal.pone.0034630,2012.0,2020.0,Chyanne,1.0,0.0,0.0,No Human Milk Composition - Only composition o...,
4,Dunstan_04,The effect of supplementation with fish oil du...,https://doi.org/10.1111/j.1365-2222.2004.02028.x,2004.0,2020.0,Chyanne,1.0,1.0,0.0,"cytokines, Iga, and sCD14",HumanMilk
...,...,...,...,...,...,...,...,...,...,...,...
361,Gibbs_77,"Drip Breast Milk: It's Composition, Collection...",https://doi.org/10.1016/0378-3782(77)90037-8,1977.0,,Tünde,,,,,
362,Goehring_16,"Similar to Those Who Are Breastfed, Infants Fe...",https://doi.org/10.3945/jn.116.236919,2016.0,,Tünde,,,,,
363,Goelz_09,Effects of Different CMV-heat-inactivation-met...,https://doi.org/10.1203/PDR.0b013e3181991f18,2009.0,,Tünde,,,,,
364,Goes_02,Nutrient Composition of Banked Human Milk in B...,https://doi.org/10.1016/s0899-9007(02)00813-4,2002.0,,Tünde,,,,,


In [7]:
raw_dois = milk_df['Source_DOI'].tolist()
dois = set()
pmids = set()
urls_not_in_pubmed = set()
for raw_doi in raw_dois:
    if 'doi.' in raw_doi:
        doi = raw_doi.split('org/')[1].strip()
        dois.add(doi)
    elif 'PMID:' in raw_doi:
        pmid = raw_doi.split(':')[1].strip()
        pmids.add(pmid)
    else:
        urls_not_in_pubmed.add(raw_doi)

In [8]:
len(dois) + len(pmids) + len(urls_not_in_pubmed)

358

In [9]:
pmids

{'13099905', '2260791', '3372922', '3808849', '8920139', '9557492'}

In [7]:
urls_not_in_pubmed

{'-',
 'http://www.mcdb.ca/metabolites',
 'https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/743819/The_Composition_of_Mature_Human_Milk__1977_.pdf',
 'https://hmdb.ca/metabolites',
 'https://pediatrics.aappublications.org/content/91/3/637',
 'https://www.ars.usda.gov/northeast-area/beltsville-md-bhnrc/beltsville-human-nutrition-research-center/food-surveys-research-group/docs/fndds-download-databases/',
 'https://www.fasebj.org/doi/abs/10.1096/fasebj.23.1_supplement.lb448'}

In [8]:
import urllib.request as request
from lxml import etree
import math

# Constructs appropriate url for pubmed api from search terms
def construct_url(url_input, query_type):
    # Constructs url for search query
    if query_type == 'search':
        base_url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term='
        if isinstance(url_input, str):
            term_url = url_input.replace(" ", "%20")
        else:
            adjusted_terms = [s.replace(" ", "%20") for s in url_input]
            term_url = '%20AND%20'.join(adjusted_terms)
        url = base_url + term_url
        return url
    elif query_type == 'document':
        base_url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id='
        doc_urls = ""
        for i in url_input:
            if isinstance(i, str): 
                doc_urls = doc_urls + "," + i
            else:
                doc_urls = doc_urls + "," + str(i)
        url = base_url + doc_urls.lstrip(",") + '&retmode=xml'
        return url

# Divides doc ids for larger paper queries in retrieve_doc_info()
def divide_list(ids, num_divisions):
    split_ids = np.array_split(list(ids), num_divisions)
    split_ids = [np.ndarray.tolist(split_ids[i]) for i in range(len(split_ids))]

    return split_ids

# Enters search terms into pubmed database to return document ID's
def search_pubmed(search_term):
    url = construct_url(search_term, 'search')
    with request.urlopen(url) as response:
        xml = response.read()
    root = etree.fromstring(xml)
    # Recursively gets all objects where the tag is Id
    if (root.findall('.//Count') is None) | (root.findall('.//Count')[0].text == '0'):
        return np.nan, np.nan
    elements = root.findall('.//Count')
    ID = root.findall('.//Id')[0].text
    # Converts all lxml objects to their text values
    ids = [i.text for i in elements]
    return ids[0], ID

# Retrieves document (paper) info using pubmed paper ids
def retrieve_doc_info(ids):
    # Can't query too much in a single query, so divides larger id lists into seperate queries
    num_loops = int(math.ceil(len(ids) / 100))
    # Have to split requests larger than 100 documents to keep it within url size
    ids = divide_list(ids, num_loops)
    documents = []
    # Retrieves xml data from pubmed
    for i in ids:
        url = construct_url(i, 'document')
        with request.urlopen(url) as response:
            xml = response.read()
        root = etree.fromstring(xml)
        documents = documents + root.findall('PubmedArticle')
    info = pd.DataFrame()
    for document in documents:
        doc_id = int(document.find('.//PMID').text)
        paper = document.find('.//ArticleTitle').text
        journal = document.find('.//Title').text
        year = document.find('.//Year').text

        if document.find('.//AbstractText') is not None:
            abstract = document.find('.//AbstractText').text
        else:
            abstract = None
        new_row = {
            'PMID' : doc_id,
            'Paper' : paper,
            'Journal' : journal,
            'Year' : year,
            'Abstract' : abstract,
            'Webpage' : 'https://www.ncbi.nlm.nih.gov/pubmed/' + str(doc_id)
        }
        info = info.append(new_row, ignore_index = True)
    info['PMID'] = info['PMID'].astype('int32')
    return info.reset_index(drop = True)

In [9]:
import time

unknown_dois = set()
# NCBI requirement: At most three queries per second if no API key is provided.
# Equivalently, at least a third of second between queries
delay = 0.33333333
for doi in dois:
    time.sleep(delay)
    pmid = search_pubmed(doi)[1]
    if str(pmid) != 'nan':
        pmids.add(pmid)
    else:
        unknown_dois.add(doi)

In [10]:
pmids

{'10200140',
 '10353922',
 '10400108',
 '10489525',
 '10569230',
 '10652985',
 '10673912',
 '10828177',
 '10837303',
 '10940350',
 '11234918',
 '11242449',
 '1130835',
 '11425797',
 '11787695',
 '11787710',
 '11787720',
 '11927700',
 '11994712',
 '12225305',
 '12496220',
 '12499346',
 '12548290',
 '12603709',
 '12812151',
 '12911694',
 '13099905',
 '1331178',
 '1442650',
 '14576748',
 '14599042',
 '14657823',
 '14973953',
 '14991268',
 '15097435',
 '15295084',
 '15298564',
 '15343178',
 '15384616',
 '15537722',
 '15585800',
 '15623823',
 '15681631',
 '15693764',
 '15693858',
 '15736916',
 '15867329',
 '16011458',
 '16061595',
 '16203669',
 '16261995',
 '16280560',
 '16306212',
 '16325533',
 '16390829',
 '16510619',
 '16512938',
 '16614484',
 '17002410',
 '17073150',
 '17177685',
 '17211132',
 '17295795',
 '17311985',
 '17443195',
 '17556680',
 '17596738',
 '17661565',
 '17761135',
 '17852471',
 '17915960',
 '17941914',
 '17942378',
 '17957152',
 '18053685',
 '18088092',
 '1808991',
 '1

In [11]:
unknown_dois

{'10.1002/(SICI)1098-2345(1997)41:3<195::AID-AJP3>3.0.CO;2-S',
 '10.1002/(SICI)1521-3803(19990801)43:4<233::AID-FOOD233>3.0.CO;2-B',
 '10.1016/0140-6736(92)92684-8',
 '10.1016/0163-7827(95)00010-0',
 '10.1016/0377-8401(96)00973-X',
 '10.1016/0378-3782(77)90037-8',
 '10.1016/0378-3782(80)90003-1',
 '10.1016/0378-3782(81)90051-7',
 '10.1016/0378-3782(84)90096-3',
 '10.1016/0378-3782(92)90191-i',
 '10.1016/S0022-3476(05)81238-7',
 '10.1016/S0031-3955(05)70285-6',
 '10.1016/S0031-3955(05)70286-8',
 '10.1016/S0031-3955(05)70295-9',
 '10.1016/S0045-6535(99)00360-4',
 '10.1016/S0091-6749(03)01941-9',
 '10.1016/S0091-6749(99)70021-7',
 '10.1016/S0169-409X(03)00033-4',
 '10.1016/S0271-5317(87)80171-9',
 '10.1016/S0271-5317(98)00153-5',
 '10.1016/S0301-6226(01)00196-8',
 '10.1016/S0378-3782(97)00051-0',
 '10.1016/S0946-672X(98)80005-9',
 '10.1016/S0958-6946(02)00120-6',
 '10.1016/s0022-3476(05)81239-9',
 '10.1016/s0022-3476(77)80759-2',
 '10.1016/s0022-3476(77)80885-8',
 '10.1016/s0022-3476(80)8

In [12]:
manual_pmids = set([9057965, 10481820, 1357244, 9039426, 363402, 7389626, 7249996, 6714135, 1396267, 1560328, 11236733, 11236734, 11236727, 10739053, 14564350, 10589009, 12706546, 9363415, 9857329, 1560329, 318682, 336859, 7359266, 7359271, 7277107, 3772659, 3495653, 9602205, 10631803, 12093436, 15636165, 11913692, 12741556, 16537962, 1779072])
dois_not_in_pubmed = ['10.1016/0377-8401(96)00973-X',
    '10.1016/S0271-5317(87)80171-9',
    '10.1016/S0271-5317(98)00153-5',
    '10.1016/S0301-6226(01)00196-8',
    '10.1016/S0958-6946(02)00120-6',
    '10.1016/s0378-4347(97)00439-8',
    '10.1097/00005176-200000002-00003',
    '10.1289/ehp.116-a426',
    '10.1590/S0044-59672010000100008',
    '10.2223/JPED.1284',
    'https://doi.'
]
manual_abstracts = [
    "This study aimed to examine the carbohydrate content (monosaccharides, lactose, and oligosaccharides) of human milk over 4 months of lactation to determine whether any changes occurred over time. Milk samples from 46 mothers, who delivered at term, were collected at 4th, 10th, 30th, 60th, 90th, and 120th days after delivery. Carbohydrates were measured by high-pressure liquid chromatography. Mean lactose concentration (±SD) increased from 56 ± 6.06 g/L on day 4 to 68.9 ± 8.16 g/L on day 120. Oligosaccharide level decreased from 20.9 ± 4.81 g/L to 12.9 ± 3.30 g/L, respectively. Monosaccharides represented only 1.2% of total carbohydrates. The changes in carbohydrate composition found indicate that carbohydrate synthesis by the mammary gland is a dynamic process. The physiological and biological relevance of human milk oligosaccharides is also discussed.",
    "This study represents analysis of 220 human milk samples from nine cities in nine countries crossing five continents. Samples were collected from mothers from 18 to 40 years of age with a lactation range from 30 days to 188 days. Amino acids were analyzed using Waters Accq_Tag HPLC method. Total protein was calculated from the nitrogen content multiplied by a protein factor of 6.25. The nitrogen content was analyzed using FP‐528 nitrogen analyzer made by LECO Corporation. True protein was calculated from amino acid analysis and includes protein, free amino acids and peptides. Total protein, true protein, and the amino acid composition are not influenced by mother's age, and are statistically similar among locations, with the exception of samples from Chile, which had higher total protein and true protein. True protein is approximately 76% of total protein, is statistically similar among locations, and is not influenced by mother's age or days of lactation. Total protein, true protein, and individual amino acid concentrations in human milk steadily decline from 30 to 151 days of lactation, and are significantly higher in the second month than in the following four months. The amino acid profiles are statistically similar among locations. These results confirm higher protein and amino acid levels in early lactation and demonstrate consistency in the protein and amino acid composition of human milk across all locations.",
    "The fatty acid composition of bovine milk fat can be substantially altered by feeding lipid sources which alter the fatty acid profile of lipid entering the intestine from the rumen. As long-chain fatty acids of dietary origin can be incorporated directly into milk fat the opportunity exists to alter the ratio of short and long-chain fatty acids as well as the degree of saturation of milk fat. In practice our ability to alter the fatty acid profile of milk fat is limited not by the synthetic capacity of the mammary gland, but rather by the challenge of achieving effective protection of unsaturated dietary fatty acids from biohydrogenation in the rumen, as well as keeping the level of polyunsaturated fatty acids within the range where the organoleptic quality and shelf-life of milk and dairy products are not compromised. The fatty acid composition of oilseeds such as canola are considered desirable from a human health perspective and thus their inclusion in the diet of dairy cattle as a means of achieving a more desirable fatty acid profile in milk fat may enhance the nutritive quality of milk.",
    "Although the biochemistry of milk from mothers giving birth prematurely has been described in some detail, it was not known whether premature birth would affect vitamin D status of mothers, the vitamin D activity of milk or the concentration of vitamin D binding protein in milk. In the present study, plasma and milk samples were collected from eight mothers giving birth prematurely and compared to samples obtained from nine mothers giving birth at term. Vitamin D intake from milk and vitamin D supplements was at least 400 IU/day in all mothers. Complete 24-hour expressions of milk were collected between 14 and 21 days postpartum. No differences were observed in plasma D2 (5.1+0.7 and 3.7+1.0 ng/ml, mean±SEM) or milk D2 (0.19±0.06 and 0.12±0.02 ng/ml) in term versus preterm groups, respectively. In contrast, plasma vitamin D3 (0.7±0.1 and 2.7±0.5, ng/ml, p<0.01) and milk vitamin D3 (0.14±0.02 and 0.23±0.03, ng/ml, p<0.05) were significantly higher in preterm mothers. This was associated with higher plasma vitamin D binding protein (DBP) in preterm mothers (308±13 versus 463±30, ug/ml, p<0.01) but not preterm milk DBP. There were no differences in milk vitamin D sterols nor DBP content between the second and fourth week of lactation. In the case of the hydroxylated sterols, 25-hydroxyvitamin D2 and 25-hydroxyvitamin D3, no differences between mother groups could be discerned in plasma or milk. Differences in the synthesis and/or transport of vitamin D and DBP which are reflected in PT milk and plasma levels in the early postpartum period may relate to cutaneous synthesis or abbreviated gestational stage. Based on the vitamin D sterols measured in this study, the total vitamin D activity in human milk in the first month of lactation averaged 80±9 IU/L for preterm milk and 60±7 IU/L for term milk, both values which are in excess of the often quoted value for human milk vitamin D of 22 IU/L.",
    "The purpose of this study was to determine the vitamin B12 status of vegetarian, lactating women who had immigrated to the US from India. Women exclusively breast-feeding their 4–20 week old infants were studied [8 lactovegetarians (VEG group), 11 omnivorous women (CONTROL group)]. Fasting maternal blood and breast milk samples were collected and analyzed for B12 concentrations; a random urine sample was collected from infants to determine methylmalonic acid (MMA) concentration. All subjects, except three VEG, reported taking vitamin supplements containing 2 to 25 μg of vitamin B12 regularly during pregnancy and lactation. Vitamin B12 intake from dietary sources and total intake (dietary plus supplement) was significantly lower in the VEG group compared to CONTROL subjects (1.35 ± 0.27 vs 2.61 ± 0.23, p<0.01 and 4.60 ± 1.43 vs 11.70 ± 2.47 μg/d, p = 0.04; respectively). Serum and milk vitamin B12 concentrations in VEG women were significantly lower than the CONTROL group (264 ± 52 vs 531 ± 78 pmol/L, p = 0.02 and 277 ± 39 vs 544 ± 104 pmol/L, p = 0.05; respectively). After controlling for diet, the mean milk vitamin B12 concentration of mothers at 16–20 wks postpartum was significantly lower than mothers 4–8 wks postpartum (p = 0.04). Assuming that infants consumed 750 ml of milk/d, 75% of infants of VEG mothers and 27% of infants of CONTROL mothers were not consuming the RDA for vitamin B12. All MMA concentrations were within normal limits except for one infant whose mother consumed a VEG diet. These results suggest the importance of supplementation with vitamin B12 during lactation among lactovegetarian women.",
    "This review summarises the known effects of forages, animal fats or marine oils on bovine milk fat secretion and composition. Special attention is given to fatty acids that could play a positive role for human health, such as butyric acid, oleic acid, C18 to C22 polyunsaturated fatty acids and conjugated linoleic acid (CLA). The efficiency of the transfer of n-3 polyunsaturated fatty acids from diet to milk is reviewed. Milk fat from pasture fed cows seems to be higher in linolenic acid than milk fat from cows receiving preserved grass or maize, but the magnitude of this difference is limited. Indirect comparisons show that milk fat from maize silage diets is richer in short-chain FA and linoleic acid when compared to grass silage diets. Compared to fresh grass, grass silage favours myristic and palmitic acids at the expense of mono- and polyunsaturated FA, including CLA. Protected tallow allows for a large increase in milk fat yield, and in the percentage of milk stearic and oleic acids, at the expense of medium chain FA. Non-protected tallow has a similar effect on medium chain FA without increasing so much C18 FA yield, which explains that it does not increase milk fat yield. Dose–response curves of milk CLA are reviewed for marine oil supplements, as well as the relationship between milk CLA and trans-C18:1 contents. The potential of marine oil supplementation to increase the mean CLA content in cow milk fat is large (more than 300% above basal values). A specific role for dietary C20:5 n-3 in the sharp decrease in milk fat secretion after fish oil supplementation is suggested. However, there is a need to evaluate how the different feeding strategies could change the other aspects of milk fat quality, such as taste, oxidative stability or manufacturing value.",
    "Milk composition of mammallian species varies widely with reference to genetic, physiological and nutritional factors and environmental conditions. In this survey, the composition of mare's milk is reviewed and compared to human and cow's milk, considering principal protein fractions and fatty acid content. Protein content in mare's milk is higher than in human milk and lower than in cow's milk; casein concentration in mare's milk is intermediate between the other two milks. Fat content is lower in mare's milk compared to human and cow's milk. Distribution of di- and tri-glycerides in mare's and women’ milk is similar. The proportion of polyunsaturated fatty acids in mare's and human milk is remarkably higher than in cow's milk. Mare's milk shows some structural and functional peculiarities that make it more suitable for human nourishment than cow's milk.",
    "Pooled human milk was subjected to Holder pasteurization and storage at −20°C up to 90 days and examined for its content of fat and l-lactate and for lipid composition. This treatment reduced fats by 6% and l-lactate by at least 7%. In addition, pasteurization and storage induced triglyceride hydrolysis. The absolute amount of free fatty acids (FFAs) which was 0.5% after collection, doubled after pasteurization and rose even more after storage. Different FFA compositions were found by several authors using the same analytical method even for milk samples subjected to the same treatment. More detailed information on procedures must be given to explain the different results.",
    "When it comes to feeding the newborn, human milk is, from an evolutionary perspective, the biological norm, the time-tested standard of care. The health benefits to the infant of breast-feeding have been amply documented; numerous studies strongly indicate significantly decreased risks of infection, allergy, asthma, arthritis, diabetes, obesity, cardiovascular disease, and various cancers in both childhood and adulthood. Among the more fundamental disadvantages of not being breastfed is a loss of immunologic protection afforded by maternal colostrum, a “pre-milk” fluid secreted only during the first days after delivery, as well as numerous other bioactive factors that help protect the infant through the first two years of life, when the immune and nervous systems are incompletely developed. Nevertheless, given the tendency for persistent organic pollutants (POPs), pesticides, heavy metals, and other contaminants to accumulate in human milk, researchers and parents alike are asking whether the nursling’s exposure to these pollutants might reduce or even override the health benefits.",
    "This study treats a transversal, descriptive, observational and analytical study with the objective of determining the Vitamin A chemical content in Human Maternal Milk (HMM) of 50 donor mothers from the Ana Braga Maternity Human Milk Bank (HMB), in Manaus, Amazonas. Samples were collected weekly and maintained frozen until pasteurization. Analyses of retinol and β-carotene in human milk were undertaken before and after the treatment (pasteurization), chromatographic method. Aliquots of approximately 5 mL of milk were taken from each sample. The babies included in the study were weight > 2500g. The Vitamin A concentration in the milk, before and after the processing, was found to be insufficient for that required (46.57 ± 6.28 µg/dL and 31.55 ± 8.29 µg/dL respectively) for children who feed exclusively on HMB milk, notably premature babies, whose Vitamin A requirements are greater and whose capacity for ingesting milk is lower than that of fully born babies. The loss of Vitamin A through processing was, in this study, 32.51%. To guarantee hepatic reserves and protect against deficiencies, it would be necessary that the vitamin A concentration in the milk be higher than 80 µg/dL.",
    "OBJECTIVES: To analyze the effect of processing on the Vitamin A levels of milk that is donated to the Human Milk Bank at the University Maternity Hospital, and to evaluate whether this milk supplies the infants' Vitamin A requirement. METHODS: Sixty milk samples from the Human Milk Bank were divided into two equal portions. One milk portion was extracted prior to processing, while another fraction was subjected to pasteurization and later extracted. The samples were analyzed using high performance liquid chromatography. Analysis with Student's t test indicated that the difference between mean retinol levels was statistically significant at the p < 0.05 level. RESULTS: The retinol content found in the milk prior to processing was 55.4±34.0 µg/100 ml, whereas for the processed milk this level was 36.6±26.1 µg/100 ml (p < 0.001). CONCLUSION: It was found that retinol loss occurs during milk processing and that the milk from the Human Milk Bank does not meet infants' Vitamin A requirement."
]

pmids |= manual_pmids

In [13]:
results = retrieve_doc_info(pmids)
results

Unnamed: 0,Abstract,Journal,PMID,Paper,Webpage,Year
0,There has been a striking generational increas...,Nature medicine,30643289,Healthy infants harbor intestinal bacteria tha...,https://www.ncbi.nlm.nih.gov/pubmed/30643289,2019
1,"For term infants, human milk provides adequate...",The Cochrane database of systematic reviews,14973953,Multicomponent fortified human milk for promot...,https://www.ncbi.nlm.nih.gov/pubmed/14973953,2004
2,The overall beneficial effects of breastfeedin...,Pediatric allergy and immunology : official pu...,23577672,"Relationship between breast milk sCD14, TGF-β1...",https://www.ncbi.nlm.nih.gov/pubmed/23577672,2014
3,Milk samples from 33 women from a rural area o...,The American journal of clinical nutrition,219681,Studies on the quality of breast milk during 2...,https://www.ncbi.nlm.nih.gov/pubmed/219681,1979
4,The aim of this study was to compare the actua...,Journal of perinatology : official journal of ...,19444237,Preterm infants fed fortified human milk recei...,https://www.ncbi.nlm.nih.gov/pubmed/19444237,2009
...,...,...,...,...,...,...
325,Insufficient polyamine intake could play a rol...,The Proceedings of the Nutrition Society,10828177,Are milk polyamines preventive agents against ...,https://www.ncbi.nlm.nih.gov/pubmed/10828177,2000
326,Bodyweight gain of formula-fed babies is usual...,Pediatrics international : official journal of...,15693858,Lower energy content of human milk than calcul...,https://www.ncbi.nlm.nih.gov/pubmed/15693858,2005
327,The aim of the present study was to identify a...,Annals of nutrition & metabolism,28301837,Breast Milk Polyamines and Microbiota Interact...,https://www.ncbi.nlm.nih.gov/pubmed/28301837,2018
328,The authors discovered some errors regarding r...,Cancer,25855820,"Erratum: Borderud SP, Li Y, Burkhalter JE, She...",https://www.ncbi.nlm.nih.gov/pubmed/25855820,2015


In [14]:
for abstract in manual_abstracts:
    new_row = {
        'PMID' : 'nan',
        'Paper' : 'nan',
        'Journal' : 'nan',
        'Year' : 'nan',
        'Abstract' : abstract,
        'Webpage' : 'nan'
    }
    results = results.append(new_row, ignore_index = True)
results.to_csv(csv_output_file_name, index=False)
results = pd.read_csv(csv_output_file_name)
results

Unnamed: 0,Abstract,Journal,PMID,Paper,Webpage,Year
0,There has been a striking generational increas...,Nature medicine,30643289.0,Healthy infants harbor intestinal bacteria tha...,https://www.ncbi.nlm.nih.gov/pubmed/30643289,2019.0
1,"For term infants, human milk provides adequate...",The Cochrane database of systematic reviews,14973953.0,Multicomponent fortified human milk for promot...,https://www.ncbi.nlm.nih.gov/pubmed/14973953,2004.0
2,The overall beneficial effects of breastfeedin...,Pediatric allergy and immunology : official pu...,23577672.0,"Relationship between breast milk sCD14, TGF-β1...",https://www.ncbi.nlm.nih.gov/pubmed/23577672,2014.0
3,Milk samples from 33 women from a rural area o...,The American journal of clinical nutrition,219681.0,Studies on the quality of breast milk during 2...,https://www.ncbi.nlm.nih.gov/pubmed/219681,1979.0
4,The aim of this study was to compare the actua...,Journal of perinatology : official journal of ...,19444237.0,Preterm infants fed fortified human milk recei...,https://www.ncbi.nlm.nih.gov/pubmed/19444237,2009.0
...,...,...,...,...,...,...
336,Milk composition of mammallian species varies ...,,,,,
337,Pooled human milk was subjected to Holder past...,,,,,
338,"When it comes to feeding the newborn, human mi...",,,,,
339,"This study treats a transversal, descriptive, ...",,,,,


In [15]:
results

Unnamed: 0,Abstract,Journal,PMID,Paper,Webpage,Year
0,There has been a striking generational increas...,Nature medicine,30643289.0,Healthy infants harbor intestinal bacteria tha...,https://www.ncbi.nlm.nih.gov/pubmed/30643289,2019.0
1,"For term infants, human milk provides adequate...",The Cochrane database of systematic reviews,14973953.0,Multicomponent fortified human milk for promot...,https://www.ncbi.nlm.nih.gov/pubmed/14973953,2004.0
2,The overall beneficial effects of breastfeedin...,Pediatric allergy and immunology : official pu...,23577672.0,"Relationship between breast milk sCD14, TGF-β1...",https://www.ncbi.nlm.nih.gov/pubmed/23577672,2014.0
3,Milk samples from 33 women from a rural area o...,The American journal of clinical nutrition,219681.0,Studies on the quality of breast milk during 2...,https://www.ncbi.nlm.nih.gov/pubmed/219681,1979.0
4,The aim of this study was to compare the actua...,Journal of perinatology : official journal of ...,19444237.0,Preterm infants fed fortified human milk recei...,https://www.ncbi.nlm.nih.gov/pubmed/19444237,2009.0
...,...,...,...,...,...,...
336,Milk composition of mammallian species varies ...,,,,,
337,Pooled human milk was subjected to Holder past...,,,,,
338,"When it comes to feeding the newborn, human mi...",,,,,
339,"This study treats a transversal, descriptive, ...",,,,,
