# Building a list of articles that present results of pulsed laser-ablative synthesis of nanoparticles

Articles are searched in PubMed database.
Articles are searched in 3 steps:
1. The initial (base) list consists of articles where:
   1. the author list includes Kabashin or Barcikowski, or
   2. the keywords include "pulsed laser ablation in liquids" or "laser ablation in liquids" or "laser fragmentation in liquids" or ("nanoparticles" and "laser ablation");
   3. and, in every case, the article was published after 1992.
2. Articles that cite articles from the base list are collected.
3. Articles that are cited by articles from the base list are collected.

In [None]:
import logging
from typing import  Iterable

import pandas as pd

from artfinder import PubMedArticle, PubMed

# Layout of every log line: time with milliseconds, origin
# (module/function:line), level and message.
LOG_FORMAT = '%(asctime)s.%(msecs)03d - %(module)s/%(funcName)s:%(lineno)d - %(levelname)s - %(message)s'
LOG_DATEFMT = '%H:%M:%S'

# Switch `level` to logging.DEBUG for more verbose output.
logging.basicConfig(format=LOG_FORMAT, datefmt=LOG_DATEFMT, level=logging.INFO)

# PubMed client used by the queries in this notebook.
pubmed = PubMed()

TypeError: __slots__ must be identifiers

Helper functions

In [None]:
def get_df(articles: Iterable[PubMedArticle]) -> pd.DataFrame:
    """Build a DataFrame with one row per article.

    Every article is converted with its ``to_dict()`` method. The number of
    converted articles is printed before the frame is returned.
    """
    rows = []
    for article in articles:
        rows.append(article.to_dict())

    print(f"Total articles: {len(rows)}")
    frame = pd.DataFrame(rows)
    return frame

## Step 1
The base list is the set of articles that satisfy:
 - Author: Kabashin 
 - or Author: Barcikowski 
 - or Keywords: "pulsed laser ablation in liquids" 
 - or Keywords: "laser ablation in liquids"
 - or Keywords: "nanoparticles" AND "laser ablation"
 - or Keywords: "laser fragmentation in liquids" 
 - and, in every case, the article was published after 1992

In [None]:
# Search terms that make up the base list. The field tags ([AU], [OT]) are
# part of the query string and are passed to PubMed unchanged.
base_queries = [
    "Kabashin[AU]",
    "Barcikowski[AU]",
    "pulsed laser ablation in liquids[OT]",
    "pulsed laser ablation in liquid[OT]",
    "laser ablation in liquids[OT]",
    "laser ablation in liquid[OT]",
    "laser ablation[OT] AND nanoparticles[OT]",
    "ablation[OT] AND nanoparticles[OT]",
    "fragmentation[OT] AND nanoparticles[OT]",
    "laser fragmentation in liquids[OT]",
    "laser fragmentation in liquid[OT]",
    "plal[OT]",
]

# Run every query, echoing the term first so that the printed article count
# can be attributed to it, then combine the results in a single concat.
frames = []
for term in base_queries:
    print(term)
    frames.append(get_df(pubmed.query(term)))
base = pd.concat(frames, ignore_index=True)

# Drop duplicates: by DOI where one is present, by PMID otherwise.
# A plain drop_duplicates(subset='doi') treats missing DOIs as equal to each
# other and would keep only the first article that has no DOI.
no_doi = base['doi'].isna()
is_dup = (~no_doi & base['doi'].duplicated()) | (no_doi & base['pmid'].duplicated())
base = base[~is_dup]

# Drop too old: keep articles published after 1992. `>=` keeps articles dated
# exactly 1993-01-01, which a strict `>` would discard.
# NOTE(review): assumes publication_date compares correctly against an ISO
# 'YYYY-MM-DD' string - confirm the type produced by to_dict().
base = base[base['publication_date'] >= '1993-01-01']

Kabashin[AU]


17:33:02.645 - api/query:97 - INFO - Found: 82 articles. Fetching...


Total articles: 82
Barcikowski[AU]


17:33:09.010 - api/query:97 - INFO - Found: 272 articles. Fetching...
17:33:14.957 - api/_getArticles:397 - INFO - Unrecognized articles: 1


Total articles: 271
pulsed laser ablation in liquids[OT]


17:33:17.410 - api/query:97 - INFO - Found: 24 articles. Fetching...


Total articles: 24
pulsed laser ablation in liquid[OT]


17:33:21.783 - api/query:97 - INFO - Found: 18 articles. Fetching...


Total articles: 18
laser ablation in liquids[OT]


17:33:25.497 - api/query:97 - INFO - Found: 54 articles. Fetching...


Total articles: 54
laser ablation in liquid[OT]


17:33:31.107 - api/query:97 - INFO - Found: 63 articles. Fetching...


Total articles: 63
laser ablation[OT] AND nanoparticles[OT]


17:33:38.531 - api/query:97 - INFO - Found: 215 articles. Fetching...


Total articles: 215
ablation[OT] AND nanoparticles[OT]


17:33:48.043 - api/query:97 - INFO - Found: 312 articles. Fetching...


Total articles: 312
fragmentation[OT] AND nanoparticles[OT]


17:34:01.305 - api/query:97 - INFO - Found: 68 articles. Fetching...


Total articles: 68
laser fragmentation in liquids[OT]


17:34:06.904 - api/query:97 - INFO - Found: 4 articles. Fetching...


Total articles: 4
laser fragmentation in liquid[OT]


17:34:10.369 - api/query:97 - INFO - Found: 4 articles. Fetching...


Total articles: 4
plal[OT]


17:34:13.815 - api/query:97 - INFO - Found: 18 articles. Fetching...


Total articles: 18
lal[OT]


17:34:17.329 - api/query:97 - INFO - Found: 153 articles. Fetching...
17:34:21.630 - api/_getArticles:397 - INFO - Unrecognized articles: 1


Total articles: 152


## Step 2
Get the list of articles that cite articles from the base list

In [None]:
# Rows (dicts) of all citing articles; turned into a DataFrame once at the
# end instead of re-concatenating a growing frame on every iteration.
citing_rows = []
# PMIDs of base articles whose citing articles could not be fetched, so they
# can be inspected or retried afterwards.
failed_pmids = []

tot_articles = len(base)
for i, pmid in enumerate(base['pmid'], start=1):
    logging.info('Start fetching article %d/%d - %s', i, tot_articles, pmid)
    try:
        rows = [x.to_dict() for x in pubmed.getCitingArticles(pmid)]
    except ValueError:
        # A single record that cannot be parsed (e.g. an unexpected date
        # format) must not abort the whole long-running loop.
        logging.exception('Failed to fetch citing articles for %s; skipping', pmid)
        failed_pmids.append(pmid)
        continue
    citing_rows.extend(rows)

citing = pd.DataFrame(citing_rows)
# Same column layout as `base` (plus any extra columns), also when nothing
# was fetched at all.
citing = citing.reindex(columns=base.columns.union(citing.columns, sort=False))

# Drop duplicates: by DOI where one is present, by PMID otherwise.
# A plain drop_duplicates(subset='doi') treats missing DOIs as equal to each
# other and would keep only the first article that has no DOI.
no_doi = citing['doi'].isna()
is_dup = (~no_doi & citing['doi'].duplicated()) | (no_doi & citing['pmid'].duplicated())
citing = citing[~is_dup]

# Drop articles from base list (matched by DOI). Articles without a DOI are
# kept: isin() would otherwise match a missing DOI against any missing DOI
# in the base list.
citing = citing[citing['doi'].isna() | ~citing['doi'].isin(base['doi'])]

18:09:04.967 - 1858469794/<module>:5 - INFO - Start fetching article 1/812 - 39791764
18:09:06.390 - api/getCitingArticles:130 - INFO - Found: 0 siting articles. Fetching...
18:09:06.391 - 1858469794/<module>:5 - INFO - Start fetching article 2/812 - 39683349
18:09:07.377 - api/getCitingArticles:130 - INFO - Found: 0 siting articles. Fetching...
18:09:07.378 - 1858469794/<module>:5 - INFO - Start fetching article 3/812 - 39253754
18:09:08.459 - api/getCitingArticles:130 - INFO - Found: 0 siting articles. Fetching...
18:09:08.460 - 1858469794/<module>:5 - INFO - Start fetching article 4/812 - 39203014
18:09:09.376 - api/getCitingArticles:130 - INFO - Found: 0 siting articles. Fetching...
18:09:09.377 - 1858469794/<module>:5 - INFO - Start fetching article 5/812 - 38795244
18:09:10.606 - api/getCitingArticles:130 - INFO - Found: 0 siting articles. Fetching...
18:09:10.609 - 1858469794/<module>:5 - INFO - Start fetching article 6/812 - 38739779
18:09:11.630 - api/getCitingArticles:130 - I

ValueError: time data '2021/Sept/15' does not match format '%Y/%b/%d'

## Step 3
Get the references that are cited in the articles of the base list

In [None]:
import ast

# Rows (dicts) of all fetched references; turned into a DataFrame once at the
# end instead of re-concatenating a growing frame on every iteration.
ref_rows = []
# Reference PMIDs that were already requested, so a reference cited by
# several base articles is fetched only once.
fetched_ids = set()

# Only articles that actually have a reference list are visited, so the
# progress total is taken from the filtered series rather than from `base`.
ref_lists = base['references'].dropna()
tot_articles = len(ref_lists)
for i, ref_list in enumerate(ref_lists, start=1):
    logging.info('Start fetching refernces for article %d/%d - %s', i, tot_articles, ref_list)
    if not ref_list:
        continue
    # The list may be stored as its string representation (e.g. after a CSV
    # round trip). Parse it with literal_eval instead of eval so that stored
    # text can never execute code; an in-memory list is used as is.
    if isinstance(ref_list, str):
        ref_list = ast.literal_eval(ref_list)

    # Collect the PMIDs that have not been requested yet.
    ids = []
    for ref in ref_list:
        ref_pmid = ref.get('pmid')
        if ref_pmid is not None and ref_pmid not in fetched_ids:
            fetched_ids.add(ref_pmid)
            ids.append(ref_pmid)
    if not ids:
        continue

    logging.info('Fetching %d references...', len(ids))
    articles = list(pubmed.getArticles(ids))
    logging.info('Fetched %d references', len(articles))
    ref_rows.extend(x.to_dict() for x in articles)

refs = pd.DataFrame(ref_rows)
# Same column layout as `base` (plus any extra columns), also when nothing
# was fetched at all.
refs = refs.reindex(columns=base.columns.union(refs.columns, sort=False))

# Drop duplicates: by DOI where one is present, by PMID otherwise.
# A plain drop_duplicates(subset='doi') treats missing DOIs as equal to each
# other and would keep only the first article that has no DOI.
no_doi = refs['doi'].isna()
is_dup = (~no_doi & refs['doi'].duplicated()) | (no_doi & refs['pmid'].duplicated())
refs = refs[~is_dup]

# Drop articles already in the base (matched by DOI). Articles without a DOI
# are kept: isin() would otherwise match a missing DOI against any missing
# DOI in the base list.
refs = refs[refs['doi'].isna() | ~refs['doi'].isin(base['doi'])]

## Step 4
Combine all lists

In [None]:
# Merge the base list with the citing and the referenced articles.
all_articles = pd.concat([base, citing, refs], ignore_index=True)

# Drop duplicates: by DOI where one is present, by PMID otherwise.
# A plain drop_duplicates(subset='doi') treats missing DOIs as equal to each
# other and would keep only the first article that has no DOI.
no_doi = all_articles['doi'].isna()
is_dup = (
    (~no_doi & all_articles['doi'].duplicated())
    | (no_doi & all_articles['pmid'].duplicated())
)
all_articles = all_articles[~is_dup]

# Keep articles published after 1992. `>=` keeps articles dated exactly
# 1993-01-01, which a strict `>` would discard.
# NOTE(review): assumes publication_date compares correctly against an ISO
# 'YYYY-MM-DD' string - confirm the type produced by to_dict().
all_articles = all_articles[all_articles['publication_date'] >= '1993-01-01']

# Persist the combined list.
all_articles.to_csv('all_articles.csv', index=False)

## Some statistics
Get authors statistics

In [None]:
import ast
from collections import Counter

# Count how often each (lower-cased) author last name occurs over all articles.
authors = []
# dropna(): an article without an author list must not break the loop.
for authorlist in all_articles['authors'].dropna():
    # The list may be stored as its string representation (e.g. after a CSV
    # round trip). Parse it with literal_eval instead of eval so that stored
    # text can never execute code; an in-memory list is used as is.
    if isinstance(authorlist, str):
        authorlist = ast.literal_eval(authorlist)
    # Entries without a last name are skipped.
    authors.extend(
        x['lastname'].lower() for x in authorlist if x.get('lastname') is not None
    )

auth_counter = Counter(authors)
auth_counter.most_common(10)

[('wang', 774),
 ('zhang', 606),
 ('li', 597),
 ('liu', 517),
 ('chen', 437),
 ('yang', 311),
 ('kim', 296),
 ('lee', 235),
 ('wu', 223),
 ('huang', 204)]

Get statistics on keywords

In [None]:
import ast
from collections import Counter

# Count how often each (lower-cased) keyword occurs over all articles.
kw = Counter()
for kwlist in all_articles['keywords'].dropna():
    # The list may be stored as its string representation (e.g. after a CSV
    # round trip). Parse it with literal_eval instead of eval so that stored
    # text can never execute code; an in-memory list is used as is.
    if isinstance(kwlist, str):
        kwlist = ast.literal_eval(kwlist)
    # The loop variable has its own name so that it no longer shadows the
    # `kw` counter inside the expression.
    kw.update(word.lower() for word in kwlist if word is not None)
kw.most_common(20)

[('nanoparticles', 323),
 ('gold nanoparticles', 161),
 ('silver nanoparticles', 148),
 ('laser ablation', 136),
 ('drug delivery', 92),
 ('nanomedicine', 71),
 ('cytotoxicity', 67),
 ('nanotechnology', 67),
 ('surface plasmon resonance', 65),
 ('cancer', 62),
 ('toxicity', 60),
 ('nanomaterials', 59),
 ('plasmonics', 54),
 ('photothermal therapy', 51),
 ('antibacterial activity', 51),
 ('sers', 49),
 ('antibacterial', 46),
 ('oxidative stress', 46),
 ('wound healing', 46),
 ('green synthesis', 45)]