# About this notebook:
- We will use article IDs to collect each article's information, including:
    - Title
    - Abstract
    - Given MeSH Terms
    - Publication Date
- This notebook is part of a series of notebooks under 'Step 2 Articles Collection', where each notebook contains the articles published in a particular year.

# Import Libraries and Articles' IDs

In [1]:
# import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Entrez is the NCBI E-utilities client used below to fetch PubMed records;
# ElementTree parses the XML those requests return.
from Bio import Entrez
import xml.etree.ElementTree as ET

# NOTE(review): `time` is not referenced in the cells of this notebook.
import time
from tqdm import tqdm

# Set the email address associated with your NCBI account
Entrez.email = "geok1723@gmail.com"

# Display the value of every top-level expression in a cell, not only the last one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"


# Load the article IDs gathered in Step 1 for this notebook's year (2015).
df=pd.read_csv("../Step_1_ID_Collection/IDs_2015.csv")

In [2]:
# Preview the first rows to check the columns that were loaded.
df.head()

Unnamed: 0,Date_str,ID,Date,Month,Year
0,2015/01/08,25565897,2015-01-08,1,2015
1,2015/01/27,25619629,2015-01-27,1,2015
2,2015/01/06,25557018,2015-01-06,1,2015
3,2015/01/09,25568442,2015-01-09,1,2015
4,2015/01/30,25631110,2015-01-30,1,2015


# Collecting Articles in groups of 100<br>
> The IDs are split into mini-dataframes of 100 IDs each for scraping. <br>
> The output will contain about 360 mini-dataframes of articles that were published in this particular year. <br>
> This process was executed on Kaggle, and the resulting dataframes are stored in the respective zip folder. <br>

In [3]:
# Label every row with the 100-row chunk it falls in: the first 100 rows get
# "G1", the next 100 "G2", and so on. This assumes the frame still carries the
# default 0-based integer row index from the CSV load.
group_num = df.index // 100 + 1
group_label = 'G' + group_num.astype(str)
df['sub_group'] = group_label

# Parse the date column so the month can be formatted when naming each chunk.
df['Date'] = pd.to_datetime(df['Date'], yearfirst=True)

grouped = df.groupby('sub_group')


def _chunk_name(label, chunk):
    """Build the "df_<year>_<month>_<label>" key from the chunk's first row."""
    first_year = chunk['Year'].iloc[0]
    first_month = chunk['Date'].iloc[0].strftime('%m')
    return f"df_{first_year}_{first_month}_{label}"


# One independent copy of each chunk, keyed by its generated name.
df_dict = {
    _chunk_name(label, chunk): chunk.copy()
    for label, chunk in grouped
}

# Names are ordered as plain strings, so within one month "..._G10" sorts
# before "..._G2"; the resume cell below relies on this exact ordering.
df_ls = sorted(df_dict)

In [4]:
# Resume with the group right after the last one handled by the previous
# notebook; raises ValueError if that group name is not in df_ls.
x = df_ls.index("df_2015_12_G347") + 1

In [8]:
def _element_text(element):
    """Return all text inside *element*, including text nested in inline tags.

    ``element.text`` alone is ``None`` when the element starts with a child tag
    (e.g. ``<i>``/``<sup>``) and otherwise stops at the first child, so the full
    text is rebuilt with ``itertext()``.
    """
    return "".join(element.itertext())


def _parse_article(xml_data):
    """Extract the title, abstract and MeSH terms from one efetch XML payload.

    Parameters:
        xml_data: the XML document returned by ``Entrez.efetch`` for one PMID.

    Returns:
        ``(title, abstract_text, mesh_terms)`` where ``abstract_text`` is every
        ``AbstractText`` section concatenated in document order, and
        ``mesh_terms`` is a list starting with ``''`` followed by
        ``(descriptor_name, [qualifier_names])`` tuples.

    Raises:
        ValueError: if the record has no ``ArticleTitle`` element.
        xml.etree.ElementTree.ParseError: if ``xml_data`` is not well-formed.
    """
    root = ET.fromstring(xml_data)

    title_node = root.find(".//ArticleTitle")
    if title_node is None:
        raise ValueError("no ArticleTitle element in record")
    title = _element_text(title_node).strip()

    # Sections are joined without a separator, matching the existing output format.
    abstract_text = "".join(
        _element_text(section) for section in root.findall(".//AbstractText")
    )

    # The leading '' entry is kept so the MeSH_term column layout is unchanged.
    mesh_terms = ['']
    for mesh in root.findall(".//MeshHeadingList/MeshHeading"):
        descriptor_name = mesh.find(".//DescriptorName").text
        qualifier_names = [qualifier.text for qualifier in mesh.findall(".//QualifierName")]
        mesh_terms.append((descriptor_name, qualifier_names))

    return title, abstract_text, mesh_terms


# Fetch every remaining group, one PubMed request per article ID, and write one
# CSV per group named "ABSTRACT<group>.csv" in the working directory.
output_df = {}
for grp in tqdm(df_ls[x:]):
    _df = df_dict[grp]
    _s = _df[['ID','Date_str']]

    print(f"Creating Dataframe of articles for {grp}")
    abstracts = []
    titles = []
    ids = []
    MeSH = []
    Pub_Date = []

    # total= lets tqdm show a real progress bar instead of a bare counter.
    for index, row in tqdm(_s.iterrows(), total=len(_s)):

        pub_date = row['Date_str']
        pmid = row['ID']

        try:
            handle = Entrez.efetch(db="pubmed", id=pmid, rettype="abstract", retmode="xml")
            try:
                xml_data = handle.read()
            finally:
                # Release the connection even when the read fails.
                handle.close()

            title, abstract_text, mesh_terms = _parse_article(xml_data)
        except Exception as exc:
            # Best effort: report the failing ID with its cause and move on.
            # Catching Exception (not a bare except) keeps Ctrl-C / kernel
            # interrupt working during the long-running loop.
            print(f"Error here in ID{pmid}: {exc!r}")
            continue

        # Append only after the whole record parsed, so the lists stay aligned.
        MeSH.append(mesh_terms)
        ids.append(pmid)
        abstracts.append(abstract_text)
        titles.append(title)
        Pub_Date.append(pub_date)

    # Store the data in a pandas dataframe
    data = {"ID": ids, "title": titles,"Pub_Date":Pub_Date, "abstract": abstracts, "MeSH_term":MeSH}
    data_df = pd.DataFrame(data)
    df_name = f"ABSTRACT{grp}"
    output_df[df_name] = data_df
    data_df.to_csv(f"{df_name}.csv", index=False)
    print(output_df.keys())

  0%|          | 0/13 [00:00<?, ?it/s]

Creating Dataframe of articles for df_2015_12_G348



0it [00:00, ?it/s][A
1it [00:01,  1.66s/it][A
2it [00:02,  1.46s/it][A
3it [00:04,  1.46s/it][A
4it [00:05,  1.41s/it][A
5it [00:07,  1.36s/it][A
6it [00:08,  1.28s/it][A
7it [00:09,  1.23s/it][A
8it [00:10,  1.34s/it][A
9it [00:12,  1.32s/it][A
10it [00:13,  1.30s/it][A
11it [00:14,  1.30s/it][A
12it [00:15,  1.24s/it][A
13it [00:16,  1.19s/it][A
14it [00:18,  1.22s/it][A
15it [00:20,  1.42s/it][A
16it [00:21,  1.40s/it][A
17it [00:22,  1.36s/it][A
18it [00:23,  1.34s/it][A
19it [00:25,  1.26s/it][A
20it [00:26,  1.27s/it][A
21it [00:27,  1.29s/it][A
22it [00:28,  1.28s/it][A
23it [00:29,  1.21s/it][A
24it [00:31,  1.17s/it][A
25it [00:32,  1.21s/it][A
26it [00:33,  1.31s/it][A
27it [00:35,  1.31s/it][A
28it [00:36,  1.30s/it][A
29it [00:37,  1.23s/it][A
30it [00:38,  1.19s/it][A
31it [00:39,  1.22s/it][A
32it [00:41,  1.24s/it][A
33it [00:42,  1.21s/it][A
34it [00:43,  1.18s/it][A
35it [00:44,  1.24s/it][A
36it [00:46,  1.29s/it][A
37it [00:47,  

dict_keys(['ABSTRACTdf_2015_12_G348'])
Creating Dataframe of articles for df_2015_12_G349



0it [00:00, ?it/s][A
1it [00:01,  1.33s/it][A
2it [00:02,  1.37s/it][A
3it [00:04,  1.34s/it][A
4it [00:05,  1.26s/it][A
5it [00:06,  1.22s/it][A
6it [00:07,  1.24s/it][A
7it [00:08,  1.27s/it][A
8it [00:10,  1.22s/it][A
9it [00:11,  1.29s/it][A
10it [00:12,  1.23s/it][A
11it [00:13,  1.19s/it][A
12it [00:14,  1.20s/it][A
13it [00:16,  1.17s/it][A
14it [00:17,  1.21s/it][A
15it [00:18,  1.25s/it][A
16it [00:19,  1.28s/it][A
17it [00:21,  1.23s/it][A
18it [00:22,  1.18s/it][A
19it [00:23,  1.22s/it][A
20it [00:25,  1.31s/it][A
21it [00:26,  1.25s/it][A
22it [00:27,  1.20s/it][A
23it [00:28,  1.24s/it][A
24it [00:29,  1.28s/it][A
25it [00:31,  1.30s/it][A
26it [00:32,  1.26s/it][A
27it [00:33,  1.23s/it][A
28it [00:34,  1.21s/it][A
29it [00:36,  1.25s/it][A
30it [00:37,  1.21s/it][A
31it [00:38,  1.17s/it][A
32it [00:39,  1.28s/it][A
33it [00:41,  1.25s/it][A
34it [00:42,  1.27s/it][A
35it [00:43,  1.24s/it][A
36it [00:44,  1.25s/it][A
37it [00:46,  

dict_keys(['ABSTRACTdf_2015_12_G348', 'ABSTRACTdf_2015_12_G349'])
Creating Dataframe of articles for df_2015_12_G350



0it [00:00, ?it/s][A
1it [00:01,  1.32s/it][A
2it [00:02,  1.28s/it][A
3it [00:03,  1.21s/it][A
4it [00:05,  1.25s/it][A
5it [00:06,  1.21s/it][A
6it [00:07,  1.31s/it][A
7it [00:08,  1.24s/it][A
8it [00:09,  1.20s/it][A
9it [00:11,  1.24s/it][A
10it [00:12,  1.25s/it][A
11it [00:13,  1.28s/it][A
12it [00:15,  1.28s/it][A
13it [00:16,  1.29s/it][A
14it [00:17,  1.25s/it][A
15it [00:18,  1.20s/it][A
16it [00:19,  1.18s/it][A
17it [00:20,  1.15s/it][A
18it [00:21,  1.13s/it][A
19it [00:23,  1.21s/it][A
20it [00:24,  1.28s/it][A
21it [00:25,  1.21s/it][A
22it [00:26,  1.18s/it][A
23it [00:28,  1.37s/it][A
24it [00:29,  1.29s/it][A
25it [00:30,  1.23s/it][A
26it [00:32,  1.25s/it][A
27it [00:33,  1.19s/it][A
28it [00:34,  1.21s/it][A
29it [00:35,  1.26s/it][A
30it [00:37,  1.27s/it][A
31it [00:38,  1.29s/it][A
32it [00:40,  1.44s/it][A
33it [00:41,  1.34s/it][A
34it [00:42,  1.27s/it][A
35it [00:43,  1.22s/it][A
36it [00:44,  1.18s/it][A
37it [00:46,  

dict_keys(['ABSTRACTdf_2015_12_G348', 'ABSTRACTdf_2015_12_G349', 'ABSTRACTdf_2015_12_G350'])
Creating Dataframe of articles for df_2015_12_G351



0it [00:00, ?it/s][A
1it [00:01,  1.31s/it][A
2it [00:02,  1.31s/it][A
3it [00:03,  1.30s/it][A
4it [00:05,  1.30s/it][A
5it [00:06,  1.23s/it][A
6it [00:07,  1.27s/it][A
7it [00:08,  1.28s/it][A
8it [00:10,  1.29s/it][A
9it [00:11,  1.24s/it][A
10it [00:12,  1.35s/it][A
11it [00:14,  1.33s/it][A
12it [00:15,  1.25s/it][A
13it [00:16,  1.26s/it][A
14it [00:17,  1.22s/it][A
15it [00:18,  1.18s/it][A
16it [00:20,  1.22s/it][A
17it [00:21,  1.18s/it][A
18it [00:22,  1.23s/it][A
19it [00:23,  1.24s/it][A
20it [00:24,  1.20s/it][A
21it [00:26,  1.16s/it][A
22it [00:27,  1.14s/it][A
23it [00:28,  1.12s/it][A
24it [00:29,  1.11s/it][A
25it [00:30,  1.21s/it][A
26it [00:31,  1.18s/it][A
27it [00:32,  1.15s/it][A
28it [00:34,  1.14s/it][A
29it [00:35,  1.23s/it][A
30it [00:36,  1.18s/it][A
31it [00:37,  1.21s/it][A
32it [00:38,  1.18s/it][A
33it [00:40,  1.15s/it][A
34it [00:41,  1.14s/it][A
35it [00:42,  1.18s/it][A
36it [00:43,  1.15s/it][A
37it [00:44,  

dict_keys(['ABSTRACTdf_2015_12_G348', 'ABSTRACTdf_2015_12_G349', 'ABSTRACTdf_2015_12_G350', 'ABSTRACTdf_2015_12_G351'])
Creating Dataframe of articles for df_2015_12_G352



0it [00:00, ?it/s][A
1it [00:01,  1.26s/it][A
2it [00:02,  1.28s/it][A
3it [00:04,  1.41s/it][A
4it [00:05,  1.29s/it][A
5it [00:06,  1.29s/it][A
6it [00:07,  1.30s/it][A
7it [00:09,  1.31s/it][A
8it [00:10,  1.23s/it][A
9it [00:11,  1.21s/it][A
10it [00:12,  1.22s/it][A
11it [00:13,  1.19s/it][A
12it [00:14,  1.16s/it][A
13it [00:16,  1.21s/it][A
14it [00:17,  1.24s/it][A
15it [00:18,  1.26s/it][A
16it [00:19,  1.22s/it][A
17it [00:21,  1.18s/it][A
18it [00:22,  1.14s/it][A
19it [00:23,  1.13s/it][A
20it [00:24,  1.24s/it][A
21it [00:25,  1.20s/it][A
22it [00:27,  1.23s/it][A
23it [00:28,  1.18s/it][A
24it [00:29,  1.16s/it][A
25it [00:30,  1.14s/it][A
26it [00:31,  1.21s/it][A
27it [00:33,  1.24s/it][A
28it [00:34,  1.25s/it][A
29it [00:35,  1.25s/it][A
30it [00:36,  1.26s/it][A
31it [00:37,  1.21s/it][A
32it [00:39,  1.23s/it][A
33it [00:40,  1.26s/it][A
34it [00:41,  1.26s/it][A
35it [00:42,  1.21s/it][A
36it [00:44,  1.24s/it][A
37it [00:45,  

Error here in ID26622461



58it [01:12,  1.31s/it][A
59it [01:13,  1.24s/it][A
60it [01:14,  1.20s/it][A
61it [01:15,  1.23s/it][A
62it [01:17,  1.19s/it][A
63it [01:18,  1.21s/it][A
64it [01:19,  1.20s/it][A
65it [01:20,  1.17s/it][A
66it [01:21,  1.21s/it][A
67it [01:23,  1.26s/it][A
68it [01:24,  1.23s/it][A
69it [01:25,  1.20s/it][A
70it [01:26,  1.16s/it][A
71it [01:28,  1.29s/it][A
72it [01:29,  1.24s/it][A
73it [01:30,  1.20s/it][A
74it [01:31,  1.16s/it][A
75it [01:32,  1.20s/it][A
76it [01:34,  1.20s/it][A
77it [01:35,  1.19s/it][A
78it [01:36,  1.16s/it][A
79it [01:37,  1.20s/it][A
80it [01:38,  1.22s/it][A
81it [01:40,  1.20s/it][A
82it [01:41,  1.28s/it][A
83it [01:42,  1.28s/it][A
84it [01:43,  1.22s/it][A
85it [01:45,  1.25s/it][A
86it [01:46,  1.25s/it][A
87it [01:47,  1.27s/it][A
88it [01:49,  1.27s/it][A
89it [01:50,  1.30s/it][A
90it [01:51,  1.30s/it][A
91it [01:53,  1.31s/it][A
92it [01:54,  1.25s/it][A
93it [01:55,  1.19s/it][A
94it [01:56,  1.16s/it][A


dict_keys(['ABSTRACTdf_2015_12_G348', 'ABSTRACTdf_2015_12_G349', 'ABSTRACTdf_2015_12_G350', 'ABSTRACTdf_2015_12_G351', 'ABSTRACTdf_2015_12_G352'])
Creating Dataframe of articles for df_2015_12_G353



0it [00:00, ?it/s][A
1it [00:01,  1.08s/it][A
2it [00:02,  1.22s/it][A
3it [00:03,  1.26s/it][A
4it [00:04,  1.20s/it][A
5it [00:06,  1.22s/it][A
6it [00:07,  1.19s/it][A
7it [00:08,  1.22s/it][A
8it [00:09,  1.19s/it][A
9it [00:10,  1.22s/it][A
10it [00:12,  1.25s/it][A
11it [00:13,  1.26s/it][A
12it [00:14,  1.27s/it][A
13it [00:15,  1.25s/it][A
14it [00:17,  1.22s/it][A
15it [00:18,  1.27s/it][A
16it [00:19,  1.22s/it][A
17it [00:21,  1.35s/it][A
18it [00:22,  1.38s/it][A
19it [00:23,  1.31s/it][A
20it [00:25,  1.36s/it][A
21it [00:26,  1.39s/it][A
22it [00:27,  1.31s/it][A
23it [00:29,  1.25s/it][A
24it [00:30,  1.20s/it][A
25it [00:31,  1.27s/it][A
26it [00:32,  1.29s/it][A
27it [00:34,  1.30s/it][A
28it [00:35,  1.30s/it][A
29it [00:36,  1.23s/it][A
30it [00:37,  1.19s/it][A
31it [00:38,  1.16s/it][A
32it [00:39,  1.14s/it][A
33it [00:40,  1.12s/it][A
34it [00:42,  1.17s/it][A
35it [00:43,  1.15s/it][A
36it [00:44,  1.19s/it][A
37it [00:46,  

dict_keys(['ABSTRACTdf_2015_12_G348', 'ABSTRACTdf_2015_12_G349', 'ABSTRACTdf_2015_12_G350', 'ABSTRACTdf_2015_12_G351', 'ABSTRACTdf_2015_12_G352', 'ABSTRACTdf_2015_12_G353'])
Creating Dataframe of articles for df_2015_12_G354



0it [00:00, ?it/s][A
1it [00:01,  1.21s/it][A
2it [00:02,  1.25s/it][A
3it [00:03,  1.30s/it][A
4it [00:04,  1.22s/it][A
5it [00:06,  1.26s/it][A
6it [00:07,  1.26s/it][A
7it [00:08,  1.29s/it][A
8it [00:09,  1.23s/it][A
9it [00:11,  1.19s/it][A
10it [00:12,  1.22s/it][A
11it [00:14,  1.39s/it][A
12it [00:16,  1.60s/it][A
13it [00:17,  1.54s/it][A
14it [00:18,  1.45s/it][A
15it [00:20,  1.37s/it][A
16it [00:21,  1.28s/it][A
17it [00:22,  1.23s/it][A
18it [00:23,  1.25s/it][A
19it [00:24,  1.30s/it][A
20it [00:26,  1.24s/it][A
21it [00:27,  1.27s/it][A
22it [00:28,  1.32s/it][A
23it [00:29,  1.26s/it][A
24it [00:31,  1.21s/it][A
25it [00:32,  1.18s/it][A
26it [00:33,  1.22s/it][A
27it [00:34,  1.24s/it][A
28it [00:36,  1.28s/it][A
29it [00:37,  1.21s/it][A
30it [00:38,  1.18s/it][A
31it [00:39,  1.23s/it][A
32it [00:40,  1.24s/it][A
33it [00:42,  1.22s/it][A
34it [00:43,  1.18s/it][A
35it [00:44,  1.22s/it][A
36it [00:45,  1.24s/it][A
37it [00:46,  

dict_keys(['ABSTRACTdf_2015_12_G348', 'ABSTRACTdf_2015_12_G349', 'ABSTRACTdf_2015_12_G350', 'ABSTRACTdf_2015_12_G351', 'ABSTRACTdf_2015_12_G352', 'ABSTRACTdf_2015_12_G353', 'ABSTRACTdf_2015_12_G354'])
Creating Dataframe of articles for df_2015_12_G355



0it [00:00, ?it/s][A
1it [00:01,  1.32s/it][A
2it [00:02,  1.17s/it][A
3it [00:03,  1.16s/it][A
4it [00:05,  1.30s/it][A
5it [00:06,  1.23s/it][A
6it [00:07,  1.19s/it][A
7it [00:08,  1.17s/it][A
8it [00:09,  1.30s/it][A
9it [00:12,  1.78s/it][A
10it [00:14,  1.62s/it][A
11it [00:15,  1.51s/it][A
12it [00:16,  1.51s/it][A
13it [00:18,  1.45s/it][A
14it [00:19,  1.35s/it][A
15it [00:20,  1.33s/it][A
16it [00:21,  1.31s/it][A
17it [00:23,  1.30s/it][A
18it [00:24,  1.24s/it][A
19it [00:25,  1.27s/it][A
20it [00:26,  1.22s/it][A
21it [00:27,  1.24s/it][A
22it [00:29,  1.27s/it][A
23it [00:30,  1.31s/it][A
24it [00:32,  1.34s/it][A
25it [00:33,  1.29s/it][A
26it [00:34,  1.23s/it][A
27it [00:35,  1.26s/it][A
28it [00:36,  1.26s/it][A
29it [00:38,  1.31s/it][A
30it [00:39,  1.25s/it][A
31it [00:40,  1.20s/it][A
32it [00:41,  1.17s/it][A
33it [00:43,  1.24s/it][A
34it [00:44,  1.26s/it][A
35it [00:46,  1.43s/it][A
36it [00:47,  1.33s/it][A
37it [00:48,  

dict_keys(['ABSTRACTdf_2015_12_G348', 'ABSTRACTdf_2015_12_G349', 'ABSTRACTdf_2015_12_G350', 'ABSTRACTdf_2015_12_G351', 'ABSTRACTdf_2015_12_G352', 'ABSTRACTdf_2015_12_G353', 'ABSTRACTdf_2015_12_G354', 'ABSTRACTdf_2015_12_G355'])
Creating Dataframe of articles for df_2015_12_G356



0it [00:00, ?it/s][A
1it [00:01,  1.07s/it][A
2it [00:02,  1.21s/it][A
3it [00:03,  1.28s/it][A
4it [00:04,  1.21s/it][A
5it [00:05,  1.17s/it][A
6it [00:07,  1.16s/it][A
7it [00:08,  1.14s/it][A
8it [00:09,  1.19s/it][A
9it [00:10,  1.21s/it][A
10it [00:12,  1.25s/it][A
11it [00:13,  1.19s/it][A
12it [00:14,  1.16s/it][A
13it [00:15,  1.15s/it][A
14it [00:16,  1.13s/it][A
15it [00:17,  1.12s/it][A
16it [00:18,  1.16s/it][A
17it [00:20,  1.21s/it][A
18it [00:21,  1.24s/it][A
19it [00:22,  1.27s/it][A
20it [00:24,  1.28s/it][A
21it [00:25,  1.22s/it][A
22it [00:26,  1.25s/it][A
23it [00:27,  1.21s/it][A
24it [00:28,  1.18s/it][A
25it [00:30,  1.23s/it][A
26it [00:31,  1.19s/it][A
27it [00:32,  1.22s/it][A
28it [00:33,  1.25s/it][A
29it [00:34,  1.19s/it][A
30it [00:35,  1.19s/it][A
31it [00:37,  1.28s/it][A
32it [00:38,  1.23s/it][A
33it [00:39,  1.26s/it][A
34it [00:41,  1.28s/it][A
35it [00:42,  1.24s/it][A
36it [00:43,  1.23s/it][A
37it [00:44,  

dict_keys(['ABSTRACTdf_2015_12_G348', 'ABSTRACTdf_2015_12_G349', 'ABSTRACTdf_2015_12_G350', 'ABSTRACTdf_2015_12_G351', 'ABSTRACTdf_2015_12_G352', 'ABSTRACTdf_2015_12_G353', 'ABSTRACTdf_2015_12_G354', 'ABSTRACTdf_2015_12_G355', 'ABSTRACTdf_2015_12_G356'])
Creating Dataframe of articles for df_2015_12_G357



0it [00:00, ?it/s][A
1it [00:01,  1.32s/it][A
2it [00:02,  1.29s/it][A
3it [00:03,  1.21s/it][A
4it [00:05,  1.28s/it][A
5it [00:06,  1.27s/it][A
6it [00:07,  1.28s/it][A
7it [00:08,  1.22s/it][A
8it [00:10,  1.26s/it][A
9it [00:11,  1.28s/it][A
10it [00:12,  1.35s/it][A
11it [00:14,  1.28s/it][A
12it [00:15,  1.35s/it][A
13it [00:19,  2.16s/it][A

Error here in ID26663507



14it [00:20,  1.92s/it][A
15it [00:22,  1.67s/it][A
16it [00:23,  1.52s/it][A
17it [00:24,  1.47s/it][A
18it [00:25,  1.35s/it][A
19it [00:26,  1.35s/it][A
20it [00:28,  1.34s/it][A
21it [00:29,  1.26s/it][A
22it [00:30,  1.30s/it][A
23it [00:32,  1.31s/it][A
24it [00:33,  1.24s/it][A
25it [00:34,  1.20s/it][A
26it [00:35,  1.18s/it][A
27it [00:36,  1.15s/it][A
28it [00:37,  1.19s/it][A
29it [00:38,  1.17s/it][A

Error here in ID26621888



30it [00:40,  1.26s/it][A
31it [00:41,  1.20s/it][A
32it [00:42,  1.18s/it][A
33it [00:43,  1.22s/it][A
34it [00:45,  1.26s/it][A
35it [00:46,  1.28s/it][A
36it [00:47,  1.23s/it][A
37it [00:48,  1.19s/it][A
38it [00:50,  1.22s/it][A
39it [00:51,  1.23s/it][A
40it [00:52,  1.19s/it][A
41it [00:53,  1.21s/it][A
42it [00:54,  1.24s/it][A
43it [00:56,  1.19s/it][A
44it [00:57,  1.18s/it][A
45it [00:58,  1.22s/it][A
46it [00:59,  1.24s/it][A
47it [01:00,  1.19s/it][A
48it [01:02,  1.31s/it][A
49it [01:03,  1.24s/it][A
50it [01:05,  1.59s/it][A
51it [01:07,  1.53s/it][A
52it [01:08,  1.46s/it][A
53it [01:10,  1.51s/it][A
54it [01:11,  1.39s/it][A
55it [01:12,  1.37s/it][A
56it [01:13,  1.29s/it][A
57it [01:15,  1.28s/it][A
58it [01:16,  1.26s/it][A
59it [01:17,  1.27s/it][A
60it [01:18,  1.22s/it][A
61it [01:20,  1.26s/it][A
62it [01:21,  1.27s/it][A
63it [01:22,  1.23s/it][A
64it [01:23,  1.19s/it][A
65it [01:24,  1.21s/it][A
66it [01:26,  1.24s/it][A


dict_keys(['ABSTRACTdf_2015_12_G348', 'ABSTRACTdf_2015_12_G349', 'ABSTRACTdf_2015_12_G350', 'ABSTRACTdf_2015_12_G351', 'ABSTRACTdf_2015_12_G352', 'ABSTRACTdf_2015_12_G353', 'ABSTRACTdf_2015_12_G354', 'ABSTRACTdf_2015_12_G355', 'ABSTRACTdf_2015_12_G356', 'ABSTRACTdf_2015_12_G357'])
Creating Dataframe of articles for df_2015_12_G358



0it [00:00, ?it/s][A
1it [00:01,  1.16s/it][A
2it [00:02,  1.25s/it][A
3it [00:03,  1.18s/it][A
4it [00:04,  1.20s/it][A
5it [00:05,  1.17s/it][A
6it [00:07,  1.21s/it][A
7it [00:08,  1.37s/it][A
8it [00:09,  1.28s/it][A
9it [00:11,  1.22s/it][A
10it [00:12,  1.24s/it][A
11it [00:13,  1.18s/it][A
12it [00:14,  1.21s/it][A
13it [00:15,  1.19s/it][A
14it [00:17,  1.28s/it][A
15it [00:18,  1.32s/it][A
16it [00:20,  1.31s/it][A
17it [00:21,  1.24s/it][A
18it [00:22,  1.20s/it][A
19it [00:23,  1.23s/it][A
20it [00:24,  1.19s/it][A
21it [00:25,  1.16s/it][A
22it [00:27,  1.36s/it][A
23it [00:28,  1.35s/it][A
24it [00:30,  1.37s/it][A
25it [00:31,  1.30s/it][A
26it [00:32,  1.25s/it][A
27it [00:33,  1.30s/it][A
28it [00:35,  1.30s/it][A
29it [00:36,  1.24s/it][A
30it [00:38,  1.37s/it][A
31it [00:39,  1.28s/it][A
32it [00:40,  1.24s/it][A
33it [00:41,  1.26s/it][A
34it [00:42,  1.28s/it][A
35it [00:44,  1.31s/it][A
36it [00:45,  1.31s/it][A
37it [00:46,  

dict_keys(['ABSTRACTdf_2015_12_G348', 'ABSTRACTdf_2015_12_G349', 'ABSTRACTdf_2015_12_G350', 'ABSTRACTdf_2015_12_G351', 'ABSTRACTdf_2015_12_G352', 'ABSTRACTdf_2015_12_G353', 'ABSTRACTdf_2015_12_G354', 'ABSTRACTdf_2015_12_G355', 'ABSTRACTdf_2015_12_G356', 'ABSTRACTdf_2015_12_G357', 'ABSTRACTdf_2015_12_G358'])
Creating Dataframe of articles for df_2015_12_G359



0it [00:00, ?it/s][A
1it [00:01,  1.31s/it][A
2it [00:02,  1.17s/it][A
3it [00:03,  1.17s/it][A
4it [00:04,  1.23s/it][A
5it [00:05,  1.19s/it][A
6it [00:07,  1.15s/it][A
7it [00:08,  1.21s/it][A
8it [00:09,  1.26s/it][A
9it [00:10,  1.20s/it][A
10it [00:11,  1.16s/it][A
11it [00:13,  1.16s/it][A
12it [00:14,  1.20s/it][A
13it [00:15,  1.28s/it][A
14it [00:17,  1.34s/it][A
15it [00:18,  1.27s/it][A
16it [00:19,  1.29s/it][A
17it [00:21,  1.37s/it][A
18it [00:22,  1.46s/it][A
19it [00:24,  1.49s/it][A
20it [00:26,  1.62s/it][A
21it [00:27,  1.53s/it][A
22it [00:28,  1.39s/it][A
23it [00:30,  1.35s/it][A
24it [00:31,  1.34s/it][A
25it [00:32,  1.32s/it][A
26it [00:33,  1.25s/it][A
27it [00:35,  1.35s/it][A
28it [00:36,  1.35s/it][A
29it [00:38,  1.34s/it][A
30it [00:39,  1.32s/it][A
31it [00:40,  1.26s/it][A
32it [00:41,  1.23s/it][A
33it [00:42,  1.26s/it][A
34it [00:44,  1.27s/it][A
35it [00:45,  1.33s/it][A
36it [00:46,  1.26s/it][A
37it [00:48,  

dict_keys(['ABSTRACTdf_2015_12_G348', 'ABSTRACTdf_2015_12_G349', 'ABSTRACTdf_2015_12_G350', 'ABSTRACTdf_2015_12_G351', 'ABSTRACTdf_2015_12_G352', 'ABSTRACTdf_2015_12_G353', 'ABSTRACTdf_2015_12_G354', 'ABSTRACTdf_2015_12_G355', 'ABSTRACTdf_2015_12_G356', 'ABSTRACTdf_2015_12_G357', 'ABSTRACTdf_2015_12_G358', 'ABSTRACTdf_2015_12_G359'])
Creating Dataframe of articles for df_2015_12_G360



0it [00:00, ?it/s][A
1it [00:01,  1.46s/it][A
2it [00:02,  1.26s/it][A
3it [00:03,  1.27s/it][A
4it [00:05,  1.38s/it][A
5it [00:06,  1.35s/it][A
6it [00:08,  1.33s/it][A
7it [00:09,  1.29s/it][A
8it [00:10,  1.30s/it][A
9it [00:11,  1.26s/it][A
10it [00:12,  1.20s/it][A
11it [00:13,  1.16s/it][A
12it [00:15,  1.22s/it][A
13it [00:16,  1.25s/it][A
14it [00:17,  1.20s/it][A
15it [00:18,  1.24s/it][A
16it [00:20,  1.27s/it][A
17it [00:21,  1.22s/it][A
18it [00:22,  1.23s/it][A
19it [00:23,  1.20s/it][A
20it [00:25,  1.23s/it][A
21it [00:26,  1.26s/it][A
22it [00:27,  1.32s/it][A
23it [00:29,  1.38s/it][A
24it [00:30,  1.34s/it][A
25it [00:32,  1.42s/it][A
26it [00:33,  1.38s/it][A
27it [00:34,  1.28s/it][A
28it [00:35,  1.28s/it][A
29it [00:36,  1.22s/it][A
30it [00:38,  1.25s/it][A
31it [00:39,  1.27s/it][A
32it [00:41,  1.36s/it][A
33it [00:42,  1.31s/it][A
34it [00:43,  1.28s/it][A
35it [00:44,  1.30s/it][A
36it [00:46,  1.35s/it][A
37it [00:47,  

dict_keys(['ABSTRACTdf_2015_12_G348', 'ABSTRACTdf_2015_12_G349', 'ABSTRACTdf_2015_12_G350', 'ABSTRACTdf_2015_12_G351', 'ABSTRACTdf_2015_12_G352', 'ABSTRACTdf_2015_12_G353', 'ABSTRACTdf_2015_12_G354', 'ABSTRACTdf_2015_12_G355', 'ABSTRACTdf_2015_12_G356', 'ABSTRACTdf_2015_12_G357', 'ABSTRACTdf_2015_12_G358', 'ABSTRACTdf_2015_12_G359', 'ABSTRACTdf_2015_12_G360'])



