## Import Section

In [153]:
import requests
from bs4 import BeautifulSoup
import csv
import numpy as np
import pandas as pd

## Defining Scraping Function for ACS Nano:
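The scraper works in two steps:

1. Extract the link to the "Abstract" page of each article in the corresponding ACS Nano issue.
2. Collect the title, authors, abstract and keywords from each article page, together with the publication date (derived from volume and number) and the reference link. Articles without a keywords section are skipped.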

In [154]:
def ACS_NANO_scrape(journal_volume_counter, journal_number_counter, timeout=30):
    """Scrape the article meta-data of one ACS Nano issue.

    The table-of-contents page of the issue is fetched first. Every link on
    it whose text is exactly 'Abstract' is then followed, and the article
    page is parsed for title, authors, abstract and keywords.

    Articles whose page has no ``div`` of class ``keywords`` are skipped.

    Parameters
    ----------
    journal_volume_counter : int
        Volume of the issue. Also used to build the publication date, where
        the year is ``journal_volume_counter + 2006``.
    journal_number_counter : int
        Number of the issue within the volume.
    timeout : float, optional
        Seconds handed to every ``requests.get`` call so that an unresponsive
        server cannot block the notebook forever. Defaults to 30.

    Returns
    -------
    list of list
        One ``[title, authors, abstract, keywords, pub_date, link]`` entry per
        scraped article. ``authors`` is a single ', '-separated string. A
        title or abstract element that is missing from the page yields an
        empty string instead of raising ``AttributeError``.
    """
    url_iterable = 'http://pubs.acs.org/toc/ancac3/'+str(journal_volume_counter)+'/'+str(journal_number_counter)
    toc_response = requests.get(url_iterable, timeout=timeout)
    soup = BeautifulSoup(toc_response.content, "html5lib")
    #create publication date from volume and number (identical for the whole issue)
    pub_date = str(journal_volume_counter+2006)+'-'+str(journal_number_counter)
    article_list = []
    for link in soup.find_all('a'):
        if link.text != 'Abstract':
            continue
        href = link.get('href')
        if not href:
            #an anchor without a target cannot be followed
            continue
        #go to the specific article abstract page for pulling data
        full_link = "http://pubs.acs.org%s" % href
        article_response = requests.get(full_link, timeout=timeout)
        sub_soup = BeautifulSoup(article_response.content, "html5lib")
        keywords_all = sub_soup.find("div", {'class': 'keywords'})
        if keywords_all is None:
            #pages without a keywords section are left out of the result
            continue
        keywords = keywords_all.text.replace('Keywords: ', '')
        #pull the article title
        title = sub_soup.find("span", {'class': 'hlFld-Title'})
        #pull the article abstract
        abstract = sub_soup.find('p', {'class': 'articleBody_abstractText'})
        #pull author list into a single string
        authors = sub_soup.find_all("a", {'id': 'authors'})
        authors_str = ', '.join(author.text for author in authors)
        #put all data together into list of list for handing over to pandas
        article_list.append([
            title.text if title is not None else '',
            authors_str,
            abstract.text if abstract is not None else '',
            keywords,
            pub_date,
            full_link,
        ])
    return article_list

# Scraping the desired journal information

ACS Nano is published monthly and is numbered by volume (one per publication year) and number (one per publication month). The loops iterate over the desired ranges and put the meta-data of each issue into a Pandas DataFrame object with column names.
Furthermore, each issue of ACS Nano is exported to its own .csv file.

In [208]:
#column layout shared by every per-issue frame and by the combined frame
column_names = ['Title', 'Authors', 'Abstract', 'Keywords', 'PubDate', 'JournalLink']

#per-issue frames are collected here and concatenated once after the loops;
#DataFrame.append was removed in pandas 2.0 and copied the whole frame on every call
issue_frames = []

for volume in range(8, 9):
    for number in range(1, 13):
        article_data = pd.DataFrame(ACS_NANO_scrape(volume, number), columns=column_names)
        issue_frames.append(article_data)
        #each issue additionally goes into its own .csv file
        exportname = 'ACS_Nano_Vol'+str(volume)+'_Num'+str(number)+'.csv'
        article_data.to_csv(exportname, index=False)

#combined frame over all scraped issues (empty frame if nothing was scraped)
master_df = pd.concat(issue_frames, ignore_index=True) if issue_frames else pd.DataFrame()
