In [3]:
import bs4, requests, re
from bibtexparser.bparser import BibTexParser

In [4]:
# Request configuration: which article to scrape and in what form.
article_id = '1081750'
mode_layout = 'flat'
expformat = 'bibtex'

domain = 'https://dl.acm.org/'
# Browser-like User-Agent header sent with every request.
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:45.0) Gecko/20100101 Firefox/45.0'}

# Citation page URL: domain + path carrying the article id + layout parameter.
article = 'citation.cfm?id={0}&'.format(article_id)
parametrs = 'preflayout={0}'.format(mode_layout)
url = ''.join((domain, article, parametrs))

# BibTeX export URL, shaped like:
# https://dl.acm.org/downformats.cfm?id=2492591&parent_id=&expformat=bibtex
download_path = 'downformats.cfm?id={0}&parent_id=&expformat={1}'.format(article_id, expformat)
url_download_bibtex = domain + download_path

# Show both URLs, one per line.
print('\n'.join((url, url_download_bibtex)))

https://dl.acm.org/citation.cfm?id=1081750&preflayout=flat
https://dl.acm.org/downformats.cfm?id=1081750&parent_id=&expformat=bibtex


In [5]:
# Download the citation page and parse it into a BeautifulSoup tree.
response = requests.get(url=url, headers=headers)
# Fail fast on an HTTP error instead of silently parsing an error page.
response.raise_for_status()
soup = bs4.BeautifulSoup(response.text, 'lxml')

# Download the BibTeX export and keep a copy on disk as 'bibtex.bib'.
bibtex_response = requests.get(url=url_download_bibtex, headers=headers)
bibtex_response.raise_for_status()

# Context managers guarantee the file is flushed and closed before it is
# re-read; an explicit encoding keeps non-ASCII characters portable across
# platforms whose locale encoding is not UTF-8.
with open('bibtex.bib', 'w', encoding='utf-8') as bibtex_file:
    bibtex_file.write(bibtex_response.text)

# Parse the saved file into a list of entry dicts (one dict per BibTeX entry).
with open('bibtex.bib', 'r', encoding='utf-8') as bibtex_file:
    bibtex_dict = BibTexParser(interpolate_strings=False).parse_file(bibtex_file).entries

In [6]:
# Start the article record with the identifiers already known from the config.
article_data = {'id': article_id, 'url': url}

# Copy selected fields from the first BibTeX entry; a field missing from the
# entry is stored as None.
first_entry = bibtex_dict[0]
for field in ('title', 'doi', 'year'):
    article_data[field] = first_entry.get(field)

# Display the record next to the raw parsed entries.
article_data, bibtex_dict

({'doi': '10.1145/1095430.1081750',
  'id': '1081750',
  'title': 'CUTE: A Concolic Unit Testing Engine for C',
  'url': 'https://dl.acm.org/citation.cfm?id=1081750&preflayout=flat',
  'year': '2005'},
 [{'ENTRYTYPE': 'article',
   'ID': 'Sen:2005:CCU:1095430.1081750',
   'acmid': '1081750',
   'address': 'New York, NY, USA',
   'author': 'Sen, Koushik and Marinov, Darko and Agha, Gul',
   'doi': '10.1145/1095430.1081750',
   'issn': '0163-5948',
   'issue_date': 'September 2005',
   'journal': 'SIGSOFT Softw. Eng. Notes',
   'keywords': 'concolic testing, data structure testing, explicit path model-checking, random testing, testing C programs, unit testing',
   'month': BibDataStringExpression([BibDataString('sep')]),
   'number': '5',
   'numpages': '10',
   'pages': '263--272',
   'publisher': 'ACM',
   'title': 'CUTE: A Concolic Unit Testing Engine for C',
   'url': 'http://doi.acm.org/10.1145/1095430.1081750',
   'volume': '30',
   'year': '2005'}])

In [7]:
# Main content container of the citation page.
divmain = soup.find('div', id='divmain')

# Author cells are located by their exact attribute combination.
authors_tags = divmain.find_all(
    'td',
    {'style': 'padding-right:3px;', 'valign': 'top', 'nowrap': 'nowrap'},
)

# One record per author: the stripped cell text plus the profile link,
# built by prefixing the site domain to the anchor's href.
authors = [
    {'name': cell.text.strip(), 'url': domain + cell.find('a')['href']}
    for cell in authors_tags
]

article_data['authors'] = authors
article_data

{'authors': [{'name': 'Koushik Sen',
   'url': 'https://dl.acm.org/author_page.cfm?id=81100399070&coll=DL&dl=ACM&trk=0'},
  {'name': 'Darko Marinov',
   'url': 'https://dl.acm.org/author_page.cfm?id=81100437653&coll=DL&dl=ACM&trk=0'},
  {'name': 'Gul Agha',
   'url': 'https://dl.acm.org/author_page.cfm?id=81100531086&coll=DL&dl=ACM&trk=0'}],
 'doi': '10.1145/1095430.1081750',
 'id': '1081750',
 'title': 'CUTE: A Concolic Unit Testing Engine for C',
 'url': 'https://dl.acm.org/citation.cfm?id=1081750&preflayout=flat',
 'year': '2005'}

In [8]:
# The abstract is taken from the first 'flatbody' div inside the 'layout' div.
layout = soup.find('div', class_='layout')
flatbody = layout.find('div', class_='flatbody')

# Store the div's text with surrounding whitespace removed.
abstract = flatbody.get_text().strip()
article_data['abstract'] = abstract

article_data

{'abstract': 'In unit testing, a program is decomposed into units which are collections of functions. A part of unit can be tested by generating inputs for a single entry function. The entry function may contain pointer arguments, in which case the inputs to the unit are memory graphs. The paper addresses the problem of automating unit testing with memory graphs as inputs. The approach used builds on previous work combining symbolic and concrete execution, and more specifically, using such a combination to generate test inputs to explore all feasible execution paths. The current work develops a method to represent and track constraints that capture the behavior of a symbolic execution of a unit with memory graphs as inputs. Moreover, an efficient constraint solver is proposed to facilitate incremental generation of such test inputs. Finally, CUTE, a tool implementing the method is described together with the results of applying CUTE to real-world examples of C code.',
 'authors': [{'na

In [9]:
# Locate the table cell whose text is exactly 'Conference'.
td = soup.find('td', string='Conference')

if td:
    # Step over the node directly after the label cell to reach the cell that
    # holds the conference name and link.
    td = td.next_sibling.next_sibling
    venue = td.strong.text.strip()
    url_conference = td.a['href']
else:
    # No conference row on this page: record the absence explicitly instead of
    # the string 'None', which used to yield the bogus URL '<domain>None'.
    venue = None
    url_conference = None

# Only build an absolute URL when a conference link was actually found.
article_data['venue'] = {'name': venue,
                         'url': domain + url_conference if url_conference else None}

article_data

{'abstract': 'In unit testing, a program is decomposed into units which are collections of functions. A part of unit can be tested by generating inputs for a single entry function. The entry function may contain pointer arguments, in which case the inputs to the unit are memory graphs. The paper addresses the problem of automating unit testing with memory graphs as inputs. The approach used builds on previous work combining symbolic and concrete execution, and more specifically, using such a combination to generate test inputs to explore all feasible execution paths. The current work develops a method to represent and track constraints that capture the behavior of a symbolic execution of a unit with memory graphs as inputs. Moreover, an efficient constraint solver is proposed to facilitate incremental generation of such test inputs. Finally, CUTE, a tool implementing the method is described together with the results of applying CUTE to real-world examples of C code.',
 'authors': [{'na

In [10]:
# Collect every 'flatbody' section of the page.
flatbody = soup.find_all('div', {'class': 'flatbody'})

# NOTE(review): index 3 is presumably the "Cited By" section - confirm against
# the page layout. For each link in it, keep the part of the href after the
# last '='.
cited_by = [link['href'].rsplit('=', 1)[-1] for link in flatbody[3].find_all('a')]

article_data['cited_by'] = cited_by

article_data

{'abstract': 'In unit testing, a program is decomposed into units which are collections of functions. A part of unit can be tested by generating inputs for a single entry function. The entry function may contain pointer arguments, in which case the inputs to the unit are memory graphs. The paper addresses the problem of automating unit testing with memory graphs as inputs. The approach used builds on previous work combining symbolic and concrete execution, and more specifically, using such a combination to generate test inputs to explore all feasible execution paths. The current work develops a method to represent and track constraints that capture the behavior of a symbolic execution of a unit with memory graphs as inputs. Moreover, an efficient constraint solver is proposed to facilitate incremental generation of such test inputs. Finally, CUTE, a tool implementing the method is described together with the results of applying CUTE to real-world examples of C code.',
 'authors': [{'na