-
Notifications
You must be signed in to change notification settings - Fork 44
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #45 from tleedepriest/load_scopus_csv
Add ability to load Scopus CSV files
- Loading branch information
Showing
5 changed files
with
162 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,117 @@ | ||
""" | ||
Support for loading documents from a Scopus CSV export. | ||
""" | ||
from typing import List, Optional | ||
from ..types import Document, Author, DocumentSet, DocumentIdentifier, Affiliation | ||
from ..common import robust_open | ||
import csv | ||
|
||
|
||
class ScopusCsvAffiliation(Affiliation):
    """Affiliation taken from a Scopus CSV row; it carries only a name."""

    def __init__(self, name):
        # Stored privately and exposed read-only through the `name` property.
        self._name = name

    @property
    def name(self):
        """Return the affiliation text given to the constructor."""
        return self._name
|
||
|
||
class ScopusCsvAuthor(Author):
    """Author taken from a Scopus CSV row: a display name plus one affiliation."""

    def __init__(self, name, affiliation):
        # Both values are stored untouched; the affiliation is wrapped
        # lazily each time `affiliations` is read.
        self._name = name
        self._affiliation = affiliation

    @property
    def name(self):
        """Return the author name given to the constructor."""
        return self._name

    @property
    def affiliations(self):
        """Return a one-element list wrapping the stored affiliation text."""
        wrapped = ScopusCsvAffiliation(self._affiliation)
        return [wrapped]
|
||
|
||
class ScopusCsvDocument(Document):
    """Document backed by a single row of a Scopus CSV export.

    ``entry`` is a mapping from column header to cell text. It is stored
    unchanged and every property reads its column on access. Scalar
    properties return ``None`` when the column is missing or empty;
    ``authors`` returns an empty list in that case.
    """

    def __init__(self, entry):
        doi = entry.get("DOI")
        title = entry.get("Title")
        pubmed_id = entry.get("PubMed ID")
        eid = entry.get("EID")
        identifier = DocumentIdentifier(title, doi=doi, pubmed=pubmed_id, eid=eid)
        super().__init__(identifier)
        self.entry = entry

    def _parse_int_column(self, column: str) -> Optional[int]:
        """Return the integer in ``column``, or ``None`` if absent or unparsable."""
        raw = self.entry.get(column)
        if not raw:
            return None
        try:
            return int(raw)
        except (TypeError, ValueError):
            # Malformed cell: treat it as "unknown" instead of failing the load.
            return None

    @property
    def title(self) -> Optional[str]:
        """Value of the "Title" column."""
        return self.entry.get("Title") or None

    @property
    def authors(self) -> List[ScopusCsvAuthor]:
        """Authors parsed from the "Authors with affiliations" column.

        The cell is read as ``last, initials, affiliation; last, initials,
        affiliation; ...``. When the "Author(s) ID" column yields exactly
        one ID per author, each name is suffixed with `` (ID: <id>)``.
        """
        raw_authors = self.entry.get("Authors with affiliations")
        if not raw_authors:
            return []

        names = []
        affiliations = []
        for chunk in raw_authors.split("; "):
            parts = chunk.split(", ")
            # The first two comma-separated fields are the name; anything
            # after them is the (possibly comma-containing) affiliation.
            names.append(", ".join(parts[:2]))
            affiliations.append(", ".join(parts[2:]))

        # The key may be present with a None value, so `or ""` is needed.
        raw_ids = self.entry.get("Author(s) ID") or ""
        author_ids = [part.strip() for part in raw_ids.split(";")]
        # Only drop the trailing element when it really is the empty string
        # left by a terminating ";" -- otherwise it is a genuine author ID.
        if author_ids and not author_ids[-1]:
            author_ids.pop()

        if len(author_ids) == len(names):
            names = [
                f"{name} (ID: {author_id})"
                for name, author_id in zip(names, author_ids)
            ]

        return [ScopusCsvAuthor(name, aff) for name, aff in zip(names, affiliations)]

    @property
    def publisher(self) -> Optional[str]:
        """Value of the "Publisher" column."""
        return self.entry.get("Publisher") or None

    @property
    def publication_year(self) -> Optional[int]:
        """Integer value of the "Year" column, or ``None`` if not parsable."""
        return self._parse_int_column("Year")

    @property
    def keywords(self) -> Optional[List[str]]:
        """Entries of the "Author Keywords" column, split on ``"; "``."""
        keywords = self.entry.get("Author Keywords")
        if not keywords:
            return None
        return keywords.split("; ")

    @property
    def abstract(self) -> Optional[str]:
        """Value of the "Abstract" column."""
        return self.entry.get("Abstract") or None

    @property
    def citation_count(self) -> Optional[int]:
        """Integer value of the "Cited by" column, or ``None`` if not parsable."""
        return self._parse_int_column("Cited by")

    @property
    def language(self) -> Optional[str]:
        """Value of the "Language of Original Document" column."""
        return self.entry.get("Language of Original Document") or None

    @property
    def publication_source(self) -> Optional[str]:
        """Value of the "Source title" column."""
        return self.entry.get("Source title") or None

    @property
    def source_type(self) -> Optional[str]:
        """Value of the "Document Type" column."""
        return self.entry.get("Document Type") or None
|
||
|
||
def load_scopus_csv(path: str) -> DocumentSet:
    """Import CSV file exported from Scopus.

    The file at ``path`` is opened with ``robust_open`` and read with
    ``csv.DictReader``; each row becomes one ``ScopusCsvDocument`` in the
    returned ``DocumentSet``.
    """
    with robust_open(path) as handle:
        # Materialize all rows while the file is still open.
        documents = list(map(ScopusCsvDocument, csv.DictReader(handle)))
    return DocumentSet(documents)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
from litstudy.sources.scopus_csv import load_scopus_csv | ||
import os | ||
|
||
|
||
def test_load_scopus_csv():
    """Load ``resources/scopus.csv`` (next to this file) and check its contents.

    Every accessor is read for every document as a smoke check; the exact
    field values are asserted for the first document only.
    """
    path = os.path.dirname(__file__) + "/resources/scopus.csv"
    docs = load_scopus_csv(path)

    for index, doc in enumerate(docs):
        # Smoke check: reading any of these must not raise for any row.
        title = doc.title
        id_title = doc.id.title
        for id_field in ("doi", "pubmed", "scopusid"):
            getattr(doc.id, id_field)
        for field in (
            "publication_year",
            "keywords",
            "abstract",
            "citation_count",
            "publication_source",
            "source_type",
        ):
            getattr(doc, field)
        for author in doc.authors:
            _ = author.name
            for aff in author.affiliations:
                _ = aff.name

        # Detailed expectations apply to the first document only.
        if index != 0:
            continue

        assert title == id_title
        assert doc.title == "Scalable molecular dynamics with NAMD"
        assert doc.abstract.startswith("NAMD is a parallel molecular dynamics code")
        assert doc.publication_source == "Journal of Computational Chemistry"
        assert doc.language == "English"
        assert doc.publisher == "John Wiley and Sons Inc."
        assert doc.citation_count == 13169
        assert doc.keywords == [
            "Biomolecular simulation",
            "Molecular dynamics",
            "Parallel computing",
        ]
        assert doc.publication_year == 2005

        assert len(doc.authors) == 10
        assert doc.authors[0].name == "Phillips, J.C. (ID: 57202138757)"