In [11]:
import os
import sys

# Resolve the parent of the current working directory and make it importable
# (presumably where the `object_creator` package lives -- confirm against the repo layout).
parent_dir = os.path.dirname(os.path.realpath("."))

# Guard the append so that re-running this cell does not pile up duplicate
# entries in sys.path.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)


## Example of execution
#### This document shows example runs of the pipeline and how to assess a PDF's (paper's) bidirectionality and unidirectionality.

In [16]:
# Let's start by obtaining the metadata for a DOI.
import sys

# Add the `../object_creator` directory to the import path only once, so that
# re-running this cell does not keep appending duplicate entries.
if '../object_creator' not in sys.path:
    sys.path.append('../object_creator')

doi = "10.1016/j.compbiomed.2019.05.002"

# This DOI can be used to create a (fetched) metadata object.
# NOTE(review): the wildcard import is kept because other cells may rely on
# additional names exported by this module; prefer explicit imports once the
# full set of required names is known.
from object_creator.doi_to_metadata import *

# doi_to_metadataObj takes a DOI, queries OpenAlex and creates a Metadata Obj: title, doi, arxiv
meta = doi_to_metadataObj(doi=doi)

# f-strings are used instead of "+" concatenation so that a non-string field
# (for example an arXiv id stored as None -- TODO confirm whether that can occur)
# is printed instead of raising TypeError.
print("here is the metadata that it extracts:")
print(f"-Title: {meta.title}")
print(f"-Doi: {meta.doi}")
print(f"-Arxiv: {meta.arxiv}")

here is the metadata that it extracts:
-Title: Association of genomic subtypes of lower-grade gliomas with shape features automatically extracted by a deep learning algorithm
-Doi: 10.1016/j.compbiomed.2019.05.002
-Arxiv: 1906.03720


In [17]:
# Every object can be converted to a plain dictionary with .to_dict():
print("Example of object to dict function")
print(meta.to_dict())

# Each object type also has a helper that builds a dictionary to be converted to JSON:
#   key   -> the DOI
#   value -> the object's dictionary form
# The result is bound to `metadata_dict` rather than `dict`, so the built-in
# `dict` type is not shadowed for the rest of the kernel session.
print("\n")
print("Here is an example:")
metadata_dict = metadataObj_to_metadataDict(meta)
print(metadata_dict)


Example of object to dict function
{'title': 'Association of genomic subtypes of lower-grade gliomas with shape features automatically extracted by a deep learning algorithm', 'doi': '10.1016/j.compbiomed.2019.05.002', 'arxiv': '1906.03720'}


Here is an example:
{'10.1016/j.compbiomed.2019.05.002': {'title': 'Association of genomic subtypes of lower-grade gliomas with shape features automatically extracted by a deep learning algorithm', 'doi': '10.1016/j.compbiomed.2019.05.002', 'arxiv': '1906.03720'}}


Once the metadata has been obtained, you will need to download the PDF to which the DOI pertains.

We store this information as a DownloadedObj.

In [22]:
from object_creator.create_downloadedObj import *

# Build the downloaded object from the metadata object created earlier.
# The PDF is downloaded into a PDFs directory (created if it does not exist).
dwnldd = meta_to_dwnldd(metadataObj=meta, output_dir=".")

# The metadata is the same as in the metadata object, with file_name and
# file_path added; the file name is a modification of the DOI.
for attr_name in ("file_path", "file_name"):
    print(getattr(dwnldd, attr_name))

# This object also offers the same .to_dict() function, plus another
# dictionary function that creates a dict for JSON.


./PDFs/10-DOT-1016_j-DOT-compbiomed-DOT-2019-DOT-05-DOT-002.pdf
10-DOT-1016_j-DOT-compbiomed-DOT-2019-DOT-05-DOT-002.pdf
