In [5]:
#%pip install pybliometrics
#%pip install habanero
#%pip install crossref_commons


import os
from pybliometrics.scopus import AbstractRetrieval
from habanero import Crossref
from crossref_commons.iteration import iterate_publications_as_json
from crossref_commons.retrieval import get_publication_as_json
from pybliometrics.scopus.exception import Scopus429Error
import time
import networkx as nx

API_KEY = "8a456d9c37dfee8ad35feabdb6d73bac"
os.environ["SCOPUS_API_KEY"] = API_KEY

def get_paper_by_doi(doi):
    cr = Crossref()
    return cr.works(ids=doi).get("message")

def get_citations(doi):
    cited_by = []
    try:
        for pub in iterate_publications_as_json(cites=doi):
            cited_by.append(pub)
        return cited_by
    except Exception as e:
        print(f"Error getting citations for {doi}: {e}")
        return []

def get_paper_data_from_crossref(pub):
    doi = pub.get("DOI")
    title = pub.get("title")[0] if pub.get("title") else None
    year = pub.get("created").get("date-parts")[0][0]
    authors = [author.get("given") + " " + author.get("family") for author in pub.get("author")] if pub.get("author") else []
    abstract = None  # Crossref often doesn't have abstracts
    return doi, title, year, authors, abstract

def get_paper_data_from_scopus(doi):
    try:
        ab = AbstractRetrieval(doi, view="META")
        title = ab.title
        year = ab.coverDate.split("-")[0]
        authors = [au.given_name + " " + au.surname for au in ab.authors]
        abstract = ab.abstract
        return doi, title, year, authors, abstract
    except Scopus429Error:
        time.sleep(60)
        return get_paper_data_from_scopus(doi)
    except Exception as e:
        print(f"Error getting data for {doi} from Scopus: {e}")
        return None

def add_paper_to_network(G, paper_data):
    doi, title, year, authors, abstract = paper_data
    if doi not in G:
        G.add_node(doi, title=title, year=year, authors=authors, abstract=abstract)

def add_citation_to_network(G, citing_doi, cited_doi):
    if citing_doi not in G:
        paper_data = get_paper_data_from_crossref(get_publication_as_json(citing_doi))
        if not paper_data[4]:
            paper_data = get_paper_data_from_scopus(citing_doi)
        add_paper_to_network(G, paper_data)
    G.add_edge(citing_doi, cited_doi)

def main():
    G = nx.DiGraph()
    initial_doi = "10.1109/CVPR.2016.91"

    # Add initial paper to the network
    paper_data = get_paper_data_from_crossref(get_paper_by_doi(initial_doi))
    if not paper_data[4]:  # if no abstract from Crossref, try Scopus
        paper_data = get_paper_data_from_scopus(initial_doi)
    add_paper_to_network(G, paper_data)

    # Add papers that cite the initial paper
    for pub in get_citations(initial_doi):
        paper_data = get_paper_data_from_crossref(pub)
        if not paper_data[4]:
            paper_data = get_paper_data_from_scopus(paper_data[0])
        add_paper_to_network(G, paper_data)
        add_citation_to_network(G, paper_data[0], initial_doi)

    # Add citations of those papers
    for node in list(G.nodes())[1:]:  # skipping the initial paper
        for pub in get_citations(node):
            paper_data = get_paper_data_from_crossref(pub)
            if not paper_data[4]:
                paper_data = get_paper_data_from_scopus(paper_data[0])
            add_paper_to_network(G, paper_data)
            add_citation_to_network(G, paper_data[0], node)

    # Export the network to .graphml format
    nx.write_graphml(G, "citation_network_El.graphml")

if __name__ == "__main__":
    main()


Error getting data for 10.1109/CVPR.2016.91 from Scopus: 'NoneType' object is not iterable


TypeError: cannot unpack non-iterable NoneType object

In [13]:
import os
from pybliometrics.scopus import AbstractRetrieval
from habanero import Crossref
from crossref_commons.iteration import iterate_publications_as_json
from crossref_commons.retrieval import get_publication_as_json
from pybliometrics.scopus.exception import Scopus429Error
import time
import networkx as nx
import lxml

API_KEY = "8a456d9c37dfee8ad35feabdb6d73bac"
os.environ["SCOPUS_API_KEY"] = API_KEY

def get_paper_by_doi(doi):
    cr = Crossref()
    return cr.works(ids=doi).get("message")

def get_citations(doi):
    cited_by = []
    cr = Crossref()
    try:
        response = cr.works(query=f"doi:{doi}")
        if response.get("message") and response.get("message").get("items"):
            for item in response["message"]["items"]:
                cited_by.append(item)
        return cited_by
    except Exception as e:
        print(f"Error getting citations for {doi}: {e}")
        return []



def get_paper_data_from_crossref(pub):
    doi = pub.get("DOI")
    title = pub.get("title")[0] if pub.get("title") else None
    year = pub.get("created").get("date-parts")[0][0]
    
    authors = []
    if pub.get("author"):
        for author in pub.get("author"):
            given = author.get("given") or ""
            family = author.get("family") or ""
            authors.append(given + " " + family)
    
    return doi, title, year, authors


def get_paper_data_from_scopus(doi):
    try:
        ab = AbstractRetrieval(doi, view="META")
        title = ab.title
        year = ab.coverDate.split("-")[0]
        authors = [au.given_name + " " + au.surname for au in ab.authors]
        return doi, title, year, authors
    except Scopus429Error:
        time.sleep(60)
        return get_paper_data_from_scopus(doi)
    except Exception as e:
        print(f"Error getting data for {doi} from Scopus: {e}")
        return None

def add_paper_to_network(G, paper_data):
    doi, title, year, authors = paper_data
    authors_str = "; ".join(authors) if authors else ""
    title = title if title else "Unknown Title"
    year = year if year else "Unknown Year"
    if doi and doi not in G:
        G.add_node(doi, title=title, year=year, authors=authors_str)



def add_citation_to_network(G, citing_doi, cited_doi):
    if citing_doi not in G:
        paper_data = get_paper_data_from_crossref(get_publication_as_json(citing_doi))
        if not paper_data:
            paper_data = get_paper_data_from_scopus(citing_doi)
        add_paper_to_network(G, paper_data)
    G.add_edge(citing_doi, cited_doi)

def main():
    G = nx.DiGraph()
    initial_doi = "10.1109/CVPR.2016.91"

    # Add initial paper to the network
    paper_data = get_paper_data_from_crossref(get_paper_by_doi(initial_doi))
    if not paper_data:
        paper_data = get_paper_data_from_scopus(initial_doi)
    add_paper_to_network(G, paper_data)

    # Add papers that cite the initial paper
    for pub in get_citations(initial_doi):
        paper_data = get_paper_data_from_crossref(pub)
        if not paper_data:
            paper_data = get_paper_data_from_scopus(paper_data[0])
        add_paper_to_network(G, paper_data)
        add_citation_to_network(G, paper_data[0], initial_doi)

    # Add citations of those papers
    for node in list(G.nodes())[1:]:  # skipping the initial paper
        for pub in get_citations(node):
            paper_data = get_paper_data_from_crossref(pub)
            if not paper_data:
                paper_data = get_paper_data_from_scopus(paper_data[0])
            add_paper_to_network(G, paper_data)
            add_citation_to_network(G, paper_data[0], node)

    # Export the network to .graphml format
    nx.write_graphml(G, "citation_network_El.graphml")

if __name__ == "__main__":
    main()


In [14]:
import os
from pybliometrics.scopus import AbstractRetrieval
from habanero import Crossref
from crossref_commons.iteration import iterate_publications_as_json
from crossref_commons.retrieval import get_publication_as_json
from pybliometrics.scopus.exception import Scopus429Error
import time
import networkx as nx
import lxml

API_KEY = "8a456d9c37dfee8ad35feabdb6d73bac"
os.environ["SCOPUS_API_KEY"] = API_KEY

def get_paper_by_doi(doi):
    cr = Crossref()
    return cr.works(ids=doi).get("message")

def get_citations(doi):
    cited_by = []
    cr = Crossref()
    try:
        response = cr.works(query=f"doi:{doi}")
        if response.get("message") and response.get("message").get("items"):
            for item in response["message"]["items"]:
                cited_by.append(item)
        return cited_by
    except Exception as e:
        print(f"Error getting citations for {doi}: {e}")
        return []



def get_paper_data_from_crossref(pub):
    doi = pub.get("DOI")
    title = pub.get("title")[0] if pub.get("title") else None
    year = pub.get("created").get("date-parts")[0][0]
    
    authors = []
    if pub.get("author"):
        for author in pub.get("author"):
            given = author.get("given") or ""
            family = author.get("family") or ""
            authors.append(given + " " + family)
    
    return doi, title, year, authors


def get_paper_data_from_scopus(doi):
    try:
        ab = AbstractRetrieval(doi, view="META")
        title = ab.title
        year = ab.coverDate.split("-")[0]
        authors = [au.given_name + " " + au.surname for au in ab.authors]
        return doi, title, year, authors
    except Scopus429Error:
        time.sleep(60)
        return get_paper_data_from_scopus(doi)
    except Exception as e:
        print(f"Error getting data for {doi} from Scopus: {e}")
        return None

def add_paper_to_network(G, paper_data):
    doi, title, year, authors = paper_data
    authors_str = "; ".join(authors) if authors else ""
    title = title if title else "Unknown Title"
    year = year if year else "Unknown Year"
    if doi and doi not in G:
        G.add_node(doi, title=title, year=year, authors=authors_str)



def add_citation_to_network(G, citing_doi, cited_doi):
    if citing_doi not in G:
        paper_data = get_paper_data_from_crossref(get_publication_as_json(citing_doi))
        if not paper_data:
            paper_data = get_paper_data_from_scopus(citing_doi)
        add_paper_to_network(G, paper_data)
    G.add_edge(citing_doi, cited_doi)

# ... [previous imports and functions]

def fetch_and_add_to_network(G, doi, depth=2):
    if depth <= 0:
        return

    # Introduce a delay to avoid hitting rate limits
    time.sleep(2)

    # Fetch citations for the given DOI
    citations = get_citations(doi)
    for pub in citations:
        paper_data = get_paper_data_from_crossref(pub)
        if not paper_data:
            paper_data = get_paper_data_from_scopus(paper_data[0])
        add_paper_to_network(G, paper_data)
        add_citation_to_network(G, paper_data[0], doi)

        # Recursively fetch citations for the new paper
        fetch_and_add_to_network(G, paper_data[0], depth=depth-1)

def main():
    G = nx.DiGraph()
    initial_doi = "10.1109/CVPR.2016.91"

    # Add initial paper to the network
    paper_data = get_paper_data_from_crossref(get_paper_by_doi(initial_doi))
    if not paper_data:
        paper_data = get_paper_data_from_scopus(initial_doi)
    add_paper_to_network(G, paper_data)

    # Fetch citations recursively for the initial paper
    fetch_and_add_to_network(G, initial_doi, depth=2)

    # Export the network to .graphml format
    nx.write_graphml(G, "citation_network_El.graphml")

if __name__ == "__main__":
    main()


In [16]:
import os
from pybliometrics.scopus import AbstractRetrieval
from habanero import Crossref
from crossref_commons.iteration import iterate_publications_as_json
from crossref_commons.retrieval import get_publication_as_json
from pybliometrics.scopus.exception import Scopus429Error
import time
import networkx as nx
import lxml

API_KEY = "8a456d9c37dfee8ad35feabdb6d73bac"
os.environ["SCOPUS_API_KEY"] = API_KEY

def get_paper_by_doi(doi):
    cr = Crossref()
    return cr.works(ids=doi).get("message")

def get_citations(doi):
    cited_by = []
    cr = Crossref()
    offset = 200
    items_per_page = 100  # Adjust as needed

    while True:
        try:
            response = cr.works(query=f"doi:{doi}", offset=offset, limit=items_per_page)
            if response.get("message") and response.get("message").get("items"):
                items = response["message"]["items"]
                cited_by.extend(items)

                # If less items than expected are returned, it's the last page
                if len(items) < items_per_page:
                    break

                offset += items_per_page
            else:
                break

        except Exception as e:
            print(f"Error getting citations for {doi} at offset {offset}: {e}")
            break

    return cited_by




def get_paper_data_from_crossref(pub):
    doi = pub.get("DOI")
    title = pub.get("title")[0] if pub.get("title") else None
    year = pub.get("created").get("date-parts")[0][0]
    
    authors = []
    if pub.get("author"):
        for author in pub.get("author"):
            given = author.get("given") or ""
            family = author.get("family") or ""
            authors.append(given + " " + family)
    
    return doi, title, year, authors


def get_paper_data_from_scopus(doi):
    try:
        ab = AbstractRetrieval(doi, view="META")
        title = ab.title
        year = ab.coverDate.split("-")[0]
        authors = [au.given_name + " " + au.surname for au in ab.authors]
        return doi, title, year, authors
    except Scopus429Error:
        time.sleep(60)
        return get_paper_data_from_scopus(doi)
    except Exception as e:
        print(f"Error getting data for {doi} from Scopus: {e}")
        return None

def add_paper_to_network(G, paper_data):
    doi, title, year, authors = paper_data
    authors_str = "; ".join(authors) if authors else ""
    title = title if title else "Unknown Title"
    year = year if year else "Unknown Year"
    if doi and doi not in G:
        G.add_node(doi, title=title, year=year, authors=authors_str)



def add_citation_to_network(G, citing_doi, cited_doi):
    if citing_doi not in G:
        paper_data = get_paper_data_from_crossref(get_publication_as_json(citing_doi))
        if not paper_data:
            paper_data = get_paper_data_from_scopus(citing_doi)
        add_paper_to_network(G, paper_data)
    G.add_edge(citing_doi, cited_doi)

# ... [previous imports and functions]

def fetch_and_add_to_network(G, doi, depth=1):
    if depth <= 0:
        return

    # Introduce a delay to avoid hitting rate limits
    time.sleep(2)

    # Fetch citations for the given DOI
    citations = get_citations(doi)
    for pub in citations:
        paper_data = get_paper_data_from_crossref(pub)
        if not paper_data:
            paper_data = get_paper_data_from_scopus(paper_data[0])
        add_paper_to_network(G, paper_data)
        add_citation_to_network(G, paper_data[0], doi)

        # Recursively fetch citations for the new paper
        fetch_and_add_to_network(G, paper_data[0], depth=depth-1)

def main():
    G = nx.DiGraph()
    initial_doi = "10.1109/CVPR.2016.91"

    # Add initial paper to the network
    paper_data = get_paper_data_from_crossref(get_paper_by_doi(initial_doi))
    if not paper_data:
        paper_data = get_paper_data_from_scopus(initial_doi)
    add_paper_to_network(G, paper_data)

    # Fetch citations recursively for the initial paper
    fetch_and_add_to_network(G, initial_doi, depth=1)

    # Export the network to .graphml format
    nx.write_graphml(G, "citation_network_El.graphml")

if __name__ == "__main__":
    main()


Error getting citations for 10.1109/CVPR.2016.91 at offset 10000: 400 Client Error: Bad Request for url: https://api.crossref.org/works?query=doi%3A10.1109%2FCVPR.2016.91&offset=10000&rows=100


In [18]:
import os
from pybliometrics.scopus import AbstractRetrieval
from habanero import Crossref
from crossref_commons.iteration import iterate_publications_as_json
from crossref_commons.retrieval import get_publication_as_json
from pybliometrics.scopus.exception import Scopus429Error
import time
import networkx as nx
import lxml

API_KEY = "8a456d9c37dfee8ad35feabdb6d73bac"
os.environ["SCOPUS_API_KEY"] = API_KEY

def get_paper_by_doi(doi):
    cr = Crossref()
    return cr.works(ids=doi).get("message")

def get_citations(doi):
    cited_by = []
    cr = Crossref()
    offset = 0
    items_per_page = 50  # Set limit to 50

    try:
        response = cr.works(query=f"doi:{doi}", offset=offset, limit=items_per_page)
        if response.get("message") and response.get("message").get("items"):
            items = response["message"]["items"]
            cited_by.extend(items[:50])  # Ensure only 50 items are added

    except Exception as e:
        print(f"Error getting citations for {doi} at offset {offset}: {e}")

    return cited_by





def get_paper_data_from_crossref(pub):
    doi = pub.get("DOI")
    title = pub.get("title")[0] if pub.get("title") else None
    year = pub.get("created").get("date-parts")[0][0]
    
    authors = []
    if pub.get("author"):
        for author in pub.get("author"):
            given = author.get("given") or ""
            family = author.get("family") or ""
            authors.append(given + " " + family)
    
    return doi, title, year, authors


def get_paper_data_from_scopus(doi):
    try:
        ab = AbstractRetrieval(doi, view="META")
        title = ab.title
        year = ab.coverDate.split("-")[0]
        authors = [au.given_name + " " + au.surname for au in ab.authors]
        return doi, title, year, authors
    except Scopus429Error:
        time.sleep(60)
        return get_paper_data_from_scopus(doi)
    except Exception as e:
        print(f"Error getting data for {doi} from Scopus: {e}")
        return None

def add_paper_to_network(G, paper_data):
    doi, title, year, authors = paper_data
    authors_str = "; ".join(authors) if authors else ""
    title = title if title else "Unknown Title"
    year = year if year else "Unknown Year"
    if doi and doi not in G:
        G.add_node(doi, title=title, year=year, authors=authors_str)



def add_citation_to_network(G, citing_doi, cited_doi):
    if citing_doi not in G:
        paper_data = get_paper_data_from_crossref(get_publication_as_json(citing_doi))
        if not paper_data:
            paper_data = get_paper_data_from_scopus(citing_doi)
        add_paper_to_network(G, paper_data)
    G.add_edge(citing_doi, cited_doi)

# ... [previous imports and functions]

def fetch_and_add_to_network(G, doi, depth=1):
    if depth <= 0:
        return

    # Introduce a delay to avoid hitting rate limits
    time.sleep(2)

    # Fetch citations for the given DOI
    citations = get_citations(doi)
    for pub in citations:
        paper_data = get_paper_data_from_crossref(pub)
        if not paper_data:
            paper_data = get_paper_data_from_scopus(paper_data[0])
        add_paper_to_network(G, paper_data)
        add_citation_to_network(G, paper_data[0], doi)

        # Recursively fetch citations for the new paper
        fetch_and_add_to_network(G, paper_data[0], depth=depth-1)

def main():
    G = nx.DiGraph()
    initial_doi = "10.1109/CVPR.2016.91"

    # Add initial paper to the network
    paper_data = get_paper_data_from_crossref(get_paper_by_doi(initial_doi))
    if not paper_data:
        paper_data = get_paper_data_from_scopus(initial_doi)
    add_paper_to_network(G, paper_data)

    # Fetch citations recursively for the initial paper
    fetch_and_add_to_network(G, initial_doi, depth=2)

    # Export the network to .graphml format
    nx.write_graphml(G, "citation_network_El.graphml")

if __name__ == "__main__":
    main()


In [19]:
import os
from pybliometrics.scopus import AbstractRetrieval
from habanero import Crossref
from crossref_commons.iteration import iterate_publications_as_json
from crossref_commons.retrieval import get_publication_as_json
from pybliometrics.scopus.exception import Scopus429Error
import time
import networkx as nx
import lxml

API_KEY = "8a456d9c37dfee8ad35feabdb6d73bac"
os.environ["SCOPUS_API_KEY"] = API_KEY

def get_paper_by_doi(doi):
    cr = Crossref()
    return cr.works(ids=doi).get("message")

# ... [previous imports]

def valid_publication(pub):
    """Check if the publication meets the given requirements."""
    # Check publication year
    year = pub.get("created").get("date-parts")[0][0]
    if year < 2016:
        return False

    # Check if the title has at least 5 words
    title = pub.get("title")[0] if pub.get("title") else ""
    if len(title.split()) < 5:
        return False

    # Check if authors are present
    if not pub.get("author"):
        return False

    return True

def get_citations(doi):
    cited_by = []
    cr = Crossref()
    offset = 0
    items_per_page = 1000  # Requesting a larger number to filter out and retain 50 valid ones

    try:
        while len(cited_by) < 50:  # Keep fetching until we have 50 valid citations
            response = cr.works(query=f"doi:{doi}", offset=offset, limit=items_per_page)
            if response.get("message") and response.get("message").get("items"):
                items = response["message"]["items"]
                for item in items:
                    if valid_publication(item):
                        cited_by.append(item)
                        if len(cited_by) >= 50:
                            break
            offset += items_per_page

    except Exception as e:
        print(f"Error getting citations for {doi} at offset {offset}: {e}")

    return cited_by[:50]  # Ensure only 50 items are returned

# ... [rest of the code remains the same]

if __name__ == "__main__":
    main()






def get_paper_data_from_crossref(pub):
    doi = pub.get("DOI")
    title = pub.get("title")[0] if pub.get("title") else None
    year = pub.get("created").get("date-parts")[0][0]
    
    authors = []
    if pub.get("author"):
        for author in pub.get("author"):
            given = author.get("given") or ""
            family = author.get("family") or ""
            authors.append(given + " " + family)
    
    return doi, title, year, authors


def get_paper_data_from_scopus(doi):
    try:
        ab = AbstractRetrieval(doi, view="META")
        title = ab.title
        year = ab.coverDate.split("-")[0]
        authors = [au.given_name + " " + au.surname for au in ab.authors]
        return doi, title, year, authors
    except Scopus429Error:
        time.sleep(60)
        return get_paper_data_from_scopus(doi)
    except Exception as e:
        print(f"Error getting data for {doi} from Scopus: {e}")
        return None

def add_paper_to_network(G, paper_data):
    doi, title, year, authors = paper_data
    authors_str = "; ".join(authors) if authors else ""
    title = title if title else "Unknown Title"
    year = year if year else "Unknown Year"
    if doi and doi not in G:
        G.add_node(doi, title=title, year=year, authors=authors_str)



def add_citation_to_network(G, citing_doi, cited_doi):
    if citing_doi not in G:
        paper_data = get_paper_data_from_crossref(get_publication_as_json(citing_doi))
        if not paper_data:
            paper_data = get_paper_data_from_scopus(citing_doi)
        add_paper_to_network(G, paper_data)
    G.add_edge(citing_doi, cited_doi)

# ... [previous imports and functions]

def fetch_and_add_to_network(G, doi, depth=1):
    if depth <= 0:
        return

    # Introduce a delay to avoid hitting rate limits
    time.sleep(2)

    # Fetch citations for the given DOI
    citations = get_citations(doi)
    for pub in citations:
        paper_data = get_paper_data_from_crossref(pub)
        if not paper_data:
            paper_data = get_paper_data_from_scopus(paper_data[0])
        add_paper_to_network(G, paper_data)
        add_citation_to_network(G, paper_data[0], doi)

        # Recursively fetch citations for the new paper
        fetch_and_add_to_network(G, paper_data[0], depth=depth-1)

def main():
    G = nx.DiGraph()
    initial_doi = "10.1109/CVPR.2016.91"

    # Add initial paper to the network
    paper_data = get_paper_data_from_crossref(get_paper_by_doi(initial_doi))
    if not paper_data:
        paper_data = get_paper_data_from_scopus(initial_doi)
    add_paper_to_network(G, paper_data)

    # Fetch citations recursively for the initial paper
    fetch_and_add_to_network(G, initial_doi, depth=2)

    # Export the network to .graphml format
    nx.write_graphml(G, "citation_network_El.graphml")

if __name__ == "__main__":
    main()

In [21]:
import os
from pybliometrics.scopus import AbstractRetrieval
from habanero import Crossref
from crossref_commons.iteration import iterate_publications_as_json
from crossref_commons.retrieval import get_publication_as_json
from pybliometrics.scopus.exception import Scopus429Error
import time
import networkx as nx

API_KEY = "8a456d9c37dfee8ad35feabdb6d73bac"
os.environ["SCOPUS_API_KEY"] = API_KEY

def get_paper_by_doi(doi):
    cr = Crossref()
    return cr.works(ids=doi).get("message")

def valid_publication(pub):
    year = pub.get("created").get("date-parts")[0][0]
    if year < 2016:
        return False

    title = pub.get("title")[0] if pub.get("title") else ""
    if len(title.split()) < 5:
        return False

    if not pub.get("author"):
        return False

    return True

def get_citations(doi):
    cited_by = []
    cr = Crossref()
    offset = 0
    items_per_page = 1000

    try:
        while len(cited_by) < 50:
            response = cr.works(query=f"doi:{doi}", offset=offset, limit=items_per_page)
            if response.get("message") and response.get("message").get("items"):
                items = response["message"]["items"]
                for item in items:
                    if valid_publication(item):
                        paper_data = get_paper_data_from_crossref(item)
                        if not paper_data[-1]:  # Check if abstract is None
                            paper_data = get_paper_data_from_scopus(paper_data[0])
                        if paper_data[-1]:  # Check if abstract is not None after Scopus fetch
                            cited_by.append(item)
                            if len(cited_by) >= 50:
                                break
            offset += items_per_page

    except Exception as e:
        print(f"Error getting citations for {doi} at offset {offset}: {e}")

    return cited_by[:50]

def get_paper_data_from_crossref(pub):
    doi = pub.get("DOI")
    title = pub.get("title")[0] if pub.get("title") else None
    year = pub.get("created").get("date-parts")[0][0]
    authors = []
    if pub.get("author"):
        for author in pub.get("author"):
            given = author.get("given") or ""
            family = author.get("family") or ""
            authors.append(given + " " + family)
    abstract = None
    return doi, title, year, authors, abstract

def get_paper_data_from_scopus(doi):
    try:
        ab = AbstractRetrieval(doi, view="META")
        title = ab.title
        year = ab.coverDate.split("-")[0]
        authors = [au.given_name + " " + au.surname for au in ab.authors]
        abstract = ab.abstract if hasattr(ab, 'abstract') else None
        return doi, title, year, authors, abstract
    except Scopus429Error:
        time.sleep(60)
        return get_paper_data_from_scopus(doi)
    except Exception as e:
        print(f"Error getting data for {doi} from Scopus: {e}")
        return None

def add_paper_to_network(G, paper_data):
    doi, title, year, authors, abstract = paper_data
    authors_str = "; ".join(authors) if authors else ""
    title = title if title else "Unknown Title"
    year = year if year else "Unknown Year"
    abstract = abstract if abstract else "No Abstract"
    if doi and doi not in G:
        G.add_node(doi, title=title, year=year, authors=authors_str, abstract=abstract)

def add_citation_to_network(G, citing_doi, cited_doi):
    if citing_doi not in G:
        paper_data = get_paper_data_from_crossref(get_publication_as_json(citing_doi))
        if not paper_data[-1]:  # Check if abstract is None
            paper_data = get_paper_data_from_scopus(citing_doi)
        if paper_data[-1]:  # Check if abstract is not None after Scopus fetch
            add_paper_to_network(G, paper_data)
            G.add_edge(citing_doi, cited_doi)

def fetch_and_add_to_network(G, doi, depth=1):
    if depth <= 0:
        return

    time.sleep(2)

    citations = get_citations(doi)
    for pub in citations:
        paper_data = get_paper_data_from_crossref(pub)
        if not paper_data[-1]:  # Check if abstract is None
            paper_data = get_paper_data_from_scopus(paper_data[0])
        if paper_data[-1]:  # Check if abstract is not None after Scopus fetch
            add_paper_to_network(G, paper_data)
            add_citation_to_network(G, paper_data[0], doi)

            fetch_and_add_to_network(G, paper_data[0], depth=depth-1)

def main():
    G = nx.DiGraph()
    initial_doi = "10.1109/CVPR.2016.91"

    paper_data = get_paper_data_from_crossref(get_paper_by_doi(initial_doi))
    if not paper_data or not paper_data[-1]:  # Check if data is None or abstract is None
        paper_data = get_paper_data_from_scopus(initial_doi)
    if paper_data and paper_data[-1]:  # Check if data is not None and abstract is not None after Scopus fetch
        add_paper_to_network(G, paper_data)

    fetch_and_add_to_network(G, initial_doi, depth=2)

    nx.write_graphml(G, "citation_network_El_Abs.graphml")

if __name__ == "__main__":
    main()


Error getting data for 10.1109/CVPR.2016.91 from Scopus: 'NoneType' object is not iterable
Error getting data for 10.1007/s00115-016-0148-3 from Scopus: 'NoneType' object is not iterable
Error getting citations for 10.1109/CVPR.2016.91 at offset 0: 'NoneType' object is not subscriptable


In [24]:
import os
import requests
import time
import networkx as nx
from pybliometrics.scopus import AbstractRetrieval
from habanero import Crossref

API_KEY = "8a456d9c37dfee8ad35feabdb6d73bac"
os.environ["SCOPUS_API_KEY"] = API_KEY

def get_abstract_from_semanticscholar(doi):
    url = f"https://api.semanticscholar.org/v1/paper/{doi}"
    response = requests.get(url)
    data = response.json()
    return data.get('abstract')

def get_paper_by_doi(doi):
    cr = Crossref()
    return cr.works(ids=doi).get("message")

def valid_publication(pub):
    """Check if the publication meets the given requirements."""
    year = pub.get("created").get("date-parts")[0][0]
    if year < 2016:
        return False

    title = pub.get("title")[0] if pub.get("title") else ""
    if len(title.split()) < 5:
        return False

    if not pub.get("author"):
        return False

    return True

def get_citations(doi):
    cited_by = []
    cr = Crossref()
    offset = 0
    items_per_page = 1000

    while len(cited_by) < 50:
        response = cr.works(query=f"doi:{doi}", offset=offset, limit=items_per_page)
        if response.get("message") and response.get("message").get("items"):
            items = response["message"]["items"]
            for item in items:
                if valid_publication(item):
                    cited_by.append(item)
                    if len(cited_by) >= 50:
                        break
        offset += items_per_page

    return cited_by[:50]

def get_paper_data_from_crossref(pub):
    doi = pub.get("DOI")
    title = pub.get("title")[0] if pub.get("title") else None
    year = pub.get("created").get("date-parts")[0][0]
    authors = [f"{author.get('given', '')} {author.get('family', '')}" for author in pub.get("author", [])]
    abstract = get_abstract_from_semanticscholar(doi)
    return doi, title, year, authors, abstract

def get_paper_data_from_scopus(doi):
    try:
        ab = AbstractRetrieval(doi, view="META")
        title = ab.title
        year = ab.coverDate.split("-")[0]
        authors = [f"{au.given_name} {au.surname}" for au in ab.authors]
        abstract = get_abstract_from_semanticscholar(doi)
        return doi, title, year, authors, abstract
    except Exception as e:
        print(f"Error getting data for {doi} from Scopus: {e}")
        return None, None, None, None, None  # Return tuple with None values


def add_paper_to_network(G, paper_data):
    doi, title, year, authors, abstract = paper_data
    if not abstract:  # Check if abstract is available
        return
    authors_str = "; ".join(authors) if authors else ""
    title = title if title else "Unknown Title"
    year = year if year else "Unknown Year"
    G.add_node(doi, title=title, year=year, authors=authors_str, abstract=abstract)

def add_citation_to_network(G, citing_doi, cited_doi):
    paper_data = get_paper_data_from_crossref(get_publication_as_json(citing_doi))
    if not paper_data[-1]:  # Check if abstract is None
        paper_data = get_paper_data_from_scopus(citing_doi)
    if paper_data[-1]:  # Check if abstract is not None after fetching
        add_paper_to_network(G, paper_data)
        G.add_edge(citing_doi, cited_doi)

def fetch_and_add_to_network(G, doi, depth=1):
    if depth <= 0:
        return

    time.sleep(2)  # Introduce a delay to avoid hitting rate limits

    citations = get_citations(doi)
    for pub in citations:
        paper_data = get_paper_data_from_crossref(pub)
        if not paper_data[-1]:  # Check if abstract is None
            paper_data = get_paper_data_from_scopus(paper_data[0])
        if paper_data and paper_data[-1]:  # Check if paper_data and abstract are not None
            add_paper_to_network(G, paper_data)
            add_citation_to_network(G, paper_data[0], doi)
            fetch_and_add_to_network(G, paper_data[0], depth=depth-1)

def main():
    G = nx.DiGraph()
    initial_doi = "10.1109/CVPR.2016.91"
    paper_data = get_paper_data_from_crossref(get_paper_by_doi(initial_doi))
    if not paper_data[-1]:  # Check if abstract is None
        paper_data = get_paper_data_from_scopus(initial_doi)
    if paper_data[-1]:  # Check if abstract is not None after fetching
        add_paper_to_network(G, paper_data)
        fetch_and_add_to_network(G, initial_doi, depth=1)
        nx.write_graphml(G, "citation_network_El_Abs.graphml")

if __name__ == "__main__":
    main()


Error getting data for 10.1007/s00115-016-0148-3 from Scopus: 'NoneType' object is not iterable
Error getting data for 10.1016/j.hjc.2016.03.002 from Scopus: 'NoneType' object is not iterable
Error getting data for 10.1007/s11606-016-3822-z from Scopus: 'NoneType' object is not iterable
Error getting data for 10.1016/j.jiac.2016.07.010 from Scopus: 'NoneType' object is not iterable
Error getting data for 10.1016/j.surg.2020.08.011 from Scopus: 'NoneType' object is not iterable
Error getting data for 10.1149/ma2016-02/9/1109 from Scopus: The resource specified cannot be found.
Error getting data for 10.1016/j.medengphy.2016.07.012 from Scopus: 'NoneType' object is not iterable
Error getting data for 10.1016/s1473-3099(16)30338-3 from Scopus: 'NoneType' object is not iterable
Error getting data for 10.5194/os-2016-41-rc1 from Scopus: The resource specified cannot be found.
Error getting data for 10.1016/j.ijbiomac.2016.06.076 from Scopus: 'NoneType' object is not iterable
Error getting d

In [25]:
import os
import requests
import time
import networkx as nx
from pybliometrics.scopus import AbstractRetrieval
from habanero import Crossref

API_KEY = "8a456d9c37dfee8ad35feabdb6d73bac"
os.environ["SCOPUS_API_KEY"] = API_KEY

def get_abstract_from_semanticscholar(doi):
    url = f"https://api.semanticscholar.org/v1/paper/{doi}"
    response = requests.get(url)
    data = response.json()
    return data.get('abstract')

def get_paper_by_doi(doi):
    cr = Crossref()
    return cr.works(ids=doi).get("message")

def valid_publication(pub):
    """Check if the publication meets the given requirements."""
    year = pub.get("created").get("date-parts")[0][0]
    if year < 2016:
        return False

    title = pub.get("title")[0] if pub.get("title") else ""
    if len(title.split()) < 5:
        return False

    if not pub.get("author"):
        return False

    return True

def get_citations(doi):
    cited_by = []
    cr = Crossref()
    offset = 0
    items_per_page = 1000

    while len(cited_by) < 50:
        response = cr.works(query=f"doi:{doi}", offset=offset, limit=items_per_page)
        if response.get("message") and response.get("message").get("items"):
            items = response["message"]["items"]
            for item in items:
                if valid_publication(item):
                    cited_by.append(item)
                    if len(cited_by) >= 50:
                        break
        offset += items_per_page

    return cited_by[:50]

def get_paper_data_from_crossref(pub):
    doi = pub.get("DOI")
    title = pub.get("title")[0] if pub.get("title") else None
    year = pub.get("created").get("date-parts")[0][0]
    authors = [f"{author.get('given', '')} {author.get('family', '')}" for author in pub.get("author", [])]
    abstract = get_abstract_from_semanticscholar(doi)
    return doi, title, year, authors, abstract

def get_paper_data_from_scopus(doi):
    try:
        ab = AbstractRetrieval(doi, view="META")
        title = ab.title
        year = ab.coverDate.split("-")[0]
        authors = [f"{au.given_name} {au.surname}" for au in ab.authors]
        abstract = get_abstract_from_semanticscholar(doi)
        return doi, title, year, authors, abstract
    except Exception as e:
        print(f"Error getting data for {doi} from Scopus: {e}")
        return None, None, None, None, None  # Return tuple with None values


def add_paper_to_network(G, paper_data):
    doi, title, year, authors, abstract = paper_data
    if not abstract:  # Check if abstract is available
        return
    authors_str = "; ".join(authors) if authors else ""
    title = title if title else "Unknown Title"
    year = year if year else "Unknown Year"
    G.add_node(doi, title=title, year=year, authors=authors_str, abstract=abstract)

def add_citation_to_network(G, citing_doi, cited_doi):
    paper_data = get_paper_data_from_crossref(get_publication_as_json(citing_doi))
    if not paper_data[-1]:  # Check if abstract is None
        paper_data = get_paper_data_from_scopus(citing_doi)
    if paper_data[-1]:  # Check if abstract is not None after fetching
        add_paper_to_network(G, paper_data)
        G.add_edge(citing_doi, cited_doi)

def fetch_and_add_to_network(G, doi, depth=1, max_papers=None):
    if depth <= 0 or (max_papers is not None and len(G) >= max_papers):
        return

    time.sleep(2)  # Introduce a delay to avoid hitting rate limits

    citations = get_citations(doi)
    for pub in citations:
        paper_data = get_paper_data_from_crossref(pub)
        if not paper_data[-1]:  # Check if abstract is None
            paper_data = get_paper_data_from_scopus(paper_data[0])
        if paper_data and paper_data[-1]:  # Check if paper_data and abstract are not None
            add_paper_to_network(G, paper_data)
            add_citation_to_network(G, paper_data[0], doi)
            fetch_and_add_to_network(G, paper_data[0], depth=depth-1, max_papers=max_papers)


def main():
    G = nx.DiGraph()
    initial_doi = "10.1109/CVPR.2016.91"
    paper_data = get_paper_data_from_crossref(get_paper_by_doi(initial_doi))
    if not paper_data[-1]:  # Check if abstract is None
        paper_data = get_paper_data_from_scopus(initial_doi)
    if paper_data[-1]:  # Check if abstract is not None after fetching
        add_paper_to_network(G, paper_data)

        # Fetch for different depths
        fetch_and_add_to_network(G, initial_doi, depth=1, max_papers=200)
        fetch_and_add_to_network(G, initial_doi, depth=2, max_papers=210)  # 200 from depth 1 + 10 for depth 2
        fetch_and_add_to_network(G, initial_doi, depth=3, max_papers=213)  # 210 from previous depths + 3 for depth 3
        
        nx.write_graphml(G, "citation_network_El_Abs_Depth.graphml")

if __name__ == "__main__":
    main()

Error getting data for 10.1007/s00115-016-0148-3 from Scopus: 'NoneType' object is not iterable
Error getting data for 10.1016/j.hjc.2016.03.002 from Scopus: 'NoneType' object is not iterable
Error getting data for 10.1007/s11606-016-3822-z from Scopus: 'NoneType' object is not iterable
Error getting data for 10.1016/j.jiac.2016.07.010 from Scopus: 'NoneType' object is not iterable
Error getting data for 10.1016/s1473-3099(16)30338-3 from Scopus: 'NoneType' object is not iterable
Error getting data for 10.1016/j.surg.2020.08.011 from Scopus: 'NoneType' object is not iterable
Error getting data for 10.1149/ma2016-02/9/1109 from Scopus: The resource specified cannot be found.
Error getting data for 10.1016/j.medengphy.2016.07.012 from Scopus: 'NoneType' object is not iterable
Error getting data for 10.5194/os-2016-41-rc1 from Scopus: The resource specified cannot be found.
Error getting data for 10.1016/j.ijbiomac.2016.06.076 from Scopus: 'NoneType' object is not iterable
Error getting d

In [26]:
import os
import requests
import time
import networkx as nx
from pybliometrics.scopus import AbstractRetrieval
from habanero import Crossref

API_KEY = "8a456d9c37dfee8ad35feabdb6d73bac"
os.environ["SCOPUS_API_KEY"] = API_KEY

def get_abstract_from_semanticscholar(doi):
    url = f"https://api.semanticscholar.org/v1/paper/{doi}"
    response = requests.get(url)
    data = response.json()
    return data.get('abstract')

def get_paper_by_doi(doi):
    cr = Crossref()
    return cr.works(ids=doi).get("message")

def valid_publication(pub):
    """Check if the publication meets the given requirements."""
    year = pub.get("created").get("date-parts")[0][0]
    if year < 2016:
        return False

    title = pub.get("title")[0] if pub.get("title") else ""
    if len(title.split()) < 5:
        return False

    if not pub.get("author"):
        return False

    return True

def get_citations(doi, limit=50):
    cited_by = []
    cr = Crossref()
    offset = 0
    items_per_page = 1000

    while len(cited_by) < limit:
        response = cr.works(query=f"doi:{doi}", offset=offset, limit=items_per_page)
        if response.get("message") and response.get("message").get("items"):
            items = response["message"]["items"]
            for item in items:
                if valid_publication(item):
                    cited_by.append(item)
                    if len(cited_by) >= limit:
                        break
        offset += items_per_page

    return cited_by[:limit]

def get_paper_data_from_crossref(pub):
    doi = pub.get("DOI")
    title = pub.get("title")[0] if pub.get("title") else None
    year = pub.get("created").get("date-parts")[0][0]
    authors = [f"{author.get('given', '')} {author.get('family', '')}" for author in pub.get("author", [])]
    abstract = get_abstract_from_semanticscholar(doi)
    return doi, title, year, authors, abstract

def get_paper_data_from_scopus(doi):
    try:
        ab = AbstractRetrieval(doi, view="META")
        title = ab.title
        year = ab.coverDate.split("-")[0]
        authors = [f"{au.given_name} {au.surname}" for au in ab.authors]
        abstract = get_abstract_from_semanticscholar(doi)
        return doi, title, year, authors, abstract
    except Exception as e:
        print(f"Error getting data for {doi} from Scopus: {e}")
        return None, None, None, None, None  # Return tuple with None values


def add_paper_to_network(G, paper_data):
    doi, title, year, authors, abstract = paper_data
    if not abstract:  # Check if abstract is available
        return
    authors_str = "; ".join(authors) if authors else ""
    title = title if title else "Unknown Title"
    year = year if year else "Unknown Year"
    G.add_node(doi, title=title, year=year, authors=authors_str, abstract=abstract)

def add_citation_to_network(G, citing_doi, cited_doi):
    paper_data = get_paper_data_from_crossref(get_publication_as_json(citing_doi))
    if not paper_data[-1]:  # Check if abstract is None
        paper_data = get_paper_data_from_scopus(citing_doi)
    if paper_data[-1]:  # Check if abstract is not None after fetching
        add_paper_to_network(G, paper_data)
        G.add_edge(citing_doi, cited_doi)

def fetch_and_add_to_network(G, doi, depth=1, limit=None):
    if depth <= 0 or (limit is not None and len(G) >= limit):
        return

    time.sleep(2)  # Introduce a delay to avoid hitting rate limits

    if depth == 1:
        limit = 50
    elif depth == 2:
        limit = 7
    elif depth == 3:
        limit = 3

    citations = get_citations(doi, limit=limit)
    for pub in citations:
        paper_data = get_paper_data_from_crossref(pub)
        if not paper_data[-1]:  # Check if abstract is None
            paper_data = get_paper_data_from_scopus(paper_data[0])
        if paper_data and paper_data[-1]:  # Check if paper_data and abstract are not None
            add_paper_to_network(G, paper_data)
            add_citation_to_network(G, paper_data[0], doi)
            fetch_and_add_to_network(G, paper_data[0], depth=depth-1)


def main():
    G = nx.DiGraph()
    initial_doi = "10.1109/CVPR.2016.91"
    paper_data = get_paper_data_from_crossref(get_paper_by_doi(initial_doi))
    if not paper_data[-1]:  # Check if abstract is None
        paper_data = get_paper_data_from_scopus(initial_doi)
    if paper_data[-1]:  # Check if abstract is not None after fetching
        add_paper_to_network(G, paper_data)
        fetch_and_add_to_network(G, initial_doi, depth=3)
        nx.write_graphml(G, "citation_network_El_Abs_Depth_Again.graphml")

if __name__ == "__main__":
    main()

Error getting data for 10.1007/s00115-016-0148-3 from Scopus: 'NoneType' object is not iterable
Error getting data for 10.1016/j.hjc.2016.03.002 from Scopus: 'NoneType' object is not iterable
Error getting data for 10.1007/s00115-016-0148-3 from Scopus: 'NoneType' object is not iterable
Error getting data for 10.17660/actahortic.1994.377.45 from Scopus: The resource specified cannot be found.
Error getting data for 10.1007/s00115-016-0148-3 from Scopus: 'NoneType' object is not iterable
Error getting data for 10.17660/actahortic.1994.377.45 from Scopus: The resource specified cannot be found.
Error getting data for 10.47749/t/unicamp.1986.17660 from Scopus: The resource specified cannot be found.
Error getting data for 10.1007/s11606-016-3822-z from Scopus: 'NoneType' object is not iterable
Error getting data for 10.5962/bhl.title.147343 from Scopus: The resource specified cannot be found.
Error getting data for 10.1016/j.jiac.2016.07.010 from Scopus: 'NoneType' object is not iterable


In [29]:
import os
import requests
import time
import networkx as nx
from pybliometrics.scopus import AbstractRetrieval
from habanero import Crossref

API_KEY = "8a456d9c37dfee8ad35feabdb6d73bac"
os.environ["SCOPUS_API_KEY"] = API_KEY

def get_abstract_from_semanticscholar(doi):
    url = f"https://api.semanticscholar.org/v1/paper/{doi}"
    response = requests.get(url)
    data = response.json()
    return data.get('abstract')

def get_paper_by_doi(doi):
    cr = Crossref()
    return cr.works(ids=doi).get("message")

def valid_publication(pub):
    """Check if the publication meets the given requirements."""
    year = pub.get("created").get("date-parts")[0][0]
    if year < 2016:
        return False

    title = pub.get("title")[0] if pub.get("title") else ""
    if len(title.split()) < 5:
        return False

    if not pub.get("author"):
        return False

    return True

def get_citations(doi, limit=50):
    cited_by = []
    cr = Crossref()
    offset = 0
    items_per_page = 1000

    while len(cited_by) < limit:
        response = cr.works(query=f"doi:{doi}", offset=offset, limit=items_per_page)
        if response.get("message") and response.get("message").get("items"):
            items = response["message"]["items"]
            for item in items:
                if valid_publication(item):
                    cited_by.append(item)
                    if len(cited_by) >= limit:
                        break
        offset += items_per_page

    return cited_by[:limit]

def get_paper_data_from_crossref(pub):
    doi = pub.get("DOI")
    title = pub.get("title")[0] if pub.get("title") else None
    year = pub.get("created").get("date-parts")[0][0]
    authors = [f"{author.get('given', '')} {author.get('family', '')}" for author in pub.get("author", [])]
    abstract = get_abstract_from_semanticscholar(doi)
    return doi, title, year, authors, abstract

def get_paper_data_from_scopus(doi):
    try:
        ab = AbstractRetrieval(doi, view="META")
        title = ab.title
        year = ab.coverDate.split("-")[0]
        authors = [f"{au.given_name} {au.surname}" for au in ab.authors]
        abstract = get_abstract_from_semanticscholar(doi)
        return doi, title, year, authors, abstract
    except Exception as e:
        print(f"Error getting data for {doi} from Scopus: {e}")
        return None, None, None, None, None  # Return tuple with None values


def add_paper_to_network(G, paper_data):
    doi, title, year, authors, abstract = paper_data
    if not abstract:  # Check if abstract is available
        return
    authors_str = "; ".join(authors) if authors else ""
    title = title if title else "Unknown Title"
    year = year if year else "Unknown Year"
    G.add_node(doi, title=title, year=year, authors=authors_str, abstract=abstract)

def add_citation_to_network(G, citing_doi, cited_doi):
    paper_data = get_paper_data_from_crossref(get_publication_as_json(citing_doi))
    if not paper_data[-1]:  # Check if abstract is None
        paper_data = get_paper_data_from_scopus(citing_doi)
    if paper_data[-1]:  # Check if abstract is not None after fetching
        add_paper_to_network(G, paper_data)
        G.add_edge(citing_doi, cited_doi)

def fetch_and_add_to_network(G, doi, depth=1):
    if depth <= 0:
        return

    time.sleep(2)  # Introduce a delay to avoid hitting rate limits

    # Set the limit based on the depth
    if depth == 1:
        limit = 100
    elif depth == 2:
        limit = 50
    elif depth == 3:
        limit = 3

    citations = get_citations(doi, limit=limit)
    for pub in citations:
        paper_data = get_paper_data_from_crossref(pub)
        if not paper_data[-1]:  # Check if abstract is None
            paper_data = get_paper_data_from_scopus(paper_data[0])
        if paper_data and paper_data[-1]:  # Check if paper_data and abstract are not None
            add_paper_to_network(G, paper_data)
            # Connect the citing paper (current one) with the cited one (previous in the loop)
            G.add_edge(paper_data[0], doi)
            fetch_and_add_to_network(G, paper_data[0], depth=depth-1)


def main():
    G = nx.DiGraph()
    initial_doi = "10.1109/CVPR.2016.91"
    paper_data = get_paper_data_from_crossref(get_paper_by_doi(initial_doi))
    if not paper_data[-1]:  # Check if abstract is None
        paper_data = get_paper_data_from_scopus(initial_doi)
    if paper_data[-1]:  # Check if abstract is not None after fetching
        add_paper_to_network(G, paper_data)
        fetch_and_add_to_network(G, initial_doi, depth=3)
        nx.write_graphml(G, "citation_network_El_Abs_Depth_Kachow_Mater.graphml")

if __name__ == "__main__":
    main()

Error getting data for 10.1007/s00115-016-0148-3 from Scopus: 'NoneType' object is not iterable
Error getting data for 10.1016/j.hjc.2016.03.002 from Scopus: 'NoneType' object is not iterable
Error getting data for 10.1007/s00115-016-0148-3 from Scopus: 'NoneType' object is not iterable
Error getting data for 10.17660/actahortic.1994.377.45 from Scopus: The resource specified cannot be found.
Error getting data for 10.1007/s00115-016-0148-3 from Scopus: 'NoneType' object is not iterable
Error getting data for 10.17660/actahortic.1994.377.45 from Scopus: The resource specified cannot be found.
Error getting data for 10.47749/t/unicamp.1986.17660 from Scopus: The resource specified cannot be found.
Error getting data for 10.1007/s11606-016-3822-z from Scopus: 'NoneType' object is not iterable
Error getting data for 10.5962/bhl.title.147343 from Scopus: The resource specified cannot be found.
Error getting data for 10.1016/j.jiac.2016.07.010 from Scopus: 'NoneType' object is not iterable


In [None]:
import os
import requests
import time
import networkx as nx
from pybliometrics.scopus import AbstractRetrieval
from habanero import Crossref

API_KEY = "8a456d9c37dfee8ad35feabdb6d73bac"
os.environ["SCOPUS_API_KEY"] = API_KEY

def get_abstract_from_semanticscholar(doi):
    url = f"https://api.semanticscholar.org/v1/paper/{doi}"
    response = requests.get(url)
    data = response.json()
    return data.get('abstract')

def get_paper_by_doi(doi):
    cr = Crossref()
    return cr.works(ids=doi).get("message")

def valid_publication(pub):
    """Check if the publication meets the given requirements."""
    year = pub.get("created").get("date-parts")[0][0]
    if year < 2016:
        return False

    title = pub.get("title")[0] if pub.get("title") else ""
    if len(title.split()) < 5:
        return False

    if not pub.get("author"):
        return False

    return True

def get_citations(doi, limit=50):
    cited_by = []
    cr = Crossref()
    offset = 0
    items_per_page = 1000

    while len(cited_by) < limit:
        response = cr.works(query=f"doi:{doi}", offset=offset, limit=items_per_page)
        if response.get("message") and response.get("message").get("items"):
            items = response["message"]["items"]
            for item in items:
                if valid_publication(item):
                    cited_by.append(item)
                    if len(cited_by) >= limit:
                        break
        offset += items_per_page

    return cited_by[:limit]

def get_paper_data_from_crossref(pub):
    doi = pub.get("DOI")
    title = pub.get("title")[0] if pub.get("title") else None
    year = pub.get("created").get("date-parts")[0][0]
    authors = [f"{author.get('given', '')} {author.get('family', '')}" for author in pub.get("author", [])]
    abstract = get_abstract_from_semanticscholar(doi)
    return doi, title, year, authors, abstract

def get_paper_data_from_scopus(doi):
    try:
        ab = AbstractRetrieval(doi, view="META")
        title = ab.title
        year = ab.coverDate.split("-")[0]
        authors = [f"{au.given_name} {au.surname}" for au in ab.authors]
        abstract = get_abstract_from_semanticscholar(doi)
        return doi, title, year, authors, abstract
    except Exception as e:
        print(f"Error getting data for {doi} from Scopus: {e}")
        return None, None, None, None, None  # Return tuple with None values


def add_paper_to_network(G, paper_data):
    doi, title, year, authors, abstract = paper_data
    if not abstract:  # Check if abstract is available
        return
    authors_str = "; ".join(authors) if authors else ""
    title = title if title else "Unknown Title"
    year = year if year else "Unknown Year"
    G.add_node(doi, title=title, year=year, authors=authors_str, abstract=abstract)

def add_citation_to_network(G, citing_doi, cited_doi):
    paper_data = get_paper_data_from_crossref(get_publication_as_json(citing_doi))
    if not paper_data[-1]:  # Check if abstract is None
        paper_data = get_paper_data_from_scopus(citing_doi)
    if paper_data[-1]:  # Check if abstract is not None after fetching
        add_paper_to_network(G, paper_data)
        G.add_edge(citing_doi, cited_doi)

def fetch_and_add_to_network(G, doi, depth=1):
    if depth <= 0:
        return

    time.sleep(2)  # Introduce a delay to avoid hitting rate limits

    # Set the limit based on the depth
    if depth == 1:
        limit = 100
    elif depth == 2:
        limit = 50
    elif depth == 3:
        limit = 3

    citations = get_citations(doi, limit=limit)
    for pub in citations:
        paper_data = get_paper_data_from_crossref(pub)
        if not paper_data[-1]:  # Check if abstract is None
            paper_data = get_paper_data_from_scopus(paper_data[0])
        if paper_data and paper_data[-1]:  # Check if paper_data and abstract are not None
            add_paper_to_network(G, paper_data)
            # Connect the citing paper (current one) with the cited one (previous in the loop)
            G.add_edge(paper_data[0], doi)
            fetch_and_add_to_network(G, paper_data[0], depth=depth-1)


def main():
    G = nx.DiGraph()
    initial_doi = "10.1109/CVPR.2016.91"
    paper_data = get_paper_data_from_crossref(get_paper_by_doi(initial_doi))
    if not paper_data[-1]:  # Check if abstract is None
        paper_data = get_paper_data_from_scopus(initial_doi)
    if paper_data[-1]:  # Check if abstract is not None after fetching
        add_paper_to_network(G, paper_data)
        fetch_and_add_to_network(G, initial_doi, depth=3)
        nx.write_graphml(G, "citation_network_El_Abs_Depth_Kachow_Mater.graphml")

if __name__ == "__main__":
    main()

In [32]:
import os
import requests
import time
import networkx as nx
from pybliometrics.scopus import AbstractRetrieval
from habanero import Crossref

API_KEY = "8a456d9c37dfee8ad35feabdb6d73bac"
os.environ["SCOPUS_API_KEY"] = API_KEY

def get_abstract_from_semanticscholar(doi):
    url = f"https://api.semanticscholar.org/v1/paper/{doi}"
    response = requests.get(url)
    data = response.json()
    return data.get('abstract')

def get_paper_by_doi(doi):
    cr = Crossref()
    return cr.works(ids=doi).get("message")

def valid_publication(pub):
    """Check if the publication meets the given requirements."""
    year = pub.get("created").get("date-parts")[0][0]
    if year < 2016:
        return False

    title = pub.get("title")[0] if pub.get("title") else ""
    if len(title.split()) < 5:
        return False

    if not pub.get("author"):
        return False

    return True

def get_citations(doi, limit=50):
    cited_by = []
    cr = Crossref()
    offset = 0
    items_per_page = 1000

    while len(cited_by) < limit:
        response = cr.works(query=f"doi:{doi}", offset=offset, limit=items_per_page)
        if response.get("message") and response.get("message").get("items"):
            items = response["message"]["items"]
            for item in items:
                if valid_publication(item):
                    cited_by.append(item)
                    if len(cited_by) >= limit:
                        break
        offset += items_per_page

    return cited_by[:limit]

def get_paper_data_from_crossref(pub):
    doi = pub.get("DOI")
    title = pub.get("title")[0] if pub.get("title") else None
    year = pub.get("created").get("date-parts")[0][0]
    authors = [f"{author.get('given', '')} {author.get('family', '')}" for author in pub.get("author", [])]
    abstract = get_abstract_from_semanticscholar(doi)
    return doi, title, year, authors, abstract

def get_paper_data_from_scopus(doi):
    try:
        ab = AbstractRetrieval(doi, view="META")
        title = ab.title
        year = ab.coverDate.split("-")[0]
        authors = [f"{au.given_name} {au.surname}" for au in ab.authors]
        abstract = get_abstract_from_semanticscholar(doi)
        return doi, title, year, authors, abstract
    except Exception as e:
        print(f"Error getting data for {doi} from Scopus: {e}")
        return None, None, None, None, None  # Return tuple with None values


def add_paper_to_network(G, paper_data):
    doi, title, year, authors, abstract = paper_data
    if not abstract:  # Check if abstract is available
        return
    authors_str = "; ".join(authors) if authors else ""
    title = title if title else "Unknown Title"
    year = year if year else "Unknown Year"
    G.add_node(doi, title=title, year=year, authors=authors_str, abstract=abstract)

def add_citation_to_network(G, citing_doi, cited_doi):
    paper_data = get_paper_data_from_crossref(get_publication_as_json(citing_doi))
    if not paper_data[-1]:  # Check if abstract is None
        paper_data = get_paper_data_from_scopus(citing_doi)
    if paper_data[-1]:  # Check if abstract is not None after fetching
        add_paper_to_network(G, paper_data)
        G.add_edge(citing_doi, cited_doi)

def fetch_and_add_to_network(G, doi, depth=1):
    if depth <= 0:
        return

    time.sleep(2)  # Introduce a delay to avoid hitting rate limits

    # Set the limit based on the depth
    if depth == 1:
        limit = 300
    elif depth == 2:
        limit = 100
    elif depth == 10:
        limit = 3

    citations = get_citations(doi, limit=limit)
    for pub in citations:
        paper_data = get_paper_data_from_crossref(pub)
        if not paper_data[-1]:  # Check if abstract is None
            paper_data = get_paper_data_from_scopus(paper_data[0])
        if paper_data and paper_data[-1]:  # Check if paper_data and abstract are not None
            add_paper_to_network(G, paper_data)
            # Connect the citing paper (current one) with the cited one (previous in the loop)
            G.add_edge(paper_data[0], doi)
            fetch_and_add_to_network(G, paper_data[0], depth=depth-1)


def main():
    G = nx.DiGraph()
    initial_doi = "10.1109/CVPR.2016.91"
    paper_data = get_paper_data_from_crossref(get_paper_by_doi(initial_doi))
    if not paper_data[-1]:  # Check if abstract is None
        paper_data = get_paper_data_from_scopus(initial_doi)
    if paper_data[-1]:  # Check if abstract is not None after fetching
        add_paper_to_network(G, paper_data)
        fetch_and_add_to_network(G, initial_doi, depth=3)
        nx.write_graphml(G, "citation_network_El_Abs_Depth_Kachow_Mater_Halloween.graphml")

if __name__ == "__main__":
    main()

UnboundLocalError: local variable 'limit' referenced before assignment

In [30]:
def add_edges_for_all_nodes(G):
    for node in G.nodes():
        # Check if any existing nodes cite this node
        for other_node in G.nodes():
            if other_node == node:
                continue  # Skip checking the same node

            # Check if `other_node` cites `node`
            citations_of_other_node = get_citations(other_node, limit=1000)
            cited_dois = [pub.get("DOI") for pub in citations_of_other_node]
            if node in cited_dois and not G.has_edge(other_node, node):
                G.add_edge(other_node, node)

            # Check if `node` cites `other_node`
            citations_of_node = get_citations(node, limit=1000)
            cited_dois_by_node = [pub.get("DOI") for pub in citations_of_node]
            if other_node in cited_dois_by_node and not G.has_edge(node, other_node):
                G.add_edge(node, other_node)

add_edges_for_all_nodes(G)


NameError: name 'G' is not defined

In [10]:
%pip install lxml

You should consider upgrading via the '/Users/alexsciuto/Library/Mobile Documents/com~apple~CloudDocs/DataWithAlex/MSDA Classes/Network_Science/YOLO_Paper_Network_Analysis/venv/bin/python -m pip install --upgrade pip' command.[0m
Note: you may need to restart the kernel to use updated packages.
