In [2]:
import os
from dotenv import load_dotenv

import sys
from semanticscholar import SemanticScholar

import requests

# Load variables from the local .env file into the process environment.
dotenv_path = '.env'
load_dotenv(dotenv_path=dotenv_path)

# API credentials; each lookup yields None when the variable is not set.
openai_api_key = os.environ.get("OPENAI_API_KEY")
ss_api_key = os.environ.get("SS_API_KEY")

In [37]:
from pprint import pprint



def get_paper_info_by_title(title, ss_api_key):
    """
    Look up papers by title and return the first search hit for each title.

    Intended for resolving a paper title to its paper id and abstract
    (e.g. getting the paper id of the Toolformer paper).

    Args:
        title: A single title string, or an iterable of title strings.
        ss_api_key: Value sent in the ``x-api-key`` request header.

    Returns:
        For each title, the first element of the response's ``data`` list, or
        ``None`` when the response has no (or an empty) ``data`` entry.
        When exactly one title was looked up (a plain string or a one-element
        list) that single result is returned directly; otherwise a list of
        results in input order is returned (an empty list for empty input).
    """
    # Define the API endpoint URL
    url = 'https://api.semanticscholar.org/graph/v1/paper/search'
    fields = 'paperId,title,abstract,tldr'

    headers = {'x-api-key': ss_api_key}

    titles = [title] if isinstance(title, str) else title

    results = []
    for t in titles:
        # Pass the title via `params` so requests URL-encodes it. Splicing the
        # raw title into the query string breaks on characters such as
        # '&', '#', '+' or '%', which are meaningful inside a URL.
        response = requests.get(
            url,
            params={'query': t, 'fields': fields},
            headers=headers,
        ).json()
        if response.get('data'):
            results.append(response['data'][0])
        else:
            results.append(None)
    return results[0] if len(results) == 1 else results

def get_citing_papers(paper_id, api_key):
    """
    Fetch information about the papers that cite the given paper.

    For example, for the Toolformer paper id this returns the title, year,
    influential citation count and abstract of papers citing it.

    Args:
        paper_id: Paper identifier inserted into the citations endpoint URL.
        api_key: Value sent in the ``x-api-key`` request header.

    Returns:
        A list with the ``citingPaper`` object of every entry in the
        response's ``data`` list. An empty list is returned when the response
        carries no ``data`` (for example an error payload), so callers can
        iterate or sort the result without special-casing failures.
    """
    # Define the API endpoint URL
    url = f'https://api.semanticscholar.org/graph/v1/paper/{paper_id}/citations'

    # Define the query parameters
    # NOTE(review): no paging parameters are sent, so presumably only the
    # API's default first page of citations is returned - confirm.
    query_params = {'fields': 'title,year,influentialCitationCount,abstract'}

    # Define the headers
    headers = {'x-api-key': api_key}

    # Make the request
    response = requests.get(url, params=query_params, headers=headers).json()

    # Error payloads have no 'data' key, and 'data' may also be null; treat
    # both as "no citations" instead of raising KeyError/TypeError.
    citations = response.get('data') or []

    # Skip entries that lack a usable 'citingPaper' object.
    return [entry['citingPaper'] for entry in citations if entry.get('citingPaper')]



In [19]:
# Display the TLDR of the query paper. `query_paper_tldr` is assigned in the
# In [38] cell further down, so that cell has to be run before this one.
query_paper_tldr

{'model': 'tldr@v2.0.0',
 'text': 'This paper introduces Toolformer, a model trained to decide which APIs to call, when to call them, what arguments to pass, and how to best incorporate the results into future token prediction, which achieves substantially improved zero-shot performance across a variety of downstream tasks.'}

In [38]:
# Look up the query paper, then list the most influential papers citing it.
query = "Toolformer: Language Models Can Teach Themselves to Use Tools"
paper_count = 5  # number of citing papers to show
paper_info = get_paper_info_by_title(query, ss_api_key)
print(paper_info)

# get_paper_info_by_title returns None when the search has no hit. Read the
# fields through an empty-dict fallback so a miss reaches the "No paper found"
# branch below instead of raising TypeError on None["paperId"].
paper_fields = paper_info or {}
paper_id = paper_fields.get("paperId")
query_paper_abstract = paper_fields.get("abstract")
query_paper_tldr = paper_fields.get("tldr")
citation_paper_abstract = []
citation_paper_tldr = []

if paper_id:
    # Get the papers that cite the given paper ID
    citing_papers = get_citing_papers(paper_id, ss_api_key)

    # Sort the citing papers by influentialCitationCount in descending order
    # (most influential first). `or 0` also covers a key that is present but
    # null, which would otherwise make the sort compare None with int.
    sorted_papers = sorted(
        citing_papers,
        key=lambda x: x.get('influentialCitationCount') or 0,
        reverse=True,
    )[:paper_count]

    print(f"{query}\nAbstract: {query_paper_abstract}\n")
    print(f"{query}\nTLDR: {query_paper_tldr}\n")

    print(f"Papers citing '{query}' sorted by influential citation count:")
    for paper in sorted_papers:
        title = paper['title']
        year = paper['year']
        # Use `or` rather than a .get() default: a field that is present but
        # null should still fall back to the placeholder text.
        abstract = paper.get('abstract') or 'No abstract available'
        tldr = paper.get('tldr') or 'No tldr available'
        citation_paper_abstract.append(abstract)
        citation_paper_tldr.append(tldr)
        influential_citation_count = paper.get('influentialCitationCount') or 0
        print(f"\nTitle: {title}\nYear: {year}\nInfluential Citation Count: {influential_citation_count}\nAbstract: {abstract}\nTLDR: {tldr}")
else:
    print("No paper found with the given title.")

{'paperId': '53d128ea815bcc0526856eb5a9c42cc977cb36a7', 'title': 'Toolformer: Language Models Can Teach Themselves to Use Tools', 'abstract': 'Language models (LMs) exhibit remarkable abilities to solve new tasks from just a few examples or textual instructions, especially at scale. They also, paradoxically, struggle with basic functionality, such as arithmetic or factual lookup, where much simpler and smaller models excel. In this paper, we show that LMs can teach themselves to use external tools via simple APIs and achieve the best of both worlds. We introduce Toolformer, a model trained to decide which APIs to call, when to call them, what arguments to pass, and how to best incorporate the results into future token prediction. This is done in a self-supervised way, requiring nothing more than a handful of demonstrations for each API. We incorporate a range of tools, including a calculator, a Q\\&A system, two different search engines, a translation system, and a calendar. Toolformer