ここで必要なライブラリ（数学や画像処理など、特定の分野の作業でよく使われる汎用的なプログラムをひとまとめにしたもの）をインストールします！  
インストールには結構時間がかかります。

In [1]:
%pip install sentence-transformers requests numpy

Note: you may need to restart the kernel to use updated packages.
Collecting sentence-transformers
  Using cached sentence_transformers-4.1.0-py3-none-any.whl.metadata (13 kB)
Collecting requests
  Downloading requests-2.32.4-py3-none-any.whl.metadata (4.9 kB)
Collecting numpy
  Downloading numpy-2.2.6-cp310-cp310-win_amd64.whl.metadata (60 kB)
Collecting transformers<5.0.0,>=4.41.0 (from sentence-transformers)
  Using cached transformers-4.52.4-py3-none-any.whl.metadata (38 kB)
Collecting tqdm (from sentence-transformers)
  Downloading tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
Collecting torch>=1.11.0 (from sentence-transformers)
  Downloading torch-2.7.1-cp310-cp310-win_amd64.whl.metadata (28 kB)
Collecting scikit-learn (from sentence-transformers)
  Downloading scikit_learn-1.7.0-cp310-cp310-win_amd64.whl.metadata (14 kB)
Collecting scipy (from sentence-transformers)
  Downloading scipy-1.15.3-cp310-cp310-win_amd64.whl.metadata (60 kB)
Collecting huggingface-hub>=0.20.0 (from sen

それからここにコードを入力します！

In [5]:
import requests
import xml.etree.ElementTree as ET
import numpy as np
from sentence_transformers import SentenceTransformer, util

# --- Settings ---
NUM_RESULTS = 20  # default number of candidates requested from each search API
TOP_K = 10        # number of top-ranked papers to print
MODEL_NAME = 'all-MiniLM-L6-v2'  # model name handed to SentenceTransformer below

# --- Input ---
# Ask the user for the keywords or research summary to search for.
query_text = input("検索したいキーワードや研究概要を入力してください：\n")

# --- Load the sentence-transformers model ---
model = SentenceTransformer(MODEL_NAME)
# Encode the query once, as a tensor, so it can be compared against the
# paper embeddings computed later in find_similar_papers.
query_embedding = model.encode(query_text, convert_to_tensor=True)

# --- arXiv API search ---
def search_arxiv(query, max_results=NUM_RESULTS):
    """Search arXiv and return the matching papers as plain dictionaries.

    Args:
        query: Free-text query; sent to the API prefixed with the ``all:``
            field selector.
        max_results: Maximum number of entries to request.

    Returns:
        A list of dicts with the keys ``source``, ``title``, ``authors``
        (list of names), ``summary`` and ``link``. Elements missing from an
        entry are returned as empty strings instead of raising.

    Raises:
        requests.HTTPError: If the API answers with a non-2xx status.
        requests.RequestException: On connection problems or timeout.
        xml.etree.ElementTree.ParseError: If the response is not valid XML.
    """
    atom = '{http://www.w3.org/2005/Atom}'
    base_url = "http://export.arxiv.org/api/query"
    params = {
        "search_query": f"all:{query}",
        "start": 0,
        "max_results": max_results,
    }
    # Without a timeout a stalled connection would hang the notebook forever.
    response = requests.get(base_url, params=params, timeout=30)
    # Fail with a clear HTTP error instead of trying to parse an error page.
    response.raise_for_status()
    root = ET.fromstring(response.content)

    def child_text(element, tag):
        # findtext yields '' for an empty element and the default for a
        # missing one, so the result is always a string.
        return (element.findtext(atom + tag, default='') or '').strip()

    papers = []
    for entry in root.findall(atom + 'entry'):
        author_names = [
            child_text(author, 'name')
            for author in entry.findall(atom + 'author')
        ]
        papers.append({
            'source': 'arXiv',
            # Titles are hard-wrapped in the feed; collapse the line breaks
            # and the indentation that follows them into single spaces.
            'title': ' '.join(child_text(entry, 'title').split()),
            # Drop empty names so ', '.join(...) downstream stays clean.
            'authors': [name for name in author_names if name],
            'summary': child_text(entry, 'summary'),
            'link': child_text(entry, 'id'),
        })
    return papers

# --- Semantic Scholar API search ---
def search_semantic_scholar(query, max_results=NUM_RESULTS):
    """Search Semantic Scholar and return the matching papers as dictionaries.

    Args:
        query: Free-text query string.
        max_results: Value sent as the ``limit`` request parameter.

    Returns:
        A list of dicts with the keys ``source``, ``title``, ``authors``
        (list of names), ``summary`` and ``link``. Fields the API reports as
        null are normalised to ``''`` (or ``[]`` for authors). An empty list
        is returned, with a warning, when the API answers with a non-2xx
        status, so the caller can still use results from other sources.

    Raises:
        requests.RequestException: On connection problems or timeout.
    """
    import warnings

    url = "https://api.semanticscholar.org/graph/v1/paper/search"
    params = {
        "query": query,
        "limit": max_results,
        "fields": "title,abstract,authors,url",
    }
    headers = {
        "User-Agent": "ResearchQOL/1.0",
    }
    # Without a timeout a stalled connection would hang the notebook forever.
    response = requests.get(url, params=params, headers=headers, timeout=30)
    if not response.ok:
        # Previously an error response silently looked like "no results";
        # stay best-effort but tell the user why this source is missing.
        warnings.warn(
            f"Semantic Scholar API returned HTTP {response.status_code}; "
            "skipping its results.",
            stacklevel=2,
        )
        return []
    data = response.json()

    papers = []
    # `.get(key, default)` does not help when the key is present with a null
    # value, so every field is normalised with `or`.
    for item in data.get("data") or []:
        author_entries = item.get('authors') or []
        papers.append({
            'source': 'SemanticScholar',
            'title': item.get('title') or '',
            'authors': [a['name'] for a in author_entries if a.get('name')],
            'summary': item.get('abstract') or '',
            'link': item.get('url') or '',
        })
    return papers

# --- Similarity scoring and output ---
def find_similar_papers(papers, query_embedding):
    """Rank papers by cosine similarity to the query and print the best ones.

    Each paper's summary is embedded with the module-level ``model`` and
    compared with ``query_embedding``; at most ``TOP_K`` papers are printed
    in descending score order.

    Args:
        papers: List of dicts with the keys ``source``, ``title``,
            ``authors``, ``summary`` and ``link``.
        query_embedding: Embedding of the query text, as produced by
            ``model.encode(..., convert_to_tensor=True)``.

    Returns:
        None. Results are written to standard output.
    """
    if not papers:
        # Nothing to rank: avoid feeding an empty batch to the encoder/scorer.
        print("\n類似度を計算できる論文が見つかりませんでした。\n")
        return

    # A paper may come without an abstract (None or ''); fall back to its
    # title so it can still be embedded and ranked meaningfully.
    texts = [p.get('summary') or p.get('title') or '' for p in papers]
    embeddings = model.encode(texts, convert_to_tensor=True)
    # cos_sim returns a (1, len(papers)) matrix for a single query; take row 0
    # and move it to NumPy once for both sorting and score lookup.
    scores = util.cos_sim(query_embedding, embeddings)[0].cpu().numpy()
    top_indices = np.argsort(-scores)[:TOP_K]

    # Report the number actually shown, which can be smaller than TOP_K.
    print(f"\n🔍 類似スコア上位 {len(top_indices)} 件の論文:\n")
    for idx in top_indices:
        idx = int(idx)
        paper = papers[idx]
        # Skip missing author names so the join cannot fail on None.
        author_names = [name for name in (paper.get('authors') or []) if name]
        print(f"[{paper['source']}] {paper['title']}")
        print(f"    Authors: {', '.join(author_names)}")
        print(f"    Link: {paper['link']}")
        print(f"    Score: {float(scores[idx]):.4f}\n")

# --- Execution flow ---
# Query both sources with the same text, merge the two result lists, then
# rank the combined list against the query embedding and print the top hits.
arxiv_papers = search_arxiv(query_text)
sem_scholar_papers = search_semantic_scholar(query_text)
all_papers = arxiv_papers + sem_scholar_papers
find_similar_papers(all_papers, query_embedding)



🔍 類似スコア上位 10 件の論文:

[arXiv] Aging and Immortality in a Cell Proliferation Model
    Authors: T. Antal, K. B. Blagoev, S. A. Trugman, S. Redner
    Link: http://arxiv.org/abs/q-bio/0609040v2
    Score: 0.3181

[arXiv] Lattice Percolation Approach to 3D Modeling of Tissue Aging
    Authors: Vyacheslav Gorshkov, Vladimir Privman, Sergiy Libert
    Link: http://arxiv.org/abs/1606.01788v1
    Score: 0.3166

[arXiv] Lattice Percolation Approach to Numerical Modeling of Tissue Aging
    Authors: Vladimir Privman, Vyacheslav Gorshkov, Sergiy Libert
    Link: http://arxiv.org/abs/1504.03576v1
    Score: 0.3061

[arXiv] Senescent fibroblasts can drive melanoma initiation and progression
    Authors: Eunjung Kim, Vito Rebecca, Inna V. Fedorenko, Jane L. Messina, Rahel Mathew, Silvya S. Maria-Engler, David Basanta, Keiran S. M. Smalley, Alexander R. A. Anderson
    Link: http://arxiv.org/abs/1304.1054v1
    Score: 0.3032

[arXiv] Do cells sense time by number of divisions?
    Authors: Zeev Schus