In [62]:
import json
import pandas as pd

# Load the prompt template and the raw paper list. Context managers close both
# file handles as soon as the data has been read instead of leaking them.
with open('prompt.txt') as prompt_file:
    prompt_template = prompt_file.read()
with open('papers.txt') as papers_file:
    # One list entry per line of the file; trailing newlines are kept.
    papers = papers_file.readlines()

In [63]:
# Parse each raw "title<NBSP>authors" line into a {'title', 'authors'} dict.
# Results are collected in a fresh list so that a malformed line can never be
# stored with the previous paper's fields, and entries that are already dicts
# are kept unchanged, which makes this cell safe to run more than once.
parsed_papers = []
for i, paper in enumerate(papers):
    if isinstance(paper, dict):
        # Already parsed by an earlier run of this cell.
        parsed_papers.append(paper)
        continue
    try:
        title, authors = paper.strip().split('\xa0')
    except ValueError:
        # The line did not contain exactly one NBSP separator: report and skip it.
        print(f"Error on line {i+1}: {paper}")
        continue
    parsed_papers.append({'title': title, 'authors': authors})
# Update the existing list object in place, as the original loop did.
papers[:] = parsed_papers

In [64]:
# Persist the parsed paper records as pretty-printed JSON (4-space indent).
with open("papers.json", "w") as out_file:
    json.dump(papers, out_file, indent=4)

In [41]:
# Half-open slice [start_id, end_id) of `papers` to inject into the prompt.
start_id = 0
end_id = 5
# `papers` holds raw text lines right after loading, but {'title', 'authors'}
# dicts once the parsing cell has run. str.join only accepts strings, so dict
# entries are rendered back to the "title<NBSP>authors" form of the input file;
# string entries are passed through untouched.
paper_batch = '\n'.join(
    entry if isinstance(entry, str) else f"{entry['title']}\xa0{entry['authors']}"
    for entry in papers[start_id:end_id]
)
# Fill the template's {PAPER_LIST} placeholder and show the final prompt.
prompt = prompt_template.replace('{PAPER_LIST}', paper_batch)
print(prompt)

# 任务要求
给定一份论文题目的清单，你需要依次在网络中搜寻每一篇论文，然后判断该论文的研究方向是否和以下领域中的一个或多个相关：
1. 情感分析（sentiment analysis）
2. 情绪识别（emotion recognition）
3. 论辩挖掘（argumentation mining）
4. 情感对话（empathetic conversation）
5. 社会情感（social emotion）

你需要以json(字典)的格式依次返回对于每一篇论文的判断结果，格式如下：
    {
        "id": "论文在清单中的索引",
        "title": "论文题目",
        "url": "论文的网址", // 如果找不到论文，请返回空字符串""
        // 如果找到了url，则继续返回下面的字段，否则不需要返回
        "matched_domain": ["领域1", "领域2", ...] // 请以列表形式返回上述领域中的一个或多个；使用领域的中文名称，如"情感分析"；如果没有匹配的领域，请返回空列表[]
        // 如果存在匹配的领域，则继续返回下面的字段，否则不需要返回
        "authors": ["作者1", "作者2", ...], // 作者列表
        "affiliation": ["机构1", "机构2", ...], // 机构列表
        "summmary": "对这篇论文的简要总结" // 要求尽可能简洁，不超过100字 
    }

    不同论文的返回结果之间使用逗号+换行符进行分隔，如下所示：
    {
        "id": "0",
        "title": "Combo of Thinking and Observing for Outside-Knowledge VQA",
        "url": "url of this paper",
        "matched_domain": [],
    },
    {
        "id": "1",
        "title": "Improving Empathetic Response Generation by Recogn

In [1]:
import arxiv
# Ad-hoc arXiv lookup for one known title: up to five hits, ordered by
# relevance. The results are only requested here, not iterated in this cell.
# NOTE: the name `search` is rebound to a function further down the notebook.
search = arxiv.Search(
    query='Combo of Thinking and Observing for Outside-Knowledge VQA',
    max_results=5,
    sort_by=arxiv.SortCriterion.Relevance,
)
client = arxiv.Client()
results = client.results(search)

In [17]:
from typing import Dict
# Sentinel dict returned by `search` when no arXiv result passes `match`.
NOT_FOUND = {'error': 'not found'}

def match(res: "arxiv.Result", title: str, authors: list = None):
    """Decide whether an arXiv result refers to the requested paper.

    The title matches when more than 90% of its distinct whitespace-separated
    tokens also occur in ``res.title`` (exact, case-sensitive comparison).
    When ``authors`` is non-empty, more than 90% of the distinct requested
    names must additionally equal the ``name`` of an entry in ``res.authors``.

    Args:
        res: Result object exposing ``title`` and ``authors`` (items with ``name``).
        title: Title of the paper being looked up.
        authors: Optional author names; ``None`` or an empty list skips the
            author check.

    Returns:
        bool: True when the title (and, if given, the authors) match.
    """
    wanted_tokens = set(title.split())
    if not wanted_tokens:
        # An empty title has nothing to compare; this also avoids dividing by zero.
        return False
    # Both numerator and denominator use distinct tokens, so a word repeated in
    # the title cannot push an exact match below the threshold.
    success = len(wanted_tokens.intersection(res.title.split())) / len(wanted_tokens) > 0.9
    if authors:
        wanted_authors = set(authors)
        res_authors = {a.name for a in res.authors}
        success = success and len(wanted_authors & res_authors) / len(wanted_authors) > 0.9
    return success

def search(title: str, authors: list = None):
    """Query arXiv for a paper and return the first result accepted by `match`.

    Args:
        title: Title of the paper to look up.
        authors: Optional author names. They are appended to the query text and
            forwarded to `match`; ``None`` is treated as an empty list.

    Returns:
        dict: ``{'title', 'authors', 'url', 'abstract'}`` built from the first
        matching result, or the ``NOT_FOUND`` sentinel when none of the (at
        most 10) results matches.
    """
    # Normalise here rather than using a mutable default argument.
    authors = authors or []
    client = arxiv.Client()
    # Free-text query: the title followed by the author names.
    query = title + ' ' + ' '.join(authors)
    # Named `arxiv_search` so the local does not shadow this function.
    arxiv_search = arxiv.Search(
        query=query, max_results=10, sort_by=arxiv.SortCriterion.Relevance
    )
    for res in client.results(arxiv_search):
        if match(res, title, authors):
            return {'title': res.title, 'authors': [a.name for a in res.authors], 'url': res.entry_id, 'abstract': res.summary}
    return NOT_FOUND

In [25]:
# Look up a single title with no author filter; the saved output below is the
# NOT_FOUND sentinel, i.e. no returned result passed `match`.
search(title='Improving Empathetic Response Generation by Recognizing Emotion Cause in Conversations')

{'error': 'not found'}

In [27]:
# Hand-written examples pairing a paper record ("input") with a label ("output").
# NOTE(review): presumably few-shot examples for the classification prompt — confirm.
# The two "output" values have different shapes: a plain string for the first
# paper and a {"domains", "summary"} dict for the second.
data = [
        {
            "input": {
                "title": "Combo of Thinking and Observing for Outside-Knowledge VQA",
                "authors": [
                    "Qingyi Si",
                    "Yuchen Mo",
                    "Zheng Lin",
                    "Huishan Ji",
                    "Weiping Wang"
                ],
                "url": "http://arxiv.org/abs/2305.06407v1",
                "abstract": "Outside-knowledge visual question answering is a challenging task that\nrequires both the acquisition and the use of open-ended real-world knowledge.\nSome existing solutions draw external knowledge into the cross-modality space\nwhich overlooks the much vaster textual knowledge in natural-language space,\nwhile others transform the image into a text that further fuses with the\ntextual knowledge into the natural-language space and completely abandons the\nuse of visual features. In this paper, we are inspired to constrain the\ncross-modality space into the same space of natural-language space which makes\nthe visual features preserved directly, and the model still benefits from the\nvast knowledge in natural-language space. To this end, we propose a novel\nframework consisting of a multimodal encoder, a textual encoder and an answer\ndecoder. Such structure allows us to introduce more types of knowledge\nincluding explicit and implicit multimodal and textual knowledge. Extensive\nexperiments validate the superiority of the proposed method which outperforms\nthe state-of-the-art by 6.17% accuracy. We also conduct comprehensive ablations\nof each component, and systematically study the roles of varying types of\nknowledge. Codes and knowledge data can be found at\nhttps://github.com/PhoebusSi/Thinking-while-Observing."
            },
            "output": "不相关"
        },
        {
            "input": {
                "title": "Improving Empathetic Response Generation by Recognizing Emotion Cause in Conversations",
                "authors": [
                    "Jun Gao",
                    "Yuhan Liu",
                    "Haolin Deng",
                    "Wei Wang",
                    "Yu Cao",
                    "Jiachen Du",
                    "Ruifeng Xu"
                ],
                "url": "https://aclanthology.org/2021.findings-emnlp.70",
                "abstract": "Current approaches to empathetic response generation focus on learning a model to predict an emotion label and generate a response based on this label and have achieved promising results. However, the emotion cause, an essential factor for empathetic responding, is ignored. The emotion cause is a stimulus for human emotions. Recognizing the emotion cause is helpful to better understand human emotions so as to generate more empathetic responses. To this end, we propose a novel framework that improves empathetic response generation by recognizing emotion cause in conversations. Specifically, an emotion reasoner is designed to predict a context emotion label and a sequence of emotion cause-oriented labels, which indicate whether the word is related to the emotion cause. Then we devise both hard and soft gated attention mechanisms to incorporate the emotion cause into response generation. Experiments show that incorporating emotion cause information improves the performance of the model on both emotion recognition and response generation."
            },
            "output": {
                "domains": [
                    "情感对话",
                    "情绪识别"
                ],
                "summary": "这篇论文探讨了通过识别对话中的情绪原因来提升生成共情响应的方法。研究者们开发了一种模型，该模型能够分析对话内容，识别情绪触发因素，并据此生成更为贴切和富有同理心的回复，从而改善人机交互体验。"
            }
        }
    ]

# ensure_ascii=False keeps the Chinese text readable instead of \uXXXX escapes.
print(json.dumps(data, indent=4, ensure_ascii=False))

[
    {
        "input": {
            "title": "Combo of Thinking and Observing for Outside-Knowledge VQA",
            "authors": [
                "Qingyi Si",
                "Yuchen Mo",
                "Zheng Lin",
                "Huishan Ji",
                "Weiping Wang"
            ],
            "url": "http://arxiv.org/abs/2305.06407v1",
            "abstract": "Outside-knowledge visual question answering is a challenging task that\nrequires both the acquisition and the use of open-ended real-world knowledge.\nSome existing solutions draw external knowledge into the cross-modality space\nwhich overlooks the much vaster textual knowledge in natural-language space,\nwhile others transform the image into a text that further fuses with the\ntextual knowledge into the natural-language space and completely abandons the\nuse of visual features. In this paper, we are inspired to constrain the\ncross-modality space into the same space of natural-language space which makes\nthe visu

In [22]:
# NOTE(review): the saved output below is a single dict (title/authors/url/abstract),
# while `data` is defined above as a list of input/output examples. This cell's
# execution count (22) precedes that definition's (27), so the output is stale.
data

{'title': 'Combo of Thinking and Observing for Outside-Knowledge VQA',
 'authors': ['Qingyi Si',
  'Yuchen Mo',
  'Zheng Lin',
  'Huishan Ji',
  'Weiping Wang'],
 'url': 'http://arxiv.org/abs/2305.06407v1',
 'abstract': 'Outside-knowledge visual question answering is a challenging task that\nrequires both the acquisition and the use of open-ended real-world knowledge.\nSome existing solutions draw external knowledge into the cross-modality space\nwhich overlooks the much vaster textual knowledge in natural-language space,\nwhile others transform the image into a text that further fuses with the\ntextual knowledge into the natural-language space and completely abandons the\nuse of visual features. In this paper, we are inspired to constrain the\ncross-modality space into the same space of natural-language space which makes\nthe visual features preserved directly, and the model still benefits from the\nvast knowledge in natural-language space. To this end, we propose a novel\nframework c