In [2]:
import os
from pathlib import Path
import sqlite3
import pandas as pd

# Directory holding the database files this notebook reads from.
database_path = Path("./databases")

# os.listdir() returns entries in arbitrary order; sorting the names makes any
# position-based selection (such as databases[0]) reproducible across runs
# and machines.
databases = sorted(os.listdir(database_path))

In [3]:
# Resolve the first listed database file and load the complete article_meta
# table into a DataFrame. The connection is not closed in this cell.
db_uri = database_path.joinpath(databases[0])
conn = sqlite3.connect(db_uri)
articles = pd.read_sql_query(sql="SELECT * FROM article_meta", con=conn)

In [15]:
# PDF location of the article stored at positional row 3 (the fourth row).
pdf_path = articles["pdf_path"].iloc[3]

In [25]:
from PyPDF2 import PdfReader
from dataclasses import dataclass

@dataclass
class ArticlePdfReader:
    """Page-by-page text access to a single PDF file.

    Attributes:
        pdf_path: Path of the PDF, passed to ``PdfReader`` on construction.
        reader: The ``PdfReader`` instance created in ``__post_init__``.
        num_pages: Number of pages reported by ``reader.pages``.
    """

    pdf_path: str

    def __post_init__(self):
        """Open the PDF and record how many pages it has."""
        self.reader = PdfReader(self.pdf_path)
        self.num_pages = len(self.reader.pages)

    def gen(self):
        """Yield ``(page_number, page_text)`` pairs; page numbers start at 1."""
        for page_number, pdf_page in enumerate(self.reader.pages, start=1):
            yield page_number, pdf_page.extract_text()

    @property
    def text(self):
        """Text of every page, in page order, joined with newline characters."""
        return "\n".join(page_text for _, page_text in self.gen())

In [52]:
from simple_chat import Chat

# Prompt template for the per-page summary request. It carries two str.format
# placeholders: {language} (the language the model is told to answer in) and
# {page_text} (the extracted page text that is embedded in the prompt).
SUMMARY_PROMPT =  """
You are a researcher in the field of deeplearning and you are interested in deep learning theory and applications.
You always answer in {language} language.
In this page, you see the following text:
{page_text}

Your have 2 tasks
1. Summarize the page in one sentence. Keep it short and simple in {language}.

2. Answer: what's problems with previous studies, where are the unsolved problems, tell me in 2 sentence. Keep is short and simple in {language}.
"""
# Upper bound on how many articles, counted from the top of `articles`,
# are summarised in this cell.
MAX_ARTICLES = 3

# Clamp to the table size so a table with fewer rows does not raise IndexError.
for i in range(min(MAX_ARTICLES, len(articles))):

    # Fetch the row once; both the PDF path and the title are read from it.
    article = articles.iloc[i]
    pdf_path = article.pdf_path

    article_reader = ArticlePdfReader(pdf_path)

    # Only the first page is summarised. The default passed to next() avoids
    # an uncaught StopIteration when the PDF yields no pages at all.
    page = next(article_reader.gen(), (None, None))[1]
    if page is None:
        print(f"Skipping {pdf_path}: the PDF has no pages")
        continue

    # A fresh Chat per article; temperature is set to 0 before go() is called.
    chat = Chat()
    chat.set_param("temperature",0)
    chat.go()

    print("论文题目: ", article.title)

    # NOTE(review): the value returned by chat(...) is discarded here —
    # presumably Chat emits the reply itself; confirm against simple_chat.
    chat(SUMMARY_PROMPT.format(page_text=page, language="english"))
    print("-"*120)

论文题目:  Opening the black box of deep learning


------------------------------------------------------------------------------------------------------------------------
论文题目:  Concept-Oriented Deep Learning


------------------------------------------------------------------------------------------------------------------------
论文题目:  Deep learning research landscape & roadmap in a nutshell: past, present and future -- Towards deep cortical learning


------------------------------------------------------------------------------------------------------------------------
