In [1]:
import os
import eel
import shutil
import tkinter
import utils.trees as trees
import utils.parsing as parsing
import utils.ranking as ranking
import utils.scraping as scraping
import utils.constants as constants
import utils.text_processing as text_processing
from catboost import CatBoostClassifier

In [2]:
# Load the embeddings once, up front: `embeddings` and `dim` are read as
# module-level globals by the ranking endpoints defined further down.
# NOTE(review): `dim` is presumably the embedding dimensionality -- confirm
# against utils.text_processing.load_embeddings.
embeddings, dim = text_processing.load_embeddings(constants.STARSPACE_EMBEDDINGS)

In [3]:
def close_callback(route, websockets):
    """Report shutdown once no websocket connections are left.

    Passed to ``eel.start`` as its ``close_callback``.

    Args:
        route: Unused here.
        websockets: Collection of connections; only its truthiness is checked.

    Returns:
        None in every case.
    """
    connections_remaining = bool(websockets)
    if connections_remaining:
        # Something is still connected, so stay silent.
        return None

    print("Application terminated successfully!")
    return None

# Point Eel at the "frontend" directory. This runs before the @eel.expose
# definitions below and before eel.start at the end of the cell.
# NOTE(review): allowed_extensions presumably limits which frontend files Eel
# scans for exposed JavaScript functions -- confirm against the Eel docs.
eel.init("frontend", allowed_extensions=[".js", ".html"])

@eel.expose
def get_paper_results(search_query):
    """Scrape Semantic Scholar for ``search_query`` and return the result.

    Exposed to the frontend through Eel.

    Args:
        search_query: Query forwarded to ``scraping.scrape_semantic_scholar``.

    Returns:
        Whatever ``scraping.scrape_semantic_scholar`` returns, unmodified.
    """
    # Second positional argument of the scraper; presumably a cap on the
    # number of results -- TODO confirm against utils.scraping.
    scraper_limit = 9
    scrape_result = scraping.scrape_semantic_scholar(search_query, scraper_limit)
    return scrape_result

@eel.expose
def get_paper_details(paper_id):
    """Fetch a single paper by id and return its entry.

    Exposed to the frontend through Eel.

    Args:
        paper_id: Identifier forwarded to
            ``scraping.scrape_semantic_scholar_by_id`` and then used to index
            its result.

    Returns:
        The element stored under ``paper_id`` in the scraper's result.
    """
    scraped_by_id = scraping.scrape_semantic_scholar_by_id(paper_id)
    details = scraped_by_id[paper_id]
    return details

@eel.expose
def get_similar_papers_subjects(paper, subjects, keywords, top_n):
    """Rank candidate papers per subject for ``paper``.

    Exposed to the frontend through Eel. Delegates to
    ``ranking.rank_candidates_per_subject`` using the module-level
    ``embeddings`` and ``dim``.

    Args:
        paper: Forwarded as the first positional argument.
        subjects: Forwarded after ``dim``.
        keywords: Forwarded as the ``keywords`` keyword argument.
        top_n: Forwarded as the second positional argument.

    Returns:
        The ranking helper's return value, unmodified.
    """
    # Fixed options used for every frontend request.
    ranking_options = {
        "include_embeddings": False,
        "stringify": True,
        "keywords": keywords,
    }
    ranked = ranking.rank_candidates_per_subject(
        paper, top_n, embeddings, dim, subjects, **ranking_options
    )
    return ranked

@eel.expose
def get_similar_papers_neighborhood(paper_id, paper, keywords, top_n):
    """Rank the citation neighborhood of a paper.

    Exposed to the frontend through Eel. Delegates to
    ``ranking.rank_citation_neighborhood`` using the module-level
    ``embeddings`` and ``dim``.

    Args:
        paper_id: Forwarded as the first positional argument.
        paper: Forwarded as the second positional argument.
        keywords: Forwarded as the ``keywords`` keyword argument.
        top_n: Forwarded as the third positional argument.

    Returns:
        The ranking helper's return value, unmodified.
    """
    # Fixed options used for every frontend request.
    ranking_options = {
        "include_embeddings": False,
        "stringify": True,
        "keywords": keywords,
    }
    ranked = ranking.rank_citation_neighborhood(
        paper_id, paper, top_n, embeddings, dim, **ranking_options
    )
    return ranked

@eel.expose
def get_similar_papers_all(paper, subjects, keywords, top_n):
    """Rank candidate papers for ``paper`` with the adjusted ranking.

    Exposed to the frontend through Eel. Delegates to
    ``ranking.rank_candidates_adjusted`` using the module-level
    ``embeddings`` and ``dim``.

    Args:
        paper: Forwarded as the first positional argument.
        subjects: Forwarded after ``dim``.
        keywords: Forwarded as the ``keywords`` keyword argument.
        top_n: Forwarded as the second positional argument.

    Returns:
        The ranking helper's return value, unmodified.
    """
    # Fixed options used for every frontend request.
    ranking_options = {
        "include_embeddings": False,
        "stringify": True,
        "keywords": keywords,
    }
    ranked = ranking.rank_candidates_adjusted(
        paper, top_n, embeddings, dim, subjects, **ranking_options
    )
    return ranked

@eel.expose
def get_reference_tree(paper_id, paper):
    """Build a depth-2 reference tree for a paper and return it as Plotly JSON.

    Exposed to the frontend through Eel.

    Args:
        paper_id: Key under which ``paper`` is seeded, and the first argument
            of the tree builder.
        paper: Value stored under ``paper_id`` in the seed dict.

    Returns:
        The result of calling ``to_plotly_json()`` on the figure produced by
        ``trees.visualize_reference_tree_plotly``.
    """
    tree_papers = {paper_id: paper}

    # The builder's return value is ignored and the same dict is handed to the
    # visualiser, so it is presumably filled in place -- TODO confirm.
    trees.build_semantic_scholar_reference_tree(paper_id, tree_papers, depth=2)

    return trees.visualize_reference_tree_plotly(tree_papers).to_plotly_json()

# Holds the loaded subject-labeling model so it is read from disk only once
# per kernel session instead of on every frontend request.
_subject_model_cache = {}


@eel.expose
def get_paper_subject(abstract):
    """Predict the subject label for a paper abstract.

    Exposed to the frontend through Eel. The CatBoost model stored at
    ``constants.SUBJECT_LABELING_MODEL_SINGLE`` is loaded on the first call
    and reused afterwards.

    Args:
        abstract: Passed to the model as a single-element list.

    Returns:
        The first element of the model's prediction for ``[abstract]``.
    """
    model = _subject_model_cache.get("model")
    if model is None:
        model = CatBoostClassifier()
        model.load_model(constants.SUBJECT_LABELING_MODEL_SINGLE)
        # Cache only after a successful load so a failed load is retried.
        _subject_model_cache["model"] = model

    subject = model.predict([abstract])[0]
    return subject

@eel.expose
def get_pdf_information():
    """Let the user pick a file, parse it, and return the extracted paper.

    Exposed to the frontend through Eel. The chosen file is copied to
    ``constants.USER_UPLOAD``, run through the ``parsing`` helpers, and the
    entry stored under ``'user_input'`` is returned.

    Returns:
        ``papers['user_input']`` as populated by ``parsing.integrate_xml``,
        or ``None`` if the user cancels the file dialog.

    Raises:
        KeyError: If parsing does not produce a ``'user_input'`` entry.
    """
    # `import tkinter` alone does not load the `filedialog` submodule, so
    # `tkinter.filedialog` can raise AttributeError; import it explicitly.
    from tkinter import filedialog

    root = tkinter.Tk()
    root.withdraw()
    try:
        file_name = filedialog.askopenfilename()
    finally:
        # Always tear down the hidden root window, even if the dialog fails.
        root.destroy()

    # A cancelled dialog yields an empty value. Bail out before touching the
    # previous upload instead of deleting it and then failing in copyfile.
    if not file_name:
        return None

    os.makedirs(constants.USER_UPLOAD_DIR, exist_ok=True)

    # Remove artefacts left over from a previous upload.
    for stale_path in (constants.USER_UPLOAD, constants.USER_UPLOAD_CERM):
        if os.path.exists(stale_path):
            os.remove(stale_path)

    papers = {}

    shutil.copyfile(file_name, constants.USER_UPLOAD)
    parsing.parse_pdfs_to_xml(pdfs_dir=str(constants.USER_UPLOAD_DIR))
    parsing.integrate_xml(papers, pdfs_dir=constants.USER_UPLOAD_DIR)

    return papers['user_input']

# Start the Eel app on index.html, with close_callback (defined above) as the
# close handler. The recorded run of this cell ended with a KeyboardInterrupt,
# so this call appears to block until interrupted.
# NOTE(review): mode=False presumably tells Eel not to open a browser window
# itself -- confirm against the Eel docs.
eel.start("index.html", size=(1200,800), close_callback=close_callback, mode = False)

Application terminated successfully!


KeyboardInterrupt
2022-05-11T16:29:26Z
