In [1]:
import os
import numpy as np
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_openai import ChatOpenAI
from langchain.output_parsers.json import SimpleJsonOutputParser
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain.schema import StrOutputParser
from dotenv import load_dotenv
import json


In [2]:
# Function to read a JSON file
def read_json_file(file_path):
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)

In [3]:
doc_list = sorted(os.listdir('reading_resource/json_summary'))

In [4]:
index_list = list(np.arange(0, len(doc_list), 40)) + [len(doc_list)]
index_list

[0, 40, 80, 120, 160, 199]

In [5]:
paper_chunk_lst = []
for i in range(len(index_list)-1):
    start = index_list[i]
    end = index_list[i+1]
    print(start, end)

    paper_chunk = ''
    for index in range(start, end):
        paper_data = read_json_file(f'reading_resource/json_summary/{doc_list[index]}')
        paper_chunk += 'Title: ' + paper_data["Title"] + '\n'
        paper_chunk += 'Summary: ' + paper_data["Summary"] + '\n'
        paper_chunk += '-------------------' +'\n'
    
    paper_chunk_lst.append(paper_chunk)

0 40
40 80
80 120
120 160
160 199


In [6]:
print(paper_chunk_lst[0])

Title: A Benchmark to Understand the Role of Knowledge Graphs on Large Language Model's Accuracy for Question Answering on Enterprise SQL Databases
Summary: - The study investigates the effectiveness of Large Language Models (LLMs) for question answering on enterprise SQL databases.
- There is a lack of suitable Text-to-SQL benchmarks for enterprise settings, making it difficult to assess LLM accuracy.
- The research explores the potential of Knowledge Graphs (KGs) to enhance LLM-based question answering by providing business context.
- A new benchmark is introduced, featuring an enterprise SQL schema in the insurance domain, various enterprise queries, and a contextual layer with an ontology and mappings for a knowledge graph.
- The primary finding indicates that GPT-4 achieves an accuracy of 16% for zero-shot prompts on SQL databases.
- Accuracy improves to 54% when questions are asked over a Knowledge Graph representation of the SQL database.
- The study concludes that investing in 

In [7]:
def research_paper_recommendation(paper_info, custom_prompt):
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", custom_prompt),
            ("human", f"Please read the text containing paper's information:\n\n{paper_info}"),
            ("human", """
                Here are the rules that you need to adhere:
                ## Rules:
                - Make sure to answer in the standard text format.
                - DO NOT HALLUCINATE.
             """),
        ]
    )
    
    llm = ChatOpenAI(openai_api_key=os.getenv('OPENAI_KEY'),
                    temperature=0, 
                    model_name="gpt-4o")

    llm_chain = prompt | llm | StrOutputParser()

    response = llm_chain.invoke({'input': paper_info})
    return response


In [None]:
custom_prompt1 = """You are a helpful assistant that can help me to find relevant research paper for my thesis project.
                          The topic of my thesis is 'Leveraging Large Language Models for Knowledge Graph Construction'.
                          I need you to read a text of research paper title and summary abstract and return me a list of research paper titles that might be relevant to my project.
                          I am interested in the topic of: 
                          - Using large language model for knowledge graph construction
                          - Build knowledge graph with custom or industry-specific dataset
                          - Survey or review paper that covers the technology of knowledge graph and large language model
                          Return the response in this string format: "title_1; title_2; title_3, ..."
                          Only return the article title that are highly relevant to my research interest.
                          """

### GPT4O-MINI OUTPUT

In [81]:
recommendation_paper_list = []
for i in range(len(paper_chunk_lst)):
    response = research_paper_recommendation(paper_chunk_lst[i], custom_prompt1)
    print(response.split(';'))
    recommendation_paper_list += response.split(';')

["A Benchmark to Understand the Role of Knowledge Graphs on Large Language Model's Accuracy for Question Answering on Enterprise SQL Databases", ' A Condensed Transition Graph Framework for Zero-shot Link Prediction with Large Language Models', ' A Graph-to-Text Approach to Knowledge-Grounded Response Generation in Human-Robot Interaction', ' A Preliminary Roadmap for LLMs as Assistants in Exploring, Analyzing, and Visualizing Knowledge Graphs', ' A Survey of Knowledge Enhanced Pre-trained Language Models', ' A knowledge representation approach for construction contract knowledge modeling', ' An Enhanced Prompt-Based LLM Reasoning Scheme via Knowledge Graph-Integrated Collaboration', ' Automated Construction of Theme-specific Knowledge Graphs', ' AttacKG+:Boosting Attack Knowledge Graph Construction with Large Language Models', ' AutoRD: An Automatic and End-to-End System for Rare Disease Knowledge Graph Construction Based on Ontologies-enhanced Large Language Models', ' Building A Kno

In [82]:
recommendation_paper_list

["A Benchmark to Understand the Role of Knowledge Graphs on Large Language Model's Accuracy for Question Answering on Enterprise SQL Databases",
 ' A Condensed Transition Graph Framework for Zero-shot Link Prediction with Large Language Models',
 ' A Graph-to-Text Approach to Knowledge-Grounded Response Generation in Human-Robot Interaction',
 ' A Preliminary Roadmap for LLMs as Assistants in Exploring, Analyzing, and Visualizing Knowledge Graphs',
 ' A Survey of Knowledge Enhanced Pre-trained Language Models',
 ' A knowledge representation approach for construction contract knowledge modeling',
 ' An Enhanced Prompt-Based LLM Reasoning Scheme via Knowledge Graph-Integrated Collaboration',
 ' Automated Construction of Theme-specific Knowledge Graphs',
 ' AttacKG+:Boosting Attack Knowledge Graph Construction with Large Language Models',
 ' AutoRD: An Automatic and End-to-End System for Rare Disease Knowledge Graph Construction Based on Ontologies-enhanced Large Language Models',
 ' Buil

### GPT4O OUTPUT

In [84]:
recommendation_paper_list = []
for i in range(len(paper_chunk_lst)):
    response = research_paper_recommendation(paper_chunk_lst[i], custom_prompt1)
    print(response.split(';'))
    recommendation_paper_list += response.split(';')

["A Benchmark to Understand the Role of Knowledge Graphs on Large Language Model's Accuracy for Question Answering on Enterprise SQL Databases", ' A Condensed Transition Graph Framework for Zero-shot Link Prediction with Large Language Models', ' A Preliminary Roadmap for LLMs as Assistants in Exploring, Analyzing, and Visualizing Knowledge Graphs', ' A Survey of Knowledge Enhanced Pre-trained Language Models', ' An Enhanced Prompt-Based LLM Reasoning Scheme via Knowledge Graph-Integrated Collaboration', ' AttacKG+:Boosting Attack Knowledge Graph Construction with Large Language Models', ' AutoRD: An Automatic and End-to-End System for Rare Disease Knowledge Graph Construction Based on Ontologies-enhanced Large Language Models', ' Automated Construction of Theme-specific Knowledge Graphs', ' Benchmarking Large Language Models in Complex Question Answering Attribution using Knowledge Graphs', ' Benchmarking the Abilities of Large Language Models for RDF Knowledge Graph Creation and Comp

In [85]:
recommendation_paper_list

["A Benchmark to Understand the Role of Knowledge Graphs on Large Language Model's Accuracy for Question Answering on Enterprise SQL Databases",
 ' A Condensed Transition Graph Framework for Zero-shot Link Prediction with Large Language Models',
 ' A Preliminary Roadmap for LLMs as Assistants in Exploring, Analyzing, and Visualizing Knowledge Graphs',
 ' A Survey of Knowledge Enhanced Pre-trained Language Models',
 ' An Enhanced Prompt-Based LLM Reasoning Scheme via Knowledge Graph-Integrated Collaboration',
 ' AttacKG+:Boosting Attack Knowledge Graph Construction with Large Language Models',
 ' AutoRD: An Automatic and End-to-End System for Rare Disease Knowledge Graph Construction Based on Ontologies-enhanced Large Language Models',
 ' Automated Construction of Theme-specific Knowledge Graphs',
 ' Benchmarking Large Language Models in Complex Question Answering Attribution using Knowledge Graphs',
 ' Benchmarking the Abilities of Large Language Models for RDF Knowledge Graph Creation

In [8]:
custom_prompt2 = """You are a helpful assistant that can help me to find relevant research paper for my thesis project.
                    The topic of my thesis is 'Leveraging Large Language Models for Knowledge Graph Construction'.
                    I need you to read a text of research paper title and summary abstract and return me a list of research paper titles that might be relevant to my project.
                    I am interested in the topic of: 
                    - Using large language model for knowledge graph construction
                    - Build domian-specific knowledge graph with custom or industry-specific dataset
                    - Construction of knowledge graph using pre-defined ontologies and relationships
                    Return the response in this string format: "title_1; title_2; title_3, ..."
                    Only return the article title that are highly relevant to my research interest.
                    """

In [9]:
recommendation_paper_list = []
for i in range(len(paper_chunk_lst)):
    response = research_paper_recommendation(paper_chunk_lst[i], custom_prompt2)
    print(response.split(';'))
    recommendation_paper_list += response.split(';')

["A Benchmark to Understand the Role of Knowledge Graphs on Large Language Model's Accuracy for Question Answering on Enterprise SQL Databases", ' A Condensed Transition Graph Framework for Zero-shot Link Prediction with Large Language Models', ' A Graph-to-Text Approach to Knowledge-Grounded Response Generation in Human-Robot Interaction', ' A Multimodal Ecological Civilization Pattern Recommendation Method Based on Large Language Models and Knowledge Graph', ' A Preliminary Roadmap for LLMs as Assistants in Exploring, Analyzing, and Visualizing Knowledge Graphs', ' A knowledge representation approach for construction contract knowledge modeling', ' An Enhanced Prompt-Based LLM Reasoning Scheme via Knowledge Graph-Integrated Collaboration', ' AutoRD: An Automatic and End-to-End System for Rare Disease Knowledge Graph Construction Based on Ontologies-enhanced Large Language Models', ' Automated Construction of Theme-specific Knowledge Graphs', ' Benchmarking Large Language Models in Co

In [10]:
recommendation_paper_list

["A Benchmark to Understand the Role of Knowledge Graphs on Large Language Model's Accuracy for Question Answering on Enterprise SQL Databases",
 ' A Condensed Transition Graph Framework for Zero-shot Link Prediction with Large Language Models',
 ' A Graph-to-Text Approach to Knowledge-Grounded Response Generation in Human-Robot Interaction',
 ' A Multimodal Ecological Civilization Pattern Recommendation Method Based on Large Language Models and Knowledge Graph',
 ' A Preliminary Roadmap for LLMs as Assistants in Exploring, Analyzing, and Visualizing Knowledge Graphs',
 ' A knowledge representation approach for construction contract knowledge modeling',
 ' An Enhanced Prompt-Based LLM Reasoning Scheme via Knowledge Graph-Integrated Collaboration',
 ' AutoRD: An Automatic and End-to-End System for Rare Disease Knowledge Graph Construction Based on Ontologies-enhanced Large Language Models',
 ' Automated Construction of Theme-specific Knowledge Graphs',
 ' Benchmarking Large Language Mod