In [None]:
import asyncio
import math
import os
import re
from concurrent.futures import ThreadPoolExecutor
from typing import Any, Optional
from typing import List, Dict

import aiohttp
from habanero import Crossref
from langchain import hub
from langchain.agents import AgentExecutor
from langchain.agents import AgentType
from langchain.agents import Tool
from langchain.agents import create_openai_functions_agent
from langchain.agents import initialize_agent
from langchain.agents.agent_toolkits import (
    create_vectorstore_agent,
    VectorStoreToolkit,
    VectorStoreInfo
)
from langchain.callbacks.manager import CallbackManagerForRetrieverRun
from langchain.chains import ConversationChain
from langchain.chains import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.document_loaders import TextLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain.llms import OpenAI
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate
from langchain.retrievers import BaseRetriever
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.tools.retriever import create_retriever_tool
from langchain.utilities import SerpAPIWrapper
from langchain.vectorstores import Chroma
from langchain.vectorstores import FAISS
from langchain.vectorstores import LanceDB
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from pyaltmetric import Altmetric, Citation
from pyaltmetric import Altmetric
from pybliometrics.scopus import PlumXMetrics

In [None]:
# Relative importance of each impact metric in the combined score.
# The four weights sum to 1.0.
weights = {
    'citation': 0.4,
    'socialmedia': 0.3,
    'capture': 0.2,
    'mention': 0.1
}


def calculate_scores(document_metrics: Dict[str, float]) -> float:
    """Return the weighted, log-damped impact score for one document.

    Each metric named in ``weights`` contributes ``weight * log(1 + count)``,
    so a tenfold increase in a count adds a constant amount rather than
    multiplying the score. Keys of ``document_metrics`` that are not in
    ``weights`` are ignored.

    Args:
        document_metrics: Mapping from metric name to its raw count. Missing
            keys, ``None`` values and negative values are all treated as 0 so
            that incomplete or malformed API payloads cannot raise.

    Returns:
        The non-negative combined score; ``0.0`` when every count is 0.
    """
    total = 0.0
    for metric_name, metric_weight in weights.items():
        # ``or 0`` maps a JSON null (None) to 0; ``max`` clamps negative
        # counts, which would otherwise be outside the logarithm's domain.
        count = document_metrics.get(metric_name) or 0
        total += metric_weight * math.log1p(max(count, 0))
    return total

class RefRetriever(BaseRetriever):
    """Retriever that re-ranks vector-store hits by bibliometric impact.

    For a query it pulls ``3 * k`` candidates from ``db``, keeps the first
    chunk seen for each distinct ``metadata['source']`` (chunks without a
    source are dropped), looks up impact metrics for every source, and
    returns the ``k`` candidates with the highest
    ``relevance + weight * calculate_scores(metrics)``.

    Attributes:
        db: Vector store exposing ``similarity_search_with_score(query, k=...)``.
        k: Number of documents to return.
        weight: Multiplier applied to the impact score before it is added to
            the relevance score.
        score_is_distance: Whether ``db`` returns distances (lower is better)
            rather than similarities (higher is better).
        metadata_cache: Per-instance cache of fetched metrics, keyed by
            document identifier.
    """

    # BaseRetriever is a pydantic model, so instance state has to be declared
    # as fields; assigning undeclared attributes in __init__ is rejected.
    db: Any
    k: int
    weight: float = 0.5
    # LangChain's FAISS wrapper returns an L2 distance by default, so the raw
    # score must be inverted before "higher is better" sorting makes sense.
    score_is_distance: bool = True
    metadata_cache: Dict[str, Dict[str, float]]

    def __init__(self, db, k: int, weight: float = 0.5, score_is_distance: bool = True, **kwargs):
        """Create the retriever.

        Args:
            db: Vector store to query.
            k: Number of documents to return per query.
            weight: Multiplier for the impact score.
            score_is_distance: Set to ``False`` when ``db`` already returns a
                higher-is-better similarity.
            **kwargs: Forwarded to ``BaseRetriever`` (e.g. ``tags``).
        """
        # Every instance gets its own cache unless the caller supplies one.
        kwargs.setdefault('metadata_cache', {})
        super().__init__(
            db=db, k=k, weight=weight, score_is_distance=score_is_distance, **kwargs
        )

    async def fetch_metadata(self, session, document_identifier: str) -> Dict[str, float]:
        """Fetch impact metrics for a single document asynchronously.

        The identifier is resolved to a DOI through a Crossref search, then
        PlumX is queried for that DOI, falling back to Altmetric when PlumX
        does not answer with HTTP 200.

        Args:
            session: An open ``aiohttp.ClientSession``.
            document_identifier: Free-text identifier of the document (the
                caller passes the chunk's ``metadata['source']``).

        Returns:
            Dict with the keys ``citation``, ``socialmedia``, ``capture`` and
            ``mention``. All values are 0 when nothing could be fetched.
            Failures are reported on stdout and never raised.
        """
        cached = self.metadata_cache.get(document_identifier)
        if cached is not None:
            return cached

        # Initialize default metrics
        metrics = {'citation': 0, 'socialmedia': 0, 'capture': 0, 'mention': 0}

        try:
            # Pass the query through ``params`` so that spaces, '&', '#', etc.
            # in the identifier are URL-encoded instead of corrupting the URL.
            # Only the best match is used, so a single row is requested.
            async with session.get(
                "https://api.crossref.org/works",
                params={'query': document_identifier, 'rows': '1'},
            ) as resp:
                cr_result = await resp.json()

            items = cr_result.get('message', {}).get('items', [])
            if not items:
                # A definitive "no match" is worth caching.
                self.metadata_cache[document_identifier] = metrics
                return metrics
            doi = items[0]['DOI']

            # Fetch PlumXMetrics
            plumx_url = f"https://api.elsevier.com/analytics/plumx/doi/{doi}"
            headers = {'Accept': 'application/json'}  # Include necessary headers and API keys
            plumx_data = None
            async with session.get(plumx_url, headers=headers) as resp:
                if resp.status == 200:
                    plumx_data = await resp.json()

            if plumx_data is not None:
                # NOTE(review): the PlumX field names below are placeholders
                # carried over from the original code; confirm them against
                # the actual API response.
                # Building a new dict keeps ``metrics`` at its defaults if any
                # lookup raises half-way through.
                metrics = {
                    'citation': plumx_data.get('citation', {}).get('total', 0),
                    'socialmedia': plumx_data.get('socialMedia', {}).get('total', 0),
                    'capture': plumx_data.get('capture', {}).get('total', 0),
                    'mention': plumx_data.get('mention', {}).get('total', 0),
                }
            else:
                # If PlumX fails, fallback to Altmetric
                altmetric_url = f"https://api.altmetric.com/v1/doi/{doi}"
                altmetric_data = None
                async with session.get(altmetric_url) as resp:
                    if resp.status == 200:
                        altmetric_data = await resp.json()
                if altmetric_data is not None:
                    metrics = {
                        'citation': altmetric_data.get('cited_by_posts_count', 0),
                        'socialmedia': altmetric_data.get('cited_by_tweeters_count', 0),
                        'capture': altmetric_data.get('readers_count', 0),
                        'mention': altmetric_data.get('cited_by_feeds_count', 0),
                    }
        except Exception as e:
            # Deliberately broad: metric lookup is best-effort and must never
            # break retrieval. The defaults are returned but NOT cached, so a
            # transient failure does not zero the document for the lifetime of
            # this retriever.
            print(f"Error fetching metadata for {document_identifier}: {e}")
            return metrics

        # Cache the fetched metrics
        self.metadata_cache[document_identifier] = metrics
        return metrics

    async def fetch_all_metadata(self, document_identifiers: List[str]) -> Dict[str, Dict[str, float]]:
        """Fetch metrics for all documents concurrently.

        Args:
            document_identifiers: Identifiers to look up.

        Returns:
            Mapping from each identifier to its metrics dict. Empty when no
            identifiers are given (no HTTP session is opened in that case).
        """
        if not document_identifiers:
            return {}
        async with aiohttp.ClientSession() as session:
            results = await asyncio.gather(
                *(self.fetch_metadata(session, identifier) for identifier in document_identifiers)
            )
        return dict(zip(document_identifiers, results))

    async def _aget_relevant_documents(
        self, query: str, *, run_manager: Optional[CallbackManagerForRetrieverRun] = None
    ) -> List[Document]:
        """Retrieve the ``k`` best documents for ``query`` asynchronously.

        Args:
            query: Search text.
            run_manager: Callback manager supplied by LangChain; unused here.

        Returns:
            At most ``k`` documents, best combined score first.
        """
        # Over-fetch 3k candidates so re-ranking has something to choose from.
        results_with_scores = self.db.similarity_search_with_score(query, k=self.k * 3)

        # Keep only the first chunk per source; a set makes the membership
        # test O(1) while the list preserves the vector store's order.
        seen_sources = set()
        candidates = []
        for doc, raw_score in results_with_scores:
            identifier = doc.metadata.get('source', '')
            if identifier and identifier not in seen_sources:
                seen_sources.add(identifier)
                candidates.append((identifier, doc, raw_score))

        # Fetch metadata for all documents
        metadata_results = await self.fetch_all_metadata(
            [identifier for identifier, _, _ in candidates]
        )

        default_metrics = {'citation': 0, 'socialmedia': 0, 'capture': 0, 'mention': 0}
        results_with_combined_scores = []
        for identifier, doc, raw_score in candidates:
            metrics = metadata_results.get(identifier, default_metrics)
            if self.score_is_distance:
                # Map a distance in [0, inf) to a relevance in (0, 1] so that
                # closer documents score higher.
                relevance = 1.0 / (1.0 + max(float(raw_score), 0.0))
            else:
                relevance = float(raw_score)
            final_score = relevance + self.weight * calculate_scores(metrics)
            results_with_combined_scores.append((doc, final_score))

        # Highest combined score first; the sort is stable, so ties keep the
        # vector store's original order.
        results_with_combined_scores.sort(key=lambda pair: pair[1], reverse=True)

        # Return the top k documents
        return [doc for doc, _ in results_with_combined_scores[:self.k]]

    def _get_relevant_documents(
        self, query: str, *, run_manager: Optional[CallbackManagerForRetrieverRun] = None
    ) -> List[Document]:
        """Synchronous entry point that drives the asynchronous implementation.

        Args:
            query: Search text.
            run_manager: Callback manager supplied by LangChain; passed through.

        Returns:
            At most ``k`` documents, best combined score first.
        """
        coroutine = self._aget_relevant_documents(query, run_manager=run_manager)
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No loop is running in this thread (plain script): run directly.
            return asyncio.run(coroutine)

        # A loop is already running in this thread (e.g. inside a notebook),
        # where asyncio.run() would raise. Run the coroutine on a fresh loop
        # in a worker thread and block until it finishes.
        with ThreadPoolExecutor(max_workers=1) as pool:
            return pool.submit(asyncio.run, coroutine).result()


In [None]:
# Load the embedding function and vector store.
# The same embedding model must be used here as when "faiss_index" was built,
# otherwise query vectors and stored vectors are not comparable.
embedding_function = SentenceTransformerEmbeddings(model_name="all-mpnet-base-v2")

# NOTE(review): allow_dangerous_deserialization=True opts in to loading a
# serialized index from disk (presumably pickle-backed -- confirm). Only point
# this at index files you created yourself.
vector = FAISS.load_local("faiss_index", embedding_function, allow_dangerous_deserialization=True)

# Instantiate RefRetriever instead of vector.as_retriever so that hits are
# re-ranked with the impact metrics computed by RefRetriever.
k = 5  # Number of documents to retrieve
retriever = RefRetriever(db=vector, k=k, weight=0.5)

# NOTE(review): the API key is a literal placeholder and requests go to a
# non-default base URL; consider loading the real key from the environment
# instead of editing it into the source.
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.01, openai_api_key='YOUR_OPENAI_API_KEY', openai_api_base='https://api.chatgptid.net/v1')

# Wrap the retriever in a MultiQueryRetriever driven by the same LLM
# (presumably to search with several rephrasings of the query -- confirm
# against the LangChain documentation).
retriever_MQ = MultiQueryRetriever.from_llm(
    retriever=retriever, llm=llm
)

# Expose the retriever to agents as a tool named "literature_search"; the
# description string is what the LLM sees when deciding whether to call it.
retriever_tool = create_retriever_tool(
    retriever_MQ,
    "literature_search",
    "Search for information about input questions. For any questions about battery recycle, you must use this tool!",
)

# Tool list shared by every agent built on `llm` below.
tools = [retriever_tool]

# General-purpose expert prompt: a system message, the prior conversation
# ("chat_history"), the user's question ("input"), and the agent's working
# notes ("agent_scratchpad").
# NOTE(review): `prompt`, `agent` and `agent_executor` are not referenced again
# in the visible part of this file; the later stages build their own agents.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            '''
            You are recognized as a leading authority in the field of battery recycling, tasked with the pivotal role of providing expert responses to inquiries. Your responses must embody the highest standards of accuracy, comprehensiveness, and depth of knowledge. Approach each question with a methodical and thoughtful mindset, ensuring your answers are:

            Informative: Furnish detailed insights, drawing upon the most relevant facts and figures.
            Correct: Ensure factual accuracy in every aspect of your response.
            Knowledgeable: Display a profound understanding of the subject matter, including advanced concepts and recent advancements in the field of battery recycling.
            Holistic: Offer a well-rounded perspective, considering various facets of the topic at hand.
            As you address each question, please adhere to the following guidelines:

            Cite Examples: When referencing data or examples from the provided literature, include comprehensive information to contextualize your points effectively. Clearly indicate these instances by stating, "For example," followed by a detailed explanation.
            Stay On Topic: Concentrate solely on the query posed. Your reply should be closely aligned with the question, avoiding tangential or unrelated content.
            Format Your Answer: Present your response in a structured manner, using either bullet points or numbered lists for clarity and ease of understanding.
            Before responding, take a moment to center yourself. Breathe deeply, and proceed with a step-by-step analytical approach, ensuring that your expertise shines through in a manner that is both engaging and enlightening.

            ''',
        ),
        MessagesPlaceholder(variable_name="chat_history"),
        ("user", "{input}"),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
)

# Build an OpenAI-functions agent from the LLM, the tool list and the prompt.
agent = create_openai_functions_agent(llm, tools, prompt)

# NOTE(review): confirm that AgentExecutor accepts `reduce_k_below_max_tokens`;
# the same keyword is passed to every executor in this file.
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True, reduce_k_below_max_tokens=True)

# The question that the whole pipeline below answers.
input_question = "What are the recycling methods for spent lithium-ion batteries?"

# Stage 1 (planning): ask the agent to outline which aspects the final answer
# should cover before any detailed answer is written.
breakdown_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", '''
        You are an expert in battery recycling, and your task is to answer user's question professionally by first planning what the answer should be composed of.
        You need to consider comprehensive aspects in battery recycle to reach the final answers, however your answers must only be in material science, focus on the scientific facts and methodology.
        Your job is to search for information in literature review and identify the key aspects that are answers to the input question.
        Make sure you are directly answering the subject of the input question(for example: method, definition, components, mechanism), use terminology to describe the aspects
        Your answer should be in planning step where you need to set up the framework of the final answers.
        ......
        '''),
        MessagesPlaceholder(variable_name="chat_history"),
        ("user", "{input}"),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
)


agent_breakdown = create_openai_functions_agent(llm, tools, breakdown_prompt)

agent_breakdown_executor = AgentExecutor(agent=agent_breakdown, tools=tools, verbose=True, reduce_k_below_max_tokens=True)

# Run the planning agent with an empty conversation history.
breakdown_a = agent_breakdown_executor.invoke({"input": input_question, 'chat_history': []})

# `thoughts` accumulates the text produced by the successive stages; it starts
# with the plan and is extended after the step stage.
thoughts = breakdown_a['output']

# Stage 2 (answering): produce a detailed answer, using the plan from the
# breakdown stage as conversation history.
step_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", '''
        As an expert in battery recycling, your task is to provide professional answers to user questions. When addressing queries about battery recycling, follow these detailed steps:

        Informative: Provide detailed insights, drawing upon the most relevant facts and figures about battery recycling.
        Correct: Ensure factual accuracy in every aspect of your response, verifying information against current industry standards and research findings.
        Knowledgeable: Demonstrate a profound understanding of the subject matter, including advanced concepts and recent advancements in the field of battery recycling.
        Additionally, integrate the following guidelines into your responses:

        Stay On Topic: Concentrate solely on the query posed. Ensure that your reply is closely aligned with the question, avoiding tangential or unrelated content.
        Literature Review: Before responding, conduct a brief literature search to identify any relevant methods or perspectives that are not commonly discussed. This should include checking the literature search tool to supplement your answer with any missing information.
        Analytical Approach: Take a moment to center yourself before responding. Breathe deeply and proceed with a step-by-step analysis, ensuring that your expertise is communicated in a manner that is both engaging and enlightening. Use this approach to critically assess and verify different methods of battery recycling discussed in your literature review.

        Describe all the methods and process for the given query, including both traditional techniques and any innovative approaches (especially acronym) in the literature search to make sure you are not neglecting any method.

        '''),
        MessagesPlaceholder(variable_name="chat_history"),
        ("user", "{input}"),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
)

agent_step = create_openai_functions_agent(llm, tools, step_prompt)

agent_step_executor = AgentExecutor(agent=agent_step, tools=tools, verbose=True, reduce_k_below_max_tokens=True)

# The planning exchange (question + plan) is replayed as chat history so the
# agent can list methods that the plan did not mention.
thought = agent_step_executor.invoke({"input": "To address the question " + input_question + ", first list all innovative methods in the literature search which are not mentioned in the previous chat history, and then answer the question", 'chat_history': [HumanMessage(content=breakdown_a['input']), AIMessage(content=breakdown_a['output'])]})

# Append the detailed answer to the running transcript of stage outputs.
thoughts += thought['output']

# From here on the conversation history is the step-stage exchange only; the
# planning exchange is no longer included.
chat_history = [HumanMessage(content=thought['input']), AIMessage(content=thought['output'])]

# Load the second vector store and instantiate RefRetriever.
# It reuses `embedding_function`, so "faiss_index2" must have been built with
# the same embedding model.
# NOTE(review): allow_dangerous_deserialization=True opts in to loading a
# serialized index from disk (presumably pickle-backed -- confirm). Only load
# index files you created yourself.
vector2 = FAISS.load_local("faiss_index2", embedding_function, allow_dangerous_deserialization=True)

# The critique stage retrieves twice as many documents as the answer stages.
k2 = 10
retriever2 = RefRetriever(db=vector2, k=k2, weight=0.5)

# Second model (gpt-4), used by the critique agent. No custom base URL is set.
# NOTE(review): the API key is a literal placeholder; consider loading the
# real key from the environment.
llm2 = ChatOpenAI(model="gpt-4", temperature=0.01, openai_api_key='YOUR_OPENAI_API_KEY')

retriever_MQ2 = MultiQueryRetriever.from_llm(
    retriever=retriever2, llm=llm2
)

# Same tool name and description as the first tool, but backed by the second
# index; the two tools are never given to the same agent.
retriever_tool2 = create_retriever_tool(
    retriever_MQ2,
    "literature_search",
    "Search for information about input questions. For any questions about battery recycle, you must use this tool!",
)

# Tool list for agents built on `llm2`.
tools2 = [retriever_tool2]

# Stage 3 (critique): a second agent, built on `llm2` and the second index,
# reviews the answer held in the chat history and writes revision guidance.
critique_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", '''

        The task requires a detailed critique of an existing explanation concerning technologies for recycling lithium-ion batteries. This critique is intended to serve as guidance for another individual tasked with revising the original answer. The critique must rigorously address specific points:

        Fact-Checking:
        Scrutinize the original answer for discrepancies or outdated information by cross-verifying each mentioned detail against contemporary research and findings in the lithium-ion battery recycling domain. Highlight inaccuracies and ensure that every fact aligns with the most recent studies and technological advancements.
        Supplement:
        Dive into the existing literature review to identify and incorporate missing crucial information not covered in the original answer, focus only on material science.
        Avoiding Hallucinations:
        Identify any baseless assertions, speculative statements, or inaccuracies within the original response. Highlight instances where the presented information lacks direct support from the literature review or recognized sources in the field.
        Accuracy and Precision:
        Evaluate the technical accuracy and precision of the original answer's descriptions of recycling methods. Identify and correct inaccuracies or over-generalizations, particularly regarding process efficiencies, environmental impacts, and the validation of technological advancements through recent studies.
        Literature Alignment:
        Assess the degree to which the original answer reflects the insights and conclusions found within the provided literature review. Pinpoint areas where the original response fails to capture the importance of specific studies, technologies, or findings emphasized in the literature.
        Instructions for Revision:
        The individual responsible for revising the original answer should address all issues identified in this critique. The revised answer must correct any inaccuracies, align with the latest research, and incorporate significant findings from the provided literature accurately.
        The revised answer should be factual, precise, and devoid of speculative content, adhering closely to established sources within the field.

        The ultimate goal is for the revised answer to be thoroughly informed by this critique, resulting in a comprehensive and accurate response that meets academic standards for inclusion in a literature review on lithium-ion battery recycling.

        Provide your answer to the questions as well as your revision to the original question.

        '''),
        MessagesPlaceholder(variable_name="chat_history"),
        ("user", "{input}"),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
)

agent_critique = create_openai_functions_agent(llm2, tools2, critique_prompt)

agent_critique_executor = AgentExecutor(agent=agent_critique, tools=tools2, verbose=True, reduce_k_below_max_tokens=True)

# Critique the step-stage answer; at this point `chat_history` holds exactly
# the step-stage question and answer.
critique = agent_critique_executor.invoke({"input": "Here's the original question you need to provide insights for: " + input_question + " You need to provide your answers and revision to the answer in the chat history.", 'chat_history': chat_history})

# Extend the history with the critique exchange so the reviser sees both the
# original answer and the feedback on it.
chat_history = [HumanMessage(content=thought['input']), AIMessage(content=thought['output']), HumanMessage(content=critique['input']), AIMessage(content=critique['output'])]

# Stage 4 (revision): the first model rewrites its answer in light of the
# critique.
revise_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", '''

        You are an expert in battery recycling, and your task is to answer user's question professionally.
        Given the feedback on your initial answer regarding battery recycling, the goal now is to revise your response to make it more informative, accurate, and research-backed.
        You are now giving the final answer, make sure the final answer is based on facts and literature.

        '''),
        MessagesPlaceholder(variable_name="chat_history"),
        ("user", "{input}"),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
)

# The reviser uses `llm` and `tools` (first index), not the critique agent's
# `llm2` / `tools2`.
agent_revise = create_openai_functions_agent(llm, tools, revise_prompt)

agent_revise_executor = AgentExecutor(agent=agent_revise, tools=tools, verbose=True, reduce_k_below_max_tokens=True)

# The critique text is embedded in the input and is also present in
# `chat_history`, so the reviser receives it twice.
update_answer = agent_revise_executor.invoke({'input': "Based on the original answer you provided about battery recycling, I've identified several areas that need revision for a more accurate and comprehensive response: " + critique['output'] + " Please answer the question based on literature search and revision.", 'chat_history': chat_history})

# NOTE(review): this resets the history to the step-stage exchange only, so
# the refinement loop that follows critiques the step-stage answer again
# rather than `update_answer` produced just above -- confirm this is intended.
chat_history = [HumanMessage(content=thought['input']), AIMessage(content=thought['output'])]

# Token the critique agent is asked to emit when it has no further objections.
stop_signal = "[STOP]"

# Upper bound on critique/revise rounds. The loop otherwise ends only when the
# critic emits the stop token, which an LLM is not guaranteed to ever do, so an
# unbounded loop could issue paid API calls indefinitely.
max_refinement_rounds = 5

# Iterative refinement: critique the current answer, stop if the critic is
# satisfied, otherwise revise and critique again.
for _ in range(max_refinement_rounds):
    critique = agent_critique_executor.invoke({"input": "Here's the original question you need to provide insights for: " + input_question + " You need to provide your answers and revision to the answer in the chat history. If you think the answer is correct and accurate, only respond: [STOP]", 'chat_history': chat_history})

    if stop_signal in critique['output']:
        break

    # From the second round on, the history holds four messages (previous
    # critique + previous revision): drop the older pair and append the new
    # critique so the reviser sees its last answer and the fresh feedback.
    # NOTE(review): in the first round the history has only two messages, so
    # the critique is not appended here and reaches the reviser only through
    # the input text below -- confirm that asymmetry is intended.
    if len(chat_history) > 2:
        chat_history = chat_history[2:]
        chat_history.append(HumanMessage(content=critique['input']))
        chat_history.append(AIMessage(content=critique['output']))

    update_answer = agent_revise_executor.invoke({'input': "Based on the original answer you provided about battery recycling, I've identified several areas that need revision for a more accurate and comprehensive response: " + critique['output'] + " Please answer the question based on literature search and revision.", 'chat_history': chat_history})

    # The next round's history is this round's critique and revision.
    chat_history = [HumanMessage(content=critique['input']), AIMessage(content=critique['output']), HumanMessage(content=update_answer['input']), AIMessage(content=update_answer['output'])]
else:
    # Reached only when no round ended with the stop token.
    print(f"Refinement stopped after {max_refinement_rounds} rounds without receiving {stop_signal}.")
