# ReadMe

This notebook connects to the [Materials Project API](https://api.materialsproject.org/docs#/Materials%20Thermo/search_materials_thermo_phase_diagram__get) and experiments with querying it through a LangChain API chain.

In [30]:
import os

import chainlit as cl
from dotenv import load_dotenv
from langchain.chains import LLMChain, APIChain
from langchain.llms import OpenAI
from langchain.memory.buffer import ConversationBufferMemory

# Read a local .env file (python-dotenv) so settings such as API keys can be
# supplied through environment variables rather than written in the notebook.
load_dotenv()

True

In [31]:
from typing import Any, Dict, List, Optional, Sequence, Tuple
from urllib.parse import urlparse

from langchain_community.utilities.requests import TextRequestsWrapper
from langchain_core.callbacks import (
    AsyncCallbackManagerForChainRun,
    CallbackManagerForChainRun,
)
from langchain_core.language_models import BaseLanguageModel
from langchain_core.prompts import BasePromptTemplate
from langchain_core.pydantic_v1 import Field, root_validator

from langchain.chains.api.prompt import API_RESPONSE_PROMPT, API_URL_PROMPT
from langchain.chains.base import Chain
from langchain.chains.llm import LLMChain

In [55]:
def _extract_scheme_and_domain(url: str) -> Tuple[str, str]:
    """Split a URL into its ``(scheme, network location)`` pair.

    Args:
        url (str): The input URL.

    Returns:
        Tuple[str, str]: The scheme (e.g. ``"https"``) and the netloc
        (e.g. ``"api.materialsproject.org"``) reported by ``urlparse``.
    """
    parsed_uri = urlparse(url)
    return parsed_uri.scheme, parsed_uri.netloc


def _check_in_allowed_domain(url: str, limit_to_domains: Sequence[str]) -> bool:
    """Check if a URL is in the allowed domains.

    Only the scheme and the network location are compared; any path on the
    URL or on an allowed entry is ignored.

    Args:
        url (str): The input URL.
        limit_to_domains (Sequence[str]): The allowed domains.

    Returns:
        bool: True if the URL is in the allowed domains, False otherwise.
    """
    scheme, domain = _extract_scheme_and_domain(url)
    return any(
        (scheme, domain) == _extract_scheme_and_domain(allowed)
        for allowed in limit_to_domains
    )


class CustomAPIChain(APIChain):
    """APIChain with hooks to rewrite the generated URL and trim the API response.

    The chain asks the LLM for a URL, passes it through ``process_api_url``,
    checks it against ``limit_to_domains``, fetches it, passes the body
    through ``process_api_response`` and finally asks the LLM to answer the
    question from that (possibly shortened) response.
    """

    # Maximum number of characters of the raw API response that is forwarded
    # to the answer prompt. Can be overridden per instance, e.g.
    # ``CustomAPIChain.from_llm_and_api_docs(..., max_response_chars=4000)``.
    max_response_chars: int = 1000

    def process_api_url(self, url: str) -> str:
        """Hook for adjusting the LLM-generated URL before it is requested.

        The default implementation returns ``url`` unchanged.
        """
        return url  # Modify this as needed

    def process_api_response(self, response: str) -> str:
        """Hook for adjusting the raw API response before it reaches the LLM.

        The default implementation keeps at most ``max_response_chars``
        leading characters so that a large payload does not blow up the
        answer prompt.
        """
        return response[: self.max_response_chars]  # Modify this as needed

    @classmethod
    def from_llm_and_api_docs(
        cls,
        llm: BaseLanguageModel,
        api_docs: str,
        headers: Optional[dict] = None,
        api_url_prompt: BasePromptTemplate = API_URL_PROMPT,
        api_response_prompt: BasePromptTemplate = API_RESPONSE_PROMPT,
        limit_to_domains: Optional[Sequence[str]] = tuple(),
        **kwargs: Any,
    ) -> 'CustomAPIChain':
        """Build the chain from an LLM, the API documentation and two prompts.

        Args:
            llm: Model used both to generate the URL and to write the answer.
            api_docs: Documentation text inserted into both prompts.
            headers: Optional HTTP headers sent with every request.
            api_url_prompt: Prompt that produces the request URL.
            api_response_prompt: Prompt that produces the final answer.
            limit_to_domains: Allowed domains; the empty default is rejected
                by ``validate_limit_to_domains``, so callers must pass a
                value (or ``None``).
            **kwargs: Forwarded to the class constructor.

        Returns:
            CustomAPIChain: The configured chain.
        """
        get_request_chain = LLMChain(llm=llm, prompt=api_url_prompt)
        requests_wrapper = TextRequestsWrapper(headers=headers)
        get_answer_chain = LLMChain(llm=llm, prompt=api_response_prompt)
        return cls(
            api_request_chain=get_request_chain,
            api_answer_chain=get_answer_chain,
            requests_wrapper=requests_wrapper,
            api_docs=api_docs,
            limit_to_domains=limit_to_domains,
            **kwargs,
        )

    def _call(self, inputs: Dict[str, Any], run_manager: Optional[CallbackManagerForChainRun] = None) -> Dict[str, str]:
        """Run the chain synchronously.

        Raises:
            ValueError: If the generated URL is outside ``limit_to_domains``.
        """
        _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager()
        question = inputs[self.question_key]
        api_url = self.api_request_chain.predict(
            question=question,
            api_docs=self.api_docs,
            callbacks=_run_manager.get_child(),
        )
        api_url = api_url.strip()
        api_url = self.process_api_url(api_url)  # Apply custom URL processing
        # The domain check runs on the processed URL, i.e. the one actually requested.
        if self.limit_to_domains and not _check_in_allowed_domain(api_url, self.limit_to_domains):
            raise ValueError(f"{api_url} is not in the allowed domains: {self.limit_to_domains}")
        api_response = self.requests_wrapper.get(api_url)
        api_response = self.process_api_response(api_response)  # Apply custom response processing
        answer = self.api_answer_chain.predict(
            question=question,
            api_docs=self.api_docs,
            api_url=api_url,
            api_response=api_response,
            callbacks=_run_manager.get_child(),
        )
        return {self.output_key: answer}

    async def _acall(self, inputs: Dict[str, Any], run_manager: Optional[AsyncCallbackManagerForChainRun] = None) -> Dict[str, str]:
        """Run the chain asynchronously; mirrors ``_call`` step for step.

        Raises:
            ValueError: If the generated URL is outside ``limit_to_domains``.
        """
        _run_manager = run_manager or AsyncCallbackManagerForChainRun.get_noop_manager()
        question = inputs[self.question_key]
        api_url = await self.api_request_chain.apredict(
            question=question,
            api_docs=self.api_docs,
            callbacks=_run_manager.get_child(),
        )
        api_url = api_url.strip()
        api_url = self.process_api_url(api_url)  # Apply custom URL processing
        if self.limit_to_domains and not _check_in_allowed_domain(api_url, self.limit_to_domains):
            raise ValueError(f"{api_url} is not in the allowed domains: {self.limit_to_domains}")
        api_response = await self.requests_wrapper.aget(api_url)
        api_response = self.process_api_response(api_response)  # Apply custom response processing
        answer = await self.api_answer_chain.apredict(
            question=question,
            api_docs=self.api_docs,
            api_url=api_url,
            api_response=api_response,
            callbacks=_run_manager.get_child(),
        )
        return {self.output_key: answer}

    @property
    def input_keys(self) -> List[str]:
        """Expect input key.

        :meta private:
        """
        return [self.question_key]

    @property
    def output_keys(self) -> List[str]:
        """Expect output key.

        :meta private:
        """
        return [self.output_key]

    @root_validator(pre=True)
    def validate_api_request_prompt(cls, values: Dict) -> Dict:
        """Check that api request prompt expects the right variables."""
        input_vars = values["api_request_chain"].prompt.input_variables
        expected_vars = {"question", "api_docs"}
        if set(input_vars) != expected_vars:
            raise ValueError(
                f"Input variables should be {expected_vars}, got {input_vars}"
            )
        return values

    @root_validator(pre=True)
    def validate_limit_to_domains(cls, values: Dict) -> Dict:
        """Check that allowed domains are valid.

        ``None`` is accepted; a missing key or an empty sequence is rejected.
        """
        if "limit_to_domains" not in values:
            raise ValueError(
                "You must specify a list of domains to limit access using "
                "`limit_to_domains`"
            )
        if not values["limit_to_domains"] and values["limit_to_domains"] is not None:
            raise ValueError(
                "Please provide a list of domains to limit access using "
                "`limit_to_domains`."
            )
        return values

    @root_validator(pre=True)
    def validate_api_answer_prompt(cls, values: Dict) -> Dict:
        """Check that api answer prompt expects the right variables."""
        input_vars = values["api_answer_chain"].prompt.input_variables
        expected_vars = {"question", "api_docs", "api_url", "api_response"}
        if set(input_vars) != expected_vars:
            raise ValueError(
                f"Input variables should be {expected_vars}, got {input_vars}"
            )
        return values

    @property
    def _chain_type(self) -> str:
        """Identifier string for this chain type."""
        return "api_chain"


In [33]:
from api_docs.materials_project_doc import material_api_docs
from langchain.prompts import PromptTemplate

In [80]:
# Prompt that turns the user's question into a request URL. The fixed suffix on
# the "Question:" line asks the model to add license="BY-C" and _limit=10 to
# every query it builds.
api_url_template = """
Given the following API Documentation for Material Project's
API: {api_docs}
Your task is to construct the most efficient API URL to answer 
the user's question, ensuring the 
call is optimized to include only necessary information.
Question: {question} with license="BY-C" and _limit=10
API URL:
"""

# Placeholders the template above expects to be filled in.
api_url_prompt = PromptTemplate(
    template=api_url_template,
    input_variables=['api_docs', 'question'],
)


In [81]:
# Prompt that turns the raw API response into a short, reader-friendly answer.
# The literal starts with exactly three quotes: a fourth one would become part
# of the prompt text and be sent to the model as a stray '"' character.
api_response_template = """
With the API Documentation for Material Project's official API: {api_docs} 
and the user question: {question} in mind,
and given this API URL: {api_url} for querying, here is the 
response from Material Project's API: {api_response}. 
Please provide a summary that directly addresses the user's question based on the response, 
omitting technical details like response format, and 
focusing on delivering the answer with clarity and conciseness, 
as if a material scientist is providing this information.
Summary:
"""

# The four placeholders used by the template above.
api_response_prompt = PromptTemplate(input_variables=['api_docs',
                                                      'question',
                                                      'api_url',
                                                      'api_response'],
                                     template=api_response_template)

In [82]:
# Completion model shared by both steps of the chain; temperature is set to 0.
llm = OpenAI(
    temperature=0,
    model='gpt-3.5-turbo-instruct',
)

In [83]:
# The Materials Project key is read from the MP_API_KEY environment variable
# (for example from the .env file loaded by load_dotenv()) so that no secret is
# stored in the notebook. A key that was ever committed here should be revoked.
# A missing variable raises KeyError immediately instead of sending
# unauthenticated requests.
# NOTE(review): this builds the stock APIChain, so the truncation performed by
# CustomAPIChain.process_api_response is not applied to these requests.
api_chain = APIChain.from_llm_and_api_docs(
        headers={'X-API-KEY': os.environ["MP_API_KEY"]},
        llm=llm,
        api_docs=material_api_docs,
        api_url_prompt=api_url_prompt,
        api_response_prompt=api_response_prompt,
        verbose=True,
        limit_to_domains=["https://api.materialsproject.org/materials/core/"]
    )

In [84]:
# Ask the chain a free-form question. In the recorded run below, the answer
# step was rejected by OpenAI as too large (314114 tokens requested against a
# 90000 tokens-per-minute limit) and the cell was interrupted manually.
api_chain.run("List me everything relevant to TiO2")



[1m> Entering new APIChain chain...[0m
[32;1m[1;3mhttps://api.materialsproject.org/materials/core/?formula=TiO2&license=BY-C&_all_fields=true[0m
2024-04-16 22:28:32 - error_code=rate_limit_exceeded error_message='Request too large for gpt-3.5-turbo-instruct in organization org-r3N81HmUWAbtH2ge71ducYhe on tokens per min (TPM): Limit 90000, Requested 314114. The input or output tokens must be reduced in order to run successfully. Visit https://platform.openai.com/account/rate-limits to learn more.' error_param=None error_type=tokens message='OpenAI API error received' stream_error=False
2024-04-16 22:28:32 - Retrying langchain.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Request too large for gpt-3.5-turbo-instruct in organization org-r3N81HmUWAbtH2ge71ducYhe on tokens per min (TPM): Limit 90000, Requested 314114. The input or output tokens must be reduced in order to run successfully. Visit https://platform.openai.c

KeyboardInterrupt: 