In [1]:
#!playwright install
#!playwright install-deps

In [2]:
from langchain_community.document_loaders import WebBaseLoader
from langchain.document_loaders import AsyncChromiumLoader
import json
import asyncio
import logging
from typing import AsyncIterator, Iterator, List, Optional

from langchain_core.documents import Document

import os
os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"

import nest_asyncio
nest_asyncio.apply()

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [3]:
# loader = WebBaseLoader("https://materiahunter.com/faq")
# loader = AsyncChromiumLoader(["https://materiahunter.com/faq"])


# Typesense search endpoint for the "rules" collection. The query string is kept
# verbatim from the original request; only the page number varies.
TYPESENSE_SEARCH_URL = (
    "https://m2pote0hr6y7aw8dp.a1.typesense.net/collections/rules/documents/search"
    "?q=*&limit=250&page={page}&facet_by=serials,tags&query_by=serials,+tags,+title,+body"
)
# Number of result pages to request (pages 1..TYPESENSE_PAGE_COUNT, 250 hits max each).
TYPESENSE_PAGE_COUNT = 4

urls = [
    TYPESENSE_SEARCH_URL.format(page=page)
    for page in range(1, TYPESENSE_PAGE_COUNT + 1)
]

# The API key can be overridden through the TYPESENSE_API_KEY environment
# variable so it does not have to live in the notebook; the previous literal is
# kept as the fallback so existing runs behave exactly as before.
extra_http_headers = {
    "x-typesense-api-key": os.environ.get(
        "TYPESENSE_API_KEY", "0RbWmbctto4gEdrHVPeF8ltS1Bv16yyJ"
    )
}

class MyACLoader(AsyncChromiumLoader):
    """Chromium-based loader that turns JSON search responses into Documents.

    Every URL is expected to return a JSON object with a top-level ``"hits"``
    list whose items each carry a ``"document"`` with ``title``, ``body`` and
    ``source`` fields. Requests are sent with the module-level
    ``extra_http_headers``.
    """

    async def ascrape_playwright(self, url: str) -> List[dict]:
        """
        Asynchronously fetch a search URL with Playwright and return its hits.

        The raw response body is read instead of the rendered page so the JSON
        is not altered by HTML escaping (``innerHTML`` would turn ``&`` into
        ``&amp;`` and so on).

        Args:
            url (str): The URL to fetch.

        Returns:
            List[dict]: The value of the ``"hits"`` key of the JSON response.

        Raises:
            RuntimeError: If the navigation yields no response or a non-2xx status.
            json.JSONDecodeError: If the response body is not valid JSON.
            KeyError: If the JSON payload has no ``"hits"`` key.
        """
        from playwright.async_api import async_playwright

        async with async_playwright() as p:
            browser = await p.chromium.launch(headless=self.headless)
            try:
                page = await browser.new_page(
                    user_agent=self.user_agent,
                    extra_http_headers=extra_http_headers,
                )
                response = await page.goto(url)
                if response is None:
                    raise RuntimeError(f"No response received for {url}")
                if not response.ok:
                    raise RuntimeError(
                        f"Request to {url} failed with HTTP status {response.status}"
                    )
                payload = await response.text()
            finally:
                # Always release the browser, including when the fetch fails;
                # the original error then propagates instead of being replaced
                # by an unrelated JSON decoding error.
                await browser.close()
        return json.loads(payload)["hits"]

    def lazy_load(self) -> Iterator[Document]:
        """
        Lazily load the search hits of every configured URL as Documents.

        URLs are fetched one at a time, and the Documents of one URL are
        yielded before the next URL is requested.

        Yields:
            Document: One per hit. ``page_content`` combines the card title and
            the FAQ body; ``metadata`` holds the hit's ``title`` and ``source``.

        """
        for url in self.urls:
            # asyncio.run is called from synchronous code here; the notebook
            # applies nest_asyncio beforehand.
            hits = asyncio.run(self.ascrape_playwright(url))
            for hit in hits:
                record = hit["document"]
                yield Document(
                    page_content=f"In reference to the card {record['title']}: {record['body']}",
                    metadata={
                        "title": record["title"],
                        "source": record["source"],
                    },
                )


# Build the loader over the paginated search URLs defined above.
loader = MyACLoader(urls)

# Scrape every URL and keep the resulting Documents; later cells index `docs`.
docs = loader.load()

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [4]:
# Display the loaded Documents to sanity-check their titles, sources and page_content.
docs

[Document(metadata={'title': '[23-080R] Odin', 'source': 'https://fftcg.square-enix-games.com/en/play-article/faq'}, page_content='In reference to the card [23-080R] Odin: It is not possible to remove the Lightning element card discarded to pay the cost for this ability from the game.'),
 Document(metadata={'title': '[23-081C] Puppetmaster', 'source': 'https://fftcg.square-enix-games.com/en/play-article/faq'}, page_content='In reference to the card [23-081C] Puppetmaster: - You still need to select a Forward card, even if you control five or more Backup cards.\n\n- The ability’s effects are rendered invalid if the Forward selected is no longer a valid target,  due to having moved away from the field by the point when effects are resolved etc., even if you control five or more Backup cards.'),
 Document(metadata={'title': '[23-082H] King', 'source': 'https://fftcg.square-enix-games.com/en/play-article/faq'}, page_content='In reference to the card [23-082H] King: - When using this abilit

In [5]:
from langchain_ollama import OllamaEmbeddings

# Ollama embedding model; passed to the vector store below to embed the documents.
embedding = OllamaEmbeddings(model="mxbai-embed-large")

In [6]:
from langchain_chroma import Chroma

# Embed every loaded Document and index it in a Chroma vector store.
# NOTE(review): no persist_directory is passed, so the index is presumably
# rebuilt on every run — confirm this is intended.
vectorstore = Chroma.from_documents(documents=docs, embedding=embedding)
vectorstore

<langchain_chroma.vectorstores.Chroma at 0x7f0f79c22a50>

In [7]:
# from langchain_qdrant import QdrantVectorStore
# from qdrant_client import QdrantClient
# from qdrant_client.http.models import Distance, VectorParams

# client = QdrantClient(":memory:")

# client.create_collection(
#     collection_name="demo_collection",
#     vectors_config=VectorParams(size=3072, distance=Distance.COSINE),
# )

# vectorstore = QdrantVectorStore(
#     client=client,
#     collection_name="demo_collection",
#     embedding=embedding,
# )
# vectorstore.add_documents(docs)
# vectorstore

In [8]:
# Expose the vector store as a retriever over the FAQ documents. No search
# options are passed, so the defaults apply (the repr below shows
# search_kwargs={}); the commented-out lines are alternative search settings.
retriever = vectorstore.as_retriever(
    # search_type="mmr",
    # search_kwargs={'lambda_mult': 1.0}
    # search_type="similarity_score_threshold",
    # search_kwargs={'score_threshold': 0.35}
)
retriever

VectorStoreRetriever(tags=['Chroma', 'OllamaEmbeddings'], vectorstore=<langchain_chroma.vectorstores.Chroma object at 0x7f0f79c22a50>, search_kwargs={})

In [9]:
from langchain_ollama import OllamaLLM

# Generation model used by all chains below; the commented-out lines are
# alternative Ollama model choices.
# llm = OllamaLLM(model="qwen2.5:14b")
# llm = OllamaLLM(model="gemma2:27b", temperature=0.0)
llm = OllamaLLM(model="llama3.2")
# llm = OllamaLLM(model="phi3.5")

In [10]:
from langchain import hub
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

# Fetch the "rlm/rag-prompt" template from the LangChain hub; rag_chain below
# feeds it the "context" and "question" values.
prompt = hub.pull("rlm/rag-prompt")

def format_docs(docs):
    """Concatenate the page_content of each document, separated by blank lines.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        str: All contents joined with a double newline; empty string for no docs.
    """
    separator = "\n\n"
    contents = [document.page_content for document in docs]
    return separator.join(contents)


# Pipeline: the incoming question is passed through unchanged as "question",
# while the retriever's documents are flattened by format_docs into "context";
# both fill the prompt, which is sent to the LLM and parsed to a plain string.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)



In [11]:
# Ask about a specific card; this chain returns only the answer string.
rag_chain.invoke("Tell me about the crystals and the card 23-038H Lady Lilith")

"The crystal for card 23-038H Lady Lilith is paid at the point when effects are resolved, and after payment, her auto ability to select one Forward and gain control of it is added to the stack. The cost of using this special ability is not specified in the context provided. I don't know if there are any other costs associated with paying the crystal for Lady Lilith."

In [12]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# System prompt for the second chain variant. The {context} placeholder is
# where the retrieved documents are inserted; the extra sentence about names
# warns the model that one card name can map to several different cards.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use five sentences maximum and keep the "
    "answer concise. Be aware that same name can refer to multiple different cards."
    "\n\n"
    "{context}"
)

# Chat template: the system prompt above plus the user's question as {input}.
prompt2 = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

# Combine the LLM and prompt into a document-stuffing chain, then wrap it with
# the retriever so a single {"input": ...} call retrieves and answers.
question_answer_chain = create_stuff_documents_chain(llm, prompt2)
rag_chain2 = create_retrieval_chain(retriever, question_answer_chain)

In [13]:
# Same question through the retrieval chain; the result dict also exposes the
# retrieved documents under 'context'.
rag_chain2.invoke({"input": "Tell me about the crystals and the card 23-038H Lady Lilith"})

{'input': 'Tell me about the crystals and the card 23-038H Lady Lilith',
 'context': [Document(metadata={'source': 'https://fftcg.square-enix-games.com/na/play-article/faq', 'title': '[23-038H] Lady Lilith'}, page_content='In reference to the card [23-038H] Lady Lilith: The Crystal is paid at the point when effects are resolved. The auto ability to select one Forward and gain control of it is then added to the stack after payment is made.'),
  Document(metadata={'source': 'https://fftcg.square-enix-games.com/na/play-article/faq', 'title': '17-113L - Glaciela Wezette'}, page_content='In reference to the card 17-113L - Glaciela Wezette: You can use the special ability by paying 1 crystal instead of discarding a card of the same name. When doing so, you much pay other costs such as CP, etc.'),
  Document(metadata={'source': 'https://fftcg.square-enix-games.com/na/play-article/faq', 'title': '20-017R - Palom'}, page_content='In reference to the card 20-017R - Palom: - Crystals are paid at 

In [14]:
from langchain.chains import RetrievalQA

# Third variant: a RetrievalQA "stuff" chain over the same llm and retriever.
# return_source_documents=True makes the result include the retrieved
# documents under 'source_documents' (visible in the outputs below).
qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        retriever=retriever, chain_type="stuff",
        # chain_type_kwargs=chain_type_kwargs,
        return_source_documents=True)

In [15]:
# Same Lady Lilith question, this time through the RetrievalQA chain.
qa_chain.invoke("Tell me about the crystals and the card 23-038H Lady Lilith")

{'query': 'Tell me about the crystals and the card 23-038H Lady Lilith',
 'result': 'Based on the provided context, for the card [23-038H] Lady Lilith: The Crystal, payment of crystals is made at the point when effects are resolved. After payment is made, the auto ability to select one Forward and gain control of it is then added to the stack.',
 'source_documents': [Document(metadata={'source': 'https://fftcg.square-enix-games.com/na/play-article/faq', 'title': '[23-038H] Lady Lilith'}, page_content='In reference to the card [23-038H] Lady Lilith: The Crystal is paid at the point when effects are resolved. The auto ability to select one Forward and gain control of it is then added to the stack after payment is made.'),
  Document(metadata={'source': 'https://fftcg.square-enix-games.com/na/play-article/faq', 'title': '17-113L - Glaciela Wezette'}, page_content='In reference to the card 17-113L - Glaciela Wezette: You can use the special ability by paying 1 crystal instead of discarding

In [16]:
# Question whose answer is stated directly in the [23-080R] Odin FAQ entry.
qa_chain.invoke("Can I remove the Lightning element card discarded to pay the cost for Odin as part of its ability?")

{'query': 'Can I remove the Lightning element card discarded to pay the cost for Odin as part of its ability?',
 'result': 'No, according to the context provided in reference to the card [23-080R] Odin, it is not possible to remove the Lightning element card discarded to pay the cost for this ability from the game.',
 'source_documents': [Document(metadata={'source': 'https://fftcg.square-enix-games.com/en/play-article/faq', 'title': '[23-080R] Odin'}, page_content='In reference to the card [23-080R] Odin: It is not possible to remove the Lightning element card discarded to pay the cost for this ability from the game.'),
  Document(metadata={'source': 'https://fftcg.square-enix-games.com/na/play-article/faq', 'title': '18-024C - Shiva'}, page_content='In reference to the card 18-024C - Shiva: If the additional cost is paid when cast, 10 cards in the break zone are removed from the game at the same time the CP are paid.\n\nIt is not possible to pay the additional cost in situations wher

In [17]:
# Deliberately incomplete rephrasing of the Odin question (it never says what
# should happen to the discarded cards); the model answers that it doesn't know.
qa_chain.invoke("Can cards discarded to pay the cost for Odin as part of its ability?")

{'query': 'Can cards discarded to pay the cost for Odin as part of its ability?',
 'result': "Based on the provided context, I don't know if cards discarded to pay the cost for Odin as part of its ability can be removed from the game. The first reference to Odin mentions that it is not possible to remove the Lightning element card discarded to pay the cost for this ability from the game, but it does not explicitly state whether other types of discarded cards are affected by this rule.",
 'source_documents': [Document(metadata={'source': 'https://fftcg.square-enix-games.com/en/play-article/faq', 'title': '[23-080R] Odin'}, page_content='In reference to the card [23-080R] Odin: It is not possible to remove the Lightning element card discarded to pay the cost for this ability from the game.'),
  Document(metadata={'source': 'https://fftcg.square-enix-games.com/na/play-article/faq', 'title': '18-024C - Shiva'}, page_content='In reference to the card 18-024C - Shiva: If the additional cost 

In [18]:
# General rules question that does not reference a specific card number.
qa_chain.invoke("Can play a 6th backup?")

{'query': 'Can play a 6th backup?',
 'result': "According to the context, it appears that playing a 6th Backup is not possible when controlling exactly 5 Backups, because of card 17-128L - Maria. However, without more specific information about other cards or abilities that may affect this situation, I don't know for certain if there are any exceptions or alternative ways to play another Backup in this scenario.",
 'source_documents': [Document(metadata={'source': 'https://fftcg.square-enix-games.com/na/play-article/faq', 'title': 'If I already control 5 Backups, how can I play another one onto the field?'}, page_content="In reference to the card If I already control 5 Backups, how can I play another one onto the field?: You can only have 5 Backups on your field at a given time but there are some abilities that can break Backups or return them to your hand in order to play different ones. Certain Backups like 1-120C Monk also need to be put into the Break Zone as part of their ability'