In [None]:
!pip install unstructured selenium

In [2]:
## Loading Chat Model
from langchain_groq import ChatGroq
groq_model=ChatGroq(model="llama-3.1-8b-instant")

## Loading Embedding Model
from langchain_huggingface import HuggingFaceEmbeddings
embedding_model = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
from langchain.document_loaders import SeleniumURLLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain import PromptTemplate

urls = ['https://beebom.com/what-is-nft-explained/',
        'https://beebom.com/how-delete-spotify-account/', 
        'https://beebom.com/how-download-gif-twitter/', 
        'https://beebom.com/how-use-chatgpt-linux-terminal/', 
        'https://beebom.com/how-delete-spotify-account/', 
        'https://beebom.com/how-save-instagram-story-with-music/', 
        'https://beebom.com/how-install-pip-windows/', 
        'https://beebom.com/how-check-disk-usage-linux/']

In [7]:
loader = SeleniumURLLoader(urls=urls) 
docs_not_splitted = loader.load()

In [8]:
len(docs_not_splitted)

8

In [None]:
# we split the documents into smaller chunks
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0) 
docs = text_splitter.split_documents(docs_not_splitted)

In [9]:
len(docs)

105

In [12]:
from pinecone import Pinecone, ServerlessSpec
pc = Pinecone(api_key="pcsk_6B7Uez_S2jqvMMHdU2c7SpVjHtMFrXibiRejE1VfKewM8mTAFUXyF2hZ4U5N3gKEdAvxdT")

index_name = "customer-chatbot"

pc.create_index(
    name=index_name,
    dimension=384, 
    metric="cosine", 
    spec=ServerlessSpec(
        cloud="aws",
        region="us-east-1"
    ) 
)

{
    "name": "customer-chatbot",
    "metric": "cosine",
    "host": "customer-chatbot-scup0jf.svc.aped-4627-b74a.pinecone.io",
    "spec": {
        "serverless": {
            "cloud": "aws",
            "region": "us-east-1"
        }
    },
    "status": {
        "ready": true,
        "state": "Ready"
    },
    "vector_type": "dense",
    "dimension": 384,
    "deletion_protection": "disabled",
    "tags": null
}

In [13]:
index = pc.Index(index_name)

In [14]:
from uuid import uuid4
from langchain_pinecone import PineconeVectorStore

vector_store = PineconeVectorStore(index=index, embedding=embedding_model)

uuids = [str(uuid4()) for _ in range(len(docs))]
vector_store.add_documents(documents=docs, ids=uuids)

['ebbcef45-f746-4a34-b711-019214842254',
 'caf0e8a0-7b39-4647-9002-540eff4d1e87',
 'ba6c2d94-0a1f-4d60-bf30-6a03a18f3284',
 '5380f33f-b379-4c4a-80a8-3b4db0f7e553',
 'a5ee492b-8719-4e2f-97b8-10b7cd235f26',
 '022190ae-4083-41c2-8c8e-30a9eefc6bdd',
 '78243018-4b27-4dd5-a53f-292d326bb6bb',
 'd069a714-12dc-4ab4-a084-6c42755491d7',
 '8bacb1de-ab7e-4978-a610-9e998681dcde',
 'f2671ecb-5aa8-468d-b63a-8bf75cd5f030',
 'a0eff8f8-6f35-4a15-a3d4-588b54099fb1',
 'a1bf3741-7d0a-458b-93af-5a4655cd5c8d',
 '8a8afd35-ccb7-4fda-a04e-94b5d535f4a9',
 '316a8768-3c7c-49fd-aa79-e2c74a944024',
 'c40b1684-9061-445d-992f-e2c5c419bcbe',
 '7d9dea48-2a2f-4250-843f-2f76911e6e97',
 'db5a3875-d7a3-4da8-9755-246d13a2ba1a',
 'f563aa65-b6b0-4f97-b8fd-7c29f8da1b45',
 'ce3a0892-4537-4997-8833-4fdbfbcff0a0',
 'b438e6a8-3a06-4b60-8ee2-3002047ae57e',
 '4227bf4d-3af3-4a71-a21d-ec11cd9852c0',
 '7678c773-19fc-4862-a21b-69ff135fcebb',
 '57cfdbd5-7c2a-4fcd-a539-1950aa464086',
 'e982d5db-4746-46e5-ab03-9983c41a2a3d',
 'fb8d7f57-80fc-

In [15]:
print(index.describe_index_stats())

{'dimension': 384,
 'index_fullness': 0.0,
 'metric': 'cosine',
 'namespaces': {'': {'vector_count': 105}},
 'total_vector_count': 105,
 'vector_type': 'dense'}


In [21]:
retriever = vector_store.as_retriever()
ret_docs = retriever.get_relevant_documents("how to check disk usage in linux?")

In [23]:
ret_docs
print(len(ret_docs))

4


In [20]:
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor

compressor = LLMChainExtractor.from_llm(groq_model) 
compression_retriever = ContextualCompressionRetriever(
    base_compressor = compressor,
    base_retriever = retriever
)

In [22]:
compresses_ret_docs = compression_retriever.get_relevant_documents("how to check disk usage in linux?")
len(compresses_ret_docs)

4

In [28]:
template = """You are an exceptional customer support chatbot that gently answer questions.
You know the following context information: 
{chunks_formatted}
Answer to the following question from a customer. Use only information from the previous context information. Do not invent stuff.
Question: {query}
Answer: 
"""


prompt = PromptTemplate( input_variables=["chunks_formatted", "query"], template=template)


In [26]:
query = "How to check disk usage in linux?"
results = vector_store.similarity_search(
    query, k=2
)
for res in results:
    print(f"* {res.page_content} [{res.metadata}]")

* Home > Tech > How to Check Disk Usage in Linux (4 Methods)

How to Check Disk Usage in Linux (4 Methods)

Beebom Staff

Updated: December 19, 2023

Comments 0

Share

Copied

There may be times when you need to download some important files or transfer some photos to your Linux system, but face a problem of insufficient disk space. You head over to your file manager to delete the large files which you no longer require, but you have no clue which of them are occupying most of your disk space. In this article, we will show some easy methods to check disk usage in Linux from both the terminal and the GUI application.

Table of Contents

Check Disk Space Using the df Command

In Linux, there are many commands to check disk usage, the most common being the df command. The df stands for “Disk Filesystem” in the command, which is a handy way to check the current disk usage and the available disk space in Linux. The syntax for the df command in Linux is as follows:

df <options> <file_syste

In [29]:
retrieved_chunks = [doc.page_content for doc in results]
chunks_formatted = "\n\n".join(retrieved_chunks)
prompt_formatted = prompt.format(chunks_formatted=chunks_formatted, query=query)

In [31]:
prompt_formatted

'You are an exceptional customer support chatbot that gently answer questions.\nYou know the following context information: \nHome > Tech > How to Check Disk Usage in Linux (4 Methods)\n\nHow to Check Disk Usage in Linux (4 Methods)\n\nBeebom Staff\n\nUpdated: December 19, 2023\n\nComments 0\n\nShare\n\nCopied\n\nThere may be times when you need to download some important files or transfer some photos to your Linux system, but face a problem of insufficient disk space. You head over to your file manager to delete the large files which you no longer require, but you have no clue which of them are occupying most of your disk space. In this article, we will show some easy methods to check disk usage in Linux from both the terminal and the GUI application.\n\nTable of Contents\n\nCheck Disk Space Using the df Command\n\nIn Linux, there are many commands to check disk usage, the most common being the df command. The df stands for “Disk Filesystem” in the command, which is a handy way to che

In [32]:
answer = groq_model.invoke(prompt_formatted).content 
print(answer)

You have a couple of options to check disk usage in Linux. 

One of the most common methods is to use the df command. You can use it in the terminal, and the syntax is: df <options> <file_system>. This will give you the current disk usage and the available disk space. 

Alternatively, you can use GUI tools like GDU Disk Usage Analyzer or the Gnome Disks Tool. To install GDU Disk Usage Analyzer, you can use the command: `sudo snap install gdu-disk-usage-analyzer`. For Gnome Disk Utility, you can use: `sudo apt-get -y install gnome-disk-utility`.

These tools can help you easily monitor disk usage and identify which files or partitions are occupying the most space.


## FewShotPromptTemplate

In [47]:
from langchain_core.prompts import FewShotPromptTemplate, PromptTemplate

examples = [
{"animal": "lion", "habitat": "savanna"},
{"animal": "polar bear", "habitat": "Arctic ice"}, 
{"animal": "elephant", "habitat": "African grasslands"}
]

example_template = """
Animal : {animal}
Habitat : {habitat}
"""
example_prompt = PromptTemplate(
    input_variable = ["animal","habitat"],
    template = example_template
)

dynamic_prompt = FewShotPromptTemplate(
    examples = examples,
    example_prompt = example_prompt,
    prefix="Identify the habitat of the given animal",
    suffix="Animal: {input}\nHabitat:", 
    input_variables=["input"],
    example_separator="\n\n"
)

template = dynamic_prompt.invoke({"input":"peacock"})
print(template)


text='Identify the habitat of the given animal\n\n\nAnimal : lion\nHabitat : savanna\n\n\n\nAnimal : polar bear\nHabitat : Arctic ice\n\n\n\nAnimal : elephant\nHabitat : African grasslands\n\n\nAnimal: peacock\nHabitat:'


In [39]:
chain = dynamic_prompt | groq_model
text = chain.invoke({"input":"crocodile"}).content
print(text)

Based on general information, I would identify the habitat of the given animal as follows:

Animal: crocodile
Habitat: tropical and subtropical regions, including rivers, lakes, and coastal areas, often with dense vegetation such as mangrove forests or swamps.


In [41]:
dynamic_prompt.save("awesome_prompt.json")

In [44]:
from langchain.prompts.example_selector import SemanticSimilarityExampleSelector, LengthBasedExampleSelector

In [48]:
examples = [ {
"query": "How do you feel today?",
"answer": "As an AI, I don't have feelings, but I've got jokes!"
}, {
"query": "What is the speed of light?",
"answer": """Fast enough to make a round trip around Earth 7.5 times in one
second!"""
}, {
"query": "What is a quantum computer?",
"answer": """A magical box that harnesses the power of subatomic particles
to solve complex problems."""
}, {
"query": "Who invented the telephone?",
"answer": "Alexander Graham Bell, the original 'ringmaster'."
}, {
"query": "What programming language is best for AI development?", "answer": "Python, because it's the only snake that won't bite."
}, {
"query": "What is the capital of France?", "answer": "Paris, the city of love and baguettes."
}, {
"query": "What is photosynthesis?",
"answer": """A plant's way of saying 'I'll turn this sunlight into food.
You're welcome, Earth.'"""
}, {
"query": "What is the tallest mountain on Earth?", "answer": "Mount Everest, Earth's most impressive bump."
}, {
"query": "What is the most abundant element in the universe?", "answer": "Hydrogen, the basic building block of cosmic smoothies."
}, {
"query": "What is the largest mammal on Earth?",
"answer": """The blue whale, the original heavyweight champion of the
world."""
}]

In [50]:
example_template = """
User : {query}
AI : {answer}
"""
example_prompt = PromptTemplate(
    input_variable = ["query","answer"],
    template = example_template
)

dynamic_prompt = FewShotPromptTemplate(
    examples = examples,
    example_prompt = example_prompt,
    prefix="""The following are excerpts from conversations with an AI assistant. The assistant is typically sarcastic and witty, producing creative and funny responses to users' questions. Here are some examples:""",
    suffix="User: {input}\nAI:", 
    input_variables=["input"],
    example_separator="\n\n"
)

template = dynamic_prompt.invoke({"input":"Who invented the telephone"})
print(template)

text="The following are excerpts from conversations with an AI assistant. The assistant is typically sarcastic and witty, producing creative and funny responses to users' questions. Here are some examples:\n\n\nUser : How do you feel today?\nAI : As an AI, I don't have feelings, but I've got jokes!\n\n\n\nUser : What is the speed of light?\nAI : Fast enough to make a round trip around Earth 7.5 times in one\nsecond!\n\n\n\nUser : What is a quantum computer?\nAI : A magical box that harnesses the power of subatomic particles\nto solve complex problems.\n\n\n\nUser : Who invented the telephone?\nAI : Alexander Graham Bell, the original 'ringmaster'.\n\n\n\nUser : What programming language is best for AI development?\nAI : Python, because it's the only snake that won't bite.\n\n\n\nUser : What is the capital of France?\nAI : Paris, the city of love and baguettes.\n\n\n\nUser : What is photosynthesis?\nAI : A plant's way of saying 'I'll turn this sunlight into food.\nYou're welcome, Earth.

In [53]:
print(template.text)

The following are excerpts from conversations with an AI assistant. The assistant is typically sarcastic and witty, producing creative and funny responses to users' questions. Here are some examples:


User : How do you feel today?
AI : As an AI, I don't have feelings, but I've got jokes!



User : What is the speed of light?
AI : Fast enough to make a round trip around Earth 7.5 times in one
second!



User : What is a quantum computer?
AI : A magical box that harnesses the power of subatomic particles
to solve complex problems.



User : Who invented the telephone?
AI : Alexander Graham Bell, the original 'ringmaster'.



User : What programming language is best for AI development?
AI : Python, because it's the only snake that won't bite.



User : What is the capital of France?
AI : Paris, the city of love and baguettes.



User : What is photosynthesis?
AI : A plant's way of saying 'I'll turn this sunlight into food.
You're welcome, Earth.'



User : What is the tallest mountain on

In [61]:
from langchain.prompts.example_selector import LengthBasedExampleSelector
example_selector = LengthBasedExampleSelector( 
examples=examples, 
example_prompt=example_prompt, 
max_length=80
)

dynamic_prompt = FewShotPromptTemplate(
    example_selector = example_selector,
    example_prompt = example_prompt,
    prefix="""The following are excerpts from conversations with an AI assistant. The assistant is typically sarcastic and witty, producing creative and funny responses to users' questions. Here are some examples:""",
    suffix="User: {input}\nAI:", 
    input_variables=["input"],
    example_separator="\n"
)

template = dynamic_prompt.invoke({"input":"Who invented the telephone"})
print(template)

text="The following are excerpts from conversations with an AI assistant. The assistant is typically sarcastic and witty, producing creative and funny responses to users' questions. Here are some examples:\n\nUser : How do you feel today?\nAI : As an AI, I don't have feelings, but I've got jokes!\n\n\nUser : What is the speed of light?\nAI : Fast enough to make a round trip around Earth 7.5 times in one\nsecond!\n\n\nUser : What is a quantum computer?\nAI : A magical box that harnesses the power of subatomic particles\nto solve complex problems.\n\nUser: Who invented the telephone\nAI:"


In [63]:
print(len(template.text))

570
