In [1]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import DeepLake
from langchain.text_splitter import CharacterTextSplitter
from langchain import OpenAI
from langchain.document_loaders import SeleniumURLLoader
from langchain import PromptTemplate

In [2]:
# we'll use information from the following articles
urls = ['https://beebom.com/what-is-nft-explained/',
        'https://beebom.com/how-delete-spotify-account/',
        'https://beebom.com/how-download-gif-twitter/',
        'https://beebom.com/how-use-chatgpt-linux-terminal/',
        'https://beebom.com/how-delete-spotify-account/',
        'https://beebom.com/how-save-instagram-story-with-music/',
        'https://beebom.com/how-install-pip-windows/',
        'https://beebom.com/how-check-disk-usage-linux/']

### Splitting the documents in chunks

In [3]:
# using selenium to scrape the documents
loader = SeleniumURLLoader(urls=urls)
docs_not_splitted = loader.load()

#we split the documents into smaller chunks
text_splitter = CharacterTextSplitter(chunk_size = 1000, chunk_overlap = 0)
docs = text_splitter.split_documents(docs_not_splitted)

In [5]:
docs[0]

Document(page_content='Home  Internet  NFTs Explained: What is an NFT and What is Its Use\n\nNFTs Explained: What is an NFT and What is Its Use\n\nArjun Sha\n\nLast Updated: December 6, 2021 4:42 pm\n\nAfter Bitcoin and Blockchain, NFT is another word to have entered our lexicon. The buzzword is everywhere and people are wondering what is NFT and what is its use? Well, there is not really a one-line explainer. And that’s why we have brought a comprehensive explainer on NFT, what is its use in digital art, and more. So without wasting any time, let’s go ahead and learn about NFTs (Non-fungible Token) in complete detail.\n\nWhat is NFT: A Definitive Explainer (2021)\n\nHere, we have mentioned all the questions that people have in their minds regarding NFT. You can click on the table to find all the sections that we have covered in this article and click on the link to move to the corresponding section.\n\nTable of Contents\n\nNFTs Explained: What is NFT in Crypto?\n\nWhat is the Use of N

In [6]:
len(docs)

124

In [4]:
import os, sys
sys.path.insert(1, 'D:\Github\DeepLake-Langchain')
import credentials
os.environ["OPENAI_API_KEY"] = credentials.openai
os.environ["ACTIVELOOP_TOKEN"] = credentials.active_loop

In [7]:
embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")
# create Deep Lake dataset

my_activeloop_org_id = credentials.active_loop_org_id
my_activeloop_dataset_name = "customer_support"
dataset_path = f"hub://{my_activeloop_org_id}/{my_activeloop_dataset_name}"
db = DeepLake(dataset_path=dataset_path, embedding_function=embeddings)

# add documents to our Deep Lake dataset
db.add_documents(docs)

Your Deep Lake dataset has been successfully created!
The dataset is private so make sure you are logged in!


|

Dataset(path='hub://megatron17/customer_support', tensors=['embedding', 'id', 'metadata', 'text'])

  tensor      htype       shape      dtype  compression
  -------    -------     -------    -------  ------- 
 embedding  embedding  (124, 1536)  float32   None   
    id        text      (124, 1)      str     None   
 metadata     json      (124, 1)      str     None   
   text       text      (124, 1)      str     None   


 

['cbe71ead-1a61-11ee-b60b-00d861dd19c7',
 'cbe71eae-1a61-11ee-98a7-00d861dd19c7',
 'cbe71eaf-1a61-11ee-b082-00d861dd19c7',
 'cbe71eb0-1a61-11ee-b8f9-00d861dd19c7',
 'cbe71eb1-1a61-11ee-9aea-00d861dd19c7',
 'cbe71eb2-1a61-11ee-8297-00d861dd19c7',
 'cbe71eb3-1a61-11ee-9092-00d861dd19c7',
 'cbe71eb4-1a61-11ee-bdca-00d861dd19c7',
 'cbe71eb5-1a61-11ee-a0b8-00d861dd19c7',
 'cbe71eb6-1a61-11ee-9f19-00d861dd19c7',
 'cbe71eb7-1a61-11ee-99ea-00d861dd19c7',
 'cbe71eb8-1a61-11ee-bac4-00d861dd19c7',
 'cbe71eb9-1a61-11ee-b229-00d861dd19c7',
 'cbe71eba-1a61-11ee-b51c-00d861dd19c7',
 'cbe71ebb-1a61-11ee-9369-00d861dd19c7',
 'cbe71ebc-1a61-11ee-bdf0-00d861dd19c7',
 'cbe71ebd-1a61-11ee-9e80-00d861dd19c7',
 'cbe71ebe-1a61-11ee-96ea-00d861dd19c7',
 'cbe71ebf-1a61-11ee-a556-00d861dd19c7',
 'cbe71ec0-1a61-11ee-b910-00d861dd19c7',
 'cbe71ec1-1a61-11ee-ba1e-00d861dd19c7',
 'cbe71ec2-1a61-11ee-8dfa-00d861dd19c7',
 'cbe71ec3-1a61-11ee-a04c-00d861dd19c7',
 'cbe71ec4-1a61-11ee-88e9-00d861dd19c7',
 'cbe71ec5-1a61-

In [8]:
# let's see the top relevant documents to a specific query
query = "how to check disk usage in linux?"
docs = db.similarity_search(query)
print(docs[0].page_content)

Home  Tech  How to Check Disk Usage in Linux (4 Methods)

How to Check Disk Usage in Linux (4 Methods)

Beebom Staff

Last Updated: June 19, 2023 5:14 pm

There may be times when you need to download some important files or transfer some photos to your Linux system, but face a problem of insufficient disk space. You head over to your file manager to delete the large files which you no longer require, but you have no clue which of them are occupying most of your disk space. In this article, we will show some easy methods to check disk usage in Linux from both the terminal and the GUI application.

Monitor Disk Usage in Linux (2023)

Table of Contents

Check Disk Space Using the df Command
		
Display Disk Usage in Human Readable FormatDisplay Disk Occupancy of a Particular Type

Check Disk Usage using the du Command
		
Display Disk Usage in Human Readable FormatDisplay Disk Usage for a Particular DirectoryCompare Disk Usage of Two Directories


### Using prompts to enhance the answer

In [9]:
# let's write a prompt for a customer support chatbot that
# answer questions using information extracted from our db
template = """You are an exceptional customer support chatbot that gently answer questions.

You know the following context information.

{chunks_formatted}

Answer to the following question from a customer. Use only information from the previous context information. Do not invent stuff.

Question: {query}

Answer:"""

prompt = PromptTemplate(
    input_variables=["chunks_formatted", "query"],
    template=template,
)

In [10]:
# the full pipeline

# user question
query = "How to check disk usage in linux?"

# retrieve relevant chunks
docs = db.similarity_search(query)
retrieved_chunks = [doc.page_content for doc in docs]

# format the prompt
chunks_formatted = "\n\n".join(retrieved_chunks)
prompt_formatted = prompt.format(chunks_formatted=chunks_formatted, query=query)

# generate answer
llm = OpenAI(model="text-davinci-003", temperature=0)
answer = llm(prompt_formatted)
print(answer)

 You can check disk usage in Linux using the df command or by using a GUI tool such as the GDU Disk Usage Analyzer or the Gnome Disks Tool. The df command is used to check the current disk usage and the available disk space in Linux. The syntax for the df command is: df <options> <file_system>. The options to use with the df command are: a, h, t, and x. To install the GDU Disk Usage Analyzer, use the command: sudo snap install gdu-disk-usage-analyzer. To install the Gnome Disks Tool, use the command: sudo apt-get -y install gnome-disk-utility.
