In [13]:
!pip3 install  -r requirements.txt



In [2]:
from dotenv import load_dotenv, dotenv_values
import openai, os
import numpy as np
from numpy.linalg import norm


# Parse the local .env file into a mapping of setting name -> value.
secrets = dotenv_values(dotenv_path=".env")

In [3]:
# Export the OpenAI key read from .env into the process environment
# (presumably where the OpenAI/LangChain clients built later look it up).
# Fail early with an explicit message when the key is absent or has no value,
# instead of a bare KeyError / a TypeError from os.environ.
openai_api_key = secrets.get('OPENAI_API_KEY')
if not openai_api_key:
    raise KeyError("OPENAI_API_KEY is missing or empty in .env")
os.environ['OPENAI_API_KEY'] = openai_api_key

# Indexing Data

In [4]:
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Location of the PDF to index. The original local path remains the default;
# set the RUST_ESSENTIALS_PDF environment variable to point at the file on
# another machine instead of editing this cell.
file_path = os.environ.get(
    'RUST_ESSENTIALS_PDF',
    '/Users/rayanaay/Desktop/projects/langchain/summarizing_project/mix_data/rust_essentials.pdf',
)

loader = PyPDFLoader(file_path=file_path)

# Splitter configuration: chunk_size of 500 and no overlap between chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=0
)

# Load the PDF and split it into chunked documents in one step.
data = loader.load_and_split(text_splitter=text_splitter)

# Preview only the first few chunks; dumping the whole list floods the output.
data[:5]

[Document(page_content='Rust Essentials', metadata={'source': '/Users/rayanaay/Desktop/projects/langchain/summarizing_project/mix_data/rust_essentials.pdf', 'page': 1}),
 Document(page_content='Second Edition\n \n \n \n \n \n \n \n \n \nA quick guide to writing fast, safe, and concurrent systems\nand applications', metadata={'source': '/Users/rayanaay/Desktop/projects/langchain/summarizing_project/mix_data/rust_essentials.pdf', 'page': 2}),
 Document(page_content='Ivo Balbaert\n \n \n \n \n \n \nBIRMINGHAM - MUMBAI', metadata={'source': '/Users/rayanaay/Desktop/projects/langchain/summarizing_project/mix_data/rust_essentials.pdf', 'page': 3}),
 Document(page_content='Rust Essentials', metadata={'source': '/Users/rayanaay/Desktop/projects/langchain/summarizing_project/mix_data/rust_essentials.pdf', 'page': 4}),
 Document(page_content='Second Edition\nCopyright © 2017 Packt Publishing\n \nAll rights reserved. \nNo part of this book may be reproduced, stored in a retrieval\nsystem, or tran

In [5]:
from langchain.embeddings.openai import OpenAIEmbeddings

# Embedding client shared by the rest of the notebook (progress bar enabled).
embeddings = OpenAIEmbeddings(show_progress_bar=True)

# Embed one sample sentence and inspect the dimensionality of the result.
sample_sentence = 'what do you think about yourself?'
vector1 = embeddings.embed_query(sample_sentence)

len(vector1)

  0%|          | 0/1 [00:00<?, ?it/s]

1536

The embedding vector returned for the query has 1536 dimensions.

In [7]:
def get_cosine(vec1, vec2):
    """Return the cosine similarity between two 1-D vectors.

    Args:
        vec1: First vector (any sequence of numbers or a NumPy array).
        vec2: Second vector, with the same length as ``vec1``.

    Returns:
        The cosine of the angle between the vectors as a Python ``float``.
        When either vector has zero norm the similarity is undefined, and
        ``0.0`` is returned rather than ``nan`` with a RuntimeWarning.

    Raises:
        ValueError: Propagated from ``np.dot`` when the lengths differ.
    """
    a = np.asarray(vec1, dtype=float)
    b = np.asarray(vec2, dtype=float)
    denominator = norm(a) * norm(b)
    if denominator == 0:
        # A zero vector has no direction, so there is no angle to measure.
        return 0.0
    return float(np.dot(a, b) / denominator)
    
# Embed the names of two programming languages (in this order) and compare them.
vector1, vector2 = (
    embeddings.embed_query(term) for term in ('Rustlang', 'C++')
)
cosine = get_cosine(vec1=vector1, vec2=vector2)
cosine

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

0.7764651582361364

In [8]:
# Compare 'Rustlang' (vector1 from the previous cell) with a word that is not
# a programming language.
unrelated_word = 'kimora'
vector3 = embeddings.embed_query(unrelated_word)
cosine = get_cosine(vec1=vector1, vec2=vector3)
cosine

  0%|          | 0/1 [00:00<?, ?it/s]

0.7340700989913878

Interesting: even though the third word ('kimora') is unrelated to programming languages, its cosine similarity with 'Rustlang' is still high (≈0.73), although lower than that of the second word, 'C++' (≈0.78).

# FAISS (Vector Database)

FAISS is a library for efficient similarity search and clustering of dense vectors.

In [9]:
from langchain.vectorstores import FAISS

# Embed every chunk in `data` and build a FAISS vector store over the result.
index = FAISS.from_documents(documents=data, embedding=embeddings)

  0%|          | 0/1 [00:00<?, ?it/s]

In [10]:
# Look up the chunks closest to a one-word query; each hit comes back as a
# (document, relevance score) pair.
query = "Concurrency"
index.similarity_search_with_relevance_scores(query)

  0%|          | 0/1 [00:00<?, ?it/s]

[(Document(page_content='Concurrency and threads\nShared mutable states\nCommunication through channels', metadata={'source': '/Users/rayanaay/Desktop/projects/langchain/summarizing_project/mix_data/rust_essentials.pdf', 'page': 250}),
  0.7437462759699456),
 (Document(page_content='Concurrency and threads\nA system is concurrent when several computations are executing at the same time\nand potentially interacting with each other. \nThe computations can only run in\nparallel (that is, simultaneously) when they are executing on different cores or\nprocessors.\nAn executing Rust program consists of a collection of native \nOperating System\n(\nOS\n) threads; the OS is also responsible for their scheduling. \nThe unit of\ncomputation in Rust is called a \nthread', metadata={'source': '/Users/rayanaay/Desktop/projects/langchain/summarizing_project/mix_data/rust_essentials.pdf', 'page': 251}),
  0.7201602462670973),
 (Document(page_content="Concurrency and threads\n253\nCreating threads\n25

In [11]:
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.callbacks import StdOutCallbackHandler

# Retrieve with Maximal Marginal Relevance: consider the 20 nearest chunks
# (fetch_k) and keep 10 of them (k).
# MMR has to be requested through search_type; a 'maximal_marginal_relevance'
# entry in search_kwargs is not an option the FAISS similarity search acts on,
# so setting it left the retriever doing plain similarity search.
retriever = index.as_retriever(
    search_type='mmr',
    search_kwargs={'k': 10, 'fetch_k': 20},
)

llm = ChatOpenAI()

# Question-answering chain: the retrieved chunks are passed to the chat model
# as context for the question.
chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    verbose=True
)

# Print the intermediate chain steps to stdout while the chain runs.
handler = StdOutCallbackHandler()

chain.run(
    'What is concurrency in Rust ?',
    callbacks=[handler]
)



[1m> Entering new RetrievalQA chain...[0m


  0%|          | 0/1 [00:00<?, ?it/s]



[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
Concurrency - Coding for
Multicore Execution
As a modern system-level programming language, Rust has to have a good method
for executing code concurrently and in parallel on many processors
simultaneously. 
And indeed, it does: Rust provides a wide selection of concurrency
and parallel tools. 
Its type system is strong enough to write concurrency primitives
that have properties unlike anything that has existed before. 
In particular, it can

Concurrency and threads
A system is concurrent when several computations are executing at the same time
and potentially interacting with each other. 
The computations can only run in
parallel (that is, simultaneously) when they are ex

"Concurrency in Rust refers to the ability of the language to execute multiple computations or tasks simultaneously. This allows different parts of a Rust program to run independently and potentially interact with each other. In Rust, concurrency is achieved through the use of threads, which are the unit of computation. Each thread can execute code concurrently, but true parallel execution happens when threads run on different cores or processors. Rust provides tools for working with concurrency, such as threads and channels, and ensures safety by preventing data races through its ownership system. Overall, Rust's approach to concurrency allows for efficient and safe concurrent programming on multicore machines."