Langchain Fundamentals

In [None]:
pip install langchain

In [None]:
pip install openai

In [None]:
import os

# Supply the OpenAI key through the environment instead of pasting it into the notebook.
# setdefault keeps a key that is already exported; the placeholder is only written when no
# key is set, so a missing key is easy to spot and a real one is never clobbered.
os.environ.setdefault("OPENAI_API_KEY", "YOUR_API_KEY")

In [None]:
from langchain.llms import OpenAI

In [None]:
llm = OpenAI(temperature=0.9)

In [None]:
text = "What are 5 vacation destinations for someone who likes to eat pasta?"
print(llm(text))



1. Rome, Italy
2. Bologna, Italy
3. Florence, Italy
4. Venice, Italy
5. Naples, Italy


In [None]:
from langchain.prompts import PromptTemplate

In [None]:
# Reusable question with a single {food} placeholder that is filled in via prompt.format(...).
prompt = PromptTemplate(
    template="What are 5 vacation destinations for someone who likes to eat {food}?",
    input_variables=["food"],
)

In [None]:
print(prompt.format(food="Paneer"))

What are 5 vacation destinations for someone who likes to eat Paneer?


In [None]:
print(llm(prompt.format(food="Paneer")))



1. New Delhi, India 
2. Jaipur, India 
3. Agra, India
4. Chandigarh, India 
5. Amritsar, India


In [None]:
from langchain.prompts import PromptTemplate
from langchain.llms import OpenAI
from langchain.chains import LLMChain

In [None]:
# Same completion-model settings as the earlier cells (temperature 0.9).
llm = OpenAI(temperature=0.9)

# Single-variable prompt: {food} is the only value that has to be supplied when it is formatted.
prompt = PromptTemplate(
    input_variables=["food"],
    template="What are 5 vacation destinations for someone who likes to eat {food}?",
)

In [None]:
chain = LLMChain(llm=llm, prompt=prompt)

In [None]:
print(chain.run("Mangoes"))



1. Goa, India
2. Mauritius
3. Bali, Indonesia
4. Dominican Republic
5. Guyana


Using OpenAI API with SerpAPI

In [None]:
pip install google-search-results

In [None]:
from langchain.agents import load_tools
from langchain.agents import initialize_agent
from langchain.llms import OpenAI

In [None]:
llm = OpenAI(temperature=0)

In [None]:
# SerpAPI credentials must come from the environment (or a .env file), never from the notebook:
# a key pasted into a cell is published with every copy of the file and has to be treated as
# leaked and rotated. setdefault leaves an exported key alone and only falls back to a
# placeholder so that a missing key is easy to spot.
os.environ.setdefault("SERPAPI_API_KEY", "YOUR_SERPAPI_API_KEY")

# "serpapi" is the web-search tool; "llm-math" is the calculator tool, which is driven by `llm`.
tools = load_tools(["serpapi", "llm-math"], llm=llm)

In [None]:
agent = initialize_agent(tools, llm, agent="zero-shot-react-description", verbose=True)

In [None]:
agent.run("Who is the current captain of the Argentina Football team? What is the largest prime number that is smaller than their age?")



[1m> Entering new  chain...[0m
[32;1m[1;3m I need to find out who the captain is and their age.
Action: Search
Action Input: "Argentina Football team captain"[0m
Observation: [36;1m[1;3mCaptained by Diego Maradona, Argentina won their second World Cup eight years later, in 1986, with a 3–2 final victory over West Germany. They reached the final ...[0m
Thought:[32;1m[1;3m I need to find out the age of the captain.
Action: Search
Action Input: "Diego Maradona age"[0m
Observation: [36;1m[1;3m60 years[0m
Thought:[32;1m[1;3m I need to use the calculator to find the largest prime number that is smaller than 60.
Action: Calculator
Action Input: 60[0m
Observation: [33;1m[1;3mAnswer: 60[0m
Thought:[32;1m[1;3m I now know the final answer.
Final Answer: The current captain of the Argentina Football team is Diego Maradona, and the largest prime number that is smaller than his age is 59.[0m

[1m> Finished chain.[0m


'The current captain of the Argentina Football team is Diego Maradona, and the largest prime number that is smaller than his age is 59.'

In [None]:
from langchain import OpenAI, ConversationChain

In [None]:
llm = OpenAI(temperature=0)
conversation = ConversationChain(llm=llm, verbose=True)

In [None]:
conversation.predict(input="Hi there!")



[1m> Entering new  chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:

Human: Hi there!
AI:[0m

[1m> Finished chain.[0m


" Hi there! It's nice to meet you. My name is AI. What's your name?"

In [None]:
conversation.predict(input="My name is Kartik!")



[1m> Entering new  chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:
Human: Hi there!
AI:  Hi there! It's nice to meet you. My name is AI. What's your name?
Human: My name is Kartik!
AI:[0m

[1m> Finished chain.[0m


' Nice to meet you, Kartik! What can I do for you today?'

In [None]:
conversation.predict(input="Could you tell me when is the next ICC Men's Cricket world cup?")



[1m> Entering new  chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:
Human: Hi there!
AI:  Hi there! It's nice to meet you. My name is AI. What's your name?
Human: My name is Kartik!
AI:  Nice to meet you, Kartik! What can I do for you today?
Human: Could you tell me when is the next ICC Men's Cricket world cup?
AI:[0m

[1m> Finished chain.[0m


" Sure, the next ICC Men's Cricket World Cup is scheduled to take place in India in October and November of 2023."

In [None]:
conversation.predict(input="Who is going to captain India?")



[1m> Entering new  chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:
Human: Hi there!
AI:  Hi there! It's nice to meet you. My name is AI. What's your name?
Human: My name is Kartik!
AI:  Nice to meet you, Kartik! What can I do for you today?
Human: Could you tell me when is the next ICC Men's Cricket world cup?
AI:  Sure, the next ICC Men's Cricket World Cup is scheduled to take place in India in October and November of 2023.
Human: Who is going to captain India?
AI:[0m

[1m> Finished chain.[0m


"  The current captain of the Indian cricket team is Virat Kohli. He is expected to remain the captain for the 2023 ICC Men's Cricket World Cup."

In [None]:
conversation.predict(input="What was my name?")



[1m> Entering new  chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:
Human: Hi there!
AI:  Hi there! It's nice to meet you. My name is AI. What's your name?
Human: My name is Kartik!
AI:  Nice to meet you, Kartik! What can I do for you today?
Human: Could you tell me when is the next ICC Men's Cricket world cup?
AI:  Sure, the next ICC Men's Cricket World Cup is scheduled to take place in India in October and November of 2023.
Human: Who is going to captain India?
AI:   The current captain of the Indian cricket team is Virat Kohli. He is expected to remain the captain for the 2023 ICC Men's Cricket World Cup.
Human: What was my name?
AI:[0m

[1m> Finished chain.[0m


' Your name is Kartik.'

In [None]:
conversation.predict(input="What was the first thing i asked you today?")



[1m> Entering new  chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:
Human: Hi there!
AI:  Hi there! It's nice to meet you. My name is AI. What's your name?
Human: My name is Kartik!
AI:  Nice to meet you, Kartik! What can I do for you today?
Human: Could you tell me when is the next ICC Men's Cricket world cup?
AI:  Sure, the next ICC Men's Cricket World Cup is scheduled to take place in India in October and November of 2023.
Human: Who is going to captain India?
AI:   The current captain of the Indian cricket team is Virat Kohli. He is expected to remain the captain for the 2023 ICC Men's Cricket World Cup.
Human: What was my name?
AI:  Your name is Kartik.
Human: What was the first thing i asked you today?
AI:[0m

[1m> Finished

"  You asked me when is the next ICC Men's Cricket World Cup."

In [None]:
conversation.predict(input="Translate the first thing i said to you in hindi")



[1m> Entering new  chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:
Human: Hi there!
AI:  Hi there! It's nice to meet you. My name is AI. What's your name?
Human: My name is Kartik!
AI:  Nice to meet you, Kartik! What can I do for you today?
Human: Could you tell me when is the next ICC Men's Cricket world cup?
AI:  Sure, the next ICC Men's Cricket World Cup is scheduled to take place in India in October and November of 2023.
Human: Who is going to captain India?
AI:   The current captain of the Indian cricket team is Virat Kohli. He is expected to remain the captain for the 2023 ICC Men's Cricket World Cup.
Human: What was my name?
AI:  Your name is Kartik.
Human: What was the first thing i asked you today?
AI:   You asked me when

'  The first thing you said to me was "Hi there!" in English. In Hindi, that would be "Namaste!"'

Chat functionality in LangChain

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage, SystemMessage, AIMessage

chat = ChatOpenAI(temperature=1)

In [None]:
# One system instruction followed by one user turn; the call returns the model's reply message.
chat([
    SystemMessage(content="You are an Unhelpful AI bot that makes jokes on whatever the user inputs"),
    HumanMessage(content="I would like to go to New York, how should I do this?"),
])

AIMessage(content="Well, you can start by putting one foot in front of the other. But good luck, because according to my calculations, it's going to take you approximately forever to get there.", additional_kwargs={}, example=False)

In [None]:
from langchain.memory import ChatMessageHistory

# Rebind `chat` to a temperature=0 model (the previous cell group used temperature=1).
chat = ChatOpenAI(temperature=0)

# Build the conversation by hand; history.messages returns the messages in the order added.
history = ChatMessageHistory()
history.add_ai_message("Hi")
history.add_user_message("What is the capital of Pakistan?")

In [None]:
history.messages

[AIMessage(content='Hi', additional_kwargs={}, example=False),
 HumanMessage(content='What is the capital of Pakistan?', additional_kwargs={}, example=False)]

In [None]:
ai_response = chat(history.messages)
ai_response

AIMessage(content='The capital of Pakistan is Islamabad.', additional_kwargs={}, example=False)

In [None]:
history.add_ai_message(ai_response.content)
history.messages

[AIMessage(content='Hi', additional_kwargs={}, example=False),
 HumanMessage(content='What is the capital of Pakistan?', additional_kwargs={}, example=False),
 AIMessage(content='The capital of Pakistan is Islamabad.', additional_kwargs={}, example=False)]

In [None]:
from langchain.llms import OpenAI
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.chains import SimpleSequentialChain

llm = OpenAI(temperature=1)

In [None]:
# First chain: takes a location ({user_location}) and asks the model for a classic dish from it.
template = """Your job is to come up with a classic dish from the area that the users suggests.
{user_location}

YOUR RESPONSE:
"""
prompt_template = PromptTemplate(
    template=template,
    input_variables=["user_location"],
)

location_chain = LLMChain(prompt=prompt_template, llm=llm)

In [None]:
# Second chain: takes a meal name ({user_meal}) and asks the model for a short home recipe.
template = """Given a meal, give a short and simple recipe on how to make that dish at home.
{user_meal}

YOUR RESPONSE:
"""
prompt_template = PromptTemplate(
    template=template,
    input_variables=["user_meal"],
)

meal_chain = LLMChain(prompt=prompt_template, llm=llm)

In [None]:
overall_chain = SimpleSequentialChain(chains=[location_chain, meal_chain], verbose=True)

In [None]:
review = overall_chain.run("U.S.A")



[1m> Entering new  chain...[0m
[36;1m[1;3mClassic American dish: Macaroni and Cheese.[0m
[33;1m[1;3m
Macaroni and Cheese

Ingredients: 
-4 cups uncooked elbow macaroni
-3 tablespoons butter
-3 tablespoons all-purpose flour
-1 teaspoon salt
-1/4 teaspoon freshly ground black pepper
-3 cups warm milk
-2 cups grated sharp cheddar cheese

Instructions:
1. Preheat the oven to 375 degrees Fahrenheit.
2. Cook the macaroni according to the package instructions, then drain.
3. In a large saucepan, melt the butter over medium heat.
4. Add the flour, salt, and pepper, and stir until combined.
5. Gradually add the milk, stirring constantly until it thickens.
6. Remove from heat, then add the cheese and stir until melted.
7. Combine the macaroni, cheese mixture, and 1/4 cup of the cheese, stirring until combined.
8. Put the macaroni and cheese into a greased baking dish.
9. Sprinkle the remaining cheese over the top.
10. Bake for 25 minutes or until golden brown.[0m

[1m> Finished chain.

Langchain Use cases

In [None]:
pip install python-dotenv

Collecting python-dotenv
  Downloading python_dotenv-1.0.0-py3-none-any.whl (19 kB)
Installing collected packages: python-dotenv
Successfully installed python-dotenv-1.0.0


In [None]:
import os

from dotenv import load_dotenv

# Pull variables from a local .env file, if there is one, into the process environment.
load_dotenv()

# Use the configured key, or a visible placeholder when none is set.
openai_api_key = os.environ.get('OPENAI_API_KEY', 'YOUR_API_KEY')

Summarize Long Text - beyond what ChatGPT accepts in a single prompt because of the number of tokens

In [None]:
from langchain.llms import OpenAI
from langchain.chains.summarize import load_summarize_chain # chain used to summarize long text
from langchain.text_splitter import RecursiveCharacterTextSplitter

llm = OpenAI(temperature=0, openai_api_key=openai_api_key)

In [None]:
# Read the whole file into memory. The encoding is given explicitly because open() otherwise
# falls back to the platform's locale encoding, which can garble or reject non-ASCII text.
# NOTE(review): assumes worked.txt is stored as UTF-8 — confirm.
with open('worked.txt', 'r', encoding='utf-8') as file:
    text = file.read()

# Printing the first 285 characters as a preview
print(text[:285])

February 2021Before college the two main things I worked on, outside of school,
were writing and programming. I didn't write essays. I wrote what
beginning writers were supposed to write then, and probably still
are: short stories. My stories were awful. They had hardly any plot,
just


In [None]:
pip install tiktoken

In [None]:
num_tokens = llm.get_num_tokens(text)

print (f"There are {num_tokens} tokens in your file")

There are 17716 tokens in your file


In [None]:
# Map-reduce preparation: divide the large document into smaller chunks, summarize each chunk,
# then summarize those summaries to obtain the final summary.
# The splitter breaks on blank lines first, then on single newlines (see `separators`).
text_splitter = RecursiveCharacterTextSplitter(separators=["\n\n", "\n"], chunk_size=5000, chunk_overlap=350)
docs = text_splitter.create_documents([text])

print (f"You now have {len(docs)} docs intead of 1 piece of text")

You now have 16 docs intead of 1 piece of text


In [None]:
chain = load_summarize_chain(llm=llm, chain_type='map_reduce')

In [None]:
output = chain.run(docs)
print (output)

 This essay is a reflection on the author's experience with writing and programming before college, his journey to becoming an artist, and his experience with founding Y Combinator. He discusses the difficulties of writing essays, the advantages of being independent-minded in fields affected by rapid change, and the difficulties of leaving Y Combinator. He also acknowledges the help of several people in reading drafts of the essay.


Q/A using Documents - Using Embeddings

In [None]:
from langchain import OpenAI

# The vectorstore we'll be using
from langchain.vectorstores import FAISS

# The LangChain component we'll use to get the documents
from langchain.chains import RetrievalQA

# The easy document loader for text
from langchain.document_loaders import TextLoader

# The embedding engine that will convert our text to vectors
from langchain.embeddings.openai import OpenAIEmbeddings

from langchain.text_splitter import RecursiveCharacterTextSplitter

llm = OpenAI(temperature=0)

In [None]:
# Load the text file through LangChain's TextLoader and report how much text came back.
loader = TextLoader('worked.txt')
doc = loader.load()
print("You have {} document".format(len(doc)))
print("You have {} characters in that document".format(len(doc[0].page_content)))

You have 1 document
You have 74663 characters in that document


In [None]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=3000, chunk_overlap=400)
docs = text_splitter.split_documents(doc)

In [None]:
# Total characters across all chunks, used below to report the average chunk size
num_total_characters = sum(len(chunk.page_content) for chunk in docs)

print(f"Now you have {len(docs)} documents that have an average of {num_total_characters / len(docs):,.0f} characters (smaller pieces)")

Now you have 29 documents that have an average of 2,930 characters (smaller pieces)


In [None]:
pip install faiss-cpu

In [None]:
# OpenAI’s text embeddings measure the relatedness of text strings
# Get your embeddings engine ready
embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)

# Embed your documents and combine with the raw text in a pseudo db. Note: This will make an API call to OpenAI
docsearch = FAISS.from_documents(docs, embeddings)

In [None]:
qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=docsearch.as_retriever())

In [None]:
query = "What are the difficulties the author faced in programming?"
qa.run(query)

' The author faced difficulties in programming due to the primitive programming languages available at the time, the lack of time-sharing machines with interactive OSes, and the complexity of the problem when writing an interpreter written in itself.'

Querying Custom Tabular Data - SQL

In [None]:
from langchain import OpenAI, SQLDatabase, SQLDatabaseChain

llm = OpenAI(temperature=0, openai_api_key=openai_api_key)

In [None]:
sqlite_db_path = 'San_Francisco_Trees.db'
db = SQLDatabase.from_uri(f"sqlite:///{sqlite_db_path}")

In [None]:
# Build the chain through the from_llm constructor: passing llm= straight to
# SQLDatabaseChain(...) is deprecated in LangChain. verbose=True is forwarded unchanged.
db_chain = SQLDatabaseChain.from_llm(llm, db, verbose=True)

In [None]:
# The SQLDatabaseChain is able to perform the following steps on our custom database:

# Find which table to use
# Find which column to use
# Construct the correct SQL query
# Execute that query
# Get the result
# Return a natural-language response

db_chain.run("How many species of Trees are present?")

Accessing data from a URL - API

In [None]:
from langchain.chains import APIChain
from langchain.llms import OpenAI

llm = OpenAI(temperature=0, openai_api_key=openai_api_key)

In [None]:
# Free-text description of the REST API that is handed to the chain together with the LLM.
# It lists the base URL and the two endpoints (lookup by country name and by currency code).
api_docs = """

BASE URL: https://restcountries.com/

API Documentation:

 /v3.1/name/{name}

 /v3.1/currency/{currency}

Woo! This is my documentation - we can write anything here
"""

# verbose=True makes the chain print its intermediate steps when it is run.
chain_new = APIChain.from_llm_and_api_docs(llm, api_docs, verbose=True)

In [None]:
chain_new.run('Can you tell me information about India?')



[1m> Entering new  chain...[0m
[32;1m[1;3m https://restcountries.com/v3.1/name/India[0m
[33;1m[1;3m[{"name":{"common":"India","official":"Republic of India","nativeName":{"eng":{"official":"Republic of India","common":"India"},"hin":{"official":"भारत गणराज्य","common":"भारत"},"tam":{"official":"இந்தியக் குடியரசு","common":"இந்தியா"}}},"tld":[".in"],"cca2":"IN","ccn3":"356","cca3":"IND","cioc":"IND","independent":true,"status":"officially-assigned","unMember":true,"currencies":{"INR":{"name":"Indian rupee","symbol":"₹"}},"idd":{"root":"+9","suffixes":["1"]},"capital":["New Delhi"],"altSpellings":["IN","Bhārat","Republic of India","Bharat Ganrajya","இந்தியா"],"region":"Asia","subregion":"Southern Asia","languages":{"eng":"English","hin":"Hindi","tam":"Tamil"},"translations":{"ara":{"official":"جمهورية الهند","common":"الهند"},"bre":{"official":"Republik India","common":"India"},"ces":{"official":"Indická republika","common":"Indie"},"cym":{"official":"Republic of India","common"

' India is a country located in Southern Asia, with a population of over 1.38 billion people. It has a capital city of New Delhi, and its official language is English. The currency used in India is the Indian rupee (₹). India is bordered by Bangladesh, Bhutan, Myanmar, China, Nepal, and Pakistan.'

In [None]:
chain_new.run('Can you tell me about the currency INR?')



[1m> Entering new  chain...[0m
[32;1m[1;3m https://restcountries.com/v3.1/currency/INR[0m
[33;1m[1;3m[{"name":{"common":"Bhutan","official":"Kingdom of Bhutan","nativeName":{"dzo":{"official":"འབྲུག་རྒྱལ་ཁབ་","common":"འབྲུག་ཡུལ་"}}},"tld":[".bt"],"cca2":"BT","ccn3":"064","cca3":"BTN","cioc":"BHU","independent":true,"status":"officially-assigned","unMember":true,"currencies":{"BTN":{"name":"Bhutanese ngultrum","symbol":"Nu."},"INR":{"name":"Indian rupee","symbol":"₹"}},"idd":{"root":"+9","suffixes":["75"]},"capital":["Thimphu"],"altSpellings":["BT","Kingdom of Bhutan"],"region":"Asia","subregion":"Southern Asia","languages":{"dzo":"Dzongkha"},"translations":{"ara":{"official":"مملكة بوتان","common":"بوتان"},"bre":{"official":"Rouantelezh Bhoutan","common":"Bhoutan"},"ces":{"official":"Bhútánské království","common":"Bhútán"},"cym":{"official":"Teyrnas Bhwtan","common":"Bhwtan"},"deu":{"official":"Königreich Bhutan","common":"Bhutan"},"est":{"official":"Bhutani Kuningriik","com

' The Indian Rupee (INR) is the official currency of both Bhutan and India. It is represented by the symbol ₹.'

Chatbots

In [None]:
from langchain.llms import OpenAI
from langchain import LLMChain
from langchain.prompts.prompt import PromptTemplate

# Chat specific components - keep memory of conversation
from langchain.memory import ConversationBufferMemory

In [None]:
# Prompt layout: fixed instructions, then the running transcript, then the new user turn.
template = """
You are a chatbot that is an expert in the field of programming. Only generate code on user input and no other text unless asked upon. Do not type explanations.


{chat_history}
Human: {human_input}
Chatbot:"""

# The memory exposes past turns under "chat_history", the same variable name the template uses.
memory = ConversationBufferMemory(memory_key="chat_history")
prompt = PromptTemplate(
    template=template,
    input_variables=["chat_history", "human_input"],
)

In [None]:
llm_chain = LLMChain(
    llm=OpenAI(openai_api_key=openai_api_key),
    prompt=prompt,
    verbose=True,
    memory=memory
)

In [None]:
llm_chain.predict(human_input="How do i write a code for Two sum on Leetcode in JAVA?")



[1m> Entering new  chain...[0m
Prompt after formatting:
[32;1m[1;3m
You are a chatbot that is an expert in the field of programming. Only generate code on user input and no other text unless asked upon. Do not type explanations.



Human: How do i write a code for Two sum on Leetcode in JAVA?
Chatbot:[0m

[1m> Finished chain.[0m


' \nclass Solution {\n    public int[] twoSum(int[] nums, int target) {\n        Map<Integer, Integer> map = new HashMap<>();\n        for(int i = 0; i < nums.length; i++) {\n            int complement = target - nums[i];\n            if(map.containsKey(complement)) {\n                return new int[] { map.get(complement), i };\n            }\n            map.put(nums[i], i);\n        }\n        throw new IllegalArgumentException("No two sum solution");\n    }\n}'

In [None]:
llm_chain.predict(human_input="In which language did i ask you to write my code in?")



[1m> Entering new  chain...[0m
Prompt after formatting:
[32;1m[1;3m
You are a chatbot that is an expert in the field of programming. Only generate code on user input and no other text unless asked upon. Do not type explanations.


Human: How do i write a code for Two sum on Leetcode in JAVA?
AI:  
class Solution {
    public int[] twoSum(int[] nums, int target) {
        Map<Integer, Integer> map = new HashMap<>();
        for(int i = 0; i < nums.length; i++) {
            int complement = target - nums[i];
            if(map.containsKey(complement)) {
                return new int[] { map.get(complement), i };
            }
            map.put(nums[i], i);
        }
        throw new IllegalArgumentException("No two sum solution");
    }
}
Human: In which language did i ask you to write my code in?
Chatbot:[0m

[1m> Finished chain.[0m


' You asked me to write your code in Java.'

In [None]:
llm_chain.predict(human_input="On which platform did you search for my problem?")



[1m> Entering new  chain...[0m
Prompt after formatting:
[32;1m[1;3m
You are a chatbot that is an expert in the field of programming. Only generate code on user input and no other text unless asked upon. Do not type explanations.


Human: How do i write a code for Two sum on Leetcode in JAVA?
AI:  
class Solution {
    public int[] twoSum(int[] nums, int target) {
        Map<Integer, Integer> map = new HashMap<>();
        for(int i = 0; i < nums.length; i++) {
            int complement = target - nums[i];
            if(map.containsKey(complement)) {
                return new int[] { map.get(complement), i };
            }
            map.put(nums[i], i);
        }
        throw new IllegalArgumentException("No two sum solution");
    }
}
Human: In which language did i ask you to write my code in?
AI:  You asked me to write your code in Java.
Human: On which platform did you search for my problem?
Chatbot:[0m

[1m> Finished chain.[0m


' I searched for your problem on Leetcode.'

Summarizing Youtube Videos as Text


In [None]:
from langchain.document_loaders import YoutubeLoader
from langchain.llms import OpenAI
from langchain.chains.summarize import load_summarize_chain
# youtube-transcript-api: used to get the transcript/text out of a YouTube URL
# pytube: Python library to download videos from the internet

In [None]:
pip install youtube-transcript-api

In [None]:
pip install pytube

In [None]:
loader = YoutubeLoader.from_youtube_url("https://youtu.be/IqqHqDcXLww", add_video_info=True)

In [None]:
result = loader.load()

In [None]:
# Inspect what the loader returned: container type, basic video metadata, then the raw documents.
print(type(result))
print("Found video from {} that is {} seconds long".format(result[0].metadata['author'], result[0].metadata['length']))
print()
print(result)

<class 'list'>
Found video from Greg Kamradt (Data Indy) that is 470 seconds long

[Document(page_content="hello good people and welcome to another tutorial where we are going to connect Google docs to open AI with Lang chain now why do we have to do this at all in the first place well you could imagine it would get pretty tedious copying and pasting your Google doc contents over to open Ai and in fact open AI has a character limit not a character limit a token limit where you can only put in certain amounts of information so if you want to do this at scale or you want to do it programmatically you're going to need the library to help you out with that and that library is going to be Lang chain now here you are Mr or Mrs or Miss cool person at your company and you're going to do a lot of really awesome business impact and the way you're going to do that is you're going to be connecting Lane chain with Google Drive and open AI so that you're able to summarize your documents and ask ques

In [None]:
llm = OpenAI(temperature=0, openai_api_key=openai_api_key)

In [None]:
chain = load_summarize_chain(llm, chain_type="stuff", verbose=False)
chain.run(result)

' In this tutorial, we learn how to connect Google Docs to Open AI with Lang Chain. This allows us to summarize documents and ask questions for them programmatically. We use the Google Drive Loader to pass a list of document IDs to the Lang Chain library, and then use the Open AI LM to summarize and ask questions. We can also use the verbose parameter to see the logic behind the Lang Chain.'

For longer YouTube videos (exceeding OpenAI's token limit)

In [None]:
loader = YoutubeLoader.from_youtube_url("https://youtu.be/rzA7UJ-hQn4", add_video_info=True)

In [None]:
result = loader.load()

In [None]:
# When large video is converted to text it exceeds 4097 token limit of OpenAI so we use summary of summaries
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [None]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=5000, chunk_overlap=0)

In [None]:
texts = text_splitter.split_documents(result)

In [None]:
chain = load_summarize_chain(llm, chain_type="map_reduce", verbose=False)
chain.run(texts)

' This video explains the Collection Framework in Java, which includes data structures and algorithms to save time and effort. It also explains the benefits of the Collection Framework and how it is used in interviews and contests. It explains the use of ArrayList, Stack, Queue, Priority Queue, Array Deque, and Set, as well as how to use the add, remove, clear, set, and contains functions to manipulate an array list. It also explains how to traverse an array list, how to use the Stack class, and how to use the offer, poll, and peek functions of a queue. Finally, it reviews the different types of collections and frameworks available in Java.'

Querying from a book

In [None]:
!pip install pypdf

In [None]:
from langchain.document_loaders import UnstructuredPDFLoader, OnlinePDFLoader, PyPDFLoader

from langchain.text_splitter import RecursiveCharacterTextSplitter
import os

In [None]:
loader = PyPDFLoader("field-guide-to-data-science.pdf")

In [None]:
data = loader.load()

In [None]:
# `data` is what loader.load() returned; presumably one entry per PDF page — TODO confirm.
print (f'You have {len(data)} document(s) in your data')
# NOTE(review): this measures only data[30] (a single entry), not the whole PDF, and it
# fails if fewer than 31 entries were loaded.
print (f'There are {len(data[30].page_content)} characters in your document')

You have 126 document(s) in your data
There are 2812 characters in your document


In [None]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=0)
texts = text_splitter.split_documents(data)

In [None]:
print (f'Now you have {len(texts)} documents')

Now you have 162 documents


Using Pinecone (a vector database) to store our embeddings as vectors for similarity search

In [None]:
pip install pinecone-client

In [None]:
from langchain.vectorstores import Chroma, Pinecone
from langchain.embeddings.openai import OpenAIEmbeddings
import pinecone

  from tqdm.autonotebook import tqdm


In [None]:
# Pinecone credentials and environment name come from the process environment;
# the second argument is the value used when the variable is not set.
PINECONE_API_KEY = os.getenv('PINECONE_API_KEY', 'YOUR_PINECONE_API_KEY')
PINECONE_API_ENV = os.getenv('PINECONE_API_ENV', 'us-west1-gcp-free')

In [None]:
embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)

In [None]:
# Index name as it is present in Pinecone
index_name = "langchaintest"

# Authenticate the client: the API key is found at app.pinecone.io and the environment
# name is shown next to the API key in the console.
pinecone.init(
    environment=PINECONE_API_ENV,
    api_key=PINECONE_API_KEY,
)

In [None]:
docsearch = Pinecone.from_texts([t.page_content for t in texts], embeddings, index_name=index_name)

In [None]:
# Define the question in this cell. Without it, `query` still holds the question asked about
# worked.txt in the earlier Q/A section, which is unrelated to the PDF indexed here.
query = "What are the guiding principles of data science?"
docs = docsearch.similarity_search(query)

In [None]:
from langchain.llms import OpenAI
from langchain.chains.question_answering import load_qa_chain

In [None]:
llm = OpenAI(temperature=0, openai_api_key=openai_api_key)
chain = load_qa_chain(llm, chain_type="stuff")

In [None]:
query = "What are the guiding principles of data science?"
docs = docsearch.similarity_search(query)

In [None]:
chain.run(input_documents=docs, question=query)

' Be willing to fail, fail often and learn quickly, keep the goal in mind, dedication and focus lead to success, complicated does not equal better, reason and common sense are foundational, and embrace the discipline of writing.'