In [1]:
!pip install -q huggingface_hub

In [2]:
# Importing OS and setting environment variables
import os
from apikeys import OPEN_AI_KEY, DEEP_LAKE_KEY, HUGGINGFACE_TOKEN

# Configure the process environment in a single pass, in the same order as
# before: wandb tracing flag for LangChain, wandb project name, then the
# OpenAI key and Hugging Face Hub token imported from the local apikeys module.
os.environ.update(
    {
        "LANGCHAIN_WANDB_TRACING": "true",
        "WANDB_PROJECT": "langchain",
        "OPENAI_API_KEY": OPEN_AI_KEY,
        "HUGGINGFACEHUB_API_TOKEN": HUGGINGFACE_TOKEN,
    }
)

# Introduction to Large Language Models

## Token Distributions and Predicting the Next Token

In [2]:
from langchain import OpenAI
# Instantiate the OpenAI completion model with temperature 0.
llm = OpenAI(
    model_name="text-davinci-003",
    temperature=0,
)

# Prompt sent to the model; calling the LLM object directly returns the
# completion, which is printed below.
text = "What is a good company name for a company that sails in the ocean and represents One Piece?"
print(llm(text))



One Piece Sailing Company.


In [5]:
# Tracking token usage
from langchain.llms import OpenAI
from langchain.callbacks import get_openai_callback

# Only the model call needs to happen inside the callback context; `cb`
# keeps the recorded usage after the block exits, so it is reported afterwards.
with get_openai_callback() as cb:
    result = llm("Generate a Seinfield sort of joke")

print("Output: ", result)
print(cb)

Retrying langchain.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised APIConnectionError: Error communicating with OpenAI: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')).


Output:  

Q: What did the fish say when it hit the wall?
A: Dam!
Tokens Used: 29
	Prompt Tokens: 9
	Completion Tokens: 20
Successful Requests: 1
Total Cost (USD): $0.00058


In [3]:
from langchain import PromptTemplate, FewShotPromptTemplate

# Few-shot examples: each entry pairs a user query with the joke the
# assistant is expected to answer with.
examples = [
    {
        "query": "Fish",
        "answer": "What did the fish say when it hit the wall? Dam!",
    },
    {
        "query": "Kendrick Lamar",
        "answer": "What did Kendrick Lamar say when he hit the wall? Damn!",
    },
    # forgive my dumb jokes
    {
        "query": "Pitbull",
        "answer": "Why is Pitbull the world's most popular singer? Because he is Mr. Worldwide",
    },
]

# Template used to render each example. It uses the same "User:" / "AI:"
# labels as the suffix below, so the final open-ended turn continues the
# exact pattern the examples demonstrate.
example_template = """
User: {query}
AI: {answer}
"""

# Prompt template that formats a single example dict
example_prompt = PromptTemplate(
    input_variables=["query", "answer"],
    template=example_template,
)

# The prefix is rendered before the examples and the suffix after them; the
# suffix carries the new user query and leaves the AI turn open for the model.
prefix = """The following are excerpts from conversations with an AI
assistant. The assistant is known for telling dumb jokes. Here are some examples:"""

suffix = """
User: {query}
AI:
"""

# Assemble the few-shot prompt: prefix, rendered examples, then the suffix
few_shot_prompt_template = FewShotPromptTemplate(
    examples=examples,
    example_prompt=example_prompt,
    prefix=prefix,
    suffix=suffix,
    input_variables=["query"],
)

In [4]:
from langchain.chat_models import ChatOpenAI
from langchain import LLMChain

In [8]:
# Chat model that the few-shot chain is built on.
chat = ChatOpenAI(
    temperature=0.0,
    model_name="gpt-3.5-turbo",
)

In [9]:
# Combine the few-shot prompt with the chat model into a runnable chain.
chain = LLMChain(
    prompt=few_shot_prompt_template,
    llm=chat,
)

In [10]:
# Fill the prompt's only input variable ("query") by name; the returned
# string is displayed as the cell's output.
chain.run(query="J Cole")

'What did J Cole say when he hit the wall? "I guess it\'s just another brick in the Cole."'

`wandb` logging only takes place when we run agents; running a chain on its own, as above, will not log anything.

## Question Answering Bot

First, we need a template for answering questions.

In [3]:
from langchain import PromptTemplate

# Question/answer prompt with a single {question} placeholder.
template = "\nQuestion : {question}\nAnswer:\n"

prompt = PromptTemplate(
    input_variables=["question"],
    template=template,
)

# First question to send through the chain
question = "Where is Bordeaux?"

In [5]:
from langchain import HuggingFaceHub, LLMChain

# flan-t5-large served through the Hugging Face Hub, called with temperature 0.
hub_llm = HuggingFaceHub(
    repo_id="google/flan-t5-large",
    model_kwargs={"temperature": 0},
)

# Wire the question prompt to the hub model.
llm_chain = LLMChain(llm=hub_llm, prompt=prompt)

print(llm_chain.run(question))

flanders


In [6]:
# Ask a new question through the same chain, passing the prompt variable by name.
question = "What is the capital of France?"
print(llm_chain.run(question=question))

paris


In [8]:
# Ask another question through the same chain, passing the prompt variable by name.
question = "Who is the president of America?"
print(llm_chain.run(question=question))

gerald ford


In [9]:
# Batch of inputs for the chain: one dict per question, each keyed by the
# prompt's "question" variable.
question_list = [
    {"question": "What is the capital of France?"},
    {"question": "Who is the president of America?"},
    {"question": "What is the best J Cole Album?"},
    {"question": "Who is the greatest rapper of all time?"},
]

In [10]:
# Run the chain over every input dict in the batch and print the raw result object.
response = llm_chain.generate(input_list=question_list)
print(response)

generations=[[Generation(text='paris', generation_info=None)], [Generation(text='abraham lincoln', generation_info=None)], [Generation(text='i want to be king', generation_info=None)], [Generation(text='dr dre', generation_info=None)]] llm_output=None


## Text Summarization

In [11]:
from langchain.chat_models import ChatOpenAI
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

# Chat model used by the chains defined from here on.
llm = ChatOpenAI(
    model="gpt-3.5-turbo",
    temperature=0,
)

# Single-variable prompt: the passage to condense is substituted for {text}.
summarization_template = "Summarize the following text into one sentence: {text}"
summarization_prompt = PromptTemplate(
    template=summarization_template,
    input_variables=["text"],
)
summarization_chain = LLMChain(prompt=summarization_prompt, llm=llm)

# Passage to summarize, written as adjacent literals (one per sentence) that
# Python concatenates into a single string.
text = (
    "Girard's main contribution to philosophy, and in turn to other disciplines, was in the psychology of desire. "
    "Girard claimed that human desire functions imitatively, or mimetically, rather than arising as the spontaneous byproduct of human individuality, as much of theoretical psychology had assumed. "
    "Girard found that human development proceeds triangularly from a model of desire who indicates some object of desire as desirable by desiring it themselves. "
    "We copy this desire for the object of the model and appropriate it as our own, most often without recognizing that the source of this desire comes from another apart from ourselves completing the triangle of mimetic desire. "
    "This process of appropriation of desire includes (but is not limited to) identity formation, the transmission of knowledge and social norms, and material aspirations which all have their origin in copying the desires of others who we take, consciously or unconsciously, as models for desire."
)

In [12]:
# Run the summarization chain, filling the prompt's {text} variable with the passage.
summarized_text = summarization_chain.predict(text=text)

In [13]:
# Show the summary produced by the chain.
print(summarized_text)

Girard's main contribution to philosophy was the understanding that human desire is imitative, meaning that it is not a spontaneous result of individuality but rather a process of copying the desires of others, which influences identity formation, knowledge transmission, social norms, and material aspirations.


## Text Translation

In [15]:
# Prompt with three placeholders: source language, target language and the text itself.
translation_template = "Translate the following from {language1} to {language2} : {text}"
translation_prompt = PromptTemplate(
    template=translation_template,
    input_variables=["language1", "language2", "text"],
)

# Chain the translation prompt to the chat model stored in `llm`.
translation_chain = LLMChain(prompt=translation_prompt, llm=llm)

In [16]:
# Source and target languages, followed by the sentence to translate.
language1, language2 = "English", "French"
text = "Life is Good, life is good my friend"

In [17]:
# Run the translation chain with all three prompt variables supplied by name.
translated_text = translation_chain.predict(
    language1=language1,
    language2=language2,
    text=text,
)

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised APIConnectionError: Error communicating with OpenAI: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')).


In [18]:
# Show the translation produced by the chain.
print(translated_text)

La vie est belle, la vie est belle mon ami.


## Testing out Tokenization

In [19]:
!pip install -q transformers

In [20]:
from transformers import AutoTokenizer

In [None]:
# Load the pretrained GPT-2 tokenizer (its files are downloaded on first use).
tokenizer = AutoTokenizer.from_pretrained("gpt2")

# Printing the whole token-to-id mapping floods the cell output, so report
# its size and show only a small sample of entries.
vocab = tokenizer.get_vocab()
print("Vocabulary size:", len(vocab))
print("Sample entries:", list(vocab.items())[:10])

Downloading (…)lve/main/config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]



In [22]:
# Encode a sample sentence into token ids, then print both the token strings
# and the ids (labels padded so the two lists line up as before).
token_ids = tokenizer.encode("This is a sample text to test the tokenizer.")

print(f"Tokens:    {tokenizer.convert_ids_to_tokens(token_ids)}")
print(f"Token IDs: {token_ids}")

Tokens:    ['This', 'Ġis', 'Ġa', 'Ġsample', 'Ġtext', 'Ġto', 'Ġtest', 'Ġthe', 'Ġtoken', 'izer', '.']
Token IDs: [1212, 318, 257, 6291, 2420, 284, 1332, 262, 11241, 7509, 13]
