In [None]:
%pip install openai

In [None]:
%pip install langchain

In [None]:
%pip install langchain-openai

In [None]:
%pip install langchain_experimental

In [1]:
import os

import openai
import langchain

In [2]:
#Initializing OpenAI connection
openai.api_key = os.getenv("OPENAI_API_KEY")

In [3]:
#Import required libraries for respective LLM

from langchain.chat_models import ChatOpenAI
from langchain_openai import OpenAI

In [4]:
#Using Langchain to answer questions over documents (CSVs, PDFs)

llm = OpenAI(model="gpt-3.5-turbo-instruct")

question = "Which is the pomeranian?"

#Does not give desired answer
output = llm.invoke(question)
print(output)



The Pomeranian is a breed of dog, typically small in size with a fluffy coat and a fox-like face. They are known for their playful and friendly nature.


In [5]:
#Simple use case of external documents. Ex. Convert PDF to text -> split text -> embed text -> search against text

context = """

Max is a cockapoo.
Misa is a pomeranian.
Leo is a mixed pomeranian.
Ollie is a golden doodle.

"""

In [6]:
#Instead of appending like below, we can use langchain's "load_qa_chain"

output = llm.invoke(context + question)
print(output)



Misa and Leo are both pomeranians.


In [7]:
#Using Langchain to query tabular data and answer questions related to them

from langchain import SQLDatabase
from langchain_experimental.sql.base import SQLDatabaseChain
from langchain_openai import OpenAI

In [8]:
#Create connection to database with OpenAI

sqlite_db_path = "langchain.db"
db = SQLDatabase.from_uri(f"sqlite:///{sqlite_db_path}")
llm = OpenAI(model="gpt-3.5-turbo-instruct", temperature=0) #Temperature is the randomness of each response/output; where 0.7 is more random and 0.2 is less random
#In this case we are not looking for something dynamic

db_chain = SQLDatabaseChain.from_llm(llm=llm, db=db, verbose=True)

In [9]:
db_question = "Which is the most expensive item?"

In [10]:
db_answer = db_chain.invoke(db_question)



[1m> Entering new SQLDatabaseChain chain...[0m
Which is the most expensive item?
SQLQuery:[32;1m[1;3mSELECT "ITEM_NAME", "ITEM_PRICE" FROM item ORDER BY "ITEM_PRICE" DESC LIMIT 1[0m
SQLResult: [33;1m[1;3m[('Xbox', 499.99)][0m
Answer:[32;1m[1;3mXbox[0m
[1m> Finished chain.[0m


In [11]:
print(db_answer)

{'query': 'Which is the most expensive item?', 'result': 'Xbox'}


In [12]:
#Using Langchain to interact with APIs

from langchain.chains import APIChain
from langchain.prompts.prompt import PromptTemplate
from langchain.llms import OpenAI

In [13]:
endpoint = "https://api.jikan.moe/v4/anime?q=naruto&sfw"

In [14]:
api_chain = APIChain.from_llm_and_api_docs(llm, endpoint ,verbose=True, limit_to_domains=["https://api.jikan.moe/"])

In [15]:
response = api_chain.invoke("What is the highest rated Naruto film?")



[1m> Entering new APIChain chain...[0m
[32;1m[1;3m https://api.jikan.moe/v4/anime?q=naruto&sfw&order_by=score&sort=desc&limit=1[0m
[33;1m[1;3m{"pagination":{"last_visible_page":30,"has_next_page":true,"current_page":1,"items":{"count":1,"total":30,"per_page":1}},"data":[{"mal_id":53236,"url":"https:\/\/myanimelist.net\/anime\/53236\/Road_of_Naruto","images":{"jpg":{"image_url":"https:\/\/cdn.myanimelist.net\/images\/anime\/1731\/128787.jpg","small_image_url":"https:\/\/cdn.myanimelist.net\/images\/anime\/1731\/128787t.jpg","large_image_url":"https:\/\/cdn.myanimelist.net\/images\/anime\/1731\/128787l.jpg"},"webp":{"image_url":"https:\/\/cdn.myanimelist.net\/images\/anime\/1731\/128787.webp","small_image_url":"https:\/\/cdn.myanimelist.net\/images\/anime\/1731\/128787t.webp","large_image_url":"https:\/\/cdn.myanimelist.net\/images\/anime\/1731\/128787l.webp"}},"trailer":{"youtube_id":null,"url":null,"embed_url":null,"images":{"image_url":null,"small_image_url":null,"medium_imag

In [16]:
print(response)

{'question': 'What is the highest rated Naruto film?', 'output': ' The highest rated Naruto film is "Road of Naruto", a 9-minute PV celebrating the 20th anniversary of Naruto. It has a score of 8.44 and is rated PG-13. The PV contains new animation and scenes from the original Naruto series. It was released on October 3, 2022.'}


In [17]:
#Using Langchain to extract key data from text and make it more readable

from langchain.schema import HumanMessage
from langchain.prompts import PromptTemplate, ChatPromptTemplate, HumanMessagePromptTemplate

from langchain_openai import ChatOpenAI

from langchain.output_parsers import StructuredOutputParser, ResponseSchema

In [18]:
chat_model = ChatOpenAI(temperature=0)

In [19]:
#Define instructions on what you want extracted
instructions = """

Given a text which contains human names, extract the human names of those who have a dog.

"""

In [20]:
text = "Jeff has a cockapoo, Bob has a fish and John has a wolf"

In [21]:
prompt = instructions + text
output = chat_model.invoke([HumanMessage(content=prompt)])

In [22]:
print(output.content)

Jeff, John


In [None]:
#Using Kor (thin wrapper on LLMs for providing structured data response)
%pip install kor

In [23]:
from kor.extraction import create_extraction_chain
from kor.nodes import Object, Text, Number
from langchain_openai import ChatOpenAI

In [24]:
#Provide your schema (We can also use different models)
schema = Object(
    id="person",
    description="personal information",
    examples=[
        ("John Doe and Jane Doe are siblings", [{"first_name": "John", "last_name": "Doe"}, {"first_name": "Jane", "last_name": "Doe"}])
    ],
         attributes=[
             Text(
                 id="first_name",
                 description="First name of a person"
             ),
             Text(
                 id="last_name",
                 description="Last name of a person"
             )
         ],
         many=True
)

In [25]:
text = "John Doe has a sister named Jane. Their mom is named Lisa and she has a friend named Jeff"

In [26]:
llm = ChatOpenAI(temperature=0)
extraction_chain = create_extraction_chain(llm, schema, encoder_or_encoder_class="csv")

In [27]:
#The prompt that is fed to ChatGPT behind the scenes
print(extraction_chain.prompt.format_prompt(text="[user input]").to_string())

Your goal is to extract structured information from the user's input that matches the form described below. When extracting information please make sure it matches the type information exactly. Do not add any attributes that do not appear in the schema shown below.

```TypeScript

person: Array<{ // personal information
 first_name: string // First name of a person
 last_name: string // Last name of a person
}>
```


Please output the extracted information in CSV format in Excel dialect. Please use a | as the delimiter. 
 Do NOT add any clarifying information. Output MUST follow the schema above. Do NOT add any additional columns that do not appear in the schema.



Input: John Doe and Jane Doe are siblings
Output: first_name|last_name
John|Doe
Jane|Doe

Input: [user input]
Output:


In [53]:
extraction = extraction_chain.invoke(text)

In [29]:
print(extraction["data"])

{'person': [{'first_name': 'John', 'last_name': 'Doe'}, {'first_name': 'Jane', 'last_name': 'Doe'}]}


In [30]:
#Pydantic Model
from typing import Optional

from kor import from_pydantic
from pydantic import BaseModel, Field

class Person(BaseModel):
    first_name: Optional[str] = Field(
        default=None, description="First name of the person"
    )
    last_name: Optional[str] = Field(
        default=None, description="Last name of the person"
    )

In [31]:
schema, validator = from_pydantic(Person)

In [32]:
extraction_chain = create_extraction_chain(
    llm, schema, encoder_or_encoder_class="csv", validator=validator
)

In [33]:
extraction = extraction_chain.run(text)

In [34]:
print(extraction["data"])

{'person': [{'first_name': 'John', 'last_name': 'Doe'}]}


In [None]:
#Popular math archive
%pip install arxiv

In [35]:
#Using Langchains Agents (Langchain provides a variety of agents, we can also create our own)

from langchain.chat_models import ChatOpenAI
from langchain.agents import load_tools, initialize_agent, AgentType

In [36]:
llm = ChatOpenAI(temperature=0)
tools = load_tools(
    ["arxiv"]
)

agent_chain = initialize_agent(
    tools,
    llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True
)

  warn_deprecated(
  warn_deprecated(


In [37]:
#Ask question about the text
text = "what is the 2402.08284 paper about?"

In [38]:
agent_response = agent_chain.run(text)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI should search for the paper on arxiv to find out its content.
Action: arxiv
Action Input: 2402.08284[0m
Observation: [36;1m[1;3mPublished: 2024-02-13
Title: A Logical Approach to Criminal Case Investigation
Authors: Takanori Ugai, Yusuke Koyanagi, Fumihito Nishino
Summary: XAI (eXplanable AI) techniques that have the property of explaining the
reasons for their conclusions, i.e. explainability or interpretability, are
attracting attention. XAI is expected to be used in the development of forensic
science and the justice system. In today's forensic and criminal investigation
environment, experts face many challenges due to large amounts of data, small
pieces of evidence in a chaotic and complex environment, traditional laboratory
structures and sometimes inadequate knowledge. All these can lead to failed
investigations and miscarriages of justice. In this paper, we describe the
application of one logical approach to crime

In [39]:
print(agent_response)

The 2402.08284 paper is about using XAI techniques for criminal case investigation.


In [40]:
#Using Langchain to understand code

from langchain.text_splitter import Language
from langchain_community.document_loaders.generic import GenericLoader
from langchain_community.document_loaders.parsers import LanguageParser
from langchain_openai import OpenAIEmbeddings

llm = ChatOpenAI()

In [41]:
embeddings = OpenAIEmbeddings(disallowed_special=())

In [42]:
import os

In [None]:
#Clone repository you want explained
!git clone https://github.com/Taisunnn/riskthinking-data

In [43]:
root_dir = "riskthinking-data"

In [44]:
loader = GenericLoader.from_filesystem(
    root_dir,
    glob="**/*",
    suffixes=[".py"],
    exclude=["**/non-utf8-encoding.py"],
    parser=LanguageParser(parser_threshold=500)
)

In [45]:
documents = loader.load()
len(documents)

6

In [46]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

python_splitter = RecursiveCharacterTextSplitter.from_language(
    language=Language.PYTHON, chunk_size=2000, chunk_overlap=200
)
texts = python_splitter.split_documents(documents)
len(texts)

9

In [47]:
from langchain_community.vectorstores import Chroma
from langchain.embeddings.openai import OpenAIEmbeddings

db = Chroma.from_documents(texts, OpenAIEmbeddings(disallowed_special=()))

vectorstore = Chroma.from_documents(documents, embeddings).as_retriever()

retriever = db.as_retriever(
    search_type="mmr",  # Also test "similarity"
    search_kwargs={"k": 5},
)

  warn_deprecated(


In [48]:
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationSummaryMemory
from langchain.chat_models import ChatOpenAI

llm = ChatOpenAI()
qa = ConversationalRetrievalChain.from_llm(llm, retriever=retriever)

In [49]:
query = "What is this repository about?"

In [50]:
description = {
"dialect": "Python",
"question": query,
"chat_history": [], # or provide an actual chat history
"agent_scratchpad": {} # or provide an actual agent scratchpad
}

In [51]:
response = qa.run(description)

Number of requested results 20 is greater than number of elements in index 15, updating n_results = 15


In [52]:
print(response)

This repository is related to a Stock Market Pipeline project. The pipeline involves downloading stock market data from Kaggle, transforming the data, and training a machine learning model on the transformed data. The project also includes a FastAPI implementation for making predictions using the trained model. The repository contains scripts for downloading data, transforming data, training the model, and serving the model via FastAPI endpoints.
