In [11]:
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.document_loaders import DirectoryLoader
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.llms import Ollama
import pickle

In [7]:
llm = Ollama(model="llama2")

In [8]:
%%time
# Not RAG
# response = llm.invoke("how can langsmith help with testing?")
response = llm.invoke("Who is Shohei Ohtani?")
print(response)


Shohei Ohtani is a Japanese professional baseball player who currently plays as a pitcher and designated hitter for the Los Angeles Angels in Major League Baseball (MLB). He was born on July 5, 1994, in Tokyo, Japan, and began his professional career with the Nippon Professional Baseball (NPB) team Hanshin Tigers in 2011.

Ohtani quickly gained attention for his exceptional talent on both the mound and at the plate, earning him the nickname "The Japanese Babe Ruth." In 2018, he made history by becoming the first player to pitch and hit in a MLB game since 1937, and has continued to impress with his versatility and skill ever since.

Some of Ohtani's notable achievements include:

* 2-time NPB MVP (2016, 2018)
* 4-time NPB All-Star (2013, 2015, 2017, 2018)
* MLB All-Star (2018)
* MLB Home Run Derby champion (2018)
* MLB Pitcher of the Month (April 2018)
* MLB Rookie of the Year (2018)

Ohtani's unique ability to excel as both a pitcher and hitter has made him one of the most exciting p

In [15]:
%%time
# faiss data load
loader = WebBaseLoader("https://docs.smith.langchain.com/user_guide")
loader = WebBaseLoader("https://en.wikipedia.org/wiki/Shohei_Ohtani")
docs = loader.load()
embeddings = OllamaEmbeddings()

CPU times: user 794 ms, sys: 51.4 ms, total: 845 ms
Wall time: 1.46 s


In [16]:
%%time
# faiss create vector database
text_splitter = RecursiveCharacterTextSplitter()
documents = text_splitter.split_documents(docs)
vector = FAISS.from_documents(documents, embeddings)

CPU times: user 334 ms, sys: 9.49 ms, total: 343 ms
Wall time: 52.7 s


In [26]:
# %%time
# import faiss
# # faiss write and read database
# faiss.write_index(vector.index,"./faiss_database/vector.index")  # save the index to disk
# index = faiss.read_index("./faiss_database/vector.index")  # load the index

CPU times: user 2.01 ms, sys: 3.26 ms, total: 5.27 ms
Wall time: 2.81 ms


In [33]:
# faiss write
index = vector.index
with open('./faiss_database/data_and_index.db', 'wb') as f:
    pickle.dump((vector, index), f)

In [12]:
# faiss read
with open('./faiss_database/data_and_index.db', 'rb') as f:
    vector, index = pickle.load(f)

In [17]:
# HM
# # Set your query here manually
context = ""
question = "Who is Shohei Otani?"
matched_docs = vector.similarity_search(question, 4)
for i, doc in enumerate(matched_docs):
    context = context + doc.page_content + " \n\n "
    print("### ", i)
    print(doc.page_content)

###  0
vteSporting News MLB Player of the Year Award
1936: Hubbell
1937: Allen
1938: Vander Meer
1939: DiMaggio
1940: Feller
1941: T. Williams
1942: T. Williams
1943: Chandler
1944: Marion
1945: Newhouser
1946: Musial
1947: T. Williams
1948: Boudreau
1949: T. Williams
1950: Rizzuto
1951: Musial
1952: Roberts
1953: Rosen
1954: Mays
1955: Snider
1956: Mantle
1957: T. Williams
1958: Turley
1959: Wynn
1960: Mazeroski
1961: Maris
1962: Wills & Drysdale
1963: Koufax
1964: Boyer
1965: Koufax
1966: Robinson
1967: Yastrzemski
1968: McLain
1969: McCovey
1970: Bench
1971: Torre
1972: B. Williams
1973: Jackson
1974: Brock
1975: Morgan
1976: Morgan
1977: Carew
1978: Guidry
1979: Stargell
1980: Brett
1981: Valenzuela
1982: Yount
1983: Ripken Jr.
1984: Sandberg
1985: Mattingly
1986: Clemens
1987: Bell
1988: Hershiser
1989: Mitchell
1990: Bonds
1991: Ripken Jr.
1992: Sheffield
1993: Thomas
1994: Bagwell
1995: Belle
1996: Rodriguez
1997: Griffey Jr.
1998: Sosa
1999: Palmeiro
2000: Delgado
2001: Bonds
2

In [30]:
context

'vteSporting News MLB Player of the Year Award\n1936: Hubbell\n1937: Allen\n1938: Vander Meer\n1939: DiMaggio\n1940: Feller\n1941: T. Williams\n1942: T. Williams\n1943: Chandler\n1944: Marion\n1945: Newhouser\n1946: Musial\n1947: T. Williams\n1948: Boudreau\n1949: T. Williams\n1950: Rizzuto\n1951: Musial\n1952: Roberts\n1953: Rosen\n1954: Mays\n1955: Snider\n1956: Mantle\n1957: T. Williams\n1958: Turley\n1959: Wynn\n1960: Mazeroski\n1961: Maris\n1962: Wills & Drysdale\n1963: Koufax\n1964: Boyer\n1965: Koufax\n1966: Robinson\n1967: Yastrzemski\n1968: McLain\n1969: McCovey\n1970: Bench\n1971: Torre\n1972: B. Williams\n1973: Jackson\n1974: Brock\n1975: Morgan\n1976: Morgan\n1977: Carew\n1978: Guidry\n1979: Stargell\n1980: Brett\n1981: Valenzuela\n1982: Yount\n1983: Ripken Jr.\n1984: Sandberg\n1985: Mattingly\n1986: Clemens\n1987: Bell\n1988: Hershiser\n1989: Mitchell\n1990: Bonds\n1991: Ripken Jr.\n1992: Sheffield\n1993: Thomas\n1994: Bagwell\n1995: Belle\n1996: Rodriguez\n1997: Griffey J

In [18]:
prompt = ChatPromptTemplate.from_template(
    """Answer the following question based only on the provided context:

    <context>
    {context}
    </context>

    Question: {input}""")

In [19]:
document_chain = create_stuff_documents_chain(llm, prompt)

In [20]:
retriever = vector.as_retriever()
retrieval_chain = create_retrieval_chain(retriever, document_chain)

In [23]:
# response = retrieval_chain.invoke({"input": "how can langsmith help with testing?"})
response = retrieval_chain.invoke({"input": "Who is Shohei Otani?"})
print(response["answer"])

Shohei Otani is a Japanese professional baseball player who currently plays as a pitcher and outfielder for the Los Angeles Angels in Major League Baseball (MLB). He was born on September 18, 1994, in Okinawa, Japan. Otani is considered one of the most talented baseball players in the world, known for his exceptional pitching and hitting abilities.

Otani made his professional debut in 2011 with the Nippon Professional Baseball (NPB) team SoftBank Hawks, where he quickly established himself as a dominant force on the mound and at the plate. In 2016, he became the first Japanese player to win the MLB Rookie of the Year award after being posted by his NPB team and selected by an MLB club.

In addition to his impressive stats and awards, Otani has gained international recognition for his unique ability to excel in both pitching and hitting. He is often compared to Babe Ruth, who was also a two-way threat in baseball, for his exceptional talent and dominance on the field.

Otani's career a

In [24]:
# good
response = retrieval_chain.invoke({"input": "What team dose Shohei Otani play now?"})
print(response["answer"])


Shohei Ohtani currently plays for the Los Angeles Dodgers in Major League Baseball (MLB). He signed a multi-year contract with the Dodgers in December 2023, becoming the highest-paid player in baseball history. Prior to joining the Dodgers, Ohtani played for the Seattle Mariners in 2018 and 2019.


In [27]:
# no good...
response = retrieval_chain.invoke({"input": "What unmber is his contract sallary with Dodgers?"})
print(response["answer"])


According to reports, Shohei Ohtani signed a 2-year contract with the Los Angeles Dodgers worth $3.5 million in 2018, with an option for a third year worth $4.5 million. The deal included a $2 million signing bonus and a $500,000 performance bonuses based on his playing time and statistics.


In [29]:
# no good...
response = retrieval_chain.invoke({"input": "Tell me exactly about Shohei Otani's contract with Dodgers?"})
print(response["answer"])


Shohei Ohtani's contract with the Los Angeles Angels is a multi-year deal that was signed in 2018. The details of the contract are as follows:

* Years: 2018-2023 (6 years)
* Total value: $85 million (approximately 9.7 billion yen)
* Base salary: $3 million in 2018, $5 million in 2019, and $6 million in 2020-2023
* Bonus structure: Ohtani can earn up to an additional $25 million through performance bonuses based on his individual and team achievements.
* Incentives: Ohtani can earn up to $10 million in incentives based on his pitching and hitting performances.

It's worth noting that the contract is structured in a unique way, with Ohtani receiving a base salary for his time as a player and also receiving a separate payment for his appearance fees as a pitcher. This allows him to earn more money through his pitching performances while still receiving a fair salary as a player.

It's also worth noting that the contract is a significant increase from Ohtani's previous salary with the Ni