In [15]:
!pip install -q sentence-transformers
!pip install -q wikipedia-api
!pip install -q numpy
!pip install -q scipy

In [16]:
from sentence_transformers import SentenceTransformer

# Embedding model shared by the paragraph-encoding and query-encoding cells.
# NOTE(review): trust_remote_code=True presumably lets code shipped with the
# model repository run locally — confirm the repository is trusted.
EMBEDDING_MODEL_ID = "Alibaba-NLP/gte-base-en-v1.5"
model = SentenceTransformer(EMBEDDING_MODEL_ID, trust_remote_code=True)

In [17]:
from wikipediaapi import Wikipedia

# Fetch the article that serves as the knowledge base.
# Arguments are passed positionally: a user-agent string and the language code.
wiki = Wikipedia('RAGBot/0.0', 'en')
page = wiki.page('Hayao_Miyazaki')
if not page.exists():
    # Fail loudly instead of letting an unusable page flow into the
    # embedding and retrieval steps as if it were real content.
    raise ValueError("Wikipedia page 'Hayao_Miyazaki' was not found")
doc = page.text

# Chunking: one chunk per blank-line-separated paragraph. Whitespace-only
# pieces are dropped so they are never embedded or retrieved.
paragraphs = [p for p in doc.split('\n\n') if p.strip()]

In [18]:
import textwrap

In [19]:
# Display every chunk, wrapped to 100 columns, framed by separator rules.
RULE = "-----------------------------------------------------------------"
for chunk in paragraphs:
  print(RULE)
  print(textwrap.fill(chunk, width=100))
  print(RULE)

-----------------------------------------------------------------
Hayao Miyazaki (宮崎 駿 or 宮﨑 駿, Miyazaki Hayao; [mijaꜜzaki hajao]; born January 5, 1941) is a Japanese
animator, filmmaker, and manga artist. He co-founded Studio Ghibli and serves as honorary chairman.
Throughout his career, Miyazaki has attained international acclaim as a masterful storyteller and
creator of Japanese animated feature films, and is widely regarded as one of the most accomplished
filmmakers in the history of animation. Born in Tokyo City, Miyazaki expressed interest in manga and
animation from an early age. He joined Toei Animation in 1963, working as an inbetween artist and
key animator on films like Gulliver's Travels Beyond the Moon (1965), Puss in Boots (1969), and
Animal Treasure Island (1971), before moving to A-Pro in 1971, where he co-directed Lupin the Third
Part I (1971–1972) alongside Isao Takahata. After moving to Zuiyō Eizō (later Nippon Animation) in
1973, Miyazaki worked as an animator on Wo

In [20]:
# Embed every paragraph chunk in one call. normalize_embeddings=True is requested
# so that the dot product taken later can serve as the similarity score
# (presumably cosine similarity on unit-length vectors — confirm against the library docs).
docs_embed = model.encode(paragraphs, normalize_embeddings=True)

In [21]:
# Sanity check on the embedding matrix; presumably (number of paragraphs, embedding size).
docs_embed.shape

(19, 768)

In [22]:
# Peek at the embedding of the first paragraph (displays the whole vector).
docs_embed[0]

array([ 9.43502784e-03, -4.05939436e-03,  4.70433570e-02, -1.91588644e-02,
       -1.05208799e-03, -2.19988022e-02,  4.46006581e-02,  4.33994345e-02,
        5.02612218e-02, -7.20757106e-03, -4.79073776e-03,  8.74278322e-03,
        3.30359414e-02,  1.80243272e-02, -2.41563078e-02,  1.47018824e-02,
        1.42382551e-02, -3.51168662e-02, -2.57179365e-02, -2.99221165e-02,
        6.12405548e-03,  8.12837388e-03, -2.69539114e-02,  3.50551307e-02,
       -7.06916675e-03, -4.24130112e-02, -3.33318114e-02, -1.78539287e-02,
       -2.74857953e-02, -3.14298854e-03,  2.51866132e-02, -1.22505175e-02,
        9.56701487e-03,  1.21276341e-02, -2.77877282e-02, -5.69833629e-02,
       -4.16358858e-02,  5.35411090e-02, -2.30257828e-02, -3.22550833e-02,
       -6.75189793e-02,  1.19094644e-02, -2.85630710e-02, -1.64049231e-02,
       -5.26259206e-02,  1.68623198e-02,  1.85449757e-02, -3.72159146e-02,
        3.73747665e-03, -1.39373420e-02, -4.20705713e-02,  3.79180745e-03,
       -1.04365591e-03, -

In [23]:
# The user question, encoded with the same model and the same normalization
# flag as the paragraphs so the two sets of vectors can be compared directly.
query = "What was Studio Ghibli's first film?"
query_embed = model.encode(query, normalize_embeddings=True)

In [24]:
# Sanity check: encoding a single string is expected to give a single 1-D vector.
query_embed.shape

(768,)

In [25]:
import numpy as np

# Score every paragraph against the query: the product of the paragraph
# embedding matrix with the query embedding gives one score per paragraph.
similarities = docs_embed @ query_embed.T

In [26]:
# Sanity check: expect exactly one score per paragraph.
similarities.shape

(19,)

In [27]:
# Inspect the raw scores; the ranking step below treats the largest values as the best matches.
similarities

array([0.5527753 , 0.45591557, 0.510938  , 0.50683564, 0.48231435,
       0.6385466 , 0.5517907 , 0.6293821 , 0.55376995, 0.4961574 ,
       0.46270683, 0.41976058, 0.4512602 , 0.40898627, 0.45950574,
       0.4371642 , 0.43962166, 0.19419606, 0.4650129 ], dtype=float32)

In [28]:
# Rank paragraphs by score: argsort is ascending, so take the last three
# positions and reverse them to get the best match first.
ascending_order = np.argsort(similarities, axis=0)
top_3_idx = ascending_order[-3:][::-1].tolist()

In [29]:
# Positions within `paragraphs` of the three retrieved chunks, best match first.
top_3_idx

[5, 7, 8]

In [30]:
# Look up the text of the top-ranked paragraphs, keeping the rank order of top_3_idx.
most_similar_documents = [paragraphs[idx] for idx in top_3_idx]

In [32]:
# Wrap each retrieved paragraph to 100 columns, show it between separator
# rules, and assemble the same wrapped text into CONTEXT with a blank line
# after every paragraph.
RULE = "-----------------------------------------------------------------"
wrapped_paragraphs = [textwrap.fill(doc_text, width=100) for doc_text in most_similar_documents]

for wrapped_text in wrapped_paragraphs:
  print(RULE)
  print(wrapped_text)
  print(RULE)

CONTEXT = "".join(text + "\n\n" for text in wrapped_paragraphs)

-----------------------------------------------------------------
Studio Ghibli Early films (1985–1995) Following the success of Nausicaä of the Valley of the Wind,
Miyazaki and Takahata founded the animation production company Studio Ghibli on June 15, 1985, as a
subsidiary of Tokuma Shoten, with offices in Kichijōji designed by Miyazaki. The studio's name had
been registered a year earlier; Miyazaki named it after the nickname of the Caproni Ca.309 aircraft,
meaning "a hot wind that blows in the desert" in Italian. Suzuki worked for Studio Ghibli as
producer, joining full-time in 1989, while Topcraft's Tōru Hara became production manager; Suzuki's
role in the creation of the studio and its films has led him to being occasionally named a co-
founder, and Hara is often viewed as influential to the company's success. Yasuyoshi Tokuma, the
founder of Tokuma Shoten, was also closely related to the company's creation, having provided
financial backing. Topcraft had been considered as a par

In [33]:
# Re-assigns `query` to the same question text that was embedded for retrieval;
# the prompt template below interpolates this value.
query = "What was Studio Ghibli's first film?"

In [34]:
# Assemble the LLM prompt: instructions first, then the retrieved CONTEXT,
# then the user's QUESTION. The instructions tell the model to admit when it
# does not know rather than invent an answer.
prompt = f"""
use the following CONTEXT to answer the QUESTION at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

CONTEXT: {CONTEXT}
QUESTION: {query}

"""

In [35]:
!pip install -q openai
!pip install python-dotenv



In [38]:
import openai
from dotenv import load_dotenv
import os

# Load environment variables (presumably from a local .env file) so the API key
# can be read from the environment in the next cell instead of being hard-coded.
# As the last expression in the cell, the call's return value is displayed.
load_dotenv()

True

In [39]:
from openai import OpenAI

# Build the API client; the key comes from the environment, never from the notebook itself.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

In [45]:
# Send the assembled prompt to the chat model as a single user message.
messages = [{"role": "user", "content": prompt}]
response = client.chat.completions.create(model="gpt-4o", messages=messages)

In [46]:
# Show the text of the first choice returned by the model.
print(response.choices[0].message.content)

Studio Ghibli's first film was "Laputa: Castle in the Sky," which was released on August 2, 1986.
