In [None]:
import os

import openai

# Read the API key from the environment instead of embedding it in the notebook,
# so the secret never ends up in version control or in a shared copy of this file.
# Raises KeyError if OPENAI_API_KEY is not set.
client = openai.OpenAI(api_key=os.environ["OPENAI_API_KEY"])

### 1 - Obtain embeddings from OpenAI

In [None]:
# Embedding model used by the requests in this section.
model="text-embedding-3-small"

# Request an embedding for a single input string.
result = client.embeddings.create(
    input = ['This is a test input, we want embeddings!'],
    model=model)
result.data[0].embedding[:30] # first 30 values of the one embedding returned for the single input

[0.023360375314950943,
 -0.003970523364841938,
 0.016770603135228157,
 -0.015610604546964169,
 -0.00869998149573803,
 -0.04541266709566116,
 0.03689779341220856,
 0.04570883885025978,
 0.008459343574941158,
 0.0014577097026631236,
 -0.0026146220043301582,
 -0.025668030604720116,
 -0.024619096890091896,
 -0.04210544005036354,
 -0.04185863211750984,
 -0.009761255234479904,
 0.0004720202705357224,
 0.01091508287936449,
 -0.01520337164402008,
 0.023520801216363907,
 0.009600830264389515,
 -0.0699947401881218,
 0.005624136887490749,
 0.019621234387159348,
 0.029123341664671898,
 -0.008366790600121021,
 -0.008317428641021252,
 0.030085893347859383,
 0.05617349594831467,
 -0.02168208174407482]

In [None]:
import numpy as np
np.array(result.data[0].embedding).shape

(1536,)

In [None]:
# Embed a longer input to check whether the embedding length depends on the text length.
result = client.embeddings.create(
    input = ['This is another a test input with a longer length, will it return longer embeddings?'],
    model=model)
np.array(result.data[0].embedding).shape

# The shape is (1536,) again, the same as for the shorter input above.

(1536,)

In [None]:
# Pass two input strings in one request.
result = client.embeddings.create(
    input = ['This is a test input, ', 'we want embeddings!'],
    model=model)
result.data[0].embedding[:30]
# Each input string gets its own entry in result.data; this shows the first 30 values for the first string.

[0.011657672002911568,
 0.007152515463531017,
 -0.0012050794903188944,
 -0.0253323782235384,
 0.005374339409172535,
 -0.03888102248311043,
 -0.01372115220874548,
 0.03014937974512577,
 0.037686724215745926,
 0.022107776254415512,
 0.033705733716487885,
 0.0058122482150793076,
 -0.03893410041928291,
 -0.01633533649146557,
 -0.0050691296346485615,
 0.018418723717331886,
 -0.035536989569664,
 0.005072447471320629,
 -0.044905588030815125,
 0.01385385263711214,
 0.015114499256014824,
 -0.01845853216946125,
 0.04291509464383125,
 0.01326333824545145,
 0.06003335863351822,
 -0.05806940421462059,
 -0.04817000404000282,
 0.004654443357139826,
 0.046710304915905,
 -0.034156911075115204]

In [None]:
result.data[1].embedding[:30]

[0.01195084024220705,
 -0.0028240245301276445,
 -0.01856301538646221,
 0.007554715033620596,
 -0.016591593623161316,
 -0.03105347603559494,
 0.053098853677511215,
 0.028276219964027405,
 -0.04506927356123924,
 -0.026103340089321136,
 -0.016174284741282463,
 -0.037787966430187225,
 0.004378137178719044,
 -0.047314103692770004,
 -0.010799645446240902,
 -0.012029984965920448,
 0.04127033054828644,
 -0.03272270783782005,
 0.017167190089821815,
 0.022808045148849487,
 0.00974917970597744,
 -0.049127232283353806,
 0.00048431119648739696,
 0.027125025168061256,
 -0.003394225612282753,
 0.04196104779839516,
 0.007011495064944029,
 0.021584900096058846,
 0.05393347144126892,
 -0.012526437640190125]

### 2 - Vector Database with Pinecone
1. A database of document embeddings; a possible alternative to fine-tuning for certain purposes
2. Documentation: https://www.pinecone.io/learn/openai-gen-qa/
3. Search process
    - the user asks a query
    - the query is converted to an embedding
    - the query embedding is compared against the embeddings of all documents
    - the documents with the highest similarity scores (e.g. dot product or cosine similarity) are returned as results

In [None]:
# download dataset and pinecone-client API
!pip install -qU openai pinecone-client datasets

In [None]:
# Load the 'train' split of the YouTube transcription dataset; these transcript
# rows are the documents that may contain the answer to a query.
from datasets import load_dataset

data = load_dataset('jamescalam/youtube-transcriptions', split='train')
print(data)     # dataset summary: feature names and row count
print(data[0])  # first row, to show the fields of one record

Dataset({
    features: ['title', 'published', 'url', 'video_id', 'channel_id', 'id', 'text', 'start', 'end'],
    num_rows: 208619
})
{'title': 'Training and Testing an Italian BERT - Transformers From Scratch #4', 'published': '2021-07-06 13:00:03 UTC', 'url': 'https://youtu.be/35Pdoyi6ZoQ', 'video_id': '35Pdoyi6ZoQ', 'channel_id': 'UCv83tO5cePwHMt1952IVVHw', 'id': '35Pdoyi6ZoQ-t0.0', 'text': 'Hi, welcome to the video.', 'start': 0.0, 'end': 9.36}


In [None]:
# Merge consecutive transcript rows into overlapping windows for embedding.
# A window starts every `stride` rows and joins the text of rows [i, j),
# where j is at most `window` rows after i.
# NOTE(review): the same-video check and the 'end' timestamp both read row j,
# which is NOT part of the joined text (the slice excludes j) — confirm whether
# row j - 1 was intended.
new_data = []

window = 20 # number of sentences to combine
stride = 4 # number of sentences to stride over

last_row = len(data) - 1
for i in range(0, len(data), stride):
    j = min(last_row, i + window)
    head, tail = data[i], data[j]  # fetch each boundary row once

    # don't encompass two videos
    if head['title'] != tail['title']:
        continue

    text = ' '.join(data[i:j]['text'])

    # metadata comes from the first row of the window, except 'end'
    new_data.append({
        'start': head['start'],
        'end': tail['end'],
        'title': head['title'],
        'text': text,
        'id': head['id'],
        'url': head['url'],
        'published': head['published'],
        'channel_id': head['channel_id']
    })

new_data[0]

  0%|          | 0/52155 [00:00<?, ?it/s]

In [None]:
import os

import pinecone

# Read the Pinecone key from the environment instead of committing it to the notebook.
# NOTE(review): the key that was previously hardcoded here should be treated as
# leaked and rotated.
pinecone_client = pinecone.Pinecone(api_key=os.environ["PINECONE_API_KEY"])
index_name = 'study-vector-database'

# Only create the index when it does not exist yet, so that re-running this
# cell (e.g. Restart & Run All) does not fail on an already-existing index.
if index_name not in pinecone_client.list_indexes().names():
    pinecone_client.create_index(
        index_name,
        dimension = 1536, # embedding size = 1536 = len(result.data[0].embedding)
        metric='cosine', # comparing metric
        spec = pinecone.ServerlessSpec(
            cloud = 'aws',
            region = 'us-east-1'
        )
    )

index = pinecone_client.Index(index_name)
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {},
 'total_vector_count': 0}

In [None]:
from time import sleep

from tqdm import tqdm # show progress bar

# Embed a 10% sample of the windows in batches and upload them to the index.
batch_size = 50
upload_count = int(len(new_data) * .1)  # number of windows to embed and upload
max_retries = 10                        # attempts per batch before giving up
retry_delay = 5                         # seconds to wait between attempts

metadata_keys = ('start', 'end', 'title', 'text', 'url', 'published', 'channel_id')

with tqdm(total = upload_count) as pbar:
    for i in range(0, upload_count, batch_size):
        # cap at upload_count so the last batch does not run past the sample
        j = min(upload_count, i + batch_size)
        batch = new_data[i: j]
        idxs = [x['id'] for x in batch]
        texts = [x['text'] for x in batch]

        # Retry only on transient API failures (rate limit / connection), and
        # only a bounded number of times; any other error surfaces immediately
        # instead of being swallowed by an endless retry loop.
        for attempt in range(max_retries):
            try:
                completions = client.embeddings.create(model = 'text-embedding-3-small',
                                                       input = texts)
                break
            except (openai.RateLimitError, openai.APIConnectionError):
                if attempt == max_retries - 1:
                    raise
                sleep(retry_delay)

        embedds = [item.embedding for item in completions.data]
        meta_batch = [{key: x[key] for key in metadata_keys} for x in batch]

        # each vector is an (id, embedding, metadata) tuple
        to_upsert = list(zip(idxs, embedds, meta_batch))
        index.upsert(vectors=to_upsert)

        # advance by the real batch length so the bar ends exactly at the total
        pbar.update(len(batch))

In [None]:
# Embed the user's question with the same model that was used for the uploaded
# windows, so the query vector can be compared against the stored vectors.
query = "Where to go for instructions to install torch?"
query_embed = client.embeddings.create(model = 'text-embedding-3-small',
                                       input = [query]).data[0].embedding

In [None]:
# Fetch the 2 closest stored vectors, together with their metadata, for the query embedding.
res = index.query(vector = query_embed, top_k = 2, include_metadata = True)
res

{'matches': [{'id': 'uYas6ysyjgY-t387.0',
              'metadata': {'channel_id': 'UCv83tO5cePwHMt1952IVVHw',
                           'end': 597.0,
                           'published': '2022-05-24 13:00:34 UTC',
                           'start': 387.0,
                           'text': "And you'll probably see this where it "
                                   'says, please reactivate your environment. '
                                   'So to do that, we just do conda activate '
                                   'and run that. That switches back to the '
                                   'base environment. And then we literally '
                                   'just activate ML again. Now, the next step '
                                   'is to actually pip install PyTorch. And to '
                                   'do that, we are doing what you can see '
                                   'here. So I am running pip install upgrade. '
                           

In [None]:
# this result can be then fed into prompt engineering queries
# based on the context to provide better results
def complete(query, limit = 3750):
    """Answer ``query`` with gpt-4o, using passages retrieved from the vector index.

    The query is embedded, the 3 closest entries are fetched from ``index``,
    and their texts are added to the prompt as context.

    Args:
        query: The user's question.
        limit: Maximum prompt length in characters. A retrieved context is only
            added if the prompt stays within this limit.

    Returns:
        The text content of the model's reply.
    """
    query_embedding = client.embeddings.create(
        input=[query],
        model='text-embedding-3-small').data[0].embedding

    # get relevant contexts
    res = index.query(vector=query_embedding, top_k=3, include_metadata=True)
    contexts = [x['metadata']['text'] for x in res['matches']]

    # build our prompt; each section is separated so the question does not run
    # straight into the "Context" heading
    prompt = (
        "Task\n---\nAnswer the Question based on the context below.\n\n"
        f"Question\n---\n{query}\n\n"
        "Context\n---\n"
    )
    # append contexts while they still fit; check BEFORE appending so the
    # prompt never grows past the limit
    for context in contexts:
        if len(prompt) + len(context) + 1 > limit:
            break
        prompt = prompt + context + '\n'

    return client.chat.completions.create(
            model = 'gpt-4o',
            messages = [{'role': 'user', 'content': prompt}]
        ).choices[0].message.content

complete(query)

'For instructions on how to install torch, you should refer to the notebook link provided in the description mentioned in the context. The context explains that the installation commands and steps can be copied from this notebook link.'

### 3 - Reason and Act, Chatbot OOP Implementation
To limit deviation, define a fixed set of actions, parse the model's output for them, and run functions that produce deterministic results for the model to use.

In [None]:
import httpx
import re

In [None]:
# define the model
class ChatBot:
    """Minimal chat wrapper that keeps the whole conversation in ``self.messages``.

    Every call appends the user message and the assistant reply to the
    history, so later calls are answered with the earlier turns as context.
    """

    def __init__(self, client, system_message = "", model = 'gpt-4o'):
        """
        Args:
            client: Object exposing ``chat.completions.create(model=..., messages=...)``.
            system_message: Optional system prompt; stored as the first message when non-empty.
            model: Name of the chat model to request (defaults to the previously hard-coded 'gpt-4o').
        """
        self.client = client
        self.model = model
        self.messages = []
        if system_message:
            self.messages.append({'role': 'system', 'content': system_message})

    def __call__(self, message = ''):
        """Send ``message`` as the next user turn and return the assistant's reply text."""
        self.messages.append({'role': 'user', 'content': message})
        assistant_message = self.complete()
        # record the reply so it is part of the context of the next call
        self.messages.append({'role': 'assistant', 'content': assistant_message})
        return assistant_message

    def complete(self):
        """Request a completion for the current history and return its text content."""
        completion = self.client.chat.completions.create(
            model = self.model,
            messages = self.messages)
        return completion.choices[0].message.content

In [None]:
# System prompt for the Thought / Action / PAUSE / Observation loop: the bot may
# go through several rounds of actions before it gives an Answer.
# This is a plain (non-f) string, so the doubled braces in "{{expression}}" and
# "{{term}}" are sent to the model literally.
prompt = """
Task
---
You run in a loop of Thought, Action, PAUSE, Observation.
At the end of the loop you output an Answer
Use Thought to describe your thoughts about the question you have been asked.
Use Action to run one of the actions available to you - then return PAUSE.
Observation will be the result of running those actions. Run another loop if you need more information.
Return Answer Immediately if having enough information
---

Action
---
calculate: {{expression}}
low priority, runs a calculation and returns the number - uses Python so be sure to use floating point syntax if necessary

wikipedia: {{term}}
High priority, returns a summary from searching Wikipedia
---

Example Session
---
Question: What is the capital of France?
Thought: I should look up France on Wikipedia
Action: wikipedia: France
PAUSE

You then will be called again with Observation, and you will return your answer based on your thoughts about the Observation:
DO NOT RETURN ANSWER IF NOT GIVEN OBSERVATION
IF IT IS NUMBER, ONLY RETURN THE NUMBER AFTER ANSWER
Observation: France is a country, and the capital is Paris
Answer: The capital of France is Paris
---
""".strip()

In [None]:
# Parsing: matches a whole line of the form "Action: <name>: <argument>",
# capturing the action name (group 1) and the rest of the line (group 2).
action_pattern = re.compile(r'^Action: (\w+): (.*)$')

def wikipedia(query):
    """Return the snippet of the top Wikipedia search hit for ``query`` as plain text.

    Args:
        query: Search term passed to the MediaWiki search API.

    Returns:
        The first result's snippet with HTML markup removed, or a short
        "no results" message when the search returns nothing.

    Raises:
        httpx.HTTPStatusError: If the API answers with an error status.
    """
    import html  # local import: only needed to decode entities in the snippet

    response = httpx.get("https://en.wikipedia.org/w/api.php", params={
        "action": "query",
        "list": "search",
        "srsearch": query,
        "format": "json"
    })
    response.raise_for_status()

    hits = response.json()["query"]["search"]
    if not hits:
        # an empty result list would otherwise raise IndexError on hits[0]
        return f"No Wikipedia results found for '{query}'"

    # the API wraps matched words in <span class="searchmatch"> tags; strip the
    # markup so the model receives readable text
    snippet = re.sub(r'<[^>]+>', '', hits[0]["snippet"])
    return html.unescape(snippet)

def calculate(what):
    """Evaluate ``what`` as a Python expression and return the resulting value.

    NOTE(review): this uses ``eval``. In this notebook the argument is text
    produced by the language model, so arbitrary Python can be executed here;
    restrict or sandbox this before using it outside a demo.
    """
    value = eval(what)
    return value

# Dispatch table: maps the action name parsed from the model's output to the
# function that carries it out.
action_dict = {
    'wikipedia': wikipedia,
    'calculate': calculate
}

In [None]:
# Check that the bot's replies keep a consistent, parseable format: each
# question is sent to a fresh bot, followed by a canned observation for it.
test_prompts = ['what is happiness?', 'define pathos and ethos', 'what is 3 * 2 + 7?',
                'what is capital of China?', 'what is 7 * 7?']
observation = ['a state of well-being and contentment : joy', 'Pathos An emotional appeal that uses vivid language, sensory images, and anecdotes to evoke feelings in the audience. Pathos can help the audience understand how an argument will affect the real world. Ethos An ethical appeal that establishes the writer\'s credibility and authority. Ethos can be conveyed by stating one\'s occupation and experience, or by using first-person plural pronouns like \"we\" and \"us\".',
               '13', 'China is an Asian country, its\' capital is Beijing', '49']
for trial, (question, canned_observation) in enumerate(zip(test_prompts, observation), start=1):
    print(f'------TRIAL {trial}------')
    bot = ChatBot(client, prompt)       # new conversation for every trial
    print(bot(question))                # expected: Thought / Action / PAUSE
    print(bot(canned_observation))      # expected: Answer based on the observation

In [None]:
def query(question, client = client, prompt = prompt, max_turns = 5):
    """Run the Thought / Action / Observation loop for ``question``.

    Each turn prints the bot's reply, looks for the first "Action: name: arg"
    line, runs the matching function from ``action_dict`` and feeds its result
    back to the bot as an Observation.

    Args:
        question: The user's question.
        client: Client handed to ``ChatBot``.
        prompt: System prompt describing the loop and the available actions.
        max_turns: Maximum number of action rounds before an answer is forced.

    Returns:
        The bot's reply once it contains no action (its final answer), the
        string "unknown action generated, terminate" if the bot requests an
        action that is not in ``action_dict``, or the bot's reply to a final
        "return result immediately" message when ``max_turns`` is used up.
    """
    bot = ChatBot(client, prompt)
    next_prompt = bot(question)
    for _ in range(max_turns):
        print(next_prompt)
        # reuse the precompiled module-level pattern; take the first action line
        matches = (action_pattern.match(line) for line in next_prompt.split('\n'))
        first_action = next((m for m in matches if m), None)
        if first_action is None:
            # no action requested: this reply is the final answer, so hand it
            # back to the caller instead of returning None
            return next_prompt

        action, item = first_action.groups()
        if action not in action_dict:
            return "unknown action generated, terminate"
        print(f"---running {action} {item}")
        next_prompt = "Observation: " + str(action_dict[action](item))
        print(next_prompt)
        next_prompt = bot(next_prompt)
    return bot('return result immediately')

In [None]:
query('what is capital of China?')

Thought: Let's look up China on Wikipedia to find out its capital city.
Action: wikipedia: China
PAUSE
---running wikipedia China
Observation: <span class="searchmatch">China</span>, officially the People's Republic of <span class="searchmatch">China</span> (PRC), is a country in East Asia. With a population exceeding 1.4 billion, it is the world's second-most
Action: wikipedia: People's Republic of China
PAUSE
---running wikipedia People's Republic of China
Observation: <span class="searchmatch">China</span>, officially the <span class="searchmatch">People's</span> <span class="searchmatch">Republic</span> <span class="searchmatch">of</span> <span class="searchmatch">China</span> (PRC), is a country in East Asia. With a population exceeding 1.4 billion, it is the world's second-most
Thought: The search results provide information about China but not specifically about its capital. I will refine my search to directly look for "Capital of China".
Action: wikipedia: Capital of China
PAUS

In [None]:
query('why is collge football so popular?')

Thought: To understand why college football is so popular, I should look up a summary of the topic which will provide insights into the cultural, historical, and societal aspects that contribute to its popularity.

Action: wikipedia: College football
PAUSE
---running wikipedia College football
Observation: <span class="searchmatch">College</span> <span class="searchmatch">football</span> is gridiron <span class="searchmatch">football</span> that is played by teams of amateur student-athletes at universities and <span class="searchmatch">colleges</span>. It was through collegiate competition
Answer: College football is popular because it involves amateur student-athletes from universities and colleges, creating a unique blend of competition, school spirit, regional pride, and deep-rooted traditions that resonate with students, alumni, and local communities.


### 4 - Recursive Re-prompting and Revision
1. Break the task down into smaller ones when writing: first create the characters and the plot;
2. then create drafts, where each draft is a passage for one plot point (the plot is split into several plot points); to generate a passage, give the model the previous plot point and the upcoming one;
3. finally, revise by letting the model look back at its own output: for each passage, have it check for inconsistencies with the passages before and after it, and fix them itself. Repeat until no inconsistencies are found.

In [None]:
def complete(prompt, stop=None):
    """Send ``prompt`` as a single user message to gpt-3.5-turbo and return the reply text.

    Args:
        prompt: Text of the user message.
        stop: Optional stop sequence(s). When given, they are forwarded to the
            API so generation ends at the first occurrence (previously this
            argument was accepted but ignored).

    Returns:
        The text content of the first choice.
    """
    # only send `stop` when the caller supplied one
    extra_args = {} if stop is None else {"stop": stop}
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {
            "role": "user",
            "content": prompt
            }
        ],
        **extra_args
    )
    return response.choices[0].message.content

complete('is this working?')

'Yes, it appears to be working. How can I assist you today?'

In [None]:
# Planning
# `premise` is the name every prompt below reads; the original cell only
# defined `premise_input`, so the first f-string raised NameError.
premise = "Premise: A new law grad returns home to start her career, but struggles with the broken justice system."
premise_input = premise  # original name kept as an alias

# Generate Setting
setting_prompt = f"{premise}\nThe story is set in"
setting_response = complete(setting_prompt, stop=["."])
setting = f"Setting: The story is set in {setting_response}."
print(setting)

# Generate Characters
num_characters = 2
characters = ""
character_format = "Please invent characters for this story, in the format of [name] is [context]. For example 'Liza Turner is a 22-year-old woman' or 'Peyton Turner is Liza's older sister'. Each character is distinct and does not reuse the same name as these examples."
# `premise` and `setting` already start with their own "Premise:" / "Setting:"
# labels, so the labels are not repeated here.
character_prompt = f"{premise}\n\n{setting}\n\n{character_format}\n\n"
for n in range(num_characters):
    # The running prompt ends with the header of the portrait to write next;
    # it is sent as-is (the header used to be appended a second time).
    character_prompt += f"{n+1}. Character Portrait:\n"
    character_response = complete(character_prompt, stop=["."])
    # keep earlier portraits in the prompt so later characters stay distinct
    character_prompt += f"{character_response}\n\n"
    characters += f"{n+1}. Character Portrait:\n{character_response.strip()}.\n\n"
print(characters)

# plot
plot_prompt = f"{premise}\n\n{setting}\n\n{characters}Outline the main plot points of the story.\n1."
plot_response = complete(plot_prompt)
plot = f"{plot_response}"
print("Outline the main plot points of the story\n\n1. ", plot)

# entire planning
print(f"{premise}\n\n{setting}\n\n{characters}\n\nOutline the main plot points of the story\n\n1. {plot}")

In [None]:
import re

# Split the outline into one entry per non-empty line and drop a leading
# "<number>." prefix from each entry.
plot_points_raw = plot.split('\n')
plot_points = [
    re.sub(r'^\d+\.\s*', '', raw_line)
    for raw_line in plot_points_raw
    if raw_line != ''
]

print(plot_points)

In [None]:
# Draft one passage per plot point. Every passage after the first is given
# the previous plot point and the previously drafted passage as context.
drafts = []

for idx, plot_point in enumerate(plot_points):
    has_history = idx != 0
    previous = f"Previous story summary:\n{plot_points[idx-1]}" if has_history else ""
    immediately = f"Immediately before the current passage:\n{drafts[-1]}" if has_history else ""
    upcoming = f"In the upcoming passage,\n{plot_point}"

    draft_prompt = f"Relevant context:\n{characters}{previous}\n\n{immediately}\n\n{upcoming}\n\nFull text below:"

    draft_response = complete(draft_prompt).strip()

    print(f"Plot Point {idx+1}\n-----\n", draft_prompt, f"\n{draft_response}\n-----\n\n")
    drafts.append(draft_response)

# the full story: all passages concatenated without a separator
print("".join(drafts))

In [None]:
# Revision
# Progressive extraction: for each passage after the first, collect the facts
# of the preceding passage, extract the facts of the current one, ask for
# inconsistencies between the two, and rewrite the current passage.
facts = []   # facts of all earlier passages, accumulated across iterations
edits = []   # rewritten passages, one per draft from the second onwards
for idx in range(1, len(drafts)):
    # Old Facts
    facts_prompt = f"What facts can be inferred from this text?\n\nText:\n{drafts[idx-1]}\n\nFacts:\n-"
    facts_response = complete(facts_prompt)
    facts_response = facts_response.strip()
    facts.append(facts_response)

    # New Facts
    facts_prompt = f"What facts can be inferred from this text?\n\nText:\n{drafts[idx]}\n\nFacts:\n-"
    facts_response = complete(facts_prompt)
    facts_response = facts_response.strip()

    # Consistency, also input true or false
    joined_facts = "\n".join(facts)
    # "\n\nOld facts" replaces the invalid escape "\n\Old facts", which put a
    # literal backslash into the prompt
    consistency_prompt = f"Are any New Facts inconsistent with Old Facts?:\n\nOld facts:\n{joined_facts}\n\nNew facts:{facts_response}\n\nInconsistencies:\n-"
    consistency_response = complete(consistency_prompt)

    # Rewriting, can be in a while loop with consistency until consistent
    edit_prompt = f"Edit so that any inconsistencies are addressed:\nInconsistencies:{consistency_response}\n\nText:{drafts[idx]}\n\nRewritten Text:"
    edit_response = complete(edit_prompt)
    edit_response = edit_response.strip()
    edits.append(edit_response)

    print(f"Draft Text {idx+1}\n-----\n", drafts[idx], f"\n\nOld Facts:\n{joined_facts}", f"\n\nNew Facts:\n{facts_response}", f"\n\nConsistency:\n{consistency_response}", f"\n\nRewritten Text:\n{edit_response}\n-----\n\n")

### 5 - Vector Database with Faiss, an open source local library

In [None]:
def get_vector_embeddings(text, client = client):
    """Return the embedding of ``text`` from the "text-embedding-ada-002" model.

    Args:
        text: Input passed to the embeddings endpoint.
        client: Client used for the request; defaults to the notebook's client.

    Returns:
        The ``embedding`` of the first item in the response data.
    """
    return client.embeddings.create(
        model="text-embedding-ada-002",
        input=text,
    ).data[0].embedding

get_vector_embeddings("Your text string goes here")

In [None]:
import re

def chunk_text(text, window_size=5, stride=2):
    """Split ``text`` into overlapping chunks of consecutive sentences.

    Sentences are split after '.', '!' or '?' followed by one or more spaces.
    A window of ``window_size`` sentences is moved forward ``stride``
    sentences at a time, and each window is joined with single spaces.

    Args:
        text: The text to chunk.
        window_size: Number of sentences per chunk.
        stride: Number of sentences to advance between chunks.

    Returns:
        A list of chunk strings. Text with fewer sentences than
        ``window_size`` yields one chunk holding all of it, and a final window
        is added when the stride would otherwise leave trailing sentences out.
        Empty text yields an empty list.

    Raises:
        ValueError: If ``window_size`` or ``stride`` is smaller than 1.
    """
    if window_size < 1 or stride < 1:
        raise ValueError("window_size and stride must be positive integers")

    # Tokenize the text into sentences, ignoring empty pieces
    sentences = [s for s in re.split('(?<=[.!?]) +', text.strip()) if s]
    if not sentences:
        return []

    # Short text: one chunk with everything instead of silently returning nothing
    if len(sentences) <= window_size:
        return [' '.join(sentences)]

    # Slide the window over the sentences
    starts = list(range(0, len(sentences) - window_size + 1, stride))
    # If the last window stops short of the end, add one aligned to the end
    # so that no trailing sentence is dropped
    if starts[-1] + window_size < len(sentences):
        starts.append(len(sentences) - window_size)

    return [' '.join(sentences[start:start + window_size]) for start in starts]

text = """
From Adam West to Robert Pattinson: this is how Batman has changed over the years
Batman is one of the most recognizable superheroes in pop culture, starting with his first appearance in Detective Comics #27 in 1939. He has been listed as one of the most iconic characters in popular culture, and among the greatest comic book superheroes and fictional characters ever created. While Batman is sometimes paired with Superman,

Isabel Cara

00:08 / 00:20
Batman is one of the most recognizable superheroes in pop culture, starting with his first appearance in Detective Comics #27 in 1939. He has been listed as one of the most iconic characters in popular culture, and among the greatest comic book superheroes and fictional characters ever created.

While Batman is sometimes paired with Superman, Wonder Woman, and others from the Justice League, his stories have so much potential that they have been made into movies, tv shows, videogames, toy lines, radio, even musical theatre.

You might find interesting: ‘The Batman’: Villains who would be exceptional candidates for the sequel

ADVERTISING

So, let’s talk about how Batman, in his live-action version, has changed through the years:

The Batman series (1966-1968), played by Adam West
V2qn2wliqvchxfxv6w56zqm6se - from adam west to robert pattinson: this is how batman has changed over the years
This tv show (and a movie in 1966) portrays the character in a camp aesthetic, meaning that it could be appealing because of its bad taste and ironic value (like those “so bad it’s good” movies), but they camp things can also be described as “cheesy”.

This aesthetic was associated with Batman for years after the show ended, and comic creators worked hard to return the character to his darker roots in the decades that followed, culminating in The Dark Knight Returns (1986) by Frank Miller.


Nevertheless, this Batman holds a special place in every fan because references to this show have appeared in almost every Batman movie made after. With references appearing in media that is not related to the character, like the movie Ready Player One, and shows like The Simpsons, SpongeBob SquarePants, and The Fairly OddParents.

While it might not be considered the best version of Batman, it’s the Batman that opened the doors to what we have now.

Batman (1989), played by Michael Keaton
6aheuzaikfealdjwgzy6366xt4 - from adam west to robert pattinson: this is how batman has changed over the years
This was a groundbreaking movie directed by Tim Burton, and starring Jack Nicholson, Michael Keaton, Kim Basinger, Robert Wuhl, Pat Hingle, Billy Dee Williams, Michael Gough, and Jack Palance.

The movie was successful critically and financially, making it the 5th highest-grossing film in history (at the time of its release).

It inspired Batman: The Animated Series (1992-1995), paving the way for the DC Animated Universe. The film has also influenced Hollywood’s modern marketing and development techniques of the superhero film genre.

It created the “Batmania” pop-culture phenomenon and made Batman cool again.

Batman Forever (1995), played by Val Kilmer
47zcqedjfrerpgasyogbpwjsvu - from adam west to robert pattinson: this is how batman has changed over the years
This movie strayed away from the aesthetic of Tim Burton’s Batman, making it more colorful and over-the-top, especially with Jim Carey’s Ridley and Tommy Lee Jones’s Two-Face.

It received mixed reviews from critics, but was a box office success, becoming the 6th highest-grossing film worldwide in 1995. Nevertheless, this movie tends to be ignored currently, with many fans not even remembering that Val Kilmer was Batman.

Batman & Robin (1997), played by George Clooney
3bulsal75ncwzny3xvd2nz33le - from adam west to robert pattinson: this is how batman has changed over the years
This film is regarded as the worst Batman movie and even getting spots as one of the worst movies ever made.


It has become infamous because of the neon design, the Batnipples, and just being a franchise-killing movie (after being considered a box office disappointment, Warner Bros. decided to cancel future Batman films, like Schumacher’s -the director of this movie and Batman Forever- Batman Unchained).

It’s not even considered by fans “so bad it’s good”, and it’s almost always skipped in rewatches of the films.

Batman Begins (2005), played by Christian Bale
Doggryf3mbfklnwfyzr2t7vyoi - from adam west to robert pattinson: this is how batman has changed over the years
The most iconic Batman for the modern audience. Christopher Nolan’s work makes it one of the best superhero movies of all time.

It rebooted the Batman film series, giving it a darker and more realistic tone compared to previous films (especially Schumacher’s job). The film wanted to create an emotional connection with the audience and the Batman and Bruce Wayne identities.

The film became the 9th highest-grossing film of 2005 and was followed by The Dark Knight (2008), and The Dark Knight Rises (2012). Creating The Dark Knight Trilogy, and the reason many DC fans say that DC movies are better than Marvel Films.

Batman v Superman: Dawn of Justice (2016), played by Ben Affleck
R6tzxansdngnra5ee5izdeocva - from adam west to robert pattinson: this is how batman has changed over the years
Affleck is the only actor (so far) to not get his solo movie, and the possibilities of it happening are uncertain, maybe even impossible.

Nevertheless, his role is different from the other Batmans, with him being older and jaded because of all the deaths and destruction he has seen but being hopeful in humanity among the member of the Justice League.

He feels like Batman and Bruce are the same guys, and it really is a shame we couldn’t spend more time with him.

Gotham (2014-2019), played by David Mazouz
Qf7qau37anf75nknc5wwobmnkm - from adam west to robert pattinson: this is how batman has changed over the years
Wait, who? Well… yeah.

The show started following James Gordon in his early days at the Gotham City Police Department following the tragic deaths of Bruce’s parents and started telling the origin stories of some of the most infamous Batman villains.

During the fourth season, Bruce Wayne starts to fight crime as a masked vigilante, and in the fifth (and final) season it shows Bruce becoming Batman for the first time.


While not the most popular version of Batman (I didn’t even know the show had Batman in it until doing research for this article), it promises to give a good origin story, and a side of the character we haven’t seen before.

The Batman (2022), played by Robert Pattinson
Wu23mtvdhzbspep3rpmkrwm3fm - from adam west to robert pattinson: this is how batman has changed over the years
This film, directed by Matt Reeves, is considered a reboot of the Batman film franchise (after the studio didn’t know what to do with Ben Affleck’s version).

You might find interesting: Robert Pattinson’s must watch films from ‘Twilight’ to ‘The Batman’

It tells the story of a young Batman, who has only been fighting crime for two years and is uncovering corruption while pursuing the Riddler, a serial killer who targets Gotham’s elite.

Perhaps you have already seen it, or perhaps you’re like me and haven’t been able to find the time. Either way, the memes for this movie and the use of Nirvana’s Something in the Way have been incredible.
"""

# Chunk the article into windows of 4 sentences, moving one sentence at a time.
chunks = chunk_text(text, window_size=4, stride=1)
print(len(chunks))
chunks

In [None]:
# pip install faiss-cpu     -> install faiss on cpu
import numpy as np
import faiss

# Get vector embeddings for the chunks.
# FAISS works on float32 matrices, so build the array with that dtype
# explicitly instead of relying on numpy's float64 default.
vectors = np.array([get_vector_embeddings(chunk) for chunk in chunks], dtype='float32')
print(vectors.shape)

# Create a FAISS index (exact search, L2 distance) sized to the embedding dimension
index = faiss.IndexFlatL2(vectors.shape[1])
index.add(vectors)

# Function to perform a vector search
def vector_search(query_text, k=3):
    """Return up to ``k`` chunks closest to ``query_text``.

    Args:
        query_text: Text to embed and search for.
        k: Number of nearest neighbours to request from the index.

    Returns:
        A list of ``(chunk, distance)`` tuples in the order returned by the
        index search.
    """
    # FAISS expects a 2-D float32 array with one row per query
    query_vector = np.array([get_vector_embeddings(query_text)], dtype='float32')
    distances, indices = index.search(query_vector, k)
    # When fewer than k vectors are available FAISS fills the missing slots
    # with index -1; skip those rather than silently returning chunks[-1].
    return [
        (chunks[i], float(dist))
        for dist, i in zip(distances[0], indices[0])
        if i != -1
    ]

# Example search
search_query = "robert pattinson"
search_results = vector_search(search_query)
# print the query that was actually searched (the label used to say 'lorem ipsum')
print(f"Search results for '{search_query}':")
for result in search_results:
    print(result)


In [None]:
# Function to perform a vector search and then ask GPT-3.5-turbo a question
def search_and_chat(chat_prompt, k=3):
    """Answer ``chat_prompt`` with gpt-3.5-turbo, using retrieved chunks as context.

    Args:
        chat_prompt: The user's question; also used as the search query.
        k: Number of chunks to retrieve.

    Returns:
        The model's reply text (it is also printed).
    """
    # Perform the vector search
    search_results = vector_search(chat_prompt, k)
    print(f"Search results: {search_results}\n\n")

    # Give the model only the chunk texts, not the repr of (chunk, distance)
    # tuples, and keep the context clearly separated from the question.
    context = "\n\n".join(chunk for chunk, _distance in search_results)
    prompt_with_context = f"Context:\n{context}\n\nAnswer the question: {chat_prompt}"

    # Create a list of messages for the chat
    messages = [
        {"role": "system", "content": "Please answer the questions provided by the user. Use only the context provided to you, if you don't know the answer say \"I don't know\"."},
        {"role": "user", "content": prompt_with_context},
    ]

    # Get the model's response
    response = client.chat.completions.create(model="gpt-3.5-turbo", messages=messages)
    answer = response.choices[0].message.content

    # Print the assistant's reply and also return it so callers can use it
    print(f"Response: {answer}")
    return answer

# Example search and chat
search_and_chat("What song is associated with Robert Pattinson's Batman?")