In [7]:
from datasets import load_dataset
from openai import OpenAI
import openai
from pinecone import Pinecone, ServerlessSpec
from tqdm.auto import tqdm
from DLAIUtils import Utils

import ast
import os
import pandas as pd
from dotenv import load_dotenv, find_dotenv

_ = load_dotenv(find_dotenv())

openai.api_key = os.environ['OPENAI_API_KEY']
pinecone = Pinecone(api_key=os.environ['PINECONE_API_KEY'])
INDEX_NAME = f'dl-ai-{openai.api_key[-36:].lower().replace("_", "-")}'

if INDEX_NAME in [index.name for index in pinecone.list_indexes()]:
    pinecone.delete_index(INDEX_NAME)

pinecone.create_index(name=INDEX_NAME, dimension=1536, metric='cosine', spec = ServerlessSpec(cloud='aws', region='us-west-2'))

In [8]:
index = pinecone.Index(INDEX_NAME)
INDEX_NAME

'dl-ai-wty3wlpwt3blbkfj7btiegivd06tc0qqszqx'

In [10]:
df = pd.read_csv("data/wiki.csv")
df

Unnamed: 0,id,metadata,values
1,1-0,"{'chunk': 0, 'source': 'https://simple.wikiped...","[-0.011254455894231796, -0.01698738895356655, ..."
2,1-1,"{'chunk': 1, 'source': 'https://simple.wikiped...","[-0.0015197008615359664, -0.007858820259571075..."
3,1-2,"{'chunk': 2, 'source': 'https://simple.wikiped...","[-0.009930099360644817, -0.012211072258651257,..."
4,1-3,"{'chunk': 3, 'source': 'https://simple.wikiped...","[-0.011600767262279987, -0.012608098797500134,..."
5,1-4,"{'chunk': 4, 'source': 'https://simple.wikiped...","[-0.026462381705641747, -0.016362832859158516,..."
...,...,...,...
9996,9273-14,"{'chunk': 14, 'source': 'https://simple.wikipe...","[0.006269281730055809, -0.007062565069645643, ..."
9997,9273-15,"{'chunk': 15, 'source': 'https://simple.wikipe...","[-0.007164978422224522, -0.0002860440290533006..."
9998,9273-16,"{'chunk': 16, 'source': 'https://simple.wikipe...","[0.001473232638090849, -0.024397650733590126, ..."
9999,9273-17,"{'chunk': 17, 'source': 'https://simple.wikipe...","[0.004176019225269556, -0.022336846217513084, ..."


In [16]:
prepped = []

for i, row in tqdm(df.iterrows(), total = df.shape[0]):
    meta = ast.literal_eval(row['metadata'])
    prepped.append({'id': row['id'],
                    'values': ast.literal_eval(row['values']),
                    'metadata': meta})
    if len(prepped) >= 250:
        index.upsert(prepped)
        prepped = []

  0%|          | 0/10000 [00:00<?, ?it/s]

In [17]:
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 10000}},
 'total_vector_count': 10000}

In [19]:
openai_client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

def get_embeddings(articles, model="text-embedding-ada-002"):
    return openai_client.embeddings.create(input=articles, model=model)

query = "what is the berlin wall?"

embed = get_embeddings(query)

res = index.query(vector = embed.data[0].embedding, top_k=3, include_metadata=True)
text = [r['metadata']['text'] for r in res['matches']]
print("\n".join(text))

Egon Krenz was elected by the politburo to be Honecker's successor. Krenz tried to show that he was looking for change within the GDR but the citizens did not trust him. On November 9, 1989, the SED announced that East Germans would be able to travel to West Berlin the next day. The spokesman who announced the new travel law incorrectly said that it would take effect immediately, implying the Berlin Wall would open that night. People began to gather at border checkpoints at the wall hoping to be let through, but the guards told them that they had no orders to let citizens through. As the number of people grew, the guards became alarmed and tried to contact their superiors but had no responses. Unwilling to use force, the chief guard at the checkpoint relented at 10:54pm and ordered the gate to be opened. Thousands of East-Germans swarmed into West Berlin and the purpose of the wall was deemed now obsolete. The fall of the wall destroyed the SED politically as well as the career of its 

In [20]:
query = "write an article titled: what is the berlin wall?"

embed = get_embeddings([query])
res = index.query(vector=embed.data[0].embedding, top_k=3, include_metadata=True)

contexts = [x['metadata']['text'] for x in res['matches']]

prompt_start = (
    "Answer the question based on the context below.\n\n"+
    "Context:\n"
)

prompt_end = (
    f"\n\nQuestion: {query}\nAnswer:"
)

prompt = (
    prompt_start + "\n\n---\n\n".join(contexts) + 
    prompt_end
)

print(prompt)

Answer the question based on the context below.

Context:
Egon Krenz was elected by the politburo to be Honecker's successor. Krenz tried to show that he was looking for change within the GDR but the citizens did not trust him. On November 9, 1989, the SED announced that East Germans would be able to travel to West Berlin the next day. The spokesman who announced the new travel law incorrectly said that it would take effect immediately, implying the Berlin Wall would open that night. People began to gather at border checkpoints at the wall hoping to be let through, but the guards told them that they had no orders to let citizens through. As the number of people grew, the guards became alarmed and tried to contact their superiors but had no responses. Unwilling to use force, the chief guard at the checkpoint relented at 10:54pm and ordered the gate to be opened. Thousands of East-Germans swarmed into West Berlin and the purpose of the wall was deemed now obsolete. The fall of the wall d

In [21]:
res = openai_client.completions.create(
    model="gpt-3.5-turbo-instruct",
    prompt=prompt,
    temperature=0,
    max_tokens=636,
    top_p=1,
    frequency_penalty=0,
    presence_penalty=0,
    stop=None
)
print('-' * 80)
print(res.choices[0].text)

--------------------------------------------------------------------------------

The Berlin Wall, also known as the "Iron Curtain," was a physical and ideological barrier that divided the city of Berlin from 1961 to 1989. It was built by the East German government to prevent its citizens from fleeing to the West, and it became a symbol of the Cold War and the division between communism and capitalism.

After World War II, Germany was split into four parts, with the western half occupied by the United States, United Kingdom, and France, and the eastern half occupied by the Soviet Union. The city of Berlin, located entirely within the eastern half, was also divided among the four countries. This division led to stark differences in political systems and living conditions between East and West Germany.

In 1961, the East German government, under the leadership of Walter Ulbricht, began constructing the Berlin Wall to prevent its citizens from escaping to the West. The wall was made up of