In [67]:
import pandas as pd
from openai import OpenAI
import ast
import numpy as np
import pdb
import os
from dotenv import load_dotenv

# Pull variables from a local .env file into the process environment so the
# API key never has to be written into the notebook itself.
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")  # None when the variable is not set

# Shared OpenAI client used by the embedding and chat-completion calls below.
client = OpenAI(
    api_key = api_key
)

In [68]:
# Read the whole source document into memory. The context manager closes the
# handle as soon as the read finishes instead of leaving it open for the life
# of the kernel.
# The document contains non-ASCII names, so the encoding is pinned rather than
# left to the platform default (assumes the dump was saved as UTF-8 — confirm).
with open('./giannis.txt', encoding='utf-8') as file:
    text = file.read()

In [40]:
def chunk(txt):
    """Split ``txt`` into paragraphs, using a blank line as the separator.

    Args:
        txt: The full document as a single string.

    Returns:
        A new list with every piece found between consecutive double-newline
        separators, in document order. Pieces are returned verbatim: nothing
        is stripped, and empty pieces are kept.
    """
    # str.split already yields a fresh list, so no copy loop is needed.
    return txt.split('\n\n')

In [41]:
# One row per paragraph of the source document, in document order.
# `columns` is given as a list of labels; the previous dict only worked
# because pandas happened to iterate over its keys.
chunks = pd.DataFrame(data=chunk(text), columns=['text'])

In [42]:
chunks.head(5)

Unnamed: 0,text
0,Jump to content
1,Main menu
2,Main menu move to sidebar hide
3,Navigation
4,Main page Contents Current events Random artic...


In [43]:
def get_embedding(text, model="text-embedding-3-small"):
    """Return the embedding vector for ``text`` from the OpenAI embeddings API.

    Args:
        text: The string to embed. Newlines are replaced with spaces before
            the request is sent.
        model: Name of the embedding model to request.

    Returns:
        The ``embedding`` attribute of the first item in the API response.
    """
    single_line = text.replace("\n", " ")
    # The API takes a batch; send a batch of one and unwrap its only result.
    result = client.embeddings.create(input=[single_line], model=model)
    return result.data[0].embedding

In [44]:
# Embedding the document costs one API request per row, so reuse the pickle
# written by a previous run when it was built from exactly the same paragraphs.
# Delete the pickle to force a fresh computation.
EMBEDDINGS_PKL = './giannis-with-embeddings.pkl'

cached = pd.read_pickle(EMBEDDINGS_PKL) if os.path.exists(EMBEDDINGS_PKL) else None
cache_is_current = (
    cached is not None
    and {'text', 'embeddings'} <= set(cached.columns)
    # Guards against a stale cache after the source text has been edited.
    and cached['text'].equals(chunks['text'])
)

if cache_is_current:
    chunks['embeddings'] = cached['embeddings']
else:
    chunks['embeddings'] = chunks['text'].apply(get_embedding)
    # The CSV holds each vector as text; the pickle keeps the Python lists intact.
    chunks.to_csv('./giannis-with-embeddings', index=False)
    chunks.to_pickle(EMBEDDINGS_PKL)

In [45]:
# Reload the frame persisted by the embedding cell so the retrieval steps can
# start from the saved file.
# NOTE: only unpickle files you created yourself — unpickling can run arbitrary code.
df = pd.read_pickle('./giannis-with-embeddings.pkl')
df.head()

Unnamed: 0,text,embeddings
0,Jump to content,"[0.009784935042262077, 0.016955072060227394, -..."
1,Main menu,"[-0.016432739794254303, 0.05875169113278389, 0..."
2,Main menu move to sidebar hide,"[-0.04941752180457115, 0.015163028612732887, -..."
3,Navigation,"[-0.025431305170059204, 0.04099149629473686, 0..."
4,Main page Contents Current events Random artic...,"[0.032835181802511215, 0.01653081551194191, 0...."


In [46]:
# Natural-language question; it is embedded for retrieval and also sent to the
# chat model as the user message in the cells that follow.
question = 'What did gianniss father do for work'

In [47]:
# Embed the question once; every chunk is scored against this single vector.
question_embedding = get_embedding(question)


def fn(page_embedding):
    """Return the dot product of one chunk embedding with ``question_embedding``."""
    score = np.dot(page_embedding, question_embedding)
    return score


# NOTE: despite the column name this is a similarity score, not a distance —
# a larger dot product means the chunk is closer to the question.
df['distance'] = df['embeddings'].apply(fn)
df.head()

Unnamed: 0,text,embeddings,distance
0,Jump to content,"[0.009784935042262077, 0.016955072060227394, -...",0.121148
1,Main menu,"[-0.016432739794254303, 0.05875169113278389, 0...",0.134117
2,Main menu move to sidebar hide,"[-0.04941752180457115, 0.015163028612732887, -...",0.094728
3,Navigation,"[-0.025431305170059204, 0.04099149629473686, 0...",0.155599
4,Main page Contents Current events Random artic...,"[0.032835181802511215, 0.01653081551194191, 0....",0.100018


In [48]:
# Rank chunks from most to least similar to the question. Rebinding the name
# instead of using `inplace=True` avoids mutating the frame that earlier cells
# displayed; 'distance' holds dot-product scores, so descending puts the best
# match first.
df = df.sort_values('distance', ascending=False)
df.head()

Unnamed: 0,text,embeddings,distance
104,Early life As the son of immigrants from Niger...,"[0.0007357422146014869, 0.008205491118133068, ...",0.38066
312,"Following their son and brother, the entire An...","[-0.016756076365709305, 0.029105279594659805, ...",0.367623
310,Personal life Giannis and Thanasis Antetokounm...,"[0.024478189647197723, 0.0037096617743372917, ...",0.355506
550,"^ ""Giannis is a dad! Bucks star and girlfriend...","[-0.018812131136655807, 0.022889845073223114, ...",0.32018
350,"^ """"Giannis"", Behind the Name"". Archived from ...","[-0.0028413839172571898, 0.016048384830355644,...",0.316122


In [49]:
# Build the prompt context from the top-ranked chunks, one per line. Every
# chunk is now newline-separated; previously only the first two were, so the
# remaining chunks ran together with no delimiter.
# NOTE(review): rank 3 is skipped exactly as in the original selection —
# confirm whether that was intentional or a typo for iloc[3].
context = "\n".join(df['text'].iloc[[0, 1, 2, 4]])
print(context)

Early life As the son of immigrants from Nigeria,[12][13][14] Giannis Sina Ugo Adétòkunbọ̀[15][16] was born in Athens, Greece, on December 6, 1994.[17] His father was a soccer player and his mother was a high jumper in Nigeria. Three years earlier, they had moved from Lagos, leaving their firstborn son, Francis, under the care of his grandparents.[18] Adétòkunbọ̀ grew up in the Athens neighborhood of Sepolia[14] in a primarily Nigerian household.[19] His parents, as immigrants without work permits,[20] could not easily find work, so Giannis and his older brother Thanasis helped by hawking watches, handbags, and sunglasses in the streets.[21] In 2007, Adétòkunbọ̀ started playing basketball.[22]
Following their son and brother, the entire Antetokounmpo family, except for Francis and Thanasis, moved from Athens to Milwaukee in early 2014.[220] In July 2016, Giannis and Thanasis began their mandatory military service in Greece. The two brothers served a reduced three-month military service

In [62]:
# Ask the chat model the question, grounded in the retrieved context.
# The context/instruction message is sent with the 'system' role and placed
# before the user's question: the 'assistant' role is meant for text the model
# itself produced, so using it for instructions misrepresents the conversation.
response = client.chat.completions.create(
    model='gpt-3.5-turbo',
    messages=[
        {'role': 'system', 'content': 'You are an assistant who answers questions about Giannis Antetokounmpo. If the question does not involve Giannis Antetokounmpo, you should not answer it.'},
        {'role': 'system', 'content': f"Use this information from the Giannis Antetokounmpo Wikipedia to answer the question: {context}. Please stick to this context while answering the question"},
        {'role': 'user', 'content': question},
    ]
)

In [52]:
response.choices[0].message.content

"Giannis Antetokounmpo's father, Charles, was a Nigerian football player."

In [63]:
def query(question, top_k=4):
    """Answer ``question`` from the Wikipedia chunks most similar to it.

    The question is embedded, every row of ``df['embeddings']`` is scored
    against it by dot product, and the ``top_k`` best chunks become the
    context sent to the chat model.

    Args:
        question: The natural-language question to answer.
        top_k: Number of highest-scoring chunks to include in the context
            (expected to be a positive integer).

    Returns:
        The text content of the model's first completion choice.
    """
    # Retrieve context for *this* question. Relying on the notebook-level
    # `context` would answer every question from the chunks selected for
    # whichever question happened to be embedded earlier.
    question_embedding = get_embedding(question)
    similarity = df['embeddings'].apply(
        lambda page_embedding: np.dot(page_embedding, question_embedding)
    )
    # Sorting a copy of the scores leaves `df` itself untouched.
    best_rows = similarity.sort_values(ascending=False).index[:top_k]
    context = "\n".join(df.loc[best_rows, 'text'])

    # Instructions and retrieved context use the 'system' role and come before
    # the user's question; the 'assistant' role is for model-authored text.
    response = client.chat.completions.create(
        model='gpt-3.5-turbo',
        messages=[
            {'role': 'system', 'content': 'You are an assistant who answers questions about Giannis Antetokounmpo. If the question does not involve Giannis Antetokounmpo, you should not answer it.'},
            {'role': 'system', 'content': f"Use this information from the Giannis Antetokounmpo Wikipedia to answer the question: {context}. Please stick to this context while answering the question. If the answer is not explicityly stated in the context. Answer with I do not know"},
            {'role': 'user', 'content': question},
        ]
    )
    return response.choices[0].message.content

In [64]:
query("Who are giannis's brothers?")

'Giannis Antetokounmpo has four brothers: Francis, Thanasis, Kostas, and Alex.'

In [60]:
query("What is giannis's ppg")

"I do not know Giannis Antetokounmpo's PPG."

In [61]:
query("Tell me about gianis's ethnicity")

"Giannis Antetokounmpo's ethnicity is of Nigerian descent. His parents are from different Nigerian ethnic groups, with his father being Yoruba and his mother being Igbo."