In [33]:
# %pip install --upgrade openai

In [1]:
from dotenv import load_dotenv
load_dotenv()

import os
from openai import OpenAI
from gpt import (
    create_file_search_assistant,
    upload_file_to_vectore_store,
    assistant_use_vector_store,
    create_thread,
)

In [4]:
# Notebook-wide settings: the model name, the PDF to index, and the API client.
MODEL = "gpt-4o"
FILE = "gan.pdf"
# The key is read from the API_KEY environment variable (load_dotenv() ran in
# the import cell); if the variable is unset, None is passed to the client.
client = OpenAI(api_key=os.getenv("API_KEY"))

In [5]:
# Create the assistant through the project helper, using the model chosen in
# the configuration cell.
# The instructions string is the system prompt sent to the model. It previously
# contained grammar slips ("an machine", "Use you knowledge base") and the word
# "audited", which does not apply to machine learning papers; these are fixed
# so the prompt says what is intended.
assistant = create_file_search_assistant(
    client=client,
    model=MODEL,
    assistant_name="Machine learning Assistant",
    instructions=(
        "You are a machine learning expert. Use your knowledge base to extract "
        "and summarize information about machine learning papers."
    ),
)

create_file_search_assistant took 0.35389184951782227 seconds


In [6]:
# Pass the PDF named by FILE to the project helper together with a store name;
# whatever the helper returns is kept as `vector_store` for the next cell.
vector_store = upload_file_to_vectore_store(client=client, vector_store_name="Machine learning papers", file_paths=[FILE])

completed
FileCounts(cancelled=0, completed=1, failed=0, in_progress=0, total=1)
upload_file_to_vectore_store took 3.4466092586517334 seconds


In [7]:
# Give the helper the current assistant and the vector store, then rebind
# `assistant` to the helper's return value (note: this overwrites the object
# created in the earlier cell).
assistant = assistant_use_vector_store(client=client, assistant=assistant, vector_store=vector_store)

assistant_use_vector_store took 0.6176433563232422 seconds


In [8]:
# Start a thread via the project helper, passing the request text below as
# `message_content`; `thread.id` is used by the run cell that follows.
thread = create_thread(client=client, message_content="Make detailed summary of the paper and explain the maths formulas")

None
create_thread took 0.3602719306945801 seconds


In [9]:
# Start a run of the assistant on the thread and wait for it to finish polling.
run = client.beta.threads.runs.create_and_poll(
    thread_id=thread.id, assistant_id=assistant.id
)

# A run that did not complete (failed, expired, cancelled, ...) leaves no
# assistant message behind; stop with the reported status instead of letting
# `messages[0]` below raise an opaque IndexError.
if run.status != "completed":
    raise RuntimeError(
        f"Run {run.id} ended with status {run.status!r}: {run.last_error}"
    )

# Only the messages produced by this run are requested.
messages = list(client.beta.threads.messages.list(thread_id=thread.id, run_id=run.id))
if not messages:
    raise RuntimeError(f"Run {run.id} completed but returned no messages")

# First content part of the first returned message, read as text.
message_content = messages[0].content[0].text
annotations = message_content.annotations
citations = []
# file_id -> filename, so a file cited several times is fetched only once.
cited_filenames = {}
for index, annotation in enumerate(annotations):
    # Swap the inline annotation marker for a numbered reference like "[0]".
    message_content.value = message_content.value.replace(annotation.text, f"[{index}]")
    # Not every annotation carries a file citation; skip those that do not.
    if file_citation := getattr(annotation, "file_citation", None):
        file_id = file_citation.file_id
        if file_id not in cited_filenames:
            cited_filenames[file_id] = client.files.retrieve(file_id).filename
        citations.append(f"[{index}] {cited_filenames[file_id]}")

# Answer text first, then a separator, then one line per cited file.
print(message_content.value)
print("="*50)
print("\n".join(citations))

### Detailed Summary of the Paper on Generative Adversarial Networks (GANs)

#### 1. Introduction
The paper introduces a novel framework called Generative Adversarial Networks (GANs) for estimating generative models via an adversarial process. This process simultaneously trains two models: a generative model (G) to capture the data distribution and a discriminative model (D) to estimate the probability of a sample coming from the training data rather than G .

#### 2. Adversarial Framework
The adversarial model framework involves a game between G and D. The generator transforms noise \( z \) (sampled from a distribution \( p_z(z) \)) into data space using a function \( G(z; \theta_g) \) represented by multilayer perceptrons. The discriminator \( D(x; \theta_d) \), also a multilayer perceptron, distinguishes between real data and generated data by outputting a probability .

The objective is for \( D \) to maximize its success in distinguishing between real and fake data, while \( G \) 