In [None]:
!pip install openai

In [5]:
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
from openai import OpenAI
from google.colab import userdata
# Authenticate the current Colab user via google.colab.auth.
auth.authenticate_user()
# Build a PyDrive client that uses the application-default Google credentials.
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)
import gspread
from google.auth import default
# Fetch default credentials through google.auth (the second returned value is
# discarded) and authorize the gspread client `gc` with them.
creds, _ = default()
gc = gspread.authorize(creds)

In [None]:
# Build the OpenAI client; the API key comes from Colab userdata instead of being hard-coded.
openai_api_key = userdata.get("chatgpt_api_key")
client = OpenAI(api_key=openai_api_key)

# Settings for the assistant: a financial-analyst persona with the file_search tool enabled.
assistant_settings = dict(
    name="Financial Analyst Assistant",
    instructions="You are an expert financial analyst. Use your knowledge base to answer questions about audited financial statements.",
    model="gpt-4-turbo",
    tools=[{"type": "file_search"}],
)
assistant = client.beta.assistants.create(**assistant_settings)

# Vector store that the downloaded PDFs are uploaded into later in the notebook.
vector_store_name = "Financial Statements"
vector_store = client.beta.vector_stores.create(name=vector_store_name)

# Define a function to ask questions for a given file
def ask_questions_for_file(file_path, message_file, question_list=None):
    """Ask every question about one uploaded file and collect the answers.

    For each question a new thread is created with the question as the user
    message and ``message_file`` attached for the ``file_search`` tool. The
    thread is then run with the module-level ``assistant`` and the first
    message produced by that run is taken as the answer.

    Args:
        file_path: Local path of the file. Not used by the function; kept so
            existing callers keep working.
        message_file: Uploaded file object; only its ``id`` attribute is read.
        question_list: Optional iterable of question strings. When ``None``
            (the default) the module-level ``questions`` list is used.

    Returns:
        dict mapping each question to its answer text. A question that gets no
        reply maps to a "No response received ..." string that includes the
        run's final status; a question that raises maps to
        "Error occurred: <message>".
    """
    if question_list is None:
        # Resolved at call time, so the list may be defined after this function.
        question_list = questions

    answers = {}
    for q in question_list:
        try:
            # Create a thread with the question and attached file
            thread = client.beta.threads.create(
                messages=[
                    {
                        "role": "user",
                        "content": q,
                        "attachments": [
                            {"file_id": message_file.id, "tools": [{"type": "file_search"}]}
                        ],
                    }
                ]
            )

            # Run the thread and wait for the response
            run = client.beta.threads.runs.create_and_poll(
                thread_id=thread.id, assistant_id=assistant.id
            )

            # List messages received in response to the thread
            messages = list(
                client.beta.threads.messages.list(thread_id=thread.id, run_id=run.id)
            )
            if not messages:
                # Report the run's final status so a failed or expired run is
                # distinguishable from a genuinely empty reply.
                status = getattr(run, "status", "unknown")
                answers[q] = f"No response received (run status: {status})"
                continue

            # Keep only the text parts of the first message; a reply is not
            # guaranteed to start with (or contain) a text part.
            text_parts = [
                part.text.value
                for part in messages[0].content
                if getattr(part, "type", None) == "text"
            ]
            answers[q] = "\n".join(text_parts) if text_parts else "No response received"
        except Exception as e:
            # Best effort: record the failure for this question and move on to the next.
            answers[q] = f"Error occurred: {str(e)}"
    return answers


# Questions put to the assistant for every PDF. ask_questions_for_file reads this
# module-level list and stores one answer per question, keyed by the question text.
questions = [
    "What is the name of the company?",
    "What year is covered by the annual report?",
    "Who are the auditors of the company?",
    "Who are the directors of the company?",
    "What is the depreciation policy of the company?"
]

# Answers collected per PDF, keyed by the file's Drive title.
files_answers = {}
# Define the folder ID where PDF files are located
folder_id = userdata.get("folder_id")
# Retrieve list of files in the specified folder
file_list = drive.ListFile({'q': f"'{folder_id}' in parents and trashed=false"}).GetList()

# Iterate over each file in the folder
for drive_file in file_list:
    # Process only PDF files
    if drive_file['mimeType'] != 'application/pdf':
        continue

    # The Drive title doubles as the local filename for the download.
    pdf_filename = drive_file['title']
    print(f'Downloading file: {pdf_filename}')
    drive_file.GetContentFile(pdf_filename)

    # Upload the PDF to the vector store. The handle is closed as soon as the
    # upload finishes instead of being left open for the rest of the session.
    # NOTE(review): nothing visible in this notebook attaches the vector store to
    # the assistant or to a thread, so this upload looks redundant -- confirm.
    with open(pdf_filename, "rb") as file_stream:
        file_batch = client.beta.vector_stores.file_batches.upload_and_poll(
            vector_store_id=vector_store.id, files=[file_stream]
        )

    # Upload the PDF again as an "assistants" file; this is the file that gets
    # attached to each question's message. A fresh handle is used so the read
    # starts at the beginning of the file.
    with open(pdf_filename, "rb") as file_stream:
        message_file = client.files.create(file=file_stream, purpose="assistants")

    # Ask questions about the file and store answers
    files_answers[pdf_filename] = ask_questions_for_file(pdf_filename, message_file)

# Show every question/answer pair, grouped by the file it belongs to.
for pdf_name, pdf_answers in files_answers.items():
    print(f"Answers for file: {pdf_name}")
    for asked, reply in pdf_answers.items():
        # A single print with an embedded newline emits the same two lines.
        print(f"Question: {asked}\nAnswer: {reply}")
    print("\n")

# Finally dump the raw dictionary holding the answers for all files.
print(files_answers)

In [None]:
# Read the target spreadsheet's key from Colab userdata (stored under "sheet_id").
sheet_id = userdata.get("sheet_id")
# Open that spreadsheet by key with the authorized gspread client `gc`.
sh = gc.open_by_key(sheet_id)

In [None]:
worksheet = sh.sheet1
# Read the current header row first so it can be written back after the clear.
header = worksheet.row_values(1)

# One row per file: the file name followed by its answers, in the order the
# answers were stored for that file.
rows_to_write = [
    [filename, *answers.values()] for filename, answers in files_answers.items()
]
# Put the header back on top, but only if the sheet actually had one.
if header:
    rows_to_write.insert(0, header)

# Clear the worksheet, effectively removing all existing rows
worksheet.clear()
# Write everything in a single request instead of one request per row, which
# keeps the number of Sheets API write calls constant regardless of file count.
if rows_to_write:
    worksheet.append_rows(rows_to_write)