In [5]:
# ! pip install -q azure-storage-blob azure-identity
# ! pip install -q pypdf2

# test 1

In [1]:
import os
from openai import AzureOpenAI
    
client = AzureOpenAI(
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),  
    api_version="2024-05-01-preview",
    azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
    )

assistant = client.beta.assistants.create(
  name="Financial Analyst Assistant",
  instructions="""
    As a Financial Analyst, you will leverage your expertise to generate tailored Financial Analysis Reports that cater to the specific requirements of clients. This role involves in-depth analysis of financial statements and market data to uncover insights into a company's financial performance and stability.
You will engage directly with clients to gather essential information and refine the report based on their feedback, ensuring that the final product precisely meets their needs and expectations.

Key Objectives:

Analytical Precision: Utilize analytical skills to interpret financial data, identify trends, and detect anomalies.
Effective Communication: Simplify and convey complex financial information in a clear and actionable manner for non-specialist audiences.
Client Focus: Tailor reports dynamically based on client feedback, aligning the analysis with their strategic goals.
Quality Assurance: Maintain the highest standards of quality and integrity in report generation, adhering to established benchmarks for analytical rigor.
Performance Indicators:

The effectiveness of the Financial Analysis Report is measured by its ability to provide actionable insights that support corporate decision-making, identify areas for operational improvement, and evaluate the company's financial health. Success is reflected in the report's contribution to informed investment decisions and strategic planning.
""",
  model=os.getenv("DEPLOYMENT_NAME"),
  # tools=[{"type": "file_search"}],
  temperature=0
)

In [3]:
# Create a vector store called "Financial Statements"
vector_store = client.beta.vector_stores.create(name="Financial Statements")

In [4]:
# Ready the files for upload to OpenAI
# file_paths = ["mydirectory/myfile1.pdf", "mydirectory/myfile2.txt"]
# file_streams = [open(path, "rb") for path in file_paths]

from azure.identity import DefaultAzureCredential
from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient

In [5]:
# connect_str = os.getenv("AZURE_STORAGE_CONNECTION_STRING")
# container_client = ContainerClient.from_connection_string(
#     conn_str=connect_str,
#     container_name="earningcalltranscriptscontainer"
# )
# # List the blobs in the container
# import io
# blob_list = container_client.list_blobs()
# file_streams = [container_client.download_blob(blob).readall() for blob in blob_list]

In [None]:
# Configure Azure Storage connection
connection_string = os.getenv("AZURE_STORAGE_CONNECTION_STRING")
blob_service_client = BlobServiceClient.from_connection_string(connection_string)


# Define container and file names in Azure Storage
container_name = "earningcalltranscriptscontainer"  # Replace with your container name
container_client = ContainerClient.from_connection_string(
    conn_str=connection_string,
    container_name="earningcalltranscriptscontainer"
)
file_names = container_client.list_blobs()  # Replace with your file names

# Prepare file streams from Azure Storage blobs
file_streams = []
for file_name in file_names:
    blob_client = blob_service_client.get_blob_client(container=container_name, blob=file_name)
    downloaded_blob = blob_client.download_blob()
    file_streams.append(downloaded_blob.content_as_bytes())  # Adjust encoding if needed

file_batch = client.beta.vector_stores.file_batches.upload_and_poll(
  vector_store_id=vector_store.id, files=file_streams
)

In [13]:
# Configure Azure Storage connection
connection_string = os.getenv("AZURE_STORAGE_CONNECTION_STRING")
blob_service_client = BlobServiceClient.from_connection_string(connection_string)

# Define container name in Azure Storage
container_name = "earningcalltranscriptscontainer"  # Replace with your container name

# Get a list of blobs with their complete names (including extension)
blob_client = blob_service_client.get_container_client(container=container_name)
blobs = blob_client.list_blobs()

# Prepare file streams from Azure Storage blobs with extension check
file_streams = []
for blob in blobs:
    # Check if blob has an extension (avoid empty strings)
    if blob.name and "." in blob.name:
        blob_client = blob_service_client.get_blob_client(container=container_name, blob=blob.name)
        downloaded_blob = blob_client.download_blob()
        file_streams.append(downloaded_blob.content_as_bytes())  # Adjust encoding if needed

# Upload the files with extensions
file_batch = client.beta.vector_stores.file_batches.upload_and_poll(
  vector_store_id=vector_store.id, files=file_streams
)

BadRequestError: Error code: 400 - {'error': {'message': 'Files with extensions [none] are not supported for retrieval. Supported file types include: [.c, .cpp, .cs, .css, .doc, .docx, .html, .java, .js, .json, .md, .pdf, .php, .pptx, .py, .rb, .sh, .tex, .ts, .txt].', 'type': 'invalid_request_error', 'param': 'file_ids', 'code': 'unsupported_file'}}

# Test 2

In [2]:
import os
from openai import AzureOpenAI
    
client = AzureOpenAI(
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),  
    api_version="2024-05-01-preview",
    azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
    )

assistant = client.beta.assistants.create(
  name="Financial Analyst Assistant",
  instructions="""
    As a Financial Analyst, you will leverage your expertise to generate tailored Financial Analysis Reports that cater to the specific requirements of clients. This role involves in-depth analysis of financial statements and market data to uncover insights into a company's financial performance and stability.
You will engage directly with clients to gather essential information and refine the report based on their feedback, ensuring that the final product precisely meets their needs and expectations.

Key Objectives:

Analytical Precision: Utilize analytical skills to interpret financial data, identify trends, and detect anomalies.
Effective Communication: Simplify and convey complex financial information in a clear and actionable manner for non-specialist audiences.
Client Focus: Tailor reports dynamically based on client feedback, aligning the analysis with their strategic goals.
Quality Assurance: Maintain the highest standards of quality and integrity in report generation, adhering to established benchmarks for analytical rigor.
Performance Indicators:

The effectiveness of the Financial Analysis Report is measured by its ability to provide actionable insights that support corporate decision-making, identify areas for operational improvement, and evaluate the company's financial health. Success is reflected in the report's contribution to informed investment decisions and strategic planning.
""",
  model=os.getenv("DEPLOYMENT_NAME"),
  # tools=[{"type": "file_search"}],
  temperature=0
)

In [3]:
# Create a thread
thread = client.beta.threads.create()
print(thread)

Thread(id='thread_siWas63FZiUU6OzYqwT5teT3', created_at=1724069111, metadata={}, object='thread', tool_resources=ToolResources(code_interpreter=None, file_search=None))


In [13]:
# Add a user question to the thread
import PyPDF2

def extract_text_from_pdf(pdf_path):
    """
    Extract text from a PDF file.

    Args:
    pdf_path (str): Path to the PDF file.

    Returns:
    str: Extracted text from the PDF.
    """
    text = ""
    with open(pdf_path, "rb") as file:
        pdf_reader = PyPDF2.PdfReader(file)
        for page in pdf_reader.pages:
            page_text = page.extract_text()
            if page_text:
                text += page_text
    return text

input_context = extract_text_from_pdf("company/VLO-3Q23-Earnings-Release.pdf")
message = client.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    content=f"""The report will include the following sections and I want to extract the capital expenditure and other metric like 

Operating Activities "Change in Working Capital",
        "Net Cash from Operating Activities"
Investing Activities has "Acquisition of Fixed Assets & Intangibles",
        "Net Cash from Investing Activities"
Financing Activities includes "Dividends Paid",
        "Cash from (Repayment of) Debt",
        "Net Cash from Financing Activities",
Net Change includes  Net Change in Cash
Metadata includes "Report Date", "Publish Date", "Source"
Profitability Metrics like "EBITDA",
        "Gross Profit Margin",
        "Operating Margin",
        "Net Profit Margin",
        "Return on Equity",
        "Return on Assets",
        "Return On Invested Capital",
Liquidity Metrics include Current Ratio
Solvency Metrics like  "Total Debt", "Liabilities to Equity Ratio", "Debt Ratio",
Cash Flow Metrics like "Free Cash Flow", "Free Cash Flow to Net Income", "Cash Return On Invested Capital",
Other Important Metrics like "Piotroski F-Score", "Net Debt / EBITDA", "Dividend Payout Ratio
Capital Expenditure    

```Document
{input_context}
```
"""
)

In [14]:
thread_messages = client.beta.threads.messages.list(thread.id)
print(thread_messages.model_dump_json(indent=2))

{
  "data": [
    {
      "id": "msg_OWP3LQ96N8CjQIbouxmmwRjL",
      "assistant_id": null,
      "attachments": [],
      "completed_at": null,
      "content": [
        {
          "text": {
            "annotations": [],
            "value": "The report will include the following sections and I want to extract the capital expenditure and other metric like \n\nOperating Activities \"Change in Working Capital\",\n        \"Net Cash from Operating Activities\"\nInvesting Activities has \"Acquisition of Fixed Assets & Intangibles\",\n        \"Net Cash from Investing Activities\"\nFinancing Activities includes \"Dividends Paid\",\n        \"Cash from (Repayment of) Debt\",\n        \"Net Cash from Financing Activities\",\nNet Change includes  Net Change in Cash\nMetadata includes \"Report Date\", \"Publish Date\", \"Source\"\nProfitability Metrics like \"EBITDA\",\n        \"Gross Profit Margin\",\n        \"Operating Margin\",\n        \"Net Profit Margin\",\n        \"Return on Equit

In [15]:
run = client.beta.threads.runs.create(
  thread_id=thread.id,
  assistant_id=assistant.id,
  #instructions="New instructions" #You can optionally provide new instructions but these will override the default instructions
)

In [16]:
import time
from IPython.display import clear_output

start_time = time.time()

status = run.status

while status not in ["completed", "cancelled", "expired", "failed"]:
    time.sleep(5)
    run = client.beta.threads.runs.retrieve(thread_id=thread.id,run_id=run.id)
    print("Elapsed time: {} minutes {} seconds".format(int((time.time() - start_time) // 60), int((time.time() - start_time) % 60)))
    status = run.status
    print(f'Status: {status}')
    clear_output(wait=True)

messages = client.beta.threads.messages.list(
  thread_id=thread.id
) 

print(f'Status: {status}')
print("Elapsed time: {} minutes {} seconds".format(int((time.time() - start_time) // 60), int((time.time() - start_time) % 60)))
print(messages.model_dump_json(indent=2))

Status: completed
Elapsed time: 0 minutes 10 seconds
{
  "data": [
    {
      "id": "msg_vbB3Oi84WpofApMIo0zcE5Pl",
      "assistant_id": "asst_iUD5GjoAo7DvMWPZXTPt5jDd",
      "attachments": [],
      "completed_at": null,
      "content": [
        {
          "text": {
            "annotations": [],
            "value": "Financial Analysis Report: Valero Energy Corporation - Q3 2023\n\nReport Date: October 26, 2023\nPublish Date: [Insert Publish Date]\nSource: Valero Energy Corporation\n\n1. Executive Summary:\n- Valero Energy Corporation reported net income attributable to Valero stockholders of $2.6 billion, or $7.49 per share, for the third quarter of 2023.\n- The company returned $2.2 billion to stockholders through dividends and stock buybacks.\n\n2. Financial Performance:\n- Refining segment operating income: $3.4 billion in Q3 2023, compared to $3.8 billion in Q3 2022.\n- Renewable Diesel segment operating income: $123 million in Q3 2023, compared to $212 million in Q3 2022.

In [17]:
messages = client.beta.threads.messages.list(
  thread_id=thread.id
)

print(messages.model_dump_json(indent=2))

{
  "data": [
    {
      "id": "msg_vbB3Oi84WpofApMIo0zcE5Pl",
      "assistant_id": "asst_iUD5GjoAo7DvMWPZXTPt5jDd",
      "attachments": [],
      "completed_at": null,
      "content": [
        {
          "text": {
            "annotations": [],
            "value": "Financial Analysis Report: Valero Energy Corporation - Q3 2023\n\nReport Date: October 26, 2023\nPublish Date: [Insert Publish Date]\nSource: Valero Energy Corporation\n\n1. Executive Summary:\n- Valero Energy Corporation reported net income attributable to Valero stockholders of $2.6 billion, or $7.49 per share, for the third quarter of 2023.\n- The company returned $2.2 billion to stockholders through dividends and stock buybacks.\n\n2. Financial Performance:\n- Refining segment operating income: $3.4 billion in Q3 2023, compared to $3.8 billion in Q3 2022.\n- Renewable Diesel segment operating income: $123 million in Q3 2023, compared to $212 million in Q3 2022.\n- Ethanol segment operating income: $197 million in

In [18]:
import json
data = json.loads(messages.model_dump_json(indent=2))  # Load JSON data into a Python object
op = data['data'][0]['content'][0]['text']['value']

print(op)  # Outputs: assistant-1YGVTvNzc2JXajI5JU9F0HMD

Financial Analysis Report: Valero Energy Corporation - Q3 2023

Report Date: October 26, 2023
Publish Date: [Insert Publish Date]
Source: Valero Energy Corporation

1. Executive Summary:
- Valero Energy Corporation reported net income attributable to Valero stockholders of $2.6 billion, or $7.49 per share, for the third quarter of 2023.
- The company returned $2.2 billion to stockholders through dividends and stock buybacks.

2. Financial Performance:
- Refining segment operating income: $3.4 billion in Q3 2023, compared to $3.8 billion in Q3 2022.
- Renewable Diesel segment operating income: $123 million in Q3 2023, compared to $212 million in Q3 2022.
- Ethanol segment operating income: $197 million in Q3 2023, compared to $1 million in Q3 2022.
- General and administrative expenses: $250 million in Q3 2023, compared to $214 million in Q3 2022.

3. Cash Flow Analysis:
- Net cash provided by operating activities: $3.3 billion in Q3 2023.
- Change in working capital: $33 million favora

In [20]:
"""

This prompt focuses on extracting the key financial data points with clarity and precision.

{context}
"""
# Add a new user question to the thread
message = client.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    content="""Please provide the  company name, year and quarter, capital expenditure value in billions of US dollars in JSON format"""
)

messages = client.beta.threads.messages.list(
  thread_id=thread.id
)

print(messages.model_dump_json(indent=2))

data = json.loads(messages.model_dump_json(indent=2))
json_code = data['data'][0]['content'][0]['text']['value']
print(json_code)

{
  "data": [
    {
      "id": "msg_OC3jRZetSITMC6ZLykGBe0QZ",
      "assistant_id": null,
      "attachments": [],
      "completed_at": null,
      "content": [
        {
          "text": {
            "annotations": [],
            "value": "Please provide the  company name, year and quarter, capital expenditure value in billions of US dollars in JSON format"
          },
          "type": "text"
        }
      ],
      "created_at": 1724071833,
      "incomplete_at": null,
      "incomplete_details": null,
      "metadata": {},
      "object": "thread.message",
      "role": "user",
      "run_id": null,
      "status": null,
      "thread_id": "thread_br6TfEpA7TWLlp9Dqumu3t2G"
    },
    {
      "id": "msg_OLkxYwpwfwGYrADyaKSAIvek",
      "assistant_id": "asst_ZKLWBYwCpdnwPYUmuL8HN9Hn",
      "attachments": [],
      "completed_at": null,
      "content": [
        {
          "text": {
            "annotations": [],
            "value": "Financial Analysis Report - Valero Ener

# Test 3

In [21]:
import os
import PyPDF2
import time
import json
from IPython.display import clear_output
from openai import AzureOpenAI
    
client = AzureOpenAI(
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),  
    api_version="2024-05-01-preview",
    azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
    )
file_path = "company/VLO-3Q23-Earnings-Release.pdf"
assistant = client.beta.assistants.create(
  name="Financial Analyst Assistant",
  instructions="""
    As a Financial Analyst, you will leverage your expertise to generate tailored Financial Analysis Reports that cater to the specific requirements of clients. This role involves in-depth analysis of financial statements and market data to uncover insights into a company's financial performance and stability.
You will engage directly with clients to gather essential information and refine the report based on their feedback, ensuring that the final product precisely meets their needs and expectations.

Key Objectives:

Analytical Precision: Utilize analytical skills to interpret financial data, identify trends, and detect anomalies.
Effective Communication: Simplify and convey complex financial information in a clear and actionable manner for non-specialist audiences.
Client Focus: Tailor reports dynamically based on client feedback, aligning the analysis with their strategic goals.
Quality Assurance: Maintain the highest standards of quality and integrity in report generation, adhering to established benchmarks for analytical rigor.
Performance Indicators:

The effectiveness of the Financial Analysis Report is measured by its ability to provide actionable insights that support corporate decision-making, identify areas for operational improvement, and evaluate the company's financial health. Success is reflected in the report's contribution to informed investment decisions and strategic planning.
""",
  model=os.getenv("DEPLOYMENT_NAME"),
  # tools=[{"type": "file_search"}],
  temperature=0
)

# Create a thread
thread = client.beta.threads.create()


def extract_text_from_pdf(pdf_path):
    """
    Extract text from a PDF file.

    Args:
    pdf_path (str): Path to the PDF file.

    Returns:
    str: Extracted text from the PDF.
    """
    text = ""
    with open(pdf_path, "rb") as file:
        pdf_reader = PyPDF2.PdfReader(file)
        for page in pdf_reader.pages:
            page_text = page.extract_text()
            if page_text:
                text += page_text
    return text

input_context = extract_text_from_pdf(file_path)

message = client.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    content=f"""The report will include the following sections and I want to extract the capital expenditure and other metric like 

Operating Activities "Change in Working Capital",
        "Net Cash from Operating Activities"
Investing Activities has "Acquisition of Fixed Assets & Intangibles",
        "Net Cash from Investing Activities"
Financing Activities includes "Dividends Paid",
        "Cash from (Repayment of) Debt",
        "Net Cash from Financing Activities",
Net Change includes  Net Change in Cash
Metadata includes "Report Date", "Publish Date", "Source"
Profitability Metrics like "EBITDA",
        "Gross Profit Margin",
        "Operating Margin",
        "Net Profit Margin",
        "Return on Equity",
        "Return on Assets",
        "Return On Invested Capital",
Liquidity Metrics include Current Ratio
Solvency Metrics like  "Total Debt", "Liabilities to Equity Ratio", "Debt Ratio",
Cash Flow Metrics like "Free Cash Flow", "Free Cash Flow to Net Income", "Cash Return On Invested Capital",
Other Important Metrics like "Piotroski F-Score", "Net Debt / EBITDA", "Dividend Payout Ratio
Capital Expenditure    

```Document
{input_context}
```
"""
)

thread_messages = client.beta.threads.messages.list(thread.id)

run = client.beta.threads.runs.create(
  thread_id=thread.id,
  assistant_id=assistant.id,
  #instructions="New instructions" #You can optionally provide new instructions but these will override the default instructions
)



start_time = time.time()

status = run.status

while status not in ["completed", "cancelled", "expired", "failed"]:
    time.sleep(5)
    run = client.beta.threads.runs.retrieve(thread_id=thread.id,run_id=run.id)
    print("Elapsed time: {} minutes {} seconds".format(int((time.time() - start_time) // 60), int((time.time() - start_time) % 60)))
    status = run.status
    print(f'Status: {status}')
    clear_output(wait=True)

messages = client.beta.threads.messages.list(
  thread_id=thread.id
) 

print(f'Status: {status}')
# print("Elapsed time: {} minutes {} seconds".format(int((time.time() - start_time) // 60), int((time.time() - start_time) % 60)))
# print(messages.model_dump_json(indent=2))

messages = client.beta.threads.messages.list(
  thread_id=thread.id
)

# print(messages.model_dump_json(indent=2))

data = json.loads(messages.model_dump_json(indent=2))  # Load JSON data into a Python object
op = data['data'][0]['content'][0]['text']['value']

print(op)  # Outputs: assistant-1YGVTvNzc2JXajI5JU9F0HMD

# Add a new user question to the thread
message = client.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    content="""Please provide the  company name, year and quarter, capital expenditure value in billions of US dollars in JSON format"""
)

messages = client.beta.threads.messages.list(
  thread_id=thread.id
)

# print(messages.model_dump_json(indent=2))

data = json.loads(messages.model_dump_json(indent=2))
json_code = data['data'][0]['content'][0]['text']['value']
print(json_code)

Status: completed
Financial Analysis Report - Valero Energy Corporation

Report Date: October 26, 2023
Publish Date: [To be determined]
Source: Valero Energy Corporation

1. Executive Summary
- Valero Energy Corporation reported net income attributable to Valero stockholders of $2.6 billion, or $7.49 per share, for the third quarter of 2023.
- The Refining segment reported operating income of $3.4 billion, while the Renewable Diesel segment reported operating income of $123 million.
- The Ethanol segment reported operating income of $197 million.
- Valero returned $2.2 billion to stockholders through dividends and stock buybacks.

2. Financial Performance
- Profitability Metrics:
   - EBITDA: [Not provided]
   - Gross Profit Margin: [Not provided]
   - Operating Margin: [Not provided]
   - Net Profit Margin: [Not provided]
   - Return on Equity: [Not provided]
   - Return on Assets: [Not provided]
   - Return on Invested Capital: [Not provided]

3. Liquidity and Solvency
- Liquidity Me