### Example Call through Python SDK

In [56]:
import os
import sys

from dotenv import load_dotenv

sys.path.append(".")
sys.path.append("..")
sys.path.append("../..")
load_dotenv()

from any_parser import AnyParser  

# Read the API key from the process environment; load_dotenv() earlier in this
# cell has already merged any variables from a local .env file into it.
example_apikey = os.getenv("CAMBIO_API_KEY")
if not example_apikey:
    # os.getenv returns None when the variable is absent. Stop here with an
    # actionable message instead of handing a missing key to the client.
    raise RuntimeError(
        "CAMBIO_API_KEY is not set. Export it or add it to a .env file "
        "before running this notebook."
    )

# Sample document used by every Python example cell in this notebook
# (path is relative to the notebook's working directory).
example_local_file = "./sample_data/test2.pdf"

# Client object shared by the extract / parse / instruct cells.
op = AnyParser(example_apikey)


In [57]:
# Plain document extraction: run the sample file through op.extract and show
# the type of the returned object followed by the object itself.
print("file/document extraction test:")
content_result = op.extract(example_local_file)
print(type(content_result), content_result, sep="\n")

file/document extraction test:
<class 'list'>
['# Productivity and Business Processes\n\n## Overview\n\n| Investor Metrics                                                   | FY23 Q1   | FY23 Q2   | FY23 Q3   | FY23 Q4   | FY24 Q1   |\n|:-------------------------------------------------------------------|:----------|:----------|:----------|:----------|:----------|\n| Office Commercial products and cloud services revenue growth (y/y) | 7% / 13%  | 7% 14%    | 13% / 17% | 12% / 14% | 15% / 14% |\n| Office Consumer products and cloud services revenue growth (y/y)   | 7% 11%    | (2)% 3%   | 1% 4%     | 3% 6%     | 3% 4%     |\n| Office 365 Commercial seat growth (y/y)                            | 14%       | 12%       | 11%       | 11%       | 10%       |\n| Microsoft 365 Consumer subscribers (in millions)                   | 65.1      | 67.7      | 70.8      | 74.9      | 76.7      |\n| Dynamics products and cloud services revenue growth (y/y)          | 15% / 22% | 13% 20%   | 17% / 21%

In [58]:
# Prompt-driven extraction: the prompt is passed to op.parse together with the
# sample file, using mode="basic".
example_prompt = "Return table under Investor Metrics in JSON format with year as the key and the column as subkeys."

print("information extraction test:")
qa_result = op.parse(example_local_file, example_prompt, mode="basic")
# Show the result's type first, then its contents, one per line.
print(type(qa_result), qa_result, sep="\n")

information extraction test:
<class 'list'>
['<table>\n<tbody>\n<tr><td>Investor Metrics                                                  </td><td>FY23 Q1  </td><td>FY23 Q2  </td><td>FY23 Q3  </td><td>FY23 Q4  </td><td>FY24 Q1  </td></tr>\n<tr><td>Office Commercial products and cloud services revenue growth (y/y)</td><td>7% / 13% </td><td>7% 14%   </td><td>13% / 17%</td><td>12% / 14%</td><td>15% / 14%</td></tr>\n<tr><td>Office Consumer products and cloud services revenue growth (y/y)  </td><td>7% 11%   </td><td>(2)% 3%  </td><td>1% 4%    </td><td>3% 6%    </td><td>3% 4%    </td></tr>\n<tr><td>Office 365 Commercial seat growth (y/y)                           </td><td>14%      </td><td>12%      </td><td>11%      </td><td>11%      </td><td>10%      </td></tr>\n<tr><td>Microsoft 365 Consumer subscribers (in millions)                  </td><td>65.1     </td><td>67.7     </td><td>70.8     </td><td>74.9     </td><td>76.7     </td></tr>\n<tr><td>Dynamics products and cloud services revenue gro

In [59]:
# Instruction-driven extraction: the instruction is passed to op.instruct
# together with the sample file, using mode="advanced".
example_instruction = "Return the table under Investor Metrics in JSON format with year as the key and the column as subkeys."

print("instruction extraction test:")
instruction_result = op.instruct(example_local_file, example_instruction, mode="advanced")
# Show the result's type first, then its contents, one per line.
print(type(instruction_result), instruction_result, sep="\n")

instruction extraction test:
<class 'list'>
[[{'Office Commercial products and cloud services revenue growth (y/y)': '7% / 13%, 7% 14%, 13% / 17%, 12% / 14%, 15% / 14%', 'Office Consumer products and cloud services revenue growth (y/y)': '7% 11%, (2)% 3%, 1% 4%, 3% 6%, 3% 4%', 'Office 365 Commercial seat growth (y/y)': '14%, 12%, 11%, 11%, 10%', 'Microsoft 365 Consumer subscribers (in millions)': '65.1, 67.7, 70.8, 74.9, 76.7', 'Dynamics products and cloud services revenue growth (y/y)': '15% / 22%, 13% 20%, 17% / 21%, 19% / 21%, 22% / 21%', 'LinkedIn revenue growth (y/y)': '17% / 21%, 10% / 14%, 8% 11%, 6% 8%, 8%', 'Microsoft Cloud revenue increased': '24% to $33.7 billion', 'Office Commercial products and cloud services revenue increased': '15% driven by Office 365 Commercial growth of 17%', 'Office Consumer products and cloud services revenue increased': '5% and Microsoft 365 Consumer subscribers grew to 78.4 million', 'LinkedIn revenue increased': '9%', 'Dynamics products and cloud

### Example Call through Bash Script

In [60]:
%%bash
# Document extraction via the shell wrapper script.
# Move to the parent directory, where .env and extract_parse.sh are expected.
cd ..

# Load CAMBIO_API_KEY from the local .env file.
source .env
# Abort with a clear message if the key is unset or empty instead of invoking
# the script without it.
APIKEY="${CAMBIO_API_KEY:?CAMBIO_API_KEY is not set; define it in .env}"
FILE=./examples/sample_data/test2.pdf

echo "file/document extraction test:"
# Quote the expansions so that a value containing whitespace (or an empty one)
# cannot split or shift the script's positional arguments.
bash extract_parse.sh "$APIKEY" extract "$FILE" basic

file/document extraction test:


[
  "# Productivity and Business Processes\n\n## Overview\n\n| Investor Metrics | FY23 Q1 | FY23 Q2 | FY23 Q3 | FY23 Q4 | FY24 Q1 |\n|:--- |:--- |:--- |:--- |:--- |:--- |\n| Office Commercial products and cloud services revenue growth (y/y) | 7% / 13% | 7% 14% | 13% / 17% | 12% / 14% | 15% / 14% |\n| Office Consumer products and cloud services revenue growth (y/y) | 7% 11% | (2)% 3% | 1% 4% | 3% 6% | 3% 4% |\n| Office 365 Commercial seat growth (y/y) | 14% | 12% | 11% | 11% | 10% |\n| Microsoft 365 Consumer subscribers (in millions) | 65.1 | 67.7 | 70.8 | 74.9 | 76.7 |\n| Dynamics products and cloud services revenue growth (y/y) | 15% / 22% | 13% 20% | 17% / 21% | 19% / 21% | 22% / 21% |\n| LinkedIn revenue growth (y/y) | 17% / 21% | 10% / 14% | 8% 11% | 6% 8% | 8% |\n\nGrowth rates include non-GAAP CC growth (GAAP %/CC%)\n\n## Press release\n\n## Business Highlights\n\nRevenue in Productivity and Business Processes was $17.0 billion and increased 7% (up 13% in constant currency), with

In [61]:
%%bash
# Prompt-driven extraction ("parse") via the shell wrapper script.
# Move to the parent directory, where .env and extract_parse.sh are expected.
cd ..

# Load CAMBIO_API_KEY from the local .env file.
source .env
# Abort with a clear message if the key is unset or empty instead of invoking
# the script without it.
APIKEY="${CAMBIO_API_KEY:?CAMBIO_API_KEY is not set; define it in .env}"
FILE=./examples/sample_data/test2.pdf

echo "information extraction test:"
# Quote the expansions so that a value containing whitespace (or an empty one)
# cannot split or shift the script's positional arguments.
bash extract_parse.sh "$APIKEY" parse "$FILE" "Return table under Investor Metrics in JSON format with year as the key and the column as subkeys." basic

information extraction test:


[
  "<table>\n<tbody>\n<tr><td>Investor Metrics                                                  </td><td>FY23 Q1  </td><td>FY23 Q2  </td><td>FY23 Q3  </td><td>FY23 Q4  </td><td>FY24 Q1  </td></tr>\n<tr><td>Office Commercial products and cloud services revenue growth (y/y)</td><td>7% / 13% </td><td>7% 14%   </td><td>13% / 17%</td><td>12% / 14%</td><td>15% / 14%</td></tr>\n<tr><td>Office Consumer products and cloud services revenue growth (y/y)  </td><td>7% 11%   </td><td>(2)% 3%  </td><td>1% 4%    </td><td>3% 6%    </td><td>3% 4%    </td></tr>\n<tr><td>Office 365 Commercial seat growth (y/y)                           </td><td>14%      </td><td>12%      </td><td>11%      </td><td>11%      </td><td>10%      </td></tr>\n<tr><td>Microsoft 365 Consumer subscribers (in millions)                  </td><td>65.1     </td><td>67.7     </td><td>70.8     </td><td>74.9     </td><td>76.7     </td></tr>\n<tr><td>Dynamics products and cloud services revenue growth (y/y)         </td><td>15% / 22%</td>

In [62]:
%%bash
# Instruction-driven extraction ("instruct") via the shell wrapper script.
# Move to the parent directory, where .env and extract_parse.sh are expected.
cd ..

# Load CAMBIO_API_KEY from the local .env file.
source .env
# Abort with a clear message if the key is unset or empty instead of invoking
# the script without it.
APIKEY="${CAMBIO_API_KEY:?CAMBIO_API_KEY is not set; define it in .env}"
FILE=./examples/sample_data/test2.pdf

echo "instruction extraction test:"
# Quote the expansions so that a value containing whitespace (or an empty one)
# cannot split or shift the script's positional arguments.
bash extract_parse.sh "$APIKEY" instruct "$FILE" "Return table under Investor Metrics in JSON format with year as the key and the column as subkeys." advanced

instruction extraction test:


[
  [
    {
      "Office Commercial products and cloud services revenue growth (y/y)": "7% / 13%",
      "Office Consumer products and cloud services revenue growth (y/y)": "7% 11%",
      "Office 365 Commercial seat growth (y/y)": "14%",
      "Microsoft 365 Consumer subscribers (in millions)": "65.1",
      "Dynamics products and cloud services revenue growth (y/y)": "15% / 22%",
      "LinkedIn revenue growth (y/y)": "17% / 21%",
      "Business Highlights": "Revenue in Productivity and Business Processes was $17.0 billion and increased 7% (up 13% in constant currency), with the following business highlights:\nOffice Commercial products and cloud services revenue increased 7% (up 14% in constant currency) driven by Office 365 Commercial revenue growth of 11% (up 18% in constant currency)\nOffice Consumer products and cloud services revenue decreased 2% (up 3% in constant currency) and Microsoft 365 Consumer subscribers grew to 63.2 million\nLinkedIn revenue increased 10% (up 14% in

### Done