In [125]:
# Connection settings for the ContraxSuite instance used in this walkthrough.
# Every value can be overridden through an environment variable so that real
# credentials never have to be written into (or committed with) the notebook;
# the fallbacks are the original demo values.
import os

# Base URL without a trailing slash: every request below appends '/api/...' or
# '/rest-auth/...' to it.
CONTRAX_URL = os.environ.get('CONTRAX_URL', 'https://dev.contraxsuite.com').rstrip('/')
USERNAME = os.environ.get('CONTRAX_USERNAME', 'Administrator')
PASSWORD = os.environ.get('CONTRAX_PASSWORD', 'Administrator')
# Id of the project the document is uploaded into.
PROJECT_ID = int(os.environ.get('CONTRAX_PROJECT_ID', 30))
# Local file to upload; the same value is used later as the document name filter.
DOCUMENT_FN = os.environ.get('CONTRAX_DOCUMENT_FN', 'test_doc.pdf')

In [126]:
import requests

In [127]:
# Login
# Further authorization goes via 'auth_token' cookie.

r = requests.post('{contrax_url}/rest-auth/login/'.format(contrax_url=CONTRAX_URL),
                  data={'username': USERNAME, 'password': PASSWORD})
# Fail right here with an explicit HTTP error (bad credentials, wrong URL)
# instead of a KeyError on the missing cookie on the next line.
r.raise_for_status()
# Every later request passes this dict as `cookies=` to stay authenticated.
cookies = {'auth_token': r.cookies['auth_token']}
r

<Response [200]>

In [128]:
# Get project info (mostly to ensure the project exists)

r = requests.get('{contrax_url}/api/v1/project/projects/{project_id}'
                 .format(contrax_url=CONTRAX_URL, project_id=PROJECT_ID), cookies=cookies)
# A missing project or an expired session should surface as an HTTP error,
# not as a KeyError while reading 'type_data' below.
r.raise_for_status()
# Parse the body once and reuse it for both lookups.
project_info = r.json()
# The type code is used in the document grid (rawdb) URLs below,
# the type uid in the document type schema request.
document_type_code = project_info['type_data']['code']
document_type_uid = project_info['type_data']['uid']
document_type_code

'lease.LeaseDocument'

In [129]:
# Start an upload session
# Multiple file uploads can be started during one upload session and the overall progress can be tracked.

r = requests.post('{contrax_url}/api/v1/project/upload-session/'.format(contrax_url=CONTRAX_URL),
                  cookies=cookies, data={'project': PROJECT_ID})
# Stop on an HTTP error instead of failing with a KeyError on 'uid' below.
r.raise_for_status()

# Parse the body once; the session uid is needed by the upload and progress requests.
upload_session = r.json()
upload_session_id = upload_session['uid']

upload_session

{'created_by': 1, 'project': 30, 'uid': 'b195826e-16e1-4108-812e-95da9ac942f6'}

In [130]:
# Upload a file.
# For multiple files - repeat this request for each file.

upload_url = '{contrax_url}/api/v1/project/upload-session/{upload_session_id}/upload/'.format(
    contrax_url=CONTRAX_URL, upload_session_id=upload_session_id)

# The file is sent as the multipart part named 'file'; the handle is closed
# as soon as the request has completed.
with open(DOCUMENT_FN, 'rb') as document_file:
    r = requests.post(upload_url,
                      cookies=cookies,
                      files={'file': document_file},
                      data={'send_email_notifications': 'false'})
r.json()

'Loaded'

In [131]:
# Track upload progress
# Repeat checking progress until the document is parsed
import time

# Seconds to wait between two progress checks.
POLL_INTERVAL_SEC = 2
# Upper bound for the whole wait so the cell cannot spin forever if the
# reported progress never reaches 100%. Arbitrary safety limit - tune as needed.
MAX_WAIT_SEC = 3600

progress_url = ('{contrax_url}/api/v1/project/upload-session/{upload_session_id}/progress/'
                .format(contrax_url=CONTRAX_URL, upload_session_id=upload_session_id))
deadline = time.monotonic() + MAX_WAIT_SEC

while True:
    r = requests.get(progress_url, cookies=cookies)
    # Check the status BEFORE parsing: an error response is not guaranteed to
    # carry a JSON body, and parsing it first would raise before this check.
    if r.status_code != 200:
        break
    data = r.json()
    # Exit if there are no upload/parsing tasks started at all or if the total progress >= 100%
    if not data['document_tasks_progress'] or data['document_tasks_progress_total'] >= 100:
        break
    if time.monotonic() >= deadline:
        print('Gave up waiting after {} seconds; showing the last reported progress.'
              .format(MAX_WAIT_SEC))
        break
    time.sleep(POLL_INTERVAL_SEC)

# Show the last progress report, or the raw error body if the request failed.
data if r.status_code == 200 else r.text

{'document_tasks_progress': {'test_doc.pdf': {'document_id': 80,
   'document_progress': 100.0,
   'file_name': 'test_doc.pdf',
   'file_size': 24062,
   'task_progress_data': [{'file_name': 'test_doc.pdf',
     'task_id': '40b6da6c-56d8-4366-8391-229de9aec95e',
     'task_name': 'Load Documents',
     'task_progress': 100,
     'task_status': 'SUCCESS'}],
   'tasks_overall_status': 'SUCCESS'}},
 'document_tasks_progress_total': 100.0,
 'documents_total_size': 24062,
 'project_id': 30,
 'session_status': 'Parsed'}

In [132]:
# Get project statistics
project_stats_url = '{contrax_url}/api/v1/rawdb/project_stats/{project_id}/'.format(
    contrax_url=CONTRAX_URL,
    project_id=PROJECT_ID,
)
r = requests.get(project_stats_url, cookies=cookies)
r.json()

{'items': [],
 'reviewed_documents': 0,
 'time': 0.035933494567871094,
 'total_documents': 1}

In [133]:
# Get document id, name and term for each document in the project sorting them by term (desc)
# To see all possible columns - remove "columns" param from the query.
# To add more filters use params similar to "where_calculated_term=>60",
# "where_calculated_term=<=60" or "where_calculated_term=[50;60]".
r = requests.get('{contrax_url}/api/v1/rawdb/documents/{document_type_code}/?project_ids={project_id}&columns=document_id,document_name,calculated_term&order_by=calculated_term:desc'
                 .format(contrax_url=CONTRAX_URL, document_type_code=document_type_code, project_id=PROJECT_ID),
                 cookies=cookies)
# Surface HTTP errors directly instead of a KeyError on 'items' below.
r.raise_for_status()
# Parse the body once and reuse it for the id lookup and for display.
documents = r.json()
if not documents['items']:
    # An empty grid would otherwise fail with a bare "list index out of range".
    raise LookupError('No documents returned for project {project_id}'
                      .format(project_id=PROJECT_ID))
# Id of the first row in the requested sort order; used by the per-document requests below.
first_doc_id = documents['items'][0]['document_id']
documents

{'items': [{'calculated_term': None,
   'document_id': 80,
   'document_name': 'test_doc.pdf'}],
 'limit': 200,
 'time': 0.03664398193359375,
 'total_documents': 1}

In [None]:
# Look up a document by its file name (document grid API)
# Only document_name and document_id are requested; the id of the first match
# replaces first_doc_id for the requests below.
# NOTE(review): this query is not restricted to PROJECT_ID, so a document with the
# same name in another project could presumably match - confirm and add
# "project_ids" if that matters.
r = requests.get('{contrax_url}/api/v1/rawdb/documents/{document_type_code}/'
                 .format(contrax_url=CONTRAX_URL,
                         document_type_code=document_type_code),
                 # Passed via `params` so that file names containing spaces, '&', '#', etc.
                 # are URL-encoded instead of corrupting the query string.
                 params={'where_document_name': DOCUMENT_FN,
                         'columns': 'document_name,document_id'},
                 cookies=cookies)
# Surface HTTP errors directly instead of a KeyError on 'items' below.
r.raise_for_status()
# Parse the body once and reuse it for the id lookup and for display.
matching_documents = r.json()['items']
first_doc_id = matching_documents[0]['document_id']
matching_documents[0]

In [134]:
# Get fields of the concrete document from the project (document grid API)
# document_full_text is shortened here - see below for getting full document text
document_row_url = '{contrax_url}/api/v1/rawdb/documents/{document_type_code}/?where_document_id={doc_id}'.format(
    contrax_url=CONTRAX_URL,
    document_type_code=document_type_code,
    doc_id=first_doc_id,
)
r = requests.get(document_row_url, cookies=cookies)
# The grid is filtered by document id, so the first row is the requested document.
r.json()['items'][0]

{'alterations_allowed': False,
 'alterations_allowed_suggested': False,
 'area_acres': None,
 'area_acres_suggested': None,
 'area_size_sq_ft': None,
 'area_size_sq_ft_suggested': None,
 'assignee_name': None,
 'auto_renew': None,
 'auto_renew_suggested': None,
 'calculated_area_size_sq_ft': None,
 'calculated_area_size_sq_ft_suggested': None,
 'calculated_expiration_date': None,
 'calculated_expiration_date_suggested': None,
 'calculated_lease_type': 'gross',
 'calculated_lease_type_suggested': 'gross',
 'calculated_term': None,
 'calculated_term_suggested': None,
 'cluster_id': None,
 'commencement_date': None,
 'commencement_date_suggested': None,
 'document_full_text': 'This is a test document for OCR.\n\nPlain Text ~ Tab Width: 8 ~ Ln 3, Col 81 ~ INS',
 'document_full_text_length': 82,
 'document_id': 80,
 'document_is_reviewed': False,
 'document_name': 'test_doc.pdf',
 'document_title': None,
 'expiration_date': None,
 'expiration_date_suggested': None,
 'landlord': None,
 'land

In [135]:
# Get field schema - field uids, types, codes, names
# Will be required for understanding field UIDs in the document API request below
document_type_url = '{contrax_url}/api/v1/document/document-types/{doc_type_uid}/'.format(
    contrax_url=CONTRAX_URL,
    doc_type_uid=document_type_uid,
)
r = requests.get(document_type_url, cookies=cookies)
r.json()

{'code': 'lease.LeaseDocument',
 'editor_type': None,
 'fields_data': [{'allow_values_not_specified_in_choices': False,
   'category': None,
   'choices': [],
   'code': 'alterations_allowed',
   'confidence': None,
   'default': False,
   'default_value': '',
   'depends_on_fields': [],
   'description': '',
   'display_yes_no': True,
   'formula': '',
   'hidden_always': False,
   'hide_until': '',
   'modified_date': '2019-02-27T12:09:08.500421Z',
   'order': 0,
   'read_only': False,
   'requires_text_annotations': False,
   'title': 'Alterations Allowed',
   'type': 'related_info',
   'uid': 'fc611670-153a-4958-be5b-3a5996263f83',
   'value_aware': False},
  {'allow_values_not_specified_in_choices': False,
   'category': None,
   'choices': [],
   'code': 'area_acres',
   'confidence': None,
   'default': True,
   'default_value': None,
   'depends_on_fields': [],
   'description': None,
   'display_yes_no': False,
   'formula': '',
   'hidden_always': False,
   'hide_until': None

In [136]:
# Get full document text and other info
# In this API fields are referenced by UIDs.
# The URL is addressed by project id and document id only, so the unused
# document_type_code format argument has been dropped.
r = requests.get('{contrax_url}/api/v1/document/project/{project_id}/documents/{doc_id}/'
                 .format(contrax_url=CONTRAX_URL,
                         project_id=PROJECT_ID,
                         doc_id=first_doc_id),
                 cookies=cookies)
r.json()

{'assignee': None,
 'assignee_data': None,
 'available_assignees_data': [{'first_name': '',
   'full_name': 'Administrator',
   'last_name': '',
   'photo': None,
   'pk': 1,
   'role': 1,
   'username': 'Administrator'}],
 'available_statuses_data': [{'code': 'loaded',
   'group': 1,
   'group_data': {'code': 'starting',
    'is_active': True,
    'name': 'Starting',
    'order': 1,
    'pk': 1},
   'is_active': True,
   'name': 'Loaded',
   'order': 1,
   'pk': 1},
  {'code': 'not_started',
   'group': 1,
   'group_data': {'code': 'starting',
    'is_active': True,
    'name': 'Starting',
    'order': 1,
    'pk': 1},
   'is_active': True,
   'name': 'Not Started',
   'order': 2,
   'pk': 3},
  {'code': 'in_review',
   'group': 2,
   'group_data': {'code': 'in_progress',
    'is_active': True,
    'name': 'In Progress',
    'order': 2,
    'pk': 2},
   'is_active': True,
   'name': 'In Review',
   'order': 3,
   'pk': 2},
  {'code': 'completed',
   'group': 3,
   'group_data': {'code