### OCI Data Science - Useful Tips
<details>
<summary><font size="2">Check for Public Internet Access</font></summary>

```python
import requests
response = requests.get("https://oracle.com")
assert response.status_code==200, "Internet connection failed"
```
</details>
<details>
<summary><font size="2">Helpful Documentation</font></summary>
<ul><li><a href="https://docs.cloud.oracle.com/en-us/iaas/data-science/using/data-science.htm">Data Science Service Documentation</a></li>
<li><a href="https://docs.cloud.oracle.com/iaas/tools/ads-sdk/latest/index.html">ADS documentation</a></li>
</ul>
</details>
<details>
<summary><font size="2">Typical Cell Imports and Settings for ADS</font></summary>

```python
%load_ext autoreload
%autoreload 2
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

import logging
logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.ERROR)

import ads
from ads.dataset.factory import DatasetFactory
from ads.automl.provider import OracleAutoMLProvider
from ads.automl.driver import AutoML
from ads.evaluations.evaluator import ADSEvaluator
from ads.common.data import ADSData
from ads.explanations.explainer import ADSExplainer
from ads.explanations.mlx_global_explainer import MLXGlobalExplainer
from ads.explanations.mlx_local_explainer import MLXLocalExplainer
from ads.catalog.model import ModelCatalog
from ads.common.model_artifact import ModelArtifact
```
</details>
<details>
<summary><font size="2">Useful Environment Variables</font></summary>

```python
import os
print(os.environ["NB_SESSION_COMPARTMENT_OCID"])
print(os.environ["PROJECT_OCID"])
print(os.environ["USER_OCID"])
print(os.environ["TENANCY_OCID"])
print(os.environ["NB_REGION"])
```
</details>

In [5]:
import oci 
import os
import base64
import ocifs
import PIL.Image as Image
import io
import re
import numpy as np 
import pandas as pd

# Load the OCI SDK configuration from the file named "config" (path is
# relative to the working directory of the notebook session).
config = oci.config.from_file("config")

# Service endpoint for the Language service in the us-ashburn-1 region.
endpoint = "https://language.aiservice.us-ashburn-1.oci.oraclecloud.com"

# Language client (used below for entity detection) is pinned to the explicit
# endpoint above; the Vision client (used below for document text detection)
# is created from the config alone, with no explicit endpoint passed here.
ai_client = oci.ai_language.AIServiceLanguageClient(config, service_endpoint=endpoint)
ai_vision_client = oci.ai_vision.AIServiceVisionClient(config=config)

def valida_documento(ner_inference_result):
    """Check whether the detected entities form a complete receipt.

    The entity types of the first document in ``ner_inference_result`` are
    compared against two checklists: payment (``Pagamento``, ``Valor``,
    ``Data``) and Pix (``Pix``, ``Valor``, ``Data``). The payment checklist
    is tried first.

    Args:
        ner_inference_result: Object exposing
            ``.data.documents[0].entities``, where each entity has a
            ``type`` attribute.

    Returns:
        A ``(validade, diff)`` tuple. ``validade`` is ``True`` when every
        type of at least one checklist was detected. ``diff`` is the
        ``np.setdiff1d`` of the matched checklist against the detected
        types (therefore empty when valid). When neither checklist is
        satisfied, ``diff`` lists the types missing from the *payment*
        checklist only.
    """
    campos_pagamento = ['Pagamento', 'Valor', 'Data']
    campos_pix = ['Pix', 'Valor', 'Data']

    tipos_detectados = [
        entidade.type
        for entidade in ner_inference_result.data.documents[0].entities
    ]

    # First checklist that is fully covered wins; payment has priority.
    for campos in (campos_pagamento, campos_pix):
        if all(campo in tipos_detectados for campo in campos):
            return True, np.setdiff1d(campos, tipos_detectados)

    # Neither checklist matched: report what the payment checklist lacks.
    return False, np.setdiff1d(campos_pagamento, tipos_detectados)

# Image to analyse. Alternative inputs that were swapped in here:
#   "pix_24.jpg", "agendamento_7.jpg"
input_path = "pagamento_9.jpg"

# Read the image and base64-encode it so it can be sent inline in the
# request. The file is closed before the (potentially slow) service call.
with open(input_path, "rb") as image_file:
    encoded_string = base64.b64encode(image_file.read())
decoded_string = encoded_string.decode("utf-8", "ignore")

# Run text detection on the inline document.
# NOTE(review): language is 'ENG' although the entity labels handled in this
# file are Portuguese -- confirm this is the intended setting.
analyze_document_response = ai_vision_client.analyze_document(
    analyze_document_details=oci.ai_vision.models.AnalyzeDocumentDetails(
        compartment_id="ocid1.compartment.oc1..aaaaaaaal63rmctoojg7q2pvdpeuqknebyaqg3h7gcci6whf74ht7tfapl4q",
        features=[
            oci.ai_vision.models.DocumentTextDetectionFeature(
                feature_type="TEXT_DETECTION"
            )
        ],
        language='ENG',
        document=oci.ai_vision.models.InlineDocumentDetails(
            source="INLINE",
            data=decoded_string,
        ),
    )
)

# Flatten every detected word (all pages, in order) into one space-separated
# string. Joining directly avoids going through the list's repr, which would
# mangle words containing quotes, brackets or backslashes.
words_final = " ".join(
    word.text
    for page in analyze_document_response.data.pages
    for word in page.words
)
            
# Build a single-document entity-detection request for the OCR text and send
# it to the Language model endpoint identified by ``endpoint_id``.
ner_text = oci.ai_language.models.BatchDetectLanguageEntitiesDetails(
    endpoint_id="ocid1.ailanguageendpoint.oc1.iad.amaaaaaatsbrckqamwnoz5zpy3h2feyt4rjtwm5btthfyqx2e6k4uone3zqq",
    documents=[
        oci.ai_language.models.TextDocument(key="1", text=words_final),
    ],
)
ner_inference_result = ai_client.batch_detect_language_entities(ner_text)

# Print type, text and confidence for each entity detected in the first
# (and only submitted) document.
entities = ner_inference_result.data.documents[0].entities
for i, entity in enumerate(entities):
    print(f"Entidade {i+1}:", entity.type)
    print(f"Texto {i+1}:", entity.text)
    score = round(entity.score, 3)
    # Round again after scaling: a bare ``score*100`` can surface binary
    # floating-point noise (e.g. 56.99999999999999) in the printed value.
    print(f"Score {i+1}:", round(score * 100, 1), "%")
    print("")

# Check the detected entity types against the required receipt fields.
validade, diff = valida_documento(ner_inference_result)

print(validade, diff)

Entidade 1: Pagamento
Texto 1: Pagamento realizado
Score 1: 100.0 %

Entidade 2: Pagamento
Texto 2: COMPROVANTE DE PAGAMENTO
Score 2: 99.9 %

Entidade 3: Data
Texto 3: Data de Pagamento: 06/04/2022
Score 3: 100.0 %

Entidade 4: Valor
Texto 4: Valor: R$ 120,60
Score 4: 100.0 %

Entidade 5: Data
Texto 5: Data da Transacao: 06/04/2022
Score 5: 99.8 %

True []


In [4]:
# Show the version of the installed ``oci`` package.
print(oci.__version__)

2.88.2+preview.1.5970
