### OCI Data Science - Useful Tips
<details>
<summary><font size="2">Check for Public Internet Access</font></summary>

```python
import requests
response = requests.get("https://oracle.com")
assert response.status_code==200, "Internet connection failed"
```
</details>
<details>
<summary><font size="2">Helpful Documentation </font></summary>
<ul><li><a href="https://docs.cloud.oracle.com/en-us/iaas/data-science/using/data-science.htm">Data Science Service Documentation</a></li>
<li><a href="https://docs.cloud.oracle.com/iaas/tools/ads-sdk/latest/index.html">ADS documentation</a></li>
</ul>
</details>
<details>
<summary><font size="2">Typical Cell Imports and Settings for ADS</font></summary>

```python
%load_ext autoreload
%autoreload 2
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

import logging
logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.ERROR)

import ads
from ads.dataset.factory import DatasetFactory
from ads.automl.provider import OracleAutoMLProvider
from ads.automl.driver import AutoML
from ads.evaluations.evaluator import ADSEvaluator
from ads.common.data import ADSData
from ads.explanations.explainer import ADSExplainer
from ads.explanations.mlx_global_explainer import MLXGlobalExplainer
from ads.explanations.mlx_local_explainer import MLXLocalExplainer
from ads.catalog.model import ModelCatalog
from ads.common.model_artifact import ModelArtifact
```
</details>
<details>
<summary><font size="2">Useful Environment Variables</font></summary>

```python
import os
print(os.environ["NB_SESSION_COMPARTMENT_OCID"])
print(os.environ["PROJECT_OCID"])
print(os.environ["USER_OCID"])
print(os.environ["TENANCY_OCID"])
print(os.environ["NB_REGION"])
```
</details>

### Inicialização

In [1]:
# Importa bibliotecas

import oci
import PIL
import base64
import matplotlib.pyplot as plt
import numpy as np
import json
import ocifs
import io
import uuid
import pandas as pd
import requests
import time
from collections import defaultdict

# Instantiate ocifs.OCIFileSystem with its default arguments.
# NOTE(review): `fs` is not referenced by any cell visible here — confirm it is still needed.
fs = ocifs.OCIFileSystem()

In [2]:
# Authentication config

CONFIG_PROFILE = "DEFAULT"
# Load the CONFIG_PROFILE section of the local OCI config file
config = oci.config.from_file('~/.oci/config', CONFIG_PROFILE)

# Compartment
COMPARTMENT_ID = "" #ADJUST! must be filled in before running the cells below

# Bucket name and Object Storage info
bucket_name = "" #ADJUST! must be filled in before running the cells below
namespace = ""  # placeholder only; overwritten below with the value from get_namespace()

object_storage_client = oci.object_storage.ObjectStorageClient(config)
namespace = object_storage_client.get_namespace().data

# Objects in the bucket whose names start with "ad"; only the name field is requested.
# NOTE(review): presumably these are the audio files to transcribe — confirm the prefix.
audio_list = object_storage_client.list_objects(namespace, bucket_name, prefix = "ad" ,fields="name")

### Transcrição

In [4]:
# Client for the OCI Speech service; speech_transcribe submits its jobs through it

ai_speech_client = oci.ai_speech.AIServiceSpeechClient(config)

def speech_transcribe(audio):
    """Submit an OCI Speech transcription job for a single audio object.

    The audio is read from the module-level ``namespace`` / ``bucket_name``
    and the job output is written back to that same bucket. The job is
    created in ``COMPARTMENT_ID`` with the "GENERIC" domain, language
    "pt-BR", and an additional "SRT" output format.

    Args:
        audio: Object name of the audio file inside the bucket. It is also
            used as the display name of the job.

    Returns:
        The response of ``ai_speech_client.create_transcription_job``, so
        the caller can inspect the created job. Earlier versions discarded
        it and returned ``None``; callers that ignore the return value are
        unaffected.
    """
    speech_models = oci.ai_speech.models

    input_location = speech_models.ObjectListInlineInputLocation(
        location_type="OBJECT_LIST_INLINE_INPUT_LOCATION",
        object_locations=[
            speech_models.ObjectLocation(
                namespace_name=namespace,
                bucket_name=bucket_name,
                object_names=[audio],
            )
        ],
    )
    output_location = speech_models.OutputLocation(
        namespace_name=namespace,
        bucket_name=bucket_name,
    )
    model_details = speech_models.TranscriptionModelDetails(
        domain="GENERIC",
        language_code="pt-BR",
        transcription_settings=speech_models.TranscriptionSettings(),
    )

    job_details = speech_models.CreateTranscriptionJobDetails(
        compartment_id=COMPARTMENT_ID,
        input_location=input_location,
        output_location=output_location,
        additional_transcription_formats=["SRT"],
        display_name=audio,
        model_details=model_details,
    )

    return ai_speech_client.create_transcription_job(
        create_transcription_job_details=job_details)

In [None]:
# Submit one transcription job per audio object in the listing
for audio_obj in audio_list.data.objects:
    audio_name = audio_obj.name
    print(audio_name)
    speech_transcribe(audio_name)

In [None]:
# Fetch the transcription jobs of the compartment and print, for each one,
# its position in the listing, the time it was accepted and its lifecycle state.
list_transcription_jobs_response = ai_speech_client.list_transcription_jobs(
    compartment_id=COMPARTMENT_ID)

# enumerate supplies the running job number, so the loop variable is used
# directly instead of re-indexing the list with a hand-maintained counter.
for ct, job in enumerate(list_transcription_jobs_response.data.items):
    print("Job no. ", ct, ", date= ", job.time_accepted,
          ", Status = ", job.lifecycle_state)

In [3]:
# Objects in the bucket whose names start with "job"; only the name field is requested.
# NOTE(review): presumably the output that OCI Speech wrote for the transcription jobs — confirm the prefix.
transcription_json_list = object_storage_client.list_objects(namespace, bucket_name, prefix = "job" ,fields="name")

In [None]:
transcription_json_list

# Print the name of each listed object, one per line
for listed_obj in transcription_json_list.data.objects:
    print(listed_obj.name)

In [5]:
import json

# Build the results table: one row per object that holds a transcription.
# 'sentiment', 'reason' and 'bad word' start empty and are filled in later.
df_columns = ['id', 'transcription', 'sentiment', 'reason', 'bad word']

# Rows are collected in a list and turned into a DataFrame once at the end,
# instead of re-copying the whole frame with pd.concat on every iteration.
rows = []

for jsonn in transcription_json_list.data.objects:
    try:
        get_object_response = object_storage_client.get_object(
            namespace_name=namespace,
            bucket_name=bucket_name,
            http_response_content_type='text/plain',
            object_name=jsonn.name)

        data = json.loads(get_object_response.data.content)
        transcription = data['transcriptions'][0]['transcription']
    except Exception as exc:
        # Still best effort: an object that cannot be fetched, is not JSON, or
        # lacks the expected keys is skipped. Unlike the former bare
        # `except: pass`, the skip is reported and KeyboardInterrupt /
        # SystemExit are no longer swallowed.
        print(f"Skipping {jsonn.name}: {exc!r}")
        continue

    rows.append({'id': jsonn.name, 'transcription': transcription})

# dtype=object keeps the still-empty columns able to receive strings later.
df = pd.DataFrame(rows, columns=df_columns, dtype=object)

In [None]:
# Lift pandas' display limits (columns, rows, cell width) before showing the table
for display_option in ('display.max_columns', 'display.max_rows', 'display.max_colwidth'):
    pd.set_option(display_option, None)

display(df)

### Análise de Sentimento

In [7]:
def sentGenAI(transcription, model_id="",
              endpoint="https://inference.generativeai.sa-saopaulo-1.oci.oraclecloud.com"):
    """Ask OCI Generative AI to classify the sentiment of a transcription.

    Sends a Cohere chat request whose prompt (in Portuguese) asks for three
    labelled lines: ``SENTIMENTO:``, ``JUSTIFICATIVA:`` and ``PALAVRÃO:``.
    The request runs in the module-level ``COMPARTMENT_ID`` using the
    module-level ``config``, with no retry strategy.

    Args:
        transcription: Text to be evaluated; it is appended to the prompt.
        model_id: Identifier of the on-demand model to call. The default is
            the empty placeholder the notebook shipped with.
            NOTE(review): an empty model id is presumably rejected by the
            service, so set this before running.
        endpoint: Service endpoint of the Generative AI inference API.
            Defaults to the endpoint the notebook originally hard-coded.

    Returns:
        The response object returned by ``GenerativeAiInferenceClient.chat``.
    """
    generative_ai_inference_client = oci.generative_ai_inference.GenerativeAiInferenceClient(
        config=config,
        service_endpoint=endpoint,
        retry_strategy=oci.retry.NoneRetryStrategy(),
        timeout=(10, 240))

    prompt = f"""
    Você é responsável por avaliar o sentimento da transcrição que lhe será fornecida.
    Você deve classificar o sentimento entre POSITIVO, NEUTRO ou NEGATIVO e deve, com poucas palavras, explicar a classificação atribuída.
    Você deve identificar se a transcrição contem alguma palavra obscena ou grosseira e, em caso positivo deve retornar SIM ou em caso negativo retornar NÃO.

    Sua resposta deve estar no seguinte formato:

    SENTIMENTO: <Sentimento>
    JUSTIFICATIVA: <Justificativa>
    PALAVRÃO: <Palavrão>

    Não adicione nenhuma informação e não use informações que não estejam disponíveis na transcrição.

    {transcription}
    """

    chat_request = oci.generative_ai_inference.models.CohereChatRequest()
    chat_request.message = prompt
    chat_request.max_tokens = 1000
    chat_request.temperature = 0
    chat_request.frequency_penalty = 0
    chat_request.top_p = 0.75
    chat_request.top_k = 0

    chat_detail = oci.generative_ai_inference.models.ChatDetails()
    chat_detail.serving_mode = oci.generative_ai_inference.models.OnDemandServingMode(model_id=model_id)
    chat_detail.chat_request = chat_request
    chat_detail.compartment_id = COMPARTMENT_ID

    return generative_ai_inference_client.chat(chat_detail)

In [8]:
def atualizar_dataframe_com_variaveis(chat_result, df, index):
    """Parse the chatbot answer and store its fields in one DataFrame row.

    The first message in the chat history whose role is ``'CHATBOT'`` is
    scanned line by line for the labels ``SENTIMENTO:``, ``JUSTIFICATIVA:``
    and ``PALAVRÃO:``. Their values are written to the ``'sentiment'``,
    ``'reason'`` and ``'bad word'`` columns of row ``index``. A label that is
    not found leaves ``None`` in its column. If a label occurs on several
    lines, the last one wins.

    Args:
        chat_result: Object exposing ``data.chat_response.chat_history``, an
            iterable of entries with ``role`` and ``message`` attributes.
        df: DataFrame to update; it is modified in place.
        index: Row label of ``df`` to write to.

    Returns:
        The same ``df`` object. It is returned unchanged when the history has
        no chatbot message.
    """
    # Label expected in the answer -> DataFrame column that receives its value.
    campos = {
        "SENTIMENTO:": 'sentiment',
        "JUSTIFICATIVA:": 'reason',
        "PALAVRÃO:": 'bad word',
    }

    # A missing history is treated like an empty one.
    chat_history = chat_result.data.chat_response.chat_history or []
    mensagem = next(
        (chat.message for chat in chat_history if chat.role == 'CHATBOT'),
        None,
    )
    if mensagem is None:
        return df

    valores = dict.fromkeys(campos.values())

    for linha in mensagem.split('\n'):
        # The model may indent its answer or end lines with '\r', so the line
        # is stripped before the label is matched.
        linha = linha.strip()
        for rotulo, coluna in campos.items():
            if linha.startswith(rotulo):
                # Slice off the leading label only, so a value that repeats
                # the label text is kept whole.
                valores[coluna] = linha[len(rotulo):].strip()
                break

    for coluna, valor in valores.items():
        df.at[index, coluna] = valor

    return df

In [9]:
# df2 is a second name for df (not a copy), so the columns written below
# are visible through both names.
df2 = df

# For every row: classify its transcription and store the parsed answer in that row.
for ind in df2.index:
    transc = df2.at[ind, "transcription"]
    sentiment = sentGenAI(transc)
    sentiment_post = atualizar_dataframe_com_variaveis(sentiment, df2, ind)

In [None]:
# Show the table after the sentiment, reason and bad-word columns were filled in
display(df2)