### Calling the Azure OpenAI Service

In [80]:
import logging
import time
import requests
from openai import OpenAIError

logging.basicConfig(level=logging.INFO)

def _call_azure_api(prompt: str, system_prompt: str, temperature: float, max_tokens: int) -> str:
    """Send one chat-completion request to the Azure OpenAI deployment and return the reply text.

    The request is attempted up to three times, sleeping 10 seconds between
    attempts. An attempt fails when the HTTP call raises, the body is not valid
    JSON, or the response carries no choices / no message content.

    Args:
        prompt: Text sent as the ``user`` message.
        system_prompt: Text sent as the ``system`` message.
        temperature: Sampling temperature forwarded in the request body.
        max_tokens: Completion token cap forwarded in the request body.

    Returns:
        The ``content`` of the first choice's message.

    Raises:
        RuntimeError: If ``AZURE_OPENAI_API_KEY`` is not set, or if every
            attempt returned a response without usable content.
        requests.RequestException, ValueError, OpenAIError: Re-raised from the
            last attempt when that attempt failed with an exception.
    """
    import os  # local import: the credential is read from the environment, never from source

    url = "https://bigdata-openai-gpt-2.openai.azure.com/openai/deployments/bigdata-gpt35-2/chat/completions?api-version=2023-05-15"
    api_key = os.environ.get("AZURE_OPENAI_API_KEY")
    if not api_key:
        raise RuntimeError(
            "Set the AZURE_OPENAI_API_KEY environment variable before calling the Azure API."
        )
    headers = {"api-key": api_key}

    json_body = {
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt}
        ],
        "max_tokens": max_tokens,
        "temperature": temperature
    }

    retries = 3
    for attempt in range(1, retries + 1):
        failure = None
        try:
            # A timeout keeps a stalled connection from blocking the notebook forever.
            response = requests.post(url, headers=headers, json=json_body, timeout=60)
            payload = response.json()
        except (requests.RequestException, ValueError, OpenAIError) as error:
            # requests raises RequestException; a non-JSON body raises a ValueError subclass.
            logging.error("Error: %s", error)
            failure = error
        else:
            choices = payload.get("choices", [])
            if choices:
                content = choices[0].get("message", {}).get("content")
                if content:
                    # Usage may be absent; fall back to zero rather than raising KeyError.
                    usage = payload.get("usage") or {}
                    cost = _count_cost(
                        "gpt-3.5-turbo",
                        usage.get("prompt_tokens", 0),
                        usage.get("completion_tokens", 0),
                    )
                    logging.info("Cost: %f", cost)
                    return content
                logging.error("No content found in response.")
            else:
                logging.error("No choices found in response.")
                if "error" in payload:
                    # Surface the service's own error body to make failures diagnosable.
                    logging.error("Service error: %s", payload["error"])

        if attempt == retries:
            logging.error("Failed to generate a response after %d attempts. Aborting.", retries)
            if failure is not None:
                raise failure
            # A bare `raise` is invalid here because no exception is active.
            raise RuntimeError(
                "Azure OpenAI returned no usable content after %d attempts." % retries
            )
        logging.warning("Retrying (%d/%d) after 10 seconds...", attempt, retries)
        time.sleep(10)

def _count_cost(model: str, prompt_tokens: float, completion_tokens: float) -> float:
    if model == "gpt-3.5-turbo":
        return (prompt_tokens / 1000) * 0.0015 + (completion_tokens / 1000) * 0.002
    return (prompt_tokens / 1000) * 0.003 + (completion_tokens / 1000) * 0.004

# Smoke test: send one user prompt with a system prompt, temperature 0.8 and a
# 200-token completion cap, then print the returned reply text.
res = _call_azure_api("妳好嗎", "妳要naughty的回答", 0.8, 200)
print(res)

ERROR:root:No content found in response.
INFO:root:Cost: 0.000215


我作為一個AI助手，並沒有情感，所以我無法說“好”或“不好”。不過，我一直保持運作良好，可以為您提供幫助。如果您需要任何幫助，請隨時告訴我。


### 改良GPT邏輯

#### 第一層 接收逐字稿 -> 計算最佳分配 -> 獲得第一次彙整的List => chunk_list

In [97]:
import tiktoken
from typing import List
import math
def _count_tokens(content: str, model: str = "gpt-3.5-turbo-0613") -> int:
    """Count the tokens used by ``content`` when sent as a single ``user`` chat message.

    The total is the per-message overhead for ``model``, plus the encoded
    lengths of the message's ``role`` and ``content`` values, plus a fixed 3.

    Args:
        content: Text placed in the ``content`` field of the message.
        model: Model name that selects the tokenizer and the per-message overhead.

    Returns:
        The token count for the one-message conversation.

    Raises:
        NotImplementedError: If ``model`` is not one of the names handled below.
    """
    try:
        tokenizer = tiktoken.encoding_for_model(model)
    except KeyError:
        print("Warning: model not found. Using cl100k_base encoding.")
        tokenizer = tiktoken.get_encoding("cl100k_base")

    if model == "gpt-3.5-turbo-0301":
        per_message_overhead = 4
    elif model in (
        "gpt-3.5-turbo-0613",
        "gpt-3.5-turbo-16k-0613",
        "gpt-4-0314",
        "gpt-4-32k-0314",
        "gpt-4-0613",
        "gpt-4-32k-0613",
    ):
        per_message_overhead = 3
    else:
        raise NotImplementedError(
            f"""num_tokens_from_messages() is not implemented for model {model}."""
        )

    # The message built here has no "name" field, so no per-name adjustment applies.
    message = {"role": "user", "content": content}
    encoded_length = sum(len(tokenizer.encode(field)) for field in message.values())
    return per_message_overhead + encoded_length + 3
def read_txt_file(file_path, encoding="utf-8"):
    """Read a whole text file and return its contents.

    The file is decoded with an explicit encoding so the result does not depend
    on the platform's locale default (the transcripts contain non-ASCII text).

    Args:
        file_path: Path of the file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8.

    Returns:
        The file contents, or ``""`` if the file could not be opened or read
        (an error line is printed in that case).

    Raises:
        UnicodeDecodeError: If the file is not valid in ``encoding``.
    """
    try:
        with open(file_path, 'r', encoding=encoding) as file:
            return file.read()
    except IOError:
        print("Error: Unable to read the file.")
        return ""
    
def split_transcript(transcript: str, chunk_sizes: List[int], model: str = "gpt-3.5-turbo-0613") -> List[str]:
    """Cut ``transcript`` into consecutive pieces whose lengths are given in tokens.

    The transcript is encoded once, sliced at the cumulative offsets of
    ``chunk_sizes``, and each slice is decoded back to text. Tokens beyond
    ``sum(chunk_sizes)`` are not returned; sizes that reach past the end of the
    token sequence produce shorter (possibly empty) pieces.

    Args:
        transcript: Full text to split.
        chunk_sizes: Token length of each piece, in order.
        model: Model name used to pick the tokenizer; unknown names fall back
            to ``cl100k_base`` with a printed warning.

    Returns:
        One decoded string per entry of ``chunk_sizes``.
    """
    try:
        tokenizer = tiktoken.encoding_for_model(model)
    except KeyError:
        print("Warning: model not found. Using cl100k_base encoding.")
        tokenizer = tiktoken.get_encoding("cl100k_base")

    tokens = tokenizer.encode(transcript)

    pieces = []
    offset = 0
    for length in chunk_sizes:
        # NOTE(review): a slice boundary may fall inside a multi-token character — confirm decode output.
        pieces.append(tokenizer.decode(tokens[offset:offset + length]))
        offset += length
    return pieces

def set_optimal_chunk_sizes(transcript_token, model_limit, chunk_min_length, max_token, prompt):
    """Plan token sizes for splitting a transcript that does not fit in one request.

    Each request can hold ``model_limit - max_token - prompt`` transcript tokens.
    The transcript is cut into full-size chunks plus a remainder. When the
    remainder would be shorter than ``chunk_min_length``, the full-size chunks
    are shrunk evenly so the final chunk is exactly the minimum length.

    Args:
        transcript_token: Token count of the whole transcript.
        model_limit: Total token budget of one request.
        chunk_min_length: Smallest acceptable size for the final chunk. Values
            above the per-request space are clamped to that space.
        max_token: Tokens reserved for the completion.
        prompt: Tokens reserved for the prompt.

    Returns:
        ``None`` when the transcript plus reservations fits in ``model_limit``;
        otherwise a list of chunk sizes in tokens, none larger than the
        per-request space.

    Raises:
        ValueError: If the reservations leave no room for transcript tokens.
    """
    total_tokens = transcript_token + max_token + prompt
    if total_tokens <= model_limit:
        return None

    available_chunk_space = model_limit - max_token - prompt
    if available_chunk_space <= 0:
        raise ValueError(
            "max_token + prompt must be smaller than model_limit to leave room for the transcript."
        )

    fill_num, last_chunk_space = divmod(transcript_token, available_chunk_space)

    # Exact multiple: the full-size chunks already cover everything, no extra chunk needed.
    if last_chunk_space == 0:
        return [available_chunk_space] * fill_num

    # The final chunk must itself fit in one request, so the minimum cannot exceed the space.
    min_length = min(chunk_min_length, available_chunk_space)

    if last_chunk_space >= min_length:
        return [available_chunk_space] * fill_num + [last_chunk_space]

    # Remainder too short: give the last chunk the minimum and spread the rest evenly.
    space_for_remaining_chunks = transcript_token - min_length
    new_chunk_space = math.ceil(space_for_remaining_chunks / fill_num)
    return [new_chunk_space] * fill_num + [min_length]


# Load the transcript from a machine-specific absolute path.
# NOTE(review): read_txt_file returns "" when the file cannot be read, so a bad
# path does not stop this cell.
absolute_path = "/Users/lucienlin/aiProjects/lucien-ai-meeting/data/test.txt" 
transcript = read_txt_file(absolute_path)
# Token count of the transcript as computed by _count_tokens.
transcript_token =_count_tokens(transcript)
# Budget values passed to set_optimal_chunk_sizes below.
model_limit = 4000
chunk_min_length = 800
max_token = 1200
prompt = 200

chunk_sizes = set_optimal_chunk_sizes(transcript_token, model_limit, chunk_min_length, max_token, prompt)
print(chunk_sizes)
# set_optimal_chunk_sizes returns None when the transcript plus max_token and
# prompt fits within model_limit; in that case keep the transcript as one chunk.
chunk_list = []
if not chunk_sizes:
    chunk_list.append(transcript)
else:
    chunk_list = split_transcript(transcript, chunk_sizes)

[2600, 2190]


#### 第二層 Map Reduce

In [98]:
# Count the tokens in each chunk produced by the first layer.
chunk_token_list = []
for chunk in chunk_list:
    chunk_token_list.append(_count_tokens(chunk))
chunk_token_list

[2606, 2190]

In [99]:
# Hard-coded token counts that replace the measured chunk_token_list from here on.
# NOTE(review): presumably stand-ins for the token counts of the layer-1 GPT outputs — confirm.
gpt_layer1_result = [1100,1121]
chunk_token_list = gpt_layer1_result

In [102]:
def find_chunks_within_limit(model_limit, max_token, prompt, chunk_token_list):
    """Group consecutive chunk indices so each group's token cost stays within ``model_limit``.

    Every chunk is charged its own token count plus ``max_token + prompt``.
    Chunks are taken in order; a new group is started whenever adding the next
    chunk would push the running cost above ``model_limit``. A chunk whose cost
    exceeds the limit on its own is placed in a group by itself.

    Args:
        model_limit: Maximum total cost allowed for one group.
        max_token: Completion reservation added to every chunk's cost.
        prompt: Prompt reservation added to every chunk's cost.
        chunk_token_list: Token count of each chunk, in order.

    Returns:
        A list of non-empty index lists covering ``range(len(chunk_token_list))``
        in order. Empty input yields an empty list.
    """
    # NOTE(review): the reservation is charged per chunk, not once per group — confirm that is intended.
    overhead = max_token + prompt

    groups = []
    current_group = []
    current_cost = 0

    for idx, tokens in enumerate(chunk_token_list):
        cost = tokens + overhead
        # Only flush a non-empty group, so an oversized single chunk never emits an empty group.
        if current_group and current_cost + cost > model_limit:
            groups.append(current_group)
            current_group = []
            current_cost = 0
        current_group.append(idx)
        current_cost += cost

    if current_group:
        groups.append(current_group)

    return groups

def simulate_gpt_layer2(chunk_list):
    """Return a list holding the constant 800 once for every item in ``chunk_list``.

    The items themselves are ignored; only their number determines the result.
    """
    return [800 for _ in chunk_list]

def map_reduce(model_limit, max_token, prompt, chunk_token_list, chunk_list, summarize=None):
    """Repeatedly merge chunks into groups that fit the model limit until one result remains.

    Each round groups the current chunks with ``find_chunks_within_limit``,
    reduces every group to one text with ``summarize``, and feeds those texts
    (re-counted with ``_count_tokens``) into the next round.

    Args:
        model_limit: Token budget of one request.
        max_token: Tokens reserved for the completion.
        prompt: Tokens reserved for the prompt.
        chunk_token_list: Token count of each entry of ``chunk_list``.
        chunk_list: Chunk texts to reduce.
        summarize: Callable taking a list of chunk texts and returning one
            text. Defaults to joining the texts with newlines, a placeholder
            for the GPT summarization step that is not wired in yet.

    Returns:
        The list of texts produced by the last round. It has at most one
        element unless a round failed to reduce the number of chunks, in which
        case that round's results are returned as they are.
    """
    if summarize is None:
        # Placeholder for the GPT call: concatenates instead of summarizing.
        summarize = "\n".join

    while True:
        groups = find_chunks_within_limit(model_limit, max_token, prompt, chunk_token_list)

        round_results = []
        for indices in groups:
            if not indices:
                # Defensive: an empty group carries nothing to summarize.
                continue
            round_results.append(summarize([chunk_list[index] for index in indices]))

        # Done once everything has been merged into a single result (or there was no input).
        if len(round_results) <= 1:
            return round_results
        # No group merged anything this round; another pass would loop forever.
        if len(round_results) >= len(chunk_list):
            return round_results

        # Feed this round's output into the next round.
        chunk_list = round_results
        chunk_token_list = [_count_tokens(text) for text in round_results]

# Budgets for the layer-2 run; these rebind the module-level names set by the
# layer-1 cell (max_token was 1200 there).
model_limit = 4000
max_token = 500
prompt = 200

# Run the layer-2 map-reduce over the current chunk_list / chunk_token_list.
map_reduce(model_limit, max_token, prompt, chunk_token_list, chunk_list)
# print(result)

啊啊gpt ['Tethers\n資料的收入跟呈現的問題\n這個是\nAlisa\n開的\nAlisa\n現在\nOfficially\n已經轉調了\n是嗎?\n不定時關心一下大家\nTethers\n資料呈現的問題\n好\n那就老樣子\n就是\nAlisa\n你看不同意就抗辯\n這題是還在進行中\n是不是?\n對\n這個是預計下禮拜才會發問題\n還沒有上\n結發怎麼做都已經確定了\n賣推波無法成功刪選\n這個是什麼狀況?\n這個也是正在處理\n正在處理\n18\n號\n那\nAlisa\n坐在這邊所以應該是\nOK\n啦\n客戶版不會出現微頻的現象\n這個我知道\n這個就是\n這個品牌就是在強制\n這個是業務部拿來看客戶狀態的\nGoogle\nStudio\n的那台貨\n那這個是\n那我覺得這個滿麻煩的\n這就再看看是什麼\n現在是誰?\n我\n好\n我們等產銷會釐清資源以後再看\n沒有請分位\n這題是等著上線吧?\n等著上線\n這應該是等著上線\n報告異常是?\n這也在等著上線\n所以是為什麼?\n為什麼報告異常?\n它有一個\n它是兩個\n非預期的\n對\n非預期的\n你知道怎麼修的嗎?\n它去判斷回應\n它去判斷後面的回應\n可是如果\n主題關鍵是關聯性分數\n我大概知道這個了\n反正他們為了要判斷那個\n有的時候\n像其實這次歐文哥送出來有一個GDP的報告\n主題是TEMP\n以前是沒有辦法去\n就黃牛嘛\n對\n你用TEMP\n以前是沒有辦法Handle這一塊\n那現在他們新增一些機制\n可以從關鍵字去判斷說\n你這份報告想要討論什麼\n那看起來是那個機制出了點問題\n所以導致查不出來報告\n反正只要問題修掉就好\n分數無法使用\n那一樣就請Alisa\n這題滿久的就請Alisa\n看看這個可不可以接受\n哇\n任何關鍵字還是有網址\n這個應該正好修吧\n這是要等上線的問題\n對\n這可以再等上線\nBD網頁建置需求\n這個我知道這一題\n那就是看\n沒關係\n這就先卡在這裡好了\n因為這就是一個\n用戶有需求需要評估人員\n那我們這邊已經評估完畢了\n那原則上這個就算是結束了\n那擺一下看看還有沒有什麼後續\n不然下禮拜就把它封掉了\n頻道增加需求\n那這個是\nSteven成龍一個禮拜前開的\n變成還在處理中\n這個就好\n反正就是加頻道\n那這個