<a href="https://colab.research.google.com/github/NagarjunaD024/Datascience-LLMS/blob/main/src/Cost_optimizing_GPT/tunable_classifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import openai
import pandas as pd
import time

from google.colab import drive, userdata,  output
# Mount Google Drive at /content/drive so the CSV files referenced further
# down (under '/content/drive/My Drive/...') can be read from this runtime.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Shared OpenAI client used by call_llm. The API key is read from the Colab
# user-data entry named 'OPENAI_API_KEY' rather than being written in the notebook.
client = openai.OpenAI(api_key = userdata.get('OPENAI_API_KEY'))

In [None]:
def create_single_text_prompt(text, label):
    """ Build the sentiment-classification prompt for one text.

    The prompt consists of three lines: the text itself, the question,
    and the answer line, to which the label is appended directly.

    Args:
        text: the text whose sentiment should be classified.
        label: the known class label; pass an empty string when the
            label is unavailable so the answer line is left open.

    Returns:
        The prompt as a single newline-separated string.
    """
    question = 'Is the sentiment positive or negative?'
    # The label directly follows the colon, with no space in between.
    answer_line = f'Answer ("pos"/"neg"):{label}'
    # f'{text}' keeps the same formatting behaviour for non-string inputs.
    return '\n'.join([f'{text}', question, answer_line])

In [None]:
def create_prompt(text, samples):
    """ Assemble the full few-shot prompt for sentiment classification.

    Each row of `samples` becomes one solved example (its 'text' column
    followed by its 'sentiment' column as the answer). The text to
    classify is appended last with an empty answer.

    Args:
        text: the text to classify.
        samples: DataFrame of labelled examples to place before the
            query; rows are read via the 'text' and 'sentiment' columns.
            A DataFrame without rows contributes no examples.

    Returns:
        Input for the LLM: all single-text prompts joined by newlines.
    """
    # iterrows() yields nothing for a DataFrame without rows, so the
    # column lookups are only performed when there are examples.
    solved_examples = [
        create_single_text_prompt(row['text'], row['sentiment'])
        for _, row in samples.iterrows()
    ]
    open_query = create_single_text_prompt(text, '')
    return '\n'.join(solved_examples + [open_query])


In [None]:
def call_llm(prompt, model, max_tokens, out_tokens):
    """ Query large language model and return answer.

    The prompt is sent as a single user message with temperature 0.
    Up to three attempts are made; after a failed attempt the error is
    printed and the function waits 2, then 4 seconds before retrying.
    No wait happens after the final attempt.

    Args:
        prompt: input prompt for language model.
        model: name of OpenAI model to choose.
        max_tokens: maximal output length in tokens; if falsy, no limit
            is passed to the API.
        out_tokens: comma-separated string of token IDs to prioritize in
            the output; if falsy, no logit bias is passed to the API.

    Returns:
        Tuple of (answer by language model, total number of tokens).

    Raises:
        Exception: if every attempt failed. The error from the last
            attempt is attached as the cause.
    """
    optional_parameters = {}
    if max_tokens:
        optional_parameters['max_tokens'] = max_tokens
    if out_tokens:
        # A bias of 100 is the strongest positive value listed in the
        # OpenAI API reference, which strongly favours the given tokens.
        optional_parameters['logit_bias'] = {
            int(tid): 100 for tid in out_tokens.split(',')
        }

    max_attempts = 3
    last_error = None
    for attempt in range(1, max_attempts + 1):
        try:
            response = client.chat.completions.create(
                model=model,
                messages=[
                    {'role':'user', 'content':prompt}
                    ],
                **optional_parameters, temperature=0
                )

            answer = response.choices[0].message.content
            nr_tokens = response.usage.total_tokens
            return answer, nr_tokens

        except Exception as e:
            last_error = e
            print(f'Exception: {e}')
            # Back off only if another attempt follows; sleeping right
            # before giving up would just delay the failure.
            if attempt < max_attempts:
                time.sleep(attempt * 2)

    # Chain the last underlying error so the root cause stays visible.
    raise Exception('Cannot query OpenAI model!') from last_error

In [None]:
# --- Input files on the mounted Google Drive --------------------------------
# file_path: reviews to classify; sample_path: labelled reviews for few-shot use.
file_path = '/content/drive/My Drive/Colab Notebooks/DataScience+GPT/Data/textanalysis/reviews.csv'
sample_path = '/content/drive/My Drive/Colab Notebooks/DataScience+GPT/Data/textanalysis/train_reviews.csv'

df = pd.read_csv(file_path)

# --- Tunable settings -------------------------------------------------------
model = 'gpt-3.5-turbo'
# Number of labelled examples placed in front of every query (0 disables them).
nr_samples = 1
# Token IDs handed to call_llm as out_tokens; presumably the IDs of the
# "pos"/"neg" answers -- verify against the model's tokenizer.
out_tokens = '981,29875'
# Upper bound on the answer length, in tokens.
max_tokens = 1

# Few-shot examples: the first nr_samples rows of the sample file, or an
# empty frame when no examples are requested.
samples = pd.read_csv(sample_path).iloc[:nr_samples] if nr_samples else pd.DataFrame()

# Running totals over the whole evaluation.
nr_correct, nr_tokens = 0, 0

for _, row in df.iterrows():

    text = row['text']
    prompt = create_prompt(text, samples)
    label, current_tokens = call_llm(prompt, model, max_tokens, out_tokens)
    nr_tokens += current_tokens

    # A prediction counts as correct only on exact equality with the label.
    ground_truth = row['sentiment']
    nr_correct += 1 if label == ground_truth else 0

    print(f'Label: {label}; Ground truth: {ground_truth}')

print(f'Number of correct labels:\t{nr_correct}')
print(f'Number of tokens used   :\t{nr_tokens}')

Label: neg; Ground truth: neg
Label: neg; Ground truth: neg
Label: neg; Ground truth: neg
Label: neg; Ground truth: neg
Label: pos; Ground truth: pos
Label: pos; Ground truth: neg
Label: pos; Ground truth: neg
Label: neg; Ground truth: neg
Label: pos; Ground truth: pos
Label: neg; Ground truth: neg
Number of correct labels:	8
Number of tokens used   :	3790
