<h1 style="text-align: center;">Survey answer classification with ChatGPT</h1>

<p style="text-align: center;">
    <img src="./data/ChatGPT_logo.png" alt="ChatGPT images" width="300">
</p>



### In this project I use the OpenAI API to request the help of ChatGPT to:

-   Classify survey responses into topics
-   Analyse the sentiment of each response
-   Summarize the responses

In [None]:
import openai
import os

# Placeholder only: do not paste a real key into the notebook. The next cell
# overwrites openai.api_key with the OPENAI_API_KEY environment variable.
openai.api_key = 'your_api'

# get one here: https://platform.openai.com/


In [None]:
from dotenv import load_dotenv, find_dotenv
# Load variables from a .env file (when one is found) into the environment.
_ = load_dotenv(find_dotenv())

# Read the key from the environment instead of hard-coding it in the notebook.
openai.api_key = os.getenv('OPENAI_API_KEY')

# Report only whether a key was found: echoing the key itself would store the
# secret in the notebook's saved output.
print('OPENAI_API_KEY loaded:', openai.api_key is not None)

In [None]:
def get_completion(prompt, model="gpt-3.5-turbo", temperature=0):
    """Send a single-turn chat prompt to the OpenAI API and return the reply text.

    Args:
        prompt: Text sent as the content of one "user" message.
        model: Name of the chat model to query.
        temperature: Degree of randomness of the model's output. The default
            of 0 matches the value that used to be hard-coded here.

    Returns:
        The "content" of the first choice's message in the API response.
    """
    response = openai.ChatCompletion.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=temperature,
    )
    return response.choices[0].message["content"]


In [None]:
import pandas as pd
import numpy as np

# Load the survey export; the file is decoded as ISO-8859-1 (Latin-1).
# Later cells read its 'answer' and 'topic' columns.
df = pd.read_csv('data/member_survey_comments.csv', encoding='ISO-8859-1')


In [None]:
df.head()

In [None]:
# cutting the dataset
#df = df.iloc[0:10,:]

In [None]:
import re

def clean_text(text):
    """Normalise the whitespace of one survey answer and drop non-ASCII characters.

    Case and punctuation are deliberately left untouched so the model sees the
    answer as the member wrote it.

    Args:
        text: The raw answer. None and NaN (e.g. an empty CSV cell) are treated
            as an empty answer; any other non-string value is converted with str().

    Returns:
        The answer as a single line of ASCII text with no leading/trailing
        spaces and no runs of consecutive spaces.
    """
    # Missing answers would otherwise crash on the string methods below.
    # `text != text` is only true for NaN.
    if text is None or (isinstance(text, float) and text != text):
        return ''
    text = str(text)

    # Replace line breaks with spaces first, so that the spaces they introduce
    # are collapsed together with the others below.
    text = re.sub(r'[\r\n]+', ' ', text)

    # Remove non-ASCII characters
    text = text.encode("ascii", "ignore").decode()

    # Collapse runs of spaces (including those left behind by removed characters)
    text = re.sub(' +', ' ', text)

    # Strip last: dropping characters above can expose new leading/trailing spaces
    return text.strip()

# Apply clean_text to every value of the 'answer' column; the cleaned text
# replaces the raw answers in df.
df['answer'] = df['answer'].apply(clean_text)

In [None]:
# Prefix every answer with its position (starting at 0) so that individual
# answers stay identifiable once they are merged into one text.
concatenated_content = [
    f'{number}. {item}' for number, item in enumerate(df['answer'])
]

# Join the numbered answers into a single string, one answer per line
long_text = '\n'.join(concatenated_content)

# long_text now holds every answer. Show only a short preview instead of
# dumping the whole survey into the notebook output.
print(long_text[:1000])
print(f'... ({len(long_text)} characters in total)')

In [None]:
# In this notebook, get_completion sends a single prompt to the gpt-3.5-turbo model and returns the text of its reply.
# For summarization, the long text is split into chunks of a fixed number of characters, and each chunk is sent as a separate request.
# The responses are collected in the responses_per_prompt_dict dictionary, with the prompt name as the key.

In [None]:
question='Do you have any comments or reflections on the CompanyX’s proposal for cultural renewal?'

In [None]:
# Candidate topics offered to the model by the classification prompt.
topics = [
    "Approval",
    "CompanyX purpose",
    "Concern over future",
    "Concerns over effectiveness",
    "Disappointment in previous processes",
    "Leading by example",
    "Member inclusion",
    "More detail needed",
    "Needs processes",
    "New leadership",
    "Other",
    "Outside input",
    "Speed",
    "Support for staff",
    "Transparency",
    "Trust",
]

# Defining the prompts

In [None]:
# Prompt templates, filled in with str.format(). The positional placeholders
# of each template are, in order:
#   'summary':        question, answers
#   'sentiment':      question, answer
#   'classification': topics, question, answer
# Avoid literal curly braces inside the templates: str.format() would treat
# them as placeholders.
prompts = {
    'summary': """
    <task> Your task is to provide a detailed summary from the members' answers to the survey <question>.<>

    <question>: <{}><>

    <introduction>
    Our members will be at the heart of shaping the future direction of the CompanyX and our Purpose and Culture Survey is critical in gathering your views to inform our work on what a reimagined, more focused and accountable CompanyX will look like.
    Your input to this process is crucial; we need your insight and expertise to drive forward the changes required.
    We will share a prospectus, updating you on this work before each member organisation will get to vote on these proposals at the EGM.
    <>

    Make sure you read the <introduction>, <question> before you attempt to do the <task>.

    Summarize the <answers> given by member participants to the survey, delimited by triple backticks, in at most 100 words.

    <answers>: ```{}```
    """,

    # The sentiment task used to ask for "2 outputs in a json format" and, further
    # down, for a one-word reply. The instructions are now consistent and name
    # the JSON keys, mirroring the classification prompt.
    'sentiment': """

    <task> Your tasks are:
    1. Evaluate the sentiment of the member's answer to the survey <question>.
    2. Explain decision outcome of <task> 1. Stick to less than 25 words.
    Give your response in a json format with two keys 'sentiment' and 'explanation'.
    The value of 'sentiment' must be exactly one word: 'positive', 'negative' or 'neutral'.
    <>

    <question>: <{}><>

    <introduction>
    Our members will be at the heart of shaping the future direction of the CompanyX and our Purpose and Culture Survey is critical in gathering your views to inform our work on what a reimagined, more focused and accountable CompanyX will look like.
    Your input to this process is crucial; we need your insight and expertise to drive forward the changes required.
    We will share a prospectus, updating you on this work before each member organisation will get to vote on these proposals at the EGM.
    <>

    Make sure you read the <introduction>, <question> before you attempt to do the <task>.

    Member's <answer> to the survey is delimited by triple backticks.

    <answer>: ```{}```
    """,

    'classification': """

    <task> Your tasks are:
    1. Classify the <answer> given by the member participant as a response to survey <question>.
    Read the set of <topics> listed here:{}. Choose one that is the most likely that the <answer> belongs to.
    2. Explain decision outcome of <task> 1. Stick to less than 25 words.
    Give your response in a json format with two keys 'topic' and 'explanation'.
    Make sure your response spells the <topics> exactly the same.
    <>

    <question>: <{}><>

    <introduction>
    Our members will be at the heart of shaping the future direction of the CompanyX and our Purpose and Culture Survey is critical in gathering your views to inform our work on what a reimagined, more focused and accountable CompanyX will look like.
    Your input to this process is crucial; we need your insight and expertise to drive forward the changes required.
    We will share a prospectus, updating you on this work before each member organisation will get to vote on these proposals at the EGM.
    <>

    Make sure you read the <introduction>, <question> and the list of <topics> before you attempt to do the <task>.

    Member's <answer> to the survey is delimited by triple backticks.

    <answer>: ```{}```

    """
}


***I tried to follow best practices and guidelines but it is by no means perfect. Feel free to play around with the prompts.***

### Test sentiment prompt

In [None]:
# Try the sentiment prompt on a single answer before running the whole dataset.
prompt = prompts['sentiment']
# Answer at index label 0; `text` is reused by the classification test cell.
text   = df['answer'][0]

# Generate a response from the model
# The sentiment template takes the question and the answer, in that order.
response = get_completion(prompt.format(question,text))
print(response)


### Test classification prompt

In [None]:
# Select the answer here so the cell runs on its own, instead of relying on
# `text` left over from the sentiment test cell.
text     = df['answer'][0]
prompt   = prompts['classification']
# The classification template takes topics, question and answer, in that order.
response = get_completion(prompt.format(topics,question,text))
print(response)

### Test summary prompt

In [None]:
import textwrap
# Split the long text into pieces of at most 200 characters and summarize only
# the first piece, as a quick check of the summary prompt.
chunks   = textwrap.wrap(long_text, 200) # at most 200 characters per chunk
prompt   = prompts['summary']
response = get_completion(prompt.format(question,chunks[0]))
print(response)

# Trick: retrying requests to avoid rate-limit errors

In [None]:
import time
from tenacity import retry, stop_after_attempt, wait_exponential

# Wait between 2 and 30 seconds (growing exponentially) between attempts and
# give up after a bounded number of attempts. Without a stop condition tenacity
# retries forever, so a permanent failure (e.g. an invalid API key or a
# malformed request) would hang the notebook instead of surfacing the error.
@retry(
    wait=wait_exponential(multiplier=1, min=2, max=30),
    stop=stop_after_attempt(6),
    reraise=True,  # raise the original API error rather than tenacity's RetryError
)
def delayed_completion(**kwargs):
    """Call the Chat Completion API, retrying failed calls with exponential backoff.

    Args:
        **kwargs: Passed unchanged to openai.ChatCompletion.create
            (e.g. model, messages, temperature).

    Returns:
        The response object returned by openai.ChatCompletion.create.

    Raises:
        The exception of the last attempt, once all attempts have failed.
    """
    return openai.ChatCompletion.create(**kwargs)

In [None]:
# cut the dataset
# df = df.loc[0:60,:]

# Performing sentiment and classification - one at a time

In [None]:
# Upper bound on the number of requests per minute; this must not be exceeded.
rate_limit_per_minute = 60
delay                 = 60.0 / rate_limit_per_minute

## choose which prompts to run.
prompt_list               = ['sentiment','classification']
responses_per_prompt_dict = {}

# Positional arguments of each prompt template, in placeholder order.
# The classification template has three placeholders (topics, question,
# answer); formatting it with only two arguments raises an IndexError.
prompt_arguments = {
    'sentiment':      lambda answer: (question, answer),
    'classification': lambda answer: (topics, question, answer),
}

# request statistics
start_time             = time.time()
request_count          = 0
start_time_per_request = time.time()
# One entry per request, in request order, across ALL prompts in prompt_list
time_taken_per_request = []

for prompt_name in prompt_list:
    responses_per_prompt = []
    print(f'{prompt_name}')
    for text in df.answer:
        # An unknown prompt name fails here with a KeyError instead of
        # silently re-sending the prompt built in the previous iteration.
        prompt = prompts[prompt_name].format(*prompt_arguments[prompt_name](text))

        request_count += 1

        # Report the throughput once every `rate_limit_per_minute` requests
        if request_count % rate_limit_per_minute == 0:
            end_time            = time.time()  # Store the current time
            time_taken          = round(end_time - start_time, 0)  # Calculate the time taken
            requests_per_minute = round(60 * request_count / time_taken, 0)  # Calculate the requests per minute

            print(f"Made {request_count} requests in {time_taken} seconds ({requests_per_minute} RPM)")

            start_time    = time.time()  # Reset the start time
            request_count = 0  # Reset the counter

        # Sleep for the delay so the rate limit is respected
        time.sleep(delay)

        # Generate a response from the model with retry mechanism
        response = delayed_completion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": prompt}],
            temperature=0,
        )

        # store the time taken per request/response (includes the sleep above)
        time_taken_per_request.append(round(time.time() - start_time_per_request, 0))
        start_time_per_request = time.time()

        # append each response
        responses_per_prompt.append(response)

    # store all responses for each prompt
    responses_per_prompt_dict[prompt_name] = responses_per_prompt

In [None]:
# OpenAI returns the data in JSON
print(prompts.keys())

In [None]:
responses_per_prompt_dict['classification']

In [None]:
response.choices[0].message["content"]

In [None]:
import json
json.loads(response.choices[0].message["content"])

### Store the response data

In [None]:
import json
# Price in USD per 1,000 tokens used for the cost columns
price_per_1k_tokens = 0.002

prompt_name = 'classification'
responses   = responses_per_prompt_dict[str(prompt_name)]

# time_taken_per_request is one flat list covering every prompt in prompt_list,
# in request order. Skip the entries that belong to the prompts that ran before
# this one, otherwise the timings of another prompt are attached to these rows.
timing_offset = sum(
    len(responses_per_prompt_dict[name])
    for name in prompt_list[:prompt_list.index(prompt_name)]
)

# Create a list to store the data
data = []

# Iterate over the responses
for i, response in enumerate(responses):
    content = response.choices[0].message["content"]

    # The model is asked for JSON but may reply with something else. Keep the
    # row (ids, tokens and costs are still valid) and leave the parsed fields empty.
    try:
        temp = json.loads(content)
        if not isinstance(temp, dict):
            raise ValueError('expected a JSON object')
    except ValueError:  # json.JSONDecodeError is a subclass of ValueError
        print(f'Response {i} is not a JSON object, stored without topic/explanation: {content!r}')
        temp = {}

    usage = response.usage

    # Extract the required information
    item = {
        "id": response.id,
        "model": response.model,
        "created": response.created,
        "finish_reason": response.choices[0].finish_reason,
        "topic": temp.get('topic'),
        "explanation": temp.get('explanation'),
        "completion_tokens": usage["completion_tokens"],
        "prompt_tokens": usage["prompt_tokens"],
        "request_tokens": usage["total_tokens"],
        "completion_cost": usage["completion_tokens"]*price_per_1k_tokens/1000,
        "prompt_cost": usage["prompt_tokens"]*price_per_1k_tokens/1000,
        "request_cost": usage["total_tokens"]*price_per_1k_tokens/1000,
        "time_taken_per_request": time_taken_per_request[timing_offset + i]
    }

    # Add the item to the data list
    data.append(item)

# Convert the list to a DataFrame
df_responses = pd.DataFrame(data)

# Show the first rows of the DataFrame
df_responses.head()

In [None]:
# store to csv
df_responses.to_csv(f'{prompt_name}.csv')

### Evaluate ChatGPT predictions

In [None]:
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix, accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns

# Fit the LabelEncoder on the topic list; its sorted classes_ fix the
# row/column order of the confusion matrix.
le = LabelEncoder()
le.fit(topics)
labels = list(le.classes_)

# Ground truth comes from the survey file, predictions from ChatGPT.
# assumes df['topic'] holds the reference topic of each answer, in the same
# row order as df_responses - TODO confirm
y_true = df['topic'].astype(str).to_numpy()
y_pred = df_responses['topic'].astype(str).to_numpy()

# Compare the topic strings directly: le.transform() raises on a label it was
# not fitted on, which happens whenever the model invents a topic.
print(accuracy_score(y_true, y_pred))

# Predictions outside the topic list count as errors in the accuracy above,
# but cannot be placed in the confusion matrix below.
n_unknown = sum(label not in labels for label in y_pred)
if n_unknown:
    print(f'{n_unknown} prediction(s) are not in the topic list and are left out of the confusion matrix')

# Compute the confusion matrix (rows: true topic, columns: predicted topic)
cm = confusion_matrix(y_true, y_pred, labels=labels)

# Use seaborn to plot the confusion matrix
plt.figure(figsize=(10,7))
sns.heatmap(cm, annot=True, fmt='d', xticklabels=labels, yticklabels=labels)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.show()

In [None]:
# The predicted label is stored in the 'topic' column of df_responses
# (there is no 'classification' column).
df_responses['topic'].isin(topics)

In [None]:
# Map the invented "trust" labels back to the topic that exists in `topics`.
# .loc is used instead of chained indexing, which may assign to a copy.
hallucinated_trust = df_responses['topic'].isin(['Concern over trust', 'Concerns over trust'])
df_responses.loc[hallucinated_trust, 'topic'] = 'Trust'

In [None]:
# Number of answers assigned to each predicted topic
df_responses['topic'].value_counts()

"Concerns over trust" model hallucinates a category

In [None]:
topics

In [None]:
# Number of predicted topics that are not in the topic list. Should be 0
int((~df_responses['topic'].isin(topics)).sum())


# Perform summarization

In [None]:
import tiktoken

# Tokenizer that matches the chat model used in this notebook
encoding = tiktoken.encoding_for_model('gpt-3.5-turbo')

tokens      = encoding.encode(long_text)
token_count = len(tokens)

print(f"The text is {token_count} tokens long.")
# The previous version reported the number of characters as "words"
print(f"The text is {len(long_text.split())} words long.")
print(f"The text is {len(long_text)} characters long.")

In [None]:
len(encoding.encode(prompts[prompt_name]))

In [None]:
prompts.keys()

# Trying different chunk sizes and temperature values for text summarization

In [None]:
# Assuming long_text is the text you want to summarize
import textwrap

# Long text
text = long_text

# Chunk sizes (in characters) and temperatures to try.
# textwrap needs an integer width, hence the floor division.
chunk_size    = [max(1, len(long_text)//2)] # [4096, len(long_text)//2, 500]
temperatures  = [0.2] #[1, 0.5, 0.2]

prompt_name     = 'summary'
# Combined summary of every (chunk size, temperature) combination, by run name
final_summaries = {}

for temperature in temperatures:
    for size in chunk_size:

        # Split the text into chunks of at most `size` characters
        chunks = textwrap.wrap(long_text, size)

        summaries            = []
        responses_per_prompt = []

        print(f'Size of each chunk is {size} characters')
        for i, chunk in enumerate(chunks):
            # Create the prompt for each chunk
            prompt = prompts[prompt_name].format(question, chunk)
            print(f'Chunk {i} - Tokens sent: {len(encoding.encode(prompt))}')

            # Generate a summary from the model
            response = delayed_completion(
                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": prompt}],
                temperature=temperature,
            )

            # Extract the content from the response and add it to the list
            summaries.append(response['choices'][0]['message']['content'])

            # append each response
            responses_per_prompt.append(response)

        # Store the responses of this combination under its own key, so that
        # trying several sizes/temperatures does not overwrite earlier runs...
        run_name = f'{prompt_name}_chunk_size_{size}_temperature_{temperature}'
        responses_per_prompt_dict[run_name] = responses_per_prompt
        # ...and keep the latest run under the plain prompt name as before.
        responses_per_prompt_dict[f'{prompt_name}'] = responses_per_prompt

        # Combine the summaries
        final_summary = " ".join(summaries)
        final_summaries[run_name] = final_summary



In [None]:
# Store to csv
# Build the table from the summary responses: df_responses still holds the
# classification results, so writing it here would save them under the
# summary file name. prompt_name is 'summary' after the summarization cell.
df_summaries = pd.DataFrame([
    {
        "id": summary_response.id,
        "model": summary_response.model,
        "created": summary_response.created,
        "finish_reason": summary_response.choices[0].finish_reason,
        "summary": summary_response.choices[0].message["content"],
        "completion_tokens": summary_response.usage["completion_tokens"],
        "prompt_tokens": summary_response.usage["prompt_tokens"],
        "request_tokens": summary_response.usage["total_tokens"],
    }
    for summary_response in responses_per_prompt_dict[prompt_name]
])
df_summaries.to_csv(f'{prompt_name}.csv')

In [None]:
responses_per_prompt_dict.keys()

# Functions to calculate the cost before sending the requests

In [None]:
# Total tokens calculator

def tokens_calculator(text, n_chunks, n_temperatures, prompt_size=100):
    """Estimate the number of prompt tokens sent when summarizing `text`.

    The text is sent once per temperature, split into `n_chunks` requests, and
    every request carries the prompt template on top of its chunk. The previous
    formula divided by `n_chunks` and multiplied by it again, so the template
    was counted only once regardless of the number of requests.

    Completion (output) tokens are not included in the estimate.

    Args:
        text: The full text to be summarized.
        n_chunks: Number of requests the text is split into.
        n_temperatures: Number of temperature settings the run is repeated for.
        prompt_size: Approximate number of tokens of the prompt template.

    Returns:
        The estimated total number of tokens sent.
    """
    # Encode the text with the notebook-level tiktoken `encoding`
    token_count = len(encoding.encode(text))

    # Tokens for one pass over the text: the text itself plus one prompt
    # template per request
    tokens_per_pass = token_count + prompt_size * n_chunks

    # One pass per temperature setting
    return tokens_per_pass * n_temperatures

# NOTE(review): n_chunks receives the number of chunk-size settings (3), not
# the number of chunks the text is actually split into - confirm this is intended.
chunk_size    = [4096, len(long_text)/2, 500]
temperatures  = [1, 0.5, 0.2]
# Token estimate multiplied by 0.002/1000, the same default price that
# cost_calculator applies per 1,000 tokens
tokens_calculator(text=text, n_chunks=len(chunk_size), n_temperatures=len(temperatures)) * 0.002/1000


In [None]:
# 
def cost_calculator(text, n_chunks, n_temperatures, prompt_size=100, price=0.002):
    """Return the estimated cost of the requests as a string such as '0.003$'.

    The token estimate comes from tokens_calculator; `price` is applied per
    1,000 tokens and the result is rounded to three decimals.
    """
    estimated_tokens = tokens_calculator(text, n_chunks, n_temperatures, prompt_size)
    cost = round(estimated_tokens * price/1000, 3)
    return f'{cost}$'

cost_calculator(text=text, n_chunks=len(chunk_size), n_temperatures=len(temperatures))


## This project serves as an introduction to prompt engineering while using ChatGPT to perform machine learning tasks.