# 1. Importing and Preprocessing the dataset

In [29]:
import pandas as pd
from openai import OpenAI
from tqdm import tqdm
from sklearn.metrics import accuracy_score, classification_report,f1_score
import os

##### Download Dataset from Kaggle SMS Spam Collection Dataset
##### https://www.kaggle.com/datasets/uciml/sms-spam-collection-dataset?select=spam.csv

In [4]:
#Place dataset in the data folder
dataset = pd.read_csv("./data/spam.csv", encoding = "latin")
print(dataset.shape)
dataset.head()

(5572, 5)


Unnamed: 0,v1,v2,Unnamed: 2,Unnamed: 3,Unnamed: 4
0,ham,"Go until jurong point, crazy.. Available only ...",,,
1,ham,Ok lar... Joking wif u oni...,,,
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...,,,
3,ham,U dun say so early hor... U c already then say...,,,
4,ham,"Nah I don't think he goes to usf, he lives aro...",,,


##### We’re only going to select the v1 and v2 columns to simplify the dataset. We’ll also limit the number of rows to 100, so the processing time is faster. Also, remember it costs to make an API call to the OpenAI API. Classifying a large number of records can be expensive so it’s in our best interest to limit the number of calls we make.

In [6]:
dataset = dataset[["v1","v2"]].head(100)
print(dataset.shape)
dataset.head()

(100, 2)


Unnamed: 0,v1,v2
0,ham,"Go until jurong point, crazy.. Available only ..."
1,ham,Ok lar... Joking wif u oni...
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...
3,ham,U dun say so early hor... U c already then say...
4,ham,"Nah I don't think he goes to usf, he lives aro..."


# Text Classification Using ChatGPT

##### Setup API Key

In [8]:
#Set system OPENAI_API_KEY in environment variables
openai_api_key = os.environ.get("OPENAI_API_KEY") 
client = OpenAI(api_key=openai_api_key)

##### Ham and Spam Classification for a Complete Dataset

In [16]:
def classify_text(dataset):
    i = 0
    predictions = []
    while i < dataset.shape[0]:
        try:
            message = dataset["v2"].iloc[i]
            prompt = f"""Classify the following sms message as ham or spam. Also return the probability of it being spam.
Message: '{message}'.
The output should only contain two words: ham or spam, and the probability with two decimal points.
"""        
            response = client.chat.completions.create(
              model="gpt-4o",
              temperature = 0.0,
               messages=[
                    {"role": "system", "content": "You are a text classification expert."},
                    {
                        "role": "user",
                        "content": prompt
                    }
        ]
            )
            response =  response.choices[0].message.content.split(",")
            predictions.append(response)
            i = i + 1
            print(f"Total messages processed: {i}")
        except Exception as e:
            print("Erro occure：", str(e))
    return predictions

In [18]:
predictions = classify_text(dataset)
predictions

Total messages processed: 1
Total messages processed: 2
Total messages processed: 3
Total messages processed: 4
Total messages processed: 5
Total messages processed: 6
Total messages processed: 7
Total messages processed: 8
Total messages processed: 9
Total messages processed: 10
Total messages processed: 11
Total messages processed: 12
Total messages processed: 13
Total messages processed: 14
Total messages processed: 15
Total messages processed: 16
Total messages processed: 17
Total messages processed: 18
Total messages processed: 19
Total messages processed: 20
Total messages processed: 21
Total messages processed: 22
Total messages processed: 23
Total messages processed: 24
Total messages processed: 25
Total messages processed: 26
Total messages processed: 27
Total messages processed: 28
Total messages processed: 29
Total messages processed: 30
Total messages processed: 31
Total messages processed: 32
Total messages processed: 33
Total messages processed: 34
Total messages processe

[['ham', ' 0.01'],
 ['ham', ' 0.05'],
 ['spam', ' 0.99'],
 ['ham', ' 0.10'],
 ['ham', ' 0.05'],
 ['spam', ' 0.99'],
 ['ham', ' 0.02'],
 ['ham', ' 0.05'],
 ['spam', ' 0.98'],
 ['spam', ' 0.99'],
 ['ham', ' 0.05'],
 ['spam', ' 0.99'],
 ['spam', ' 0.99'],
 ['ham', ' 0.01'],
 ['ham', ' 0.05'],
 ['spam', ' 0.98'],
 ['ham', ' 0.05'],
 ['ham', ' 0.02'],
 ['ham', ' 0.05'],
 ['spam', ' 0.99'],
 ['ham', ' 0.05'],
 ['ham', ' 0.05'],
 ['ham', ' 0.10'],
 ['ham', ' 0.05'],
 ['ham', ' 0.05'],
 ['ham', ' 0.02'],
 ['ham', ' 0.05'],
 ['ham', ' 0.05'],
 ['ham', ' 0.05'],
 ['ham', ' 0.05'],
 ['ham', ' 0.02'],
 ['ham', ' 0.05'],
 ['ham', ' 0.05'],
 ['ham', ' 0.05'],
 ['spam', ' 0.95'],
 ['ham', ' 0.02'],
 ['ham', ' 0.05'],
 ['ham', ' 0.05'],
 ['ham', ' 0.05'],
 ['ham', ' 0.02'],
 ['ham', ' 0.05'],
 ['ham', ' 0.05'],
 ['spam', ' 0.99'],
 ['ham', ' 0.05'],
 ['ham', ' 0.02'],
 ['ham', ' 0.10'],
 ['ham', ' 0.05'],
 ['ham', ' 0.05'],
 ['ham', ' 0.05'],
 ['ham', ' 0.05'],
 ['ham', ' 0.05'],
 ['ham', ' 0.01'],
 [

In [20]:
pred_df = pd.DataFrame(predictions, columns=['Label', 'probability'])
pred_df.head(10)

Unnamed: 0,Label,probability
0,ham,0.01
1,ham,0.05
2,spam,0.99
3,ham,0.1
4,ham,0.05
5,spam,0.99
6,ham,0.02
7,ham,0.05
8,spam,0.98
9,spam,0.99


# Measuring ChatGPT Classification Performance

##### We will use accuracy_score and classification_report metrics from the sklearn.metrics module. These metrics accept the predictions and target labels in integer format. The following script converts predictions and target labels for our dataset to a binary format with 0s and 1s.

In [24]:
pred_df['Label'] = pred_df['Label'].map({'ham': 0, 'spam': 1})
dataset['v1'] = dataset['v1'].map({'ham': 0, 'spam': 1})

##### The following script measures the accuracy and prints the classification report for ChatGPT predictions.

In [33]:
print(classification_report(pred_df['Label'].values, dataset['v1'].values))
print("Accuracy: ",accuracy_score(pred_df['Label'].values, dataset['v1'].values))
print("F1: ",f1_score(pred_df['Label'].values, dataset['v1'].values))

              precision    recall  f1-score   support

           0       0.99      1.00      0.99        82
           1       1.00      0.94      0.97        18

    accuracy                           0.99       100
   macro avg       0.99      0.97      0.98       100
weighted avg       0.99      0.99      0.99       100

Accuracy:  0.99
F1:  0.9714285714285714
