### Detect Spam Using gpt-35-turbo-16k
Make sure you create a `.env` file that defines `OPENAI_API_BASE` (the API base URL) and `OPENAI_API_KEY` (your API key).

Press `Shift`+`Enter` (or `Shift`+`Return`) repeatedly to run each cell in turn until you run into problems. Debug with Claude or ChatGPT.

In [None]:
import os
from dotenv import load_dotenv
from openai import OpenAI

# Read the .env file first; override=True lets its values replace variables
# that are already set in the environment.
load_dotenv(override=True)

# The endpoint and the credentials are both taken from the environment.
client = OpenAI(
    api_key=os.getenv("OPENAI_API_KEY"),
    base_url=os.getenv("OPENAI_API_BASE"),
)

from datasets import load_dataset

# Labelled e-mails used by the cells below (fields read later: title, text, type).
ds = load_dataset("TrainingDataPro/email-spam-classification")


In [None]:
# Inspect one labelled example so the field names used later are visible.
email = 0  # index of the training example to display

print(ds.keys())  # names of the available splits
train_split = ds['train']
print(train_split)

divider = "____________________"

print(divider)
sample = train_split[email]
print(f"Entire Email:\n {sample}")  # the whole record for that index
print(divider)
print(f"Subject:\n {sample['title']}\n")
print(divider)
print(f"Body:\n {sample['text']}\n")
print(divider)
print(f"Label:\n {sample['type']}\n")


##### Test the API and ensure you get a response

In [None]:
# Smoke test: send a single throwaway chat message to confirm that the
# endpoint, API key and model name work before running the full evaluation.
smoke_test_messages = [
    {"role": "system", "content": "You are an excellent spam detector. Given an email subject and body you return a response saying whether the message is 'spam' or 'not spam'."},
    {"role": "user", "content": "Hello! what is your name?"},
    # Alternative user prompt to try:
    # {"role": "user", "content": "Hello! Can you briefly explain what OpenAI's GPT models are?"}
]

response = client.chat.completions.create(
    model="gpt-35-turbo-16k",
    temperature=0.7,
    messages=smoke_test_messages,
)

# Show only the text of the first returned choice.
reply = response.choices[0].message.content
print(reply.strip())

##### Test your prompting skills by editing the `sys_context` variable below. The best I've been able to get is 71.43% accuracy.

In [None]:
def _normalize_label(raw_text):
    """Reduce a model reply to a bare label when it is unambiguous.

    The reply is lower-cased and stripped of surrounding whitespace. If what
    remains, after also removing wrapping quotes and end punctuation, is
    exactly 'spam' or 'not spam', that bare label is returned. Any other
    reply is returned lower-cased but otherwise untouched, so verbose answers
    are never silently re-interpreted.
    """
    text = raw_text.strip().lower()
    # Replies such as "Spam." or "'not spam'" differ from the label only by
    # decoration; peel that off before comparing.
    bare = text.strip(" \t\r\n\"'`.!,;:")
    if bare in ("spam", "not spam"):
        return bare
    return text


def detect(subject, body):
    """Ask the chat model whether an e-mail is spam.

    Args:
        subject: Subject line of the e-mail.
        body: Body text of the e-mail.

    Returns:
        'spam' or 'not spam' when the model answers with one of those labels
        (ignoring case, surrounding whitespace, quotes and end punctuation);
        otherwise the model's reply, stripped and lower-cased. Returns
        'error' if the request fails or the response has no usable content.
        This function never raises: failures are printed and reported via
        the 'error' return value so a batch run can continue.
    """
    sys_context = "You are an excellent spam detector. Given an email subject and body, return either 'spam' or 'not spam'."
    try:
        response = client.chat.completions.create(
            model="gpt-35-turbo-16k",
            messages=[
                {"role": "system", "content": sys_context},
                {"role": "user", "content": f"Subject: {subject}\nBody: {body}\nIs this spam or not spam?"}
            ],
            temperature=0.0
        )

        if not response or not response.choices:
            print("Error: No valid response from OpenAI API")
            return "error"

        content = response.choices[0].message.content
        if content is None:
            # A choice without text is an empty response, not a transport
            # failure, so report it as such instead of letting .strip() blow up.
            print("Error: No valid response from OpenAI API")
            return "error"

        return _normalize_label(content)

    except Exception as e:
        # Deliberately broad: one failed request must not abort a whole
        # evaluation run or crash the UI callback.
        print(f"API call failed: {e}")
        return "error"

# Score detect() against every labelled e-mail in the train split.
train_split = ds['train']
total = len(train_split)
correct = 0

max_label_length = 8  # column width used to pad the printed labels

for index, record in enumerate(train_split, start=1):
    true_label = record['type'].lower()
    predicted_label = detect(record['title'], record['text'])

    print(
        f"e-mail {index:<3} "
        f"Predicted: {predicted_label:<{max_label_length}} | "
        f"Actual: {true_label:<{max_label_length}}"
    )

    # Only an exact match with the dataset label counts as a hit.
    correct += predicted_label == true_label


# Report the share of exact matches as a percentage.
accuracy = correct / total * 100
print(f"Accuracy: {accuracy:.2f}%")


##### Test the model with custom email subject and body input.

In [None]:
import gradio as gr


# Small Gradio front end: two text inputs are passed to detect(), and its
# return value is shown in the "Result" box.
with gr.Blocks() as demo:
    gr.Markdown("## Spam Detector")

    subject_input = gr.Textbox(label="e-mail subject")
    body_input = gr.Textbox(label="e-mail body")
    text_output = gr.Textbox(label="Result")
    detect_btn = gr.Button("Detect")

    # Clicking the button calls detect(subject, body) and writes the
    # returned string into the output box.
    detect_btn.click(
        fn=detect,
        inputs=[subject_input, body_input],
        outputs=text_output,
    )

demo.launch()