In [1]:
import pandas as pd
import requests
import json
import time

# === CONFIGURATION ===
# Input CSV file containing the emails to classify.
CSV_PATH = "emails.csv"

# Output CSV file: the input rows plus the verdict and justification columns.
OUTPUT_PATH = "emails_with_verdict.csv"

# Chat-completions endpoint exposed by the local LM Studio server.
LM_STUDIO_API_URL = "http://localhost:1234/v1/chat/completions"

# Name of the Llama 2 model loaded in LM Studio.
MODEL_NAME = "llama-2-7b-chat"

# === HELPER FUNCTION ===
# Phrases that negate a phishing finding; checked before the bare "phish"
# substring so that "this is not phishing" is not classified as phishing.
_NEGATED_PHISHING_MARKERS = (
    "not phishing",
    "not a phish",
    "not appear to be phish",
    "not appear to be a phish",
    "isn't phishing",
    "isn't a phish",
    "no signs of phishing",
    "no indication of phishing",
)


def _is_blank_email(email_text):
    """Return True for None, NaN (an empty CSV cell), or a whitespace-only string."""
    if email_text is None:
        return True
    if isinstance(email_text, float) and email_text != email_text:  # NaN check
        return True
    return isinstance(email_text, str) and not email_text.strip()


def _extract_json_object(text):
    """Return the first JSON object embedded anywhere in *text*, or None."""
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            # raw_decode tolerates trailing text after the object (code fences,
            # explanations), which json.loads rejects.
            candidate, _ = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            candidate = None
        if isinstance(candidate, dict):
            return candidate
        start = text.find("{", start + 1)
    return None


def _coerce_verdict(raw):
    """Map a model-supplied verdict value onto 1, 0, or None if unrecognized."""
    if isinstance(raw, str):
        token = raw.strip().lower()
        if token in ("phishing", "true", "yes"):
            return 1
        if token in ("not phishing", "false", "no"):
            return 0
        raw = token
    try:
        return 1 if int(float(raw)) else 0
    except (TypeError, ValueError, OverflowError):
        return None


def _guess_verdict_from_text(text):
    """Best-effort verdict for a reply that contains no parseable JSON object."""
    lowered = text.lower()

    # Malformed JSON (single quotes, trailing commas) often still carries an
    # explicit `verdict: 0/1`; prefer that over keyword matching.
    key_pos = lowered.find("verdict")
    if key_pos != -1:
        after_key = key_pos + len("verdict")
        for ch in lowered[after_key:after_key + 8]:
            if ch in "01":
                return int(ch)
            if ch.isalnum():
                break

    if any(marker in lowered for marker in _NEGATED_PHISHING_MARKERS):
        return 0
    return 1 if "phish" in lowered else 0


def _parse_model_reply(content):
    """Turn the raw model reply into a (verdict, justification) pair; never raises."""
    data = _extract_json_object(content)
    if data is None:
        # Fallback if the model does not return a JSON object at all.
        return _guess_verdict_from_text(content), content

    verdict = _coerce_verdict(data.get("verdict", 0))  # Default to 0 if missing
    justification = data.get("justification")
    justification = "" if justification is None else str(justification).strip()
    return verdict, justification


def get_llama_analysis(email_text, timeout=120):
    """
    Send an email text to Llama 2 via LM Studio to classify phishing.

    Parameters:
    - email_text: the email body to classify
    - timeout: seconds to wait for the LM Studio HTTP response before giving up

    Returns a tuple:
    - verdict: int (1 = phishing, 0 = not phishing), or None when the email is
      empty, the request fails, or the model's verdict cannot be interpreted
    - justification: str (brief explanation of the verdict, or an error note)
    """
    # A missing CSV cell reaches us as NaN; do not waste a model call on it.
    if _is_blank_email(email_text):
        return None, "Empty email text"

    # Construct the prompt for the model
    prompt = f"""
    You are a cybersecurity expert specializing in phishing detection.
    Analyze the following email and respond in **JSON** format with two fields:
    - verdict: 1 for phishing, 0 for not phishing
    - justification: a short explanation (1–3 sentences)
    
    Email content:
    {email_text}
    """

    # Create the payload for LM Studio API
    payload = {
        "model": MODEL_NAME,
        "messages": [
            {"role": "system", "content": "You are a cybersecurity assistant that outputs clear JSON."},
            {"role": "user", "content": prompt}
        ],
        "temperature": 0.0  # Makes the model deterministic (consistent output)
    }

    try:
        # Without a timeout a stalled model would block the whole batch forever.
        response = requests.post(LM_STUDIO_API_URL, json=payload, timeout=timeout)
        response.raise_for_status()  # Raise exception for HTTP errors
        result = response.json()

        # Extract the model's message content
        content = result["choices"][0]["message"]["content"].strip()
    except (requests.RequestException, KeyError, IndexError,
            TypeError, ValueError, AttributeError) as e:
        # Transport failure or unexpected response shape: report and keep going
        # so one bad email does not abort the caller's loop.
        print(f"Error processing email: {e}")
        return None, "Error during processing"

    return _parse_model_reply(content)

# === MAIN SCRIPT ===
if __name__ == "__main__":
    # Load CSV file into pandas DataFrame
    df = pd.read_csv(CSV_PATH)

    # Check that the CSV contains the required 'text' column
    if 'text' not in df.columns:
        raise ValueError("CSV must contain a 'text' column.")

    total = len(df)
    verdicts = []        # Llama verdicts (1, 0, or None on failure), one per row
    justifications = []  # Llama justifications, one per row

    # Classify each email in row order so results line up with the DataFrame
    for i, email in enumerate(df['text']):
        print(f"Processing email {i+1}/{total}...")
        verdict, justification = get_llama_analysis(email)
        verdicts.append(verdict)
        justifications.append(justification)
        time.sleep(0.5)  # Prevent overwhelming the API

    # Use the nullable integer dtype: a plain column mixing ints and None is
    # upcast to float, which would write verdicts as 1.0/0.0 in the CSV.
    df['LLM verdict'] = pd.array(verdicts, dtype="Int64")
    df['LLM justification'] = justifications

    # Save the results to a new CSV file
    df.to_csv(OUTPUT_PATH, index=False)

    print(f"✅ Classification complete! Results saved to {OUTPUT_PATH}")


Processing email 1/10...
Processing email 2/10...
Processing email 3/10...
Processing email 4/10...
Processing email 5/10...
Processing email 6/10...
Processing email 7/10...
Processing email 8/10...
Processing email 9/10...
Processing email 10/10...
✅ Classification complete! Results saved to emails_with_verdict.csv
