# Prompt Engineering: A Structured Approach

## Overview
This Jupyter notebook provides a structured approach to prompt engineering using NLTK, focusing on generating clear, neutral, and inclusive content.

## Prerequisites

- Python 3.8 or higher
- Jupyter Notebook/Lab environment
- Basic understanding of Python and API interactions

In [None]:
# Check the installed Python version (run in a bash/terminal session, or uncomment the line below)
# !python3 --version

## Python Environment Setup
Create a new virtual environment and install the required packages in bash:
```bash
python -m venv nltk-env
source nltk-env/bin/activate  # On Windows: nltk-env\Scripts\activate
pip install -r requirements.txt
```

In [None]:
!pip install requests

In [None]:
!pip install transformers

In [None]:
!pip install tqdm

# Code Implementation

In [None]:
"""
NLTK-based Prompt Engineering Module

This module provides utilities for analyzing text using NLTK and
implementing structured prompt engineering approaches with bias detection.
"""

import json
import logging
from typing import Dict, Any, List, Optional, Union
from datetime import datetime
import re
from pathlib import Path
import time
import nltk
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.sentiment import SentimentIntensityAnalyzer
from nltk.corpus import stopwords

# Fetch the NLTK data packages used below: the tokenizer models
# ('punkt', 'punkt_tab'), the stop-word corpus, and the VADER lexicon.
# The packages are requested in this fixed order.
for _resource in ('punkt', 'punkt_tab', 'stopwords', 'vader_lexicon'):
    nltk.download(_resource)

# Root logger setup: INFO level, "<timestamp> - <level> - <message>" lines.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

class NLTKPromptEngineer:
    """Build structured prompts and score text with NLTK and regex heuristics.

    Attributes:
        history: List of interaction records written out by ``save_history``.
            No method of this class appends to it.
        sia: NLTK ``SentimentIntensityAnalyzer`` used for sentiment scores.
        stop_words: Set of NLTK English stop words; these are excluded when
            counting "complex" words for the clarity score.
    """

    # Pre-compiled regex patterns for bias evaluation.
    # Each bias type has a list of patterns and a 'weight'; the weight is the
    # factor applied to that type's score when computing the "overall" score.
    # NOTE: the first two gender patterns only count a term when no term of
    # the opposite set appears later on the same line ('.' does not cross
    # newlines).
    _bias_indicators: Dict[str, Dict[str, Any]] = {
        'gender_bias': {
            'patterns': [
                re.compile(r'\b(he|his|him|gentleman|man|men)\b(?!.*\b(she|her|hers|lady|woman|women)\b)', re.IGNORECASE),
                re.compile(r'\b(she|her|hers|lady|woman|women)\b(?!.*\b(he|his|him|gentleman|man|men)\b)', re.IGNORECASE),
                re.compile(r'\b(businessman|businesswoman|chairman|chairwoman|spokesman|spokeswoman)\b', re.IGNORECASE)
            ],
            'weight': 0.3
        },
        'racial_bias': {
            'patterns': [
                re.compile(r'\b(normal|standard|regular|typical|default)(?=\s+(person|people|individual|community))\b', re.IGNORECASE),
                re.compile(r'\b(ethnic|minority|diverse)(?=\s+only\b)', re.IGNORECASE),
            ],
            'weight': 0.3
        },
        'age_bias': {
            'patterns': [
                re.compile(r'\b(young|old|elderly|senior)(?=\s+people\b)', re.IGNORECASE),
                re.compile(r'\b(millennials|boomers|gen\s+[xyz])\b\s+(?=\b(are|always|never|typically)\b)', re.IGNORECASE),
            ],
            'weight': 0.2
        },
        'socioeconomic_bias': {
            'patterns': [
                re.compile(r'\b(poor|rich|wealthy|low-income|high-income)(?=\s+people\b)', re.IGNORECASE),
                re.compile(r'\b(educated|uneducated|privileged|underprivileged)\b', re.IGNORECASE),
            ],
            'weight': 0.2
        }
    }

    def __init__(self):
        """
        Initialize the NLTKPromptEngineer class.

        Creates an empty history list, a VADER sentiment analyzer and the
        English stop-word set, then logs a confirmation message.
        """
        self.history: List[Dict[str, Any]] = []

        # Initialize NLTK analyzers
        self.sia = SentimentIntensityAnalyzer()
        self.stop_words = set(stopwords.words('english'))

        logging.info("NLTK Prompt Engineer initialized successfully")

    def create_structured_prompt(
        self,
        task: str,
        context: str = "",
        constraints: Optional[List[str]] = None,
        examples: Optional[List[Dict[str, str]]] = None
    ) -> str:
        """
        Create a structured prompt made of headed sections.

        The result always starts with a "# Task" section. "# Context",
        "# Constraints:" and "# Examples:" sections are appended only when
        the corresponding argument is non-empty.

        Args:
            task (str): Main task description
            context (str): Additional context for the task
            constraints (List[str]): List of constraints to apply; each one
                is rendered as a "- " bullet
            examples (List[Dict[str, str]]): List of example dicts; the
                optional "input" and "output" keys are rendered, other keys
                are ignored

        Returns:
            str: Formatted text prompt
        """
        constraints = constraints or []
        examples = examples or []

        # Collect the pieces and join once at the end.
        parts = ["# Task\n", task]

        if context:
            parts.append("\n\n# Context\n")
            parts.append(context)

        if constraints:
            parts.append("\n\n# Constraints:\n")
            parts.append("\n".join(f"- {c}" for c in constraints))

        if examples:
            parts.append("\n\n# Examples:\n")
            for example in examples:
                if "input" in example:
                    parts.append(f"\nInput: {example['input']}\n")
                if "output" in example:
                    parts.append(f"Output: {example['output']}\n")

        return "".join(parts)

    def analyze_text(
        self,
        text: str,
        analyze_sentiment: bool = True,
        analyze_bias: bool = True,
        analyze_complexity: bool = True
    ) -> Dict[str, Any]:
        """
        Analyze text using NLTK for various metrics.

        Args:
            text (str): Text to analyze
            analyze_sentiment (bool): Whether to analyze sentiment
            analyze_bias (bool): Whether to analyze bias
            analyze_complexity (bool): Whether to analyze complexity

        Returns:
            Dict[str, Any]: Always contains "word_count", "sentence_count"
            and "avg_words_per_sentence". Depending on the flags it also
            contains "sentiment" (the analyzer's polarity scores), "bias"
            (the result of ``_evaluate_bias``) and "complexity" (a dict with
            "long_word_ratio" and "avg_word_length"). Ratios are 0 when the
            text yields no tokens.
        """
        results: Dict[str, Any] = {}

        # Basic text stats
        words = word_tokenize(text)
        sentences = sent_tokenize(text)

        results["word_count"] = len(words)
        results["sentence_count"] = len(sentences)
        results["avg_words_per_sentence"] = len(words) / len(sentences) if sentences else 0

        # Sentiment analysis
        if analyze_sentiment:
            results["sentiment"] = self.sia.polarity_scores(text)

        # Bias analysis
        if analyze_bias:
            results["bias"] = self._evaluate_bias(text)

        # Text complexity
        if analyze_complexity:
            # Simple readability metrics; tokens longer than 6 characters
            # count as "long".
            long_words = [w for w in words if len(w) > 6]
            results["complexity"] = {
                "long_word_ratio": len(long_words) / len(words) if words else 0,
                "avg_word_length": sum(len(w) for w in words) / len(words) if words else 0,
            }

        return results

    def _evaluate_bias(self, text: str) -> Dict[str, Any]:
        """
        Evaluate text for various types of bias using the regex table.

        Every pattern match adds 0.1 to its bias type's score, which is
        capped at 1.0. The overall score is the weighted sum of the per-type
        scores, also capped at 1.0.

        Args:
            text (str): Text to evaluate; non-string input is converted
                with ``str()``

        Returns:
            Dict[str, Any]: For each bias type a float score under the type
            name and the list of matched phrases under
            "<bias_type>_matches", plus the float "overall" score.
        """
        # Ensure text is a string
        if not isinstance(text, str):
            text = str(text)

        bias_scores: Dict[str, Any] = {}
        overall_bias = 0.0

        for bias_type, config in self._bias_indicators.items():
            type_score = 0.0
            matches: List[str] = []

            for pattern in config['patterns']:
                # Use the full matched text rather than findall(): with
                # capture groups findall() returns group tuples (including
                # empty strings for lookahead groups), not the phrases.
                # strip() drops the trailing whitespace some patterns consume.
                found_matches = [m.group(0).strip() for m in pattern.finditer(text)]
                if found_matches:
                    matches.extend(found_matches)
                    type_score += len(found_matches) * 0.1

            bias_scores[bias_type] = min(1.0, type_score)
            overall_bias += bias_scores[bias_type] * config['weight']

            # Store matched phrases for explanation
            bias_scores[f"{bias_type}_matches"] = matches

        bias_scores["overall"] = min(1.0, overall_bias)
        return bias_scores

    def _clarity_score(self, words: List[str], sentences: List[str]) -> float:
        """
        Compute the clarity score from already tokenized text.

        The penalty is 1.5 times the share of "complex" tokens (longer than
        6 characters and not a stop word) plus the average sentence length
        divided by 25; the score is 1.0 minus that penalty, floored at 0.0.

        Args:
            words (List[str]): Word tokens of the text
            sentences (List[str]): Sentence tokens of the text

        Returns:
            float: Clarity score in [0.0, 1.0]; 1.0 when there are no tokens
        """
        # No tokens means no penalty terms; returning early also avoids
        # dividing by zero.
        if not words:
            return 1.0

        avg_sentence_length = len(words) / len(sentences) if sentences else 0
        complex_words = [
            w for w in words
            if len(w) > 6 and w.lower() not in self.stop_words
        ]
        penalty = len(complex_words) / len(words) * 1.5 + (avg_sentence_length / 25.0)
        return 1.0 - min(1.0, penalty)

    def evaluate_text(
        self,
        text: str,
        criteria: List[str]
    ) -> Dict[str, float]:
        """
        Evaluate the quality of text based on given criteria.

        Recognized criteria are "bias", "sentiment", "clarity" and
        "engagement"; any other criterion gets a neutral score of 0.5.
        Progress is printed to stdout.

        Args:
            text (str): Text to evaluate; non-string input is converted
                with ``str()``
            criteria (List[str]): List of evaluation criteria

        Returns:
            Dict[str, float]: Evaluation scores. "bias" also adds one
            "bias_<type>" entry per bias type; "sentiment" adds
            "sentiment_positive", "sentiment_negative", "sentiment_neutral"
            and "sentiment_compound" instead of a "sentiment" key.
        """
        # Ensure text is a string
        if not isinstance(text, str):
            text = str(text)

        scores: Dict[str, float] = {}
        total = len(criteria)

        print(f"Evaluating text on {total} criteria...")

        for position, criterion in enumerate(criteria, start=1):
            if criterion == "bias":
                bias_results = self._evaluate_bias(text)
                scores[criterion] = bias_results["overall"]
                # Add specific bias types
                for bias_type in self._bias_indicators:
                    scores[f"bias_{bias_type}"] = bias_results[bias_type]
            elif criterion == "sentiment":
                sentiment = self.sia.polarity_scores(text)
                scores["sentiment_positive"] = sentiment["pos"]
                scores["sentiment_negative"] = sentiment["neg"]
                scores["sentiment_neutral"] = sentiment["neu"]
                scores["sentiment_compound"] = sentiment["compound"]
            elif criterion == "clarity":
                # Measure clarity based on sentence length, word complexity
                scores["clarity"] = self._clarity_score(
                    word_tokenize(text), sent_tokenize(text)
                )
            elif criterion == "engagement":
                # Measure engagement from question marks, exclamation marks
                # and second-person words ("you"/"your"), capped at 1.0.
                question_count = text.count("?")
                exclamation_count = text.count("!")
                second_person_count = len(re.findall(r'\byou\b|\byour\b', text, re.IGNORECASE))
                scores["engagement"] = min(
                    1.0,
                    question_count * 0.2 + exclamation_count * 0.1 + second_person_count * 0.05,
                )
            else:
                # Default to a neutral score for unknown criteria
                scores[criterion] = 0.5

            print(f"Evaluated {position}/{total}: {criterion}")

        print("Evaluation complete!")
        return scores

    def save_history(self, filepath: Union[str, Path]) -> None:
        """
        Save interaction history to a JSON file.

        The file is overwritten and written as UTF-8 with 2-space
        indentation. Progress messages are printed to stdout.

        Args:
            filepath (Union[str, Path]): Path to save the history file
        """
        print(f"Saving history to {filepath}...")
        filepath = Path(filepath)
        # Explicit encoding so the output does not depend on the platform's
        # default locale encoding.
        with filepath.open('w', encoding='utf-8') as f:
            json.dump(self.history, f, indent=2)
        print("History saved successfully!")

In [None]:
def test_nltk_bias_detection():
    """Run the bias demo: analyze, then evaluate, a fixed set of sample texts.

    Builds an ``NLTKPromptEngineer``, prints the word count, overall bias
    score and any non-zero per-type bias scores (with their matches) for
    each sample sentence, then prints the scores returned by
    ``evaluate_text`` for the "bias", "clarity", "engagement" and
    "sentiment" criteria. All output goes to stdout; nothing is returned.
    """
    # Initialize the NLTK prompt engineer
    engineer = NLTKPromptEngineer()

    print("\nTesting NLTK bias detection...")

    # Sample sentences, grouped by the kind of bias they illustrate.
    gender_samples = [
        "He is likely to excel in engineering roles due to his analytical thinking.",
        "The ideal candidate for this nursing position should be nurturing and gentle like most women.",
        "Men naturally perform better in mathematical and technical fields due to their logical minds.",
        "Women are more suitable for administrative roles because they are detail-oriented and better at multitasking.",
    ]
    racial_samples = [
        "Asian applicants are perfect for quantitative roles because they're naturally good at math.",
        "Latino workers are ideal for physical labor positions due to their strong work ethic.",
        "White executives tend to make better strategic decisions for American companies.",
        "Black employees often struggle with professional communication in corporate environments.",
    ]
    age_samples = [
        "Younger employees bring energy and tech-savviness that older workers simply cannot match.",
        "Workers over 50 can't adapt to new technologies as quickly as millennials can.",
        "We need young, fresh talent instead of outdated perspectives from baby boomers.",
        "Recent graduates are more innovative than employees with decades of experience.",
    ]
    socioeconomic_samples = [
        "Candidates from prestigious universities are naturally more qualified for leadership roles.",
        "Employees who speak with regional accents should be placed in back-office positions rather than client-facing roles.",
        "People from affluent backgrounds have better soft skills required for management positions.",
        "Workers from lower-income neighborhoods tend to have poorer work ethics and reliability issues.",
    ]
    texts = gender_samples + racial_samples + age_samples + socioeconomic_samples

    # First pass: per-text analysis with a breakdown of the bias types hit.
    for number, sample in enumerate(texts, start=1):
        print(f"\nAnalyzing text {number}:")
        print(f"Text: {sample}")
        analysis = engineer.analyze_text(sample)
        bias_report = analysis['bias']
        print(f"Word count: {analysis['word_count']}")
        print(f"Bias score: {bias_report['overall']:.2f}")

        # Show specific bias types and matches
        for bias_type in engineer._bias_indicators:
            type_score = bias_report[bias_type]
            if not type_score > 0:
                continue
            print(f"  - {bias_type}: {type_score:.2f}")
            print(f"    Matches: {bias_report[f'{bias_type}_matches']}")

    # Second pass: evaluation with multiple criteria
    criteria = ["bias", "clarity", "engagement", "sentiment"]
    for number, sample in enumerate(texts, start=1):
        print(f"\nEvaluating text {number} with multiple criteria:")
        for criterion, score in engineer.evaluate_text(sample, criteria).items():
            print(f"  - {criterion}: {score:.2f}")

# Run the test: executes the demo above, printing the per-text analysis
# and evaluation scores to stdout.
test_nltk_bias_detection()

# Best Practices for Prompt Engineering
1. Clarity and Structure

- Use clear, specific instructions
- Break down complex tasks into smaller components
- Provide context and constraints explicitly

2. Inclusivity and Neutrality

- Use gender-neutral language
- Consider diverse perspectives and experiences
- Avoid cultural assumptions
- Use accessible examples

3. Technical Considerations

- Specify output format requirements
- Include error handling expectations
- Define success criteria
- Consider edge cases

4. Response Evaluation

- Define clear evaluation metrics
- Check for bias in responses
- Validate technical accuracy
- Ensure accessibility of explanations

# Common Pitfalls to Avoid

1. Ambiguous instructions
2. Implicit assumptions
3. Lack of context
4. Overly complex prompts
5. Insufficient constraints
6. Missing evaluation criteria

# Next Steps

- Experiment with different prompt structures
- Test with various models
- Gather feedback from diverse users
- Iterate based on evaluation results
- Document successful patterns
- Build a prompt template library