In [14]:
# Import required libraries
import nltk
from nltk.tokenize import word_tokenize, sent_tokenize
import spacy

# Download required NLTK data.
# Older NLTK releases read the Punkt models from 'punkt'; newer releases
# (3.9 and later) look for 'punkt_tab' instead, so fetch both. On a release
# that does not know one of the names, download() reports the miss and
# returns False rather than raising.
nltk.download('punkt')
nltk.download('punkt_tab')

class TokenizationDemo:
    """Side-by-side demo of word, character and sentence tokenization.

    Word and sentence tokenization compare NLTK against spaCy; character
    tokenization uses plain Python only.
    """

    def __init__(self):
        # Initialize spaCy
        self.nlp = spacy.load('en_core_web_sm')

    def word_tokenization(self, text: str) -> dict:
        """
        Demonstrates word tokenization

        Args:
            text: The string to split into word-level tokens.

        Returns:
            A dict with three lists of token strings:
            'nltk' (``word_tokenize``), 'spacy' (spaCy tokenizer) and
            'basic' (``str.split`` on runs of whitespace).
        """
        # Using NLTK
        nltk_tokens = word_tokenize(text)

        # Using spaCy: only token boundaries are needed here, so run the
        # tokenizer alone via make_doc() instead of the whole pipeline.
        spacy_tokens = [token.text for token in self.nlp.make_doc(text)]

        # Simple split
        basic_tokens = text.split()

        return {
            'nltk': nltk_tokens,
            'spacy': spacy_tokens,
            'basic': basic_tokens
        }

    def character_tokenization(self, text: str) -> dict:
        """
        Demonstrates character tokenization

        Args:
            text: The string to split into single characters.

        Returns:
            A dict with two lists of one-character strings:
            'basic' (every character, whitespace included) and
            'no_space' (whitespace characters dropped).
        """
        # Simple character splitting
        char_tokens = list(text)

        # Without spaces (str.isspace also drops tabs, newlines, etc.)
        char_tokens_no_space = [char for char in text if not char.isspace()]

        return {
            'basic': char_tokens,
            'no_space': char_tokens_no_space
        }

    def sentence_tokenization(self, text: str) -> dict:
        """
        Demonstrates sentence tokenization

        Args:
            text: The string to split into sentences.

        Returns:
            A dict with two lists of sentence strings:
            'nltk' (``sent_tokenize``) and 'spacy' (``Doc.sents``).
        """
        # Using NLTK
        nltk_sents = sent_tokenize(text)

        # Using spaCy: the full pipeline is required here because
        # Doc.sents relies on sentence boundaries set by its components.
        spacy_sents = [sent.text for sent in self.nlp(text).sents]

        return {
            'nltk': nltk_sents,
            'spacy': spacy_sents
        }

# Usage Example
def main():
    """Run every tokenization demo on a sample text and print the results."""
    demo = TokenizationDemo()

    # Sample inputs, one per tokenization level
    word_sample = "Machine learning is fascinating! AI will change the world."
    char_sample = "NLP"
    sentence_sample = """Natural Language Processing is fascinating. 
                  It helps computers understand human language. 
                  This is a multi-sentence text."""

    # (heading, tokenizer method, input) for each demo, in display order
    demos = (
        ("\nWord Tokenization:", demo.word_tokenization, word_sample),
        ("\nCharacter Tokenization:", demo.character_tokenization, char_sample),
        ("\nSentence Tokenization:", demo.sentence_tokenization, sentence_sample),
    )

    for heading, tokenize, sample in demos:
        print(heading)
        # Each demo returns {method name: list of tokens}
        for method, tokens in tokenize(sample).items():
            print(f"{method}: {tokens}")

# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()


ModuleNotFoundError: No module named 'spacy'