In [1]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from collections import Counter
import string
import logging
from tqdm.notebook import tqdm
import sys
from IPython.display import display, HTML

# Set up logging for Jupyter Notebook with custom formatting
def setup_notebook_logging():
    """Configure the root logger for readable output in a Jupyter Notebook.

    All handlers currently attached to the root logger are replaced by a
    single notebook-aware handler, so calling this function repeatedly (for
    example by re-running a cell) never duplicates log lines. The root level
    is set to INFO explicitly rather than through ``logging.basicConfig``,
    which silently does nothing when the root logger already has handlers.

    Returns:
        logging.Logger: The configured root logger.
    """
    from html import escape  # local import: only this block needs it

    class NotebookLoggingHandler(logging.Handler):
        """Print INFO/DEBUG records; render WARNING and above as coloured HTML."""

        def emit(self, record):
            try:
                message = self.format(record)
                if record.levelno >= logging.WARNING:
                    color = 'red' if record.levelno >= logging.ERROR else 'orange'
                    # Log messages may embed arbitrary text (e.g. a review),
                    # so escape them before placing them inside markup.
                    display(HTML(f'<div style="color: {color};">{escape(message)}</div>'))
                else:
                    print(message)
            except Exception:
                # Standard logging practice: a failing handler must not
                # break the code that issued the log call.
                self.handleError(record)

    handler = NotebookLoggingHandler()
    # The formatter has to live on the handler itself; a format passed to
    # basicConfig only applies to the handler basicConfig creates.
    handler.setFormatter(logging.Formatter(
        fmt='%(asctime)s - %(levelname)s - %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S',
    ))

    logger = logging.getLogger()
    for existing_handler in list(logger.handlers):
        logger.removeHandler(existing_handler)
    logger.addHandler(handler)
    logger.setLevel(logging.INFO)
    return logger

def download_nltk_resources():
    """
    Ensure the NLTK resources used by the analysis are available locally.

    Each resource is first looked up with ``nltk.data.find``; only resources
    that are missing are downloaded. Progress is shown with a tqdm bar and
    every step is reported through the root logger.

    Raises:
        RuntimeError: If a missing resource cannot be downloaded, either
            because the downloader raised or because it reported failure.
    """
    resources = [
        ('punkt', 'tokenizers'),
        ('stopwords', 'corpora'),
        ('wordnet', 'corpora'),
        ('averaged_perceptron_tagger', 'taggers')
    ]

    logger = logging.getLogger()

    for resource, resource_type in tqdm(resources, desc="Downloading NLTK Resources"):
        logger.info(f"Checking {resource} resource...")
        try:
            nltk.data.find(f'{resource_type}/{resource}')
        except LookupError:
            logger.warning(f"Downloading missing resource: {resource}")
        else:
            logger.info(f"✓ {resource} already downloaded")
            continue

        try:
            downloaded = nltk.download(resource, quiet=True)
        except Exception as e:
            logger.error(f"Failed to download {resource}: {str(e)}")
            raise RuntimeError(f"Critical resource {resource} could not be downloaded") from e

        # nltk.download() signals most failures (network error, unknown
        # package id) by returning False instead of raising, so the return
        # value must be checked before claiming success.
        if not downloaded:
            logger.error(f"Failed to download {resource}: downloader reported failure")
            raise RuntimeError(f"Critical resource {resource} could not be downloaded")

        logger.info(f"✓ Successfully downloaded {resource}")

def analyze_reviews(reviews):
    """
    Clean, tokenize and lemmatize customer reviews and count word frequencies.

    Each review is lower-cased and tokenized; punctuation tokens, purely
    numeric tokens and English stopwords are dropped, and the remaining
    tokens are lemmatized. A review that fails to process is logged and
    skipped, so ``processed_reviews`` can be shorter than ``reviews``.

    Args:
        reviews (list): List of review strings

    Returns:
        tuple: (processed_reviews, word_frequency) where ``processed_reviews``
        is a list of token lists and ``word_frequency`` is a
        ``collections.Counter`` over all kept tokens.

    Raises:
        Exception: Any error outside the per-review processing (for example a
            failed resource download) is logged and re-raised.
    """
    logger = logging.getLogger()

    try:
        # Initialize NLTK resources
        download_nltk_resources()

        # Initialize tools
        stop_words = set(stopwords.words('english'))
        lemmatizer = WordNetLemmatizer()
        # `token in string.punctuation` is a substring test against one long
        # string, so multi-character tokens such as "..." or "--" slip
        # through. Checking every character against a set handles them too.
        punctuation_chars = set(string.punctuation)

        processed_reviews = []
        all_words = []

        # Process each review with progress bar
        for review in tqdm(reviews, desc="Processing Reviews"):
            try:
                review = review.lower()
                logger.debug(f"Processing review: {review[:50]}...")

                lemmatized = [
                    lemmatizer.lemmatize(token)
                    for token in word_tokenize(review)
                    if not all(ch in punctuation_chars for ch in token)
                    and not token.isnumeric()
                    and token not in stop_words
                ]

                processed_reviews.append(lemmatized)
                all_words.extend(lemmatized)

            except Exception as e:
                # Best effort: one bad review must not abort the whole batch.
                logger.error(f"Error processing review: {str(e)}")
                logger.error(f"Problematic review: {review}")
                continue

        # Calculate word frequency
        word_freq = Counter(all_words)

        return processed_reviews, word_freq

    except Exception as e:
        logger.error(f"Critical error in analysis: {str(e)}")
        raise

# Usage example for Jupyter Notebook
def run_analysis(reviews):
    """
    Run the review analysis and render the results in a Jupyter Notebook.

    Sets up notebook logging, runs ``analyze_reviews`` and displays the
    processed reviews and the ten most common words as HTML.

    Args:
        reviews (list): List of review strings

    Returns:
        tuple: (processed_reviews, word_frequency) as returned by
        ``analyze_reviews``.

    Raises:
        Exception: Any error raised during analysis or display is logged and
            re-raised.
    """
    from html import escape  # local import: only this block needs it

    # Setup logging
    logger = setup_notebook_logging()
    logger.info("Starting review analysis...")

    try:
        # Show setup message
        display(HTML("<h3>📊 Review Analysis</h3>"))

        # Run analysis
        processed_reviews, word_frequency = analyze_reviews(reviews)

        # Display results. Tokens come from customer-written text, so they are
        # escaped before being placed inside HTML markup.
        display(HTML("<h4>Processed Reviews:</h4>"))
        for i, review in enumerate(processed_reviews, 1):
            display(HTML(f"<p><strong>Review {i}:</strong> {escape(' '.join(review))}</p>"))

        display(HTML("<h4>Most Common Words:</h4>"))
        for word, count in word_frequency.most_common(10):
            display(HTML(f"<p>{escape(word)}: {count}</p>"))

        logger.info("Analysis completed successfully!")
        return processed_reviews, word_frequency

    except Exception as e:
        logger.error(f"Analysis failed: {str(e)}")
        raise

# Sample reviews: a small hard-coded demo input mixing positive, negative and
# neutral feedback, passed to run_analysis() below.
reviews = [
    "The product quality is amazing! I love it.",
    "Absolutely terrible experience. The service was bad.",
    "Great product and fast delivery. Highly recommended!",
    "The product was okay, but I expected better quality.",
    "Worst experience ever. Never buying again."
]

# Run the analysis only when this code is executed as the main module; the
# results are bound to module-level names so they can be inspected afterwards.
if __name__ == "__main__":
    processed_reviews, word_frequency = run_analysis(reviews)

Starting review analysis...


Downloading NLTK Resources:   0%|          | 0/4 [00:00<?, ?it/s]

Checking punkt resource...
✓ punkt already downloaded
Checking stopwords resource...
✓ stopwords already downloaded
Checking wordnet resource...


✓ Successfully downloaded wordnet
Checking averaged_perceptron_tagger resource...


✓ Successfully downloaded averaged_perceptron_tagger


Processing Reviews:   0%|          | 0/5 [00:00<?, ?it/s]

Analysis completed successfully!
