# LLM-Powered Review Insights Generator

This notebook demonstrates how to use Large Language Models (LLMs) to generate structured insights from app reviews. This approach:

1. Takes a sample of reviews
2. Preprocesses and formats them
3. Sends the formatted reviews to an LLM (like GPT-4)
4. Extracts structured insights about general feedback, issues, and suggestions

The code is designed to be cost-effective and efficient, minimizing API calls while providing valuable insights.

## Setup & Imports

First, let's import the necessary modules and set up the environment.

In [1]:
# Standard library imports
import os
import sys
import json
import random
from datetime import datetime
from typing import Dict, List, Any, Optional, Union

# Data analysis imports
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans

# Visualization imports
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import display, HTML
import ipywidgets as widgets

# Make the repository root (the parent of the working directory) importable
# so that the `src` package can be resolved from inside the notebooks folder.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if project_root not in sys.path:
    sys.path.append(project_root)

# Project modules are imported on a best-effort basis: a failure is reported
# with a hint about the expected layout instead of aborting the cell.
try:
    from src.modules.llm.openai_llm import OpenAILLM
    from src.modules.preprocessing.nlp_preprocessor import NLPPreprocessor
    from src.config import config
except ImportError as import_error:
    for message in (
        f"Error importing project modules: {import_error}",
        "Make sure you're running this notebook from the 'notebooks' directory.",
        "Project structure should be: app-reviews-ai/notebooks/llm_insights.ipynb",
    ):
        print(message)

DEBUG: Applying APP_ID from environment: 'in.goindigo.android'


## Load and Prepare Data

We'll load sample review data and prepare it for analysis. You can use either mock data or real app reviews.

In [2]:
# Function to preprocess reviews data if needed
def add_text_preprocessing(df):
    """Return a copy of ``df`` with ``cleaned_text`` and ``normalized_text`` columns.

    If both columns are already present, the copy is returned unchanged.
    Otherwise the project's ``NLPPreprocessor`` is tried first; if that fails
    for any reason, a regex-based fallback is used (lowercasing, removal of
    punctuation and digits, whitespace collapsing, then stopword filtering).

    Args:
        df: DataFrame with a ``text`` column holding the raw review text.

    Returns:
        A new DataFrame; the input frame is not modified.
    """
    # Work on a copy so the caller's frame is left untouched
    processed_df = df.copy()

    # Nothing to do when both derived columns are already there
    if 'cleaned_text' in processed_df.columns and 'normalized_text' in processed_df.columns:
        print("Text preprocessing columns already exist")
        return processed_df

    print("Adding text preprocessing columns...")
    # This function only adds text preprocessing - it assumes other features exist
    try:
        # Import NLP preprocessor
        sys.path.insert(0, project_root)
        from src.modules.preprocessing.nlp_preprocessor import NLPPreprocessor

        # Initialize the preprocessor (only if it has not been initialized yet)
        preprocessor = NLPPreprocessor({"enable_lemmatization": True})
        if not hasattr(preprocessor, 'is_initialized') or not preprocessor.is_initialized:
            preprocessor.initialize()

        # Apply preprocessing to text column; missing values become ""
        print("Cleaning and normalizing review text...")
        processed_df['cleaned_text'] = processed_df['text'].apply(
            lambda x: preprocessor.clean_text(str(x)) if pd.notna(x) else "")

        processed_df['normalized_text'] = processed_df['cleaned_text'].apply(
            lambda x: preprocessor.normalize_text(x) if pd.notna(x) else "")

        print("Text preprocessing complete.")
    except Exception as e:
        # Deliberate best-effort: any failure in the advanced path (missing
        # module, initialization error, ...) switches to the basic cleaner.
        print(f"Error during text preprocessing: {e}")
        print("Falling back to basic text cleaning...")

        import re

        try:
            # Both the NLTK import and the corpus lookup live inside this
            # try block, so a missing package or missing corpus data falls
            # through to the built-in list instead of aborting the fallback.
            import nltk
            from nltk.corpus import stopwords
            nltk.download('stopwords', quiet=True)
            stopwords_list = set(stopwords.words('english'))
        except Exception:
            # If NLTK is not available, use a small set of common stopwords
            stopwords_list = {'a', 'an', 'the', 'and', 'or', 'but', 'is', 'are',
                              'was', 'were', 'to', 'of', 'in', 'for', 'with'}

        # Simple cleaning function
        def basic_clean(text):
            if not isinstance(text, str):
                text = str(text)
            # Convert to lowercase
            text = text.lower()
            # Remove special characters and punctuation
            text = re.sub(r'[^\w\s]', '', text)
            # Remove numbers
            text = re.sub(r'\d+', '', text)
            # Remove extra spaces
            text = re.sub(r'\s+', ' ', text).strip()
            return text

        # Simple stopword removal
        def remove_stopwords(text):
            return ' '.join(word for word in text.split() if word not in stopwords_list)

        # Apply basic cleaning; missing values become ""
        processed_df['cleaned_text'] = processed_df['text'].apply(
            lambda x: basic_clean(x) if pd.notna(x) else "")

        # Apply stopword removal for normalization
        processed_df['normalized_text'] = processed_df['cleaned_text'].apply(
            lambda x: remove_stopwords(x) if pd.notna(x) else "")

        print("Basic text preprocessing complete.")

    return processed_df

# Function to load and prepare review data
def load_reviews(use_mock_data=False, use_processed=True, add_preprocessing=False):
    """Return a reviews DataFrame from disk, falling back to generated mock data.

    Paths are resolved relative to the parent of the current working
    directory; the ``data`` and ``data/processed`` folders are created if
    they do not exist. When ``use_mock_data`` is False the lookup order is:

      1. ``data/processed/processed_reviews.csv`` (only when ``use_processed``);
         ``add_preprocessing`` decides whether missing text columns are added.
      2. ``data/reviews.csv``; it is always preprocessed and the result is
         written to the processed path for later runs.
      3. Mock reviews (also used directly when ``use_mock_data`` is True).
    """
    base_dir = os.path.abspath(os.path.join(os.getcwd(), '..'))
    data_dir = os.path.join(base_dir, 'data')
    processed_data_dir = os.path.join(data_dir, 'processed')
    raw_csv_path = os.path.join(data_dir, 'reviews.csv')
    processed_csv_path = os.path.join(processed_data_dir, 'processed_reviews.csv')

    # Make sure both data folders exist before reading or writing
    for folder in (data_dir, processed_data_dir):
        os.makedirs(folder, exist_ok=True)

    def read_with_dates(csv_path):
        # Read a CSV and coerce the optional 'date' column to datetimes
        frame = pd.read_csv(csv_path)
        if 'date' in frame.columns:
            frame['date'] = pd.to_datetime(frame['date'], errors='coerce')
        return frame

    if not use_mock_data:
        # First choice: the cached, already processed file
        if use_processed and os.path.exists(processed_csv_path):
            try:
                print(f"Loading preprocessed reviews from: {processed_csv_path}")
                df = read_with_dates(processed_csv_path)
                print(f"Successfully loaded {len(df)} preprocessed reviews")

                text_columns_missing = not all(
                    column in df.columns for column in ('cleaned_text', 'normalized_text')
                )
                if text_columns_missing and add_preprocessing:
                    print("Adding text preprocessing columns as requested...")
                    df = add_text_preprocessing(df)
                elif text_columns_missing:
                    print("Note: Text preprocessing columns (cleaned_text, normalized_text) are missing.")
                    print("      This is fine for basic analysis, but for advanced clustering you can")
                    print("      set add_preprocessing=True when calling load_reviews().")

                return df
            except Exception as load_error:
                print(f"Error loading preprocessed data: {load_error}")
                print("Trying raw data instead...")

        # Second choice: the raw export, which is always preprocessed
        if os.path.exists(raw_csv_path):
            try:
                print(f"Loading raw reviews from: {raw_csv_path}")
                raw_df = read_with_dates(raw_csv_path)
                print(f"Successfully loaded {len(raw_df)} raw reviews")

                print("Processing raw data...")
                processed_df = add_text_preprocessing(raw_df)

                # Cache the processed frame; a failed write is only a warning
                try:
                    processed_df.to_csv(processed_csv_path, index=False)
                    print(f"Saved preprocessed data to: {processed_csv_path}")
                except Exception as save_error:
                    print(f"Warning: Could not save preprocessed data: {save_error}")

                return processed_df
            except Exception as load_error:
                print(f"Error loading raw data: {load_error}")
                print("Falling back to mock data...")

    # Reached when mock data was requested or no real data could be loaded
    print("Generating mock review data...")

    try:
        from src.modules.acquisition.google_play import GooglePlayAcquisition
        acquisition = GooglePlayAcquisition({})
        mock_df = acquisition.get_mock_reviews(500)
        print(f"Generated {len(mock_df)} mock reviews")
    except Exception as mock_error:
        print(f"Error generating mock data: {mock_error}")

        # Very simple built-in dataset: sentiment -> (star rating, candidate texts)
        templates = {
            'positive': (5, [
                "Great app, love it!",
                "This app is fantastic. Everything works perfectly.",
                "Best airline app I've used. Very easy to book flights."
            ]),
            'negative': (1, [
                "Terrible app, constantly crashes.",
                "Cannot book flights. Always get an error at payment.",
                "Worst app ever. Customer service is non-existent."
            ]),
            'neutral': (3, [
                "App is okay. Could use some improvements.",
                "Works for basic tasks but lacks some features.",
                "Average app experience. Nothing special."
            ]),
        }

        mock_rows = []
        for idx in range(500):
            sentiment = random.choice(['positive', 'negative', 'neutral'])
            rating, candidates = templates[sentiment]
            text = random.choice(candidates)
            mock_rows.append({
                'review_id': f"mock_{idx}",
                'text': text,
                'rating': rating,
                'date': pd.to_datetime('now') - pd.Timedelta(days=random.randint(1, 365))
            })

        mock_df = pd.DataFrame(mock_rows)
        print(f"Generated {len(mock_df)} simple mock reviews")

    # Mock data goes through the same text preprocessing as real data
    return add_text_preprocessing(mock_df)

In [3]:
# Load or generate review data
# Set use_mock_data=False to load real data from CSV
use_mock_data = False
use_processed = True     # Set to True to use the pre-processed data from data_preprocessing.ipynb
add_preprocessing = True  # Set to True to add text preprocessing columns if missing

# Load the data
reviews_df = load_reviews(
    use_mock_data=use_mock_data,
    use_processed=use_processed,
    add_preprocessing=add_preprocessing
)

# Check if we have text preprocessing columns. The None test comes first so
# that a failed load reaches the message at the bottom instead of raising
# AttributeError on `.columns`.
has_preprocessing = (
    reviews_df is not None
    and 'cleaned_text' in reviews_df.columns
    and 'normalized_text' in reviews_df.columns
)

# Display the first few reviews
if reviews_df is not None:
    # Display with preprocessing columns if available
    if has_preprocessing:
        display(reviews_df[['text', 'cleaned_text', 'normalized_text']].head())
    else:
        # Only preview the columns that are actually present
        preview_columns = [c for c in ('text', 'rating', 'date') if c in reviews_df.columns]
        display(reviews_df[preview_columns].head())

    # Display dataset summary
    print("\nDataset summary:")
    print(f"Total reviews: {len(reviews_df)}")
    if 'rating' in reviews_df.columns:
        rating_counts = reviews_df['rating'].value_counts().sort_index()
        print(f"Rating distribution:\n{rating_counts}")

        # Calculate percentages
        rating_percentages = (rating_counts / len(reviews_df) * 100).round(1)
        print(f"\nRating percentages:\n{rating_percentages}")

    # Show date range
    if 'date' in reviews_df.columns:
        print(f"\nDate range: {reviews_df['date'].min()} to {reviews_df['date'].max()}")

    # Show the 15 most frequent words if we have normalized text
    if has_preprocessing:
        from collections import Counter
        all_words = ' '.join(reviews_df['normalized_text'].fillna('').astype(str)).split()
        top_words = Counter(all_words).most_common(15)
        print(f"\nTop 15 most frequent words:\n{top_words}")

    # Whether we're using preprocessing in our analysis
    print(f"\nUsing text preprocessing columns for analysis: {has_preprocessing}")
else:
    print("Failed to load review data. Please check the paths and try again.")

Loading preprocessed reviews from: /Users/dipesh/Local-Projects/indigo-reviews-ai/data/processed/processed_reviews.csv
Successfully loaded 10000 preprocessed reviews


Unnamed: 0,text,cleaned_text,normalized_text
0,very nice,very nice,nice
1,"very bad interface, booked a normal fare ticke...",very bad interface booked a normal fare ticket...,bad interface booked normal fare ticket even s...
2,good update information,good update information,good update information
3,sucks big time,sucks big time,suck big time
4,"worst app and worst website, its like some chi...",worst app and worst website its like some chil...,worst app worst website like child developed w...



Dataset summary:
Total reviews: 10000
Rating distribution:
rating
1    3688
2     549
3     539
4     822
5    4402
Name: count, dtype: int64

Rating percentages:
rating
1    36.9
2     5.5
3     5.4
4     8.2
5    44.0
Name: count, dtype: float64

Date range: 2023-08-27 12:35:07 to 2025-05-07 12:19:43

Top 15 most frequent words:
[('app', 2948), ('good', 2330), ('flight', 1194), ('indigo', 1172), ('booking', 915), ('service', 871), ('time', 771), ('experience', 663), ('worst', 636), ('nice', 557), ('ticket', 537), ('even', 488), ('customer', 444), ('airline', 443), ('excellent', 440)]

Using text preprocessing columns for analysis: True


## Create Sample for Analysis

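Rather than analyzing all reviews, we'll create a stratified sample: every rating level gets a quota set by the chosen rating focus (not by the raw rating distribution), and reviews within a rating can optionally be clustered by topic so the sample covers different themes. This approach: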

1. Is more cost-effective (fewer tokens sent to the LLM)
2. Focuses the analysis on the most relevant reviews
3. Ensures we have representation across all rating levels

In [4]:
def _allocate_rating_counts(sample_size, rating_focus):
    """Split ``sample_size`` into a per-rating quota (1-5 stars) for the given focus."""
    weight_presets = {
        # Focus on negative reviews (1-2 stars)
        'negative': {1: 0.4, 2: 0.3, 3: 0.15, 4: 0.1, 5: 0.05},
        # Focus on positive reviews (4-5 stars)
        'positive': {1: 0.05, 2: 0.1, 3: 0.15, 4: 0.3, 5: 0.4},
        # Equal representation across all ratings
        'equal': {1: 0.2, 2: 0.2, 3: 0.2, 4: 0.2, 5: 0.2},
        # Balanced approach that slightly emphasizes extremes
        'balanced': {1: 0.25, 2: 0.15, 3: 0.2, 4: 0.15, 5: 0.25},
    }
    # Any unrecognized focus behaves like 'balanced'
    rating_weights = weight_presets.get(rating_focus, weight_presets['balanced'])

    # Every rating gets at least one slot
    rating_counts = {
        rating: max(1, int(sample_size * weight))
        for rating, weight in rating_weights.items()
    }

    # Truncation can leave the total off target; hand the difference out one
    # unit at a time, starting with the most heavily weighted ratings.
    diff = sample_size - sum(rating_counts.values())
    if diff:
        by_weight = sorted(rating_weights.items(), key=lambda item: item[1], reverse=True)
        step = 1 if diff > 0 else -1
        for i in range(abs(diff)):
            rating = by_weight[i % len(by_weight)][0]
            rating_counts[rating] = max(1, rating_counts[rating] + step)

    return rating_counts


def _sample_rating_group(rating_reviews, count, clustering, use_text_preprocessing):
    """Pick at most ``count`` reviews from the rows of a single rating."""
    # Fewer rows than requested: take all of them
    if len(rating_reviews) <= count:
        return rating_reviews

    # Clustering disabled or group too small: plain random sampling
    if clustering == 'none' or len(rating_reviews) <= 10:
        return rating_reviews.sample(count, random_state=42)

    # Cluster on the normalized text when asked to and available, else raw text
    if use_text_preprocessing and 'normalized_text' in rating_reviews.columns:
        text_column = 'normalized_text'
    else:
        text_column = 'text'

    try:
        # Convert text to TF-IDF features
        vectorizer = TfidfVectorizer(max_features=100, stop_words='english')
        tfidf_matrix = vectorizer.fit_transform(rating_reviews[text_column].fillna(''))

        # Between 3 and 10 clusters, depending on the quota
        n_clusters = min(10, max(3, count))

        kmeans = KMeans(n_clusters=n_clusters, random_state=42)
        clustered = rating_reviews.copy()  # explicit copy before adding a column
        clustered.loc[:, 'cluster'] = kmeans.fit_predict(tfidf_matrix)

        # Sample evenly from each non-empty cluster
        per_cluster = max(1, count // n_clusters)
        cluster_samples = []
        for cluster_id in range(n_clusters):
            members = clustered[clustered['cluster'] == cluster_id]
            if len(members) > 0:
                cluster_samples.append(
                    members.sample(min(len(members), per_cluster), random_state=42)
                )
        picked = pd.concat(cluster_samples)

        if len(picked) < count:
            # Top up with random reviews that were not picked yet
            leftover = clustered[~clustered.index.isin(picked.index)]
            if len(leftover) > 0:
                extra = leftover.sample(min(len(leftover), count - len(picked)), random_state=42)
                picked = pd.concat([picked, extra])
        elif len(picked) > count:
            # With a quota below 3 the minimum of 3 clusters would otherwise
            # yield more reviews than requested.
            picked = picked.sample(count, random_state=42)

        print("Using topic modeling for clustering")
        print(f"Found {n_clusters} topics")
        return picked
    except Exception as e:
        # Best-effort: any clustering failure degrades to random sampling
        print(f"Error in clustering: {e}")
        return rating_reviews.sample(count, random_state=42)


def _format_review(position, review):
    """Render one review row as the text block embedded in the prompt."""
    date_str = review['date'] if 'date' in review and pd.notna(review['date']) else 'Unknown date'
    text = review['text'] if 'text' in review and pd.notna(review['text']) else 'No text provided'
    rating = review['rating'] if 'rating' in review and pd.notna(review['rating']) else 'No rating'

    # Add version information if available
    version_info = ""
    if 'version' in review and pd.notna(review['version']):
        version_info = f" (Version: {review['version']})"
    elif 'reviewCreatedVersion' in review and pd.notna(review['reviewCreatedVersion']):
        version_info = f" (Version: {review['reviewCreatedVersion']})"

    return f"Review #{position}: {rating}/5 stars on {date_str}{version_info}\n{text}\n"


def prepare_insight_prompt(
    sample_size=150,  # Increased default for larger dataset
    rating_focus='balanced',
    clustering='auto',
    model='gpt-4o',
    focus_areas=None,
    use_text_preprocessing=False,  # Set to False by default since processed columns might not exist
    reviews=None,
):
    """Prepare a sample of reviews and format it for LLM analysis.

    Args:
        sample_size: Target number of reviews in the sample.
        rating_focus: 'negative', 'positive', 'equal' or 'balanced'; any other
            value is treated as 'balanced'.
        clustering: 'none' disables TF-IDF/K-means sampling; any other value
            enables it for rating groups with more than 10 reviews.
        model: Not used for sampling; echoed back in the result.
        focus_areas: Iterable of focus area names, or a single name as a
            string. Falsy values select the built-in defaults.
        use_text_preprocessing: Cluster on 'normalized_text' when that column
            exists; otherwise the raw 'text' column is used.
        reviews: DataFrame to sample from (needs 'rating' and 'text' columns).
            Defaults to the notebook-level ``reviews_df``.

    Returns:
        dict with the input settings, the resolved ``focus_areas``, the
        ``system_prompt`` and ``custom_prompt`` strings, the sampled
        ``sample_df`` and the list of ``formatted_reviews``.
    """
    source_reviews = reviews_df if reviews is None else reviews

    # Steps 1-2: per-rating quotas
    rating_counts = _allocate_rating_counts(sample_size, rating_focus)

    # Step 3: sample each rating group according to its quota
    parts = []
    for rating, count in rating_counts.items():
        rating_reviews = source_reviews[source_reviews['rating'] == rating]
        if len(rating_reviews) == 0:
            continue
        parts.append(
            _sample_rating_group(rating_reviews, count, clustering, use_text_preprocessing)
        )
    sample_df = pd.concat(parts) if parts else pd.DataFrame()

    # Step 4: sort by rating and date (newest, highest-rated first). Only the
    # columns that exist are used, so an empty sample or a frame without a
    # 'date' column does not raise KeyError.
    sort_columns = [column for column in ('rating', 'date') if column in sample_df.columns]
    if sort_columns:
        sample_df = sample_df.sort_values(sort_columns, ascending=[False] * len(sort_columns))

    # Step 5: format the reviews for the prompt
    formatted_reviews = [
        _format_review(position, review)
        for position, (_, review) in enumerate(sample_df.iterrows(), start=1)
    ]

    # Step 6: create the system prompt (line breaks and trailing spaces are
    # spelled out explicitly so the text stays exactly as before)
    system_prompt = (
        f"You are an expert app review analyst specializing in airline mobile applications. Your task is to analyze {len(formatted_reviews)} reviews \n"
        "for the Indigo Airlines mobile app and extract valuable insights. Focus on understanding user sentiment, \n"
        "identifying key issues, and recognizing patterns across the reviews. Your analysis will help improve the \n"
        "app's functionality and user experience for airline passengers."
    )

    # Step 7: resolve the focus areas
    if not focus_areas:
        focus_areas = [
            "App Performance & Stability",
            "Booking Experience",
            "Check-in Process",
            "Payment System",
            "User Interface & Design",
            "Flight Information & Updates"
        ]
    elif isinstance(focus_areas, str):
        # A bare string is one focus area; list() would split it into characters
        focus_areas = [focus_areas]
    else:
        # Convert tuples and other iterables to a list
        focus_areas = list(focus_areas)

    focus_areas_text = "\n- ".join([""] + focus_areas)

    custom_prompt = f"""Please analyze the following collection of {len(formatted_reviews)} reviews for the Indigo Airlines mobile app and provide comprehensive insights.

Focus on these key areas:{focus_areas_text}

For your analysis, please:
1. Summarize the overall sentiment and main themes in the reviews
2. Identify the most significant issues users are facing with the airline app
3. Extract specific feature requests and constructive feedback
4. Note any patterns related to app versions, flight experiences, or booking processes
5. Suggest 3-5 specific, actionable improvements based on user feedback
6. Highlight any positive aspects that could be amplified or extended

Format your response as a well-structured report with clear sections and bullet points where appropriate. Include relevant quotes from reviews to support your findings.

Here are the reviews to analyze:

{''.join(formatted_reviews)}

Based on these reviews, please provide your comprehensive analysis and specific, actionable recommendations for improving the Indigo Airlines mobile app."""

    print(f"Final sample size: {len(formatted_reviews)} reviews")

    # Return all the prepared data
    return {
        "sample_size": sample_size,
        "rating_focus": rating_focus,
        "clustering": clustering,
        "model": model,
        "focus_areas": focus_areas,
        "system_prompt": system_prompt,
        "custom_prompt": custom_prompt,
        "sample_df": sample_df,
        "formatted_reviews": formatted_reviews
    }

## Token Estimation Function

This function helps estimate the number of tokens and associated cost before making an actual API call.

In [5]:
def check_token_limit(prompt, system_prompt="", model="gpt-4o", response_tokens=1800):
    """Estimate token usage and cost, and compare the total with the model limit.

    Token counts are approximated as one token per four characters. Models
    missing from the limit table are assumed to have an 8192-token limit;
    models missing from the price table get an "Unknown" cost.

    Returns:
        dict with the token counts, ``model_limit``, ``status``,
        ``percentage_of_limit`` and ``estimated_cost``; a ``warning`` entry
        is added only when the estimate exceeds the limit.
    """
    # Context-window sizes per model
    context_limits = {
        "gpt-3.5-turbo": 16385,
        "gpt-3.5-turbo-16k": 16385,
        "gpt-4": 8192,
        "gpt-4o": 128000,
        "gpt-4-turbo": 128000,
    }
    # Prices in USD per 1K tokens
    prices_per_1k = {
        "gpt-3.5-turbo": {"input": 0.0015, "output": 0.002},
        "gpt-3.5-turbo-16k": {"input": 0.003, "output": 0.004},
        "gpt-4": {"input": 0.03, "output": 0.06},
        "gpt-4o": {"input": 0.01, "output": 0.02},
        "gpt-4-turbo": {"input": 0.01, "output": 0.03},
    }

    limit = context_limits.get(model, 8192)
    n_prompt = len(prompt) // 4
    n_system = len(system_prompt) // 4
    n_total = n_prompt + n_system + response_tokens

    # Cost is only known for models in the price table
    pricing = prices_per_1k.get(model)
    if pricing is None:
        cost_text = "Unknown"
    else:
        input_cost = (n_prompt + n_system) / 1000 * pricing["input"]
        output_cost = response_tokens / 1000 * pricing["output"]
        cost_text = f"${input_cost + output_cost:.4f}"

    over_limit = n_total > limit
    usage_text = f"{n_total / limit * 100:.1f}%"
    if over_limit:
        usage_text += " (EXCEEDS LIMIT)"

    report = {
        "prompt_tokens": n_prompt,
        "system_tokens": n_system,
        "response_tokens": response_tokens,
        "total_tokens": n_total,
        "model_limit": limit,
        "status": "Exceeds limit" if over_limit else "Within limit",
        "percentage_of_limit": usage_text,
        "estimated_cost": cost_text,
    }
    if over_limit:
        report["warning"] = (
            f"The total estimated tokens ({n_total}) exceeds the model's limit ({limit})"
        )
    return report

In [6]:
def estimate_tokens_and_cost(prepared_data, model, max_tokens=1800):
    """Estimate tokens and cost for the prepared prompt and show them as an HTML card.

    Args:
        prepared_data: dict holding at least ``custom_prompt``,
            ``system_prompt`` and ``sample_size``; ``rating_focus`` is shown
            in the footer when present.
        model: Model name used for the limit and price lookup.
        max_tokens: Number of tokens reserved for the response.

    Returns:
        The dict produced by ``check_token_limit``.
    """
    from html import escape

    prompt = prepared_data["custom_prompt"]
    system_prompt = prepared_data["system_prompt"]

    # Get token estimates
    token_check = check_token_limit(
        prompt=prompt,
        system_prompt=system_prompt,
        model=model,
        response_tokens=max_tokens
    )

    # Values interpolated into the markup are escaped so that unexpected
    # characters cannot break the rendered card.
    safe_model = escape(str(model))
    # The footer names the rating focus; it used to print the first 50
    # characters of the first review, which also failed on an empty sample.
    focus_label = escape(str(prepared_data.get('rating_focus', 'unspecified')))

    if "warning" in token_check:
        status_banner = (
            '<div style="margin-top: 10px; color: #d9534f; font-weight: bold;">'
            f'{escape(str(token_check["warning"]))}</div>'
        )
    else:
        status_banner = (
            '<div style="margin-top: 10px; color: #5cb85c; font-weight: bold;">'
            '✓ Within token limits</div>'
        )

    # Create a nice HTML display
    html_result = f"""
    <div style="border: 1px solid #ddd; padding: 15px; border-radius: 5px; background-color: #f9f9f9;">
        <h3 style="margin-top: 0;">Token and Cost Estimate</h3>
        <table style="width: 100%; border-collapse: collapse;">
            <tr>
                <td style="padding: 5px; font-weight: bold;">Model:</td>
                <td style="padding: 5px;">{safe_model}</td>
            </tr>
            <tr>
                <td style="padding: 5px; font-weight: bold;">Prompt Tokens:</td>
                <td style="padding: 5px;">{token_check['prompt_tokens']:,}</td>
            </tr>
            <tr>
                <td style="padding: 5px; font-weight: bold;">System Tokens:</td>
                <td style="padding: 5px;">{token_check['system_tokens']:,}</td>
            </tr>
            <tr>
                <td style="padding: 5px; font-weight: bold;">Response Tokens:</td>
                <td style="padding: 5px;">{max_tokens:,}</td>
            </tr>
            <tr style="border-top: 1px solid #ddd;">
                <td style="padding: 5px; font-weight: bold;">Total Tokens:</td>
                <td style="padding: 5px;">{token_check['total_tokens']:,}</td>
            </tr>
            <tr>
                <td style="padding: 5px; font-weight: bold;">Token Limit:</td>
                <td style="padding: 5px;">{token_check['model_limit']:,}</td>
            </tr>
            <tr>
                <td style="padding: 5px; font-weight: bold;">Limit Usage:</td>
                <td style="padding: 5px;">{token_check['percentage_of_limit']}</td>
            </tr>
            <tr style="border-top: 1px solid #ddd; font-weight: bold;">
                <td style="padding: 5px; color: #d9534f;">Estimated Cost:</td>
                <td style="padding: 5px; color: #d9534f;">{token_check['estimated_cost']}</td>
            </tr>
        </table>
        <div style="margin-top: 10px; font-style: italic; color: #666;">
            Based on {prepared_data['sample_size']} reviews with rating focus: {focus_label}
        </div>
        {status_banner}
    </div>
    """

    display(HTML(html_result))

    return token_check

## LLM Insight Generator

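The functions below call the LLM module to generate insights from the prepared review sample. If the call fails for any reason, the error is printed and a clearly marked mock response is returned so the rest of the notebook can still be exercised.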

In [7]:
def make_safe_llm_call(prompt, system_prompt="", max_tokens=1800, temperature=0.2, model="gpt-4o"):
    """Call the LLM and return its text; on any error return a marked mock report.

    The failure is printed, and the returned mock text starts with an
    ``ERROR:`` line carrying the same message, so callers always get a string.
    """
    try:
        # Build, initialize and query the project LLM wrapper
        settings = {
            "model": model,
            "max_tokens": max_tokens,
            "temperature": temperature
        }
        client = OpenAILLM(settings)
        client.initialize()
        return client.generate_text(
            prompt=prompt,
            system_prompt=system_prompt,
            max_tokens=max_tokens,
            temperature=temperature
        )
    except Exception as exc:
        error_msg = f"Error generating insights: {str(exc)}"
        print(error_msg)

        # Canned report for testing; the empty first and last entries give
        # the text its leading and trailing newline.
        mock_lines = [
            "",
            f"ERROR: {error_msg}",
            "",
            "Since there was an error connecting to the LLM, here's a mock response for testing purposes:",
            "",
            "# Review Analysis Summary",
            "",
            "## Overall Sentiment",
            "The reviews show a mix of positive and negative sentiment. Many users appreciate the app's functionality but have encountered technical issues.",
            "",
            "## Key Issues Identified",
            "1. App crashes during booking process",
            "2. Payment system occasionally charges users twice",
            "3. Check-in feature unreliable in some cases",
            "",
            "## Suggested Improvements",
            "1. Improve app stability, particularly during the booking flow",
            "2. Fix the payment system to prevent double-charging",
            "3. Enhance the UI for better navigation",
            "4. Streamline the check-in process",
            "5. Improve customer support response time",
            "",
        ]
        return "\n".join(mock_lines)

In [8]:
def generate_custom_insights(
    sample_size=75,
    rating_focus='balanced',
    clustering='auto',
    model='gpt-4o',
    focus_areas=None,
    max_tokens=1800
):
    """Run the full insight pipeline once: build the prompt, check the budget, call the LLM.

    When the token estimate reports "Exceeds limit" the user is asked on stdin
    whether to continue. Returns a dictionary holding the parameters, the LLM
    text under "result" and the sampled reviews under "sample_df", or None if
    the user declines.
    """
    print(f"Generating insights with: sample_size={sample_size}, rating_focus={rating_focus}, clustering={clustering}, model={model}")

    # Build the sample and the prompts
    prepared = prepare_insight_prompt(
        sample_size=sample_size,
        rating_focus=rating_focus,
        clustering=clustering,
        model=model,
        focus_areas=focus_areas
    )

    # Token / cost estimate for this prompt and response budget
    budget = estimate_tokens_and_cost(prepared, model, max_tokens)

    if budget["status"] == "Exceeds limit":
        print(f"WARNING: {budget['warning']}")
        print("Consider reducing the sample size or maximum response tokens.")
        # Keep asking until the answer is recognisably yes or no
        confirmed = False
        while not confirmed:
            answer = input("Do you want to proceed anyway? (yes/no): ").lower()
            if answer in ('no', 'n'):
                print("Operation cancelled.")
                return None
            if answer in ('yes', 'y'):
                confirmed = True
            else:
                print("Please enter 'yes' or 'no'.")

    # The one API call
    llm_text = make_safe_llm_call(
        prompt=prepared['custom_prompt'],
        system_prompt=prepared['system_prompt'],
        max_tokens=max_tokens,
        temperature=0.2,
        model=model
    )

    # Banner followed by the raw LLM output
    divider = "=" * 50
    print("\n" + divider)
    print(f"INSIGHTS ({model.upper()}, {sample_size} REVIEWS, {rating_focus.upper()} FOCUS)")
    print(divider)
    print(llm_text)

    summary = {
        "sample_size": sample_size,
        "rating_focus": rating_focus,
        "clustering": clustering,
        "model": model,
        "focus_areas": focus_areas,
    }
    summary["result"] = llm_text
    summary["sample_df"] = prepared['sample_df']
    return summary

## Interactive UI for Generating Insights

Let's create an interactive UI for customizing the parameters. Each button handler ignores clicks while another operation is in progress and prints its results exactly once, which avoids duplicated output.

In [11]:
class InsightGenerator:
    """Interactive ipywidgets front end for configuring and running insight generation.

    Constructing an instance creates the controls, attaches the button handlers
    and displays the UI. The dictionary produced by the most recent run that
    completed without raising is kept in ``last_insights``.
    """

    def __init__(self, max_tokens=1800, temperature=0.2):
        """Build and display the UI.

        Args:
            max_tokens: Response token budget, used for the token estimate and
                for the LLM call so that both agree.
            temperature: Sampling temperature passed to the LLM call.
        """
        # Flag to prevent multiple executions
        self._operation_in_progress = False

        # Storage for the last generated insights
        self.last_insights = None

        # Generation settings shared by the estimate and run handlers
        self.max_tokens = max_tokens
        self.temperature = temperature

        # Create widgets
        self._create_widgets()

        # Create the UI layout
        self._create_ui()

    def _create_widgets(self):
        """Create the input controls, output areas and buttons, and attach the handlers."""
        # Sample size widget without description in the widget itself
        self.sample_size_widget = widgets.IntSlider(
            value=150, min=10, max=1000, step=5,  # Increased default to 150 for larger dataset
            description='',  # Empty description since we use a separate label
            style={'description_width': '0px'},
            layout=widgets.Layout(width='500px')
        )

        # Rating focus widget. The description stays empty like the other
        # controls that _create_ui pairs with a separate Label; otherwise
        # "Rating Focus:" would be rendered twice.
        self.rating_weights_widget = widgets.Dropdown(
            options=[
                ('Focus on 1-2 star reviews', 'negative'),
                ('Balanced across all ratings', 'balanced'),
                ('Focus on all ratings equally', 'equal'),
                ('Focus on 4-5 star reviews', 'positive')
            ],
            value='balanced',
            description='',
            style={'description_width': '0px'},
            layout=widgets.Layout(width='500px')
        )

        # Clustering widget
        self.clustering_widget = widgets.Dropdown(
            options=[
                ('Use existing topics if available', 'auto'),
                ('Always create new clusters', 'force_new'),
                ('Simple rating-based sampling', 'none')
            ],
            value='auto',
            description='',
            style={'description_width': '0px'},
            layout=widgets.Layout(width='500px')
        )

        # Model widget
        self.model_widget = widgets.Dropdown(
            options=[
                ('GPT-4o (most powerful, more expensive)', 'gpt-4o'),
                ('GPT-3.5 Turbo (faster, cheaper)', 'gpt-3.5-turbo'),
            ],
            value='gpt-4o',
            description='',
            style={'description_width': '0px'},
            layout=widgets.Layout(width='500px')
        )

        # Focus areas widget - Updated for airline app
        self.focus_area_widget = widgets.SelectMultiple(
            options=[
                'App Performance & Stability',
                'Booking Experience',
                'Check-in Process',
                'Customer Support',
                'Payment System',
                'User Interface & Design',
                'Flight Information & Updates',
                'Loyalty Program & Miles',
                'In-app Features',
                'Flight Modifications & Cancellations',
                'Baggage Information',
                'Boarding Pass & Digital Documents'
            ],
            value=['App Performance & Stability', 'Booking Experience', 'Check-in Process',
                   'Payment System', 'User Interface & Design'],
            description='Focus Areas:',
            style={'description_width': 'initial'},
            layout=widgets.Layout(width='500px', height='200px')  # Made taller for more options
        )

        # Output areas
        self.estimate_output = widgets.Output()
        self.insight_output = widgets.Output()

        # Buttons with bigger size
        self.estimate_button = widgets.Button(
            description='Estimate Tokens & Cost',
            button_style='info',
            tooltip='Calculate tokens and cost without making an API call',
            layout=widgets.Layout(width='250px', height='40px')
        )

        self.run_button = widgets.Button(
            description='Generate Insights',
            button_style='success',
            tooltip='Run LLM analysis (will use API credits)',
            layout=widgets.Layout(width='250px', height='40px')
        )

        # Attach handlers
        self.estimate_button.on_click(self._on_estimate_button_clicked)
        self.run_button.on_click(self._on_run_button_clicked)

    def _create_ui(self):
        """Assemble the configuration, button and output sections and display them."""
        # Create configuration section
        config_box = widgets.VBox([
            widgets.HTML(value='<h3>Sample Configuration:</h3>'),
            widgets.HBox([widgets.Label('Sample Size:'), self.sample_size_widget]),
            widgets.HBox([widgets.Label('Rating Focus:'), self.rating_weights_widget]),
            widgets.HBox([widgets.Label('Clustering:'), self.clustering_widget]),
            widgets.HBox([widgets.Label('Model:'), self.model_widget]),
            widgets.HTML(value='<h3>Optional Focus Areas:</h3>'),
            self.focus_area_widget,
        ])

        # Create action buttons section - horizontal layout below the configuration
        button_box = widgets.HBox([
            self.estimate_button,
            self.run_button
        ], layout=widgets.Layout(
            justify_content='center',
            margin='20px 0'
        ))

        # Create output section
        output_box = widgets.VBox([
            widgets.HTML(value='<h3>Results:</h3>'),
            self.estimate_output,
            self.insight_output
        ])

        # Organize the complete layout - stacked vertically
        main_ui = widgets.VBox([
            config_box,
            button_box,
            output_box
        ])

        # Display heading
        print("CUSTOMIZABLE COST-CONTROLLED INSIGHTS")
        print("Configure your parameters, check token usage/cost, and generate insights when ready.")
        print("First use 'Estimate Tokens & Cost' to see the impact of your settings without making an API call.\n")

        # Display UI
        display(main_ui)

    def _read_settings(self):
        """Return the current widget selections as keyword arguments for prepare_insight_prompt."""
        return {
            "sample_size": self.sample_size_widget.value,
            "rating_focus": self.rating_weights_widget.value,
            "clustering": self.clustering_widget.value,
            "model": self.model_widget.value,
            "focus_areas": self.focus_area_widget.value,
        }

    def _on_estimate_button_clicked(self, b):
        """Estimate tokens and cost for the current settings without calling the LLM.

        Does nothing while another operation is in progress. Errors are printed
        into the estimate output area instead of being raised.
        """
        if self._operation_in_progress:
            return

        self._operation_in_progress = True
        try:
            self.estimate_output.clear_output()

            with self.estimate_output:
                settings = self._read_settings()
                sample_size = settings["sample_size"]
                rating_focus = settings["rating_focus"]
                clustering = settings["clustering"]
                model = settings["model"]

                # Display a single message
                print(f"Preparing prompt with: sample_size={sample_size}, rating_focus={rating_focus}, clustering={clustering}, model={model}")

                # Prepare data
                prepared_data = prepare_insight_prompt(**settings)

                # Estimate against the same response budget the run handler uses
                estimate_tokens_and_cost(prepared_data, model, self.max_tokens)
        except Exception as e:
            with self.estimate_output:
                print(f"Error during estimation: {str(e)}")
        finally:
            self._operation_in_progress = False

    def _on_run_button_clicked(self, b):
        """Generate insights for the current settings and store them in ``last_insights``.

        Does nothing while another operation is in progress. Errors are printed
        into the insight output area, with troubleshooting hints, instead of
        being raised.
        """
        if self._operation_in_progress:
            return

        self._operation_in_progress = True
        try:
            self.insight_output.clear_output()

            with self.insight_output:
                settings = self._read_settings()
                sample_size = settings["sample_size"]
                rating_focus = settings["rating_focus"]
                clustering = settings["clustering"]
                model = settings["model"]

                # Show a single execution message and loading indicator
                print(f"Generating insights with: sample_size={sample_size}, rating_focus={rating_focus}, clustering={clustering}, model={model}")
                print("Processing request... this may take 30-60 seconds depending on the model...")
                print("⏳" * 3, end="", flush=True)

                # Prepare data
                prepared_data = prepare_insight_prompt(**settings)

                # Make the API call
                result = make_safe_llm_call(
                    prompt=prepared_data['custom_prompt'],
                    system_prompt=prepared_data['system_prompt'],
                    max_tokens=self.max_tokens,
                    temperature=self.temperature,
                    model=model
                )

                # Same keys, in the same order, as generate_custom_insights returns
                self.last_insights = {
                    **settings,
                    "result": result,
                    "sample_df": prepared_data['sample_df']
                }

                # Display results manually once
                print("\n" + "=" * 50)
                print(f"INSIGHTS ({model.upper()}, {sample_size} REVIEWS, {rating_focus.upper()} FOCUS)")
                print("=" * 50)
                print(result)

                # Add a message about saving the insights
                print("\nTo save these insights to a file, run: save_insights(insight_generator.last_insights)")
        except Exception as e:
            with self.insight_output:
                print(f"Error generating insights: {str(e)}")
                print("\nDebug information:")
                print(f"- Exception type: {type(e).__name__}")
                print(f"- Error message: {str(e)}")
                print("\nTroubleshooting:")
                print("- Check your OpenAI API key and internet connection")
                print("- Make sure the OpenAI API is available")
                print("- Verify that the model you selected is available in your account")
        finally:
            self._operation_in_progress = False

In [14]:
# Build the interactive UI: constructing InsightGenerator creates the widgets and displays them.
# Keep the instance under the name `insight_generator`; the hint printed after a run
# and the save cell further down both refer to it by that name.
insight_generator = InsightGenerator()

CUSTOMIZABLE COST-CONTROLLED INSIGHTS
Configure your parameters, check token usage/cost, and generate insights when ready.
First use 'Estimate Tokens & Cost' to see the impact of your settings without making an API call.



VBox(children=(VBox(children=(HTML(value='<h3>Sample Configuration:</h3>'), HBox(children=(Label(value='Sample…

## Manual Insight Generation

If you prefer to generate insights without the UI, you can use the `generate_custom_insights` function directly.

In [None]:
# Manual run example.
# With the flag left at False this cell only prints the snippet and makes no API call;
# switch it to True to actually call generate_custom_insights.
run_example = False

if not run_example:
    # Show the call without executing it
    print("""
# Manual Insight Generation Example
# To run this analysis, change run_example to True and execute the cell

insights = generate_custom_insights(
    sample_size=150,        # Number of reviews to sample
    rating_focus='balanced',  # Get a mix of all ratings
    clustering='auto',     # Use automatic clustering
    model='gpt-4o',        # Use GPT-4o model
    focus_areas=[
        'App Performance & Stability',
        'Booking Experience', 
        'Check-in Process',
        'Payment System',
        'User Interface & Design',
        'Flight Information & Updates'
    ]
)
""")
else:
    # Larger sample for the bigger dataset, reviews drawn across all ratings,
    # automatic clustering, GPT-4o.
    insights = generate_custom_insights(
        sample_size=150,
        rating_focus='balanced',
        clustering='auto',
        model='gpt-4o',
        focus_areas=[
            'App Performance & Stability',
            'Booking Experience',
            'Check-in Process',
            'Payment System',
            'User Interface & Design',
            'Flight Information & Updates',
        ],
    )

## Saving Insights

You can save the generated insights to a file for future reference.

In [13]:
def save_insights(insights, filename=None):
    """Write an insights dictionary to a Markdown report under ``reports/insights``.

    Args:
        insights: Dictionary of generation parameters and output. The keys read
            here are ``sample_size``, ``rating_focus``, ``clustering``,
            ``model``, ``focus_areas`` and ``result``; any of them may be
            missing. ``None`` prints a notice and returns.
        filename: Name of the report file. When omitted, one is derived from the
            rating focus, the sample size and the current timestamp.

    Returns:
        None. Success or failure of the write is reported with ``print``.
    """
    if insights is None:
        print("No insights to save.")
        return

    now = datetime.now()

    # Create filename if not provided
    if filename is None:
        timestamp = now.strftime("%Y-%m-%d_%H-%M-%S")
        focus = insights.get("rating_focus", "balanced")
        sample_size = insights.get("sample_size", 0)
        filename = f"insights_{focus}_{sample_size}_{timestamp}.md"

    # focus_areas may be present but None (the default of generate_custom_insights),
    # empty, or a single string; normalise before joining so the report never fails here.
    focus_areas = insights.get('focus_areas')
    if isinstance(focus_areas, str):
        focus_areas = [focus_areas]
    focus_text = ', '.join(str(area) for area in focus_areas) if focus_areas else 'N/A'

    # A missing or None result falls back to the placeholder text
    result = insights.get("result")
    body = "No insights generated." if result is None else str(result)

    # Build the whole report first so a formatting problem cannot leave a
    # half-written file behind.
    report = "".join([
        "# Review Insights\n\n",
        f"Generated on: {now.strftime('%Y-%m-%d %H:%M:%S')}\n\n",
        "## Parameters\n\n",
        f"- **Sample size**: {insights.get('sample_size', 'N/A')}\n",
        f"- **Rating focus**: {insights.get('rating_focus', 'N/A')}\n",
        f"- **Clustering**: {insights.get('clustering', 'N/A')}\n",
        f"- **Model**: {insights.get('model', 'N/A')}\n",
        f"- **Focus areas**: {focus_text}\n\n",
        "## Insights\n\n",
        body,
    ])

    # Create reports directory if it doesn't exist
    reports_dir = os.path.join(project_root, 'reports', 'insights')
    os.makedirs(reports_dir, exist_ok=True)

    # Full path
    filepath = os.path.join(reports_dir, filename)

    # Save the insights
    try:
        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(report)
    except (OSError, UnicodeError) as e:
        print(f"Error saving insights: {str(e)}")
    else:
        print(f"Insights saved to: {filepath}")

# Uncomment and execute to save insights from manual insight generation
# (`insights` only exists after the manual example cell has run with run_example = True)
#save_insights(insights)
# Save insights from UI based insight generation.
# `last_insights` is None until "Generate Insights" has completed once; in that case
# save_insights just prints "No insights to save." and writes nothing.
save_insights(insight_generator.last_insights)

Insights saved to: /Users/dipesh/Local-Projects/indigo-reviews-ai/reports/insights/insights_negative_600_2025-05-08_14-56-52.md


## Conclusion

This notebook provides a clean, modular approach to generating insights from app reviews using LLMs. The key benefits of this approach include:

1. **Cost efficiency** - Sampling and proper prompt design reduce token usage
2. **Representative sampling** - Using clustering ensures diverse review representation
3. **Parameter customization** - Flexible configuration for different analysis needs
4. **Error prevention** - Robust error handling and duplicate execution prevention
5. **Integration with project modules** - Properly interfaces with the project's LLM module

You can further extend this notebook by:

1. Adding more specialized insight types (competitive analysis, user persona extraction, etc.)
2. Implementing automatic insight tracking over time to identify trends
3. Creating visualizations based on the LLM-generated insights
4. Building a dashboard that combines quantitative metrics with qualitative insights