# Open-Ended Coding Analysis

This notebook provides a comprehensive framework for analyzing open-ended qualitative data through:
- **Code Frames**: Systematic coding structures for categorizing data
- **Themes**: Identification and analysis of recurring patterns
- **Categorization**: Multi-level classification and organization of qualitative data

## Features
- Data loading from flat files (CSV, Excel) and databases (SQLite, PostgreSQL)
- Interactive visualizations
- Robust error handling
- Code quality checks via Makefile
- Comprehensive testing framework

## 1. Setup and Imports

In [None]:
# Standard library imports
import os
import sys
import warnings
from pathlib import Path
from typing import Dict, List, Optional, Tuple, Union
import logging

# Data manipulation
import pandas as pd
import numpy as np

# Database connections
import sqlite3
from sqlalchemy import create_engine

# Visualizations
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# NLP and text analysis
from collections import Counter, defaultdict
import re

# Configure logging: INFO level with timestamp, logger name and severity.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Suppress warnings (blanket filter: every warning category is silenced
# for the rest of the session).
warnings.filterwarnings('ignore')

# Set visualization styles.
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*';
# older releases only know the unprefixed name and raise OSError for the
# new one, so fall back instead of aborting the setup cell.
try:
    plt.style.use('seaborn-v0_8-darkgrid')
except OSError:
    plt.style.use('seaborn-darkgrid')
sns.set_palette("husl")

print("✓ All imports successful")

## 2. Data Loading Module

Robust data loading from multiple sources with comprehensive error handling.

In [None]:
class DataLoader:
    """Load tabular data from flat files and SQL databases.

    Every loader logs the outcome through a logger named after the class
    and re-raises any exception after logging it, so callers always see
    the original error.
    """

    def __init__(self):
        self.logger = logging.getLogger(self.__class__.__name__)

    def load_csv(self, filepath: str, **kwargs) -> pd.DataFrame:
        """
        Load data from CSV file.

        Args:
            filepath: Path to CSV file
            **kwargs: Additional arguments for pd.read_csv

        Returns:
            DataFrame with loaded data

        Raises:
            FileNotFoundError: If file doesn't exist
            pd.errors.EmptyDataError: If file is empty
        """
        try:
            if not os.path.exists(filepath):
                raise FileNotFoundError(f"File not found: {filepath}")

            df = pd.read_csv(filepath, **kwargs)
            self.logger.info(f"Successfully loaded {len(df)} rows from {filepath}")
            return df

        except pd.errors.EmptyDataError:
            self.logger.error(f"Empty file: {filepath}")
            raise
        except Exception as e:
            self.logger.error(f"Error loading CSV {filepath}: {str(e)}")
            raise

    def load_excel(self, filepath: str, sheet_name: Union[str, int] = 0, **kwargs) -> pd.DataFrame:
        """
        Load data from Excel file.

        Args:
            filepath: Path to Excel file
            sheet_name: Sheet name or index
            **kwargs: Additional arguments for pd.read_excel

        Returns:
            DataFrame with loaded data

        Raises:
            FileNotFoundError: If file doesn't exist
        """
        try:
            if not os.path.exists(filepath):
                raise FileNotFoundError(f"File not found: {filepath}")

            df = pd.read_excel(filepath, sheet_name=sheet_name, **kwargs)
            self.logger.info(f"Successfully loaded {len(df)} rows from {filepath}")
            return df

        except Exception as e:
            self.logger.error(f"Error loading Excel {filepath}: {str(e)}")
            raise

    def load_from_sqlite(self, db_path: str, query: str) -> pd.DataFrame:
        """
        Load data from SQLite database.

        The query is executed verbatim, so it must come from a trusted
        source.

        Args:
            db_path: Path to SQLite database file
            query: SQL query to execute

        Returns:
            DataFrame with query results
        """
        try:
            conn = sqlite3.connect(db_path)
            try:
                df = pd.read_sql_query(query, conn)
            finally:
                # Close even when the query fails; otherwise the handle
                # (and its file lock) leaks until garbage collection.
                conn.close()
            self.logger.info(f"Successfully loaded {len(df)} rows from SQLite database")
            return df

        except Exception as e:
            self.logger.error(f"Error loading from SQLite: {str(e)}")
            raise

    def load_from_postgres(self, connection_string: str, query: str) -> pd.DataFrame:
        """
        Load data from PostgreSQL database.

        The query is executed verbatim, so it must come from a trusted
        source.

        Args:
            connection_string: PostgreSQL connection string
            query: SQL query to execute

        Returns:
            DataFrame with query results
        """
        try:
            engine = create_engine(connection_string)
            try:
                df = pd.read_sql_query(query, engine)
            finally:
                # Release pooled connections on both success and failure.
                engine.dispose()
            self.logger.info(f"Successfully loaded {len(df)} rows from PostgreSQL database")
            return df

        except Exception as e:
            self.logger.error(f"Error loading from PostgreSQL: {str(e)}")
            raise

# Initialize data loader
data_loader = DataLoader()
print("✓ DataLoader initialized")

## 3. Load Sample Data

Load your qualitative data from various sources.

In [None]:
# Example: load responses from CSV, or fall back to a built-in demo set
try:
    # Replace with your actual data file
    csv_path = 'data/sample_responses.csv'

    if not os.path.exists(csv_path):
        # No file on disk: build sample data for demonstration
        demo_responses = [
            'I love the flexibility of remote work',
            'Better work-life balance is crucial',
            'Communication challenges with team members',
            'Increased productivity at home',
            'Missing social interactions with colleagues',
            'Technology issues affect my work',
            'More time for family and personal activities',
            'Difficulty separating work and personal life',
            'Cost savings from not commuting',
            'Feeling isolated from the team',
            'Flexible schedule allows better time management',
            'Video call fatigue is real',
            'Can focus better without office distractions',
            'Miss casual conversations at the office',
            'Home office setup improves comfort',
            'Internet connectivity problems',
            'Appreciate the autonomy',
            'Harder to build relationships remotely',
            'Reduced stress from commuting',
            'Challenging to stay motivated alone'
        ]
        n_rows = len(demo_responses)
        row_numbers = range(1, n_rows + 1)
        df = pd.DataFrame({
            'id': row_numbers,
            'response': demo_responses,
            'respondent_id': [f'R{i:03d}' for i in row_numbers],
            'timestamp': pd.date_range(start='2024-01-01', periods=n_rows, freq='D')
        })
        logger.info("Using sample demonstration data")
    else:
        df = data_loader.load_csv(csv_path)

    # Quick overview of what was loaded
    print(f"\nLoaded {len(df)} responses")
    print(f"\nData shape: {df.shape}")
    print(f"\nColumns: {df.columns.tolist()}")
    display(df.head())

except Exception as e:
    # Log for the record, then let the notebook cell fail visibly
    logger.error(f"Error loading data: {e}")
    raise

## 4. Code Frames

Code frames provide a structured approach to categorizing qualitative data. Define your coding scheme here.

In [None]:
class CodeFrame:
    """Manages coding frames for qualitative analysis.

    A frame is a dictionary of codes keyed by code ID. Each code stores a
    label, description, keyword list, optional parent code ID and a usage
    counter that `apply_codes` increments on every match.
    """

    # Column layout of `summary()`; fixed so an empty frame still yields a
    # well-formed (sortable) DataFrame.
    _SUMMARY_COLUMNS = ['Code ID', 'Label', 'Count', 'Parent']

    def __init__(self, name: str, description: str = ""):
        self.name = name
        self.description = description
        self.codes = {}
        self.logger = logging.getLogger(self.__class__.__name__)

    def add_code(self, code_id: str, label: str, description: str = "",
                 keywords: Optional[List[str]] = None, parent: Optional[str] = None):
        """
        Add a code to the frame (replacing any code with the same ID).

        Args:
            code_id: Unique identifier for the code
            label: Human-readable label
            description: Detailed description of the code
            keywords: List of keywords associated with this code
            parent: Parent code ID for hierarchical structures
        """
        self.codes[code_id] = {
            'label': label,
            'description': description,
            'keywords': keywords or [],
            'parent': parent,
            'count': 0
        }
        self.logger.info(f"Added code: {code_id} - {label}")

    def apply_codes(self, text: str, case_sensitive: bool = False) -> List[str]:
        """
        Apply codes to text based on keyword (substring) matching.

        A code matches when at least one of its non-empty keywords occurs
        anywhere in the text. Each matching code's usage counter is
        incremented once per call.

        Args:
            text: Text to code. Non-string values (e.g. NaN or None from
                missing survey answers) are treated as uncodable.
            case_sensitive: Whether to use case-sensitive matching

        Returns:
            List of matching code IDs, in the order the codes were added
        """
        # Missing answers arrive as float NaN / None when data is read
        # from files; they carry no text, so nothing can match.
        if not isinstance(text, str):
            return []

        haystack = text if case_sensitive else text.lower()

        matched_codes = []
        for code_id, code_info in self.codes.items():
            # Skip empty keywords: '' is a substring of every string and
            # would tag every response with this code.
            keywords = [k for k in code_info['keywords'] if k]
            if not case_sensitive:
                keywords = [k.lower() for k in keywords]

            if any(keyword in haystack for keyword in keywords):
                matched_codes.append(code_id)
                code_info['count'] += 1

        return matched_codes

    def get_hierarchy(self) -> Dict:
        """Map each parent code ID to its child IDs; top-level codes go under 'root'."""
        hierarchy = defaultdict(list)
        for code_id, code_info in self.codes.items():
            parent = code_info.get('parent')
            hierarchy[parent if parent else 'root'].append(code_id)
        return dict(hierarchy)

    def summary(self) -> pd.DataFrame:
        """
        Generate summary statistics of code usage.

        Returns:
            DataFrame with columns 'Code ID', 'Label', 'Count' and
            'Parent', sorted by 'Count' descending. Top-level codes show
            the string 'None' as parent.
        """
        summary_data = [
            {
                'Code ID': code_id,
                'Label': code_info['label'],
                'Count': code_info['count'],
                # The 'parent' key always exists (possibly as None), so a
                # .get() default would never be used; substitute here.
                'Parent': code_info.get('parent') or 'None'
            }
            for code_id, code_info in self.codes.items()
        ]
        return pd.DataFrame(
            summary_data, columns=self._SUMMARY_COLUMNS
        ).sort_values('Count', ascending=False)

print("✓ CodeFrame class defined")

### 4.1 Define Your Code Frame

In [None]:
# Build the code frame used to analyse remote work experiences
remote_work_frame = CodeFrame(
    "Remote Work Analysis",
    "Coding frame for analyzing remote work experiences",
)

# Main categories (top-level codes): (code ID, label, description)
top_level_codes = [
    ('POSITIVE', 'Positive Experiences', 'Positive aspects of remote work'),
    ('NEGATIVE', 'Negative Experiences', 'Challenges and negative aspects of remote work'),
    ('NEUTRAL', 'Neutral/Mixed', 'Neutral or mixed experiences'),
]
for code_id, label, description in top_level_codes:
    remote_work_frame.add_code(code_id, label, description)

# Sub-codes: (code ID, label, description, keywords, parent code ID).
# Positive sub-codes are listed first, then the negative ones.
sub_codes = [
    ('POS_FLEX', 'Flexibility', 'Flexibility in schedule and location',
     ['flexibility', 'flexible', 'autonomy', 'schedule'], 'POSITIVE'),
    ('POS_BALANCE', 'Work-Life Balance', 'Improved work-life balance',
     ['work-life balance', 'family', 'personal activities', 'time management'], 'POSITIVE'),
    ('POS_PROD', 'Productivity', 'Increased productivity',
     ['productivity', 'productive', 'focus', 'efficient'], 'POSITIVE'),
    ('POS_COST', 'Cost Savings', 'Financial benefits',
     ['cost savings', 'commuting', 'save money', 'reduced stress'], 'POSITIVE'),
    ('NEG_COMM', 'Communication Issues', 'Communication and collaboration challenges',
     ['communication', 'challenges', 'video call', 'fatigue'], 'NEGATIVE'),
    ('NEG_SOCIAL', 'Social Isolation', 'Lack of social interaction',
     ['isolated', 'social', 'miss', 'lonely', 'relationships'], 'NEGATIVE'),
    ('NEG_TECH', 'Technical Issues', 'Technology and infrastructure problems',
     ['technology', 'internet', 'connectivity', 'technical'], 'NEGATIVE'),
    ('NEG_BOUND', 'Work-Life Boundaries', 'Difficulty maintaining boundaries',
     ['separating', 'boundaries', 'motivated', 'personal life'], 'NEGATIVE'),
]
for code_id, label, description, keywords, parent in sub_codes:
    remote_work_frame.add_code(
        code_id, label, description, keywords=keywords, parent=parent
    )

print(f"\n✓ Code frame '{remote_work_frame.name}' created with {len(remote_work_frame.codes)} codes")
print(f"\nHierarchy: {remote_work_frame.get_hierarchy()}")

### 4.2 Apply Codes to Data

In [None]:
# The frame accumulates a usage counter per code every time apply_codes()
# matches. Zero the counters first so that re-running this cell reports
# the counts for this dataset rather than a running total.
for code_info in remote_work_frame.codes.values():
    code_info['count'] = 0

# Apply codes to each response
df['codes'] = df['response'].apply(
    lambda x: remote_work_frame.apply_codes(x, case_sensitive=False)
)

# Create binary (0/1) indicator columns, one per code in the frame
for code_id in remote_work_frame.codes:
    df[f'code_{code_id}'] = df['codes'].apply(
        lambda codes, cid=code_id: 1 if cid in codes else 0
    )

print("\nCoded Responses:")
display(df[['response', 'codes']].head(10))

# Show code summary
print("\nCode Summary:")
code_summary = remote_work_frame.summary()
display(code_summary)

### 4.3 Visualize Code Distribution

In [None]:
# Frequency of every code, rendered as a bar chart
fig = px.bar(
    code_summary,
    x='Label',
    y='Count',
    color='Count',
    color_continuous_scale='Blues',
    labels={'Label': 'Code', 'Count': 'Frequency'},
    title='Code Distribution in Responses'
)
fig.update_layout(height=500, xaxis_tickangle=-45)
fig.show()

# Sunburst of the code hierarchy: one record per code, pointing at the
# label of its parent code (empty string for top-level codes)
frame_codes = remote_work_frame.codes
sunburst_data = [
    {
        'labels': info['label'],
        'parents': frame_codes[info['parent']]['label'] if info.get('parent', '') else '',
        'values': info['count']
    }
    for info in frame_codes.values()
]

sunburst_df = pd.DataFrame(sunburst_data)
fig = px.sunburst(
    sunburst_df,
    names='labels',
    parents='parents',
    values='values',
    title='Hierarchical Code Structure'
)
fig.show()

## 5. Themes

Identify and analyze recurring themes in the data.

In [None]:
class ThemeAnalyzer:
    """Analyzes and identifies themes in qualitative data.

    A theme groups several code IDs. After `identify_themes` has run, each
    theme also remembers the index labels of the responses it matched.
    """

    # Column layout of `summary()`, so an analyzer without themes still
    # returns a well-formed DataFrame.
    _SUMMARY_COLUMNS = ['Theme ID', 'Name', 'Description', 'Associated Codes', 'Frequency']

    # Container types accepted as a cell of codes; anything else (NaN,
    # None, stray scalars) is treated as "no codes".
    _CODE_CONTAINERS = (list, tuple, set, frozenset, np.ndarray)

    def __init__(self):
        self.themes = {}
        self.logger = logging.getLogger(self.__class__.__name__)

    def define_theme(self, theme_id: str, name: str, description: str,
                    associated_codes: Optional[List[str]] = None):
        """
        Define a theme (replacing any theme with the same ID).

        Args:
            theme_id: Unique identifier
            name: Theme name
            description: Detailed description
            associated_codes: List of code IDs associated with this theme
        """
        self.themes[theme_id] = {
            'name': name,
            'description': description,
            'codes': associated_codes or [],
            'responses': []
        }
        self.logger.info(f"Defined theme: {theme_id} - {name}")

    def identify_themes(self, df: pd.DataFrame, code_column: str = 'codes') -> pd.DataFrame:
        """
        Identify themes in coded data.

        A response belongs to a theme when it carries at least one of the
        theme's associated codes. The per-theme response lists are rebuilt
        from scratch on every call, so repeated calls do not inflate the
        frequencies reported by `summary()`.

        Args:
            df: DataFrame with coded responses; a 'themes' column is added
                to it in place
            code_column: Column name containing codes

        Returns:
            The same DataFrame, now with theme assignments
        """
        # Start from a clean slate: matches from an earlier run must not
        # be counted again.
        for theme_info in self.themes.values():
            theme_info['responses'] = []

        # Build each theme's code set once instead of once per row.
        theme_code_sets = {
            theme_id: set(theme_info['codes'])
            for theme_id, theme_info in self.themes.items()
        }

        theme_assignments = []
        for idx, raw_codes in df[code_column].items():
            if isinstance(raw_codes, self._CODE_CONTAINERS):
                response_codes = set(raw_codes)
            else:
                response_codes = set()

            matched_themes = []
            for theme_id, theme_codes in theme_code_sets.items():
                if response_codes & theme_codes:  # Intersection
                    matched_themes.append(theme_id)
                    self.themes[theme_id]['responses'].append(idx)

            theme_assignments.append(matched_themes)

        df['themes'] = theme_assignments
        return df

    def theme_co_occurrence(self) -> pd.DataFrame:
        """
        Calculate co-occurrence matrix of themes.

        Cell (a, b) is the number of responses matched by both theme a and
        theme b in the most recent `identify_themes` run; the diagonal is
        each theme's own response count.

        Returns:
            Square DataFrame of counts, indexed and labelled by theme name
        """
        theme_ids = list(self.themes)
        theme_names = [self.themes[t]['name'] for t in theme_ids]
        members = [set(self.themes[t]['responses']) for t in theme_ids]

        n_themes = len(theme_ids)
        co_occurrence = np.zeros((n_themes, n_themes), dtype=int)
        for i, first in enumerate(members):
            for j, second in enumerate(members):
                co_occurrence[i, j] = len(first & second)

        return pd.DataFrame(co_occurrence, index=theme_names, columns=theme_names)

    def summary(self) -> pd.DataFrame:
        """
        Generate theme summary statistics.

        Returns:
            DataFrame with one row per theme: ID, name, description,
            number of associated codes and number of matched responses
        """
        summary_data = [
            {
                'Theme ID': theme_id,
                'Name': theme_info['name'],
                'Description': theme_info['description'],
                'Associated Codes': len(theme_info['codes']),
                'Frequency': len(theme_info['responses'])
            }
            for theme_id, theme_info in self.themes.items()
        ]
        return pd.DataFrame(summary_data, columns=self._SUMMARY_COLUMNS)

print("✓ ThemeAnalyzer class defined")

### 5.1 Define Themes

In [None]:
# Initialize theme analyzer
theme_analyzer = ThemeAnalyzer()

# Themes grouped by code patterns:
# theme ID -> (name, description, associated code IDs)
theme_definitions = {
    'THEME_AUTONOMY': (
        'Autonomy and Control',
        'Themes related to personal autonomy, control over schedule, and independence',
        ['POS_FLEX', 'POS_BALANCE'],
    ),
    'THEME_PERFORMANCE': (
        'Work Performance',
        'Themes related to productivity, efficiency, and work output',
        ['POS_PROD', 'NEG_TECH'],
    ),
    'THEME_CONNECTION': (
        'Social Connection',
        'Themes related to social interaction, relationships, and collaboration',
        ['NEG_SOCIAL', 'NEG_COMM'],
    ),
    'THEME_WELLBEING': (
        'Personal Wellbeing',
        'Themes related to mental health, stress, and life quality',
        ['POS_COST', 'POS_BALANCE', 'NEG_BOUND'],
    ),
}

for theme_id, (theme_name, theme_description, theme_codes) in theme_definitions.items():
    theme_analyzer.define_theme(
        theme_id,
        theme_name,
        theme_description,
        associated_codes=theme_codes
    )

print(f"\n✓ Defined {len(theme_analyzer.themes)} themes")

### 5.2 Identify Themes in Data

In [None]:
# Tag every response with the themes implied by its codes
df = theme_analyzer.identify_themes(df)

# Preview the first ten coded and themed responses
print("\nResponses with Identified Themes:")
themed_preview = df.loc[:, ['response', 'codes', 'themes']].iloc[:10]
display(themed_preview)

# Per-theme statistics
print("\nTheme Summary:")
theme_summary = theme_analyzer.summary()
display(theme_summary)

### 5.3 Visualize Themes

In [None]:
# Bar chart: how many responses fall under each theme
fig = px.bar(
    theme_summary,
    x='Name',
    y='Frequency',
    color='Frequency',
    color_continuous_scale='Viridis',
    hover_data=['Description'],
    title='Theme Distribution'
)
fig.update_layout(height=500, xaxis_tickangle=-45)
fig.show()

# Tally individual themes and unordered theme pairs per response
theme_counts = Counter()
theme_pairs = Counter()

for themes in df['themes']:
    theme_counts.update(themes)

    # Every pair of themes on the same response, stored in sorted order
    # so (a, b) and (b, a) share one key
    theme_pairs.update(
        tuple(sorted([first, second]))
        for pos, first in enumerate(themes)
        for second in themes[pos + 1:]
    )

print("\nTheme Co-occurrences:")
for (left, right), count in theme_pairs.most_common():
    print(f"{left} <-> {right}: {count}")

## 6. Categorization

Advanced categorization and classification of coded data.

In [None]:
class CategoryManager:
    """Manages multi-level categorization of qualitative data.

    Each category has a name, a hierarchical level, a criteria dictionary
    and a counter of rows assigned by the most recent `categorize` call.

    Supported criteria keys (all given keys must be satisfied):
        'codes_required':  at least one listed code is in the row's 'codes'
        'codes_all':       every listed code is in the row's 'codes'
        'themes_required': at least one listed theme is in the row's 'themes'
    """

    _KNOWN_CRITERIA = frozenset({'codes_required', 'codes_all', 'themes_required'})

    # Column layout of `summary()`, so a manager without categories still
    # returns a sortable DataFrame.
    _SUMMARY_COLUMNS = ['Category ID', 'Name', 'Level', 'Count']

    # Container types accepted as a cell of codes/themes.
    _CONTAINERS = (list, tuple, set, frozenset, np.ndarray)

    def __init__(self):
        self.categories = {}
        self.logger = logging.getLogger(self.__class__.__name__)

    def create_category(self, category_id: str, name: str,
                       criteria: Dict, level: int = 1):
        """
        Create a category (replacing any category with the same ID).

        Args:
            category_id: Unique identifier
            name: Category name
            criteria: Dictionary defining categorization criteria
            level: Hierarchical level (1 = top level)
        """
        # Unknown keys are ignored during matching, so a misspelt key would
        # silently make the category match every row. Surface it here.
        unknown_keys = [key for key in criteria if key not in self._KNOWN_CRITERIA]
        if unknown_keys:
            self.logger.warning(
                f"Category {category_id}: ignoring unknown criteria keys {unknown_keys}"
            )

        self.categories[category_id] = {
            'name': name,
            'criteria': criteria,
            'level': level,
            'count': 0
        }
        self.logger.info(f"Created category: {category_id} - {name} (Level {level})")

    def categorize(self, df: pd.DataFrame,
                   code_columns: Optional[List[str]] = None) -> pd.DataFrame:
        """
        Apply categorization to DataFrame.

        Category counters are reset at the start of every call, so the
        counts in `summary()` always describe the latest run.

        Args:
            df: DataFrame to categorize; a 'categories' column is added to
                it in place
            code_columns: Accepted for interface compatibility; currently
                not used (criteria always read 'codes' and 'themes')

        Returns:
            The same DataFrame, now with category assignments
        """
        # Re-running must not add to the counts of a previous run.
        for cat_info in self.categories.values():
            cat_info['count'] = 0

        categories_assigned = []

        for _, row in df.iterrows():
            assigned = []

            for cat_id, cat_info in self.categories.items():
                if self._meets_criteria(row, cat_info['criteria']):
                    assigned.append(cat_id)
                    cat_info['count'] += 1

            categories_assigned.append(assigned)

        df['categories'] = categories_assigned
        return df

    def _row_values(self, row: pd.Series, column: str):
        """Return the row's container in `column`, or [] if absent or not a container."""
        values = row.get(column, [])
        return values if isinstance(values, self._CONTAINERS) else []

    def _meets_criteria(self, row: pd.Series, criteria: Dict) -> bool:
        """
        Check if a row meets category criteria.

        Args:
            row: DataFrame row
            criteria: Criteria dictionary

        Returns:
            True if every recognised criterion is met (unrecognised keys
            are ignored; empty criteria match every row)
        """
        for key, wanted in criteria.items():
            if key == 'codes_required':
                # At least one of the listed codes must be present
                present = self._row_values(row, 'codes')
                if not any(code in present for code in wanted):
                    return False

            elif key == 'codes_all':
                # Every listed code must be present
                present = self._row_values(row, 'codes')
                if not all(code in present for code in wanted):
                    return False

            elif key == 'themes_required':
                # At least one of the listed themes must be present
                present = self._row_values(row, 'themes')
                if not any(theme in present for theme in wanted):
                    return False

        return True

    def summary(self) -> pd.DataFrame:
        """
        Generate category summary.

        Returns:
            DataFrame with one row per category (ID, name, level, count),
            sorted by level
        """
        summary_data = [
            {
                'Category ID': cat_id,
                'Name': cat_info['name'],
                'Level': cat_info['level'],
                'Count': cat_info['count']
            }
            for cat_id, cat_info in self.categories.items()
        ]
        return pd.DataFrame(summary_data, columns=self._SUMMARY_COLUMNS).sort_values('Level')

    def cross_tabulation(self, df: pd.DataFrame,
                        category1: str, category2: str) -> pd.DataFrame:
        """
        Create cross-tabulation between categories.

        The input DataFrame is not modified.

        Args:
            df: DataFrame with a 'categories' column
            category1: First category ID
            category2: Second category ID

        Returns:
            Cross-tabulation DataFrame of 0/1 membership indicators, with
            axes named after the two categories

        Raises:
            KeyError: If either category ID is unknown
        """
        row_name = self.categories[category1]['name']
        col_name = self.categories[category2]['name']

        # Binary indicators kept as local Series so the caller's frame
        # does not gain helper columns as a side effect.
        has_first = df['categories'].apply(lambda cats: 1 if category1 in cats else 0)
        has_second = df['categories'].apply(lambda cats: 1 if category2 in cats else 0)

        return pd.crosstab(
            has_first,
            has_second,
            rownames=[row_name],
            colnames=[col_name]
        )

print("✓ CategoryManager class defined")

### 6.1 Define Categories

In [None]:
# Initialize category manager
category_manager = CategoryManager()

# (category ID, name, codes of which at least one must be present, level)
# Level 1 = primary sentiment, level 2 = specific aspects
category_definitions = [
    ('CAT_POSITIVE', 'Overall Positive',
     ['POS_FLEX', 'POS_BALANCE', 'POS_PROD', 'POS_COST'], 1),
    ('CAT_NEGATIVE', 'Overall Negative',
     ['NEG_COMM', 'NEG_SOCIAL', 'NEG_TECH', 'NEG_BOUND'], 1),
    ('CAT_WORK_FOCUSED', 'Work-Focused',
     ['POS_PROD', 'NEG_TECH', 'NEG_COMM'], 2),
    ('CAT_LIFE_FOCUSED', 'Life-Focused',
     ['POS_BALANCE', 'POS_COST', 'NEG_BOUND'], 2),
    ('CAT_SOCIAL_FOCUSED', 'Social-Focused',
     ['NEG_SOCIAL', 'NEG_COMM'], 2),
]

for category_id, category_name, any_of_codes, category_level in category_definitions:
    category_manager.create_category(
        category_id,
        category_name,
        {'codes_required': any_of_codes},
        level=category_level
    )

print(f"\n✓ Created {len(category_manager.categories)} categories")

### 6.2 Apply Categorization

In [None]:
# Assign categories to every response
df = category_manager.categorize(df)

# Preview the first ten categorized responses
print("\nCategorized Responses:")
categorized_preview = df.loc[:, ['response', 'codes', 'themes', 'categories']].iloc[:10]
display(categorized_preview)

# Per-category statistics
print("\nCategory Summary:")
category_summary = category_manager.summary()
display(category_summary)

### 6.3 Visualize Categories

In [None]:
# Bar chart of category counts, coloured by hierarchical level
fig = px.bar(
    category_summary,
    x='Name',
    y='Count',
    color='Level',
    barmode='group',
    title='Category Distribution by Hierarchical Level'
)
fig.update_layout(height=500, xaxis_tickangle=-45)
fig.show()

# Pie chart restricted to the primary (level 1) categories
is_primary = category_summary['Level'].eq(1)
level1_cats = category_summary.loc[is_primary]
fig = px.pie(
    level1_cats,
    names='Name',
    values='Count',
    title='Primary Category Distribution'
)
fig.show()

## 7. Comprehensive Analysis & Reporting

In [None]:
class AnalysisReporter:
    """Builds summary statistics and a dashboard from a completed analysis.

    Holds references to the coded DataFrame and to the code frame, theme
    analyzer and category manager that produced it.
    """

    def __init__(self, df: pd.DataFrame, code_frame: CodeFrame,
                 theme_analyzer: ThemeAnalyzer, category_manager: CategoryManager):
        self.category_manager = category_manager
        self.theme_analyzer = theme_analyzer
        self.code_frame = code_frame
        self.df = df

    def generate_summary_stats(self) -> Dict:
        """Return totals and per-response averages for codes, themes and categories."""

        def mean_length(column: str):
            # Average number of items in the list stored in each row
            return self.df[column].apply(len).mean()

        return {
            'total_responses': len(self.df),
            'total_codes': len(self.code_frame.codes),
            'total_themes': len(self.theme_analyzer.themes),
            'total_categories': len(self.category_manager.categories),
            'avg_codes_per_response': mean_length('codes'),
            'avg_themes_per_response': mean_length('themes'),
            'avg_categories_per_response': mean_length('categories')
        }

    def create_dashboard(self):
        """Create the 2x2 interactive dashboard figure and return it."""
        from plotly.subplots import make_subplots

        panel_titles = (
            'Code Distribution',
            'Theme Distribution',
            'Category Distribution',
            'Coverage Statistics'
        )
        panel_specs = [
            [{'type': 'bar'}, {'type': 'bar'}],
            [{'type': 'bar'}, {'type': 'indicator'}]
        ]
        fig = make_subplots(
            rows=2, cols=2,
            subplot_titles=panel_titles,
            specs=panel_specs
        )

        codes = self.code_frame.summary()
        themes = self.theme_analyzer.summary()
        categories = self.category_manager.summary()
        avg_codes = self.generate_summary_stats()['avg_codes_per_response']

        # (trace, grid row, grid column), added in reading order
        panels = [
            (go.Bar(x=codes['Label'], y=codes['Count'], name='Codes'), 1, 1),
            (go.Bar(x=themes['Name'], y=themes['Frequency'], name='Themes'), 1, 2),
            (go.Bar(x=categories['Name'], y=categories['Count'], name='Categories'), 2, 1),
            (
                go.Indicator(
                    mode="number+delta",
                    value=avg_codes,
                    title={"text": "Avg Codes/Response"},
                    delta={'reference': 1}
                ),
                2, 2,
            ),
        ]
        for trace, grid_row, grid_col in panels:
            fig.add_trace(trace, row=grid_row, col=grid_col)

        fig.update_layout(height=800, showlegend=False, title_text="Open-Ended Coding Analysis Dashboard")
        return fig

# Wire the reporter to the analysis objects built above
reporter = AnalysisReporter(df, remote_work_frame, theme_analyzer, category_manager)

# Print summary statistics (floats rounded to two decimals)
print("\n=== Summary Statistics ===")
stats = reporter.generate_summary_stats()
for key, value in stats.items():
    if isinstance(value, float):
        print(f"{key}: {value:.2f}")
    else:
        print(f"{key}: {value}")

# Render the dashboard
print("\n=== Analysis Dashboard ===")
dashboard = reporter.create_dashboard()
dashboard.show()

## 8. Export Results

In [None]:
# Make sure the output directory exists
output_dir = Path('output')
output_dir.mkdir(exist_ok=True)

# Coded responses
output_file = output_dir / 'coded_data.csv'
df.to_csv(output_file, index=False)
print(f"✓ Exported coded data to {output_file}")

# Code summary
code_summary_path = output_dir / 'code_summary.csv'
code_summary = remote_work_frame.summary()
code_summary.to_csv(code_summary_path, index=False)
print(f"✓ Exported code summary to {code_summary_path}")

# Theme summary
theme_summary_path = output_dir / 'theme_summary.csv'
theme_summary = theme_analyzer.summary()
theme_summary.to_csv(theme_summary_path, index=False)
print(f"✓ Exported theme summary to {theme_summary_path}")

# Category summary
category_summary_path = output_dir / 'category_summary.csv'
category_summary = category_manager.summary()
category_summary.to_csv(category_summary_path, index=False)
print(f"✓ Exported category summary to {category_summary_path}")

print("\n✓ All results exported successfully!")

## 9. Next Steps

### Customization Options:
1. **Modify Code Frames**: Update the code definitions to match your research needs
2. **Refine Themes**: Adjust theme definitions and associated codes
3. **Add Categories**: Create additional hierarchical categories
4. **Load Your Data**: Replace sample data with your actual responses

### Advanced Analysis:
- Intercoder reliability testing
- Temporal analysis of themes
- Demographic comparisons
- Sentiment analysis integration
- Machine learning-assisted coding

### Quality Assurance:
- Run `make test` to execute unit tests
- Run `make lint` to check code quality
- Review coding consistency across responses