In [1]:
import json
import logging
import os
from collections import Counter
from datetime import datetime, timedelta
from itertools import combinations
from typing import List, Dict, Any

import pandas as pd
import pymongo
from dotenv import load_dotenv
from neo4j import GraphDatabase

load_dotenv()

In [2]:
# Setup logging
# Configure the root logger to emit INFO-and-above records, then create the
# module-level `logger` that every method below uses for progress and errors.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

In [3]:
class MongoNeo4jAggregator:
    def __init__(self, mongo_uri: str, neo4j_uri: str, neo4j_user: str, neo4j_password: str):
        # MongoDB Connection
        self.mongo_client = pymongo.MongoClient(mongo_uri)
        self.mongo_db = self.mongo_client['dbcafe']
        self.transactions_collection = self.mongo_db['transactionlog']
        
        # Neo4j Connection
        self.neo4j_driver = GraphDatabase.driver(neo4j_uri, auth=(neo4j_user, neo4j_password))
        
        # Product price mapping (since prices aren't in MongoDB)
        self.product_prices = {
            "C1": 25000,  # Americano
            "C2": 35000,  # Cappuccino
            "C3": 40000,  # Latte
            "C4": 20000,  # Espresso
            "C5": 30000,  # Macchiato
            "C6": 38000,  # Flat White
            "C7": 45000,  # Mocha
            "C8": 35000,  # Cold Brew
            "NC1": 42000, # Matcha Latte
            "NC2": 40000  # Chai Latte
        }
        
    def close_connections(self):
        """Close database connections"""
        self.mongo_client.close()
        self.neo4j_driver.close()

    def get_product_price(self, product_id: str) -> int:
        """Get product price with fallback"""
        return self.product_prices.get(product_id, 30000)  # Default 30k

    # ========== FIXED IDEA 1: Employee Performance Analysis ==========
    def employee_performance_analysis(self, start_date: str, end_date: str) -> Dict[str, Any]:
        logger.info("Starting employee performance analysis...")
        
        # Get transaction data from MongoDB with fixed aggregation
        mongo_pipeline = [
            {
                "$match": {
                    "transaction_date": {
                        "$gte": start_date,
                        "$lte": end_date
                    }
                }
            },
            {
                "$addFields": {
                    "calculated_revenue": {
                        "$multiply": [
                            "$order_quantity", 
                            30000  # Fixed price instead of trying to access non-existent field
                        ]
                    }
                }
            },
            {
                "$group": {
                    "_id": "$id_employee",
                    "total_transactions": {"$sum": 1},
                    "total_revenue": {"$sum": "$calculated_revenue"},
                    "avg_order_quantity": {"$avg": "$order_quantity"},
                    "franchise_id": {"$first": "$id_franchise"}
                }
            }
        ]
        
        mongo_results = list(self.transactions_collection.aggregate(mongo_pipeline))
        
        # Get employee data from Neo4j
        with self.neo4j_driver.session() as session:
            neo4j_query = """
            MATCH (e:Employee)
            RETURN e.id as employee_id, e.name as employee_name, 
                   e.work_start_hour as start_hour, e.work_end_hour as end_hour,
                   e.id_cafe as cafe_id
            """
            try:
                neo4j_results = session.run(neo4j_query).data()
            except Exception as e:
                logger.warning(f"Neo4j query failed, using empty results: {e}")
                neo4j_results = []
        
        # Combine results
        combined_results = []
        for mongo_emp in mongo_results:
            emp_id = mongo_emp['_id']
            neo4j_emp = next((e for e in neo4j_results if e['employee_id'] == emp_id), None)
            
            if neo4j_emp:
                # Parse work hours (assuming format like "07:00")
                try:
                    start_hour = int(str(neo4j_emp['start_hour']).split(':')[0])
                    end_hour = int(str(neo4j_emp['end_hour']).split(':')[0])
                    work_hours = end_hour - start_hour
                except:
                    work_hours = 8  # Default
                
                performance_score = mongo_emp['total_revenue'] / work_hours if work_hours > 0 else 0
                
                combined_results.append({
                    'employee_id': emp_id,
                    'employee_name': neo4j_emp['employee_name'],
                    'cafe_id': neo4j_emp.get('cafe_id', 'Unknown'),
                    'total_transactions': mongo_emp['total_transactions'],
                    'total_revenue': mongo_emp['total_revenue'],
                    'avg_order_quantity': mongo_emp['avg_order_quantity'],
                    'work_hours': work_hours,
                    'performance_score': performance_score,
                    'revenue_per_hour': performance_score
                })
            else:
                # Include MongoDB data even if Neo4j data is missing
                combined_results.append({
                    'employee_id': emp_id,
                    'employee_name': f'Employee {emp_id}',
                    'cafe_id': mongo_emp.get('franchise_id', 'Unknown'),
                    'total_transactions': mongo_emp['total_transactions'],
                    'total_revenue': mongo_emp['total_revenue'],
                    'avg_order_quantity': mongo_emp['avg_order_quantity'],
                    'work_hours': 8,  # Default work hours
                    'performance_score': mongo_emp['total_revenue'] / 8,
                    'revenue_per_hour': mongo_emp['total_revenue'] / 8
                })
        
        return {
            'analysis_period': f"{start_date} to {end_date}",
            'employee_performance': sorted(combined_results, key=lambda x: x['performance_score'], reverse=True)
        }

    # ========== FIXED IDEA 2: Regional Product Popularity ==========
    def regional_product_analysis(self) -> Dict[str, Any]:
        logger.info("Starting regional product analysis...")
        
        # Since location data is missing from sample, we'll analyze by franchise
        mongo_pipeline = [
            {
                "$unwind": "$product"  # Unwind the product array
            },
            {
                "$group": {
                    "_id": {
                        "franchise_id": "$id_franchise",
                        "product_id": "$product.id_product",
                        "product_name": "$product.name"
                    },
                    "total_quantity": {"$sum": "$product.quantity"},
                    "total_orders": {"$sum": 1},
                    "avg_order_size": {"$avg": "$order_quantity"}
                }
            },
            {
                "$group": {
                    "_id": "$_id.franchise_id",
                    "products": {
                        "$push": {
                            "product_id": "$_id.product_id",
                            "product_name": "$_id.product_name",
                            "total_quantity": "$total_quantity",
                            "total_orders": "$total_orders",
                            "avg_order_size": "$avg_order_size"
                        }
                    },
                    "franchise_total_orders": {"$sum": "$total_orders"}
                }
            }
        ]
        
        mongo_results = list(self.transactions_collection.aggregate(mongo_pipeline))
        
        # Get product category data from Neo4j
        with self.neo4j_driver.session() as session:
            neo4j_query = """
            MATCH (p:Product)
            RETURN p.id_product as product_id, p.name as product_name, 
                   p.category as category, p.price as price
            """
            try:
                product_data = {item['product_id']: item for item in session.run(neo4j_query).data()}
            except Exception as e:
                logger.warning(f"Failed to get product data from Neo4j: {e}")
                product_data = {}
        
        # Enhance results with product categories
        enhanced_results = []
        for franchise_data in mongo_results:
            enhanced_products = []
            for product in franchise_data['products']:
                product_info = product_data.get(product['product_id'], {})
                enhanced_product = {
                    **product,
                    'category': product_info.get('category', 'Coffee'),  # Default category
                    'price': self.get_product_price(product['product_id']),
                    'popularity_score': product['total_orders'] / franchise_data['franchise_total_orders'] if franchise_data['franchise_total_orders'] > 0 else 0
                }
                enhanced_products.append(enhanced_product)
            
            enhanced_results.append({
                'franchise_id': franchise_data['_id'],
                'total_orders': franchise_data['franchise_total_orders'],
                'products': sorted(enhanced_products, key=lambda x: x['popularity_score'], reverse=True)
            })
        
        return {
            'franchise_analysis': enhanced_results,
            'top_franchises': sorted(enhanced_results, key=lambda x: x['total_orders'], reverse=True)[:5]
        }

    # ========== FIXED IDEA 3: Franchise Growth Analysis ==========
    def franchise_growth_analysis(self, months_back: int = 12) -> Dict[str, Any]:
        logger.info("Starting franchise growth analysis...")
        
        # Calculate date range
        end_date = datetime.now()
        start_date = end_date - timedelta(days=months_back * 30)
        
        # Get monthly transaction trends from MongoDB
        mongo_pipeline = [
            {
                "$match": {
                    "transaction_date": {
                        "$gte": start_date.strftime("%Y-%m-%d"),
                        "$lte": end_date.strftime("%Y-%m-%d")
                    }
                }
            },
            {
                "$addFields": {
                    "month_year": {"$substr": ["$transaction_date", 0, 7]},  # Extract YYYY-MM
                    "calculated_revenue": {"$multiply": ["$order_quantity", 30000]}
                }
            },
            {
                "$group": {
                    "_id": {
                        "franchise_id": "$id_franchise",
                        "month": "$month_year"
                    },
                    "monthly_transactions": {"$sum": 1},
                    "monthly_revenue": {"$sum": "$calculated_revenue"}
                }
            },
            {
                "$group": {
                    "_id": "$_id.franchise_id",
                    "monthly_data": {
                        "$push": {
                            "month": "$_id.month",
                            "transactions": "$monthly_transactions",
                            "revenue": "$monthly_revenue"
                        }
                    },
                    "total_transactions": {"$sum": "$monthly_transactions"},
                    "total_revenue": {"$sum": "$monthly_revenue"}
                }
            }
        ]
        
        mongo_results = list(self.transactions_collection.aggregate(mongo_pipeline))
        
        # Get franchise data from Neo4j
        with self.neo4j_driver.session() as session:
            neo4j_query = """
            MATCH (f:Franchise)
            RETURN f.id as franchise_id, f.name as franchise_name, 
                   f.year_established as year_established
            """
            try:
                franchise_data = {item['franchise_id']: item for item in session.run(neo4j_query).data()}
            except Exception as e:
                logger.warning(f"Failed to get franchise data: {e}")
                franchise_data = {}
        
        # Calculate growth metrics
        growth_analysis = []
        for franchise_info in mongo_results:
            franchise_id = franchise_info['_id']
            neo4j_info = franchise_data.get(franchise_id, {})
            
            # Calculate growth trend
            monthly_data = sorted(franchise_info['monthly_data'], key=lambda x: x['month'])
            if len(monthly_data) >= 2:
                first_month_revenue = monthly_data[0]['revenue']
                last_month_revenue = monthly_data[-1]['revenue']
                growth_rate = ((last_month_revenue - first_month_revenue) / first_month_revenue * 100) if first_month_revenue > 0 else 0
            else:
                growth_rate = 0
            
            franchise_age = datetime.now().year - neo4j_info.get('year_established', datetime.now().year)
            
            growth_analysis.append({
                'franchise_id': franchise_id,
                'franchise_name': neo4j_info.get('franchise_name', f'Franchise {franchise_id}'),
                'franchise_age_years': franchise_age,
                'total_transactions': franchise_info['total_transactions'],
                'total_revenue': franchise_info['total_revenue'],
                'growth_rate_percent': growth_rate,
                'monthly_trends': monthly_data,
                'avg_monthly_revenue': franchise_info['total_revenue'] / len(monthly_data) if monthly_data else 0
            })
        
        return {
            'analysis_period_months': months_back,
            'franchise_growth': sorted(growth_analysis, key=lambda x: x['growth_rate_percent'], reverse=True),
            'top_performers': sorted(growth_analysis, key=lambda x: x['total_revenue'], reverse=True)[:10]
        }

    # ========== FIXED IDEA 4: Cross-Selling Opportunity Analysis ==========
    def cross_selling_analysis(self) -> Dict[str, Any]:
        logger.info("Starting cross-selling analysis...")
        
        # Get multi-product transactions
        multi_product_pipeline = [
            {
                "$match": {
                    "$expr": {"$gt": [{"$size": "$product"}, 1]}  # More than 1 product
                }
            },
            {
                "$project": {
                    "id_transaction": 1,
                    "product_ids": "$product.id_product",
                    "id_franchise": 1,
                    "calculated_revenue": {"$multiply": ["$order_quantity", 30000]}
                }
            }
        ]
        
        multi_product_transactions = list(self.transactions_collection.aggregate(multi_product_pipeline))
        
        # Get product data from Neo4j
        with self.neo4j_driver.session() as session:
            neo4j_query = """
            MATCH (p:Product)
            RETURN p.id_product as product_id, p.name as product_name, 
                   p.category as category
            """
            try:
                products = {item['product_id']: item for item in session.run(neo4j_query).data()}
            except Exception as e:
                logger.warning(f"Failed to get product data: {e}")
                products = {}
        
        # Analyze product combinations
        product_combinations = {}
        
        for transaction in multi_product_transactions:
            products_in_transaction = transaction['product_ids']
            
            # Analyze product pairs
            for i, product1 in enumerate(products_in_transaction):
                for product2 in products_in_transaction[i+1:]:
                    pair = tuple(sorted([product1, product2]))
                    product_combinations[pair] = product_combinations.get(pair, 0) + 1
        
        # Sort and format results
        top_product_pairs = sorted(product_combinations.items(), key=lambda x: x[1], reverse=True)[:20]
        
        formatted_product_pairs = []
        for (prod1, prod2), count in top_product_pairs:
            prod1_info = products.get(prod1, {})
            prod2_info = products.get(prod2, {})
            formatted_product_pairs.append({
                'product1': {'id': prod1, 'name': prod1_info.get('product_name', f'Product {prod1}')},
                'product2': {'id': prod2, 'name': prod2_info.get('product_name', f'Product {prod2}')},
                'frequency': count,
                'confidence': count / len(multi_product_transactions) if multi_product_transactions else 0
            })
        
        return {
            'analysis_summary': {
                'total_multi_product_transactions': len(multi_product_transactions),
                'unique_product_combinations': len(product_combinations)
            },
            'top_product_combinations': formatted_product_pairs
        }

    # ========== FIXED MongoDB-Only Analysis ==========
    def mongodb_only_analysis(self) -> Dict[str, Any]:
        logger.info("Running MongoDB-only analysis...")
        
        # Transaction summary
        total_transactions = self.transactions_collection.count_documents({})
        
        # Top employees by transaction count
        employee_pipeline = [
            {
                "$addFields": {
                    "calculated_revenue": {"$multiply": ["$order_quantity", 30000]}
                }
            },
            {
                "$group": {
                    "_id": "$id_employee",
                    "transaction_count": {"$sum": 1},
                    "total_revenue": {"$sum": "$calculated_revenue"},
                    "avg_order_quantity": {"$avg": "$order_quantity"}
                }
            },
            {"$sort": {"total_revenue": -1}},
            {"$limit": 10}
        ]
        
        top_employees = list(self.transactions_collection.aggregate(employee_pipeline))
        
        # Top products (unwind the array first)
        product_pipeline = [
            {"$unwind": "$product"},
            {
                "$group": {
                    "_id": "$product.id_product",
                    "product_name": {"$first": "$product.name"},
                    "total_quantity": {"$sum": "$product.quantity"},
                    "order_count": {"$sum": 1}
                }
            },
            {"$sort": {"total_quantity": -1}},
            {"$limit": 10}
        ]
        
        top_products = list(self.transactions_collection.aggregate(product_pipeline))
        
        # Franchise analysis
        franchise_pipeline = [
            {
                "$addFields": {
                    "calculated_revenue": {"$multiply": ["$order_quantity", 30000]}
                }
            },
            {
                "$group": {
                    "_id": "$id_franchise",
                    "transaction_count": {"$sum": 1},
                    "total_revenue": {"$sum": "$calculated_revenue"}
                }
            },
            {"$sort": {"total_revenue": -1}},
            {"$limit": 10}
        ]
        
        top_franchises = list(self.transactions_collection.aggregate(franchise_pipeline))
        
        return {
            'summary': {
                'total_transactions': total_transactions,
                'analysis_type': 'MongoDB Only'
            },
            'top_employees': top_employees,
            'top_products': top_products,
            'top_franchises': top_franchises
        }

    # ========== FIXED Customer Segmentation ==========
    def customer_segmentation_analysis(self) -> Dict[str, Any]:
        logger.info("Starting customer segmentation analysis...")
        
        pipeline = [
            {
                "$addFields": {
                    "calculated_revenue": {"$multiply": ["$order_quantity", 30000]}
                }
            },
            {
                "$group": {
                    "_id": "$name",  # Customer name as identifier
                    "total_spent": {"$sum": "$calculated_revenue"},
                    "transaction_count": {"$sum": 1},
                    "avg_order_value": {"$avg": "$calculated_revenue"},
                    "preferred_franchises": {"$addToSet": "$id_franchise"},
                    "last_transaction": {"$max": "$transaction_date"}
                }
            },
            {
                "$addFields": {
                    "customer_segment": {
                        "$switch": {
                            "branches": [
                                {
                                    "case": {"$and": [{"$gte": ["$total_spent", 500000]}, {"$gte": ["$transaction_count", 20]}]},
                                    "then": "VIP"
                                },
                                {
                                    "case": {"$and": [{"$gte": ["$total_spent", 200000]}, {"$gte": ["$transaction_count", 10]}]},
                                    "then": "Regular"
                                },
                                {
                                    "case": {"$and": [{"$lte": ["$total_spent", 100000]}, {"$lte": ["$transaction_count", 5]}]},
                                    "then": "Occasional"
                                }
                            ],
                            "default": "New"
                        }
                    },
                    "franchise_loyalty": {"$size": "$preferred_franchises"}
                }
            },
            {
                "$group": {
                    "_id": "$customer_segment",
                    "customer_count": {"$sum": 1},
                    "avg_total_spent": {"$avg": "$total_spent"},
                    "avg_transaction_count": {"$avg": "$transaction_count"},
                    "avg_order_value": {"$avg": "$avg_order_value"}
                }
            }
        ]
        
        segmentation_results = list(self.transactions_collection.aggregate(pipeline))
        
        return {
            'customer_segments': segmentation_results,
            'total_customers': sum(segment['customer_count'] for segment in segmentation_results)
        }

    # ========== Main Analysis Runner ==========
    def run_comprehensive_analysis(self) -> Dict[str, Any]:
        logger.info("Starting comprehensive analysis...")
        
        results = {}
        
        try:
            # Employee Performance Analysis
            try:
                results['employee_performance'] = self.employee_performance_analysis(
                    start_date="2020-01-01", 
                    end_date="2025-12-31"
                )
                logger.info("✓ Employee performance analysis completed")
            except Exception as e:
                logger.error(f"Employee performance analysis failed: {e}")
                results['employee_performance'] = {'error': str(e)}
            
            # Regional Product Analysis
            try:
                results['regional_products'] = self.regional_product_analysis()
                logger.info("✓ Regional product analysis completed")
            except Exception as e:
                logger.error(f"Regional product analysis failed: {e}")
                results['regional_products'] = {'error': str(e)}
            
            # Franchise Growth Analysis
            try:
                results['franchise_growth'] = self.franchise_growth_analysis(months_back=12)
                logger.info("✓ Franchise growth analysis completed")
            except Exception as e:
                logger.error(f"Franchise growth analysis failed: {e}")
                results['franchise_growth'] = {'error': str(e)}
            
            # Cross-selling Analysis
            try:
                results['cross_selling'] = self.cross_selling_analysis()
                logger.info("✓ Cross-selling analysis completed")
            except Exception as e:
                logger.error(f"Cross-selling analysis failed: {e}")
                results['cross_selling'] = {'error': str(e)}
            
            # Customer segmentation
            try:
                results['customer_segmentation'] = self.customer_segmentation_analysis()
                logger.info("✓ Customer segmentation completed")
            except Exception as e:
                logger.error(f"Customer segmentation failed: {e}")
                results['customer_segmentation'] = {'error': str(e)}
            
            # # MongoDB-only analysis as backup
            # try:
            #     results['mongodb_analysis'] = self.mongodb_only_analysis()
            #     logger.info("✓ MongoDB analysis completed")
            # except Exception as e:
            #     logger.error(f"MongoDB analysis failed: {e}")
            #     results['mongodb_analysis'] = {'error': str(e)}
            
            # logger.info("Comprehensive analysis completed!")
            
        except Exception as e:
            logger.error(f"Critical error during analysis: {str(e)}")
            results = {'error': 'Analysis failed', 'details': str(e)}
        
        return results

    # ========== Data Validation ==========
    def validate_data_structure(self) -> Dict[str, Any]:
        logger.info("Validating data structure...")
        
        validation_results = {
            'mongodb': {},
            'neo4j': {},
            'recommendations': []
        }
        
        # MongoDB validation
        try:
            sample_transaction = self.transactions_collection.find_one()
            if sample_transaction:
                validation_results['mongodb'] = {
                    'sample_structure': {
                        'fields': list(sample_transaction.keys()),
                        'has_product_info': 'product' in sample_transaction,
                        'product_is_array': isinstance(sample_transaction.get('product'), list),
                        'has_employee_info': 'id_employee' in sample_transaction,
                        'has_franchise_info': 'id_franchise' in sample_transaction
                    },
                    'total_documents': self.transactions_collection.count_documents({}),
                    'sample_product_structure': sample_transaction.get('product', [])[:2] if sample_transaction.get('product') else []
                }
            else:
                validation_results['mongodb']['error'] = 'No documents found'
        except Exception as e:
            validation_results['mongodb']['error'] = str(e)
        
        # Neo4j validation
        try:
            with self.neo4j_driver.session() as session:
                node_counts = {}
                for label in ['Employee', 'Product', 'Franchise']:
                    try:
                        result = session.run(f"MATCH (n:{label}) RETURN count(n) as count")
                        node_counts[label] = result.single()['count']
                    except Exception as e:
                        node_counts[label] = f"Error: {e}"
                
                validation_results['neo4j']['node_counts'] = node_counts
                
        except Exception as e:
            validation_results['neo4j']['error'] = str(e)
        
        return validation_results

In [4]:
def _print_analysis_summary(results: Dict[str, Any]) -> None:
    """Print one status line per analysis plus a few headline metrics."""
    for analysis_type, data in results.items():
        title = analysis_type.replace('_', ' ').title()

        if not isinstance(data, dict):
            # A top-level failure payload holds plain strings ('error', 'details');
            # a substring test on them would wrongly report success.
            print(f"✗ {title}: {data}")
            continue

        if 'error' in data:
            print(f"✗ {title}: {data['error']}")
            continue

        print(f"✓ {title}: Success")

        # Print specific metrics for each analysis
        if analysis_type == 'employee_performance':
            employees = data.get('employee_performance', [])
            print(f"  - {len(employees)} employees analyzed")
            if employees:
                top_performer = employees[0]
                print(f"  - Top performer: {top_performer['employee_name']} (Rp{top_performer['revenue_per_hour']:,.2f}/hour)")

        elif analysis_type == 'regional_products':
            franchise_count = len(data.get('franchise_analysis', []))
            print(f"  - {franchise_count} franchises analyzed")

        elif analysis_type == 'customer_segmentation':
            total_customers = data.get('total_customers', 0)
            print(f"  - {total_customers} customers segmented")
            for segment in data.get('customer_segments', []):
                print(f"    • {segment['_id']}: {segment['customer_count']} customers")

        elif analysis_type == 'mongodb_analysis':
            total_trans = data.get('summary', {}).get('total_transactions', 0)
            print(f"  - {total_trans:,} total transactions processed")


def main():
    """Validate both databases, run every analysis and print a summary.

    Neo4j settings are read from the ``NEO4J_URI``, ``NEO4J_USERNAME`` and
    ``NEO4J_PASSWORD`` environment variables; MongoDB is expected on
    ``localhost:27017``. Database connections are always closed on exit.

    Raises:
        RuntimeError: If ``NEO4J_URI`` is not set.
    """
    # Database connection parameters
    MONGO_URI = "mongodb://localhost:27017/"
    URI = os.getenv("NEO4J_URI")
    USERNAME = os.getenv("NEO4J_USERNAME")
    PASSWORD = os.getenv("NEO4J_PASSWORD")

    if not URI:
        # Fail with a clear message instead of an obscure driver error.
        raise RuntimeError("NEO4J_URI is not set; define it in the environment or in a .env file")

    # Initialize aggregator
    aggregator = MongoNeo4jAggregator(
        mongo_uri=MONGO_URI,
        neo4j_uri=URI,
        neo4j_user=USERNAME,
        neo4j_password=PASSWORD
    )

    try:
        # Validate data structure first
        print("=== DATA VALIDATION ===")
        validation = aggregator.validate_data_structure()
        print(json.dumps(validation, indent=2, default=str))

        # Run comprehensive analysis
        print("\n=== RUNNING COMPREHENSIVE ANALYSIS ===")
        results = aggregator.run_comprehensive_analysis()

        # Print summary
        print("\n=== ANALYSIS SUMMARY ===")
        _print_analysis_summary(results)

    except Exception as e:
        logger.error(f"Main execution failed: {str(e)}")
        print(f"Analysis failed with error: {e}")

    finally:
        # Close connections
        aggregator.close_connections()
        print("Database connections closed.")

# Run the whole pipeline when this cell/script is executed directly, but not
# when the module is imported.
if __name__ == "__main__":
    main()

INFO:__main__:Validating data structure...


=== DATA VALIDATION ===


INFO:__main__:Starting comprehensive analysis...
INFO:__main__:Starting employee performance analysis...


{
  "mongodb": {
    "sample_structure": {
      "fields": [
        "_id",
        "id_transaction",
        "id_franchise",
        "id_employee",
        "product",
        "name",
        "transaction_date",
        "order_quantity"
      ],
      "has_product_info": true,
      "product_is_array": true,
      "has_employee_info": true,
      "has_franchise_info": true
    },
    "total_documents": 10000,
    "sample_product_structure": [
      {
        "id_product": "C3",
        "name": "Latte",
        "quantity": 3
      }
    ]
  },
  "neo4j": {
    "node_counts": {
      "Employee": 50,
      "Product": 10,
      "Franchise": 10
    }
  },
  "recommendations": []
}

=== RUNNING COMPREHENSIVE ANALYSIS ===


INFO:__main__:✓ Employee performance analysis completed
INFO:__main__:Starting regional product analysis...
INFO:__main__:✓ Regional product analysis completed
INFO:__main__:Starting franchise growth analysis...
INFO:__main__:✓ Franchise growth analysis completed
INFO:__main__:Starting cross-selling analysis...
INFO:__main__:✓ Cross-selling analysis completed
INFO:__main__:Starting customer segmentation analysis...
INFO:__main__:✓ Customer segmentation completed



=== ANALYSIS SUMMARY ===
✓ Employee Performance: Success
  - 50 employees analyzed
  - Top performer: Employee 16 (Rp5,047,500.00/day)
✓ Regional Products: Success
  - 10 franchises analyzed
✓ Franchise Growth: Success
✓ Cross Selling: Success
✓ Customer Segmentation: Success
  - 9357 customers segmented
    • Occasional: 2193 customers
    • New: 7164 customers
Database connections closed.
