In [1]:
import pandas as pd
import numpy as np
from faker import Faker
import random

# Set up Faker
fake = Faker('en_GB')

# Define lists for additional randomization
industries = [
    'Finance', 'Tech', 'Food & Beverage',
    'Retail', 'Travel'
]

international_regions = [
    'Europe', 'North America', 'Asia Pacific',
    'Middle East', 'Developed Markets'
]

def generate_sales_data(num_entries=200):
    data = []

    for _ in range(num_entries):
        # Company details
        company_name = fake.company()
        hq_location = random.choice([
            'London', 'Manchester', 'Edinburgh',
            'Birmingham', 'Bristol', 'Leeds'
        ])
        industry = random.choice(industries)

        # Financial details
        annual_revenue = round(np.random.uniform(5, 500), 2)
        company_size = random.choice([
            '50-100 employees',
            '100-250 employees',
            '250-500 employees',
            '500-1000 employees'
        ])
        international_exposure = random.choice(international_regions)

        # FX details
        fx_volume = round(np.random.uniform(1, 50), 2)
        lead_score = random.randint(50, 95)

        # Contact details
        first_name = fake.first_name()
        last_name = fake.last_name()
        contact_name = f"{first_name} {last_name}"
        email = f"{first_name.lower()}.{last_name.lower()}@{company_name.replace(' ', '').lower()}.co.uk"

        # Customize contact positions based on industry
        contact_positions = {
            'Finance': ['CFO', 'Financial Director', 'Head of Treasury', 'Finance Manager'],
            'Tech': ['CTO', 'Head of Operations', 'Chief Innovation Officer', 'IT Director'],
            'Food & Beverage': ['Operations Director', 'Supply Chain Manager', 'Procurement Head', 'CEO'],
            'Retail': ['Retail Operations Director', 'Procurement Manager', 'Supply Chain Director', 'Commercial Director'],
            'Travel': ['Operations Director', 'Chief Commercial Officer', 'Head of International Operations', 'Strategy Director']
        }

        contact_position = random.choice(contact_positions[industry])
        phone_number = fake.phone_number()

        # Create entry
        entry = {
            'Company Name': company_name,
            'Company HQ': hq_location,
            'Industry': industry,
            'Annual Revenue (£M)': annual_revenue,
            'Company Size': company_size,
            'International Exposure': international_exposure,
            'Current FX Volume (£M)': fx_volume,
            'Potential Lead Score': lead_score,
            'Contact Name': contact_name,
            'Contact Email': email,
            'Contact Position': contact_position,
            'Phone Number': phone_number
        }

        data.append(entry)

    # Convert to DataFrame
    df = pd.DataFrame(data)
    return df

# Generate and save the dataset
sales_data = generate_sales_data()

# Save to CSV (optional)
sales_data.to_csv('fx_sales_leads.csv', index=False)

# Display first few rows
print(sales_data.head())

# Optional: Show industry distribution
print("\nIndustry Distribution:")
print(sales_data['Industry'].value_counts())

ModuleNotFoundError: No module named 'faker'

In [None]:
import pandas as pd
import numpy as np
from faker import Faker
import random
from datetime import datetime, timedelta

# Set up the Faker instance
fake = Faker('en_GB')

def generate_fx_sales_data(num_entries=200):
    data = []

    # Define lists for consistent data generation
    uk_cities = [
        'London', 'Manchester', 'Birmingham', 'Edinburgh',
        'Bristol', 'Leeds', 'Glasgow', 'Cambridge', 'Oxford'
    ]

    industries = [
        'Manufacturing', 'Technology', 'Retail',
        'Professional Services', 'Healthcare',
        'Consumer Goods', 'Energy', 'Media'
    ]

    overseas_markets = [
        'European Union', 'North America', 'Asia Pacific',
        'Western Europe', 'Nordics', 'Australia/NZ'
    ]

    currencies_traded = ['EUR', 'USD', 'JPY', 'CHF', 'AUD', 'CAD', 'SEK', 'NOK']

    for _ in range(num_entries):
        # Company Details
        company_name = fake.company()
        hq_location = random.choice(uk_cities)
        industry = random.choice(industries)
        year_established = random.randint(1950, 2015)

        # Financial Metrics
        annual_revenue = round(random.uniform(10, 500), 2)  # In millions GBP
        market_cap = round(annual_revenue * random.uniform(1.5, 4), 2)  # In millions GBP
        employees = random.choice([50, 100, 250, 500, 1000, 2000, 5000])

        # FX Trading Profile
        monthly_fx_volume = round(random.uniform(1, 50), 2)  # In millions GBP
        primary_currency_pair = random.choice(currencies_traded) + '/GBP'
        num_currency_pairs_traded = random.randint(1, 6)

        # International Exposure
        primary_overseas_market = random.choice(overseas_markets)
        num_overseas_subsidiaries = random.randint(1, 8)
        export_revenue_percentage = random.randint(10, 70)

        # Risk Metrics
        credit_rating = random.choice(['AAA', 'AA+', 'AA', 'AA-', 'A+', 'A', 'A-', 'BBB+'])
        risk_score = random.randint(65, 95)  # Higher is better

        # Sales Pipeline Data
        last_transaction_date = fake.date_between(
            start_date='-6M',
            end_date='today'
        )
        potential_annual_revenue = round(monthly_fx_volume * 12 * random.uniform(0.002, 0.004), 2)
        lead_score = random.randint(1, 100)

        # Contact Information
        contact_name = fake.name()
        contact_position = random.choice([
            'Treasury Manager', 'CFO', 'Finance Director',
            'Head of Finance', 'Financial Controller'
        ])
        contact_email = f"{contact_name.lower().replace(' ', '.')}@{company_name.lower().replace(' ', '')}.co.uk"
        contact_phone = fake.phone_number()

        # Create entry
        entry = {
            'Company Name': company_name,
            'HQ Location': hq_location,
            'Industry': industry,
            'Year Established': year_established,
            'Annual Revenue (£M)': annual_revenue,
            'Market Cap (£M)': market_cap,
            'Employees': employees,
            'Monthly FX Volume (£M)': monthly_fx_volume,
            'Primary Currency Pair': primary_currency_pair,
            'Number of Currency Pairs': num_currency_pairs_traded,
            'Primary Overseas Market': primary_overseas_market,
            'Number of Overseas Subsidiaries': num_overseas_subsidiaries,
            'Export Revenue %': export_revenue_percentage,
            'Credit Rating': credit_rating,
            'Risk Score': risk_score,
            'Last Transaction Date': last_transaction_date,
            'Potential Annual Revenue (£M)': potential_annual_revenue,
            'Lead Score': lead_score,
            'Contact Name': contact_name,
            'Contact Position': contact_position,
            'Contact Email': contact_email,
            'Contact Phone': contact_phone
        }

        data.append(entry)

    return pd.DataFrame(data)

# Generate the data
fx_sales_data = generate_fx_sales_data()

# Display the first few rows and basic statistics
print("\nFirst few rows of the dataset:")
print(fx_sales_data.head())

print("\nDataset Info:")
print(fx_sales_data.info())

print("\nBasic Statistics:")
print(fx_sales_data.describe())

# Save to CSV (optional)
fx_sales_data.to_csv('fx_sales_data.csv', index=False)

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

def prepare_data_for_classification(df):
    """Prepare data for classification"""
    # Select features
    features = [
        'Annual Revenue (£M)',
        'Monthly FX Volume (£M)',
        'Export Revenue %',
        'Lead Score',
        'Number of Overseas Subsidiaries'
    ]

    # Create target variable (1 if FX volume is between 3-40M annually)
    df['Target'] = ((df['Monthly FX Volume (£M)'] * 12 >= 3) &
                   (df['Monthly FX Volume (£M)'] * 12 <= 40)).astype(int)

    X = df[features]
    y = df['Target']

    return X, y, features

def train_random_forest(X, y):
    """Train Random Forest model"""
    # Split the data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Scale features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Train model
    rf = RandomForestClassifier(n_estimators=100, random_state=42)
    rf.fit(X_train_scaled, y_train)

    # Make predictions
    y_pred = rf.predict(X_test_scaled)

    return rf, scaler, X_train_scaled, X_test_scaled, y_test, y_pred

def calculate_lead_scores(model, X):
    """Calculate lead scores using model probabilities"""
    return model.predict_proba(X)[:, 1]

def create_interactive_visualizations(df, lead_scores):
    # Add suitability scores to dataframe
    df['Suitability_Score'] = lead_scores * 100

    # Create main scatter plot
    fig1 = px.scatter(
        df,
        x='Annual Revenue (£M)',
        y='Monthly FX Volume (£M)',
        color='Suitability_Score',
        size='Lead Score',
        hover_data=[
            'Company Name',
            'Industry',
            'Suitability_Score',
            'Number of Overseas Subsidiaries',
            'Export Revenue %'
        ],
        color_continuous_scale='viridis',
        title='Prospect Suitability Analysis'
    )

    # Update layout
    fig1.update_layout(
        height=800,
        width=1200,
        template='plotly_white',
        hoverlabel=dict(
            bgcolor="white",
            font_size=12,
            font_family="Arial"
        )
    )

    # Add target zone indicators
    fig1.add_hline(y=3/12, line_dash="dash", line_color="red",
                   annotation_text="Min Target Volume")
    fig1.add_hline(y=40/12, line_dash="dash", line_color="red",
                   annotation_text="Max Target Volume")

    fig1.show()

    # Create industry analysis
    fig2 = px.scatter(
        df,
        x='Export Revenue %',
        y='Suitability_Score',
        size='Monthly FX Volume (£M)',
        color='Industry',
        hover_data=[
            'Company Name',
            'Monthly FX Volume (£M)',
            'Number of Overseas Subsidiaries',
            'Lead Score'
        ],
        title='Industry-wise Prospect Analysis'
    )

    fig2.update_layout(
        height=800,
        width=1200,
        template='plotly_white'
    )

    fig2.show()

    # Create 3D analysis
    fig3 = px.scatter_3d(
        df,
        x='Annual Revenue (£M)',
        y='Export Revenue %',
        z='Monthly FX Volume (£M)',
        color='Industry',
        size='Suitability_Score',
        hover_data=[
            'Company Name',
            'Suitability_Score',
            'Lead Score'
        ],
        title='3D Prospect Analysis'
    )

    fig3.update_layout(
        height=800,
        width=1200,
        scene=dict(
            xaxis_title='Annual Revenue (£M)',
            yaxis_title='Export Revenue %',
            zaxis_title='Monthly FX Volume (£M)'
        )
    )

    fig3.show()

    # Create summary statistics
    summary_stats = df.groupby('Industry').agg({
        'Suitability_Score': ['mean', 'count'],
        'Monthly FX Volume (£M)': 'mean',
        'Export Revenue %': 'mean'
    }).round(2)

    print("\nIndustry Summary Statistics:")
    print(summary_stats)

    # Save plots (optional)
    fig1.write_html("suitability_analysis.html")
    fig2.write_html("industry_analysis.html")
    fig3.write_html("3d_analysis.html")

def generate_fx_sales_data(n_samples=1000):
    """Generate synthetic FX sales data"""
    np.random.seed(42)

    data = []
    industries = ['Food & Beverage', 'Travel', 'Tech', 'Engineering']

    for i in range(n_samples):
        record = {
            'Company Name': f'Company_{i}',
            'Industry': np.random.choice(industries),
            'Annual Revenue (£M)': np.random.uniform(10, 500),
            'Monthly FX Volume (£M)': np.random.uniform(0.1, 5),
            'Export Revenue %': np.random.uniform(0, 100),
            'Lead Score': np.random.uniform(0, 100),
            'Number of Overseas Subsidiaries': np.random.randint(0, 20)
        }
        data.append(record)

    return pd.DataFrame(data)

def main():
    # Generate sample data
    fx_sales_data = generate_fx_sales_data()

    # Prepare data
    X, y, features = prepare_data_for_classification(fx_sales_data)

    # Train model
    rf, scaler, X_train_scaled, X_test_scaled, y_test, y_pred = train_random_forest(X, y)

    # Calculate lead scores
    lead_scores = calculate_lead_scores(rf, scaler.transform(X))

    # Create visualizations
    create_interactive_visualizations(fx_sales_data, lead_scores)

    # Print results
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred))

    # Display top prospects
    print("\nTop 10 Most Suitable Prospects:")
    fx_sales_data['Suitability_Score'] = lead_scores * 100
    top_prospects = fx_sales_data.sort_values('Suitability_Score', ascending=False).head(10)
    print(top_prospects[['Company Name', 'Industry', 'Monthly FX Volume (£M)', 'Suitability_Score']])

if __name__ == "__main__":
    main()

In [None]:
def generate_fx_customer_data(n_samples=1000):
    """Generate synthetic FX customer data"""
    try:
        np.random.seed(42)

        # Define industry distribution
        industries = ['Food & Beverage', 'Travel', 'Tech', 'Engineering']
        industry_weights = [0.3, 0.2, 0.3, 0.2]  # Weights for each industry

        data = []
        for i in range(n_samples):
            # Generate base metrics
            industry = np.random.choice(industries, p=industry_weights)

            # Adjust metrics based on industry
            if industry == 'Tech':
                revenue_range = (50, 500)
                volume_range = (0.5, 8)
                export_range = (30, 90)
            elif industry == 'Food & Beverage':
                revenue_range = (20, 300)
                volume_range = (0.2, 5)
                export_range = (10, 60)
            elif industry == 'Travel':
                revenue_range = (10, 200)
                volume_range = (0.1, 3)
                export_range = (20, 80)
            else:  # Engineering
                revenue_range = (30, 400)
                volume_range = (0.3, 6)
                export_range = (40, 90)

            record = {
                'Company Name': f'Company_{i:04d}',
                'Industry': industry,
                'Annual Revenue (£M)': np.random.uniform(*revenue_range),
                'Monthly FX Volume (£M)': np.random.uniform(*volume_range),
                'Export Revenue %': np.random.uniform(*export_range),
                'Lead Score': np.random.uniform(0, 100),
                'Number of Overseas Subsidiaries': np.random.randint(0, 20),
                'Years in Business': np.random.randint(1, 50),
                'Number of Employees': np.random.randint(50, 5000),
                'Credit Rating': np.random.choice(['AAA', 'AA', 'A', 'BBB+', 'BBB']),
                'Geographic Presence': np.random.randint(1, 10),
                'Product Complexity': np.random.choice(['Low', 'Medium', 'High']),
                'Digital Maturity': np.random.choice(['Low', 'Medium', 'High']),
                'Customer Base': np.random.randint(10, 1000)
            }

            data.append(record)

        df = pd.DataFrame(data)

        # Add some derived metrics
        df['Revenue per Employee'] = df['Annual Revenue (£M)'] * 1e6 / df['Number of Employees']
        df['FX Volume per Revenue'] = df['Monthly FX Volume (£M)'] / df['Annual Revenue (£M)']
        df['International Exposure Score'] = (
            df['Export Revenue %'] * 0.4 +
            df['Number of Overseas Subsidiaries'] * 5 * 0.3 +
            df['Geographic Presence'] * 10 * 0.3
        )

        print(f"Generated {len(df)} records with {len(df.columns)} features")
        return df

    except Exception as e:
        print(f"Error in data generation: {str(e)}")
        return None

def main():
    try:
        # Generate sample data
        print("Generating sample data...")
        fx_sales_data = generate_fx_customer_data(n_samples=1000)

        if fx_sales_data is None or fx_sales_data.empty:
            raise ValueError("Failed to generate sample data")

        print("\nData Sample:")
        print(fx_sales_data.head())
        print("\nData Summary:")
        print(fx_sales_data.describe())

        # Prepare data
        print("\nPreparing data for classification...")
        X, y, features = prepare_data_for_classification(fx_sales_data)

        if X is None or y is None:
            raise ValueError("Failed to prepare data for classification")

        # Train model
        print("Training random forest model...")
        rf, scaler, X_train_scaled, X_test_scaled, y_test, y_pred = train_random_forest(X, y)

        if rf is None:
            raise ValueError("Failed to train random forest model")

        # Calculate lead scores
        print("Calculating lead scores...")
        lead_scores = calculate_lead_scores(rf, scaler.transform(X))

        if lead_scores is None:
            raise ValueError("Failed to calculate lead scores")

        # Store lead scores
        fx_sales_data['Suitability_Score'] = lead_scores * 100

        # Perform advanced analysis
        print("Performing advanced analysis...")
        feature_importance = analyze_feature_importance(rf, fx_sales_data)
        segments = perform_customer_segmentation(fx_sales_data)

        # Print results
        print("\nClassification Report:")
        print(classification_report(y_test, y_pred))

        if feature_importance is not None:
            print("\nTop 5 Important Features:")
            print(feature_importance['importance'].head())

        if segments is not None:
            print("\nSegment Profiles:")
            print(segments['profiles'])

        # Display top prospects
        print("\nTop 10 Most Suitable Prospects:")
        top_prospects = fx_sales_data.sort_values('Suitability_Score', ascending=False).head(10)
        print(top_prospects[['Company Name', 'Industry', 'Monthly FX Volume (£M)', 'Suitability_Score']])

        return fx_sales_data, rf, feature_importance, segments

    except Exception as e:
        print(f"Error in main execution: {str(e)}")
        return None, None, None, None

if __name__ == "__main__":
    try:
        data, model, importance, segments = main()

        if all(v is not None for v in [data, model, importance, segments]):
            print("\nAnalysis completed successfully!")
        else:
            print("\nAnalysis completed with some failures.")

    except Exception as e:
        print(f"Script failed: {str(e)}")
        import traceback
        traceback.print_exc()
        sys.exit(1)

In [None]:
def generate_fx_customer_data(n_samples=1000):
    """Generate synthetic FX customer data"""
    try:
        np.random.seed(42)
        fake = Faker()

        data = []
        for i in range(n_samples):
            record = {
                'customer_id': f'CUST_{i:04d}',
                'date': fake.date_between(start_date='-2y', end_date='today'),
                'industry': np.random.choice(['Finance', 'Tech', 'Food & Beverage', 'Retail', 'Travel']),
                'company_size': np.random.choice(['Small', 'Medium', 'Large', 'Enterprise']),
                'monthly_volume': np.random.uniform(0.1, 100),
                'spot_ratio': np.random.uniform(0, 1),
                'forward_ratio': np.random.uniform(0, 1),
                'login_frequency': np.random.randint(1, 100),
                'quote_requests': np.random.randint(0, 50),
                'support_tickets': np.random.randint(0, 20),
                'meeting_count': np.random.randint(0, 10),
                'platform_usage_hours': np.random.uniform(1, 200),
                'credit_score': np.random.uniform(300, 850),
                'risk_score': np.random.uniform(1, 100),
                'engagement_score': np.random.uniform(0, 100),
                'volume_change': np.random.uniform(-0.5, 0.5),
                'engagement_change': np.random.uniform(-0.3, 0.3),
                'annual_revenue': np.random.uniform(1e6, 1e9),
                'employees': np.random.randint(10, 10000),
                'response_time_hours': np.random.uniform(0, 72),
                'churned': np.random.choice([0, 1], p=[0.8, 0.2])  # Using numpy's random choice
            }

            # Add industry-specific features
            if record['industry'] == 'Food & Beverage':
                record.update({
                    'seasonal_impact': np.random.uniform(0, 1),
                    'commodity_exposure': np.random.uniform(0.3, 0.8),
                    'supplier_countries': np.random.randint(2, 10)
                })
            elif record['industry'] == 'Tech':
                record.update({
                    'growth_rate': np.random.uniform(0.1, 0.4),
                    'dev_center_countries': np.random.randint(1, 5),
                    'subscription_revenue_ratio': np.random.uniform(0.4, 0.9)
                })
            elif record['industry'] == 'Retail':
                record.update({
                    'peak_season_volume': np.random.uniform(1.2, 2.0),
                    'currency_pairs_count': np.random.randint(3, 10),
                    'booking_volatility': np.random.uniform(0.1, 0.4)
                })
            else:  # Travel or Finance
                record.update({
                    'project_count': np.random.randint(1, 15),
                    'equipment_import_ratio': np.random.uniform(0.2, 0.7),
                    'contract_duration_months': np.random.randint(6, 36)
                })

            data.append(record)

        df = pd.DataFrame(data)
        print(f"Generated {len(df)} records with {len(df.columns)} features")
        return df

    except Exception as e:
        print(f"Error in data generation: {str(e)}")
        return None

In [None]:
from datetime import datetime, timedelta
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.decomposition import PCA
from sklearn.model_selection import TimeSeriesSplit
from sklearn.linear_model import Lasso
from sklearn.feature_selection import SelectKBest, f_classif
import logging
import sys
import traceback

# Set up logging
log_filename = f'churn_model_{datetime.now().strftime("%Y%m%d_%H%M%S")}.log'
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(log_filename),
        logging.StreamHandler(sys.stdout)
    ]
)
logger = logging.getLogger(__name__)

def generate_fx_customer_data(n_samples=1000):
    """Generate synthetic FX customer data with churn indicators"""
    try:
        logger.info(f"Starting data generation for {n_samples} samples")
        np.random.seed(42)
        start_date = datetime(2020, 1, 1)

        # Basic company profiles
        industries = ['Food & Beverage', 'Travel', 'Tech', 'Engineering']
        company_sizes = ['Small', 'Medium', 'Large', 'Enterprise']

        data = []

        for i in range(n_samples):
            # Generate base customer data
            customer_data = {
                'customer_id': f'CUST_{i:04d}',
                'industry': np.random.choice(industries),
                'company_size': np.random.choice(company_sizes),
                'date': start_date + timedelta(days=np.random.randint(0, 365*2)),
                'annual_revenue': np.random.uniform(1e6, 1e9),
                'employees': np.random.randint(50, 10000)
            }

            # Add transaction metrics
            customer_data.update({
                'monthly_volume': np.random.uniform(1e5, 1e7),
                'spot_ratio': np.random.uniform(0.3, 0.7),
                'forward_ratio': np.random.uniform(0.3, 0.7),
                'avg_transaction_size': np.random.uniform(1e4, 1e6),
                'transaction_frequency': np.random.randint(5, 100)
            })

            # Add engagement metrics
            customer_data.update({
                'login_frequency': np.random.randint(1, 30),
                'support_tickets': np.random.randint(0, 10),
                'meeting_count': np.random.randint(0, 5),
                'platform_usage_hours': np.random.uniform(1, 100),
                'response_time_hours': np.random.uniform(1, 48)
            })

            # Add risk metrics
            customer_data.update({
                'credit_score': np.random.uniform(300, 850),
                'risk_score': np.random.uniform(1, 100),
                'payment_delays': np.random.randint(0, 5),
                'failed_transactions': np.random.randint(0, 3)
            })

            # Calculate churn probability and status
            churn_factors = [
                customer_data['payment_delays'] > 2,
                customer_data['failed_transactions'] > 1,
                customer_data['login_frequency'] < 5,
                customer_data['support_tickets'] > 5
            ]

            churn_probability = sum(churn_factors) / len(churn_factors)
            customer_data['churned'] = 1 if np.random.random() < churn_probability else 0

            data.append(customer_data)

        # Convert to DataFrame
        df = pd.DataFrame(data)

        # Validate generated data
        if df.empty:
            raise ValueError("Generated DataFrame is empty")

        logger.info(f"Successfully generated {len(df)} records")
        logger.info(f"Churn rate: {df['churned'].mean():.2%}")

        return df

    except Exception as e:
        logger.error(f"Error in data generation: {str(e)}")
        logger.error(f"Traceback: {traceback.format_exc()}")
        raise

def validate_dataset(df):
    """Validate the generated dataset"""
    try:
        # Check for minimum size
        if len(df) < 100:
            raise ValueError("Dataset too small: minimum 100 records required")

        # Check for required columns
        required_columns = [
            'customer_id', 'industry', 'company_size', 'monthly_volume',
            'login_frequency', 'churned'
        ]
        missing_columns = [col for col in required_columns if col not in df.columns]
        if missing_columns:
            raise ValueError(f"Missing required columns: {missing_columns}")

        # Check for data types
        if not pd.api.types.is_numeric_dtype(df['monthly_volume']):
            raise ValueError("monthly_volume must be numeric")

        # Check for missing values
        missing_values = df.isnull().sum()
        if missing_values.any():
            logger.warning(f"Missing values found:\n{missing_values[missing_values > 0]}")

        # Check class balance
        churn_rate = df['churned'].mean()
        if churn_rate < 0.05 or churn_rate > 0.95:
            logger.warning(f"Severe class imbalance detected: {churn_rate:.2%} churn rate")

        logger.info("Dataset validation completed successfully")
        return True

    except Exception as e:
        logger.error(f"Dataset validation failed: {str(e)}")
        return False

def main():
    try:
        # Generate data
        logger.info("Starting data generation process...")
        df = generate_fx_customer_data(n_samples=1000)

        # Validate dataset
        if not validate_dataset(df):
            raise ValueError("Dataset validation failed")

        # Continue with the rest of your processing...
        logger.info("Data generation and validation completed successfully")
        return df

    except Exception as e:
        logger.error(f"Error in main execution: {str(e)}")
        logger.error(f"Traceback: {traceback.format_exc()}")
        return None

if __name__ == "__main__":
    try:
        result = main()
        if result is None:
            logger.error("Script failed to complete successfully")
            sys.exit(1)
        else:
            logger.info(f"Script completed successfully. Generated {len(result)} records")
            # Save the data (optional)
            result.to_csv('fx_customer_data.csv', index=False)
    except Exception as e:
        logger.error(f"Unexpected error in script execution: {str(e)}")
        logger.error(f"Traceback: {traceback.format_exc()}")
        sys.exit(1)

In [None]:
class ChurnModelPipeline:
    def __init__(self, df):
        self.df = df
        self.scaler = StandardScaler()
        self.pca = PCA(n_components=0.95)  # Explain 95% of variance
        self.feature_importance = None
        self.best_model = None
        self.feature_columns = None  # Add this line
        logger.info("ChurnModelPipeline initialized")

    def engineer_features(self):
        """Create advanced features for the model"""
        try:
            logger.info("Starting feature engineering...")

            # Create time-based features
            self.df['days_since_first_transaction'] = (
                self.df.groupby('customer_id')['date']
                .transform('max') - self.df.groupby('customer_id')['date']
                .transform('min')
            ).dt.days

            # Create ratio features
            self.df['volume_per_login'] = self.df['monthly_volume'] / self.df['login_frequency'].clip(lower=1)
            self.df['tickets_per_volume'] = self.df['support_tickets'] / self.df['monthly_volume'].clip(lower=1)
            self.df['revenue_per_employee'] = self.df['annual_revenue'] / self.df['employees'].clip(lower=1)

            # Create interaction features
            self.df['risk_volume_interaction'] = self.df['risk_score'] * self.df['monthly_volume']
            self.df['engagement_score'] = (
                self.df['login_frequency'] * 0.3 +
                self.df['platform_usage_hours'] * 0.3 +
                (1 - self.df['response_time_hours']/48) * 0.2 +
                (1 - self.df['support_tickets']/10) * 0.2
            ) * 100

            # Create categorical encodings
            categorical_columns = ['industry', 'company_size']
            for col in categorical_columns:
                dummies = pd.get_dummies(self.df[col], prefix=col)
                self.df = pd.concat([self.df, dummies], axis=1)

            logger.info("Feature engineering completed successfully")
            return self.df

        except Exception as e:
            logger.error(f"Error in feature engineering: {str(e)}")
            raise

    def prepare_features(self):
        """Prepare features for modeling"""
        try:
            logger.info("Preparing features for modeling...")

            # Select features for modeling
            self.feature_columns = [  # Store as instance variable
                'monthly_volume', 'spot_ratio', 'forward_ratio',
                'login_frequency', 'support_tickets', 'platform_usage_hours',
                'credit_score', 'risk_score', 'volume_per_login',
                'tickets_per_volume', 'revenue_per_employee',
                'risk_volume_interaction', 'engagement_score',
                'days_since_first_transaction'
            ]

            # Add dummy columns
            dummy_columns = [col for col in self.df.columns if 'industry_' in col or 'company_size_' in col]
            self.feature_columns.extend(dummy_columns)

            # Prepare X and y
            X = self.df[self.feature_columns]
            y = self.df['churned']

            # Scale features
            X_scaled = self.scaler.fit_transform(X)

            logger.info(f"Prepared {len(self.feature_columns)} features for modeling")
            return X_scaled, y, self.feature_columns

        except Exception as e:
            logger.error(f"Error in feature preparation: {str(e)}")
            raise

    # [Rest of the methods remain the same until visualize_results]

    def visualize_results(self, results, X_test, y_test):
        """Create visualizations of model performance"""
        try:
            if self.feature_columns is None:
                raise ValueError("Feature columns not set. Run prepare_features first.")

            # Create figure with subplots
            plt.figure(figsize=(20, 10))

            # 1. ROC Curves
            plt.subplot(2, 2, 1)
            for name, result in results.items():
                fpr, tpr, _ = roc_curve(y_test, result['predictions'])
                plt.plot(fpr, tpr, label=f"{name} (AUC = {result['auc_roc']:.3f})")

            plt.plot([0, 1], [0, 1], 'k--')
            plt.xlabel('False Positive Rate')
            plt.ylabel('True Positive Rate')
            plt.title('ROC Curves')
            plt.legend()

            # 2. Precision-Recall Curves
            plt.subplot(2, 2, 2)
            for name, result in results.items():
                precision, recall, _ = precision_recall_curve(y_test, result['predictions'])
                plt.plot(recall, precision, label=f"{name} (AP = {result['avg_precision']:.3f})")

            plt.xlabel('Recall')
            plt.ylabel('Precision')
            plt.title('Precision-Recall Curves')
            plt.legend()

            # 3. Feature Importance (for best model)
            if hasattr(self.best_model, 'feature_importances_'):
                plt.subplot(2, 2, 3)
                importances = pd.Series(
                    self.best_model.feature_importances_,
                    index=self.feature_columns
                ).sort_values(ascending=False)[:10]

                sns.barplot(x=importances.values, y=importances.index)
                plt.title('Top 10 Feature Importance')

            # 4. Confusion Matrix
            plt.subplot(2, 2, 4)
            cm = confusion_matrix(y_test, self.best_model.predict(X_test))
            sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
            plt.title('Confusion Matrix')

            plt.tight_layout()
            plt.show()

        except Exception as e:
            logger.error(f"Error in visualization: {str(e)}")
            raise

In [None]:
class ChurnModelPipeline:
    def __init__(self, df):
        self.df = df
        self.scaler = StandardScaler()
        self.pca = PCA(n_components=0.95)
        self.feature_importance = None
        self.best_model = None
        self.feature_columns = []  # Initialize as empty list
        self.X_test = None
        self.y_test = None
        logger.info("ChurnModelPipeline initialized")

    def prepare_features(self):
        """Prepare features for modeling"""
        try:
            logger.info("Preparing features for modeling...")

            # Select features for modeling
            self.feature_columns = [
                'monthly_volume', 'spot_ratio', 'forward_ratio',
                'login_frequency', 'support_tickets', 'platform_usage_hours',
                'credit_score', 'risk_score', 'volume_per_login',
                'tickets_per_volume', 'revenue_per_employee',
                'risk_volume_interaction', 'engagement_score',
                'days_since_first_transaction'
            ]

            # Verify all columns exist in dataframe
            missing_columns = [col for col in self.feature_columns if col not in self.df.columns]
            if missing_columns:
                raise ValueError(f"Missing columns in dataframe: {missing_columns}")

            # Add dummy columns
            dummy_columns = [col for col in self.df.columns if 'industry_' in col or 'company_size_' in col]
            self.feature_columns.extend(dummy_columns)

            # Prepare X and y
            X = self.df[self.feature_columns]
            y = self.df['churned']

            # Check for missing values
            if X.isnull().any().any():
                logger.warning("Missing values found in features. Filling with zeros.")
                X = X.fillna(0)

            # Scale features
            X_scaled = self.scaler.fit_transform(X)

            logger.info(f"Prepared {len(self.feature_columns)} features for modeling")

            # Verify shapes
            logger.info(f"X shape: {X_scaled.shape}, y shape: {y.shape}")

            return X_scaled, y, self.feature_columns

        except Exception as e:
            logger.error(f"Error in feature preparation: {str(e)}")
            logger.error(f"Available columns: {list(self.df.columns)}")
            raise


In [None]:
class ChurnModelPipeline:
    def __init__(self, df):
        self.df = df
        self.scaler = StandardScaler()
        self.pca = PCA(n_components=0.95)
        self.feature_importance = None
        self.best_model = None
        self.feature_columns = []
        self.X_test = None
        self.y_test = None
        logger.info("ChurnModelPipeline initialized")

    def engineer_features(self):
        """Create advanced features for the model"""
        try:
            logger.info("Starting feature engineering...")

            # Create time-based features
            self.df['days_since_first_transaction'] = (
                self.df.groupby('customer_id')['date']
                .transform('max') - self.df.groupby('customer_id')['date']
                .transform('min')
            ).dt.days

            # Create ratio features
            self.df['volume_per_login'] = self.df['monthly_volume'] / self.df['login_frequency'].clip(lower=1)
            self.df['tickets_per_volume'] = self.df['support_tickets'] / self.df['monthly_volume'].clip(lower=1)
            self.df['revenue_per_employee'] = self.df['annual_revenue'] / self.df['employees'].clip(lower=1)

            # Create interaction features
            self.df['risk_volume_interaction'] = self.df['risk_score'] * self.df['monthly_volume']
            self.df['engagement_score'] = (
                self.df['login_frequency'] * 0.3 +
                self.df['platform_usage_hours'] * 0.3 +
                (1 - self.df['response_time_hours']/48) * 0.2 +
                (1 - self.df['support_tickets']/10) * 0.2
            ) * 100

            # Create categorical encodings
            categorical_columns = ['industry', 'company_size']
            for col in categorical_columns:
                dummies = pd.get_dummies(self.df[col], prefix=col)
                self.df = pd.concat([self.df, dummies], axis=1)

            logger.info("Feature engineering completed successfully")
            return self.df

        except Exception as e:
            logger.error(f"Error in feature engineering: {str(e)}")
            raise

    def prepare_features(self):
        """Prepare features for modeling"""
        try:
            logger.info("Preparing features for modeling...")

            # Select features for modeling
            self.feature_columns = [
                'monthly_volume', 'spot_ratio', 'forward_ratio',
                'login_frequency', 'support_tickets', 'platform_usage_hours',
                'credit_score', 'risk_score', 'volume_per_login',
                'tickets_per_volume', 'revenue_per_employee',
                'risk_volume_interaction', 'engagement_score',
                'days_since_first_transaction'
            ]

            # Verify all columns exist in dataframe
            missing_columns = [col for col in self.feature_columns if col not in self.df.columns]
            if missing_columns:
                raise ValueError(f"Missing columns in dataframe: {missing_columns}")

            # Add dummy columns
            dummy_columns = [col for col in self.df.columns if 'industry_' in col or 'company_size_' in col]
            self.feature_columns.extend(dummy_columns)

            # Prepare X and y
            X = self.df[self.feature_columns]
            y = self.df['churned']

            # Check for missing values
            if X.isnull().any().any():
                logger.warning("Missing values found in features. Filling with zeros.")
                X = X.fillna(0)

            # Scale features
            X_scaled = self.scaler.fit_transform(X)

            logger.info(f"Prepared {len(self.feature_columns)} features for modeling")
            return X_scaled, y, self.feature_columns

        except Exception as e:
            logger.error(f"Error in feature preparation: {str(e)}")
            raise

    def apply_pca(self, X):
        """Apply PCA transformation"""
        try:
            logger.info("Applying PCA...")

            # Verify input
            if X is None:
                raise ValueError("Input matrix X is None")

            if len(X.shape) != 2:
                raise ValueError(f"Expected 2D array, got {len(X.shape)}D")

            # Apply PCA
            X_pca = self.pca.fit_transform(X)

            # Calculate explained variance
            explained_variance = np.cumsum(self.pca.explained_variance_ratio_)

            # Log PCA results
            logger.info(f"Number of components selected: {self.pca.n_components_}")
            logger.info(f"Total variance explained: {explained_variance[-1]:.3f}")

            # Store PCA attributes
            self.n_components_ = self.pca.n_components_
            self.explained_variance_ratio_ = self.pca.explained_variance_ratio_
            self.components_ = self.pca.components_

            # Create component names
            self.pca_features = [f'PC{i+1}' for i in range(self.n_components_)]

            # Create loadings dataframe
            self.pca_loadings = pd.DataFrame(
                self.components_,
                columns=self.feature_columns,
                index=self.pca_features
            )

            return X_pca, explained_variance

        except Exception as e:
            logger.error(f"Error in PCA application: {str(e)}")
            raise

    def train_models(self, X, y):
        """Train and evaluate multiple models"""
        try:
            logger.info("Training models...")

            # Split data
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, test_size=0.2, random_state=42
            )

            # Store test data
            self.X_test = X_test
            self.y_test = y_test

            # Initialize models
            models = {
                'random_forest': RandomForestClassifier(n_estimators=100, random_state=42),
                'gradient_boosting': GradientBoostingClassifier(n_estimators=100, random_state=42)
            }

            # Train and evaluate models
            results = {}
            for name, model in models.items():
                model.fit(X_train, y_train)
                y_pred_proba = model.predict_proba(X_test)[:, 1]

                results[name] = {
                    'auc_roc': roc_auc_score(y_test, y_pred_proba),
                    'avg_precision': average_precision_score(y_test, y_pred_proba),
                    'model': model,
                    'predictions': y_pred_proba
                }

                logger.info(f"{name} - AUC ROC: {results[name]['auc_roc']:.3f}")

            # Select best model
            best_model_name = max(results.items(), key=lambda x: x[1]['auc_roc'])[0]
            self.best_model = results[best_model_name]['model']

            logger.info(f"Best model: {best_model_name}")
            return results, X_test, y_test

        except Exception as e:
            logger.error(f"Error in model training: {str(e)}")
            raise

In [None]:
def generate_fx_customer_data(n_samples=1000):
    """Generate synthetic FX customer data"""
    try:
        fake = Faker()
        np.random.seed(42)  # For reproducibility

        data = []
        for i in range(n_samples):
            # Use numpy's random choice instead of random.choice
            record = {
                'customer_id': f'CUST_{i:04d}',
                'date': fake.date_between(start_date='-2y', end_date='today'),
                'industry': np.random.choice(['Finance', 'Tech', 'Food & Beverage', 'Retail', 'Travel']),
                'company_size': np.random.choice(['Small', 'Medium', 'Large', 'Enterprise']),
                'monthly_volume': np.random.uniform(0.1, 100),
                'spot_ratio': np.random.uniform(0, 1),
                'forward_ratio': np.random.uniform(0, 1),
                'login_frequency': np.random.randint(1, 100),
                'support_tickets': np.random.randint(0, 20),
                'platform_usage_hours': np.random.uniform(1, 200),
                'credit_score': np.random.uniform(300, 850),
                'risk_score': np.random.uniform(1, 100),
                'annual_revenue': np.random.uniform(1e6, 1e9),
                'employees': np.random.randint(10, 10000),
                'response_time_hours': np.random.uniform(0, 72),
                # Use numpy's random choice for churned with probability
                'churned': np.random.choice([0, 1], p=[0.8, 0.2])
            }
            data.append(record)

        df = pd.DataFrame(data)
        logger.info(f"Generated {len(df)} records with {len(df.columns)} features")

        # Validate generated data
        if df.isnull().any().any():
            logger.warning("Generated data contains null values")

        return df

    except Exception as e:
        logger.error(f"Error in data generation: {str(e)}")
        logger.error(traceback.format_exc())
        return None

In [None]:
def create_interactive_dashboard(pipeline, results):
    """Create comprehensive interactive dashboard"""
    # Add visualization code from earlier

In [None]:
def analyze_model_decisions(pipeline):
    """Analyze and explain model decisions"""
    # SHAP values
    # Feature interactions
    # Decision paths

In [None]:
def perform_customer_segmentation(df):
    """Segment customers based on behavior"""
    # Clustering
    # Segment profiling
    # Segment-specific insights

In [None]:
def analyze_risk_profiles(df):
    """Analyze customer risk profiles"""
    # Risk scoring
    # Risk factors
    # Risk trends

In [None]:
def generate_recommendations(pipeline):
    """Generate customer-specific recommendations"""
    # Product recommendations
    # Risk mitigation suggestions
    # Engagement strategies

In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import dash
from dash import dcc, html
from dash.dependencies import Input, Output, State
import dash_bootstrap_components as dbc
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.cluster import KMeans
from sklearn.metrics import roc_curve, auc, precision_recall_curve
import shap
import warnings
warnings.filterwarnings('ignore')

class ComprehensiveDashboard:
    def __init__(self, pipeline, results, data):
        self.pipeline = pipeline
        self.results = results
        self.data = data
        self.app = dash.Dash(__name__, external_stylesheets=[dbc.themes.FLATLY])
        self.setup_layout()
        self.setup_callbacks()

    def setup_layout(self):
        self.app.layout = dbc.Container([
            # Header
            dbc.Row([
                dbc.Col([
                    html.H1("FX Sales Analytics Dashboard",
                           className="text-center mb-4"),
                    html.Hr()
                ])
            ]),

            # Control Panel
            dbc.Row([
                dbc.Col([
                    dbc.Card([
                        dbc.CardHeader("Control Panel"),
                        dbc.CardBody([
                            dbc.Row([
                                dbc.Col([
                                    html.Label("Select Industry"),
                                    dcc.Dropdown(
                                        id='industry-filter',
                                        options=[{'label': x, 'value': x}
                                                for x in self.data['Industry'].unique()],
                                        value='All',
                                        clearable=False
                                    )
                                ], width=4),
                                dbc.Col([
                                    html.Label("Select Analysis Type"),
                                    dcc.Dropdown(
                                        id='analysis-type',
                                        options=[
                                            {'label': 'Customer Overview', 'value': 'overview'},
                                            {'label': 'Risk Analysis', 'value': 'risk'},
                                            {'label': 'Performance Metrics', 'value': 'performance'},
                                            {'label': 'Segmentation', 'value': 'segmentation'}
                                        ],
                                        value='overview',
                                        clearable=False
                                    )
                                ], width=4),
                                dbc.Col([
                                    html.Label("Time Range"),
                                    dcc.RangeSlider(
                                        id='time-range',
                                        marks={i: str(i) for i in range(0, 13, 3)},
                                        min=0,
                                        max=12,
                                        value=[0, 12]
                                    )
                                ], width=4)
                            ])
                        ])
                    ])
                ])
            ], className="mb-4"),

            # KPI Cards
            dbc.Row([
                dbc.Col([
                    dbc.Card([
                        dbc.CardBody([
                            html.H4("Total Customers", className="card-title"),
                            html.H2(id="kpi-total-customers"),
                        ])
                    ])
                ], width=3),
                dbc.Col([
                    dbc.Card([
                        dbc.CardBody([
                            html.H4("Average FX Volume", className="card-title"),
                            html.H2(id="kpi-avg-volume"),
                        ])
                    ])
                ], width=3),
                dbc.Col([
                    dbc.Card([
                        dbc.CardBody([
                            html.H4("Churn Rate", className="card-title"),
                            html.H2(id="kpi-churn-rate"),
                        ])
                    ])
                ], width=3),
                dbc.Col([
                    dbc.Card([
                        dbc.CardBody([
                            html.H4("Risk Score", className="card-title"),
                            html.H2(id="kpi-risk-score"),
                        ])
                    ])
                ], width=3),
            ], className="mb-4"),

            # Main Content Area
            dbc.Row([
                # Left Column - Charts
                dbc.Col([
                    dbc.Card([
                        dbc.CardHeader("Primary Analysis"),
                        dbc.CardBody([
                            dcc.Graph(id='main-chart')
                        ])
                    ])
                ], width=8),

                # Right Column - Details
                dbc.Col([
                    dbc.Card([
                        dbc.CardHeader("Detailed Metrics"),
                        dbc.CardBody([
                            dcc.Graph(id='detail-chart')
                        ])
                    ])
                ], width=4)
            ], className="mb-4"),

            # Bottom Row - Additional Analysis
            dbc.Row([
                dbc.Col([
                    dbc.Tabs([
                        dbc.Tab([
                            dcc.Graph(id='model-performance')
                        ], label="Model Performance"),
                        dbc.Tab([
                            dcc.Graph(id='feature-importance')
                        ], label="Feature Importance"),
                        dbc.Tab([
                            dcc.Graph(id='customer-segments')
                        ], label="Customer Segments"),
                        dbc.Tab([
                            dcc.Graph(id='risk-analysis')
                        ], label="Risk Analysis")
                    ])
                ])
            ])
        ], fluid=True)

In [None]:
    def setup_callbacks(self):
        @self.app.callback(
            [Output('kpi-total-customers', 'children'),
             Output('kpi-avg-volume', 'children'),
             Output('kpi-churn-rate', 'children'),
             Output('kpi-risk-score', 'children'),
             Output('main-chart', 'figure'),
             Output('detail-chart', 'figure'),
             Output('model-performance', 'figure'),
             Output('feature-importance', 'figure'),
             Output('customer-segments', 'figure'),
             Output('risk-analysis', 'figure')],
            [Input('industry-filter', 'value'),
             Input('analysis-type', 'value'),
             Input('time-range', 'value')]
        )
        def update_dashboard(industry, analysis_type, time_range):
            # Filter data based on selections
            filtered_df = self.filter_data(industry, time_range)

            # Calculate KPIs
            kpis = self.calculate_kpis(filtered_df)

            # Generate charts based on analysis type
            if analysis_type == 'overview':
                main_fig = self.create_overview_chart(filtered_df)
                detail_fig = self.create_overview_details(filtered_df)
            elif analysis_type == 'risk':
                main_fig = self.create_risk_chart(filtered_df)
                detail_fig = self.create_risk_details(filtered_df)
            elif analysis_type == 'performance':
                main_fig = self.create_performance_chart(filtered_df)
                detail_fig = self.create_performance_details(filtered_df)
            else:  # segmentation
                main_fig = self.create_segmentation_chart(filtered_df)
                detail_fig = self.create_segmentation_details(filtered_df)

            # Create model performance visualizations
            model_perf_fig = self.create_model_performance_chart()
            feature_imp_fig = self.create_feature_importance_chart()
            segment_fig = self.create_customer_segment_chart(filtered_df)
            risk_fig = self.create_risk_analysis_chart(filtered_df)

            return (
                kpis['total_customers'],
                f"£{kpis['avg_volume']:,.2f}M",
                f"{kpis['churn_rate']:.1%}",
                f"{kpis['risk_score']:.1f}",
                main_fig,
                detail_fig,
                model_perf_fig,
                feature_imp_fig,
                segment_fig,
                risk_fig
            )

    def filter_data(self, industry, time_range):
        """Filter data based on selections"""
        df = self.data.copy()

        if industry != 'All':
            df = df[df['Industry'] == industry]

        # Add time-based filtering if needed

        return df

    def calculate_kpis(self, df):
        """Calculate key performance indicators"""
        return {
            'total_customers': len(df),
            'avg_volume': df['Monthly FX Volume (£M)'].mean(),
            'churn_rate': df['churned'].mean(),
            'risk_score': df['risk_score'].mean()
        }

    def create_overview_chart(self, df):
        """Create main overview chart"""
        fig = px.scatter(
            df,
            x='Monthly FX Volume (£M)',
            y='risk_score',
            color='Industry',
            size='Annual Revenue (£M)',
            hover_data=['Company Name', 'churned'],
            title='Customer Overview by Volume and Risk'
        )

        fig.update_layout(
            height=600,
            template='plotly_white'
        )

        return fig

    def create_overview_details(self, df):
        """Create detailed overview metrics"""
        fig = make_subplots(rows=2, cols=1)

        # Volume distribution
        fig.add_trace(
            go.Histogram(
                x=df['Monthly FX Volume (£M)'],
                name='Volume Distribution'
            ),
            row=1, col=1
        )

        # Risk distribution
        fig.add_trace(
            go.Histogram(
                x=df['risk_score'],
                name='Risk Distribution'
            ),
            row=2, col=1
        )

        fig.update_layout(height=400, showlegend=True)
        return fig

    def create_risk_chart(self, df):
        """Create risk analysis chart"""
        # Calculate risk metrics
        df['composite_risk'] = (
            df['risk_score'] * 0.4 +
            df['Monthly FX Volume (£M)'] * 0.3 +
            df['churned'] * 0.3
        )

        fig = px.scatter_3d(
            df,
            x='risk_score',
            y='Monthly FX Volume (£M)',
            z='composite_risk',
            color='Industry',
            size='Annual Revenue (£M)',
            hover_data=['Company Name'],
            title='3D Risk Analysis'
        )

        return fig

    def create_risk_details(self, df):
        """Create detailed risk metrics"""
        risk_by_industry = df.groupby('Industry')['risk_score'].mean().reset_index()

        fig = px.bar(
            risk_by_industry,
            x='Industry',
            y='risk_score',
            title='Average Risk Score by Industry'
        )

        return fig

    def create_performance_chart(self, df):
        """Create performance analysis chart"""
        fig = px.scatter(
            df,
            x='Monthly FX Volume (£M)',
            y='Annual Revenue (£M)',
            color='churned',
            size='risk_score',
            facet_col='Industry',
            title='Performance Analysis by Industry'
        )

        return fig

    def create_performance_details(self, df):
        """Create detailed performance metrics"""
        metrics = df.groupby('Industry').agg({
            'Monthly FX Volume (£M)': 'mean',
            'Annual Revenue (£M)': 'mean',
            'churned': 'mean'
        }).reset_index()

        fig = px.bar(
            metrics,
            x='Industry',
            y=['Monthly FX Volume (£M)', 'Annual Revenue (£M)', 'churned'],
            title='Key Metrics by Industry',
            barmode='group'
        )

        return fig

    def create_model_performance_chart(self):
        """Create model performance visualization"""
        # Get predictions from the pipeline
        y_pred_proba = self.pipeline.best_model.predict_proba(self.pipeline.X_test)[:, 1]

        # Calculate ROC curve
        fpr, tpr, _ = roc_curve(self.pipeline.y_test, y_pred_proba)
        roc_auc = auc(fpr, tpr)

        fig = go.Figure()
        fig.add_trace(
            go.Scatter(
                x=fpr, y=tpr,
                name=f'ROC curve (AUC = {roc_auc:.2f})'
            )
        )

        fig.add_trace(
            go.Scatter(
                x=[0, 1], y=[0, 1],
                line=dict(dash='dash'),
                name='Random'
            )
        )

        fig.update_layout(
            title='Model ROC Curve',
            xaxis_title='False Positive Rate',
            yaxis_title='True Positive Rate'
        )

        return fig

    def create_feature_importance_chart(self):
        """Create feature importance visualization"""
        if hasattr(self.pipeline.best_model, 'feature_importances_'):
            importance = pd.DataFrame({
                'feature': self.pipeline.feature_columns,
                'importance': self.pipeline.best_model.feature_importances_
            }).sort_values('importance', ascending=True)

            fig = px.bar(
                importance.tail(10),
                x='importance',
                y='feature',
                orientation='h',
                title='Top 10 Feature Importance'
            )

            return fig
        return go.Figure()

    def create_customer_segment_chart(self, df):
        """Create customer segmentation visualization"""
        # Perform clustering
        features_for_clustering = [
            'Monthly FX Volume (£M)',
            'Annual Revenue (£M)',
            'risk_score'
        ]

        X = StandardScaler().fit_transform(df[features_for_clustering])
        kmeans = KMeans(n_clusters=4, random_state=42)
        df['Segment'] = kmeans.fit_predict(X)

        fig = px.scatter_3d(
            df,
            x='Monthly FX Volume (£M)',
            y='Annual Revenue (£M)',
            z='risk_score',
            color='Segment',
            title='Customer Segments'
        )

        return fig

    def create_risk_analysis_chart(self, df):
        """Create risk analysis visualization"""
        # Calculate risk metrics
        df['volume_risk'] = pd.qcut(df['Monthly FX Volume (£M)'], q=5, labels=['VL', 'L', 'M', 'H', 'VH'])
        df['revenue_risk'] = pd.qcut(df['Annual Revenue (£M)'], q=5, labels=['VL', 'L', 'M', 'H', 'VH'])

        risk_matrix = pd.crosstab(df['volume_risk'], df['revenue_risk'])

        fig = px.imshow(
            risk_matrix,
            title='Risk Matrix: Volume vs Revenue',
            labels=dict(x='Revenue Risk', y='Volume Risk')
        )

        return fig

    def run(self):
        """Run the dashboard"""
        self.app.run_server(debug=True)

In [None]:
class FXAnalyticsSuite:
    def __init__(self, pipeline, results, data):
        self.pipeline = pipeline
        self.results = results
        self.data = data
        self.shap_values = None
        self.calculate_shap_values()

    def analyze_model_performance(self):
        """Analyze detailed model performance"""
        try:
            # Get feature importance
            if hasattr(self.pipeline.best_model, 'feature_importances_'):
                # Ensure feature names match the actual features used
                feature_importance = pd.DataFrame({
                    'feature': self.pipeline.feature_columns,
                    'importance': self.pipeline.best_model.feature_importances_
                })
                feature_importance = feature_importance.sort_values('importance', ascending=False)
            else:
                feature_importance = pd.DataFrame()

            return {
                'accuracy': accuracy_score(
                    self.pipeline.y_test,
                    self.pipeline.best_model.predict(self.pipeline.X_test)
                ),
                'feature_importance': feature_importance,
                'shap_values': self.shap_values,
                'model_type': type(self.pipeline.best_model).__name__,
                'performance_metrics': {
                    'precision': precision_score(
                        self.pipeline.y_test,
                        self.pipeline.best_model.predict(self.pipeline.X_test)
                    ),
                    'recall': recall_score(
                        self.pipeline.y_test,
                        self.pipeline.best_model.predict(self.pipeline.X_test)
                    ),
                    'roc_auc': roc_auc_score(
                        self.pipeline.y_test,
                        self.pipeline.best_model.predict_proba(self.pipeline.X_test)[:, 1]
                    )
                }
            }
        except Exception as e:
            print(f"Error in model performance analysis: {str(e)}")
            return {
                'accuracy': None,
                'feature_importance': pd.DataFrame(),
                'shap_values': None,
                'model_type': None,
                'performance_metrics': {}
            }

    def generate_insights_report(self):
        """Generate comprehensive insights report"""
        try:
            insights = {
                'model_performance': self.analyze_model_performance(),
                'customer_segments': self.analyze_customer_segments(),
                'risk_profiles': self.analyze_risk_profiles(),
                'recommendations': self.generate_recommendations()
            }
            return insights
        except Exception as e:
            print(f"Error generating insights report: {str(e)}")
            return {}

In [None]:
class ChurnModelPipeline:
    def prepare_features(self):
        try:
            logger.info("Preparing features for modeling...")

            # Select features for modeling
            self.feature_columns = [
                'monthly_volume', 'spot_ratio', 'forward_ratio',
                'login_frequency', 'support_tickets', 'platform_usage_hours',
                'credit_score', 'risk_score', 'volume_per_login',
                'tickets_per_volume', 'revenue_per_employee',
                'risk_volume_interaction'
            ]

            # Add dummy columns
            dummy_columns = [col for col in self.df.columns if 'industry_' in col or 'company_size_' in col]
            self.feature_columns.extend(dummy_columns)

            # Verify all columns exist
            missing_cols = [col for col in self.feature_columns if col not in self.df.columns]
            if missing_cols:
                raise ValueError(f"Missing columns: {missing_cols}")

            # Prepare X and y
            X = self.df[self.feature_columns]
            y = self.df['churned']

            # Scale features
            X_scaled = self.scaler.fit_transform(X)

            logger.info(f"Prepared {len(self.feature_columns)} features for modeling")
            return X_scaled, y, self.feature_columns

        except Exception as e:
            logger.error(f"Error in feature preparation: {str(e)}")
            raise

    def train_models(self, X, y):
        try:
            logger.info("Training models...")

            # Split data
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

            # Store test data
            self.X_test = X_test
            self.y_test = y_test

            # Initialize models
            models = {
                'random_forest': RandomForestClassifier(n_estimators=100, random_state=42),
                'gradient_boosting': GradientBoostingClassifier(n_estimators=100, random_state=42)
            }

            # Train and evaluate models
            results = {}
            for name, model in models.items():
                model.fit(X_train, y_train)
                y_pred_proba = model.predict_proba(X_test)[:, 1]

                results[name] = {
                    'auc_roc': roc_auc_score(y_test, y_pred_proba),
                    'model': model,
                    'predictions': y_pred_proba
                }

                logger.info(f"{name} - AUC ROC: {results[name]['auc_roc']:.3f}")

            # Select best model
            best_model_name = max(results.items(), key=lambda x: x[1]['auc_roc'])[0]
            self.best_model = results[best_model_name]['model']

            logger.info(f"Best model: {best_model_name}")
            return results, X_test, y_test

        except Exception as e:
            logger.error(f"Error in model training: {str(e)}")
            raise

In [None]:
class ChurnModelPipeline:
    def __init__(self, df):  # Add initialization with df parameter
        self.df = df
        self.scaler = StandardScaler()
        self.pca = PCA(n_components=0.95)
        self.feature_importance = None
        self.best_model = None
        self.feature_columns = []
        self.X_test = None
        self.y_test = None
        logger.info("ChurnModelPipeline initialized")

    def engineer_features(self):
        try:
            logger.info("Starting feature engineering...")

            # Create ratio features
            self.df['volume_per_login'] = self.df['monthly_volume'] / self.df['login_frequency'].clip(lower=1)
            self.df['tickets_per_volume'] = self.df['support_tickets'] / self.df['monthly_volume'].clip(lower=1)
            self.df['revenue_per_employee'] = self.df['annual_revenue'] / self.df['employees'].clip(lower=1)

            # Create interaction features
            self.df['risk_volume_interaction'] = self.df['risk_score'] * self.df['monthly_volume']

            # Create categorical encodings
            categorical_columns = ['industry', 'company_size']
            for col in categorical_columns:
                dummies = pd.get_dummies(self.df[col], prefix=col)
                self.df = pd.concat([self.df, dummies], axis=1)

            logger.info("Feature engineering completed successfully")
            return self.df

        except Exception as e:
            logger.error(f"Error in feature engineering: {str(e)}")
            raise

    def prepare_features(self):
        try:
            logger.info("Preparing features for modeling...")

            # Select features for modeling
            self.feature_columns = [
                'monthly_volume', 'spot_ratio', 'forward_ratio',
                'login_frequency', 'support_tickets', 'platform_usage_hours',
                'credit_score', 'risk_score', 'volume_per_login',
                'tickets_per_volume', 'revenue_per_employee',
                'risk_volume_interaction'
            ]

            # Add dummy columns
            dummy_columns = [col for col in self.df.columns if 'industry_' in col or 'company_size_' in col]
            self.feature_columns.extend(dummy_columns)

            # Verify all columns exist
            missing_cols = [col for col in self.feature_columns if col not in self.df.columns]
            if missing_cols:
                raise ValueError(f"Missing columns: {missing_cols}")

            # Prepare X and y
            X = self.df[self.feature_columns]
            y = self.df['churned']

            # Scale features
            X_scaled = self.scaler.fit_transform(X)

            logger.info(f"Prepared {len(self.feature_columns)} features for modeling")
            return X_scaled, y, self.feature_columns

        except Exception as e:
            logger.error(f"Error in feature preparation: {str(e)}")
            raise

    def apply_pca(self, X):
        try:
            logger.info("Applying PCA...")

            X_pca = self.pca.fit_transform(X)
            explained_variance = np.cumsum(self.pca.explained_variance_ratio_)

            logger.info(f"Number of components selected: {self.pca.n_components_}")
            logger.info(f"Total variance explained: {explained_variance[-1]:.3f}")

            return X_pca, explained_variance

        except Exception as e:
            logger.error(f"Error in PCA application: {str(e)}")
            raise

    def train_models(self, X, y):
        try:
            logger.info("Training models...")

            # Split data
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

            # Store test data
            self.X_test = X_test
            self.y_test = y_test

            # Initialize models
            models = {
                'random_forest': RandomForestClassifier(n_estimators=100, random_state=42),
                'gradient_boosting': GradientBoostingClassifier(n_estimators=100, random_state=42)
            }

            # Train and evaluate models
            results = {}
            for name, model in models.items():
                model.fit(X_train, y_train)
                y_pred_proba = model.predict_proba(X_test)[:, 1]

                results[name] = {
                    'auc_roc': roc_auc_score(y_test, y_pred_proba),
                    'model': model,
                    'predictions': y_pred_proba
                }

                logger.info(f"{name} - AUC ROC: {results[name]['auc_roc']:.3f}")

            # Select best model
            best_model_name = max(results.items(), key=lambda x: x[1]['auc_roc'])[0]
            self.best_model = results[best_model_name]['model']

            logger.info(f"Best model: {best_model_name}")
            return results, X_test, y_test

        except Exception as e:
            logger.error(f"Error in model training: {str(e)}")
            raise

In [None]:
def generate_insights_report(self):
    """Generate comprehensive insights report"""
    try:
        logger.info("Generating insights report...")

        insights = {
            'model_performance': self.analyze_model_performance(),
            'customer_segments': self.analyze_customer_segments(),
            'risk_profiles': self.analyze_risk_profiles(),
            'recommendations': self.generate_recommendations(),
            'summary_statistics': self.generate_summary_statistics()
        }

        logger.info("Insights report generated successfully")
        return insights

    except Exception as e:
        logger.error(f"Error generating insights report: {e}")
        return {
            'model_performance': None,
            'customer_segments': None,
            'risk_profiles': None,
            'recommendations': None,
            'summary_statistics': None
        }

def generate_summary_statistics(self):
    """Generate summary statistics for the dataset"""
    try:
        summary = {
            'total_customers': len(self.data),
            'average_monthly_volume': self.data['monthly_volume'].mean(),
            'average_risk_score': self.data['risk_score'].mean(),
            'churn_rate': self.data['churned'].mean(),
            'industry_distribution': self.data['industry'].value_counts().to_dict(),
            'risk_distribution': {
                'high_risk': len(self.data[self.data['risk_score'] > 75]),
                'medium_risk': len(self.data[(self.data['risk_score'] > 25) & (self.data['risk_score'] <= 75)]),
                'low_risk': len(self.data[self.data['risk_score'] <= 25])
            },
            'volume_statistics': {
                'mean': self.data['monthly_volume'].mean(),
                'median': self.data['monthly_volume'].median(),
                'std': self.data['monthly_volume'].std(),
                'min': self.data['monthly_volume'].min(),
                'max': self.data['monthly_volume'].max()
            }
        }

        # Add correlation analysis
        numeric_columns = self.data.select_dtypes(include=[np.number]).columns
        correlations = self.data[numeric_columns].corr()

        # Get top correlations with monthly_volume and risk_score
        volume_correlations = correlations['monthly_volume'].sort_values(ascending=False)[:5]
        risk_correlations = correlations['risk_score'].sort_values(ascending=False)[:5]

        summary['correlations'] = {
            'volume_correlations': volume_correlations.to_dict(),
            'risk_correlations': risk_correlations.to_dict()
        }

        return summary

    except Exception as e:
        logger.error(f"Error generating summary statistics: {e}")
        return None

def print_insights_summary(self, insights):
    """Print a summary of the insights"""
    try:
        print("\n=== INSIGHTS SUMMARY ===")

        # Model Performance
        if insights['model_performance']:
            print("\nModel Performance:")
            print(f"Accuracy: {insights['model_performance']['accuracy']:.3f}")
            print(f"ROC AUC: {insights['model_performance']['performance_metrics']['roc_auc']:.3f}")

            if not insights['model_performance']['feature_importance'].empty:
                print("\nTop 5 Important Features:")
                print(insights['model_performance']['feature_importance'].head())

        # Customer Segments
        if insights['customer_segments']:
            print("\nCustomer Segments:")
            print(f"Number of segments: {len(insights['customer_segments']['profiles'])}")
            print("\nSegment Profiles:")
            print(insights['customer_segments']['profiles'])

        # Risk Profiles
        if insights['risk_profiles']:
            print("\nRisk Analysis:")
            print(f"Number of high-risk clients: {len(insights['risk_profiles']['high_risk_clients'])}")
            print("\nRisk by Industry:")
            print(insights['risk_profiles']['risk_by_industry'])

        # Recommendations
        if insights['recommendations']:
            print("\nRecommendations:")
            for rec in insights['recommendations']:
                print(f"\n{rec['type']}:")
                print(f"Description: {rec['description']}")
                print(f"Action: {rec['action']}")
                print(f"Number of clients: {len(rec['clients'])}")

        # Summary Statistics
        if insights['summary_statistics']:
            print("\nSummary Statistics:")
            print(f"Total Customers: {insights['summary_statistics']['total_customers']}")
            print(f"Average Monthly Volume: £{insights['summary_statistics']['average_monthly_volume']:.2f}M")
            print(f"Churn Rate: {insights['summary_statistics']['churn_rate']:.2%}")

    except Exception as e:
        logger.error(f"Error printing insights summary: {e}")

In [None]:
def run_complete_analysis():
    try:
        # Generate data
        print("Generating and processing data...")
        df = generate_fx_customer_data(n_samples=1000)

        # Initialize and run pipeline
        print("Running model pipeline...")
        pipeline = ChurnModelPipeline(df)
        df_engineered = pipeline.engineer_features()
        X, y, feature_columns = pipeline.prepare_features()
        X_pca, explained_variance = pipeline.apply_pca(X)
        results, X_test, y_test = pipeline.train_models(X_pca, y)

        # Initialize analytics suite
        print("Initializing analytics suite...")
        analytics_suite = FXAnalyticsSuite(pipeline, results, df)

        # Generate insights
        print("Generating insights...")
        insights = analytics_suite.generate_insights_report()

        # Print insights summary
        analytics_suite.print_insights_summary(insights)

        # Launch dashboard
        print("\nLaunching dashboard...")
        dashboard = analytics_suite.launch_dashboard()
        dashboard.run()

        return analytics_suite, insights, dashboard

    except Exception as e:
        print(f"Error in analysis: {str(e)}")
        traceback.print_exc()
        return None, None, None

In [None]:
class FXAnalyticsSuite:
    def __init__(self, pipeline, results, data):
        self.pipeline = pipeline
        self.results = results
        self.data = data
        self.shap_values = None
        self.feature_columns = self.pipeline.feature_columns  # Ensure feature columns are available

    def analyze_model_performance(self):
        """Analyze detailed model performance"""
        try:
            # Get predictions
            y_pred = self.pipeline.best_model.predict(self.pipeline.X_test)
            y_pred_proba = self.pipeline.best_model.predict_proba(self.pipeline.X_test)[:, 1]

            # Basic metrics
            performance = {
                'accuracy': accuracy_score(self.pipeline.y_test, y_pred),
                'roc_auc': roc_auc_score(self.pipeline.y_test, y_pred_proba),
                'precision': precision_score(self.pipeline.y_test, y_pred),
                'recall': recall_score(self.pipeline.y_test, y_pred)
            }

            # Feature importance (ensure lengths match)
            if hasattr(self.pipeline.best_model, 'feature_importances_'):
                importance_df = pd.DataFrame({
                    'feature': self.feature_columns,
                    'importance': self.pipeline.best_model.feature_importances_
                })
                performance['feature_importance'] = importance_df.sort_values('importance', ascending=False)

            return performance

        except Exception as e:
            logger.error(f"Error in model performance analysis: {str(e)}")
            return None

    def analyze_customer_segments(self):
        """Perform customer segmentation analysis"""
        try:
            # Select features for clustering
            features_for_clustering = ['monthly_volume', 'risk_score']

            # Scale features
            scaler = StandardScaler()
            X_cluster = scaler.fit_transform(self.data[features_for_clustering])

            # Perform clustering
            kmeans = KMeans(n_clusters=4, random_state=42)
            self.data['Segment'] = kmeans.fit_predict(X_cluster)

            # Calculate segment profiles
            segment_profiles = self.data.groupby('Segment').agg({
                'monthly_volume': 'mean',
                'risk_score': 'mean',
                'churned': 'mean'
            }).round(2)

            return {
                'profiles': segment_profiles,
                'segment_sizes': self.data['Segment'].value_counts()
            }

        except Exception as e:
            logger.error(f"Error in customer segmentation: {str(e)}")
            return None

    def analyze_risk_profiles(self):
        """Analyze risk profiles"""
        try:
            risk_profiles = {
                'high_risk_count': len(self.data[self.data['risk_score'] > 75]),
                'risk_by_industry': self.data.groupby('industry')['risk_score'].mean(),
                'risk_distribution': self.data['risk_score'].describe()
            }
            return risk_profiles

        except Exception as e:
            logger.error(f"Error in risk profile analysis: {str(e)}")
            return None

    def generate_recommendations(self):
        """Generate recommendations"""
        try:
            recommendations = []

            # High-risk clients
            high_risk = self.data[self.data['risk_score'] > 75]
            if len(high_risk) > 0:
                recommendations.append({
                    'type': 'High Risk Alert',
                    'count': len(high_risk),
                    'action': 'Immediate review required'
                })

            # Growth opportunities
            low_volume = self.data[self.data['monthly_volume'] < self.data['monthly_volume'].quantile(0.25)]
            if len(low_volume) > 0:
                recommendations.append({
                    'type': 'Growth Opportunity',
                    'count': len(low_volume),
                    'action': 'Develop growth strategy'
                })

            return recommendations

        except Exception as e:
            logger.error(f"Error generating recommendations: {str(e)}")
            return None

    def generate_summary_statistics(self):
        """Generate summary statistics"""
        try:
            summary = {
                'total_customers': len(self.data),
                'average_volume': self.data['monthly_volume'].mean(),
                'churn_rate': self.data['churned'].mean(),
                'risk_score_avg': self.data['risk_score'].mean()
            }
            return summary

        except Exception as e:
            logger.error(f"Error generating summary statistics: {str(e)}")
            return None

    def generate_insights_report(self):
        """Generate comprehensive insights report"""
        try:
            insights = {
                'model_performance': self.analyze_model_performance(),
                'customer_segments': self.analyze_customer_segments(),
                'risk_profiles': self.analyze_risk_profiles(),
                'recommendations': self.generate_recommendations(),
                'summary_statistics': self.generate_summary_statistics()
            }
            return insights

        except Exception as e:
            logger.error(f"Error generating insights report: {str(e)}")
            return None

    def print_insights_summary(self, insights):
        """Print insights summary"""
        try:
            if insights is None:
                print("No insights available.")
                return

            print("\n=== INSIGHTS SUMMARY ===")

            # Print model performance
            if insights['model_performance']:
                print("\nModel Performance:")
                perf = insights['model_performance']
                print(f"Accuracy: {perf['accuracy']:.3f}")
                print(f"ROC AUC: {perf['roc_auc']:.3f}")
                print(f"Precision: {perf['precision']:.3f}")
                print(f"Recall: {perf['recall']:.3f}")

            # Print customer segments
            if insights['customer_segments']:
                print("\nCustomer Segments:")
                print(insights['customer_segments']['profiles'])

            # Print risk profiles
            if insights['risk_profiles']:
                print("\nRisk Profiles:")
                print(f"High Risk Clients: {insights['risk_profiles']['high_risk_count']}")

            # Print recommendations
            if insights['recommendations']:
                print("\nRecommendations:")
                for rec in insights['recommendations']:
                    print(f"- {rec['type']}: {rec['action']} ({rec['count']} clients)")

            # Print summary statistics
            if insights['summary_statistics']:
                print("\nSummary Statistics:")
                stats = insights['summary_statistics']
                print(f"Total Customers: {stats['total_customers']}")
                print(f"Average Volume: £{stats['average_volume']:.2f}M")
                print(f"Churn Rate: {stats['churn_rate']:.2%}")

        except Exception as e:
            logger.error(f"Error printing insights summary: {str(e)}")

def run_complete_analysis():
    try:
        # Generate data
        print("Generating and processing data...")
        df = generate_fx_customer_data(n_samples=1000)

        # Initialize and run pipeline
        print("Running model pipeline...")
        pipeline = ChurnModelPipeline(df)
        df_engineered = pipeline.engineer_features()
        X, y, feature_columns = pipeline.prepare_features()
        X_pca, explained_variance = pipeline.apply_pca(X)
        results, X_test, y_test = pipeline.train_models(X_pca, y)

        # Initialize analytics suite
        print("Initializing analytics suite...")
        analytics_suite = FXAnalyticsSuite(pipeline, results, df)

        # Generate insights
        print("Generating insights...")
        insights = analytics_suite.generate_insights_report()

        # Print insights summary
        analytics_suite.print_insights_summary(insights)

        return analytics_suite, insights

    except Exception as e:
        print(f"Error in analysis: {str(e)}")
        traceback.print_exc()
        return None, None

if __name__ == "__main__":
    analytics_suite, insights = run_complete_analysis()

Generating and processing data...
Running model pipeline...


ERROR:__main__:Error in model performance analysis: name 'accuracy_score' is not defined


Initializing analytics suite...
Generating insights...

=== INSIGHTS SUMMARY ===

Customer Segments:
         monthly_volume  risk_score  churned
Segment                                     
0                 24.39       25.37     0.20
1                 77.97       75.53     0.20
2                 25.33       74.56     0.19
3                 73.01       28.28     0.15

Risk Profiles:
High Risk Clients: 250

Recommendations:
- High Risk Alert: Immediate review required (250 clients)
- Growth Opportunity: Develop growth strategy (250 clients)

Summary Statistics:
Total Customers: 1000
Average Volume: £49.23M
Churn Rate: 18.80%


In [None]:
class ComprehensiveDashboard:
    def __init__(self, analytics_suite):
        self.analytics_suite = analytics_suite
        self.app = dash.Dash(__name__, external_stylesheets=[dbc.themes.FLATLY])
        self.setup_layout()
        self.setup_callbacks()

    def setup_layout(self):
        self.app.layout = dbc.Container([
            # Header
            dbc.Row([
                dbc.Col([
                    html.H1("FX Sales Analytics Dashboard",
                           className="text-center mb-4")
                ])
            ]),

            # Filters
            dbc.Row([
                dbc.Col([
                    html.Label("Select Industry"),
                    dcc.Dropdown(
                        id='industry-filter',
                        options=[{'label': x, 'value': x}
                                for x in self.analytics_suite.data['industry'].unique()],
                        value=None
                    )
                ], width=4),
                dbc.Col([
                    html.Label("Risk Level"),
                    dcc.RangeSlider(
                        id='risk-slider',
                        min=0,
                        max=100,
                        step=5,
                        marks={i: str(i) for i in range(0, 101, 20)},
                        value=[0, 100]
                    )
                ], width=8)
            ]),

            # KPI Cards
            dbc.Row([
                dbc.Col([
                    dbc.Card([
                        dbc.CardBody([
                            html.H4("Total Clients", className="card-title"),
                            html.H2(id="kpi-total-clients")
                        ])
                    ])
                ], width=3),
                dbc.Col([
                    dbc.Card([
                        dbc.CardBody([
                            html.H4("Average Volume", className="card-title"),
                            html.H2(id="kpi-avg-volume")
                        ])
                    ])
                ], width=3),
                dbc.Col([
                    dbc.Card([
                        dbc.CardBody([
                            html.H4("Churn Rate", className="card-title"),
                            html.H2(id="kpi-churn-rate")
                        ])
                    ])
                ], width=3),
                dbc.Col([
                    dbc.Card([
                        dbc.CardBody([
                            html.H4("Risk Score", className="card-title"),
                            html.H2(id="kpi-risk-score")
                        ])
                    ])
                ], width=3)
            ], className="mb-4"),

            # Main Charts
            dbc.Row([
                dbc.Col([
                    dcc.Graph(id='volume-risk-scatter')
                ], width=6),
                dbc.Col([
                    dcc.Graph(id='industry-distribution')
                ], width=6)
            ], className="mb-4"),

            # Additional Charts
            dbc.Row([
                dbc.Col([
                    dcc.Graph(id='risk-distribution')
                ], width=6),
                dbc.Col([
                    dcc.Graph(id='churn-analysis')
                ], width=6)
            ])
        ], fluid=True)

    def setup_callbacks(self):
        @self.app.callback(
            [Output('kpi-total-clients', 'children'),
             Output('kpi-avg-volume', 'children'),
             Output('kpi-churn-rate', 'children'),
             Output('kpi-risk-score', 'children'),
             Output('volume-risk-scatter', 'figure'),
             Output('industry-distribution', 'figure'),
             Output('risk-distribution', 'figure'),
             Output('churn-analysis', 'figure')],
            [Input('industry-filter', 'value'),
             Input('risk-slider', 'value')]
        )
        def update_dashboard(selected_industry, risk_range):
            # Filter data
            df = self.analytics_suite.data.copy()
            if selected_industry:
                df = df[df['industry'] == selected_industry]
            df = df[
                (df['risk_score'] >= risk_range[0]) &
                (df['risk_score'] <= risk_range[1])
            ]

            # Calculate KPIs
            kpis = self.calculate_kpis(df)

            # Create figures
            scatter_fig = self.create_scatter_plot(df)
            industry_fig = self.create_industry_distribution(df)
            risk_fig = self.create_risk_distribution(df)
            churn_fig = self.create_churn_analysis(df)

            return (
                f"{len(df):,}",
                f"£{df['monthly_volume'].mean():.1f}M",
                f"{df['churned'].mean():.1%}",
                f"{df['risk_score'].mean():.1f}",
                scatter_fig,
                industry_fig,
                risk_fig,
                churn_fig
            )

    def calculate_kpis(self, df):
        return {
            'total_clients': len(df),
            'avg_volume': df['monthly_volume'].mean(),
            'churn_rate': df['churned'].mean(),
            'risk_score': df['risk_score'].mean()
        }

    def create_scatter_plot(self, df):
        return px.scatter(
            df,
            x='monthly_volume',
            y='risk_score',
            color='industry',
            size='monthly_volume',
            hover_data=['churned'],
            title='Volume vs Risk Score'
        )

    def create_industry_distribution(self, df):
        return px.bar(
            df['industry'].value_counts().reset_index(),
            x='index',
            y='industry',
            title='Industry Distribution'
        )

    def create_risk_distribution(self, df):
        return px.histogram(
            df,
            x='risk_score',
            nbins=20,
            title='Risk Score Distribution'
        )

    def create_churn_analysis(self, df):
        churn_by_industry = df.groupby('industry')['churned'].mean().reset_index()
        return px.bar(
            churn_by_industry,
            x='industry',
            y='churned',
            title='Churn Rate by Industry'
        )

    def run(self):
        self.app.run_server(debug=True)

def run_complete_analysis():
    try:
        # Generate data
        print("Generating and processing data...")
        df = generate_fx_customer_data(n_samples=1000)

        # Initialize and run pipeline
        print("Running model pipeline...")
        pipeline = ChurnModelPipeline(df)
        df_engineered = pipeline.engineer_features()
        X, y, feature_columns = pipeline.prepare_features()
        X_pca, explained_variance = pipeline.apply_pca(X)
        results, X_test, y_test = pipeline.train_models(X_pca, y)

        # Initialize analytics suite
        print("Initializing analytics suite...")
        analytics_suite = FXAnalyticsSuite(pipeline, results, df)

        # Generate insights
        print("Generating insights...")
        insights = analytics_suite.generate_insights_report()

        # Print insights summary
        analytics_suite.print_insights_summary(insights)

        # Create and launch dashboard
        print("\nLaunching dashboard...")
        dashboard = ComprehensiveDashboard(analytics_suite)
        dashboard.run()

        return analytics_suite, insights, dashboard

    except Exception as e:
        print(f"Error in analysis: {str(e)}")
        traceback.print_exc()
        return None, None, None

if __name__ == "__main__":
    analytics_suite, insights, dashboard = run_complete_analysis()

Generating and processing data...
Running model pipeline...


ERROR:__main__:Error in model performance analysis: name 'accuracy_score' is not defined


Initializing analytics suite...
Generating insights...

=== INSIGHTS SUMMARY ===

Customer Segments:
         monthly_volume  risk_score  churned
Segment                                     
0                 24.39       25.37     0.20
1                 77.97       75.53     0.20
2                 25.33       74.56     0.19
3                 73.01       28.28     0.15

Risk Profiles:
High Risk Clients: 250

Recommendations:
- High Risk Alert: Immediate review required (250 clients)
- Growth Opportunity: Develop growth strategy (250 clients)

Summary Statistics:
Total Customers: 1000
Average Volume: £49.23M
Churn Rate: 18.80%

Launching dashboard...


<IPython.core.display.Javascript object>

In [None]:
def run_complete_analysis():
    try:
        # Generate data
        print("Generating and processing data...")
        df = generate_fx_customer_data(n_samples=1000)

        # Initialize and run pipeline
        print("Running model pipeline...")
        pipeline = ChurnModelPipeline(df)
        df_engineered = pipeline.engineer_features()
        X, y, feature_columns = pipeline.prepare_features()
        X_pca, explained_variance = pipeline.apply_pca(X)
        results, X_test, y_test = pipeline.train_models(X_pca, y)

        # Initialize analytics suite
        print("Initializing analytics suite...")
        analytics_suite = FXAnalyticsSuite(pipeline, results, df)

        # Generate insights
        print("Generating insights...")
        insights = analytics_suite.generate_insights_report()

        # Print insights summary
        analytics_suite.print_insights_summary(insights)

        # Create dashboard
        print("\nCreating dashboard...")
        dashboard = ComprehensiveDashboard(analytics_suite)

        return analytics_suite, insights, dashboard

    except Exception as e:
        print(f"Error in analysis: {str(e)}")
        traceback.print_exc()
        return None, None, None

# Separate function to run the dashboard
def run_dashboard(dashboard):
    if dashboard is not None:
        print("Launching dashboard...")
        dashboard.run()
    else:
        print("Dashboard creation failed.")

# Main execution
if __name__ == "__main__":
    analytics_suite, insights, dashboard = run_complete_analysis()
    if all(v is not None for v in [analytics_suite, insights, dashboard]):
        run_dashboard(dashboard)

Generating and processing data...
Running model pipeline...


ERROR:__main__:Error in model performance analysis: name 'accuracy_score' is not defined


Initializing analytics suite...
Generating insights...

=== INSIGHTS SUMMARY ===

Customer Segments:
         monthly_volume  risk_score  churned
Segment                                     
0                 24.39       25.37     0.20
1                 77.97       75.53     0.20
2                 25.33       74.56     0.19
3                 73.01       28.28     0.15

Risk Profiles:
High Risk Clients: 250

Recommendations:
- High Risk Alert: Immediate review required (250 clients)
- Growth Opportunity: Develop growth strategy (250 clients)

Summary Statistics:
Total Customers: 1000
Average Volume: £49.23M
Churn Rate: 18.80%

Creating dashboard...
Launching dashboard...


<IPython.core.display.Javascript object>

In [None]:
def run_complete_analysis():
    try:
        # Generate data
        print("Generating and processing data...")
        df = generate_fx_customer_data(n_samples=1000)

        # Initialize and run pipeline
        print("Running model pipeline...")
        pipeline = ChurnModelPipeline(df)
        df_engineered = pipeline.engineer_features()
        X, y, feature_columns = pipeline.prepare_features()
        X_pca, explained_variance = pipeline.apply_pca(X)
        results, X_test, y_test = pipeline.train_models(X_pca, y)

        # Initialize analytics suite
        print("Initializing analytics suite...")
        analytics_suite = FXAnalyticsSuite(pipeline, results, df)

        # Generate insights
        print("Generating insights...")
        insights = analytics_suite.generate_insights_report()

        # Print insights summary
        analytics_suite.print_insights_summary(insights)

        # Create dashboard
        print("\nCreating dashboard...")
        dashboard = ComprehensiveDashboard(analytics_suite)

        return analytics_suite, insights, dashboard

    except Exception as e:
        print(f"Error in analysis: {str(e)}")
        traceback.print_exc()
        return None, None, None

# Separate function to run the dashboard
def run_dashboard(dashboard):
    if dashboard is not None:
        print("Launching dashboard...")
        dashboard.run()
    else:
        print("Dashboard creation failed.")

# Main execution
if __name__ == "__main__":
    analytics_suite, insights, dashboard = run_complete_analysis()
    if all(v is not None for v in [analytics_suite, insights, dashboard]):
        run_dashboard(dashboard)

Generating and processing data...
Running model pipeline...


ERROR:__main__:Error in model performance analysis: name 'accuracy_score' is not defined


Initializing analytics suite...
Generating insights...

=== INSIGHTS SUMMARY ===

Customer Segments:
         monthly_volume  risk_score  churned
Segment                                     
0                 24.39       25.37     0.20
1                 77.97       75.53     0.20
2                 25.33       74.56     0.19
3                 73.01       28.28     0.15

Risk Profiles:
High Risk Clients: 250

Recommendations:
- High Risk Alert: Immediate review required (250 clients)
- Growth Opportunity: Develop growth strategy (250 clients)

Summary Statistics:
Total Customers: 1000
Average Volume: £49.23M
Churn Rate: 18.80%

Creating dashboard...
Launching dashboard...


<IPython.core.display.Javascript object>

In [None]:
html.Div(id='kpi-total-clients'),
html.Div(id='kpi-avg-volume'),
html.Div(id='kpi-churn-rate'),
html.Div(id='kpi-risk-score'),
dcc.Graph(id='volume-risk-scatter'),
dcc.Graph(id='industry-distribution'),
dcc.Graph(id='risk-distribution'),
dcc.Graph(id='churn-analysis'),

(Graph(id='churn-analysis'),)

In [None]:
class ComprehensiveDashboard:
    def __init__(self, analytics_suite):
        self.analytics_suite = analytics_suite
        self.app = dash.Dash(__name__, external_stylesheets=[dbc.themes.FLATLY])
        self.setup_layout()
        self.setup_callbacks()

    def setup_layout(self):
        self.app.layout = dbc.Container([
            # Header
            dbc.Row([
                dbc.Col([
                    html.H1("FX Sales Analytics Dashboard",
                           className="text-center mb-4")
                ])
            ]),

            # Filters
            dbc.Row([
                dbc.Col([
                    html.Label("Select Industry"),
                    dcc.Dropdown(
                        id='industry-filter',
                        options=[
                            {'label': str(x), 'value': str(x)}
                            for x in self.analytics_suite.data['industry'].unique()
                        ],
                        value=None
                    )
                ], width=4),
                dbc.Col([
                    html.Label("Risk Level"),
                    dcc.RangeSlider(
                        id='risk-slider',
                        min=0,
                        max=100,
                        step=5,
                        marks={i: str(i) for i in range(0, 101, 20)},
                        value=[0, 100]
                    )
                ], width=8)
            ]),

            # KPI Cards
            dbc.Row([
                dbc.Col([
                    dbc.Card([
                        dbc.CardBody([
                            html.H4("Total Clients", className="card-title"),
                            html.H2(id="kpi-total-clients", children="0")
                        ])
                    ])
                ], width=3),
                dbc.Col([
                    dbc.Card([
                        dbc.CardBody([
                            html.H4("Average Volume", className="card-title"),
                            html.H2(id="kpi-avg-volume", children="0")
                        ])
                    ])
                ], width=3),
                dbc.Col([
                    dbc.Card([
                        dbc.CardBody([
                            html.H4("Churn Rate", className="card-title"),
                            html.H2(id="kpi-churn-rate", children="0%")
                        ])
                    ])
                ], width=3),
                dbc.Col([
                    dbc.Card([
                        dbc.CardBody([
                            html.H4("Risk Score", className="card-title"),
                            html.H2(id="kpi-risk-score", children="0")
                        ])
                    ])
                ], width=3)
            ], className="mb-4"),

            # Main Charts
            dbc.Row([
                dbc.Col([
                    dcc.Graph(
                        id='volume-risk-scatter',
                        figure={}
                    )
                ], width=6),
                dbc.Col([
                    dcc.Graph(
                        id='industry-distribution',
                        figure={}
                    )
                ], width=6)
            ], className="mb-4"),

            # Additional Charts
            dbc.Row([
                dbc.Col([
                    dcc.Graph(
                        id='risk-distribution',
                        figure={}
                    )
                ], width=6),
                dbc.Col([
                    dcc.Graph(
                        id='churn-analysis',
                        figure={}
                    )
                ], width=6)
            ])
        ], fluid=True)

    def setup_callbacks(self):
        @self.app.callback(
            [Output('kpi-total-clients', 'children'),
             Output('kpi-avg-volume', 'children'),
             Output('kpi-churn-rate', 'children'),
             Output('kpi-risk-score', 'children'),
             Output('volume-risk-scatter', 'figure'),
             Output('industry-distribution', 'figure'),
             Output('risk-distribution', 'figure'),
             Output('churn-analysis', 'figure')],
            [Input('industry-filter', 'value'),
             Input('risk-slider', 'value')]
        )
        def update_dashboard(selected_industry, risk_range):
            try:
                df = self.analytics_suite.data.copy()
                if selected_industry:
                    df = df[df['industry'].astype(str) == selected_industry]
                if risk_range:
                    df = df[
                        (df['risk_score'] >= risk_range[0]) &
                        (df['risk_score'] <= risk_range[1])
                    ]

                # Calculate KPIs
                kpis = self.calculate_kpis(df)

                # Create figures
                scatter_fig = self.create_scatter_plot(df)
                industry_fig = self.create_industry_distribution(df)
                risk_fig = self.create_risk_distribution(df)
                churn_fig = self.create_churn_analysis(df)

                return (
                    f"{kpis['total_clients']:,}",
                    f"£{kpis['avg_volume']:.1f}M",
                    f"{kpis['churn_rate']:.1%}",
                    f"{kpis['risk_score']:.1f}",
                    scatter_fig,
                    industry_fig,
                    risk_fig,
                    churn_fig
                )
            except Exception as e:
                print(f"Callback error: {str(e)}")
                return "0", "£0M", "0%", "0", {}, {}, {}, {}

    def run(self):
        self.app.run_server(debug=True)

In [None]:
analytics_suite, insights, dashboard = run_complete_analysis()

if dashboard is not None:
    dashboard.run()

ValueError: not enough values to unpack (expected 3, got 2)

In [None]:
class EnhancedDashboard:
    def __init__(self, analytics_suite):
        self.analytics_suite = analytics_suite
        self.app = dash.Dash(__name__,
                            external_stylesheets=[dbc.themes.FLATLY],
                            suppress_callback_exceptions=True)
        self.error_status = {'last_error': None}
        self.cache = {}
        self.initialize_cache()
        self.setup_layout()
        self.setup_callbacks()

    def initialize_cache(self):
        """Initialize cache with computed values"""
        try:
            df = self.analytics_suite.data
            self.cache.update({
                'total_volume': df['monthly_volume'].sum(),
                'avg_risk': df['risk_score'].mean(),
                'industry_counts': df['industry'].value_counts().to_dict(),
                'risk_segments': self.calculate_risk_segments(df)
            })
        except Exception as e:
            self.log_error("Cache initialization failed", e)

    def log_error(self, context, error):
        """Enhanced error logging"""
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        error_msg = f"{timestamp} - {context}: {str(error)}"
        print(error_msg)  # For immediate feedback
        self.error_status['last_error'] = error_msg

    def calculate_risk_segments(self, df):
        """Calculate risk segments"""
        try:
            return {
                'high_risk': len(df[df['risk_score'] >= 75]),
                'medium_risk': len(df[(df['risk_score'] >= 25) & (df['risk_score'] < 75)]),
                'low_risk': len(df[df['risk_score'] < 25])
            }
        except Exception as e:
            self.log_error("Risk segment calculation failed", e)
            return {'high_risk': 0, 'medium_risk': 0, 'low_risk': 0}

    def setup_layout(self):
        """Enhanced dashboard layout"""
        self.app.layout = dbc.Container([
            # Navigation Bar
            dbc.Navbar(
                dbc.Container([
                    dbc.NavbarBrand("FX Sales Analytics", className="ms-2"),
                    dbc.NavItem(dbc.NavLink("Refresh Data", href="#", id="refresh-data")),
                    dbc.NavItem(dbc.NavLink("Export Report", href="#", id="export-report")),
                    dbc.NavItem(
                        dbc.Button("Error Log", id="error-log-button", color="danger",
                                 className="ms-2", style={'display': 'none'})
                    )
                ]),
                color="dark",
                dark=True,
            ),

            # Main Content
            dbc.Container([
                # Filters and Controls
                dbc.Row([
                    dbc.Col([
                        html.Div([
                            html.H4("Filters", className="mb-3"),
                            dbc.Card([
                                dbc.CardBody([
                                    # Industry Filter
                                    html.Label("Industry"),
                                    dcc.Dropdown(
                                        id='industry-filter',
                                        options=[
                                            {'label': str(x), 'value': str(x)}
                                            for x in self.analytics_suite.data['industry'].unique()
                                        ],
                                        multi=True,
                                        placeholder="Select Industries"
                                    ),

                                    # Risk Range Filter
                                    html.Label("Risk Score Range", className="mt-3"),
                                    dcc.RangeSlider(
                                        id='risk-slider',
                                        min=0,
                                        max=100,
                                        step=5,
                                        marks={i: str(i) for i in range(0, 101, 20)},
                                        value=[0, 100]
                                    ),

                                    # Volume Filter
                                    html.Label("Monthly Volume (£M)", className="mt-3"),
                                    dcc.RangeSlider(
                                        id='volume-slider',
                                        min=0,
                                        max=self.analytics_suite.data['monthly_volume'].max(),
                                        step=10,
                                        value=[0, int(self.analytics_suite.data['monthly_volume'].max())]
                                    ),

                                    # Additional Controls
                                    html.Div([
                                        dbc.Button(
                                            "Reset Filters",
                                            id="reset-filters",
                                            color="secondary",
                                            className="mt-3"
                                        ),
                                        dbc.Button(
                                            "Apply Filters",
                                            id="apply-filters",
                                            color="primary",
                                            className="mt-3 ms-2"
                                        )
                                    ])
                                ])
                            ])
                        ])
                    ], width=3),

                    # Main Content Area
                    dbc.Col([
                        # KPI Cards
                        dbc.Row([
                            dbc.Col(dbc.Card([
                                dbc.CardBody([
                                    html.H4("Total Clients"),
                                    html.H2(id="kpi-total-clients"),
                                    dbc.Progress(id="clients-progress", value=0)
                                ])
                            ]), width=3),
                            dbc.Col(dbc.Card([
                                dbc.CardBody([
                                    html.H4("Average Volume"),
                                    html.H2(id="kpi-avg-volume"),
                                    dbc.Progress(id="volume-progress", value=0)
                                ])
                            ]), width=3),
                            dbc.Col(dbc.Card([
                                dbc.CardBody([
                                    html.H4("Risk Score"),
                                    html.H2(id="kpi-risk-score"),
                                    dbc.Progress(id="risk-progress", value=0)
                                ])
                            ]), width=3),
                            dbc.Col(dbc.Card([
                                dbc.CardBody([
                                    html.H4("Churn Rate"),
                                    html.H2(id="kpi-churn-rate"),
                                    dbc.Progress(id="churn-progress", value=0)
                                ])
                            ]), width=3),
                        ], className="mb-4"),

                        # Main Charts
                        dbc.Tabs([
                            dbc.Tab([
                                dcc.Graph(id='volume-risk-scatter')
                            ], label="Risk Analysis"),

                            dbc.Tab([
                                dcc.Graph(id='industry-distribution')
                            ], label="Industry Analysis"),

                            dbc.Tab([
                                dcc.Graph(id='time-series-analysis')
                            ], label="Time Series"),

                            dbc.Tab([
                                dcc.Graph(id='customer-segments')
                            ], label="Segmentation")
                        ])
                    ], width=9)
                ])
            ], fluid=True)
        ])

In [None]:
    def setup_callbacks(self):
        """Setup all dashboard callbacks"""
        self.setup_filter_callbacks()
        self.setup_chart_callbacks()
        self.setup_error_callbacks()
        self.setup_export_callbacks()

    def setup_filter_callbacks(self):
        @self.app.callback(
            [Output('kpi-total-clients', 'children'),
             Output('kpi-avg-volume', 'children'),
             Output('kpi-risk-score', 'children'),
             Output('kpi-churn-rate', 'children'),
             Output('clients-progress', 'value'),
             Output('volume-progress', 'value'),
             Output('risk-progress', 'value'),
             Output('churn-progress', 'value')],
            [Input('industry-filter', 'value'),
             Input('risk-slider', 'value'),
             Input('volume-slider', 'value'),
             Input('apply-filters', 'n_clicks')],
            [State('industry-filter', 'value')]
        )
        def update_kpis(industries, risk_range, volume_range, n_clicks, current_industries):
            try:
                # Get filtered dataframe
                df = self.get_filtered_data(industries, risk_range, volume_range)
                total_clients = len(df)
                avg_volume = df['monthly_volume'].mean()
                risk_score = df['risk_score'].mean()
                churn_rate = df['churned'].mean()

                # Calculate progress values
                clients_progress = (total_clients / len(self.analytics_suite.data)) * 100
                volume_progress = (avg_volume / self.cache['total_volume']) * 100
                risk_progress = risk_score
                churn_progress = churn_rate * 100

                return (
                    f"{total_clients:,}",
                    f"£{avg_volume:.1f}M",
                    f"{risk_score:.1f}",
                    f"{churn_rate:.1%}",
                    clients_progress,
                    volume_progress,
                    risk_progress,
                    churn_progress
                )
            except Exception as e:
                self.log_error("KPI update failed", e)
                return "0", "£0M", "0", "0%", 0, 0, 0, 0

    def setup_chart_callbacks(self):
        @self.app.callback(
            [Output('volume-risk-scatter', 'figure'),
             Output('industry-distribution', 'figure'),
             Output('time-series-analysis', 'figure'),
             Output('customer-segments', 'figure')],
            [Input('industry-filter', 'value'),
             Input('risk-slider', 'value'),
             Input('volume-slider', 'value')]
        )
        def update_charts(industries, risk_range, volume_range):
            try:
                df = self.get_filtered_data(industries, risk_range, volume_range)

                return (
                    self.create_risk_scatter(df),
                    self.create_industry_distribution(df),
                    self.create_time_series(df),
                    self.create_customer_segments(df)
                )
            except Exception as e:
                self.log_error("Chart update failed", e)
                return {}, {}, {}, {}

    def create_risk_scatter(self, df):
        """Create advanced risk scatter plot"""
        fig = px.scatter(
            df,
            x='monthly_volume',
            y='risk_score',
            color='industry',
            size='monthly_volume',
            hover_data=[
                'churned',
                'platform_usage_hours',
                'credit_score'
            ],
            title='Risk vs Volume Analysis',
            template='plotly_dark'
        )

        # Add risk zones
        fig.add_hrect(
            y0=75, y1=100,
            fillcolor="red", opacity=0.1,
            layer="below", line_width=0,
            annotation_text="High Risk Zone"
        )

        fig.add_hrect(
            y0=25, y1=75,
            fillcolor="yellow", opacity=0.1,
            layer="below", line_width=0,
            annotation_text="Medium Risk Zone"
        )

        return fig

    def create_industry_distribution(self, df):
        """Create industry distribution analysis"""
        # Create subplots
        fig = make_subplots(
            rows=2, cols=2,
            subplot_titles=(
                'Industry Distribution',
                'Average Volume by Industry',
                'Risk Distribution by Industry',
                'Churn Rate by Industry'
            )
        )

        # Industry Distribution
        industry_counts = df['industry'].value_counts()
        fig.add_trace(
            go.Bar(x=industry_counts.index, y=industry_counts.values),
            row=1, col=1
        )

        # Average Volume
        volume_by_industry = df.groupby('industry')['monthly_volume'].mean()
        fig.add_trace(
            go.Bar(x=volume_by_industry.index, y=volume_by_industry.values),
            row=1, col=2
        )

        # Risk Distribution
        risk_by_industry = df.groupby('industry')['risk_score'].mean()
        fig.add_trace(
            go.Bar(x=risk_by_industry.index, y=risk_by_industry.values),
            row=2, col=1
        )

        # Churn Rate
        churn_by_industry = df.groupby('industry')['churned'].mean()
        fig.add_trace(
            go.Bar(x=churn_by_industry.index, y=churn_by_industry.values),
            row=2, col=2
        )

        fig.update_layout(height=800, showlegend=False)
        return fig

    def create_time_series(self, df):
        """Create time series analysis"""
        # Group by date and calculate metrics
        daily_metrics = df.groupby('date').agg({
            'monthly_volume': 'sum',
            'risk_score': 'mean',
            'churned': 'mean'
        }).reset_index()

        fig = make_subplots(rows=3, cols=1, shared_xaxes=True)

        # Volume Trend
        fig.add_trace(
            go.Scatter(
                x=daily_metrics['date'],
                y=daily_metrics['monthly_volume'],
                name='Volume'
            ),
            row=1, col=1
        )

        # Risk Trend
        fig.add_trace(
            go.Scatter(
                x=daily_metrics['date'],
                y=daily_metrics['risk_score'],
                name='Risk'
            ),
            row=2, col=1
        )

        # Churn Trend
        fig.add_trace(
            go.Scatter(
                x=daily_metrics['date'],
                y=daily_metrics['churned'],
                name='Churn'
            ),
            row=3, col=1
        )

        fig.update_layout(height=800)
        return fig

    def create_customer_segments(self, df):
        """Create customer segmentation analysis"""
        # Perform clustering
        features_for_clustering = ['monthly_volume', 'risk_score', 'platform_usage_hours']
        X = StandardScaler().fit_transform(df[features_for_clustering])

        kmeans = KMeans(n_clusters=4, random_state=42)
        df['Segment'] = kmeans.fit_predict(X)

        fig = px.scatter_3d(
            df,
            x='monthly_volume',
            y='risk_score',
            z='platform_usage_hours',
            color='Segment',
            hover_data=['industry', 'churned'],
            title='Customer Segments'
        )

        return fig

    def get_filtered_data(self, industries, risk_range, volume_range):
        """Get filtered dataframe based on selections"""
        df = self.analytics_suite.data.copy()

        if industries:
            if isinstance(industries, list):
                df = df[df['industry'].isin(industries)]
            else:
                df = df[df['industry'] == industries]

        if risk_range:
            df = df[
                (df['risk_score'] >= risk_range[0]) &
                (df['risk_score'] <= risk_range[1])
            ]

        if volume_range:
            df = df[
                (df['monthly_volume'] >= volume_range[0]) &
                (df['monthly_volume'] <= volume_range[1])
            ]

        return df

In [None]:
class EnhancedDashboard:
    def add_performance_optimizations(self):
        """Add performance optimizations to the dashboard"""

        # Data caching system
        self.cache = {
            'data': {},
            'computations': {},
            'last_update': None,
            'update_frequency': 300  # 5 minutes in seconds
        }

        # Background task handler
        self.background_tasks = {
            'running': False,
            'last_error': None,
            'status': 'idle'
        }

    def setup_performance_monitoring(self):
        """Setup performance monitoring"""
        self.performance_metrics = {
            'response_times': [],
            'memory_usage': [],
            'cache_hits': 0,
            'cache_misses': 0
        }

    @property
    def needs_update(self):
        """Check if cache needs update"""
        if not self.cache['last_update']:
            return True
        return (datetime.now() - self.cache['last_update']).seconds > self.cache['update_frequency']

    def update_cache(self):
        """Update cache with latest computations"""
        try:
            df = self.analytics_suite.data

            # Pre-compute common aggregations
            self.cache['data'] = {
                'industry_metrics': self.compute_industry_metrics(df),
                'risk_metrics': self.compute_risk_metrics(df),
                'time_series': self.compute_time_series_metrics(df),
                'customer_segments': self.compute_customer_segments(df)
            }

            self.cache['last_update'] = datetime.now()
            logger.info("Cache updated successfully")

        except Exception as e:
            logger.error(f"Cache update failed: {str(e)}")
            self.background_tasks['last_error'] = str(e)

    def add_advanced_features(self):
        """Add advanced features to the dashboard"""

        # Add predictive analytics
        self.setup_predictive_analytics()

        # Add anomaly detection
        self.setup_anomaly_detection()

        # Add advanced filtering
        self.setup_advanced_filtering()

        # Add export capabilities
        self.setup_export_capabilities()

    def setup_predictive_analytics(self):
        """Setup predictive analytics features"""
        @self.app.callback(
            Output('prediction-results', 'children'),
            [Input('predict-button', 'n_clicks')],
            [State('industry-filter', 'value')]
        )
        def update_predictions(n_clicks, industry):
            if not n_clicks:
                return []

            try:
                df = self.get_filtered_data([industry] if industry else None, None, None)
                predictions = self.generate_predictions(df)
                return self.create_prediction_cards(predictions)
            except Exception as e:
                self.log_error("Prediction update failed", e)
                return []

    def generate_predictions(self, df):
        """Generate predictions for clients"""
        try:
            # Churn prediction
            churn_predictions = self.analytics_suite.pipeline.best_model.predict_proba(
                self.analytics_suite.pipeline.X_test
            )[:, 1]

            # Volume prediction using time series
            volume_predictions = self.predict_volume(df)

            # Risk trend prediction
            risk_predictions = self.predict_risk_trends(df)

            return {
                'churn_predictions': churn_predictions,
                'volume_predictions': volume_predictions,
                'risk_predictions': risk_predictions
            }
        except Exception as e:
            self.log_error("Prediction generation failed", e)
            return None

    def setup_anomaly_detection(self):
        """Setup anomaly detection system"""
        @self.app.callback(
            Output('anomaly-indicators', 'children'),
            [Input('check-anomalies-button', 'n_clicks')]
        )
        def update_anomalies(n_clicks):
            if not n_clicks:
                return []

            try:
                anomalies = self.detect_anomalies()
                return self.create_anomaly_alerts(anomalies)
            except Exception as e:
                self.log_error("Anomaly detection failed", e)
                return []

    def detect_anomalies(self):
        """Detect anomalies in the data"""
        df = self.analytics_suite.data
        anomalies = {
            'volume_anomalies': self.detect_volume_anomalies(df),
            'risk_anomalies': self.detect_risk_anomalies(df),
            'behavior_anomalies': self.detect_behavior_anomalies(df)
        }
        return anomalies

    def setup_advanced_filtering(self):
        """Setup advanced filtering capabilities"""
        @self.app.callback(
            Output('filtered-data-table', 'data'),
            [Input('advanced-filter-button', 'n_clicks')],
            [State('filter-conditions', 'value')]
        )
        def apply_advanced_filters(n_clicks, conditions):
            if not n_clicks:
                return []

            try:
                filtered_data = self.apply_complex_filters(conditions)
                return filtered_data.to_dict('records')
            except Exception as e:
                self.log_error("Advanced filtering failed", e)
                return []

    def setup_export_capabilities(self):
        """Setup data export capabilities"""
        @self.app.callback(
            Output('download-data', 'data'),
            [Input('export-button', 'n_clicks')],
            [State('export-format', 'value')]
        )
        def export_data(n_clicks, export_format):
            if not n_clicks:
                return None

            try:
                return self.prepare_export_data(export_format)
            except Exception as e:
                self.log_error("Data export failed", e)
                return None

    def prepare_export_data(self, export_format):
        """Prepare data for export"""
        df = self.analytics_suite.data

        if export_format == 'csv':
            return dict(content=df.to_csv(index=False), filename="fx_sales_data.csv")
        elif export_format == 'excel':
            return dict(content=df.to_excel(index=False), filename="fx_sales_data.xlsx")
        elif export_format == 'json':
            return dict(content=df.to_json(orient='records'), filename="fx_sales_data.json")

    def add_real_time_updates(self):
        """Add real-time update capabilities"""
        @self.app.callback(
            Output('last-update-time', 'children'),
            [Input('interval-component', 'n_intervals')]
        )
        def update_data(n):
            if self.needs_update:
                self.update_cache()
            return f"Last updated: {self.cache['last_update'].strftime('%H:%M:%S')}"

    def optimize_layout(self):
        """Optimize dashboard layout"""
        self.app.layout = html.Div([
            dcc.Store(id='session-store'),
            dcc.Store(id='local-store', storage_type='local'),
            dcc.Interval(
                id='interval-component',
                interval=5*60*1000,  # 5 minutes
                n_intervals=0
            ),
            self.create_layout()
        ])

    def setup_error_handling(self):
        """Setup enhanced error handling"""
        @self.app.callback(
            Output('error-log', 'children'),
            [Input('error-check-interval', 'n_intervals')]
        )
        def update_error_log(n):
            if self.background_tasks['last_error']:
                return html.Div([
                    html.H4("Error Log"),
                    html.P(self.background_tasks['last_error'])
                ])
            return []

In [None]:
class EnhancedDashboard:
    def __init__(self, analytics_suite=None):  # Add initialization parameter
        self.analytics_suite = analytics_suite
        self.app = dash.Dash(__name__,
                            external_stylesheets=[dbc.themes.FLATLY],
                            suppress_callback_exceptions=True)
        self.error_status = {'last_error': None}
        self.cache = {}
        self.initialize_cache()
        self.setup_layout()
        self.setup_callbacks()

    def initialize_cache(self):
        """Initialize cache with computed values"""
        try:
            if self.analytics_suite is not None:
                df = self.analytics_suite.data
                self.cache.update({
                    'total_volume': df['monthly_volume'].sum(),
                    'avg_risk': df['risk_score'].mean(),
                    'industry_counts': df['industry'].value_counts().to_dict(),
                    'risk_segments': self.calculate_risk_segments(df)
                })
        except Exception as e:
            self.log_error("Cache initialization failed", e)

    def run(self):
        """Run the dashboard"""
        try:
            self.app.run_server(debug=True)
        except Exception as e:
            print(f"Error running dashboard: {str(e)}")
            raise

    # Rest of the EnhancedDashboard class implementation remains the same...

def launch_dashboard(analytics_suite):
    """Launch the dashboard with error handling"""
    try:
        print("\nInitializing dashboard...")
        dashboard = EnhancedDashboard(analytics_suite=analytics_suite)

        print("\nDashboard is ready!")
        print("Access the dashboard at: http://127.0.0.1:8050")
        print("\nPress Ctrl+C to stop the dashboard")

        dashboard.run()

    except Exception as e:
        print(f"Error launching dashboard: {str(e)}")
        traceback.print_exc()

# Main execution
if __name__ == "__main__":
    try:
        # Run analysis
        print("Running analysis...")
        analytics_suite, insights = run_complete_analysis()

        if analytics_suite is not None:
            # Launch dashboard
            print("\nLaunching dashboard...")
            launch_dashboard(analytics_suite)
        else:
            print("Analysis failed, cannot launch dashboard.")

    except Exception as e:
        print(f"Error in main execution: {str(e)}")
        traceback.print_exc()

Running analysis...
Generating and processing data...
Running model pipeline...


ERROR:__main__:Error in model performance analysis: name 'accuracy_score' is not defined


Initializing analytics suite...
Generating insights...

=== INSIGHTS SUMMARY ===

Customer Segments:
         monthly_volume  risk_score  churned
Segment                                     
0                 24.39       25.37     0.20
1                 77.97       75.53     0.20
2                 25.33       74.56     0.19
3                 73.01       28.28     0.15

Risk Profiles:
High Risk Clients: 250

Recommendations:
- High Risk Alert: Immediate review required (250 clients)
- Growth Opportunity: Develop growth strategy (250 clients)

Summary Statistics:
Total Customers: 1000
Average Volume: £49.23M
Churn Rate: 18.80%

Creating dashboard...
Error in main execution: too many values to unpack (expected 2)


Traceback (most recent call last):
  File "<ipython-input-104-7fdecb465640>", line 58, in <cell line: 0>
    analytics_suite, insights = run_complete_analysis()
    ^^^^^^^^^^^^^^^^^^^^^^^^^
ValueError: too many values to unpack (expected 2)


In [None]:
class EnhancedDashboard:
    def __init__(self, analytics_suite=None):
        self.analytics_suite = analytics_suite
        self.app = dash.Dash(__name__,
                            external_stylesheets=[dbc.themes.FLATLY],
                            suppress_callback_exceptions=True)
        self.error_status = {'last_error': None}
        self.cache = {}
        self.setup_logging()
        self.initialize_cache()
        self.setup_layout()
        self.setup_callbacks()

    def setup_logging(self):
        """Setup logging for the dashboard"""
        self.logger = logging.getLogger(__name__)
        if not self.logger.handlers:
            handler = logging.StreamHandler()
            formatter = logging.Formatter(
                '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
            )
            handler.setFormatter(formatter)
            self.logger.addHandler(handler)
            self.logger.setLevel(logging.INFO)

    def log_error(self, message, error):
        """Log error messages"""
        error_msg = f"{message}: {str(error)}"
        self.logger.error(error_msg)
        self.error_status['last_error'] = error_msg

    def calculate_risk_segments(self, df):
        """Calculate risk segments"""
        try:
            return {
                'high_risk': len(df[df['risk_score'] >= 75]),
                'medium_risk': len(df[(df['risk_score'] >= 25) & (df['risk_score'] < 75)]),
                'low_risk': len(df[df['risk_score'] < 25])
            }
        except Exception as e:
            self.log_error("Risk segment calculation failed", e)
            return {'high_risk': 0, 'medium_risk': 0, 'low_risk': 0}

    def initialize_cache(self):
        """Initialize cache with computed values"""
        try:
            if self.analytics_suite is not None:
                df = self.analytics_suite.data
                self.cache.update({
                    'total_volume': df['monthly_volume'].sum(),
                    'avg_risk': df['risk_score'].mean(),
                    'industry_counts': df['industry'].value_counts().to_dict(),
                    'risk_segments': self.calculate_risk_segments(df)
                })
                self.logger.info("Cache initialized successfully")
        except Exception as e:
            self.log_error("Cache initialization failed", e)

    def setup_layout(self):
        """Setup dashboard layout"""
        try:
            self.app.layout = dbc.Container([
                # Header
                dbc.Row([
                    dbc.Col([
                        html.H1("FX Sales Analytics Dashboard",
                               className="text-center mb-4")
                    ])
                ]),

                # Filters
                dbc.Row([
                    dbc.Col([
                        html.Label("Select Industry"),
                        dcc.Dropdown(
                            id='industry-filter',
                            options=[
                                {'label': str(x), 'value': str(x)}
                                for x in self.analytics_suite.data['industry'].unique()
                            ] if self.analytics_suite is not None else [],
                            value=None
                        )
                    ], width=6)
                ]),

                # KPI Cards
                dbc.Row([
                    dbc.Col([
                        dbc.Card([
                            dbc.CardBody([
                                html.H4("Total Clients"),
                                html.H2(id="kpi-total-clients", children="0")
                            ])
                        ])
                    ], width=3),
                    dbc.Col([
                        dbc.Card([
                            dbc.CardBody([
                                html.H4("Average Volume"),
                                html.H2(id="kpi-avg-volume", children="£0M")
                            ])
                        ])
                    ], width=3),
                    dbc.Col([
                        dbc.Card([
                            dbc.CardBody([
                                html.H4("Risk Score"),
                                html.H2(id="kpi-risk-score", children="0")
                            ])
                        ])
                    ], width=3),
                    dbc.Col([
                        dbc.Card([
                            dbc.CardBody([
                                html.H4("Churn Rate"),
                                html.H2(id="kpi-churn-rate", children="0%")
                            ])
                        ])
                    ], width=3)
                ], className="mb-4"),

                # Main Chart
                dbc.Row([
                    dbc.Col([
                        dcc.Graph(id='main-chart')
                    ])
                ])
            ], fluid=True)

            self.logger.info("Layout setup completed")
        except Exception as e:
            self.log_error("Layout setup failed", e)

    def setup_callbacks(self):
        """Setup dashboard callbacks"""
        try:
            @self.app.callback(
                [Output('kpi-total-clients', 'children'),
                 Output('kpi-avg-volume', 'children'),
                 Output('kpi-risk-score', 'children'),
                 Output('kpi-churn-rate', 'children'),
                 Output('main-chart', 'figure')],
                [Input('industry-filter', 'value')]
            )
            def update_dashboard(selected_industry):
                try:
                    df = self.analytics_suite.data
                    if selected_industry:
                        df = df[df['industry'].astype(str) == selected_industry]

                    # Calculate KPIs
                    total_clients = len(df)
                    avg_volume = df['monthly_volume'].mean()
                    risk_score = df['risk_score'].mean()
                    churn_rate = df['churned'].mean()

                    # Create main chart
                    fig = px.scatter(
                        df,
                        x='monthly_volume',
                        y='risk_score',
                        color='industry',
                        title='Volume vs Risk Analysis'
                    )

                    return (
                        f"{total_clients:,}",
                        f"£{avg_volume:.1f}M",
                        f"{risk_score:.1f}",
                        f"{churn_rate:.1%}",
                        fig
                    )
                except Exception as e:
                    self.log_error("Callback update failed", e)
                    return "0", "£0M", "0", "0%", {}

            self.logger.info("Callbacks setup completed")
        except Exception as e:
            self.log_error("Callback setup failed", e)

    def run(self):
        """Run the dashboard"""
        try:
            self.logger.info("Starting dashboard server...")
            self.app.run_server(debug=True)
        except Exception as e:
            self.log_error("Dashboard server failed to start", e)
            raise

def launch_dashboard(analytics_suite):
    """Launch the dashboard with error handling"""
    try:
        print("\nInitializing dashboard...")
        dashboard = EnhancedDashboard(analytics_suite=analytics_suite)

        print("\nDashboard is ready!")
        print("Access the dashboard at: http://127.0.0.1:8050")
        print("\nPress Ctrl+C to stop the dashboard")

        dashboard.run()

    except Exception as e:
        print(f"Error launching dashboard: {str(e)}")
        traceback.print_exc()

# Main execution
if __name__ == "__main__":
    try:
        # Run analysis
        print("Running analysis...")
        analytics_suite, insights = run_complete_analysis()

        if analytics_suite is not None:
            # Launch dashboard
            print("\nLaunching dashboard...")
            launch_dashboard(analytics_suite)
        else:
            print("Analysis failed, cannot launch dashboard.")

    except Exception as e:
        print(f"Error in main execution: {str(e)}")
        traceback.print_exc()

Running analysis...
Generating and processing data...
Running model pipeline...


ERROR:__main__:Error in model performance analysis: name 'accuracy_score' is not defined


Initializing analytics suite...
Generating insights...

=== INSIGHTS SUMMARY ===

Customer Segments:
         monthly_volume  risk_score  churned
Segment                                     
0                 24.39       25.37     0.20
1                 77.97       75.53     0.20
2                 25.33       74.56     0.19
3                 73.01       28.28     0.15

Risk Profiles:
High Risk Clients: 250

Recommendations:
- High Risk Alert: Immediate review required (250 clients)
- Growth Opportunity: Develop growth strategy (250 clients)

Summary Statistics:
Total Customers: 1000
Average Volume: £49.23M
Churn Rate: 18.80%

Creating dashboard...
Error in main execution: too many values to unpack (expected 2)


Traceback (most recent call last):
  File "<ipython-input-105-39653b0ffc72>", line 211, in <cell line: 0>
    analytics_suite, insights = run_complete_analysis()
    ^^^^^^^^^^^^^^^^^^^^^^^^^
ValueError: too many values to unpack (expected 2)


In [None]:
class AdvancedAnalyticsDashboard:
    def __init__(self, analytics_suite=None):
        self.analytics_suite = analytics_suite
        self.app = dash.Dash(
            __name__,
            external_stylesheets=[dbc.themes.FLATLY],
            suppress_callback_exceptions=True
        )

        # Initialize components
        self.setup_logging()
        self.initialize_cache()
        self.setup_ml_models()
        self.setup_layout()
        self.setup_callbacks()

    def setup_logging(self):
        """Setup enhanced logging system"""
        self.logger = logging.getLogger(__name__)
        handler = logging.StreamHandler()
        formatter = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
        )
        handler.setFormatter(formatter)
        self.logger.addHandler(handler)
        self.logger.setLevel(logging.INFO)

    def setup_layout(self):
        """Setup advanced dashboard layout"""
        self.app.layout = dbc.Container([
            # Navigation Bar
            self.create_navbar(),

            # Main Content Area
            dbc.Row([
                # Sidebar
                dbc.Col([
                    self.create_sidebar()
                ], width=2),

                # Main Content
                dbc.Col([
                    dbc.Tabs([
                        # Predictive Analytics Tab
                        dbc.Tab([
                            self.create_predictive_tab()
                        ], label="Predictive Analytics"),

                        # Customer Segmentation Tab
                        dbc.Tab([
                            self.create_segmentation_tab()
                        ], label="Customer Segmentation"),

                        # Risk Analysis Tab
                        dbc.Tab([
                            self.create_risk_tab()
                        ], label="Risk Analysis"),

                        # Business Intelligence Tab
                        dbc.Tab([
                            self.create_bi_tab()
                        ], label="Business Intelligence"),

                        # Operations Tab
                        dbc.Tab([
                            self.create_operations_tab()
                        ], label="Operations")
                    ])
                ], width=10)
            ]),

            # Footer
            self.create_footer()

        ], fluid=True)

    def create_navbar(self):
        """Create navigation bar"""
        return dbc.Navbar(
            dbc.Container([
                dbc.NavbarBrand("Advanced FX Analytics", className="ms-2"),
                dbc.Nav([
                    dbc.NavItem(dbc.NavLink("Dashboard", href="#")),
                    dbc.NavItem(dbc.NavLink("Reports", href="#")),
                    dbc.NavItem(dbc.NavLink("Settings", href="#")),
                    dbc.NavItem(
                        dbc.Button("Export", color="success", className="ms-2")
                    ),
                    dbc.NavItem(
                        dbc.Button("Alerts", color="danger", className="ms-2")
                    )
                ]),
                dbc.NavbarToggler(id="navbar-toggler")
            ]),
            color="dark",
            dark=True
        )

    def create_sidebar(self):
        """Create sidebar with filters and controls"""
        return html.Div([
            html.H4("Filters", className="mb-3"),
            html.Label("Time Period"),
            dcc.DatePickerRange(
                id='date-range',
                start_date=datetime.now() - timedelta(days=30),
                end_date=datetime.now()
            ),
            html.Label("Industry", className="mt-3"),
            dcc.Dropdown(
                id='industry-filter',
                options=[{'label': x, 'value': x}
                        for x in self.analytics_suite.data['industry'].unique()],
                multi=True
            ),
            html.Label("Risk Level", className="mt-3"),
            dcc.RangeSlider(
                id='risk-slider',
                min=0,
                max=100,
                step=5,
                value=[0, 100]
            ),
            html.Hr(),
            html.H4("Quick Actions"),
            dbc.Button("Generate Report", color="primary", className="mt-2 w-100"),
            dbc.Button("Export Data", color="secondary", className="mt-2 w-100"),
            dbc.Button("Refresh", color="info", className="mt-2 w-100")
        ])

In [None]:
class RiskAnalyticsDashboard:
    def __init__(self, analytics_suite):
        self.analytics_suite = analytics_suite
        self.app = dash.Dash(__name__,
                            external_stylesheets=[dbc.themes.DARKLY],
                            suppress_callback_exceptions=True)
        self.setup_layout()
        # Optional: Only call setup_callbacks if you have actual callbacks
        # self.setup_callbacks()  # Comment or remove this line if no callbacks

    def setup_callbacks(self):
        # Implement callbacks here if needed
        pass

In [None]:
from sklearn.metrics import roc_auc_score

In [None]:
import logging

logging.basicConfig(level=logging.ERROR)
logger = logging.getLogger(__name__)

def run_complete_analysis():
    try:
        # Your analysis logic here
        return analytics_suite, insights
    except Exception as e:
        logger.error(f"Error in analysis: {e}")
        raise

In [None]:
class AdvancedAnalyticsSuite:
    """Advanced Analytics Suite with multiple specialized dashboards"""
    def __init__(self, analytics_suite):
        self.analytics_suite = analytics_suite
        self.dashboards = {
            'risk': RiskAnalyticsDashboard(analytics_suite),
            'bi': BusinessIntelligenceDashboard(analytics_suite),
            'ops': OperationsDashboard(analytics_suite),
            'viz': AdvancedVisualizationDashboard(analytics_suite)
        }

    def launch_dashboard(self, dashboard_type='risk'):
        """Launch specific dashboard"""
        if dashboard_type in self.dashboards:
            self.dashboards[dashboard_type].run()
        else:
            raise ValueError(f"Dashboard type {dashboard_type} not found")

# Risk Analytics Dashboard
class RiskAnalyticsDashboard:
    def __init__(self, analytics_suite):
        self.analytics_suite = analytics_suite
        self.app = dash.Dash(__name__,
                            external_stylesheets=[dbc.themes.DARKLY],
                            suppress_callback_exceptions=True)
        self.setup_layout()
        self.setup_callbacks()

    def setup_layout(self):
        self.app.layout = dbc.Container([
            dbc.Row([
                html.H1("Risk Analytics Dashboard",
                        className="text-center text-primary mb-4")
            ]),

            # Risk Overview Section
            dbc.Row([
                dbc.Col([
                    dbc.Card([
                        dbc.CardHeader("Risk Matrix"),
                        dbc.CardBody([
                            dcc.Graph(id='risk-heatmap')
                        ])
                    ])
                ], width=8),

                dbc.Col([
                    dbc.Card([
                        dbc.CardHeader("Risk Metrics"),
                        dbc.CardBody([
                            html.Div(id='risk-metrics')
                        ])
                    ])
                ], width=4)
            ]),

            # Detailed Analysis Section
            dbc.Row([
                dbc.Col([
                    dbc.Tabs([
                        dbc.Tab([
                            dcc.Graph(id='credit-risk-analysis')
                        ], label="Credit Risk"),

                        dbc.Tab([
                            dcc.Graph(id='market-risk-analysis')
                        ], label="Market Risk"),

                        dbc.Tab([
                            dcc.Graph(id='operational-risk-analysis')
                        ], label="Operational Risk")
                    ])
                ])
            ])
        ], fluid=True)

# Business Intelligence Dashboard
class BusinessIntelligenceDashboard:
    def __init__(self, analytics_suite):
        self.analytics_suite = analytics_suite
        self.app = dash.Dash(__name__,
                            external_stylesheets=[dbc.themes.BOOTSTRAP],
                            suppress_callback_exceptions=True)
        self.setup_layout()
        self.setup_callbacks()

    def setup_layout(self):
        self.app.layout = dbc.Container([
            dbc.Row([
                html.H1("Business Intelligence Dashboard",
                        className="text-center mb-4")
            ]),

            # KPI Section
            dbc.Row([
                dbc.Col([
                    self.create_kpi_card("Revenue", "revenue-kpi")
                ], width=3),
                dbc.Col([
                    self.create_kpi_card("Volume", "volume-kpi")
                ], width=3),
                dbc.Col([
                    self.create_kpi_card("Growth", "growth-kpi")
                ], width=3),
                dbc.Col([
                    self.create_kpi_card("Risk", "risk-kpi")
                ], width=3)
            ]),

            # Analysis Section
            dbc.Row([
                dbc.Col([
                    dbc.Card([
                        dbc.CardHeader("Trend Analysis"),
                        dbc.CardBody([
                            dcc.Graph(id='trend-analysis')
                        ])
                    ])
                ], width=8),

                dbc.Col([
                    dbc.Card([
                        dbc.CardHeader("Insights"),
                        dbc.CardBody([
                            html.Div(id='automated-insights')
                        ])
                    ])
                ], width=4)
            ])
        ], fluid=True)

# Operations Dashboard
class OperationsDashboard:
    def __init__(self, analytics_suite):
        self.analytics_suite = analytics_suite
        self.app = dash.Dash(__name__,
                            external_stylesheets=[dbc.themes.FLATLY],
                            suppress_callback_exceptions=True)
        self.setup_layout()
        self.setup_callbacks()

    def setup_layout(self):
        self.app.layout = dbc.Container([
            dbc.Row([
                html.H1("Operations Dashboard",
                        className="text-center mb-4")
            ]),

            # Operations Overview
            dbc.Row([
                dbc.Col([
                    dbc.Card([
                        dbc.CardHeader("Active Alerts"),
                        dbc.CardBody([
                            html.Div(id='active-alerts')
                        ])
                    ])
                ], width=6),

                dbc.Col([
                    dbc.Card([
                        dbc.CardHeader("Task Queue"),
                        dbc.CardBody([
                            html.Div(id='task-queue')
                        ])
                    ])
                ], width=6)
            ]),

            # Performance Metrics
            dbc.Row([
                dbc.Col([
                    dbc.Card([
                        dbc.CardHeader("Performance Metrics"),
                        dbc.CardBody([
                            dcc.Graph(id='performance-metrics')
                        ])
                    ])
                ])
            ])
        ], fluid=True)

# Advanced Visualization Dashboard
class AdvancedVisualizationDashboard:
    def __init__(self, analytics_suite):
        self.analytics_suite = analytics_suite
        self.app = dash.Dash(__name__,
                            external_stylesheets=[dbc.themes.CYBORG],
                            suppress_callback_exceptions=True)
        self.setup_layout()
        self.setup_callbacks()

    def setup_layout(self):
        self.app.layout = dbc.Container([
            dbc.Row([
                html.H1("Advanced Visualization Dashboard",
                        className="text-center mb-4")
            ]),

            # 3D Visualizations
            dbc.Row([
                dbc.Col([
                    dbc.Card([
                        dbc.CardHeader("3D Risk-Volume-Time Analysis"),
                        dbc.CardBody([
                            dcc.Graph(id='3d-analysis')
                        ])
                    ])
                ])
            ]),

            # Network Analysis
            dbc.Row([
                dbc.Col([
                    dbc.Card([
                        dbc.CardHeader("Client Relationship Network"),
                        dbc.CardBody([
                            dcc.Graph(id='network-analysis')
                        ])
                    ])
                ])
            ])
        ], fluid=True)

# Usage Example
def launch_advanced_analytics():
    try:
        # Initialize base analytics
        analytics_suite, insights = run_complete_analysis()

        # Create advanced analytics suite
        advanced_suite = AdvancedAnalyticsSuite(analytics_suite)

        print("Available Dashboards:")
        print("1. Risk Analytics (risk)")
        print("2. Business Intelligence (bi)")
        print("3. Operations (ops)")
        print("4. Advanced Visualization (viz)")

        dashboard_type = input("Enter dashboard type to launch: ")
        advanced_suite.launch_dashboard(dashboard_type)

    except Exception as e:
        print(f"Error launching advanced analytics: {str(e)}")
        traceback.print_exc()

if __name__ == "__main__":
    launch_advanced_analytics()

Error launching advanced analytics: 'RiskAnalyticsDashboard' object has no attribute 'setup_callbacks'


Traceback (most recent call last):
  File "<ipython-input-110-6de85a4d3778>", line 232, in launch_advanced_analytics
    advanced_suite = AdvancedAnalyticsSuite(analytics_suite)
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "<ipython-input-110-6de85a4d3778>", line 6, in __init__
    'risk': RiskAnalyticsDashboard(analytics_suite),
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "<ipython-input-110-6de85a4d3778>", line 27, in __init__
    self.setup_callbacks()
    ^^^^^^^^^^^^^^^^^^^^
AttributeError: 'RiskAnalyticsDashboard' object has no attribute 'setup_callbacks'


In [None]:
class RiskAnalyticsDashboard:
    def __init__(self, analytics_suite):
        self.analytics_suite = analytics_suite
        self.app = dash.Dash(__name__,
                            external_stylesheets=[dbc.themes.DARKLY],
                            suppress_callback_exceptions=True)
        self.setup_layout()
        self.setup_callbacks()

    def setup_layout(self):
        self.app.layout = dbc.Container([
            # Layout code
        ], fluid=True)

    def setup_callbacks(self):
        """Define callbacks for the Risk Analytics Dashboard."""
        # You can leave it empty for now or add your callbacks here
        pass


class BusinessIntelligenceDashboard:
    def __init__(self, analytics_suite):
        self.analytics_suite = analytics_suite
        self.app = dash.Dash(__name__,
                            external_stylesheets=[dbc.themes.BOOTSTRAP],
                            suppress_callback_exceptions=True)
        self.setup_layout()
        self.setup_callbacks()

    def setup_layout(self):
        self.app.layout = dbc.Container([
            # Layout code
        ], fluid=True)

    def setup_callbacks(self):
        """Define callbacks for the Business Intelligence Dashboard."""
        pass


class OperationsDashboard:
    def __init__(self, analytics_suite):
        self.analytics_suite = analytics_suite
        self.app = dash.Dash(__name__,
                            external_stylesheets=[dbc.themes.FLATLY],
                            suppress_callback_exceptions=True)
        self.setup_layout()
        self.setup_callbacks()

    def setup_layout(self):
        self.app.layout = dbc.Container([
            # Layout code
        ], fluid=True)

    def setup_callbacks(self):
        """Define callbacks for the Operations Dashboard."""
        pass


class AdvancedVisualizationDashboard:
    def __init__(self, analytics_suite):
        self.analytics_suite = analytics_suite
        self.app = dash.Dash(__name__,
                            external_stylesheets=[dbc.themes.CYBORG],
                            suppress_callback_exceptions=True)
        self.setup_layout()
        self.setup_callbacks()

    def setup_layout(self):
        self.app.layout = dbc.Container([
            # Layout code
        ], fluid=True)

    def setup_callbacks(self):
        """Define callbacks for the Advanced Visualization Dashboard."""
        pass

In [None]:
def setup_callbacks(self):
    """Define the callbacks for the dashboard."""
    pass

In [None]:
class RiskAnalyticsDashboard:
    def __init__(self, analytics_suite):
        self.analytics_suite = analytics_suite
        self.app = dash.Dash(__name__,
                            external_stylesheets=[dbc.themes.DARKLY],
                            suppress_callback_exceptions=True)
        self.setup_layout()
        self.setup_callbacks()  # This now refers to the defined method

    def setup_layout(self):
        self.app.layout = dbc.Container([
            dbc.Row([
                html.H1("Risk Analytics Dashboard",
                        className="text-center text-primary mb-4")
            ]),

            # Risk Overview Section
            dbc.Row([
                dbc.Col([
                    dbc.Card([
                        dbc.CardHeader("Risk Matrix"),
                        dbc.CardBody([
                            dcc.Graph(id='risk-heatmap')
                        ])
                    ])
                ], width=8),

                dbc.Col([
                    dbc.Card([
                        dbc.CardHeader("Risk Metrics"),
                        dbc.CardBody([
                            html.Div(id='risk-metrics')
                        ])
                    ])
                ], width=4)
            ]),

            # Detailed Analysis Section
            dbc.Row([
                dbc.Col([
                    dbc.Tabs([
                        dbc.Tab([
                            dcc.Graph(id='credit-risk-analysis')
                        ], label="Credit Risk"),

                        dbc.Tab([
                            dcc.Graph(id='market-risk-analysis')
                        ], label="Market Risk"),

                        dbc.Tab([
                            dcc.Graph(id='operational-risk-analysis')
                        ], label="Operational Risk")
                    ])
                ])
            ])
        ], fluid=True)

    def setup_callbacks(self):
        """Define the callbacks for the dashboard."""
        # Add your Dash callback definitions here
        pass

In [None]:
class RiskAnalyticsDashboard:
    def __init__(self, analytics_suite):
        self.analytics_suite = analytics_suite
        self.app = dash.Dash(__name__,
                            external_stylesheets=[dbc.themes.DARKLY],
                            suppress_callback_exceptions=True)
        self.setup_layout()

    def setup_layout(self):
        # Existing layout code remains the same
        pass

    def setup_callbacks(self):
        # Optional method for adding callbacks
        pass

class BusinessIntelligenceDashboard:
    def __init__(self, analytics_suite):
        self.analytics_suite = analytics_suite
        self.app = dash.Dash(__name__,
                            external_stylesheets=[dbc.themes.BOOTSTRAP],
                            suppress_callback_exceptions=True)
        self.setup_layout()

    def setup_layout(self):
        # Existing layout code remains the same
        pass

    def setup_callbacks(self):
        # Optional method for adding callbacks
        pass

class OperationsDashboard:
    def __init__(self, analytics_suite):
        self.analytics_suite = analytics_suite
        self.app = dash.Dash(__name__,
                            external_stylesheets=[dbc.themes.FLATLY],
                            suppress_callback_exceptions=True)
        self.setup_layout()

    def setup_layout(self):
        # Existing layout code remains the same
        pass

    def setup_callbacks(self):
        # Optional method for adding callbacks
        pass

class AdvancedVisualizationDashboard:
    def __init__(self, analytics_suite):
        self.analytics_suite = analytics_suite
        self.app = dash.Dash(__name__,
                            external_stylesheets=[dbc.themes.CYBORG],
                            suppress_callback_exceptions=True)
        self.setup_layout()

    def setup_layout(self):
        pass

    def setup_callbacks(self):
        pass

In [None]:
class AdvancedAnalyticsSuite:
    def __init__(self, analytics_suite):
        self.analytics_suite = analytics_suite
        self.dashboards = {
            'risk': RiskAnalyticsDashboard(analytics_suite),
            'bi': BusinessIntelligenceDashboard(analytics_suite),
            'ops': OperationsDashboard(analytics_suite),
            'viz': AdvancedVisualizationDashboard(analytics_suite)
        }

    def launch_dashboard(self, dashboard_type='risk'):
        if dashboard_type in self.dashboards:
            self.dashboards[dashboard_type].run()  # or appropriate method to launch
        else:
            raise ValueError(f"Dashboard type {dashboard_type} not found")

In [None]:
def launch_advanced_analytics():
    try:
        # Initialize base analytics
        analytics_suite, insights = run_complete_analysis()

        # Create advanced analytics suite
        advanced_suite = AdvancedAnalyticsSuite(analytics_suite)

        print("Available Dashboards:")
        print("1. Risk Analytics (risk)")
        print("2. Business Intelligence (bi)")
        print("3. Operations (ops)")
        print("4. Advanced Visualization (viz)")

        dashboard_type = input("Enter dashboard type to launch: ")
        advanced_suite.launch_dashboard(dashboard_type)

    except Exception as e:
        print(f"Error launching advanced analytics: {str(e)}")
        traceback.print_exc()

In [None]:
import traceback
import dash
import dash_bootstrap_components as dbc
import dash_core_components as dcc
import dash_html_components as html

def launch_advanced_analytics():
    try:
        analytics_suite = {}

        advanced_suite = AdvancedAnalyticsSuite(analytics_suite)

        print("Available Dashboards:")
        print("1. Risk Analytics (risk)")
        print("2. Business Intelligence (bi)")
        print("3. Operations (ops)")
        print("4. Advanced Visualization (viz)")

        dashboard_type = input("Enter dashboard type to launch: ").lower()

        # Modify to use .app.run() instead of .run()
        if dashboard_type in advanced_suite.dashboards:
            dashboard = advanced_suite.dashboards[dashboard_type]
            dashboard.app.run_server(debug=True)
        else:
            raise ValueError(f"Dashboard type {dashboard_type} not found")

    except Exception as e:
        print(f"Error launching advanced analytics: {str(e)}")
        traceback.print_exc()

class RiskAnalyticsDashboard:
    def __init__(self, analytics_suite):
        self.analytics_suite = analytics_suite
        self.app = dash.Dash(__name__,
                            external_stylesheets=[dbc.themes.DARKLY],
                            suppress_callback_exceptions=True)
        self.setup_layout()

    def setup_layout(self):
        pass

In [None]:
class RiskAnalyticsDashboard:
    def __init__(self, analytics_suite):
        self.analytics_suite = analytics_suite
        self.app = dash.Dash(__name__,
                            external_stylesheets=[dbc.themes.DARKLY],
                            suppress_callback_exceptions=True)
        self.setup_layout()

    def setup_layout(self):
        pass

    def setup_callbacks(self):
        pass