In [10]:
from main import *

In [11]:
def launch_demo(width='100%', height=1000, path="", scale=0.7):
    """Start the API server and the client, then embed the client in the notebook.

    Steps, in order: call ``cleanup_ports`` for 8000/8080, pick ports with
    ``find_available_port``, try ``reconfigure_cors`` (a failure is only
    reported), start the server through ``start_server_thread``, block on
    ``wait_for_server``, start the client through ``start_client``, pause for
    three seconds and finally call ``show_localhost_content``.

    Args:
        width: Forwarded to ``show_localhost_content``.
        height: Forwarded to ``show_localhost_content``.
        path: Accepted for callers that pass it; not used by this function.
        scale: Forwarded to ``show_localhost_content``.

    Returns:
        None. The function prints a message and returns early when no port is
        available, the server does not report ready, or the client fails to
        start.
    """
    divider = "=" * 50

    print("🎯 Launching Master Thesis Demo Application")
    print(divider)

    # Free the default ports before probing for available ones.
    cleanup_ports([8000, 8080])

    try:
        api_port, client_port = find_available_port(8000), find_available_port(8080)
    except RuntimeError as port_error:
        print(f"❌ {port_error}")
        return
    else:
        print(f"🔍 Found available ports - API: {api_port}, Client: {client_port}")

    # CORS reconfiguration is best effort: report the problem and keep going.
    try:
        reconfigure_cors(api_port, client_port)
    except Exception as cors_error:
        print(f"⚠️  Warning: Could not reconfigure CORS: {cors_error}")

    # Bind address comes from the HOST environment variable when it is set.
    host = os.getenv("HOST", "0.0.0.0")
    print(f"📡 Starting API server on {host}:{api_port}")

    # The thread handle is not needed afterwards; only the reported port is.
    _server_thread, actual_api_port = start_server_thread(host, api_port)

    if not wait_for_server("localhost", actual_api_port):
        print("❌ Failed to start API server. Exiting...")
        return

    client_process, actual_client_port = start_client(client_port)
    client_started = client_process and actual_client_port is not None
    if not client_started:
        print("❌ Failed to start client. Exiting...")
        return

    # Fixed grace period for the client before it is embedded.
    print("⏳ Waiting for client to start...")
    time.sleep(3)

    summary_lines = (
        f"\n{divider}",
        "✅ Demo application started successfully!",
        f"📡 API: http://localhost:{actual_api_port}",
        f"🌐 Client: http://localhost:{actual_client_port}",
        "💡 If ports were busy, alternative ports were automatically selected",
        divider,
    )
    for line in summary_lines:
        print(line)

    # Show the client in the notebook
    show_localhost_content(actual_client_port, width, height, scale)

In [12]:
def setup_custom_explanation_system(dataset_df, classifier, target_column='target'):
    """Load a user-supplied dataset and classifier into the demo's global state.

    The frame is converted into a LORE ``TabularDataset`` (non-numeric features
    and the target are cast to ``str``). A scikit-learn pipeline made of a
    ``ColumnTransformer`` (``StandardScaler`` on numeric features,
    ``OrdinalEncoder`` on categorical ones) followed by the classifier is then
    fitted on a stratified 70% split, wrapped in ``sklearnBBox`` and stored on
    ``global_state`` together with the dataset, the split and its metadata.

    The pipeline is fitted on a *copy* of ``classifier`` (``sklearn.base.clone``,
    falling back to ``copy.deepcopy``), so the estimator object passed in by
    the caller keeps whatever it was trained on before this call.

    Args:
        dataset_df: DataFrame holding the feature columns and ``target_column``.
        classifier: Object exposing ``predict`` and ``predict_proba``. It is
            used as the final pipeline step and fitted there, so it must also
            be fittable.
        target_column: Name of the label column in ``dataset_df``.

    Raises:
        ValueError: If the frame is empty, lacks ``target_column``, has fewer
            than two columns, or the classifier lacks a required method or
            fails when called on the first row.
        Exception: Any other failure is printed and re-raised unchanged.
    """
    print("🎯 Setting up Custom Dataset Explanation System")
    print("=" * 50)

    # NOTE(review): ports are cleaned here even though the final message asks
    # the user to refresh localhost:8080 — confirm this ordering is intended.
    cleanup_ports([8000, 8080])

    try:
        import copy

        import pandas as pd
        from sklearn.base import clone
        from sklearn.model_selection import train_test_split
        from sklearn.pipeline import make_pipeline
        from sklearn.preprocessing import StandardScaler, OrdinalEncoder
        from sklearn.compose import ColumnTransformer
        from lore_sa.dataset import TabularDataset
        from lore_sa.bbox import sklearn_classifier_bbox
        from pythonHelpers.routes.state import global_state

        # Validate inputs
        print("📊 Validating custom dataset...")
        if dataset_df.empty:
            raise ValueError("Dataset is empty")
        if target_column not in dataset_df.columns:
            raise ValueError(f"Target column '{target_column}' not found in dataset")
        if len(dataset_df.columns) < 2:
            raise ValueError("Dataset must have at least 2 columns (features + target)")

        print("🤖 Validating custom classifier...")
        for method in ('predict', 'predict_proba'):
            if not hasattr(classifier, method):
                raise ValueError(f"Classifier must have '{method}' method")

        # Smoke-test the classifier on the first row; the results are not needed.
        feature_cols = [col for col in dataset_df.columns if col != target_column]
        X_sample = dataset_df[feature_cols].iloc[:1]
        try:
            classifier.predict(X_sample)
            classifier.predict_proba(X_sample)
        except Exception as e:
            raise ValueError(f"Classifier validation failed: {str(e)}") from e

        print("🔄 Converting dataset to LORE format...")

        # Numeric features keep their values; every other column, including
        # the target, is handed over as strings. Column order is preserved.
        data_dict = {}
        for col in dataset_df.columns:
            series = dataset_df[col]
            if col != target_column and pd.api.types.is_numeric_dtype(series):
                data_dict[col] = series.values
            else:
                data_dict[col] = series.astype(str).values

        # Create TabularDataset
        dataset = TabularDataset.from_dict(data_dict, target_column)
        dataset.df.dropna(inplace=True)

        print("📦 Setting up classifier bbox...")

        # Descriptor indices are column positions inside dataset.df.
        numeric_indices = [v['index'] for v in dataset.descriptor['numeric'].values()]
        categorical_indices = [v['index'] for v in dataset.descriptor['categorical'].values()]

        # Keep the features in dataset order and translate each descriptor
        # index into its position inside X. Selecting numeric-first while
        # passing the untranslated indices to ColumnTransformer would scale or
        # encode the wrong columns whenever a categorical column precedes a
        # numeric one.
        feature_indices = sorted(numeric_indices + categorical_indices)
        position_in_X = {orig: pos for pos, orig in enumerate(feature_indices)}
        numeric_positions = [position_in_X[i] for i in numeric_indices]
        categorical_positions = [position_in_X[i] for i in categorical_indices]

        preprocessor = ColumnTransformer([
            ('num', StandardScaler(), numeric_positions),
            ('cat', OrdinalEncoder(), categorical_positions)
        ])

        # Fit a copy so the caller's trained classifier is not overwritten.
        try:
            estimator = clone(classifier)
        except (TypeError, RuntimeError):
            # Not clonable as a scikit-learn estimator; copy the object instead.
            estimator = copy.deepcopy(classifier)

        # Create pipeline with preprocessor and classifier
        model = make_pipeline(preprocessor, estimator)

        X = dataset.df.iloc[:, feature_indices]
        y = dataset.df[target_column]

        # Split for training (we need to fit the preprocessor)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.3, random_state=42, stratify=y
        )

        # Fits both the preprocessor and the copied classifier.
        model.fit(X_train, y_train)

        # Create LORE-compatible bbox
        bbox = sklearn_classifier_bbox.sklearnBBox(model)

        print("💾 Updating global state...")

        # Update global state to match what training normally does
        global_state.bbox = bbox
        global_state.dataset = dataset
        global_state.X_train = X_train
        global_state.y_train = y_train
        global_state.X_test = X_test
        global_state.y_test = y_test
        global_state.descriptor = dataset.descriptor
        global_state.feature_names = feature_cols
        # NOTE(review): these are the raw label values, while the model above
        # is trained on their string form — confirm consumers expect raw values.
        global_state.target_names = sorted(dataset_df[target_column].unique().tolist())
        global_state.dataset_name = "Custom Dataset"

        print("✅ Custom dataset and classifier loaded successfully!")
        print(f"📈 Dataset: {len(dataset_df)} samples, {len(feature_cols)} features")
        print(f"🏷️  Classes: {global_state.target_names}")
        print(f"📊 Feature types: {len(dataset.descriptor.get('numeric', {}))} numeric, {len(dataset.descriptor.get('categorical', {}))} categorical")
        print("=" * 50)
        print("🌐 Custom data is now loaded!")
        print("👉 Go to localhost:8080 in your browser and refresh the page")
        print("🎯 You should see the feature inputs directly (skipping dataset selection)")

    except Exception as e:
        print(f"❌ Error setting up custom explanation system: {str(e)}")
        raise

In [13]:
def launch_demo_with_custom_data(width='100%', height=1000, scale=0.7):
    """Launch the demo after marking custom data as loaded.

    Calls ``cleanup_ports`` for 8000/8080, sets the ``CUSTOM_DATA_LOADED``
    environment variable to ``"true"`` and then delegates to ``launch_demo``.

    Args:
        width: Forwarded to ``launch_demo``.
        height: Forwarded to ``launch_demo``.
        scale: Forwarded to ``launch_demo``.
    """
    print("🚀 Launching demo with custom data loaded...")

    # Free the target ports before anything is started.
    cleanup_ports([8000, 8080])

    # Flag for the regular launch path that custom data is already in place.
    os.environ.update({"CUSTOM_DATA_LOADED": "true"})

    display_options = {"width": width, "height": height, "scale": scale}
    launch_demo(**display_options)

In [14]:
# launch_demo()

In [15]:
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# Example with iris dataset: features plus an integer 'target' column.
iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names)
df['target'] = iris.target

# Hold out 20% of the rows. The split is seeded so that re-running the
# notebook trains the classifier on the same rows every time.
X = df.drop('target', axis=1)
y = df['target']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Train classifier
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

In [16]:
# Hand the iris frame and the trained forest to the setup function defined
# earlier in this notebook, naming 'target' as the label column.
setup_custom_explanation_system(df, clf, target_column='target')

🎯 Setting up Custom Dataset Explanation System
🧹 Cleaning up processes on target ports...
🔍 Checking port 8000...
⚠️  Error finding processes on port 8000: invalid attr name 'connections'
✅ No processes found running on port 8000
🔍 Checking port 8080...
⚠️  Error finding processes on port 8080: invalid attr name 'connections'
✅ No processes found running on port 8080
✅ Port cleanup completed successfully
📊 Validating custom dataset...
🤖 Validating custom classifier...
🔄 Converting dataset to LORE format...
📦 Setting up classifier bbox...
💾 Updating global state...
✅ Custom dataset and classifier loaded successfully!
📈 Dataset: 150 samples, 4 features
🏷️  Classes: [0, 1, 2]
📊 Feature types: 4 numeric, 0 categorical
🌐 Custom data is now loaded!
👉 Go to localhost:8080 in your browser and refresh the page
🎯 You should see the feature inputs directly (skipping dataset selection)


In [17]:
from pythonHelpers.routes.state import global_state

# Show the dataset descriptor and feature names currently held on the
# shared state object.
print(
    "\nGlobal state after setup:",
    f"Dataset descriptor: {global_state.dataset.descriptor}",
    f"Feature names: {global_state.feature_names}",
    sep="\n",
)


Global state after setup:
Dataset descriptor: {'numeric': {'sepal length (cm)': {'index': 0, 'min': 4.3, 'max': 7.9, 'mean': 5.843333333333334, 'std': 0.828066127977863, 'median': 5.8, 'q1': 5.1, 'q3': 6.4}, 'sepal width (cm)': {'index': 1, 'min': 2.0, 'max': 4.4, 'mean': 3.0573333333333337, 'std': 0.4358662849366982, 'median': 3.0, 'q1': 2.8, 'q3': 3.3}, 'petal length (cm)': {'index': 2, 'min': 1.0, 'max': 6.9, 'mean': 3.7580000000000005, 'std': 1.7652982332594662, 'median': 4.35, 'q1': 1.6, 'q3': 5.1}, 'petal width (cm)': {'index': 3, 'min': 0.1, 'max': 2.5, 'mean': 1.1993333333333336, 'std': 0.7622376689603465, 'median': 1.3, 'q1': 0.3, 'q3': 1.8}}, 'categorical': {}, 'ordinal': {}, 'target': {'target': {'index': 4, 'distinct_values': ['0', '1', '2'], 'count': {'0': 50, '1': 50, '2': 50}}}}
Feature names: ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']


In [18]:
# Start the demo with the default display settings. The commented-out
# alternative additionally sets CUSTOM_DATA_LOADED=true before launching.
# launch_demo_with_custom_data()
launch_demo()

🎯 Launching Master Thesis Demo Application
🧹 Cleaning up processes on target ports...
🔍 Checking port 8000...
⚠️  Error finding processes on port 8000: invalid attr name 'connections'
✅ No processes found running on port 8000
🔍 Checking port 8080...
⚠️  Error finding processes on port 8080: invalid attr name 'connections'
✅ No processes found running on port 8080
✅ Port cleanup completed successfully
🔍 Found available ports - API: 8000, Client: 8080
✅ CORS configured for origins: ['http://localhost:8080', 'http://127.0.0.1:8080', '*']
📡 Starting API server on 0.0.0.0:8000


INFO:     Started server process [2280]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
ERROR:    [Errno 10048] error while attempting to bind on address ('0.0.0.0', 8000): only one usage of each socket address (protocol/network address/port) is normally permitted
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.


✅ API server is ready at http://localhost:8000
🚀 Starting client application on port 8080...
⏳ Waiting for client to start...

✅ Demo application started successfully!
📡 API: http://localhost:8000
🌐 Client: http://localhost:8080
💡 If ports were busy, alternative ports were automatically selected
