In [None]:
import os
import sys
import glob 
import logging
import multiprocessing
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from IPython.display import display, HTML, clear_output
import ipywidgets as widgets
import numpy as np
import pandas as pd

# Put the parent of the current working directory on the import path
# (presumably the project's Code directory, so the ``pipelines`` imports
# that follow can be resolved - confirm against the repository layout).
notebook_path = os.getcwd()
code_dir = os.path.split(notebook_path)[0]
if sys.path.count(code_dir) == 0:
    sys.path.append(code_dir)

from pipelines.clustering_pipeline import run_clustering
from pipelines.classification_pipeline import run_classification
from pipelines.regression_pipeline import run_regression
from pipelines.risk_pipeline import run_risk_assessment



In [None]:
# Root logger setup: INFO and above, one stream handler, and a line format
# of "<time> <level> <logger name> <message>".
logging.basicConfig(
    handlers=[logging.StreamHandler()],
    format="%(asctime)s %(levelname)s %(name)s %(message)s",
    level=logging.INFO,
)

# Verify CMAPSS data directory exists
def validate_data_path(data_path, datasets=None):
    """Check that the data directory and its train/test files are present.

    For every dataset ID the files ``train/train_<ID>.txt`` and
    ``test/test_<ID>.txt`` must exist underneath ``data_path``.

    Args:
        data_path: Path of the data directory (str or path-like).
        datasets: Optional dataset IDs to check, either an iterable of
            strings or a single comma-separated string. When omitted, all
            four subsets (FD001-FD004) are required, as before. An empty
            selection only checks that the directory itself exists.

    Raises:
        TypeError: If ``data_path`` is None, i.e. it was never configured.
        FileNotFoundError: If the directory or any required file is missing.
    """
    # Fail with an actionable message instead of the opaque TypeError that
    # os.path.exists(None) would raise on its own.
    if data_path is None:
        raise TypeError(
            "data_path is not set (got None); point it at the CMAPSS data directory"
        )
    if not os.path.exists(data_path):
        raise FileNotFoundError(f"Data directory not found: {data_path}")

    if datasets is None:
        datasets = ('FD001', 'FD002', 'FD003', 'FD004')
    elif isinstance(datasets, str):
        # Iterating a bare string would yield single characters, so accept
        # the comma-separated form explicitly.
        datasets = [name.strip() for name in datasets.split(',') if name.strip()]

    # Same ordering as before: train then test, dataset by dataset.
    required_files = [
        os.path.join(data_path, split, f'{split}_{dataset}.txt')
        for dataset in datasets
        for split in ('train', 'test')
    ]
    missing_files = [f for f in required_files if not os.path.exists(f)]
    if missing_files:
        raise FileNotFoundError(f"Missing required files: {missing_files}")

In [None]:
def create_interactive_controls():
    """Build the four configuration widgets for the pipeline panel.

    Returns:
        A 4-tuple, in this order:
        a Dropdown of phase codes (0 = all phases, 1-4 = a single phase),
        a SelectMultiple of dataset IDs with FD001 and FD003 preselected,
        an IntSlider for the number of parallel jobs (1 .. CPU count,
        starting at CPU count - 1), and
        RadioButtons choosing 2D (1), 3D (2) or both (3) visualizations.
    """
    cpu_total = multiprocessing.cpu_count()

    phase_choices = [
        ('All Phases', 0),
        ('Clustering Only', 1),
        ('Classification Only', 2),
        ('Regression Only', 3),
        ('Risk Assessment Only', 4),
    ]
    dataset_choices = ['FD001', 'FD002', 'FD003', 'FD004']
    vis_choices = [
        ('2D Visualizations', 1),
        ('3D Visualizations', 2),
        ('Both', 3),
    ]

    phase_selector = widgets.Dropdown(
        options=phase_choices,
        value=0,
        description='Phase:',
        style={'description_width': 'initial'},
    )
    dataset_selector = widgets.SelectMultiple(
        options=dataset_choices,
        value=['FD001', 'FD003'],
        description='Datasets:',
        style={'description_width': 'initial'},
    )
    jobs_slider = widgets.IntSlider(
        value=cpu_total - 1,
        min=1,
        max=cpu_total,
        description='Parallel Jobs:',
        style={'description_width': 'initial'},
    )
    # No explicit initial value is passed for the visualization choice.
    vis_selector = widgets.RadioButtons(
        options=vis_choices,
        description='Visualization:',
        style={'description_width': 'initial'},
    )

    return phase_selector, dataset_selector, jobs_slider, vis_selector

# Module-level widget handles; the run callback reads their current values.
(
    phase_widget,
    dataset_widget,
    n_jobs_widget,
    vis_type_widget,
) = create_interactive_controls()

In [None]:
def plot_clustering_results(cluster_results, vis_type):
    """Render the clustering plots requested by ``vis_type``.

    Args:
        cluster_results: Mapping whose ``'data'`` entry is indexed by column
            name (presumably a DataFrame - confirm against
            ``run_clustering``). The 2D panel reads ``time_cycles``,
            ``sensor1``, ``cluster`` and ``RUL``; the 3D scatter reads
            ``sensor1``-``sensor3`` and ``cluster``.
        vis_type: 1 draws the 2D panel, 2 the 3D scatter, 3 both. Any other
            value draws nothing.

    Any exception raised while plotting is caught and reported with
    ``print``; nothing is returned.
    """
    try:
        if vis_type in [1, 3]:
            # One figure, two panels: sensor trace left, RUL spread right.
            _, (trace_ax, rul_ax) = plt.subplots(1, 2, figsize=(15, 5))

            sns.scatterplot(
                data=cluster_results['data'],
                x='time_cycles',
                y='sensor1',
                hue='cluster',
                palette='viridis',
                ax=trace_ax,
            )
            trace_ax.set_title('Sensor 1 Readings by Cluster')

            sns.boxplot(
                data=cluster_results['data'],
                x='cluster',
                y='RUL',
                palette='viridis',
                ax=rul_ax,
            )
            rul_ax.set_title('RUL Distribution by Cluster')
            plt.show()

        if vis_type in [2, 3]:
            # Interactive scatter over the first three sensors, coloured by
            # cluster label.
            points = cluster_results['data']
            xs, ys, zs = points['sensor1'], points['sensor2'], points['sensor3']
            marker_style = dict(
                size=5,
                color=points['cluster'],
                colorscale='Viridis',
            )
            cluster_trace = go.Scatter3d(
                x=xs, y=ys, z=zs, mode='markers', marker=marker_style
            )
            fig = go.Figure(data=[cluster_trace])
            fig.update_layout(title='3D Cluster Distribution')
            fig.show()
    except Exception as exc:
        print(f"Error in clustering visualization: {exc}")



In [None]:
def plot_classification_results(classification_results, vis_type):
    """Render the classification plots requested by ``vis_type``.

    Args:
        classification_results: Mapping read by key. The 2D plots use
            ``'confusion_matrix'`` (drawn with integer annotations) and
            ``'feature_importance'`` (wrapped in a ``pd.Series``); the 3D
            plot uses ``'feature_space'`` with columns ``PC1``, ``PC2``,
            ``PC3`` and ``predicted_class``.
        vis_type: 1 draws the 2D plots, 2 the 3D scatter, 3 both. Any other
            value draws nothing.

    Any exception raised while plotting is caught and reported with
    ``print``; nothing is returned.
    """
    try:
        if vis_type in [1, 3]:
            # Confusion matrix heatmap on its own figure.
            _, matrix_ax = plt.subplots(figsize=(10, 8))
            sns.heatmap(
                classification_results['confusion_matrix'],
                annot=True,
                fmt='d',
                cmap='Blues',
                ax=matrix_ax,
            )
            matrix_ax.set_title('Confusion Matrix')
            plt.show()

            # Feature importance bars; labels tilted so long names fit.
            _, importance_ax = plt.subplots(figsize=(12, 6))
            importances = pd.Series(classification_results['feature_importance'])
            sns.barplot(x=importances.index, y=importances.values, ax=importance_ax)
            plt.xticks(rotation=45)
            importance_ax.set_title('Feature Importance')
            plt.tight_layout()
            plt.show()

        if vis_type in [2, 3]:
            # Predicted classes in the PC1/PC2/PC3 space.
            boundary_fig = px.scatter_3d(
                classification_results['feature_space'],
                x='PC1',
                y='PC2',
                z='PC3',
                color='predicted_class',
                title='3D Decision Boundary',
            )
            boundary_fig.show()
    except Exception as exc:
        print(f"Error in classification visualization: {exc}")



In [None]:
def plot_regression_results(regression_results, vis_type):
    """Render the regression plots requested by ``vis_type``.

    Args:
        regression_results: Mapping read by key. The 2D panel uses
            ``'actual'`` and ``'predicted'`` (subtracted element-wise, so
            presumably arrays or Series - confirm). The 3D scatter uses
            ``'time'``, ``'sensor_values'``, ``'predictions'`` and ``'rul'``.
        vis_type: 1 draws the 2D panel, 2 the 3D scatter, 3 both. Any other
            value draws nothing.

    Any exception raised while plotting is caught and reported with
    ``print``; nothing is returned.
    """
    try:
        if vis_type in [1, 3]:
            _, (fit_ax, error_ax) = plt.subplots(1, 2, figsize=(15, 5))

            # Actual vs Predicted, with a dashed y = x reference line that
            # runs from 0 to the largest actual value.
            actual = regression_results['actual']
            predicted = regression_results['predicted']
            fit_ax.scatter(actual, predicted, alpha=0.5)
            upper = max(actual)
            fit_ax.plot([0, upper], [0, upper], 'r--')
            fit_ax.set_xlabel('Actual RUL')
            fit_ax.set_ylabel('Predicted RUL')
            fit_ax.set_title('Actual vs Predicted RUL')

            # Error Distribution (predicted minus actual).
            residuals = predicted - actual
            sns.histplot(residuals, kde=True, ax=error_ax)
            error_ax.set_title('Prediction Error Distribution')
            plt.show()

        if vis_type in [2, 3]:
            # NOTE(review): this branch reads 'predictions' and 'rul' while
            # the 2D branch reads 'predicted' and 'actual' - confirm that
            # run_regression really returns both sets of keys.
            xs = regression_results['time']
            ys = regression_results['sensor_values']
            zs = regression_results['predictions']
            marker_style = dict(
                size=5,
                color=regression_results['rul'],
                colorscale='Viridis',
            )
            rul_trace = go.Scatter3d(
                x=xs, y=ys, z=zs, mode='markers', marker=marker_style
            )
            fig = go.Figure(data=[rul_trace])
            fig.update_layout(title='3D RUL Prediction Visualization')
            fig.show()
    except Exception as exc:
        print(f"Error in regression visualization: {exc}")

In [None]:
def run_pipeline_interactive(b):
    """Button callback: run the selected pipeline phases and plot the results.

    The phase, datasets, job count and visualization mode are read from the
    module-level widgets. The data directory is read from a notebook-level
    variable named ``DATA_PATH``, which must be defined in a cell before the
    button is clicked (previously the path was hard-wired to ``None``, so
    every run failed during validation).

    Args:
        b: Argument supplied by ``Button.on_click``; unused.

    Exceptions raised while running are caught and printed rather than
    propagated, and all matplotlib figures are closed on the way out.
    """
    try:
        # NOTE(review): clear_output() targets whatever output area this
        # callback writes to; confirm it does not also wipe the control
        # panel - an ipywidgets.Output widget may be a safer target.
        clear_output(wait=True)

        phase = phase_widget.value
        datasets = list(dataset_widget.value)
        n_jobs = n_jobs_widget.value
        vis_type = vis_type_widget.value

        if not datasets:
            raise ValueError("Please select at least one dataset")

        print("Running pipeline with:")
        print(f"Phase: {phase}")
        print(f"Datasets: {','.join(datasets)}")
        print(f"Parallel Jobs: {n_jobs}")

        # Looked up through globals() so a missing definition yields the
        # explicit message below instead of a NameError.
        data_path = globals().get('DATA_PATH')
        if not data_path:
            raise ValueError(
                "Data path is not configured: define DATA_PATH (the CMAPSS "
                "data directory) in a notebook cell before running the pipeline"
            )
        validate_data_path(data_path)

        # Run selected phases
        cluster_results = None
        if phase in (0, 1):
            print("\nRunning Phase 1: Clustering...")
            cluster_results = run_clustering(data_path)
            if cluster_results:
                plot_clustering_results(cluster_results, vis_type)

        if phase in (0, 2):
            print("\nRunning Phase 2: Classification...")
            # cluster_results is only populated when clustering ran in this
            # call (phase 0); for phase 2 alone it is still None.
            classification_results = run_classification(
                data_path,
                cluster_results,
                n_jobs=n_jobs
            )
            if classification_results:
                plot_classification_results(classification_results, vis_type)

        if phase in (0, 3):
            print("\nRunning Phase 3: Regression...")
            # normpath drops a trailing separator; without it dirname()
            # would return the data directory itself instead of its parent.
            base_path = os.path.dirname(os.path.normpath(data_path))
            regression_results = run_regression(
                datasets=datasets,
                classifier='random_forest',
                base_path=base_path,
                n_jobs=n_jobs
            )
            if regression_results:
                plot_regression_results(regression_results, vis_type)

        if phase in (0, 4):
            # NOTE(review): the phase dropdown offers risk assessment and the
            # notebook imports run_risk_assessment, but its call signature is
            # not visible here, so it is reported as skipped rather than
            # silently ignored.
            print("\nPhase 4: Risk Assessment is not wired into this notebook yet; skipped.")

        print("\nPipeline execution complete!")

    except Exception as e:
        print(f"Error running pipeline: {str(e)}")
    finally:
        plt.close('all')  # Cleanup plots

# Build the run button, hook it to the pipeline callback, and render it
# below the configuration widgets.
run_button = widgets.Button(
    tooltip='Click to run the pipeline',
    button_style='success',
    description='Run Pipeline',
)
run_button.on_click(run_pipeline_interactive)

# Display all controls
display(
    widgets.VBox(
        children=[
            widgets.HTML(value="<h3>Pipeline Configuration</h3>"),
            phase_widget,
            dataset_widget,
            n_jobs_widget,
            vis_type_widget,
            run_button,
        ]
    )
)