In [29]:
import sys, os
import logging
import argparse
import multiprocessing
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from IPython.display import display, HTML

# Put the parent of the working directory on the import path (at most once),
# ahead of the project-local `pipelines` imports that follow.
# NOTE(review): `code_dir` is simply the parent of os.getcwd(); whether that is
# really the "Code" directory depends on where the kernel was started — confirm.
notebook_path = os.getcwd()
code_dir = os.path.split(notebook_path)[0]
if sys.path.count(code_dir) == 0:
    sys.path.append(code_dir)

from pipelines.clustering_pipeline import run_clustering
from pipelines.classification_pipeline import run_classification
from pipelines.regression_pipeline import run_regression
from pipelines.risk_pipeline import run_risk_assessment

# Session-wide logging setup: INFO and above, emitted through a stream handler
# with timestamp, level and logger name in front of every message.
# Note: logging.basicConfig leaves an already-configured root logger untouched,
# so re-running this cell does not stack additional handlers.
logging.basicConfig(
    handlers=[logging.StreamHandler()],
    format="%(asctime)s %(levelname)s %(name)s %(message)s",
    level=logging.INFO,
)



In [30]:
def select_visualization_type():
    """Prompt for which kind of plots to draw and return the choice.

    The menu is printed once and the answer is read from standard input.
    Entries that are not whole numbers, or that fall outside the menu, are
    rejected with a short message and the prompt is repeated, so callers
    always receive one of the values they branch on.

    Returns:
        int: 1 for 2D visualizations, 2 for 3D visualizations, 3 for both.

    Raises:
        EOFError, KeyboardInterrupt: propagated unchanged from ``input`` so an
            interrupted or non-interactive session is not trapped in the loop.
    """
    valid_choices = (1, 2, 3)

    print("Select visualization type:")
    print("1. 2D Visualizations")
    print("2. 3D Visualizations")
    print("3. Both")

    while True:
        raw = input("Enter your choice (1-3): ")
        try:
            choice = int(raw)
        except ValueError:
            # Non-numeric text used to crash the calling cell with ValueError.
            print(f"Invalid choice {raw!r}: please enter 1, 2 or 3.")
            continue
        if choice in valid_choices:
            return choice
        # Out-of-range numbers used to be returned and silently plot nothing.
        print(f"Invalid choice {raw!r}: please enter 1, 2 or 3.")

def plot_clustering_results(cluster_results):
    """Plot clustering output as 2D panels, a 3D scatter, or both.

    The user is asked (via ``select_visualization_type``) which views to draw.

    Args:
        cluster_results: Mapping whose ``'data'`` entry is the table to plot
            (presumably a pandas DataFrame — confirm against the clustering
            pipeline). The 2D view reads the ``cycle``, ``sensor1``,
            ``cluster`` and ``rul`` columns; the 3D view reads ``sensor1``,
            ``sensor2``, ``sensor3`` and ``cluster``.

    Returns:
        None. Figures are rendered as a side effect.
    """
    vis_type = select_visualization_type()
    data = cluster_results['data']

    if vis_type in [1, 3]:
        # Two panels side by side. An explicit 1x2 grid avoids the empty third
        # slot that a 1x3 subplot layout leaves when only two plots are drawn.
        fig2d, (ax_pattern, ax_rul) = plt.subplots(1, 2, figsize=(15, 5))

        sns.scatterplot(data=data, x='cycle', y='sensor1', hue='cluster', ax=ax_pattern)
        ax_pattern.set_title('Sensor 1 Degradation Pattern')

        sns.boxplot(data=data, x='cluster', y='rul', ax=ax_rul)
        ax_rul.set_title('RUL Distribution by Cluster')

        fig2d.tight_layout()
        # Render the matplotlib figure now, matching the other plot helpers
        # and keeping it ahead of the plotly output when both are requested.
        plt.show()

    if vis_type in [2, 3]:
        # Interactive 3D scatter of the first three sensors, coloured by cluster
        fig = go.Figure(data=[go.Scatter3d(
            x=data['sensor1'],
            y=data['sensor2'],
            z=data['sensor3'],
            mode='markers',
            marker=dict(
                size=5,
                color=data['cluster'],
                colorscale='Viridis',
            )
        )])
        fig.update_layout(title='3D Cluster Distribution')
        fig.show()

In [31]:
import os

# Recompute the working directory and its parent. The first cell already
# assigns the same two names, so this cell only matters when it is run alone.
notebook_path = os.getcwd()
code_dir = os.path.split(notebook_path)[0]

In [32]:
class Args:
    """Run configuration for the pipeline cells, held as class attributes."""

    # Phase selector: 0=all, 1=clustering, 2=classification, 3=regression, 4=risk
    phase = 0
    # <parent of the working directory>/Code/data, resolved when this class
    # body executes (absolute, because os.getcwd() is absolute)
    data_path = os.path.join(os.path.split(os.getcwd())[0], 'Code', 'data')
    # Comma-separated dataset identifiers
    datasets = "FD001,FD003"
    # None means the worker count is derived later from the CPU count
    n_jobs = None


args = Args()
# Echo the resolved location so a wrong working directory is easy to spot
print(f"Data path: {args.data_path}")

Data path: c:\Users\shriv.SHRI\Documents\GitHub\G_AI096_AI115_ECM025_CSE223\Code\data


In [33]:
import glob

# Sanity check: show the *.txt files under the train/ and test/ sub-directories
# of the configured data path, one list per line.
print("Available files in data directory:")
print(
    *(glob.glob(os.path.join(args.data_path, subdir, '*.txt')) for subdir in ('train', 'test')),
    sep="\n",
)

Available files in data directory:
['c:\\Users\\shriv.SHRI\\Documents\\GitHub\\G_AI096_AI115_ECM025_CSE223\\Code\\data\\train\\train_FD001.txt', 'c:\\Users\\shriv.SHRI\\Documents\\GitHub\\G_AI096_AI115_ECM025_CSE223\\Code\\data\\train\\train_FD002.txt', 'c:\\Users\\shriv.SHRI\\Documents\\GitHub\\G_AI096_AI115_ECM025_CSE223\\Code\\data\\train\\train_FD003.txt', 'c:\\Users\\shriv.SHRI\\Documents\\GitHub\\G_AI096_AI115_ECM025_CSE223\\Code\\data\\train\\train_FD004.txt']
['c:\\Users\\shriv.SHRI\\Documents\\GitHub\\G_AI096_AI115_ECM025_CSE223\\Code\\data\\test\\test_FD001.txt', 'c:\\Users\\shriv.SHRI\\Documents\\GitHub\\G_AI096_AI115_ECM025_CSE223\\Code\\data\\test\\test_FD002.txt', 'c:\\Users\\shriv.SHRI\\Documents\\GitHub\\G_AI096_AI115_ECM025_CSE223\\Code\\data\\test\\test_FD003.txt', 'c:\\Users\\shriv.SHRI\\Documents\\GitHub\\G_AI096_AI115_ECM025_CSE223\\Code\\data\\test\\test_FD004.txt']


In [34]:
# Determine number of parallel jobs: keep one core free (but never drop below
# one worker) unless the configuration pins an explicit count.
if args.n_jobs is None:
    n_jobs = max(1, multiprocessing.cpu_count() - 1)
else:
    n_jobs = args.n_jobs

# Parse datasets: split on commas, trim surrounding whitespace and drop empty
# entries so values such as "FD001, FD003," do not yield ' FD003' or ''.
# An unset, empty or all-blank value falls back to the default pair.
dataset_ids = [
    token.strip() for token in (args.datasets or "").split(',') if token.strip()
] or ["FD001", "FD003"]

print(f"Using {n_jobs} parallel jobs for processing")
print(f"Processing datasets: {dataset_ids}")


Using 7 parallel jobs for processing
Processing datasets: ['FD001', 'FD003']


In [35]:
# Phase 1 — clustering. Runs when "all" (0) or clustering alone (1) is selected;
# otherwise cluster_results stays None for the later cells.
cluster_results = None
if args.phase in (0, 1):
    print("Running Phase 1: Clustering for degradation stages...")
    # Only the data location is passed; n_jobs is not forwarded to this phase
    cluster_results = run_clustering(args.data_path)

    # Skip plotting when the pipeline returned nothing
    if cluster_results is not None:
        plot_clustering_results(cluster_results)

Running Phase 1: Clustering for degradation stages...
Loading dataset FD001...
Loaded 20631 training samples and 13096 test samples
Preprocessing data...


KeyboardInterrupt: 

In [None]:
# Phase 2 — classification. Runs when "all" (0) or classification alone (2) is
# selected; otherwise classification_results stays None.
classification_results = None
if args.phase in (0, 2):
    print("Running Phase 2: Classification for degradation stage prediction...")
    classification_results = run_classification(
        args.data_path,
        # Clustering output is handed over only in a full run; a standalone
        # classification run passes None instead.
        None if args.phase != 0 else cluster_results,
        n_jobs=n_jobs,
    )


In [None]:
def plot_classification_results(classification_results):
    """Draw the classification diagnostics the user asks for.

    The view (2D, 3D or both) is chosen interactively through
    ``select_visualization_type``.

    Args:
        classification_results: Mapping read by key. The 2D view uses
            ``'confusion_matrix'`` and ``'feature_importance'`` (the latter is
            accessed through ``.index`` and ``.values``). The 3D view uses the
            optional ``'decision_boundary'`` entry with ``'x'``, ``'y'``,
            ``'z'`` and ``'pred'`` members and is skipped when it is absent.

    Returns:
        None. Figures are rendered as a side effect.
    """
    vis_type = select_visualization_type()
    want_2d = vis_type in [1, 3]
    want_3d = vis_type in [2, 3]

    if want_2d:
        # Confusion matrix as an annotated heatmap (integer cell format)
        plt.figure(figsize=(10, 8))
        sns.heatmap(
            classification_results['confusion_matrix'],
            annot=True,
            fmt='d',
            cmap='Blues',
        )
        plt.title('Confusion Matrix')
        plt.show()

        # Feature importance as a bar chart, labels tilted for readability
        plt.figure(figsize=(12, 6))
        importance = classification_results['feature_importance']
        sns.barplot(x=importance.index, y=importance.values)
        plt.xticks(rotation=45)
        plt.title('Feature Importance')
        plt.show()

    # 3D decision boundary, drawn only when the results provide one
    if want_3d and 'decision_boundary' in classification_results:
        boundary = classification_results['decision_boundary']
        fig = go.Figure(data=go.Volume(
            x=boundary['x'],
            y=boundary['y'],
            z=boundary['z'],
            value=boundary['pred'],
            opacity=0.1,
            surface_count=20,
        ))
        fig.update_layout(title='3D Decision Boundary')
        fig.show()

In [None]:
# Phase 3 — regression. Runs when "all" (0) or regression alone (3) is
# selected; otherwise regression_results stays None.
regression_results = None
if args.phase in (0, 3):
    print("Running Phase 3: Regression model for time-to-failure prediction...")
    regression_results = run_regression(
        datasets=dataset_ids,
        classifier='random_forest',
        # This phase receives the parent of the data directory, not the
        # data directory itself
        base_path=os.path.split(args.data_path)[0],
        n_jobs=n_jobs,
    )


In [None]:
def plot_regression_results(regression_results):
    """Draw the regression diagnostics the user asks for.

    The view (2D, 3D or both) is chosen interactively through
    ``select_visualization_type``.

    Args:
        regression_results: Mapping read by key. The 2D view uses
            ``'actual'`` and ``'predicted'``; the 3D view uses ``'time'``,
            ``'sensor_values'``, ``'predictions'`` and ``'rul'``.
            NOTE(review): the two views read differently named prediction
            keys (``'predicted'`` vs ``'predictions'``) — confirm both are
            produced by the regression pipeline.

    Returns:
        None. Figures are rendered as a side effect.
    """
    vis_type = select_visualization_type()

    if vis_type in [1, 3]:
        # Actual vs predicted scatter with a dashed red y = x reference line
        plt.figure(figsize=(10, 6))
        actual = regression_results['actual']
        sns.scatterplot(x=actual, y=regression_results['predicted'])
        upper = max(actual)
        plt.plot([0, upper], [0, upper], 'r--')
        plt.xlabel('Actual RUL')
        plt.ylabel('Predicted RUL')
        plt.title('Actual vs Predicted RUL')
        plt.show()

    if vis_type in [2, 3]:
        # 3D scatter: time vs sensor values vs predictions, coloured by RUL
        marker_style = dict(
            size=5,
            color=regression_results['rul'],
            colorscale='Viridis',
        )
        scatter = go.Scatter3d(
            x=regression_results['time'],
            y=regression_results['sensor_values'],
            z=regression_results['predictions'],
            mode='markers',
            marker=marker_style,
        )
        fig = go.Figure(data=[scatter])
        fig.update_layout(title='3D RUL Prediction Visualization')
        fig.show()

In [None]:
def plot_risk_assessment(risk_results):
    """Plot risk-assessment output as 2D panels, a 3D surface, or both.

    The user is asked (via ``select_visualization_type``) which views to draw.

    Args:
        risk_results: Mapping read by key. The 2D view uses ``'risk_scores'``
            (histogram) and ``'risk_categories'`` (a table with ``category``
            and ``count`` columns); the 3D view uses ``'risk_surface'`` as the
            surface heights.

    Returns:
        None. Figures are rendered as a side effect.
    """
    vis_type = select_visualization_type()

    if vis_type in [1, 3]:
        # Two panels side by side. An explicit 1x2 grid avoids the empty third
        # slot that a 1x3 subplot layout leaves when only two plots are drawn.
        fig2d, (ax_scores, ax_categories) = plt.subplots(1, 2, figsize=(15, 5))

        sns.histplot(data=risk_results['risk_scores'], ax=ax_scores)
        ax_scores.set_title('Risk Score Distribution')

        sns.barplot(x='category', y='count',
                    data=risk_results['risk_categories'], ax=ax_categories)
        ax_categories.set_title('Risk Categories')

        fig2d.tight_layout()
        # Render the matplotlib figure now, matching the other plot helpers
        # and keeping it ahead of the plotly output when both are requested.
        plt.show()

    if vis_type in [2, 3]:
        # Interactive 3D risk surface
        fig = go.Figure(data=[go.Surface(
            z=risk_results['risk_surface'],
            colorscale='RdYlGn_r'
        )])
        fig.update_layout(title='3D Risk Surface')
        fig.show()

In [None]:
# Final status message.
# NOTE(review): among the cells shown here, run_risk_assessment is imported but
# never called, and the plot helpers for classification, regression and risk
# are defined but never invoked — confirm that is intended before relying on
# this message.
print("Pipeline execution complete!")
