In [19]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from scipy.fft import fft
from pathlib import Path
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from scipy import signal

Error in callback <function _enable_matplotlib_integration.<locals>.configure_once at 0x000002280A520220> (for post_run_cell), with arguments args (<ExecutionResult object at 2280a4d1310, execution_count=19 error_before_exec=None error_in_exec=None info=<ExecutionInfo object at 2280a4d1580, raw_cell="import numpy as np
import pandas as pd
import matp.." store_history=True silent=False shell_futures=True cell_id=vscode-notebook-cell:/c%3A/Users/Ahmed%20Ayaz/Vibration-Analysis-System/working_notebooks/Phase3_Feature_Engineering.ipynb#W0sZmlsZQ%3D%3D> result=None>,),kwargs {}:


AttributeError: module 'matplotlib' has no attribute 'backends'

In [10]:
# Directory holding the recordings of test set 1.
data_path_1 = Path("../datasets/1st_test")
# glob() yields entries in a filesystem-dependent order; sort so the listing is
# deterministic and consistent with how the other cells index into it.
files_1 = sorted(data_path_1.glob('*'))
print("num of files in set 1:", len(files_1))

num of files in set 1: 2156


In [11]:
# Directory holding the recordings of test set 2.
data_path_2 = Path("../datasets/2nd_test")
# glob() yields entries in a filesystem-dependent order; sort so the listing is
# deterministic and consistent with how the other cells index into it.
files_2 = sorted(data_path_2.glob('*'))
print("num of files in set 2:", len(files_2))

num of files in set 2: 984


In [12]:
# Directory holding the recordings of test set 3 (adjust to your folder layout).
data_path_3 = Path("../datasets/3rd_test")
# sorted() consumes the glob generator directly and returns a list.
files_3 = sorted(data_path_3.glob('*'))

# Report the first/last entry of the sorted listing and its size.
print("First file:", files_3[0].name)
print("Last file:", files_3[-1].name)
print("Total files in set 3:", len(files_3))

# Sanity check: collapsing the listing into a set should not shrink it.
unique_files = set(files_3)
print("\nUnique files:", len(unique_files))

First file: 2004.03.04.09.27.46
Last file: 2004.04.18.02.42.55
Total files in set 3: 6324

Unique files: 6324


In [14]:
# Cell 2: Key Analysis Functions from Phase 2
def analyze_frequency_bands(data, channel, sampling_rate=20000):
    """Split one channel of a recording into three band-passed signals.

    Args:
        data: 2-D array indexed as ``data[:, channel]`` (samples along axis 0).
        channel: Zero-based column index of the channel to filter.
        sampling_rate: Sampling rate used to normalise the band edges; the
            edges below are expressed in the same frequency unit.

    Returns:
        Dict with keys ``'low'``, ``'mid'`` and ``'high'`` (in that order),
        each mapping to the output of a 4th-order Butterworth band-pass
        (second-order sections, applied with ``sosfilt``) of the channel.
    """
    column = data[:, channel]
    half_rate = sampling_rate / 2  # Nyquist frequency; butter() expects edges relative to it

    # (lower edge, upper edge) for every band
    band_edges = {
        'low': (20, 1000),     # Basic rotation band
        'mid': (1000, 3000),   # Early warning band
        'high': (3000, 5000),  # Damage zone band
    }

    def _bandpass(lower, upper):
        # Design the filter for this band and run the channel through it once.
        sections = signal.butter(4, [lower / half_rate, upper / half_rate],
                                 btype='bandpass', output='sos')
        return signal.sosfilt(sections, column)

    return {name: _bandpass(lower, upper)
            for name, (lower, upper) in band_edges.items()}

def analyze_signal_characteristics(filtered_bands):
    """Compute summary metrics for every band-filtered signal.

    Args:
        filtered_bands: Mapping of band name -> 1-D NumPy array of samples
            (e.g. the dict returned by ``analyze_frequency_bands``).

    Returns:
        Dict keyed by band name. Each value is a dict with:
            'RMS':      root-mean-square of the signal,
            'Peak':     largest absolute sample value,
            'Crest':    Peak / RMS (NaN when RMS is not positive, e.g. an
                        all-zero band),
            'Kurtosis': ``scipy.stats.kurtosis`` with its default settings.
    """
    analysis = {}

    for band_name, signal_data in filtered_bands.items():
        # Compute RMS and peak once; both also feed the crest factor.
        rms = np.sqrt(np.mean(signal_data**2))
        peak = np.max(np.abs(signal_data))

        analysis[band_name] = {
            'RMS': rms,
            'Peak': peak,
            # Guard the ratio so a silent band gives NaN without a 0/0 warning.
            'Crest': peak / rms if rms > 0 else np.nan,
            'Kurtosis': stats.kurtosis(signal_data)
        }

    return analysis

In [15]:
def extract_bearing_features(data, channels):
    """
    Build a flat feature dictionary for one bearing.

    Every channel is split into frequency bands, each band is summarised
    into metrics, and the results are flattened under keys of the form
    ``Ch<channel+1>_<band>_<metric>`` (channel numbers are 1-based in the key).

    Args:
        data: Raw vibration data array
        channels: Iterable of zero-based channel indices belonging to the bearing

    Returns:
        Dictionary mapping feature name -> metric value
    """
    features = {}

    for ch in channels:
        # Filter into bands, then reduce each band to its metrics.
        per_band = analyze_signal_characteristics(analyze_frequency_bands(data, ch))

        # Flatten the band -> metric nesting into one level, tagged by channel.
        features.update({
            f'Ch{ch+1}_{band}_{metric}': value
            for band, metrics in per_band.items()
            for metric, value in metrics.items()
        })

    return features

In [16]:
def _labelled_features(sample, channels, state, failure_type):
    """Extract features for `channels` of one recording and attach its labels."""
    row = extract_bearing_features(sample, channels)
    row['state'] = state
    row['failure_type'] = failure_type
    return row


def create_feature_dataset():
    """
    Create dataset with features from selected bearings (healthy and failing states)

    Reads recordings from the module-level ``data_path_1`` and ``data_path_2``
    directories and builds one row per (recording, bearing) pair:

        1. set 1, first file,            channels 5-6 -> healthy / inner_race
        2. set 1, 10th file from the end, channels 5-6 -> failing / inner_race
        3. set 1, file at 81% of the run, channels 7-8 -> failing / roller
        4. set 2, 10th file from the end, channel 1    -> failing / outer_race

    Returns:
        pandas DataFrame with one row per entry above; feature columns that do
        not apply to a row (other channels) are NaN.

    Raises:
        IndexError: if a directory holds too few files for the indices above.
    """
    # List and sort each directory once instead of re-globbing for every lookup.
    # Assumes lexical order of the file names is chronological — TODO confirm.
    set1_files = sorted(data_path_1.glob("*"))
    set2_files = sorted(data_path_2.glob("*"))

    # Set 1 - Inner Race (Bearing 3)
    bearing3_healthy = np.loadtxt(set1_files[0])
    bearing3_failing = np.loadtxt(set1_files[-10])

    # Set 1 - Roller (Bearing 4)
    bearing4_failing_idx = int(len(set1_files) * 0.81)  # Known failure point
    bearing4_failing = np.loadtxt(set1_files[bearing4_failing_idx])

    # Set 2 - Outer Race (Bearing 1)
    # Only the failing recording is used; the first (healthy) file of set 2 was
    # previously loaded but never turned into a row, so that load is dropped.
    bearing_s2_failing = np.loadtxt(set2_files[-10])

    feature_data = [
        _labelled_features(bearing3_healthy, [4, 5], 'healthy', 'inner_race'),   # Channels 5,6
        _labelled_features(bearing3_failing, [4, 5], 'failing', 'inner_race'),
        _labelled_features(bearing4_failing, [6, 7], 'failing', 'roller'),       # Channels 7,8
        _labelled_features(bearing_s2_failing, [0], 'failing', 'outer_race'),    # Channel 1
    ]

    # Convert to DataFrame
    return pd.DataFrame(feature_data)

# Build the labelled feature table from the recordings on disk
feature_df = create_feature_dataset()

# Every column except the two label columns (state, failure_type) is a feature
n_feature_columns = feature_df.shape[1] - 2
print("Features extracted:", n_feature_columns)
print("\nSample features:")
print(list(feature_df.columns)[:5])  # Show first 5 column names

Features extracted: 60

Sample features:
['Ch5_low_RMS', 'Ch5_low_Peak', 'Ch5_low_Crest', 'Ch5_low_Kurtosis', 'Ch5_mid_RMS']


In [17]:
# Cell 5: Create our first visualization - Feature Distribution by Failure Type
def plot_feature_distributions(df, features_to_plot):
    """
    Plot distribution of selected features grouped by failure type

    One violin plot is drawn per feature, stacked vertically in a single
    figure. Only rows whose ``state`` is ``'failing'`` are plotted.

    Args:
        df: DataFrame with 'state' and 'failure_type' columns plus one column
            per entry of `features_to_plot`.
        features_to_plot: Sequence of feature column names to draw.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    n_features = len(features_to_plot)
    if n_features == 0:
        # Nothing to draw; avoid requesting a zero-height figure.
        return

    # The row filter does not depend on the feature, so compute it once.
    failing_rows = df[df['state'] == 'failing']

    # squeeze=False keeps `axes` 2-D even when only one feature is requested.
    fig, axes = plt.subplots(n_features, 1,
                             figsize=(15, 5 * n_features), squeeze=False)

    for ax, feature in zip(axes[:, 0], features_to_plot):
        # Create violin plot on this feature's own axes
        sns.violinplot(data=failing_rows, x='failure_type', y=feature, ax=ax)

        ax.set_title(f'{feature} Distribution by Failure Type')
        ax.tick_params(axis='x', labelrotation=45)

    fig.tight_layout()
    plt.show()

# One feature per failure mode, each taken from that bearing's own channel:
#   Ch5_high_RMS      - high frequency RMS for inner race
#   Ch7_mid_Crest     - mid frequency Crest factor for roller
#   Ch1_low_Kurtosis  - low frequency Kurtosis for outer race
features_to_plot = ['Ch5_high_RMS', 'Ch7_mid_Crest', 'Ch1_low_Kurtosis']

plot_feature_distributions(df=feature_df, features_to_plot=features_to_plot)

AttributeError: module 'matplotlib' has no attribute 'backends'

Error in callback <function _enable_matplotlib_integration.<locals>.configure_once at 0x000002280A520220> (for post_run_cell), with arguments args (<ExecutionResult object at 2280b15a4b0, execution_count=17 error_before_exec=None error_in_exec=module 'matplotlib' has no attribute 'backends' info=<ExecutionInfo object at 2280b15abd0, raw_cell="# Cell 5: Create our first visualization - Feature.." store_history=True silent=False shell_futures=True cell_id=vscode-notebook-cell:/c%3A/Users/Ahmed%20Ayaz/Vibration-Analysis-System/working_notebooks/Phase3_Feature_Engineering.ipynb#X10sZmlsZQ%3D%3D> result=None>,),kwargs {}:


AttributeError: module 'matplotlib' has no attribute 'backends'