In [1]:
pip install tensorflow pandas numpy matplotlib scikit-learn

Collecting tensorflow
  Downloading tensorflow-2.18.0-cp312-cp312-win_amd64.whl.metadata (3.3 kB)
Collecting pandas
  Downloading pandas-2.2.3-cp312-cp312-win_amd64.whl.metadata (19 kB)
Collecting numpy
  Downloading numpy-2.2.3-cp312-cp312-win_amd64.whl.metadata (60 kB)
Collecting matplotlib
  Downloading matplotlib-3.10.0-cp312-cp312-win_amd64.whl.metadata (11 kB)
Collecting scikit-learn
  Downloading scikit_learn-1.6.1-cp312-cp312-win_amd64.whl.metadata (15 kB)
Collecting tensorflow-intel==2.18.0 (from tensorflow)
  Downloading tensorflow_intel-2.18.0-cp312-cp312-win_amd64.whl.metadata (4.9 kB)
Collecting absl-py>=1.0.0 (from tensorflow-intel==2.18.0->tensorflow)
  Downloading absl_py-2.1.0-py3-none-any.whl.metadata (2.3 kB)
Collecting astunparse>=1.6.0 (from tensorflow-intel==2.18.0->tensorflow)
  Downloading astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting flatbuffers>=24.3.25 (from tensorflow-intel==2.18.0->tensorflow)
  Downloading flatbuffers-25.2.10-py2.py3-n

In [1]:
import numpy as np
import pandas as pd
import scipy
import sklearn
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from scipy.stats import skew, kurtosis, mode

print("numpy version:", np.__version__)
print("pandas version:", pd.__version__)
print("scipy version:", scipy.__version__)
print("scikit-learn version:", sklearn.__version__)

numpy version: 2.0.2
pandas version: 2.2.3
scipy version: 1.15.2
scikit-learn version: 1.6.1


In [2]:
# Load the datasets
data_18_noleak = pd.read_csv('Accelerometer_readings/LO_NL_0.18 LPS_A2.csv')
data_noFlow_noleak = pd.read_csv('Accelerometer_readings/LO_NL_ND_A2.csv')
data_47_noleak = pd.read_csv('Accelerometer_readings/LO_NL_0.47 LPS_A2.csv')

data_18_gasket = pd.read_csv('Accelerometer_readings/LO_GL_0.18 LPS_A2.csv')
data_gasket_noFlow = pd.read_csv('Accelerometer_readings/LO_GL_ND_A2.csv')
data_47_nd = pd.read_csv('Accelerometer_readings/LO_GL_0.47 LPS_A2.csv')

# Combine No Leak and Leak data
no_leak_data = pd.concat([data_18_noleak , data_noFlow_noleak , data_47_noleak])
no_leak_data['Category'] = 'No Leak'

gasket_leak_data = pd.concat([data_18_gasket , data_gasket_noFlow , data_47_nd])
gasket_leak_data['Category'] = 'Leak'

# Combine all data
combined_data = pd.concat([no_leak_data ,gasket_leak_data ]).reset_index(drop=True)

In [4]:
num_rows = combined_data.shape[0]
print(f"Number of rows: {num_rows}")

Number of rows: 5558650


In [3]:
from scipy.stats import skew, kurtosis
import numpy as np
import pandas as pd

def extract_features(df):
    features = pd.DataFrame()

    # Time-Domain Features
    features['Mean'] = df['Value'].rolling(window=6).mean()
    features['Std'] = df['Value'].rolling(window=6).std()
    features['Min'] = df['Value'].rolling(window=6).min()
    features['Max'] = df['Value'].rolling(window=6).max()
    features['Skewness'] = df['Value'].rolling(window=6).apply(lambda x: skew(x.dropna()), raw=False)
    features['Kurtosis'] = df['Value'].rolling(window=6).apply(lambda x: kurtosis(x.dropna()), raw=False)
    features['PeakToPeak'] = features['Max'] - features['Min']
    features['Median'] = df['Value'].rolling(window=6).median()
    features['Mode'] = df['Value'].rolling(window=6).apply(lambda x: x.mode()[0] if not x.mode().empty else np.nan)
    features['FirstQuartile'] = df['Value'].rolling(window=6).quantile(0.25)
    features['ThirdQuartile'] = df['Value'].rolling(window=6).quantile(0.75)
    features['RootMeanSquare'] = np.sqrt((df['Value'] ** 2).rolling(window=6).mean())
    features['RootSumSquares'] = np.sqrt((df['Value'] ** 2).sum())
    features['PeakToRMS'] = features['PeakToPeak'] / features['RootMeanSquare']
    features['Variance'] = df['Value'].rolling(window=6).var()

    # Frequency-Domain Features (from FFT)
    df_fft = np.fft.fft(df['Value'].fillna(0))  # Fill NA for FFT calculation
    fft_abs = np.abs(df_fft)
    features['FFT_Mean'] = np.mean(fft_abs)
    features['FFT_Std'] = np.std(fft_abs)
    features['FFT_Min'] = np.min(fft_abs)
    features['FFT_Max'] = np.max(fft_abs)
    features['FFT_Skewness'] = skew(fft_abs)
    features['FFT_Kurtosis'] = kurtosis(fft_abs)
    features['FFT_PeakToPeak'] = np.ptp(fft_abs)

   

    # Drop rows with NaN caused by rolling operations
    features = features.dropna()

    return features

In [4]:
# Extract features
features = extract_features(combined_data)

# Add category labels
features['Category'] = combined_data['Category'][features.index]  # Ensure alignment

# Convert category to binary (0: No Leak, 1: Leak)
features['Category'] = features['Category'].map({'No Leak': 0, 'Leak': 1})

# Separate features and labels
X = features.drop(columns=['Category'])
y = features['Category']

# Feature scaling
scaler = MinMaxScaler()
X_scaled = pd.DataFrame(scaler.fit_transform(X), columns=X.columns)

In [1]:
# ... [Keep all previous code up to feature scaling] ...

# ============================================
# NEW: Bagged Trees Model Setup
# ============================================
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

# Base estimator
base_tree = DecisionTreeClassifier(
    max_depth=10,
    min_samples_split=5,
    class_weight='balanced'
)

# Updated Bagging ensemble with correct parameter name
model = BaggingClassifier(
    estimator=base_tree,  # Changed from base_estimator to estimator
    n_estimators=200,
    max_samples=0.8,
    max_features=0.8,
    bootstrap=True,
    bootstrap_features=False,
    n_jobs=-1,
    random_state=42
)

# ============================================
# Model Training & Evaluation (Same as Before)
# ============================================
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42, stratify=y
)

model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print("Classification Report:")
print(classification_report(y_test, y_pred))

print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))

print("\nAccuracy:", accuracy_score(y_test, y_pred))

NameError: name 'train_test_split' is not defined

In [18]:
import joblib

# Save the model
joblib.dump(model, 'bagging_model.pkl')

print("Model saved successfully!")


Model saved successfully!


In [19]:
# Load the saved model
loaded_model = joblib.load('bagging_model.pkl')

# Test the loaded model
y_pred_loaded = loaded_model.predict(X_test)
print("Loaded Model Accuracy:", accuracy_score(y_test, y_pred_loaded))


Loaded Model Accuracy: 0.8302068219862935
