# ADC1



## **Testing of Probe 1**
To conduct experiments, **ADC1** (data acquisition device) is used consistently throughout. **Probe 1** is kept constant, and the machine learning model is **always trained on Probe 1**. The model is then tested on **Probe 2** and **Probe 3** to evaluate its generalization.

The **sampling rate** is varied across different tests, ranging from **10 MSPS to 300 MSPS**.

### Training on **Probe 1** data and Testing on **Probe 2** data
To test how the model trained on Probe 1 copes with a probe change, its results are evaluated on Probe 2 and Probe 3.

#### **Sampling Rate 10MSPS**
This section contains data collected using a sampling rate of **10MSPS**. The model is trained on **Probe 1** and tested on **Probe 2**.

In [30]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ------------------------------------------------------------------
# Experiment settings (fixed for this cell)
# ------------------------------------------------------------------
# Root folder that holds the per-probe feature files.
base_directory = r"C:\Users\awm21\Documents\Probe_Vari\Neural_Networks\Features_Vitis"

# ADC number, train/test probe dataset numbers and sampling rate (MSPS).
adc_num, train_probe_dataset, test_probe_dataset, sample_rate = 1, 1, 2, 10

# Probe labels are derived from the dataset numbers above.
train_probe = "Probe" + str(train_probe_dataset)
test_probe = "Probe" + str(test_probe_dataset)

# Feature column names, ordered Time High, Freq High, Time Low, Freq Low.
# The same 19 descriptors are used for the "_H" group and the "_L" group.
_time_descriptors = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
)
_freq_descriptors = (
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [
    f"{descriptor}_{group}"
    for group in ("H", "L")
    for descriptor in _time_descriptors + _freq_descriptors
]

def load_channel_data(probe_name, adc_num, channel, base_dir=None, rate=None):
    """Load the saved feature array for one probe / ADC / channel combination.

    The array is read from
    ``<base_dir>/P<id>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``
    where ``<id>`` is the numeric suffix of ``probe_name``.

    Args:
        probe_name: Probe label such as ``"Probe1"``; its trailing digits
            select the ``P<id>_fd`` sub-folder.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.
        base_dir: Root folder of the feature files. Defaults to the
            module-level ``base_directory``.
        rate: Sampling-rate value used in the file name. Defaults to the
            module-level ``sample_rate``.

    Returns:
        The array stored in the ``.npy`` file, as returned by ``np.load``.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    import re  # local import: only this helper needs it

    if base_dir is None:
        base_dir = base_directory
    if rate is None:
        rate = sample_rate

    # Use every trailing digit so a multi-digit probe ("Probe12") maps to
    # "P12_fd"; taking only the last character would pick "P2_fd".
    trailing_digits = re.search(r"\d+$", probe_name)
    probe_id = trailing_digits.group() if trailing_digits else probe_name[-1]

    folder = os.path.join(base_dir, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path so a missing data set is easy to spot.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ------------------------------------------------------------------
# Load both channels of the training probe, then of the test probe
# ------------------------------------------------------------------
# NOTE: the *_ch0 / *_ch1 variables hold CH1 and CH2 respectively.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# CH1 rows come first (label 0.0), followed by the CH2 rows (label 1.0).
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ------------------------------------------------------------------
# Standardise features with statistics of the training probe only
# ------------------------------------------------------------------
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ------------------------------------------------------------------
# First forest: trained on every feature to rank them by importance
# ------------------------------------------------------------------
forest_settings = {
    "n_estimators": 50,
    "max_depth": 10,
    "min_samples_split": 10,
    "min_samples_leaf": 5,
    "max_features": "sqrt",
    "random_state": 42,
}
rf = RandomForestClassifier(**forest_settings).fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the median importance.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features together with their importance.
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ------------------------------------------------------------------
# Second forest: same settings, trained on the retained columns only
# ------------------------------------------------------------------
rf_selected = RandomForestClassifier(**forest_settings).fit(X_train_selected, y_train)

# Output folder named after the probe pair, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the final model, the scaler and the selected column indices.
for artifact_file, artifact in (
    ("RandomForest_Model.pkl", rf_selected),
    ("Scaler.pkl", scaler),
    ("Selected_Features.pkl", important_features),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and second-class (label 1) probabilities on the test probe.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ------------------------------------------------------------------
# Evaluation of the final model
# ------------------------------------------------------------------
print("\nModel Performance After Feature Selection:")
# Log loss is scored on the probabilities, every other metric on the labels.
for metric_label, metric_fn, metric_input in (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
):
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix and the error rates derived from its two rows.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0, 1] / (cm[0, 0] + cm[0, 1])
fn_rate = cm[1, 0] / (cm[1, 1] + cm[1, 0])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC1 CH1: (9998, 38)
✅ Loaded Probe1 ADC1 CH2: (9998, 38)
✅ Loaded Probe2 ADC1 CH1: (9998, 38)
✅ Loaded Probe2 ADC1 CH2: (9998, 38)

Top Important Features:
Max_H: Importance 0.0527
Min_H: Importance 0.0141
Mean_H: Importance 0.0989
Std_H: Importance 0.0076
Mean Deviation_H: Importance 0.0216
RMS_H: Importance 0.1667
Entropy_H: Importance 0.0460
Mean_Freq_H: Importance 0.0641
Variance_H: Importance 0.0496
Max_L: Importance 0.0324
Min_L: Importance 0.0696
Mean_L: Importance 0.0698
Std_L: Importance 0.0075
RMS_L: Importance 0.0890
Skewness_L: Importance 0.0072
Centroid_L: Importance 0.0114
Entropy_L: Importance 0.0299
Mean_Freq_L: Importance 0.0159
Variance_L: Importance 0.0973

Model Performance After Feature Selection:
Test Accuracy: 0.9518
Balanced Accuracy: 0.9518
MCC: 0.9064
Log Loss: 0.1153
F1 Score: 0.9536
Recall: 0.9912
Precision: 0.9188
              precision    recall  f1-score   support

         CH1       0.99      0.91      0.95      9998
         CH2       

#### **Sampling Rate 20MSPS**
This section contains data collected using a sampling rate of **20MSPS**. The model is trained on **Probe 1** and tested on **Probe 2**.

In [31]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ------------------------------------------------------------------
# Experiment settings (fixed for this cell)
# ------------------------------------------------------------------
# Root folder that holds the per-probe feature files.
base_directory = r"C:\Users\awm21\Documents\Probe_Vari\Neural_Networks\Features_Vitis"

# ADC number, train/test probe dataset numbers and sampling rate (MSPS).
adc_num, train_probe_dataset, test_probe_dataset, sample_rate = 1, 1, 2, 20

# Probe labels are derived from the dataset numbers above.
train_probe = "Probe" + str(train_probe_dataset)
test_probe = "Probe" + str(test_probe_dataset)

# Feature column names, ordered Time High, Freq High, Time Low, Freq Low.
# The same 19 descriptors are used for the "_H" group and the "_L" group.
_time_descriptors = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
)
_freq_descriptors = (
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [
    f"{descriptor}_{group}"
    for group in ("H", "L")
    for descriptor in _time_descriptors + _freq_descriptors
]

def load_channel_data(probe_name, adc_num, channel, base_dir=None, rate=None):
    """Load the saved feature array for one probe / ADC / channel combination.

    The array is read from
    ``<base_dir>/P<id>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``
    where ``<id>`` is the numeric suffix of ``probe_name``.

    Args:
        probe_name: Probe label such as ``"Probe1"``; its trailing digits
            select the ``P<id>_fd`` sub-folder.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.
        base_dir: Root folder of the feature files. Defaults to the
            module-level ``base_directory``.
        rate: Sampling-rate value used in the file name. Defaults to the
            module-level ``sample_rate``.

    Returns:
        The array stored in the ``.npy`` file, as returned by ``np.load``.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    import re  # local import: only this helper needs it

    if base_dir is None:
        base_dir = base_directory
    if rate is None:
        rate = sample_rate

    # Use every trailing digit so a multi-digit probe ("Probe12") maps to
    # "P12_fd"; taking only the last character would pick "P2_fd".
    trailing_digits = re.search(r"\d+$", probe_name)
    probe_id = trailing_digits.group() if trailing_digits else probe_name[-1]

    folder = os.path.join(base_dir, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path so a missing data set is easy to spot.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ------------------------------------------------------------------
# Load both channels of the training probe, then of the test probe
# ------------------------------------------------------------------
# NOTE: the *_ch0 / *_ch1 variables hold CH1 and CH2 respectively.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# CH1 rows come first (label 0.0), followed by the CH2 rows (label 1.0).
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ------------------------------------------------------------------
# Standardise features with statistics of the training probe only
# ------------------------------------------------------------------
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ------------------------------------------------------------------
# First forest: trained on every feature to rank them by importance
# ------------------------------------------------------------------
forest_settings = {
    "n_estimators": 50,
    "max_depth": 10,
    "min_samples_split": 10,
    "min_samples_leaf": 5,
    "max_features": "sqrt",
    "random_state": 42,
}
rf = RandomForestClassifier(**forest_settings).fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the median importance.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features together with their importance.
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ------------------------------------------------------------------
# Second forest: same settings, trained on the retained columns only
# ------------------------------------------------------------------
rf_selected = RandomForestClassifier(**forest_settings).fit(X_train_selected, y_train)

# Output folder named after the probe pair, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the final model, the scaler and the selected column indices.
for artifact_file, artifact in (
    ("RandomForest_Model.pkl", rf_selected),
    ("Scaler.pkl", scaler),
    ("Selected_Features.pkl", important_features),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and second-class (label 1) probabilities on the test probe.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ------------------------------------------------------------------
# Evaluation of the final model
# ------------------------------------------------------------------
print("\nModel Performance After Feature Selection:")
# Log loss is scored on the probabilities, every other metric on the labels.
for metric_label, metric_fn, metric_input in (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
):
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix and the error rates derived from its two rows.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0, 1] / (cm[0, 0] + cm[0, 1])
fn_rate = cm[1, 0] / (cm[1, 1] + cm[1, 0])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC1 CH1: (4998, 38)
✅ Loaded Probe1 ADC1 CH2: (4998, 38)
✅ Loaded Probe2 ADC1 CH1: (4998, 38)
✅ Loaded Probe2 ADC1 CH2: (4998, 38)

Top Important Features:
Max_H: Importance 0.1224
Min_H: Importance 0.0284
Mean_H: Importance 0.1486
Mean Deviation_H: Importance 0.0095
RMS_H: Importance 0.2172
Centroid_H: Importance 0.0009
Entropy_H: Importance 0.0173
Mean_Freq_H: Importance 0.0727
Variance_H: Importance 0.0400
Max_L: Importance 0.0024
Min_L: Importance 0.0255
Mean_L: Importance 0.1253
Std_L: Importance 0.0012
RMS_L: Importance 0.1210
Peak-to-Peak_L: Importance 0.0018
Entropy_L: Importance 0.0283
Spread_L: Importance 0.0004
Mean_Freq_L: Importance 0.0108
Variance_L: Importance 0.0256

Model Performance After Feature Selection:
Test Accuracy: 0.9999
Balanced Accuracy: 0.9999
MCC: 0.9998
Log Loss: 0.0020
F1 Score: 0.9999
Recall: 0.9998
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      4998
         CH2

#### **Sampling Rate 30MSPS**
This section contains data collected using a sampling rate of **30MSPS**. The model is trained on **Probe 1** and tested on **Probe 2**.

In [32]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ------------------------------------------------------------------
# Experiment settings (fixed for this cell)
# ------------------------------------------------------------------
# Root folder that holds the per-probe feature files.
base_directory = r"C:\Users\awm21\Documents\Probe_Vari\Neural_Networks\Features_Vitis"

# ADC number, train/test probe dataset numbers and sampling rate (MSPS).
adc_num, train_probe_dataset, test_probe_dataset, sample_rate = 1, 1, 2, 30

# Probe labels are derived from the dataset numbers above.
train_probe = "Probe" + str(train_probe_dataset)
test_probe = "Probe" + str(test_probe_dataset)

# Feature column names, ordered Time High, Freq High, Time Low, Freq Low.
# The same 19 descriptors are used for the "_H" group and the "_L" group.
_time_descriptors = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
)
_freq_descriptors = (
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [
    f"{descriptor}_{group}"
    for group in ("H", "L")
    for descriptor in _time_descriptors + _freq_descriptors
]

def load_channel_data(probe_name, adc_num, channel, base_dir=None, rate=None):
    """Load the saved feature array for one probe / ADC / channel combination.

    The array is read from
    ``<base_dir>/P<id>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``
    where ``<id>`` is the numeric suffix of ``probe_name``.

    Args:
        probe_name: Probe label such as ``"Probe1"``; its trailing digits
            select the ``P<id>_fd`` sub-folder.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.
        base_dir: Root folder of the feature files. Defaults to the
            module-level ``base_directory``.
        rate: Sampling-rate value used in the file name. Defaults to the
            module-level ``sample_rate``.

    Returns:
        The array stored in the ``.npy`` file, as returned by ``np.load``.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    import re  # local import: only this helper needs it

    if base_dir is None:
        base_dir = base_directory
    if rate is None:
        rate = sample_rate

    # Use every trailing digit so a multi-digit probe ("Probe12") maps to
    # "P12_fd"; taking only the last character would pick "P2_fd".
    trailing_digits = re.search(r"\d+$", probe_name)
    probe_id = trailing_digits.group() if trailing_digits else probe_name[-1]

    folder = os.path.join(base_dir, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path so a missing data set is easy to spot.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ------------------------------------------------------------------
# Load both channels of the training probe, then of the test probe
# ------------------------------------------------------------------
# NOTE: the *_ch0 / *_ch1 variables hold CH1 and CH2 respectively.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# CH1 rows come first (label 0.0), followed by the CH2 rows (label 1.0).
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ------------------------------------------------------------------
# Standardise features with statistics of the training probe only
# ------------------------------------------------------------------
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ------------------------------------------------------------------
# First forest: trained on every feature to rank them by importance
# ------------------------------------------------------------------
forest_settings = {
    "n_estimators": 50,
    "max_depth": 10,
    "min_samples_split": 10,
    "min_samples_leaf": 5,
    "max_features": "sqrt",
    "random_state": 42,
}
rf = RandomForestClassifier(**forest_settings).fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the median importance.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features together with their importance.
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ------------------------------------------------------------------
# Second forest: same settings, trained on the retained columns only
# ------------------------------------------------------------------
rf_selected = RandomForestClassifier(**forest_settings).fit(X_train_selected, y_train)

# Output folder named after the probe pair, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the final model, the scaler and the selected column indices.
for artifact_file, artifact in (
    ("RandomForest_Model.pkl", rf_selected),
    ("Scaler.pkl", scaler),
    ("Selected_Features.pkl", important_features),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and second-class (label 1) probabilities on the test probe.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ------------------------------------------------------------------
# Evaluation of the final model
# ------------------------------------------------------------------
print("\nModel Performance After Feature Selection:")
# Log loss is scored on the probabilities, every other metric on the labels.
for metric_label, metric_fn, metric_input in (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
):
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix and the error rates derived from its two rows.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0, 1] / (cm[0, 0] + cm[0, 1])
fn_rate = cm[1, 0] / (cm[1, 1] + cm[1, 0])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC1 CH1: (3332, 38)
✅ Loaded Probe1 ADC1 CH2: (3332, 38)
✅ Loaded Probe2 ADC1 CH1: (3332, 38)
✅ Loaded Probe2 ADC1 CH2: (3331, 38)

Top Important Features:
Max_H: Importance 0.0683
Min_H: Importance 0.0315
Mean_H: Importance 0.0935
Std_H: Importance 0.0002
Mean Deviation_H: Importance 0.0067
RMS_H: Importance 0.1295
Centroid_H: Importance 0.0015
Entropy_H: Importance 0.0179
Mean_Freq_H: Importance 0.0551
Kurtosis_Freq_H: Importance 0.0001
Variance_H: Importance 0.0675
Max_L: Importance 0.0063
Min_L: Importance 0.0191
Mean_L: Importance 0.2050
RMS_L: Importance 0.1282
Peak-to-Peak_L: Importance 0.0001
Entropy_L: Importance 0.0234
Mean_Freq_L: Importance 0.0096
Variance_L: Importance 0.1366

Model Performance After Feature Selection:
Test Accuracy: 0.9973
Balanced Accuracy: 0.9973
MCC: 0.9946
Log Loss: 0.0052
F1 Score: 0.9973
Recall: 0.9949
Precision: 0.9997
              precision    recall  f1-score   support

         CH1       0.99      1.00      1.00      3332
     

#### **Sampling Rate 40MSPS**
This section contains data collected using a sampling rate of **40MSPS**. The model is trained on **Probe 1** and tested on **Probe 2**.

In [33]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ------------------------------------------------------------------
# Experiment settings (fixed for this cell)
# ------------------------------------------------------------------
# Root folder that holds the per-probe feature files.
base_directory = r"C:\Users\awm21\Documents\Probe_Vari\Neural_Networks\Features_Vitis"

# ADC number, train/test probe dataset numbers and sampling rate (MSPS).
adc_num, train_probe_dataset, test_probe_dataset, sample_rate = 1, 1, 2, 40

# Probe labels are derived from the dataset numbers above.
train_probe = "Probe" + str(train_probe_dataset)
test_probe = "Probe" + str(test_probe_dataset)

# Feature column names, ordered Time High, Freq High, Time Low, Freq Low.
# The same 19 descriptors are used for the "_H" group and the "_L" group.
_time_descriptors = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
)
_freq_descriptors = (
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [
    f"{descriptor}_{group}"
    for group in ("H", "L")
    for descriptor in _time_descriptors + _freq_descriptors
]

def load_channel_data(probe_name, adc_num, channel, base_dir=None, rate=None):
    """Load the saved feature array for one probe / ADC / channel combination.

    The array is read from
    ``<base_dir>/P<id>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``
    where ``<id>`` is the numeric suffix of ``probe_name``.

    Args:
        probe_name: Probe label such as ``"Probe1"``; its trailing digits
            select the ``P<id>_fd`` sub-folder.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.
        base_dir: Root folder of the feature files. Defaults to the
            module-level ``base_directory``.
        rate: Sampling-rate value used in the file name. Defaults to the
            module-level ``sample_rate``.

    Returns:
        The array stored in the ``.npy`` file, as returned by ``np.load``.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    import re  # local import: only this helper needs it

    if base_dir is None:
        base_dir = base_directory
    if rate is None:
        rate = sample_rate

    # Use every trailing digit so a multi-digit probe ("Probe12") maps to
    # "P12_fd"; taking only the last character would pick "P2_fd".
    trailing_digits = re.search(r"\d+$", probe_name)
    probe_id = trailing_digits.group() if trailing_digits else probe_name[-1]

    folder = os.path.join(base_dir, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path so a missing data set is easy to spot.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ------------------------------------------------------------------
# Load both channels of the training probe, then of the test probe
# ------------------------------------------------------------------
# NOTE: the *_ch0 / *_ch1 variables hold CH1 and CH2 respectively.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# CH1 rows come first (label 0.0), followed by the CH2 rows (label 1.0).
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ------------------------------------------------------------------
# Standardise features with statistics of the training probe only
# ------------------------------------------------------------------
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ------------------------------------------------------------------
# First forest: trained on every feature to rank them by importance
# ------------------------------------------------------------------
forest_settings = {
    "n_estimators": 50,
    "max_depth": 10,
    "min_samples_split": 10,
    "min_samples_leaf": 5,
    "max_features": "sqrt",
    "random_state": 42,
}
rf = RandomForestClassifier(**forest_settings).fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the median importance.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features together with their importance.
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ------------------------------------------------------------------
# Second forest: same settings, trained on the retained columns only
# ------------------------------------------------------------------
rf_selected = RandomForestClassifier(**forest_settings).fit(X_train_selected, y_train)

# Output folder named after the probe pair, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the final model, the scaler and the selected column indices.
for artifact_file, artifact in (
    ("RandomForest_Model.pkl", rf_selected),
    ("Scaler.pkl", scaler),
    ("Selected_Features.pkl", important_features),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and second-class (label 1) probabilities on the test probe.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ------------------------------------------------------------------
# Evaluation of the final model
# ------------------------------------------------------------------
print("\nModel Performance After Feature Selection:")
# Log loss is scored on the probabilities, every other metric on the labels.
for metric_label, metric_fn, metric_input in (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
):
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix and the error rates derived from its two rows.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0, 1] / (cm[0, 0] + cm[0, 1])
fn_rate = cm[1, 0] / (cm[1, 1] + cm[1, 0])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC1 CH1: (2481, 38)
✅ Loaded Probe1 ADC1 CH2: (2481, 38)
✅ Loaded Probe2 ADC1 CH1: (2481, 38)
✅ Loaded Probe2 ADC1 CH2: (2481, 38)

Top Important Features:
Max_H: Importance 0.0684
Min_H: Importance 0.0280
Mean_H: Importance 0.0961
Std_H: Importance 0.0123
Mean Deviation_H: Importance 0.0169
RMS_H: Importance 0.1322
Centroid_H: Importance 0.0000
Entropy_H: Importance 0.0144
Skewness_Freq_H: Importance 0.0000
Mean_Freq_H: Importance 0.0579
Variance_H: Importance 0.0606
Max_L: Importance 0.0001
Min_L: Importance 0.0165
Mean_L: Importance 0.1996
RMS_L: Importance 0.1272
Peak-to-Peak_L: Importance 0.0002
Entropy_L: Importance 0.0301
Mean_Freq_L: Importance 0.0124
Variance_L: Importance 0.1273

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0000
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      2481
     

#### **Sampling Rate 50MSPS**
This section contains data collected using a sampling rate of **50MSPS**. The model is trained on **Probe 1** and tested on **Probe 2**.

In [34]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ------------------------------------------------------------------
# Experiment settings (fixed for this cell)
# ------------------------------------------------------------------
# Root folder that holds the per-probe feature files.
base_directory = r"C:\Users\awm21\Documents\Probe_Vari\Neural_Networks\Features_Vitis"

# ADC number, train/test probe dataset numbers and sampling rate (MSPS).
adc_num, train_probe_dataset, test_probe_dataset, sample_rate = 1, 1, 2, 50

# Probe labels are derived from the dataset numbers above.
train_probe = "Probe" + str(train_probe_dataset)
test_probe = "Probe" + str(test_probe_dataset)

# Feature column names, ordered Time High, Freq High, Time Low, Freq Low.
# The same 19 descriptors are used for the "_H" group and the "_L" group.
_time_descriptors = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
)
_freq_descriptors = (
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [
    f"{descriptor}_{group}"
    for group in ("H", "L")
    for descriptor in _time_descriptors + _freq_descriptors
]

def load_channel_data(probe_name, adc_num, channel, rate=None, base_dir=None):
    """Load the feature array stored for one probe / ADC / channel combination.

    The file is expected at
    ``<base_dir>/P<N>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``
    where ``<N>`` is the numeric suffix of ``probe_name``. No consistency
    checks are performed on the loaded data.

    Args:
        probe_name: Probe identifier such as ``"Probe1"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.
        rate: Sampling rate used in the file name. Defaults to the
            module-level ``sample_rate``.
        base_dir: Root folder containing the ``P<N>_fd`` sub-folders.
            Defaults to the module-level ``base_directory``.

    Returns:
        The array returned by ``np.load`` for the resolved file.

    Raises:
        FileNotFoundError: If the expected ``.npy`` file does not exist.
    """
    import re  # local import so the cell needs no extra top-level import

    if rate is None:
        rate = sample_rate
    if base_dir is None:
        base_dir = base_directory

    # Use the whole trailing number: taking only the last character would map
    # "Probe12" to "P2_fd". Presumably multi-digit probes live in "P<N>_fd"
    # as well -- TODO confirm against the data layout. Names without a
    # numeric suffix fall back to their last character.
    suffix = re.search(r"\d+$", probe_name)
    probe_id = suffix.group() if suffix else probe_name[-1]

    folder = os.path.join(base_dir, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ------------------------------------------------------------------
# Load features and build labels
# ------------------------------------------------------------------
# Channel 1 files become class 0 and channel 2 files become class 1; the
# training arrays come from train_probe, the evaluation arrays from test_probe.
train_ch0, train_ch1 = [load_channel_data(train_probe, adc_num, ch) for ch in (1, 2)]
test_ch0, test_ch1 = [load_channel_data(test_probe, adc_num, ch) for ch in (1, 2)]

X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ------------------------------------------------------------------
# Standardise features
# ------------------------------------------------------------------
# The scaler is fitted on the training data only and then applied to both sets.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ------------------------------------------------------------------
# First forest: rank features by importance
# ------------------------------------------------------------------
# Both forests in this cell share the same hyper-parameters and seed.
rf_hyperparams = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_hyperparams).fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features together with their importance.
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ------------------------------------------------------------------
# Second forest: retrain on the retained features only
# ------------------------------------------------------------------
rf_selected = RandomForestClassifier(**rf_hyperparams).fit(X_train_selected, y_train)

# Output folder is named after the train/test probes, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the fitted scaler and the retained column indices.
for artifact, file_name in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, file_name))

# Hard predictions and the predicted probability of class 1 on the test probe.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ------------------------------------------------------------------
# Evaluation on the test probe
# ------------------------------------------------------------------
print("\nModel Performance After Feature Selection:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_selected):.4f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred_selected):.4f}")
print(f"MCC: {matthews_corrcoef(y_test, y_pred_selected):.4f}")
print(f"Log Loss: {log_loss(y_test, y_probs_selected):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred_selected):.4f}")
print(f"Recall: {recall_score(y_test, y_pred_selected):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_selected):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix: row 0 holds the true class-0 samples, row 1 the true
# class-1 samples, so each error rate is the off-diagonal count over its row sum.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0, 1] / cm[0].sum()
fn_rate = cm[1, 0] / cm[1].sum()
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC1 CH1: (1998, 38)
✅ Loaded Probe1 ADC1 CH2: (1998, 38)
✅ Loaded Probe2 ADC1 CH1: (1998, 38)
✅ Loaded Probe2 ADC1 CH2: (1998, 38)

Top Important Features:
Max_H: Importance 0.0601
Min_H: Importance 0.0371
Mean_H: Importance 0.0818
Std_H: Importance 0.0381
Mean Deviation_H: Importance 0.0205
RMS_H: Importance 0.0808
Entropy_H: Importance 0.1200
Spread_H: Importance 0.0000
Mean_Freq_H: Importance 0.1024
Variance_H: Importance 0.0601
Min_L: Importance 0.0006
Mean_L: Importance 0.1433
Mean Deviation_L: Importance 0.0151
RMS_L: Importance 0.0803
Entropy_L: Importance 0.0390
Variance_L: Importance 0.1207

Model Performance After Feature Selection:
Test Accuracy: 0.9997
Balanced Accuracy: 0.9997
MCC: 0.9995
Log Loss: 0.0367
F1 Score: 0.9997
Recall: 0.9995
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      1998
         CH2       1.00      1.00      1.00      1998

    accuracy                           1.

#### **Sampling Rate 100MSPS**
This section contains data collected using a sampling rate of **100MSPS**. The model is trained on **Probe 1** and tested on **Probe 2**.

In [35]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ------------------------------------------------------------------
# Experiment configuration (fixed for this cell)
# ------------------------------------------------------------------
# Root folder that holds the per-probe feature sub-folders (machine-specific).
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

adc_num = 1              # ADC number
train_probe_dataset = 1  # dataset number of the probe used for training
test_probe_dataset = 2   # dataset number of the probe used for testing
sample_rate = 100        # sampling rate in MSPS

# Probe names are "Probe<N>", derived from the dataset numbers above.
train_probe = "Probe{}".format(train_probe_dataset)
test_probe = "Probe{}".format(test_probe_dataset)

# Feature labels in the order Time High, Freq High, Time Low, Freq Low:
# the same 19 statistic names, first with the "_H" suffix and then with "_L".
feature_names = [
    stat + suffix
    for suffix in ("_H", "_L")
    for stat in (
        # time-domain statistics
        "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS",
        "Skewness", "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
        # frequency-domain statistics
        "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
        "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
    )
]

def load_channel_data(probe_name, adc_num, channel, rate=None, base_dir=None):
    """Load the feature array stored for one probe / ADC / channel combination.

    The file is expected at
    ``<base_dir>/P<N>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``
    where ``<N>`` is the numeric suffix of ``probe_name``. No consistency
    checks are performed on the loaded data.

    Args:
        probe_name: Probe identifier such as ``"Probe1"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.
        rate: Sampling rate used in the file name. Defaults to the
            module-level ``sample_rate``.
        base_dir: Root folder containing the ``P<N>_fd`` sub-folders.
            Defaults to the module-level ``base_directory``.

    Returns:
        The array returned by ``np.load`` for the resolved file.

    Raises:
        FileNotFoundError: If the expected ``.npy`` file does not exist.
    """
    import re  # local import so the cell needs no extra top-level import

    if rate is None:
        rate = sample_rate
    if base_dir is None:
        base_dir = base_directory

    # Use the whole trailing number: taking only the last character would map
    # "Probe12" to "P2_fd". Presumably multi-digit probes live in "P<N>_fd"
    # as well -- TODO confirm against the data layout. Names without a
    # numeric suffix fall back to their last character.
    suffix = re.search(r"\d+$", probe_name)
    probe_id = suffix.group() if suffix else probe_name[-1]

    folder = os.path.join(base_dir, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ------------------------------------------------------------------
# Load features and build labels
# ------------------------------------------------------------------
# Channel 1 files become class 0 and channel 2 files become class 1; the
# training arrays come from train_probe, the evaluation arrays from test_probe.
train_ch0, train_ch1 = [load_channel_data(train_probe, adc_num, ch) for ch in (1, 2)]
test_ch0, test_ch1 = [load_channel_data(test_probe, adc_num, ch) for ch in (1, 2)]

X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ------------------------------------------------------------------
# Standardise features
# ------------------------------------------------------------------
# The scaler is fitted on the training data only and then applied to both sets.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ------------------------------------------------------------------
# First forest: rank features by importance
# ------------------------------------------------------------------
# Both forests in this cell share the same hyper-parameters and seed.
rf_hyperparams = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_hyperparams).fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features together with their importance.
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ------------------------------------------------------------------
# Second forest: retrain on the retained features only
# ------------------------------------------------------------------
rf_selected = RandomForestClassifier(**rf_hyperparams).fit(X_train_selected, y_train)

# Output folder is named after the train/test probes, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the fitted scaler and the retained column indices.
for artifact, file_name in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, file_name))

# Hard predictions and the predicted probability of class 1 on the test probe.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ------------------------------------------------------------------
# Evaluation on the test probe
# ------------------------------------------------------------------
print("\nModel Performance After Feature Selection:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_selected):.4f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred_selected):.4f}")
print(f"MCC: {matthews_corrcoef(y_test, y_pred_selected):.4f}")
print(f"Log Loss: {log_loss(y_test, y_probs_selected):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred_selected):.4f}")
print(f"Recall: {recall_score(y_test, y_pred_selected):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_selected):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix: row 0 holds the true class-0 samples, row 1 the true
# class-1 samples, so each error rate is the off-diagonal count over its row sum.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0, 1] / cm[0].sum()
fn_rate = cm[1, 0] / cm[1].sum()
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC1 CH1: (998, 38)
✅ Loaded Probe1 ADC1 CH2: (998, 38)
✅ Loaded Probe2 ADC1 CH1: (998, 38)
✅ Loaded Probe2 ADC1 CH2: (998, 38)

Top Important Features:
Max_H: Importance 0.0861
Min_H: Importance 0.0066
Mean_H: Importance 0.0985
Std_H: Importance 0.0055
Mean Deviation_H: Importance 0.0406
RMS_H: Importance 0.1472
Skewness_H: Importance 0.0007
Centroid_H: Importance 0.0055
Entropy_H: Importance 0.0095
Mean_Freq_H: Importance 0.0104
Variance_H: Importance 0.0821
Max_L: Importance 0.0105
Min_L: Importance 0.0172
Mean_L: Importance 0.1887
Std_L: Importance 0.0048
Mean Deviation_L: Importance 0.0267
RMS_L: Importance 0.1411
Entropy_L: Importance 0.0052
Variance_L: Importance 0.1124

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0012
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       998
         CH2      

#### **Sampling Rate 150MSPS**
This section contains data collected using a sampling rate of **150MSPS**. The model is trained on **Probe 1** and tested on **Probe 2**.



In [36]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ------------------------------------------------------------------
# Experiment configuration (fixed for this cell)
# ------------------------------------------------------------------
# Root folder that holds the per-probe feature sub-folders (machine-specific).
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

adc_num = 1              # ADC number
train_probe_dataset = 1  # dataset number of the probe used for training
test_probe_dataset = 2   # dataset number of the probe used for testing
sample_rate = 150        # sampling rate in MSPS

# Probe names are "Probe<N>", derived from the dataset numbers above.
train_probe = "Probe{}".format(train_probe_dataset)
test_probe = "Probe{}".format(test_probe_dataset)

# Feature labels in the order Time High, Freq High, Time Low, Freq Low:
# the same 19 statistic names, first with the "_H" suffix and then with "_L".
feature_names = [
    stat + suffix
    for suffix in ("_H", "_L")
    for stat in (
        # time-domain statistics
        "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS",
        "Skewness", "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
        # frequency-domain statistics
        "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
        "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
    )
]

def load_channel_data(probe_name, adc_num, channel, rate=None, base_dir=None):
    """Load the feature array stored for one probe / ADC / channel combination.

    The file is expected at
    ``<base_dir>/P<N>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``
    where ``<N>`` is the numeric suffix of ``probe_name``. No consistency
    checks are performed on the loaded data.

    Args:
        probe_name: Probe identifier such as ``"Probe1"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.
        rate: Sampling rate used in the file name. Defaults to the
            module-level ``sample_rate``.
        base_dir: Root folder containing the ``P<N>_fd`` sub-folders.
            Defaults to the module-level ``base_directory``.

    Returns:
        The array returned by ``np.load`` for the resolved file.

    Raises:
        FileNotFoundError: If the expected ``.npy`` file does not exist.
    """
    import re  # local import so the cell needs no extra top-level import

    if rate is None:
        rate = sample_rate
    if base_dir is None:
        base_dir = base_directory

    # Use the whole trailing number: taking only the last character would map
    # "Probe12" to "P2_fd". Presumably multi-digit probes live in "P<N>_fd"
    # as well -- TODO confirm against the data layout. Names without a
    # numeric suffix fall back to their last character.
    suffix = re.search(r"\d+$", probe_name)
    probe_id = suffix.group() if suffix else probe_name[-1]

    folder = os.path.join(base_dir, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ------------------------------------------------------------------
# Load features and build labels
# ------------------------------------------------------------------
# Channel 1 files become class 0 and channel 2 files become class 1; the
# training arrays come from train_probe, the evaluation arrays from test_probe.
train_ch0, train_ch1 = [load_channel_data(train_probe, adc_num, ch) for ch in (1, 2)]
test_ch0, test_ch1 = [load_channel_data(test_probe, adc_num, ch) for ch in (1, 2)]

X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ------------------------------------------------------------------
# Standardise features
# ------------------------------------------------------------------
# The scaler is fitted on the training data only and then applied to both sets.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ------------------------------------------------------------------
# First forest: rank features by importance
# ------------------------------------------------------------------
# Both forests in this cell share the same hyper-parameters and seed.
rf_hyperparams = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_hyperparams).fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features together with their importance.
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ------------------------------------------------------------------
# Second forest: retrain on the retained features only
# ------------------------------------------------------------------
rf_selected = RandomForestClassifier(**rf_hyperparams).fit(X_train_selected, y_train)

# Output folder is named after the train/test probes, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the fitted scaler and the retained column indices.
for artifact, file_name in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, file_name))

# Hard predictions and the predicted probability of class 1 on the test probe.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ------------------------------------------------------------------
# Evaluation on the test probe
# ------------------------------------------------------------------
print("\nModel Performance After Feature Selection:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_selected):.4f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred_selected):.4f}")
print(f"MCC: {matthews_corrcoef(y_test, y_pred_selected):.4f}")
print(f"Log Loss: {log_loss(y_test, y_probs_selected):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred_selected):.4f}")
print(f"Recall: {recall_score(y_test, y_pred_selected):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_selected):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix: row 0 holds the true class-0 samples, row 1 the true
# class-1 samples, so each error rate is the off-diagonal count over its row sum.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0, 1] / cm[0].sum()
fn_rate = cm[1, 0] / cm[1].sum()
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC1 CH1: (665, 38)
✅ Loaded Probe1 ADC1 CH2: (665, 38)
✅ Loaded Probe2 ADC1 CH1: (665, 38)
✅ Loaded Probe2 ADC1 CH2: (665, 38)

Top Important Features:
Max_H: Importance 0.1000
Mean_H: Importance 0.0849
Std_H: Importance 0.0575
Mean Deviation_H: Importance 0.0650
RMS_H: Importance 0.1222
Centroid_H: Importance 0.0015
Entropy_H: Importance 0.0191
Mean_Freq_H: Importance 0.0381
Kurtosis_Freq_H: Importance 0.0137
Variance_H: Importance 0.0215
Mean_L: Importance 0.1811
Mean Deviation_L: Importance 0.0800
RMS_L: Importance 0.1208
Entropy_L: Importance 0.0147
Variance_L: Importance 0.0800

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0004
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       665
         CH2       1.00      1.00      1.00       665

    accuracy                           1.00      1330
   m

#### **Sampling Rate 200MSPS**
This section contains data collected using a sampling rate of **200MSPS**. The model is trained on **Probe 1** and tested on **Probe 2**.



In [37]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ------------------------------------------------------------------
# Experiment configuration (fixed for this cell)
# ------------------------------------------------------------------
# Root folder that holds the per-probe feature sub-folders (machine-specific).
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

adc_num = 1              # ADC number
train_probe_dataset = 1  # dataset number of the probe used for training
test_probe_dataset = 2   # dataset number of the probe used for testing
sample_rate = 200        # sampling rate in MSPS

# Probe names are "Probe<N>", derived from the dataset numbers above.
train_probe = "Probe{}".format(train_probe_dataset)
test_probe = "Probe{}".format(test_probe_dataset)

# Feature labels in the order Time High, Freq High, Time Low, Freq Low:
# the same 19 statistic names, first with the "_H" suffix and then with "_L".
feature_names = [
    stat + suffix
    for suffix in ("_H", "_L")
    for stat in (
        # time-domain statistics
        "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS",
        "Skewness", "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
        # frequency-domain statistics
        "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
        "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
    )
]

def load_channel_data(probe_name, adc_num, channel, rate=None, base_dir=None):
    """Load the feature array stored for one probe / ADC / channel combination.

    The file is expected at
    ``<base_dir>/P<N>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``
    where ``<N>`` is the numeric suffix of ``probe_name``. No consistency
    checks are performed on the loaded data.

    Args:
        probe_name: Probe identifier such as ``"Probe1"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.
        rate: Sampling rate used in the file name. Defaults to the
            module-level ``sample_rate``.
        base_dir: Root folder containing the ``P<N>_fd`` sub-folders.
            Defaults to the module-level ``base_directory``.

    Returns:
        The array returned by ``np.load`` for the resolved file.

    Raises:
        FileNotFoundError: If the expected ``.npy`` file does not exist.
    """
    import re  # local import so the cell needs no extra top-level import

    if rate is None:
        rate = sample_rate
    if base_dir is None:
        base_dir = base_directory

    # Use the whole trailing number: taking only the last character would map
    # "Probe12" to "P2_fd". Presumably multi-digit probes live in "P<N>_fd"
    # as well -- TODO confirm against the data layout. Names without a
    # numeric suffix fall back to their last character.
    suffix = re.search(r"\d+$", probe_name)
    probe_id = suffix.group() if suffix else probe_name[-1]

    folder = os.path.join(base_dir, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ------------------------------------------------------------------
# Load features and build labels
# ------------------------------------------------------------------
# Channel 1 files become class 0 and channel 2 files become class 1; the
# training arrays come from train_probe, the evaluation arrays from test_probe.
train_ch0, train_ch1 = [load_channel_data(train_probe, adc_num, ch) for ch in (1, 2)]
test_ch0, test_ch1 = [load_channel_data(test_probe, adc_num, ch) for ch in (1, 2)]

X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ------------------------------------------------------------------
# Standardise features
# ------------------------------------------------------------------
# The scaler is fitted on the training data only and then applied to both sets.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ------------------------------------------------------------------
# First forest: rank features by importance
# ------------------------------------------------------------------
# Both forests in this cell share the same hyper-parameters and seed.
rf_hyperparams = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_hyperparams).fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features together with their importance.
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ------------------------------------------------------------------
# Second forest: retrain on the retained features only
# ------------------------------------------------------------------
rf_selected = RandomForestClassifier(**rf_hyperparams).fit(X_train_selected, y_train)

# Output folder is named after the train/test probes, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the fitted scaler and the retained column indices.
for artifact, file_name in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, file_name))

# Hard predictions and the predicted probability of class 1 on the test probe.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ------------------------------------------------------------------
# Evaluation on the test probe
# ------------------------------------------------------------------
print("\nModel Performance After Feature Selection:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_selected):.4f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred_selected):.4f}")
print(f"MCC: {matthews_corrcoef(y_test, y_pred_selected):.4f}")
print(f"Log Loss: {log_loss(y_test, y_probs_selected):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred_selected):.4f}")
print(f"Recall: {recall_score(y_test, y_pred_selected):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_selected):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix: row 0 holds the true class-0 samples, row 1 the true
# class-1 samples, so each error rate is the off-diagonal count over its row sum.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0, 1] / cm[0].sum()
fn_rate = cm[1, 0] / cm[1].sum()
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC1 CH1: (498, 38)
✅ Loaded Probe1 ADC1 CH2: (498, 38)
✅ Loaded Probe2 ADC1 CH1: (498, 38)
✅ Loaded Probe2 ADC1 CH2: (498, 38)

Top Important Features:
Max_H: Importance 0.1000
Min_H: Importance 0.0000
Mean_H: Importance 0.0852
Mean Deviation_H: Importance 0.0406
RMS_H: Importance 0.1257
Entropy_H: Importance 0.0182
Mean_Freq_H: Importance 0.0171
Kurtosis_Freq_H: Importance 0.0293
Variance_H: Importance 0.0401
Min_L: Importance 0.0865
Mean_L: Importance 0.1400
Std_L: Importance 0.0195
Mean Deviation_L: Importance 0.0800
RMS_L: Importance 0.1240
Entropy_L: Importance 0.0194
Mean_Freq_L: Importance 0.0136
Variance_L: Importance 0.0609

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0064
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       498
         CH2       1.00      1.00      1.00       498

    acc

#### **Sampling Rate 250MSPS**
This section contains data collected using a sampling rate of **250MSPS**. The model is trained on **Probe 1** and tested on **Probe 2**.



In [38]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ------------------------------------------------------------------
# Experiment configuration (fixed for this cell)
# ------------------------------------------------------------------
# Root folder that holds the per-probe feature sub-folders (machine-specific).
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

adc_num = 1              # ADC number
train_probe_dataset = 1  # dataset number of the probe used for training
test_probe_dataset = 2   # dataset number of the probe used for testing
sample_rate = 250        # sampling rate in MSPS

# Probe names are "Probe<N>", derived from the dataset numbers above.
train_probe = "Probe{}".format(train_probe_dataset)
test_probe = "Probe{}".format(test_probe_dataset)

# Feature labels in the order Time High, Freq High, Time Low, Freq Low:
# the same 19 statistic names, first with the "_H" suffix and then with "_L".
feature_names = [
    stat + suffix
    for suffix in ("_H", "_L")
    for stat in (
        # time-domain statistics
        "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS",
        "Skewness", "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
        # frequency-domain statistics
        "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
        "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
    )
]

def load_channel_data(probe_name, adc_num, channel, rate=None, base_dir=None):
    """Load the feature array stored for one probe / ADC / channel combination.

    The file is expected at
    ``<base_dir>/P<N>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``
    where ``<N>`` is the numeric suffix of ``probe_name``. No consistency
    checks are performed on the loaded data.

    Args:
        probe_name: Probe identifier such as ``"Probe1"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.
        rate: Sampling rate used in the file name. Defaults to the
            module-level ``sample_rate``.
        base_dir: Root folder containing the ``P<N>_fd`` sub-folders.
            Defaults to the module-level ``base_directory``.

    Returns:
        The array returned by ``np.load`` for the resolved file.

    Raises:
        FileNotFoundError: If the expected ``.npy`` file does not exist.
    """
    import re  # local import so the cell needs no extra top-level import

    if rate is None:
        rate = sample_rate
    if base_dir is None:
        base_dir = base_directory

    # Use the whole trailing number: taking only the last character would map
    # "Probe12" to "P2_fd". Presumably multi-digit probes live in "P<N>_fd"
    # as well -- TODO confirm against the data layout. Names without a
    # numeric suffix fall back to their last character.
    suffix = re.search(r"\d+$", probe_name)
    probe_id = suffix.group() if suffix else probe_name[-1]

    folder = os.path.join(base_dir, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ------------------------------------------------------------------
# Load features and build labels
# ------------------------------------------------------------------
# Channel 1 files become class 0 and channel 2 files become class 1; the
# training arrays come from train_probe, the evaluation arrays from test_probe.
train_ch0, train_ch1 = [load_channel_data(train_probe, adc_num, ch) for ch in (1, 2)]
test_ch0, test_ch1 = [load_channel_data(test_probe, adc_num, ch) for ch in (1, 2)]

X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ------------------------------------------------------------------
# Standardise features
# ------------------------------------------------------------------
# The scaler is fitted on the training data only and then applied to both sets.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ------------------------------------------------------------------
# First forest: rank features by importance
# ------------------------------------------------------------------
# Both forests in this cell share the same hyper-parameters and seed.
rf_hyperparams = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_hyperparams).fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features together with their importance.
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ------------------------------------------------------------------
# Second forest: retrain on the retained features only
# ------------------------------------------------------------------
rf_selected = RandomForestClassifier(**rf_hyperparams).fit(X_train_selected, y_train)

# Output folder is named after the train/test probes, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the fitted scaler and the retained column indices.
for artifact, file_name in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, file_name))

# Hard predictions and the predicted probability of class 1 on the test probe.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ------------------------------------------------------------------
# Evaluation on the test probe
# ------------------------------------------------------------------
print("\nModel Performance After Feature Selection:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_selected):.4f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred_selected):.4f}")
print(f"MCC: {matthews_corrcoef(y_test, y_pred_selected):.4f}")
print(f"Log Loss: {log_loss(y_test, y_probs_selected):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred_selected):.4f}")
print(f"Recall: {recall_score(y_test, y_pred_selected):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_selected):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix: row 0 holds the true class-0 samples, row 1 the true
# class-1 samples, so each error rate is the off-diagonal count over its row sum.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0, 1] / cm[0].sum()
fn_rate = cm[1, 0] / cm[1].sum()
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC1 CH1: (398, 38)
✅ Loaded Probe1 ADC1 CH2: (398, 38)
✅ Loaded Probe2 ADC1 CH1: (398, 38)
✅ Loaded Probe2 ADC1 CH2: (398, 38)

Top Important Features:
Max_H: Importance 0.0800
Mean_H: Importance 0.0803
Mean Deviation_H: Importance 0.0400
RMS_H: Importance 0.1239
Peak-to-Peak_H: Importance 0.0000
Centroid_H: Importance 0.0001
Mean_Freq_H: Importance 0.0189
Kurtosis_Freq_H: Importance 0.0161
Variance_H: Importance 0.0200
Min_L: Importance 0.1001
Mean_L: Importance 0.1400
Std_L: Importance 0.0200
Mean Deviation_L: Importance 0.1000
RMS_L: Importance 0.1211
Entropy_L: Importance 0.0197
Mean_Freq_L: Importance 0.0398
Variance_L: Importance 0.0800

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0011
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       398
         CH2       1.00      1.00      1.00       39

#### **Sampling Rate 300MSPS**
This section contains data collected using a sampling rate of **300MSPS**. The model is trained on **Probe 1** and tested on **Probe 2**.



In [39]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Run configuration
# ==================================================================
# Root folder that contains the per-probe feature sub-folders.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC index, train/test probe numbers and sampling-rate tag of this run.
adc_num, train_probe_dataset, test_probe_dataset, sample_rate = 1, 1, 2, 300

# Probe labels are derived from the probe numbers above.
train_probe = "Probe{}".format(train_probe_dataset)
test_probe = "Probe{}".format(test_probe_dataset)

# Column names of the 38 features: the same 19 stems, first with the "_H"
# suffix and then with the "_L" suffix (time-domain stems come before the
# frequency-domain ones inside each group).
_FEATURE_STEMS = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [f"{stem}_{band}" for band in ("H", "L") for stem in _FEATURE_STEMS]

def load_channel_data(probe_name, adc_num, channel, *, base_dir=None, rate=None):
    """Load the feature array saved for one probe / ADC / channel.

    The file is looked up as
    ``<base_dir>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``
    where ``<n>`` is the trailing number of ``probe_name``. The array is
    loaded as-is; no consistency check is performed on its contents.

    Args:
        probe_name: Probe label such as ``"Probe1"``.
        adc_num: ADC index used in the file name.
        channel: Channel index used in the file name.
        base_dir: Root folder of the feature files. Defaults to the
            module-level ``base_directory``.
        rate: Sampling-rate tag used in the file name. Defaults to the
            module-level ``sample_rate``.

    Returns:
        The array stored in the ``.npy`` file.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    if base_dir is None:
        base_dir = base_directory
    if rate is None:
        rate = sample_rate

    # Use every trailing digit so "Probe10" maps to "P10_fd" instead of
    # "P0_fd"; fall back to the last character when there are no digits.
    probe_id = probe_name[len(probe_name.rstrip("0123456789")):] or probe_name[-1]
    folder = os.path.join(base_dir, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Assemble train / test sets (label 0 = channel 1, label 1 = channel 2)
# ==================================================================
train_ch0, train_ch1 = [load_channel_data(train_probe, adc_num, ch) for ch in (1, 2)]
test_ch0, test_ch1 = [load_channel_data(test_probe, adc_num, ch) for ch in (1, 2)]

X_train = np.vstack((train_ch0, train_ch1))
y_train = np.concatenate((np.zeros(train_ch0.shape[0]), np.ones(train_ch1.shape[0])))
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.concatenate((np.zeros(test_ch0.shape[0]), np.ones(test_ch1.shape[0])))

# ==================================================================
# Standardisation (statistics come from the training set only)
# ==================================================================
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# First forest: used only to rank the features
# ==================================================================
# Both forests share the same hyper-parameters and seed.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Keep the features whose importance is strictly above the median importance.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected, X_test_selected = (
    scaled[:, important_features] for scaled in (X_train_scaled, X_test_scaled)
)

print("\nTop Important Features:")
for feature_idx in important_features:
    feat_name, feat_score = feature_names[feature_idx], feature_importances[feature_idx]
    print(f"{feat_name}: Importance {feat_score:.4f}")

# ==================================================================
# Second forest: trained on the selected features only
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params)
rf_selected.fit(X_train_selected, y_train)

# One output folder per (train probe, test probe, ADC, sampling rate) combination.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected column indices.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and the predicted probability of label 1.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Evaluation on the test probe
# ==================================================================
print("\nModel Performance After Feature Selection:")
# (label, metric function, prediction array it is evaluated on)
metric_table = (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
)
for metric_label, metric_fn, metric_input in metric_table:
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix (rows = true label, columns = predicted label) and the
# error rates derived from it.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
true_neg, false_pos = cm[0, 0], cm[0, 1]
false_neg, true_pos = cm[1, 0], cm[1, 1]
fp_rate = false_pos / (false_pos + true_neg)
fn_rate = false_neg / (false_neg + true_pos)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC1 CH1: (332, 38)
✅ Loaded Probe1 ADC1 CH2: (332, 38)
✅ Loaded Probe2 ADC1 CH1: (332, 38)
✅ Loaded Probe2 ADC1 CH2: (332, 38)

Top Important Features:
Max_H: Importance 0.0600
Mean_H: Importance 0.0600
Std_H: Importance 0.0800
Mean Deviation_H: Importance 0.0203
RMS_H: Importance 0.0802
Entropy_H: Importance 0.0400
Mean_Freq_H: Importance 0.0600
Kurtosis_Freq_H: Importance 0.0195
Min_L: Importance 0.0400
Mean_L: Importance 0.1000
Std_L: Importance 0.0800
Mean Deviation_L: Importance 0.0600
RMS_L: Importance 0.0800
Entropy_L: Importance 0.0600
Mean_Freq_L: Importance 0.0800
Variance_L: Importance 0.0800

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0015
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       332
         CH2       1.00      1.00      1.00       332

    accuracy                         

### Training **Probe 1** data and Testing on **Probe 3** data
To evaluate how well the model trained on Probe 1 generalizes when the probe is changed, it is tested here on data from Probe 3.

#### **Sampling Rate 10MSPS**
This section contains data collected using a sampling rate of **10MSPS**. The model is trained on **Probe 1** and tested on **Probe 3**.

In [40]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Run configuration
# ==================================================================
# Root folder that contains the per-probe feature sub-folders.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC index, train/test probe numbers and sampling-rate tag of this run.
adc_num, train_probe_dataset, test_probe_dataset, sample_rate = 1, 1, 3, 10

# Probe labels are derived from the probe numbers above.
train_probe = "Probe{}".format(train_probe_dataset)
test_probe = "Probe{}".format(test_probe_dataset)

# Column names of the 38 features: the same 19 stems, first with the "_H"
# suffix and then with the "_L" suffix (time-domain stems come before the
# frequency-domain ones inside each group).
_FEATURE_STEMS = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [f"{stem}_{band}" for band in ("H", "L") for stem in _FEATURE_STEMS]

def load_channel_data(probe_name, adc_num, channel, *, base_dir=None, rate=None):
    """Load the feature array saved for one probe / ADC / channel.

    The file is looked up as
    ``<base_dir>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``
    where ``<n>`` is the trailing number of ``probe_name``. The array is
    loaded as-is; no consistency check is performed on its contents.

    Args:
        probe_name: Probe label such as ``"Probe1"``.
        adc_num: ADC index used in the file name.
        channel: Channel index used in the file name.
        base_dir: Root folder of the feature files. Defaults to the
            module-level ``base_directory``.
        rate: Sampling-rate tag used in the file name. Defaults to the
            module-level ``sample_rate``.

    Returns:
        The array stored in the ``.npy`` file.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    if base_dir is None:
        base_dir = base_directory
    if rate is None:
        rate = sample_rate

    # Use every trailing digit so "Probe10" maps to "P10_fd" instead of
    # "P0_fd"; fall back to the last character when there are no digits.
    probe_id = probe_name[len(probe_name.rstrip("0123456789")):] or probe_name[-1]
    folder = os.path.join(base_dir, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Assemble train / test sets (label 0 = channel 1, label 1 = channel 2)
# ==================================================================
train_ch0, train_ch1 = [load_channel_data(train_probe, adc_num, ch) for ch in (1, 2)]
test_ch0, test_ch1 = [load_channel_data(test_probe, adc_num, ch) for ch in (1, 2)]

X_train = np.vstack((train_ch0, train_ch1))
y_train = np.concatenate((np.zeros(train_ch0.shape[0]), np.ones(train_ch1.shape[0])))
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.concatenate((np.zeros(test_ch0.shape[0]), np.ones(test_ch1.shape[0])))

# ==================================================================
# Standardisation (statistics come from the training set only)
# ==================================================================
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# First forest: used only to rank the features
# ==================================================================
# Both forests share the same hyper-parameters and seed.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Keep the features whose importance is strictly above the median importance.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected, X_test_selected = (
    scaled[:, important_features] for scaled in (X_train_scaled, X_test_scaled)
)

print("\nTop Important Features:")
for feature_idx in important_features:
    feat_name, feat_score = feature_names[feature_idx], feature_importances[feature_idx]
    print(f"{feat_name}: Importance {feat_score:.4f}")

# ==================================================================
# Second forest: trained on the selected features only
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params)
rf_selected.fit(X_train_selected, y_train)

# One output folder per (train probe, test probe, ADC, sampling rate) combination.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected column indices.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and the predicted probability of label 1.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Evaluation on the test probe
# ==================================================================
print("\nModel Performance After Feature Selection:")
# (label, metric function, prediction array it is evaluated on)
metric_table = (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
)
for metric_label, metric_fn, metric_input in metric_table:
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix (rows = true label, columns = predicted label) and the
# error rates derived from it.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
true_neg, false_pos = cm[0, 0], cm[0, 1]
false_neg, true_pos = cm[1, 0], cm[1, 1]
fp_rate = false_pos / (false_pos + true_neg)
fn_rate = false_neg / (false_neg + true_pos)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC1 CH1: (9998, 38)
✅ Loaded Probe1 ADC1 CH2: (9998, 38)
✅ Loaded Probe3 ADC1 CH1: (9998, 38)
✅ Loaded Probe3 ADC1 CH2: (9998, 38)

Top Important Features:
Max_H: Importance 0.0527
Min_H: Importance 0.0141
Mean_H: Importance 0.0989
Std_H: Importance 0.0076
Mean Deviation_H: Importance 0.0216
RMS_H: Importance 0.1667
Entropy_H: Importance 0.0460
Mean_Freq_H: Importance 0.0641
Variance_H: Importance 0.0496
Max_L: Importance 0.0324
Min_L: Importance 0.0696
Mean_L: Importance 0.0698
Std_L: Importance 0.0075
RMS_L: Importance 0.0890
Skewness_L: Importance 0.0072
Centroid_L: Importance 0.0114
Entropy_L: Importance 0.0299
Mean_Freq_L: Importance 0.0159
Variance_L: Importance 0.0973

Model Performance After Feature Selection:
Test Accuracy: 0.9597
Balanced Accuracy: 0.9597
MCC: 0.9218
Log Loss: 0.1084
F1 Score: 0.9611
Recall: 0.9958
Precision: 0.9287
              precision    recall  f1-score   support

         CH1       1.00      0.92      0.96      9998
         CH2       

#### **Sampling Rate 20MSPS**
This section contains data collected using a sampling rate of **20MSPS**. The model is trained on **Probe 1** and tested on **Probe 3**.



In [41]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Run configuration
# ==================================================================
# Root folder that contains the per-probe feature sub-folders.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC index, train/test probe numbers and sampling-rate tag of this run.
adc_num, train_probe_dataset, test_probe_dataset, sample_rate = 1, 1, 3, 20

# Probe labels are derived from the probe numbers above.
train_probe = "Probe{}".format(train_probe_dataset)
test_probe = "Probe{}".format(test_probe_dataset)

# Column names of the 38 features: the same 19 stems, first with the "_H"
# suffix and then with the "_L" suffix (time-domain stems come before the
# frequency-domain ones inside each group).
_FEATURE_STEMS = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [f"{stem}_{band}" for band in ("H", "L") for stem in _FEATURE_STEMS]

def load_channel_data(probe_name, adc_num, channel, *, base_dir=None, rate=None):
    """Load the feature array saved for one probe / ADC / channel.

    The file is looked up as
    ``<base_dir>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``
    where ``<n>`` is the trailing number of ``probe_name``. The array is
    loaded as-is; no consistency check is performed on its contents.

    Args:
        probe_name: Probe label such as ``"Probe1"``.
        adc_num: ADC index used in the file name.
        channel: Channel index used in the file name.
        base_dir: Root folder of the feature files. Defaults to the
            module-level ``base_directory``.
        rate: Sampling-rate tag used in the file name. Defaults to the
            module-level ``sample_rate``.

    Returns:
        The array stored in the ``.npy`` file.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    if base_dir is None:
        base_dir = base_directory
    if rate is None:
        rate = sample_rate

    # Use every trailing digit so "Probe10" maps to "P10_fd" instead of
    # "P0_fd"; fall back to the last character when there are no digits.
    probe_id = probe_name[len(probe_name.rstrip("0123456789")):] or probe_name[-1]
    folder = os.path.join(base_dir, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Assemble train / test sets (label 0 = channel 1, label 1 = channel 2)
# ==================================================================
train_ch0, train_ch1 = [load_channel_data(train_probe, adc_num, ch) for ch in (1, 2)]
test_ch0, test_ch1 = [load_channel_data(test_probe, adc_num, ch) for ch in (1, 2)]

X_train = np.vstack((train_ch0, train_ch1))
y_train = np.concatenate((np.zeros(train_ch0.shape[0]), np.ones(train_ch1.shape[0])))
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.concatenate((np.zeros(test_ch0.shape[0]), np.ones(test_ch1.shape[0])))

# ==================================================================
# Standardisation (statistics come from the training set only)
# ==================================================================
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# First forest: used only to rank the features
# ==================================================================
# Both forests share the same hyper-parameters and seed.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Keep the features whose importance is strictly above the median importance.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected, X_test_selected = (
    scaled[:, important_features] for scaled in (X_train_scaled, X_test_scaled)
)

print("\nTop Important Features:")
for feature_idx in important_features:
    feat_name, feat_score = feature_names[feature_idx], feature_importances[feature_idx]
    print(f"{feat_name}: Importance {feat_score:.4f}")

# ==================================================================
# Second forest: trained on the selected features only
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params)
rf_selected.fit(X_train_selected, y_train)

# One output folder per (train probe, test probe, ADC, sampling rate) combination.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected column indices.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and the predicted probability of label 1.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Evaluation on the test probe
# ==================================================================
print("\nModel Performance After Feature Selection:")
# (label, metric function, prediction array it is evaluated on)
metric_table = (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
)
for metric_label, metric_fn, metric_input in metric_table:
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix (rows = true label, columns = predicted label) and the
# error rates derived from it.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
true_neg, false_pos = cm[0, 0], cm[0, 1]
false_neg, true_pos = cm[1, 0], cm[1, 1]
fp_rate = false_pos / (false_pos + true_neg)
fn_rate = false_neg / (false_neg + true_pos)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC1 CH1: (4998, 38)
✅ Loaded Probe1 ADC1 CH2: (4998, 38)
✅ Loaded Probe3 ADC1 CH1: (4998, 38)
✅ Loaded Probe3 ADC1 CH2: (4998, 38)

Top Important Features:
Max_H: Importance 0.1224
Min_H: Importance 0.0284
Mean_H: Importance 0.1486
Mean Deviation_H: Importance 0.0095
RMS_H: Importance 0.2172
Centroid_H: Importance 0.0009
Entropy_H: Importance 0.0173
Mean_Freq_H: Importance 0.0727
Variance_H: Importance 0.0400
Max_L: Importance 0.0024
Min_L: Importance 0.0255
Mean_L: Importance 0.1253
Std_L: Importance 0.0012
RMS_L: Importance 0.1210
Peak-to-Peak_L: Importance 0.0018
Entropy_L: Importance 0.0283
Spread_L: Importance 0.0004
Mean_Freq_L: Importance 0.0108
Variance_L: Importance 0.0256

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0016
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      4998
         CH2

#### **Sampling Rate 30MSPS**
This section contains data collected using a sampling rate of **30MSPS**. The model is trained on **Probe 1** and tested on **Probe 3**.



In [42]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Run configuration
# ==================================================================
# Root folder that contains the per-probe feature sub-folders.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC index, train/test probe numbers and sampling-rate tag of this run.
adc_num, train_probe_dataset, test_probe_dataset, sample_rate = 1, 1, 3, 30

# Probe labels are derived from the probe numbers above.
train_probe = "Probe{}".format(train_probe_dataset)
test_probe = "Probe{}".format(test_probe_dataset)

# Column names of the 38 features: the same 19 stems, first with the "_H"
# suffix and then with the "_L" suffix (time-domain stems come before the
# frequency-domain ones inside each group).
_FEATURE_STEMS = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [f"{stem}_{band}" for band in ("H", "L") for stem in _FEATURE_STEMS]

def load_channel_data(probe_name, adc_num, channel, *, base_dir=None, rate=None):
    """Load the feature array saved for one probe / ADC / channel.

    The file is looked up as
    ``<base_dir>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``
    where ``<n>`` is the trailing number of ``probe_name``. The array is
    loaded as-is; no consistency check is performed on its contents.

    Args:
        probe_name: Probe label such as ``"Probe1"``.
        adc_num: ADC index used in the file name.
        channel: Channel index used in the file name.
        base_dir: Root folder of the feature files. Defaults to the
            module-level ``base_directory``.
        rate: Sampling-rate tag used in the file name. Defaults to the
            module-level ``sample_rate``.

    Returns:
        The array stored in the ``.npy`` file.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    if base_dir is None:
        base_dir = base_directory
    if rate is None:
        rate = sample_rate

    # Use every trailing digit so "Probe10" maps to "P10_fd" instead of
    # "P0_fd"; fall back to the last character when there are no digits.
    probe_id = probe_name[len(probe_name.rstrip("0123456789")):] or probe_name[-1]
    folder = os.path.join(base_dir, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Assemble train / test sets (label 0 = channel 1, label 1 = channel 2)
# ==================================================================
train_ch0, train_ch1 = [load_channel_data(train_probe, adc_num, ch) for ch in (1, 2)]
test_ch0, test_ch1 = [load_channel_data(test_probe, adc_num, ch) for ch in (1, 2)]

X_train = np.vstack((train_ch0, train_ch1))
y_train = np.concatenate((np.zeros(train_ch0.shape[0]), np.ones(train_ch1.shape[0])))
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.concatenate((np.zeros(test_ch0.shape[0]), np.ones(test_ch1.shape[0])))

# ==================================================================
# Standardisation (statistics come from the training set only)
# ==================================================================
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# First forest: used only to rank the features
# ==================================================================
# Both forests share the same hyper-parameters and seed.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Keep the features whose importance is strictly above the median importance.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected, X_test_selected = (
    scaled[:, important_features] for scaled in (X_train_scaled, X_test_scaled)
)

print("\nTop Important Features:")
for feature_idx in important_features:
    feat_name, feat_score = feature_names[feature_idx], feature_importances[feature_idx]
    print(f"{feat_name}: Importance {feat_score:.4f}")

# ==================================================================
# Second forest: trained on the selected features only
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params)
rf_selected.fit(X_train_selected, y_train)

# One output folder per (train probe, test probe, ADC, sampling rate) combination.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected column indices.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and the predicted probability of label 1.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Evaluation on the test probe
# ==================================================================
print("\nModel Performance After Feature Selection:")
# (label, metric function, prediction array it is evaluated on)
metric_table = (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
)
for metric_label, metric_fn, metric_input in metric_table:
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix (rows = true label, columns = predicted label) and the
# error rates derived from it.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
true_neg, false_pos = cm[0, 0], cm[0, 1]
false_neg, true_pos = cm[1, 0], cm[1, 1]
fp_rate = false_pos / (false_pos + true_neg)
fn_rate = false_neg / (false_neg + true_pos)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC1 CH1: (3332, 38)
✅ Loaded Probe1 ADC1 CH2: (3332, 38)
✅ Loaded Probe3 ADC1 CH1: (3331, 38)
✅ Loaded Probe3 ADC1 CH2: (3332, 38)

Top Important Features:
Max_H: Importance 0.0683
Min_H: Importance 0.0315
Mean_H: Importance 0.0935
Std_H: Importance 0.0002
Mean Deviation_H: Importance 0.0067
RMS_H: Importance 0.1295
Centroid_H: Importance 0.0015
Entropy_H: Importance 0.0179
Mean_Freq_H: Importance 0.0551
Kurtosis_Freq_H: Importance 0.0001
Variance_H: Importance 0.0675
Max_L: Importance 0.0063
Min_L: Importance 0.0191
Mean_L: Importance 0.2050
RMS_L: Importance 0.1282
Peak-to-Peak_L: Importance 0.0001
Entropy_L: Importance 0.0234
Mean_Freq_L: Importance 0.0096
Variance_L: Importance 0.1366

Model Performance After Feature Selection:
Test Accuracy: 0.9676
Balanced Accuracy: 0.9676
MCC: 0.9370
Log Loss: 0.0684
F1 Score: 0.9665
Recall: 0.9364
Precision: 0.9987
              precision    recall  f1-score   support

         CH1       0.94      1.00      0.97      3331
     

#### **Sampling Rate 40MSPS**
This section contains data collected using a sampling rate of **40MSPS**. The model is trained on **Probe 1** and tested on **Probe 3**.



In [43]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Run configuration
# ==================================================================
# Root folder that contains the per-probe feature sub-folders.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC index, train/test probe numbers and sampling-rate tag of this run.
adc_num, train_probe_dataset, test_probe_dataset, sample_rate = 1, 1, 3, 40

# Probe labels are derived from the probe numbers above.
train_probe = "Probe{}".format(train_probe_dataset)
test_probe = "Probe{}".format(test_probe_dataset)

# Column names of the 38 features: the same 19 stems, first with the "_H"
# suffix and then with the "_L" suffix (time-domain stems come before the
# frequency-domain ones inside each group).
_FEATURE_STEMS = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [f"{stem}_{band}" for band in ("H", "L") for stem in _FEATURE_STEMS]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel combination.

    The file is expected at
    ``<base_directory>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``
    where ``<n>`` is the numeric suffix of ``probe_name`` and
    ``base_directory`` / ``sample_rate`` are module-level settings.
    No consistency check is performed on the loaded contents.

    Args:
        probe_name: Probe label such as ``"Probe1"``.
        adc_num: ADC index used in the file name.
        channel: Channel index used in the file name.

    Returns:
        The array stored in the ``.npy`` file, as returned by ``np.load``.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Use the full trailing number rather than only the last character, so
    # "Probe10" maps to "P10_fd" instead of "P0_fd" (assumes the folder naming
    # continues the P<n>_fd pattern). Labels without trailing digits fall back
    # to the last character, as before.
    digits_start = len(probe_name.rstrip("0123456789"))
    probe_id = probe_name[digits_start:] or probe_name[-1]

    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Channel 1 samples are labelled class 0, channel 2 samples class 1
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Data Preprocessing
# ==================================================================
# Scaling statistics are estimated on the training probe only and then
# applied unchanged to the test probe
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# The selection forest and the final forest share these hyper-parameters
forest_settings = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**forest_settings).fit(X_train_scaled, y_train)

# Keep the features whose importance is strictly above the 50th percentile
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features with their importance
print("\nTop Important Features:")
for feature_idx, importance in zip(important_features, feature_importances[important_features]):
    print(f"{feature_names[feature_idx]}: Importance {importance:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**forest_settings).fit(X_train_selected, y_train)

# Output folder is named after the probe pair, ADC and sampling rate
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected feature indices
for artifact_file, artifact in (
    ("RandomForest_Model.pkl", rf_selected),
    ("Scaler.pkl", scaler),
    ("Selected_Features.pkl", important_features),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and class-1 probabilities on the test probe
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# Each metric is evaluated right before it is printed; log loss is the only
# one that takes probabilities instead of hard predictions
for metric_label, metric_fn, metric_input in (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
):
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix plus FPR / FNR, with class 0 (CH1) as the negative class
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
(true_neg, false_pos), (false_neg, true_pos) = cm
fp_rate = false_pos / (false_pos + true_neg)
fn_rate = false_neg / (false_neg + true_pos)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC1 CH1: (2481, 38)
✅ Loaded Probe1 ADC1 CH2: (2481, 38)
✅ Loaded Probe3 ADC1 CH1: (2481, 38)
✅ Loaded Probe3 ADC1 CH2: (2481, 38)

Top Important Features:
Max_H: Importance 0.0684
Min_H: Importance 0.0280
Mean_H: Importance 0.0961
Std_H: Importance 0.0123
Mean Deviation_H: Importance 0.0169
RMS_H: Importance 0.1322
Centroid_H: Importance 0.0000
Entropy_H: Importance 0.0144
Skewness_Freq_H: Importance 0.0000
Mean_Freq_H: Importance 0.0579
Variance_H: Importance 0.0606
Max_L: Importance 0.0001
Min_L: Importance 0.0165
Mean_L: Importance 0.1996
RMS_L: Importance 0.1272
Peak-to-Peak_L: Importance 0.0002
Entropy_L: Importance 0.0301
Mean_Freq_L: Importance 0.0124
Variance_L: Importance 0.1273

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0000
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      2481
     

#### **Sampling Rate 50MSPS**
This section contains data collected using a sampling rate of **50MSPS**. The model is trained on **Probe 1** and tested on **Probe 3**.



In [44]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder that holds the pre-extracted feature files
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selectors: sampling rate (MSPS), ADC index, and the probe
# numbers used for training and for testing
sample_rate = 50
adc_num = 1
train_probe_dataset, test_probe_dataset = 1, 3

# Probe labels follow directly from the probe numbers above
train_probe, test_probe = (
    f"Probe{number}" for number in (train_probe_dataset, test_probe_dataset)
)

# Feature names: the same 19 descriptors (time-domain ones first, then the
# frequency-domain ones) are listed once with the "_H" suffix and once with
# the "_L" suffix, in that order.
_base_feature_names = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [
    f"{base}_{suffix}" for suffix in ("H", "L") for base in _base_feature_names
]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel combination.

    The file is expected at
    ``<base_directory>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``
    where ``<n>`` is the numeric suffix of ``probe_name`` and
    ``base_directory`` / ``sample_rate`` are module-level settings.
    No consistency check is performed on the loaded contents.

    Args:
        probe_name: Probe label such as ``"Probe1"``.
        adc_num: ADC index used in the file name.
        channel: Channel index used in the file name.

    Returns:
        The array stored in the ``.npy`` file, as returned by ``np.load``.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Use the full trailing number rather than only the last character, so
    # "Probe10" maps to "P10_fd" instead of "P0_fd" (assumes the folder naming
    # continues the P<n>_fd pattern). Labels without trailing digits fall back
    # to the last character, as before.
    digits_start = len(probe_name.rstrip("0123456789"))
    probe_id = probe_name[digits_start:] or probe_name[-1]

    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Channel 1 samples are labelled class 0, channel 2 samples class 1
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Data Preprocessing
# ==================================================================
# Scaling statistics are estimated on the training probe only and then
# applied unchanged to the test probe
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# The selection forest and the final forest share these hyper-parameters
forest_settings = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**forest_settings).fit(X_train_scaled, y_train)

# Keep the features whose importance is strictly above the 50th percentile
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features with their importance
print("\nTop Important Features:")
for feature_idx, importance in zip(important_features, feature_importances[important_features]):
    print(f"{feature_names[feature_idx]}: Importance {importance:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**forest_settings).fit(X_train_selected, y_train)

# Output folder is named after the probe pair, ADC and sampling rate
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected feature indices
for artifact_file, artifact in (
    ("RandomForest_Model.pkl", rf_selected),
    ("Scaler.pkl", scaler),
    ("Selected_Features.pkl", important_features),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and class-1 probabilities on the test probe
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# Each metric is evaluated right before it is printed; log loss is the only
# one that takes probabilities instead of hard predictions
for metric_label, metric_fn, metric_input in (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
):
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix plus FPR / FNR, with class 0 (CH1) as the negative class
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
(true_neg, false_pos), (false_neg, true_pos) = cm
fp_rate = false_pos / (false_pos + true_neg)
fn_rate = false_neg / (false_neg + true_pos)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC1 CH1: (1998, 38)
✅ Loaded Probe1 ADC1 CH2: (1998, 38)
✅ Loaded Probe3 ADC1 CH1: (1998, 38)
✅ Loaded Probe3 ADC1 CH2: (1998, 38)

Top Important Features:
Max_H: Importance 0.0601
Min_H: Importance 0.0371
Mean_H: Importance 0.0818
Std_H: Importance 0.0381
Mean Deviation_H: Importance 0.0205
RMS_H: Importance 0.0808
Entropy_H: Importance 0.1200
Spread_H: Importance 0.0000
Mean_Freq_H: Importance 0.1024
Variance_H: Importance 0.0601
Min_L: Importance 0.0006
Mean_L: Importance 0.1433
Mean Deviation_L: Importance 0.0151
RMS_L: Importance 0.0803
Entropy_L: Importance 0.0390
Variance_L: Importance 0.1207

Model Performance After Feature Selection:
Test Accuracy: 0.9992
Balanced Accuracy: 0.9992
MCC: 0.9985
Log Loss: 0.0408
F1 Score: 0.9992
Recall: 0.9985
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      1998
         CH2       1.00      1.00      1.00      1998

    accuracy                           1.

#### **Sampling Rate 100MSPS**
This section contains data collected using a sampling rate of **100MSPS**. The model is trained on **Probe 1** and tested on **Probe 3**.



In [45]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder that holds the pre-extracted feature files
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selectors: sampling rate (MSPS), ADC index, and the probe
# numbers used for training and for testing
sample_rate = 100
adc_num = 1
train_probe_dataset, test_probe_dataset = 1, 3

# Probe labels follow directly from the probe numbers above
train_probe, test_probe = (
    f"Probe{number}" for number in (train_probe_dataset, test_probe_dataset)
)

# Feature names: the same 19 descriptors (time-domain ones first, then the
# frequency-domain ones) are listed once with the "_H" suffix and once with
# the "_L" suffix, in that order.
_base_feature_names = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [
    f"{base}_{suffix}" for suffix in ("H", "L") for base in _base_feature_names
]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel combination.

    The file is expected at
    ``<base_directory>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``
    where ``<n>`` is the numeric suffix of ``probe_name`` and
    ``base_directory`` / ``sample_rate`` are module-level settings.
    No consistency check is performed on the loaded contents.

    Args:
        probe_name: Probe label such as ``"Probe1"``.
        adc_num: ADC index used in the file name.
        channel: Channel index used in the file name.

    Returns:
        The array stored in the ``.npy`` file, as returned by ``np.load``.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Use the full trailing number rather than only the last character, so
    # "Probe10" maps to "P10_fd" instead of "P0_fd" (assumes the folder naming
    # continues the P<n>_fd pattern). Labels without trailing digits fall back
    # to the last character, as before.
    digits_start = len(probe_name.rstrip("0123456789"))
    probe_id = probe_name[digits_start:] or probe_name[-1]

    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Channel 1 samples are labelled class 0, channel 2 samples class 1
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Data Preprocessing
# ==================================================================
# Scaling statistics are estimated on the training probe only and then
# applied unchanged to the test probe
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# The selection forest and the final forest share these hyper-parameters
forest_settings = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**forest_settings).fit(X_train_scaled, y_train)

# Keep the features whose importance is strictly above the 50th percentile
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features with their importance
print("\nTop Important Features:")
for feature_idx, importance in zip(important_features, feature_importances[important_features]):
    print(f"{feature_names[feature_idx]}: Importance {importance:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**forest_settings).fit(X_train_selected, y_train)

# Output folder is named after the probe pair, ADC and sampling rate
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected feature indices
for artifact_file, artifact in (
    ("RandomForest_Model.pkl", rf_selected),
    ("Scaler.pkl", scaler),
    ("Selected_Features.pkl", important_features),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and class-1 probabilities on the test probe
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# Each metric is evaluated right before it is printed; log loss is the only
# one that takes probabilities instead of hard predictions
for metric_label, metric_fn, metric_input in (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
):
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix plus FPR / FNR, with class 0 (CH1) as the negative class
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
(true_neg, false_pos), (false_neg, true_pos) = cm
fp_rate = false_pos / (false_pos + true_neg)
fn_rate = false_neg / (false_neg + true_pos)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC1 CH1: (998, 38)
✅ Loaded Probe1 ADC1 CH2: (998, 38)
✅ Loaded Probe3 ADC1 CH1: (998, 38)
✅ Loaded Probe3 ADC1 CH2: (998, 38)

Top Important Features:
Max_H: Importance 0.0861
Min_H: Importance 0.0066
Mean_H: Importance 0.0985
Std_H: Importance 0.0055
Mean Deviation_H: Importance 0.0406
RMS_H: Importance 0.1472
Skewness_H: Importance 0.0007
Centroid_H: Importance 0.0055
Entropy_H: Importance 0.0095
Mean_Freq_H: Importance 0.0104
Variance_H: Importance 0.0821
Max_L: Importance 0.0105
Min_L: Importance 0.0172
Mean_L: Importance 0.1887
Std_L: Importance 0.0048
Mean Deviation_L: Importance 0.0267
RMS_L: Importance 0.1411
Entropy_L: Importance 0.0052
Variance_L: Importance 0.1124

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0017
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       998
         CH2      

#### **Sampling Rate 150MSPS**
This section contains data collected using a sampling rate of **150MSPS**. The model is trained on **Probe 1** and tested on **Probe 3**.



In [46]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder that holds the pre-extracted feature files
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selectors: sampling rate (MSPS), ADC index, and the probe
# numbers used for training and for testing
sample_rate = 150
adc_num = 1
train_probe_dataset, test_probe_dataset = 1, 3

# Probe labels follow directly from the probe numbers above
train_probe, test_probe = (
    f"Probe{number}" for number in (train_probe_dataset, test_probe_dataset)
)

# Feature names: the same 19 descriptors (time-domain ones first, then the
# frequency-domain ones) are listed once with the "_H" suffix and once with
# the "_L" suffix, in that order.
_base_feature_names = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [
    f"{base}_{suffix}" for suffix in ("H", "L") for base in _base_feature_names
]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel combination.

    The file is expected at
    ``<base_directory>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``
    where ``<n>`` is the numeric suffix of ``probe_name`` and
    ``base_directory`` / ``sample_rate`` are module-level settings.
    No consistency check is performed on the loaded contents.

    Args:
        probe_name: Probe label such as ``"Probe1"``.
        adc_num: ADC index used in the file name.
        channel: Channel index used in the file name.

    Returns:
        The array stored in the ``.npy`` file, as returned by ``np.load``.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Use the full trailing number rather than only the last character, so
    # "Probe10" maps to "P10_fd" instead of "P0_fd" (assumes the folder naming
    # continues the P<n>_fd pattern). Labels without trailing digits fall back
    # to the last character, as before.
    digits_start = len(probe_name.rstrip("0123456789"))
    probe_id = probe_name[digits_start:] or probe_name[-1]

    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Channel 1 samples are labelled class 0, channel 2 samples class 1
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Data Preprocessing
# ==================================================================
# Scaling statistics are estimated on the training probe only and then
# applied unchanged to the test probe
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# The selection forest and the final forest share these hyper-parameters
forest_settings = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**forest_settings).fit(X_train_scaled, y_train)

# Keep the features whose importance is strictly above the 50th percentile
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features with their importance
print("\nTop Important Features:")
for feature_idx, importance in zip(important_features, feature_importances[important_features]):
    print(f"{feature_names[feature_idx]}: Importance {importance:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**forest_settings).fit(X_train_selected, y_train)

# Output folder is named after the probe pair, ADC and sampling rate
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected feature indices
for artifact_file, artifact in (
    ("RandomForest_Model.pkl", rf_selected),
    ("Scaler.pkl", scaler),
    ("Selected_Features.pkl", important_features),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and class-1 probabilities on the test probe
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# Each metric is evaluated right before it is printed; log loss is the only
# one that takes probabilities instead of hard predictions
for metric_label, metric_fn, metric_input in (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
):
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix plus FPR / FNR, with class 0 (CH1) as the negative class
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
(true_neg, false_pos), (false_neg, true_pos) = cm
fp_rate = false_pos / (false_pos + true_neg)
fn_rate = false_neg / (false_neg + true_pos)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC1 CH1: (665, 38)
✅ Loaded Probe1 ADC1 CH2: (665, 38)
✅ Loaded Probe3 ADC1 CH1: (665, 38)
✅ Loaded Probe3 ADC1 CH2: (665, 38)

Top Important Features:
Max_H: Importance 0.1000
Mean_H: Importance 0.0849
Std_H: Importance 0.0575
Mean Deviation_H: Importance 0.0650
RMS_H: Importance 0.1222
Centroid_H: Importance 0.0015
Entropy_H: Importance 0.0191
Mean_Freq_H: Importance 0.0381
Kurtosis_Freq_H: Importance 0.0137
Variance_H: Importance 0.0215
Mean_L: Importance 0.1811
Mean Deviation_L: Importance 0.0800
RMS_L: Importance 0.1208
Entropy_L: Importance 0.0147
Variance_L: Importance 0.0800

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0041
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       665
         CH2       1.00      1.00      1.00       665

    accuracy                           1.00      1330
   m

#### **Sampling Rate 200MSPS**
This section contains data collected using a sampling rate of **200MSPS**. The model is trained on **Probe 1** and tested on **Probe 3**.



In [47]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder that holds the pre-extracted feature files
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selectors: sampling rate (MSPS), ADC index, and the probe
# numbers used for training and for testing
sample_rate = 200
adc_num = 1
train_probe_dataset, test_probe_dataset = 1, 3

# Probe labels follow directly from the probe numbers above
train_probe, test_probe = (
    f"Probe{number}" for number in (train_probe_dataset, test_probe_dataset)
)

# Feature names: the same 19 descriptors (time-domain ones first, then the
# frequency-domain ones) are listed once with the "_H" suffix and once with
# the "_L" suffix, in that order.
_base_feature_names = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [
    f"{base}_{suffix}" for suffix in ("H", "L") for base in _base_feature_names
]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel combination.

    The file is expected at
    ``<base_directory>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``
    where ``<n>`` is the numeric suffix of ``probe_name`` and
    ``base_directory`` / ``sample_rate`` are module-level settings.
    No consistency check is performed on the loaded contents.

    Args:
        probe_name: Probe label such as ``"Probe1"``.
        adc_num: ADC index used in the file name.
        channel: Channel index used in the file name.

    Returns:
        The array stored in the ``.npy`` file, as returned by ``np.load``.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Use the full trailing number rather than only the last character, so
    # "Probe10" maps to "P10_fd" instead of "P0_fd" (assumes the folder naming
    # continues the P<n>_fd pattern). Labels without trailing digits fall back
    # to the last character, as before.
    digits_start = len(probe_name.rstrip("0123456789"))
    probe_id = probe_name[digits_start:] or probe_name[-1]

    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Channel 1 samples are labelled class 0, channel 2 samples class 1
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Data Preprocessing
# ==================================================================
# Scaling statistics are estimated on the training probe only and then
# applied unchanged to the test probe
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# The selection forest and the final forest share these hyper-parameters
forest_settings = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**forest_settings).fit(X_train_scaled, y_train)

# Keep the features whose importance is strictly above the 50th percentile
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features with their importance
print("\nTop Important Features:")
for feature_idx, importance in zip(important_features, feature_importances[important_features]):
    print(f"{feature_names[feature_idx]}: Importance {importance:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**forest_settings).fit(X_train_selected, y_train)

# Output folder is named after the probe pair, ADC and sampling rate
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected feature indices
for artifact_file, artifact in (
    ("RandomForest_Model.pkl", rf_selected),
    ("Scaler.pkl", scaler),
    ("Selected_Features.pkl", important_features),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and class-1 probabilities on the test probe
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# Each metric is evaluated right before it is printed; log loss is the only
# one that takes probabilities instead of hard predictions
for metric_label, metric_fn, metric_input in (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
):
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix plus FPR / FNR, with class 0 (CH1) as the negative class
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
(true_neg, false_pos), (false_neg, true_pos) = cm
fp_rate = false_pos / (false_pos + true_neg)
fn_rate = false_neg / (false_neg + true_pos)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC1 CH1: (498, 38)
✅ Loaded Probe1 ADC1 CH2: (498, 38)
✅ Loaded Probe3 ADC1 CH1: (498, 38)
✅ Loaded Probe3 ADC1 CH2: (498, 38)

Top Important Features:
Max_H: Importance 0.1000
Min_H: Importance 0.0000
Mean_H: Importance 0.0852
Mean Deviation_H: Importance 0.0406
RMS_H: Importance 0.1257
Entropy_H: Importance 0.0182
Mean_Freq_H: Importance 0.0171
Kurtosis_Freq_H: Importance 0.0293
Variance_H: Importance 0.0401
Min_L: Importance 0.0865
Mean_L: Importance 0.1400
Std_L: Importance 0.0195
Mean Deviation_L: Importance 0.0800
RMS_L: Importance 0.1240
Entropy_L: Importance 0.0194
Mean_Freq_L: Importance 0.0136
Variance_L: Importance 0.0609

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0023
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       498
         CH2       1.00      1.00      1.00       498

    acc

#### **Sampling Rate 250MSPS**
This section contains data collected using a sampling rate of **250MSPS**. The model is trained on **Probe 1** and tested on **Probe 3**.



In [48]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder that holds the extracted feature files.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

adc_num = 1               # ADC index used in the feature file names
train_probe_dataset = 1   # probe number the model is trained on
test_probe_dataset = 3    # probe number the model is evaluated on
sample_rate = 250         # sampling-rate tag (MSPS) used in the file names

# Probe labels are derived from the dataset numbers above.
train_probe = "Probe" + str(train_probe_dataset)
test_probe = "Probe" + str(test_probe_dataset)

# Feature Names (Time High, Freq High, Time Low, Freq Low).
# Every stem appears once with the "_H" suffix and once with the "_L" suffix;
# all "_H" names come first, and within a band the time-domain stems precede
# the frequency-domain stems.
feature_names = [
    f"{stem}_{band}"
    for band in ("H", "L")
    for stem in (
        # time-domain stems
        "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
        "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
        # frequency-domain stems
        "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
        "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
    )
]

def load_channel_data(probe_name, adc_num, channel, rate_msps=None, data_root=None):
    """Load the saved feature array for one probe / ADC / channel combination.

    The file is looked up at
    ``<data_root>/P<probe id>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate_msps>_MSPS.npy``.
    The ADC is only encoded in the file name; the file contents are not
    cross-checked against it.

    Args:
        probe_name: Probe label such as ``"Probe1"``. Its trailing digits
            select the ``P<n>_fd`` folder, so multi-digit probe numbers
            resolve to the right folder.
        adc_num: ADC index used in the file name.
        channel: Channel index used in the file name.
        rate_msps: Sampling-rate tag used in the file name. Defaults to the
            module-level ``sample_rate``.
        data_root: Folder containing the ``P<n>_fd`` subfolders. Defaults to
            the module-level ``base_directory``.

    Returns:
        The array returned by ``np.load`` for that file.

    Raises:
        FileNotFoundError: If the expected ``.npy`` file does not exist.
    """
    # Fall back to the notebook-level configuration when no override is given.
    if rate_msps is None:
        rate_msps = sample_rate
    if data_root is None:
        data_root = base_directory

    # Use every trailing digit ("Probe12" -> "12"); keep the previous
    # last-character behaviour for names that do not end in a digit.
    prefix = probe_name.rstrip("0123456789")
    probe_id = probe_name[len(prefix):] or probe_name[-1]

    folder = os.path.join(data_root, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate_msps}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Attempt the load directly instead of checking for existence first, and
    # re-raise with the full path so a missing file is easy to locate.
    try:
        data = np.load(file_path)
    except FileNotFoundError as err:
        raise FileNotFoundError(f"File not found: {file_path}") from err

    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Load order (and therefore print order): train CH1, train CH2, test CH1, test CH2.
train_ch0, train_ch1 = [load_channel_data(train_probe, adc_num, ch) for ch in (1, 2)]
test_ch0, test_ch1 = [load_channel_data(test_probe, adc_num, ch) for ch in (1, 2)]

# Stack both channels row-wise; CH1 rows are labelled 0 and CH2 rows 1.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.concatenate((np.zeros(train_ch0.shape[0]), np.ones(train_ch1.shape[0])))
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.concatenate((np.zeros(test_ch0.shape[0]), np.ones(test_ch1.shape[0])))

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and then applied to both sets.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests (feature ranking and final model) share these settings.
forest_settings = {
    "n_estimators": 50,
    "max_depth": 10,
    "min_samples_split": 10,
    "min_samples_leaf": 5,
    "max_features": 'sqrt',
    "random_state": 42,
}
rf = RandomForestClassifier(**forest_settings)
rf.fit(X_train_scaled, y_train)

# Identify Important Features: keep those strictly above the 50th percentile
# of the importances reported by the initial forest.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features
print("\nTop Important Features:")
for feature_idx in important_features:
    selected_name = feature_names[feature_idx]
    selected_weight = feature_importances[feature_idx]
    print(f"{selected_name}: Importance {selected_weight:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**forest_settings)
rf_selected.fit(X_train_selected, y_train)

# Create a unique folder name dynamically
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the Model, Scaler and selected feature indices
for artifact, pkl_name in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, pkl_name))

# Predictions: hard labels plus the probability of class 1 (CH2)
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# (label, metric function, scores to compare with y_test); only log loss
# consumes the class-1 probabilities, everything else uses hard labels.
for metric_label, metric_fn, metric_input in (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
):
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Row 0 holds the samples labelled 0 (CH1), row 1 those labelled 1 (CH2);
# columns are the predicted labels in the same order.
true_ch1_row, true_ch2_row = cm[0], cm[1]
fp_rate = true_ch1_row[1] / (true_ch1_row[1] + true_ch1_row[0])
fn_rate = true_ch2_row[0] / (true_ch2_row[0] + true_ch2_row[1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC1 CH1: (398, 38)
✅ Loaded Probe1 ADC1 CH2: (398, 38)
✅ Loaded Probe3 ADC1 CH1: (398, 38)
✅ Loaded Probe3 ADC1 CH2: (398, 38)

Top Important Features:
Max_H: Importance 0.0800
Mean_H: Importance 0.0803
Mean Deviation_H: Importance 0.0400
RMS_H: Importance 0.1239
Peak-to-Peak_H: Importance 0.0000
Centroid_H: Importance 0.0001
Mean_Freq_H: Importance 0.0189
Kurtosis_Freq_H: Importance 0.0161
Variance_H: Importance 0.0200
Min_L: Importance 0.1001
Mean_L: Importance 0.1400
Std_L: Importance 0.0200
Mean Deviation_L: Importance 0.1000
RMS_L: Importance 0.1211
Entropy_L: Importance 0.0197
Mean_Freq_L: Importance 0.0398
Variance_L: Importance 0.0800

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0004
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       398
         CH2       1.00      1.00      1.00       39

#### **Sampling Rate 300MSPS**
This section contains data collected using a sampling rate of **300MSPS**. The model is trained on **Probe 1** and tested on **Probe 3**.



In [49]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder that holds the extracted feature files.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

adc_num = 1               # ADC index used in the feature file names
train_probe_dataset = 1   # probe number the model is trained on
test_probe_dataset = 3    # probe number the model is evaluated on
sample_rate = 300         # sampling-rate tag (MSPS) used in the file names

# Probe labels are derived from the dataset numbers above.
train_probe = "Probe" + str(train_probe_dataset)
test_probe = "Probe" + str(test_probe_dataset)

# Feature Names (Time High, Freq High, Time Low, Freq Low).
# Every stem appears once with the "_H" suffix and once with the "_L" suffix;
# all "_H" names come first, and within a band the time-domain stems precede
# the frequency-domain stems.
feature_names = [
    f"{stem}_{band}"
    for band in ("H", "L")
    for stem in (
        # time-domain stems
        "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
        "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
        # frequency-domain stems
        "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
        "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
    )
]

def load_channel_data(probe_name, adc_num, channel, rate_msps=None, data_root=None):
    """Load the saved feature array for one probe / ADC / channel combination.

    The file is looked up at
    ``<data_root>/P<probe id>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate_msps>_MSPS.npy``.
    The ADC is only encoded in the file name; the file contents are not
    cross-checked against it.

    Args:
        probe_name: Probe label such as ``"Probe1"``. Its trailing digits
            select the ``P<n>_fd`` folder, so multi-digit probe numbers
            resolve to the right folder.
        adc_num: ADC index used in the file name.
        channel: Channel index used in the file name.
        rate_msps: Sampling-rate tag used in the file name. Defaults to the
            module-level ``sample_rate``.
        data_root: Folder containing the ``P<n>_fd`` subfolders. Defaults to
            the module-level ``base_directory``.

    Returns:
        The array returned by ``np.load`` for that file.

    Raises:
        FileNotFoundError: If the expected ``.npy`` file does not exist.
    """
    # Fall back to the notebook-level configuration when no override is given.
    if rate_msps is None:
        rate_msps = sample_rate
    if data_root is None:
        data_root = base_directory

    # Use every trailing digit ("Probe12" -> "12"); keep the previous
    # last-character behaviour for names that do not end in a digit.
    prefix = probe_name.rstrip("0123456789")
    probe_id = probe_name[len(prefix):] or probe_name[-1]

    folder = os.path.join(data_root, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate_msps}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Attempt the load directly instead of checking for existence first, and
    # re-raise with the full path so a missing file is easy to locate.
    try:
        data = np.load(file_path)
    except FileNotFoundError as err:
        raise FileNotFoundError(f"File not found: {file_path}") from err

    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Load order (and therefore print order): train CH1, train CH2, test CH1, test CH2.
train_ch0, train_ch1 = [load_channel_data(train_probe, adc_num, ch) for ch in (1, 2)]
test_ch0, test_ch1 = [load_channel_data(test_probe, adc_num, ch) for ch in (1, 2)]

# Stack both channels row-wise; CH1 rows are labelled 0 and CH2 rows 1.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.concatenate((np.zeros(train_ch0.shape[0]), np.ones(train_ch1.shape[0])))
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.concatenate((np.zeros(test_ch0.shape[0]), np.ones(test_ch1.shape[0])))

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and then applied to both sets.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests (feature ranking and final model) share these settings.
forest_settings = {
    "n_estimators": 50,
    "max_depth": 10,
    "min_samples_split": 10,
    "min_samples_leaf": 5,
    "max_features": 'sqrt',
    "random_state": 42,
}
rf = RandomForestClassifier(**forest_settings)
rf.fit(X_train_scaled, y_train)

# Identify Important Features: keep those strictly above the 50th percentile
# of the importances reported by the initial forest.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features
print("\nTop Important Features:")
for feature_idx in important_features:
    selected_name = feature_names[feature_idx]
    selected_weight = feature_importances[feature_idx]
    print(f"{selected_name}: Importance {selected_weight:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**forest_settings)
rf_selected.fit(X_train_selected, y_train)

# Create a unique folder name dynamically
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the Model, Scaler and selected feature indices
for artifact, pkl_name in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, pkl_name))

# Predictions: hard labels plus the probability of class 1 (CH2)
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# (label, metric function, scores to compare with y_test); only log loss
# consumes the class-1 probabilities, everything else uses hard labels.
for metric_label, metric_fn, metric_input in (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
):
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Row 0 holds the samples labelled 0 (CH1), row 1 those labelled 1 (CH2);
# columns are the predicted labels in the same order.
true_ch1_row, true_ch2_row = cm[0], cm[1]
fp_rate = true_ch1_row[1] / (true_ch1_row[1] + true_ch1_row[0])
fn_rate = true_ch2_row[0] / (true_ch2_row[0] + true_ch2_row[1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC1 CH1: (332, 38)
✅ Loaded Probe1 ADC1 CH2: (332, 38)
✅ Loaded Probe3 ADC1 CH1: (332, 38)
✅ Loaded Probe3 ADC1 CH2: (331, 38)

Top Important Features:
Max_H: Importance 0.0600
Mean_H: Importance 0.0600
Std_H: Importance 0.0800
Mean Deviation_H: Importance 0.0203
RMS_H: Importance 0.0802
Entropy_H: Importance 0.0400
Mean_Freq_H: Importance 0.0600
Kurtosis_Freq_H: Importance 0.0195
Min_L: Importance 0.0400
Mean_L: Importance 0.1000
Std_L: Importance 0.0800
Mean Deviation_L: Importance 0.0600
RMS_L: Importance 0.0800
Entropy_L: Importance 0.0600
Mean_Freq_L: Importance 0.0800
Variance_L: Importance 0.0800

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0041
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       332
         CH2       1.00      1.00      1.00       331

    accuracy                         

## **Testing of Probe 2**
To conduct experiments, **ADC1** (data acquisition device) is used consistently throughout. **Probe 2** is kept constant, and the machine learning model is **always trained on Probe 2**. The model is then tested on **Probe 3** and **Probe 1** to evaluate its generalization.

The **sampling rate** is varied across different tests, ranging from **10 MSPS to 300 MSPS**.


### Training **Probe 2** data and Testing on **Probe 3**
To evaluate how well the model generalizes to a different probe, it is trained on Probe 2 and tested on Probe 3; the results for each sampling rate are reported below.

#### **Sampling Rate 10MSPS**
This section contains data collected using a sampling rate of **10MSPS**. The model is trained on **Probe 2** and tested on **Probe 3**.



In [50]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder that holds the extracted feature files.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

adc_num = 1               # ADC index used in the feature file names
train_probe_dataset = 2   # probe number the model is trained on
test_probe_dataset = 3    # probe number the model is evaluated on
sample_rate = 10          # sampling-rate tag (MSPS) used in the file names

# Probe labels are derived from the dataset numbers above.
train_probe = "Probe" + str(train_probe_dataset)
test_probe = "Probe" + str(test_probe_dataset)

# Feature Names (Time High, Freq High, Time Low, Freq Low).
# Every stem appears once with the "_H" suffix and once with the "_L" suffix;
# all "_H" names come first, and within a band the time-domain stems precede
# the frequency-domain stems.
feature_names = [
    f"{stem}_{band}"
    for band in ("H", "L")
    for stem in (
        # time-domain stems
        "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
        "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
        # frequency-domain stems
        "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
        "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
    )
]

def load_channel_data(probe_name, adc_num, channel, rate_msps=None, data_root=None):
    """Load the saved feature array for one probe / ADC / channel combination.

    The file is looked up at
    ``<data_root>/P<probe id>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate_msps>_MSPS.npy``.
    The ADC is only encoded in the file name; the file contents are not
    cross-checked against it.

    Args:
        probe_name: Probe label such as ``"Probe1"``. Its trailing digits
            select the ``P<n>_fd`` folder, so multi-digit probe numbers
            resolve to the right folder.
        adc_num: ADC index used in the file name.
        channel: Channel index used in the file name.
        rate_msps: Sampling-rate tag used in the file name. Defaults to the
            module-level ``sample_rate``.
        data_root: Folder containing the ``P<n>_fd`` subfolders. Defaults to
            the module-level ``base_directory``.

    Returns:
        The array returned by ``np.load`` for that file.

    Raises:
        FileNotFoundError: If the expected ``.npy`` file does not exist.
    """
    # Fall back to the notebook-level configuration when no override is given.
    if rate_msps is None:
        rate_msps = sample_rate
    if data_root is None:
        data_root = base_directory

    # Use every trailing digit ("Probe12" -> "12"); keep the previous
    # last-character behaviour for names that do not end in a digit.
    prefix = probe_name.rstrip("0123456789")
    probe_id = probe_name[len(prefix):] or probe_name[-1]

    folder = os.path.join(data_root, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate_msps}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Attempt the load directly instead of checking for existence first, and
    # re-raise with the full path so a missing file is easy to locate.
    try:
        data = np.load(file_path)
    except FileNotFoundError as err:
        raise FileNotFoundError(f"File not found: {file_path}") from err

    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Load order (and therefore print order): train CH1, train CH2, test CH1, test CH2.
train_ch0, train_ch1 = [load_channel_data(train_probe, adc_num, ch) for ch in (1, 2)]
test_ch0, test_ch1 = [load_channel_data(test_probe, adc_num, ch) for ch in (1, 2)]

# Stack both channels row-wise; CH1 rows are labelled 0 and CH2 rows 1.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.concatenate((np.zeros(train_ch0.shape[0]), np.ones(train_ch1.shape[0])))
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.concatenate((np.zeros(test_ch0.shape[0]), np.ones(test_ch1.shape[0])))

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and then applied to both sets.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests (feature ranking and final model) share these settings.
forest_settings = {
    "n_estimators": 50,
    "max_depth": 10,
    "min_samples_split": 10,
    "min_samples_leaf": 5,
    "max_features": 'sqrt',
    "random_state": 42,
}
rf = RandomForestClassifier(**forest_settings)
rf.fit(X_train_scaled, y_train)

# Identify Important Features: keep those strictly above the 50th percentile
# of the importances reported by the initial forest.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features
print("\nTop Important Features:")
for feature_idx in important_features:
    selected_name = feature_names[feature_idx]
    selected_weight = feature_importances[feature_idx]
    print(f"{selected_name}: Importance {selected_weight:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**forest_settings)
rf_selected.fit(X_train_selected, y_train)

# Create a unique folder name dynamically
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the Model, Scaler and selected feature indices
for artifact, pkl_name in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, pkl_name))

# Predictions: hard labels plus the probability of class 1 (CH2)
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# (label, metric function, scores to compare with y_test); only log loss
# consumes the class-1 probabilities, everything else uses hard labels.
for metric_label, metric_fn, metric_input in (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
):
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Row 0 holds the samples labelled 0 (CH1), row 1 those labelled 1 (CH2);
# columns are the predicted labels in the same order.
true_ch1_row, true_ch2_row = cm[0], cm[1]
fp_rate = true_ch1_row[1] / (true_ch1_row[1] + true_ch1_row[0])
fn_rate = true_ch2_row[0] / (true_ch2_row[0] + true_ch2_row[1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC1 CH1: (9998, 38)
✅ Loaded Probe2 ADC1 CH2: (9998, 38)
✅ Loaded Probe3 ADC1 CH1: (9998, 38)
✅ Loaded Probe3 ADC1 CH2: (9998, 38)

Top Important Features:
Min_H: Importance 0.0178
Mean_H: Importance 0.0563
Std_H: Importance 0.0118
Mean Deviation_H: Importance 0.0153
RMS_H: Importance 0.1167
Centroid_H: Importance 0.0096
Entropy_H: Importance 0.0320
Mean_Freq_H: Importance 0.0574
Irregularity_H: Importance 0.0143
Variance_H: Importance 0.0354
Max_L: Importance 0.0187
Min_L: Importance 0.0724
Mean_L: Importance 0.1135
RMS_L: Importance 0.1859
Skewness_L: Importance 0.0125
Centroid_L: Importance 0.0103
Entropy_L: Importance 0.0303
Mean_Freq_L: Importance 0.0399
Variance_L: Importance 0.0820

Model Performance After Feature Selection:
Test Accuracy: 0.9792
Balanced Accuracy: 0.9792
MCC: 0.9585
Log Loss: 0.0553
F1 Score: 0.9793
Recall: 0.9822
Precision: 0.9764
              precision    recall  f1-score   support

         CH1       0.98      0.98      0.98      9998
     

#### **Sampling Rate 20MSPS**
This section contains data collected using a sampling rate of **20MSPS**. The model is trained on **Probe 2** and tested on **Probe 3**.



In [51]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder that holds the extracted feature files.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

adc_num = 1               # ADC index used in the feature file names
train_probe_dataset = 2   # probe number the model is trained on
test_probe_dataset = 3    # probe number the model is evaluated on
sample_rate = 20          # sampling-rate tag (MSPS) used in the file names

# Probe labels are derived from the dataset numbers above.
train_probe = "Probe" + str(train_probe_dataset)
test_probe = "Probe" + str(test_probe_dataset)

# Feature Names (Time High, Freq High, Time Low, Freq Low).
# Every stem appears once with the "_H" suffix and once with the "_L" suffix;
# all "_H" names come first, and within a band the time-domain stems precede
# the frequency-domain stems.
feature_names = [
    f"{stem}_{band}"
    for band in ("H", "L")
    for stem in (
        # time-domain stems
        "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
        "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
        # frequency-domain stems
        "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
        "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
    )
]

def load_channel_data(probe_name, adc_num, channel, rate_msps=None, data_root=None):
    """Load the saved feature array for one probe / ADC / channel combination.

    The file is looked up at
    ``<data_root>/P<probe id>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate_msps>_MSPS.npy``.
    The ADC is only encoded in the file name; the file contents are not
    cross-checked against it.

    Args:
        probe_name: Probe label such as ``"Probe1"``. Its trailing digits
            select the ``P<n>_fd`` folder, so multi-digit probe numbers
            resolve to the right folder.
        adc_num: ADC index used in the file name.
        channel: Channel index used in the file name.
        rate_msps: Sampling-rate tag used in the file name. Defaults to the
            module-level ``sample_rate``.
        data_root: Folder containing the ``P<n>_fd`` subfolders. Defaults to
            the module-level ``base_directory``.

    Returns:
        The array returned by ``np.load`` for that file.

    Raises:
        FileNotFoundError: If the expected ``.npy`` file does not exist.
    """
    # Fall back to the notebook-level configuration when no override is given.
    if rate_msps is None:
        rate_msps = sample_rate
    if data_root is None:
        data_root = base_directory

    # Use every trailing digit ("Probe12" -> "12"); keep the previous
    # last-character behaviour for names that do not end in a digit.
    prefix = probe_name.rstrip("0123456789")
    probe_id = probe_name[len(prefix):] or probe_name[-1]

    folder = os.path.join(data_root, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate_msps}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Attempt the load directly instead of checking for existence first, and
    # re-raise with the full path so a missing file is easy to locate.
    try:
        data = np.load(file_path)
    except FileNotFoundError as err:
        raise FileNotFoundError(f"File not found: {file_path}") from err

    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Load order (and therefore print order): train CH1, train CH2, test CH1, test CH2.
train_ch0, train_ch1 = [load_channel_data(train_probe, adc_num, ch) for ch in (1, 2)]
test_ch0, test_ch1 = [load_channel_data(test_probe, adc_num, ch) for ch in (1, 2)]

# Stack both channels row-wise; CH1 rows are labelled 0 and CH2 rows 1.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.concatenate((np.zeros(train_ch0.shape[0]), np.ones(train_ch1.shape[0])))
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.concatenate((np.zeros(test_ch0.shape[0]), np.ones(test_ch1.shape[0])))

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and then applied to both sets.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests (feature ranking and final model) share these settings.
forest_settings = {
    "n_estimators": 50,
    "max_depth": 10,
    "min_samples_split": 10,
    "min_samples_leaf": 5,
    "max_features": 'sqrt',
    "random_state": 42,
}
rf = RandomForestClassifier(**forest_settings)
rf.fit(X_train_scaled, y_train)

# Identify Important Features: keep those strictly above the 50th percentile
# of the importances reported by the initial forest.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features
print("\nTop Important Features:")
for feature_idx in important_features:
    selected_name = feature_names[feature_idx]
    selected_weight = feature_importances[feature_idx]
    print(f"{selected_name}: Importance {selected_weight:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**forest_settings)
rf_selected.fit(X_train_selected, y_train)

# Create a unique folder name dynamically
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the Model, Scaler and selected feature indices
for artifact, pkl_name in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, pkl_name))

# Predictions: hard labels plus the probability of class 1 (CH2)
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# (label, metric function, scores to compare with y_test); only log loss
# consumes the class-1 probabilities, everything else uses hard labels.
for metric_label, metric_fn, metric_input in (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
):
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Row 0 holds the samples labelled 0 (CH1), row 1 those labelled 1 (CH2);
# columns are the predicted labels in the same order.
true_ch1_row, true_ch2_row = cm[0], cm[1]
fp_rate = true_ch1_row[1] / (true_ch1_row[1] + true_ch1_row[0])
fn_rate = true_ch2_row[0] / (true_ch2_row[0] + true_ch2_row[1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC1 CH1: (4998, 38)
✅ Loaded Probe2 ADC1 CH2: (4998, 38)
✅ Loaded Probe3 ADC1 CH1: (4998, 38)
✅ Loaded Probe3 ADC1 CH2: (4998, 38)

Top Important Features:
Max_H: Importance 0.1194
Min_H: Importance 0.0143
Mean_H: Importance 0.1108
Mean Deviation_H: Importance 0.0075
RMS_H: Importance 0.2006
Entropy_H: Importance 0.0357
Mean_Freq_H: Importance 0.0776
Variance_H: Importance 0.0614
Max_L: Importance 0.0141
Min_L: Importance 0.0410
Mean_L: Importance 0.1045
Std_L: Importance 0.0008
RMS_L: Importance 0.1372
Skewness_L: Importance 0.0021
Kurtosis_L: Importance 0.0013
Peak-to-Peak_L: Importance 0.0006
Entropy_L: Importance 0.0486
Mean_Freq_L: Importance 0.0137
Variance_L: Importance 0.0078

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0006
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      4998
         C

#### **Sampling Rate 30MSPS**
This section contains data collected using a sampling rate of **30MSPS**. The model is trained on **Probe 2** and tested on **Probe 3**.



In [52]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder that holds the extracted feature files.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

adc_num = 1               # ADC index used in the feature file names
train_probe_dataset = 2   # probe number the model is trained on
test_probe_dataset = 3    # probe number the model is evaluated on
sample_rate = 30          # sampling-rate tag (MSPS) used in the file names

# Probe labels are derived from the dataset numbers above.
train_probe = "Probe" + str(train_probe_dataset)
test_probe = "Probe" + str(test_probe_dataset)

# Feature Names (Time High, Freq High, Time Low, Freq Low).
# Every stem appears once with the "_H" suffix and once with the "_L" suffix;
# all "_H" names come first, and within a band the time-domain stems precede
# the frequency-domain stems.
feature_names = [
    f"{stem}_{band}"
    for band in ("H", "L")
    for stem in (
        # time-domain stems
        "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
        "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
        # frequency-domain stems
        "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
        "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
    )
]

def load_channel_data(probe_name, adc_num, channel, *, rate_msps=None, data_root=None):
    """Load the saved feature matrix for one probe / ADC / channel.

    The file is looked up at
    ``<data_root>/P<id>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate_msps>_MSPS.npy``
    where ``<id>`` is the run of digits at the end of ``probe_name``.

    Args:
        probe_name: Probe label such as ``"Probe2"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.
        rate_msps: Sampling rate used in the file name. Defaults to the
            module-level ``sample_rate``.
        data_root: Directory holding the ``P<id>_fd`` folders. Defaults to
            the module-level ``base_directory``.

    Returns:
        The array stored in the ``.npy`` file, as returned by ``np.load``.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Fall back to the notebook-level settings only when no override is given,
    # so existing three-argument calls behave exactly as before.
    if rate_msps is None:
        rate_msps = sample_rate
    if data_root is None:
        data_root = base_directory

    # Use every trailing digit so "Probe12" maps to "P12_fd" instead of "P2_fd".
    # Names without trailing digits keep the previous last-character behavior.
    trailing_digits = probe_name[len(probe_name.rstrip("0123456789")):]
    probe_id = trailing_digits or probe_name[-1]

    folder = os.path.join(data_root, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate_msps}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Load CH1 / CH2 feature matrices: training probe first, then the test probe
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Stack both channels row-wise; CH1 rows are labelled 0.0 and CH2 rows 1.0
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ------------------------------------------------------------------
# Preprocessing: standardize with statistics from the training set only
# ------------------------------------------------------------------
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ------------------------------------------------------------------
# First pass: fit on all features to rank them by importance
# ------------------------------------------------------------------
# Both forests share the same hyperparameters and seed
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Keep the features whose importance is strictly above the median importance
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

print("\nTop Important Features:")
for idx in important_features:
    name, score = feature_names[idx], feature_importances[idx]
    print(f"{name}: Importance {score:.4f}")

# ------------------------------------------------------------------
# Second pass: refit on the selected features only
# ------------------------------------------------------------------
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# Persist the model, the scaler and the selected column indices together,
# in a folder named after the probe pair, ADC and sampling rate
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)
artifacts = {
    "RandomForest_Model.pkl": rf_selected,
    "Scaler.pkl": scaler,
    "Selected_Features.pkl": important_features,
}
for file_name, artifact in artifacts.items():
    joblib.dump(artifact, os.path.join(model_dir, file_name))

# Hard predictions plus the predicted probability of class 1 (CH2)
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ------------------------------------------------------------------
# Evaluation on the test probe
# ------------------------------------------------------------------
print("\nModel Performance After Feature Selection:")
metric_table = [
    ("Test Accuracy", accuracy_score(y_test, y_pred_selected)),
    ("Balanced Accuracy", balanced_accuracy_score(y_test, y_pred_selected)),
    ("MCC", matthews_corrcoef(y_test, y_pred_selected)),
    ("Log Loss", log_loss(y_test, y_probs_selected)),
    ("F1 Score", f1_score(y_test, y_pred_selected)),
    ("Recall", recall_score(y_test, y_pred_selected)),
    ("Precision", precision_score(y_test, y_pred_selected)),
]
for label, value in metric_table:
    print(f"{label}: {value:.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix rows are true labels: row 0 = label 0 (CH1), row 1 = label 1 (CH2)
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
true_ch1_row, true_ch2_row = cm[0], cm[1]
fp_rate = true_ch1_row[1] / true_ch1_row.sum()   # label-0 samples predicted as 1
fn_rate = true_ch2_row[0] / true_ch2_row.sum()   # label-1 samples predicted as 0
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC1 CH1: (3332, 38)
✅ Loaded Probe2 ADC1 CH2: (3331, 38)
✅ Loaded Probe3 ADC1 CH1: (3331, 38)
✅ Loaded Probe3 ADC1 CH2: (3332, 38)

Top Important Features:
Max_H: Importance 0.0676
Min_H: Importance 0.0269
Mean_H: Importance 0.1364
Mean Deviation_H: Importance 0.0034
RMS_H: Importance 0.2181
Centroid_H: Importance 0.0021
Entropy_H: Importance 0.0171
Mean_Freq_H: Importance 0.0392
Irregularity_H: Importance 0.0009
Variance_H: Importance 0.0507
Max_L: Importance 0.0018
Min_L: Importance 0.0394
Mean_L: Importance 0.1537
RMS_L: Importance 0.1082
Kurtosis_L: Importance 0.0019
Entropy_L: Importance 0.0264
Mean_Freq_L: Importance 0.0095
Irregularity_L: Importance 0.0008
Variance_L: Importance 0.0949

Model Performance After Feature Selection:
Test Accuracy: 0.9865
Balanced Accuracy: 0.9865
MCC: 0.9732
Log Loss: 0.0347
F1 Score: 0.9866
Recall: 0.9973
Precision: 0.9762
              precision    recall  f1-score   support

         CH1       1.00      0.98      0.99      3331
 

#### **Sampling Rate 40MSPS**
This section contains data collected using a sampling rate of **40MSPS**. The model is trained on **Probe 2** and tested on **Probe 3**.



In [53]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selection for this cell
adc_num = 1               # ADC number used in the data file names
train_probe_dataset = 2   # probe number the model is trained on
test_probe_dataset = 3    # probe number the model is evaluated on
sample_rate = 40          # sampling rate (MSPS) used in the data file names

# Probe labels derived from the dataset numbers above
train_probe = "Probe" + str(train_probe_dataset)
test_probe = "Probe" + str(test_probe_dataset)

# Column names of the feature matrix. The same 19 descriptors appear twice:
# first with the "_H" suffix, then with the "_L" suffix (the original comment
# lists the order as Time High, Freq High, Time Low, Freq Low).
_stat_names = [
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness", "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq", "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
]
feature_names = [f"{stat}_{suffix}" for suffix in ("H", "L") for stat in _stat_names]

def load_channel_data(probe_name, adc_num, channel, *, rate_msps=None, data_root=None):
    """Load the saved feature matrix for one probe / ADC / channel.

    The file is looked up at
    ``<data_root>/P<id>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate_msps>_MSPS.npy``
    where ``<id>`` is the run of digits at the end of ``probe_name``.

    Args:
        probe_name: Probe label such as ``"Probe2"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.
        rate_msps: Sampling rate used in the file name. Defaults to the
            module-level ``sample_rate``.
        data_root: Directory holding the ``P<id>_fd`` folders. Defaults to
            the module-level ``base_directory``.

    Returns:
        The array stored in the ``.npy`` file, as returned by ``np.load``.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Fall back to the notebook-level settings only when no override is given,
    # so existing three-argument calls behave exactly as before.
    if rate_msps is None:
        rate_msps = sample_rate
    if data_root is None:
        data_root = base_directory

    # Use every trailing digit so "Probe12" maps to "P12_fd" instead of "P2_fd".
    # Names without trailing digits keep the previous last-character behavior.
    trailing_digits = probe_name[len(probe_name.rstrip("0123456789")):]
    probe_id = trailing_digits or probe_name[-1]

    folder = os.path.join(data_root, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate_msps}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Load CH1 / CH2 feature matrices: training probe first, then the test probe
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Stack both channels row-wise; CH1 rows are labelled 0.0 and CH2 rows 1.0
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ------------------------------------------------------------------
# Preprocessing: standardize with statistics from the training set only
# ------------------------------------------------------------------
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ------------------------------------------------------------------
# First pass: fit on all features to rank them by importance
# ------------------------------------------------------------------
# Both forests share the same hyperparameters and seed
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Keep the features whose importance is strictly above the median importance
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

print("\nTop Important Features:")
for idx in important_features:
    name, score = feature_names[idx], feature_importances[idx]
    print(f"{name}: Importance {score:.4f}")

# ------------------------------------------------------------------
# Second pass: refit on the selected features only
# ------------------------------------------------------------------
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# Persist the model, the scaler and the selected column indices together,
# in a folder named after the probe pair, ADC and sampling rate
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)
artifacts = {
    "RandomForest_Model.pkl": rf_selected,
    "Scaler.pkl": scaler,
    "Selected_Features.pkl": important_features,
}
for file_name, artifact in artifacts.items():
    joblib.dump(artifact, os.path.join(model_dir, file_name))

# Hard predictions plus the predicted probability of class 1 (CH2)
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ------------------------------------------------------------------
# Evaluation on the test probe
# ------------------------------------------------------------------
print("\nModel Performance After Feature Selection:")
metric_table = [
    ("Test Accuracy", accuracy_score(y_test, y_pred_selected)),
    ("Balanced Accuracy", balanced_accuracy_score(y_test, y_pred_selected)),
    ("MCC", matthews_corrcoef(y_test, y_pred_selected)),
    ("Log Loss", log_loss(y_test, y_probs_selected)),
    ("F1 Score", f1_score(y_test, y_pred_selected)),
    ("Recall", recall_score(y_test, y_pred_selected)),
    ("Precision", precision_score(y_test, y_pred_selected)),
]
for label, value in metric_table:
    print(f"{label}: {value:.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix rows are true labels: row 0 = label 0 (CH1), row 1 = label 1 (CH2)
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
true_ch1_row, true_ch2_row = cm[0], cm[1]
fp_rate = true_ch1_row[1] / true_ch1_row.sum()   # label-0 samples predicted as 1
fn_rate = true_ch2_row[0] / true_ch2_row.sum()   # label-1 samples predicted as 0
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC1 CH1: (2481, 38)
✅ Loaded Probe2 ADC1 CH2: (2481, 38)
✅ Loaded Probe3 ADC1 CH1: (2481, 38)
✅ Loaded Probe3 ADC1 CH2: (2481, 38)

Top Important Features:
Max_H: Importance 0.0664
Min_H: Importance 0.0285
Mean_H: Importance 0.0956
Mean Deviation_H: Importance 0.0158
RMS_H: Importance 0.1320
Centroid_H: Importance 0.0000
Entropy_H: Importance 0.0139
Mean_Freq_H: Importance 0.0567
Irregularity_H: Importance 0.0000
Variance_H: Importance 0.0610
Max_L: Importance 0.0006
Min_L: Importance 0.0283
Mean_L: Importance 0.2098
Mean Deviation_L: Importance 0.0000
RMS_L: Importance 0.1206
Entropy_L: Importance 0.0309
Mean_Freq_L: Importance 0.0122
Variance_L: Importance 0.1276

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0002
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      2481
         CH2       1.00      

#### **Sampling Rate 50MSPS**
This section contains data collected using a sampling rate of **50MSPS**. The model is trained on **Probe 2** and tested on **Probe 3**.



In [54]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selection for this cell
adc_num = 1               # ADC number used in the data file names
train_probe_dataset = 2   # probe number the model is trained on
test_probe_dataset = 3    # probe number the model is evaluated on
sample_rate = 50          # sampling rate (MSPS) used in the data file names

# Probe labels derived from the dataset numbers above
train_probe = "Probe" + str(train_probe_dataset)
test_probe = "Probe" + str(test_probe_dataset)

# Column names of the feature matrix. The same 19 descriptors appear twice:
# first with the "_H" suffix, then with the "_L" suffix (the original comment
# lists the order as Time High, Freq High, Time Low, Freq Low).
_stat_names = [
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness", "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq", "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
]
feature_names = [f"{stat}_{suffix}" for suffix in ("H", "L") for stat in _stat_names]

def load_channel_data(probe_name, adc_num, channel, *, rate_msps=None, data_root=None):
    """Load the saved feature matrix for one probe / ADC / channel.

    The file is looked up at
    ``<data_root>/P<id>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate_msps>_MSPS.npy``
    where ``<id>`` is the run of digits at the end of ``probe_name``.

    Args:
        probe_name: Probe label such as ``"Probe2"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.
        rate_msps: Sampling rate used in the file name. Defaults to the
            module-level ``sample_rate``.
        data_root: Directory holding the ``P<id>_fd`` folders. Defaults to
            the module-level ``base_directory``.

    Returns:
        The array stored in the ``.npy`` file, as returned by ``np.load``.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Fall back to the notebook-level settings only when no override is given,
    # so existing three-argument calls behave exactly as before.
    if rate_msps is None:
        rate_msps = sample_rate
    if data_root is None:
        data_root = base_directory

    # Use every trailing digit so "Probe12" maps to "P12_fd" instead of "P2_fd".
    # Names without trailing digits keep the previous last-character behavior.
    trailing_digits = probe_name[len(probe_name.rstrip("0123456789")):]
    probe_id = trailing_digits or probe_name[-1]

    folder = os.path.join(data_root, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate_msps}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Load CH1 / CH2 feature matrices: training probe first, then the test probe
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Stack both channels row-wise; CH1 rows are labelled 0.0 and CH2 rows 1.0
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ------------------------------------------------------------------
# Preprocessing: standardize with statistics from the training set only
# ------------------------------------------------------------------
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ------------------------------------------------------------------
# First pass: fit on all features to rank them by importance
# ------------------------------------------------------------------
# Both forests share the same hyperparameters and seed
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Keep the features whose importance is strictly above the median importance
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

print("\nTop Important Features:")
for idx in important_features:
    name, score = feature_names[idx], feature_importances[idx]
    print(f"{name}: Importance {score:.4f}")

# ------------------------------------------------------------------
# Second pass: refit on the selected features only
# ------------------------------------------------------------------
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# Persist the model, the scaler and the selected column indices together,
# in a folder named after the probe pair, ADC and sampling rate
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)
artifacts = {
    "RandomForest_Model.pkl": rf_selected,
    "Scaler.pkl": scaler,
    "Selected_Features.pkl": important_features,
}
for file_name, artifact in artifacts.items():
    joblib.dump(artifact, os.path.join(model_dir, file_name))

# Hard predictions plus the predicted probability of class 1 (CH2)
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ------------------------------------------------------------------
# Evaluation on the test probe
# ------------------------------------------------------------------
print("\nModel Performance After Feature Selection:")
metric_table = [
    ("Test Accuracy", accuracy_score(y_test, y_pred_selected)),
    ("Balanced Accuracy", balanced_accuracy_score(y_test, y_pred_selected)),
    ("MCC", matthews_corrcoef(y_test, y_pred_selected)),
    ("Log Loss", log_loss(y_test, y_probs_selected)),
    ("F1 Score", f1_score(y_test, y_pred_selected)),
    ("Recall", recall_score(y_test, y_pred_selected)),
    ("Precision", precision_score(y_test, y_pred_selected)),
]
for label, value in metric_table:
    print(f"{label}: {value:.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix rows are true labels: row 0 = label 0 (CH1), row 1 = label 1 (CH2)
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
true_ch1_row, true_ch2_row = cm[0], cm[1]
fp_rate = true_ch1_row[1] / true_ch1_row.sum()   # label-0 samples predicted as 1
fn_rate = true_ch2_row[0] / true_ch2_row.sum()   # label-1 samples predicted as 0
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC1 CH1: (1998, 38)
✅ Loaded Probe2 ADC1 CH2: (1998, 38)
✅ Loaded Probe3 ADC1 CH1: (1998, 38)
✅ Loaded Probe3 ADC1 CH2: (1998, 38)

Top Important Features:
Max_H: Importance 0.0682
Min_H: Importance 0.0077
Mean_H: Importance 0.0993
Std_H: Importance 0.0100
Mean Deviation_H: Importance 0.0238
RMS_H: Importance 0.1287
Centroid_H: Importance 0.0006
Entropy_H: Importance 0.0283
Spread_H: Importance 0.0072
Mean_Freq_H: Importance 0.0567
Variance_H: Importance 0.0656
Max_L: Importance 0.0006
Min_L: Importance 0.0133
Mean_L: Importance 0.1748
Mean Deviation_L: Importance 0.0109
RMS_L: Importance 0.1363
Entropy_L: Importance 0.0447
Mean_Freq_L: Importance 0.0143
Variance_L: Importance 0.1081

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0005
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      1998
         C

#### **Sampling Rate 100MSPS**
This section contains data collected using a sampling rate of **100MSPS**. The model is trained on **Probe 2** and tested on **Probe 3**.



In [55]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selection for this cell
adc_num = 1               # ADC number used in the data file names
train_probe_dataset = 2   # probe number the model is trained on
test_probe_dataset = 3    # probe number the model is evaluated on
sample_rate = 100         # sampling rate (MSPS) used in the data file names

# Probe labels derived from the dataset numbers above
train_probe = "Probe" + str(train_probe_dataset)
test_probe = "Probe" + str(test_probe_dataset)

# Column names of the feature matrix. The same 19 descriptors appear twice:
# first with the "_H" suffix, then with the "_L" suffix (the original comment
# lists the order as Time High, Freq High, Time Low, Freq Low).
_stat_names = [
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness", "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq", "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
]
feature_names = [f"{stat}_{suffix}" for suffix in ("H", "L") for stat in _stat_names]

def load_channel_data(probe_name, adc_num, channel, *, rate_msps=None, data_root=None):
    """Load the saved feature matrix for one probe / ADC / channel.

    The file is looked up at
    ``<data_root>/P<id>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate_msps>_MSPS.npy``
    where ``<id>`` is the run of digits at the end of ``probe_name``.

    Args:
        probe_name: Probe label such as ``"Probe2"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.
        rate_msps: Sampling rate used in the file name. Defaults to the
            module-level ``sample_rate``.
        data_root: Directory holding the ``P<id>_fd`` folders. Defaults to
            the module-level ``base_directory``.

    Returns:
        The array stored in the ``.npy`` file, as returned by ``np.load``.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Fall back to the notebook-level settings only when no override is given,
    # so existing three-argument calls behave exactly as before.
    if rate_msps is None:
        rate_msps = sample_rate
    if data_root is None:
        data_root = base_directory

    # Use every trailing digit so "Probe12" maps to "P12_fd" instead of "P2_fd".
    # Names without trailing digits keep the previous last-character behavior.
    trailing_digits = probe_name[len(probe_name.rstrip("0123456789")):]
    probe_id = trailing_digits or probe_name[-1]

    folder = os.path.join(data_root, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate_msps}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Load CH1 / CH2 feature matrices: training probe first, then the test probe
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Stack both channels row-wise; CH1 rows are labelled 0.0 and CH2 rows 1.0
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ------------------------------------------------------------------
# Preprocessing: standardize with statistics from the training set only
# ------------------------------------------------------------------
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ------------------------------------------------------------------
# First pass: fit on all features to rank them by importance
# ------------------------------------------------------------------
# Both forests share the same hyperparameters and seed
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Keep the features whose importance is strictly above the median importance
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

print("\nTop Important Features:")
for idx in important_features:
    name, score = feature_names[idx], feature_importances[idx]
    print(f"{name}: Importance {score:.4f}")

# ------------------------------------------------------------------
# Second pass: refit on the selected features only
# ------------------------------------------------------------------
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# Persist the model, the scaler and the selected column indices together,
# in a folder named after the probe pair, ADC and sampling rate
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)
artifacts = {
    "RandomForest_Model.pkl": rf_selected,
    "Scaler.pkl": scaler,
    "Selected_Features.pkl": important_features,
}
for file_name, artifact in artifacts.items():
    joblib.dump(artifact, os.path.join(model_dir, file_name))

# Hard predictions plus the predicted probability of class 1 (CH2)
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ------------------------------------------------------------------
# Evaluation on the test probe
# ------------------------------------------------------------------
print("\nModel Performance After Feature Selection:")
metric_table = [
    ("Test Accuracy", accuracy_score(y_test, y_pred_selected)),
    ("Balanced Accuracy", balanced_accuracy_score(y_test, y_pred_selected)),
    ("MCC", matthews_corrcoef(y_test, y_pred_selected)),
    ("Log Loss", log_loss(y_test, y_probs_selected)),
    ("F1 Score", f1_score(y_test, y_pred_selected)),
    ("Recall", recall_score(y_test, y_pred_selected)),
    ("Precision", precision_score(y_test, y_pred_selected)),
]
for label, value in metric_table:
    print(f"{label}: {value:.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix rows are true labels: row 0 = label 0 (CH1), row 1 = label 1 (CH2)
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
true_ch1_row, true_ch2_row = cm[0], cm[1]
fp_rate = true_ch1_row[1] / true_ch1_row.sum()   # label-0 samples predicted as 1
fn_rate = true_ch2_row[0] / true_ch2_row.sum()   # label-1 samples predicted as 0
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC1 CH1: (998, 38)
✅ Loaded Probe2 ADC1 CH2: (998, 38)
✅ Loaded Probe3 ADC1 CH1: (998, 38)
✅ Loaded Probe3 ADC1 CH2: (998, 38)

Top Important Features:
Max_H: Importance 0.1078
Min_H: Importance 0.0001
Mean_H: Importance 0.0924
Mean Deviation_H: Importance 0.0135
RMS_H: Importance 0.1310
Entropy_H: Importance 0.0146
Mean_Freq_H: Importance 0.0398
Kurtosis_Freq_H: Importance 0.0045
Irregularity_H: Importance 0.0001
Variance_H: Importance 0.0601
Max_L: Importance 0.0009
Min_L: Importance 0.0966
Mean_L: Importance 0.1804
Mean Deviation_L: Importance 0.0169
RMS_L: Importance 0.1265
Peak-to-Peak_L: Importance 0.0011
Entropy_L: Importance 0.0139
Mean_Freq_L: Importance 0.0126
Variance_L: Importance 0.0873

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0088
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00     

#### **Sampling Rate 150MSPS**
This section contains data collected using a sampling rate of **150MSPS**. The model is trained on **Probe 2** and tested on **Probe 3**.



In [56]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selection for this cell
adc_num = 1               # ADC number used in the data file names
train_probe_dataset = 2   # probe number the model is trained on
test_probe_dataset = 3    # probe number the model is evaluated on
sample_rate = 150         # sampling rate (MSPS) used in the data file names

# Probe labels derived from the dataset numbers above
train_probe = "Probe" + str(train_probe_dataset)
test_probe = "Probe" + str(test_probe_dataset)

# Column names of the feature matrix. The same 19 descriptors appear twice:
# first with the "_H" suffix, then with the "_L" suffix (the original comment
# lists the order as Time High, Freq High, Time Low, Freq Low).
_stat_names = [
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness", "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq", "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
]
feature_names = [f"{stat}_{suffix}" for suffix in ("H", "L") for stat in _stat_names]

def load_channel_data(probe_name, adc_num, channel, *, rate_msps=None, data_root=None):
    """Load the saved feature matrix for one probe / ADC / channel.

    The file is looked up at
    ``<data_root>/P<id>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate_msps>_MSPS.npy``
    where ``<id>`` is the run of digits at the end of ``probe_name``.

    Args:
        probe_name: Probe label such as ``"Probe2"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.
        rate_msps: Sampling rate used in the file name. Defaults to the
            module-level ``sample_rate``.
        data_root: Directory holding the ``P<id>_fd`` folders. Defaults to
            the module-level ``base_directory``.

    Returns:
        The array stored in the ``.npy`` file, as returned by ``np.load``.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Fall back to the notebook-level settings only when no override is given,
    # so existing three-argument calls behave exactly as before.
    if rate_msps is None:
        rate_msps = sample_rate
    if data_root is None:
        data_root = base_directory

    # Use every trailing digit so "Probe12" maps to "P12_fd" instead of "P2_fd".
    # Names without trailing digits keep the previous last-character behavior.
    trailing_digits = probe_name[len(probe_name.rstrip("0123456789")):]
    probe_id = trailing_digits or probe_name[-1]

    folder = os.path.join(data_root, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate_msps}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Load CH1 / CH2 feature matrices: training probe first, then the test probe
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Stack both channels row-wise; CH1 rows are labelled 0.0 and CH2 rows 1.0
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ------------------------------------------------------------------
# Preprocessing: standardize with statistics from the training set only
# ------------------------------------------------------------------
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ------------------------------------------------------------------
# First pass: fit on all features to rank them by importance
# ------------------------------------------------------------------
# Both forests share the same hyperparameters and seed
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Keep the features whose importance is strictly above the median importance
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

print("\nTop Important Features:")
for idx in important_features:
    name, score = feature_names[idx], feature_importances[idx]
    print(f"{name}: Importance {score:.4f}")

# ------------------------------------------------------------------
# Second pass: refit on the selected features only
# ------------------------------------------------------------------
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# Persist the model, the scaler and the selected column indices together,
# in a folder named after the probe pair, ADC and sampling rate
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)
artifacts = {
    "RandomForest_Model.pkl": rf_selected,
    "Scaler.pkl": scaler,
    "Selected_Features.pkl": important_features,
}
for file_name, artifact in artifacts.items():
    joblib.dump(artifact, os.path.join(model_dir, file_name))

# Hard predictions plus the predicted probability of class 1 (CH2)
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ------------------------------------------------------------------
# Evaluation on the test probe
# ------------------------------------------------------------------
print("\nModel Performance After Feature Selection:")
metric_table = [
    ("Test Accuracy", accuracy_score(y_test, y_pred_selected)),
    ("Balanced Accuracy", balanced_accuracy_score(y_test, y_pred_selected)),
    ("MCC", matthews_corrcoef(y_test, y_pred_selected)),
    ("Log Loss", log_loss(y_test, y_probs_selected)),
    ("F1 Score", f1_score(y_test, y_pred_selected)),
    ("Recall", recall_score(y_test, y_pred_selected)),
    ("Precision", precision_score(y_test, y_pred_selected)),
]
for label, value in metric_table:
    print(f"{label}: {value:.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix rows are true labels: row 0 = label 0 (CH1), row 1 = label 1 (CH2)
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
true_ch1_row, true_ch2_row = cm[0], cm[1]
fp_rate = true_ch1_row[1] / true_ch1_row.sum()   # label-0 samples predicted as 1
fn_rate = true_ch2_row[0] / true_ch2_row.sum()   # label-1 samples predicted as 0
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC1 CH1: (665, 38)
✅ Loaded Probe2 ADC1 CH2: (665, 38)
✅ Loaded Probe3 ADC1 CH1: (665, 38)
✅ Loaded Probe3 ADC1 CH2: (665, 38)

Top Important Features:
Max_H: Importance 0.1000
Mean_H: Importance 0.0848
Std_H: Importance 0.1000
Mean Deviation_H: Importance 0.0811
RMS_H: Importance 0.0635
Entropy_H: Importance 0.1200
Spread_H: Importance 0.0161
Mean_Freq_H: Importance 0.0600
Kurtosis_Freq_H: Importance 0.0154
Variance_H: Importance 0.0200
Mean_L: Importance 0.1000
Mean Deviation_L: Importance 0.0800
RMS_L: Importance 0.0802
Entropy_L: Importance 0.0189
Variance_L: Importance 0.0600

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0422
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       665
         CH2       1.00      1.00      1.00       665

    accuracy                           1.00      1330
   mac

#### **Sampling Rate 200MSPS**
This section contains data collected using a sampling rate of **200MSPS**. The model is trained on **Probe 2** and tested on **Probe 3**.



In [57]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ------------------------------------------------------------------
# Experiment configuration (fixed for this cell)
# ------------------------------------------------------------------
# Root folder containing the per-probe feature directories.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC index, train/test probe numbers and sampling rate (MSPS).
adc_num, train_probe_dataset, test_probe_dataset, sample_rate = 1, 2, 3, 200

# Probe names follow directly from the dataset numbers above.
train_probe, test_probe = (
    f"Probe{number}" for number in (train_probe_dataset, test_probe_dataset)
)

# Feature names, ordered as (Time High, Freq High, Time Low, Freq Low).
# The "_H" and "_L" halves share the same 19 descriptors, so the stems are
# listed once and expanded with each suffix.
_feature_stems = [
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
]
feature_names = [
    f"{stem}_{suffix}" for suffix in ("H", "L") for stem in _feature_stems
]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe/ADC/channel combination.

    The file is looked up below the module-level ``base_directory`` as
    ``P<id>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``,
    where ``sample_rate`` is the module-level setting and ``<id>`` is the
    numeric suffix of ``probe_name``. No content validation is performed;
    the array is returned exactly as stored.

    Args:
        probe_name: Probe label ending in its number, e.g. ``"Probe2"``.
        adc_num: ADC index used in the file name.
        channel: Channel index used in the file name.

    Returns:
        The array read from the ``.npy`` file by ``np.load``.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Take the whole trailing number so a multi-digit probe ("Probe12") maps
    # to "P12_fd" instead of "P2_fd". Names without trailing digits fall back
    # to their last character, which is what the lookup did before.
    name_stem = probe_name.rstrip("0123456789")
    probe_id = probe_name[len(name_stem):] or probe_name[-1]

    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ------------------------------------------------------------------
# Load channel 1 / channel 2 features for the train and test probes
# ------------------------------------------------------------------
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Stack both channels row-wise; channel 1 rows get label 0, channel 2 rows label 1.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.hstack((np.zeros(train_ch0.shape[0]), np.ones(train_ch1.shape[0])))
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.hstack((np.zeros(test_ch0.shape[0]), np.ones(test_ch1.shape[0])))

# ------------------------------------------------------------------
# Standardise features (statistics come from the training set only)
# ------------------------------------------------------------------
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ------------------------------------------------------------------
# First forest: trained on all features to rank them
# ------------------------------------------------------------------
# Both forests use the same settings, so they are declared once.
rf_hyperparams = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_hyperparams).fit(X_train_scaled, y_train)

# Keep the features whose importance lies strictly above the median.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.nonzero(feature_importances > importance_cutoff)[0]
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features (in column order) with their importance.
print("\nTop Important Features:")
for feature_idx in important_features:
    selected_name = feature_names[feature_idx]
    selected_weight = feature_importances[feature_idx]
    print(f"{selected_name}: Importance {selected_weight:.4f}")

# ------------------------------------------------------------------
# Second forest: retrained on the selected features only
# ------------------------------------------------------------------
rf_selected = RandomForestClassifier(**rf_hyperparams).fit(X_train_selected, y_train)

# Output folder is unique per probe pair, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected column indices.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and the predicted probability of class 1.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ------------------------------------------------------------------
# Test-set metrics for the feature-selected model
# ------------------------------------------------------------------
print("\nModel Performance After Feature Selection:")
# Each metric is computed right before it is printed, in this order.
metric_table = (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
)
for metric_label, scorer, estimate in metric_table:
    print(f"{metric_label}: {scorer(y_test, estimate):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix plus false-positive / false-negative rates
# (row = true class, column = predicted class).
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
false_pos, true_neg = cm[0, 1], cm[0, 0]
false_neg, true_pos = cm[1, 0], cm[1, 1]
fp_rate = false_pos / (false_pos + true_neg)
fn_rate = false_neg / (false_neg + true_pos)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC1 CH1: (498, 38)
✅ Loaded Probe2 ADC1 CH2: (498, 38)
✅ Loaded Probe3 ADC1 CH1: (498, 38)
✅ Loaded Probe3 ADC1 CH2: (498, 38)

Top Important Features:
Max_H: Importance 0.0800
Mean_H: Importance 0.0800
Mean Deviation_H: Importance 0.0600
RMS_H: Importance 0.1257
Entropy_H: Importance 0.0194
Mean_Freq_H: Importance 0.0184
Kurtosis_Freq_H: Importance 0.0080
Variance_H: Importance 0.0201
Min_L: Importance 0.0732
Mean_L: Importance 0.1800
Std_L: Importance 0.0200
Mean Deviation_L: Importance 0.0800
RMS_L: Importance 0.1216
Entropy_L: Importance 0.0199
Mean_Freq_L: Importance 0.0126
Variance_L: Importance 0.0810

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0001
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       498
         CH2       1.00      1.00      1.00       498

    accuracy                    

#### **Sampling Rate 250MSPS**
This section contains data collected using a sampling rate of **250MSPS**. The model is trained on **Probe 2** and tested on **Probe 3**.



In [58]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ------------------------------------------------------------------
# Experiment configuration (fixed for this cell)
# ------------------------------------------------------------------
# Root folder containing the per-probe feature directories.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC index, train/test probe numbers and sampling rate (MSPS).
adc_num, train_probe_dataset, test_probe_dataset, sample_rate = 1, 2, 3, 250

# Probe names follow directly from the dataset numbers above.
train_probe, test_probe = (
    f"Probe{number}" for number in (train_probe_dataset, test_probe_dataset)
)

# Feature names, ordered as (Time High, Freq High, Time Low, Freq Low).
# The "_H" and "_L" halves share the same 19 descriptors, so the stems are
# listed once and expanded with each suffix.
_feature_stems = [
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
]
feature_names = [
    f"{stem}_{suffix}" for suffix in ("H", "L") for stem in _feature_stems
]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe/ADC/channel combination.

    The file is looked up below the module-level ``base_directory`` as
    ``P<id>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``,
    where ``sample_rate`` is the module-level setting and ``<id>`` is the
    numeric suffix of ``probe_name``. No content validation is performed;
    the array is returned exactly as stored.

    Args:
        probe_name: Probe label ending in its number, e.g. ``"Probe2"``.
        adc_num: ADC index used in the file name.
        channel: Channel index used in the file name.

    Returns:
        The array read from the ``.npy`` file by ``np.load``.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Take the whole trailing number so a multi-digit probe ("Probe12") maps
    # to "P12_fd" instead of "P2_fd". Names without trailing digits fall back
    # to their last character, which is what the lookup did before.
    name_stem = probe_name.rstrip("0123456789")
    probe_id = probe_name[len(name_stem):] or probe_name[-1]

    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ------------------------------------------------------------------
# Load channel 1 / channel 2 features for the train and test probes
# ------------------------------------------------------------------
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Stack both channels row-wise; channel 1 rows get label 0, channel 2 rows label 1.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.hstack((np.zeros(train_ch0.shape[0]), np.ones(train_ch1.shape[0])))
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.hstack((np.zeros(test_ch0.shape[0]), np.ones(test_ch1.shape[0])))

# ------------------------------------------------------------------
# Standardise features (statistics come from the training set only)
# ------------------------------------------------------------------
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ------------------------------------------------------------------
# First forest: trained on all features to rank them
# ------------------------------------------------------------------
# Both forests use the same settings, so they are declared once.
rf_hyperparams = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_hyperparams).fit(X_train_scaled, y_train)

# Keep the features whose importance lies strictly above the median.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.nonzero(feature_importances > importance_cutoff)[0]
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features (in column order) with their importance.
print("\nTop Important Features:")
for feature_idx in important_features:
    selected_name = feature_names[feature_idx]
    selected_weight = feature_importances[feature_idx]
    print(f"{selected_name}: Importance {selected_weight:.4f}")

# ------------------------------------------------------------------
# Second forest: retrained on the selected features only
# ------------------------------------------------------------------
rf_selected = RandomForestClassifier(**rf_hyperparams).fit(X_train_selected, y_train)

# Output folder is unique per probe pair, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected column indices.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and the predicted probability of class 1.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ------------------------------------------------------------------
# Test-set metrics for the feature-selected model
# ------------------------------------------------------------------
print("\nModel Performance After Feature Selection:")
# Each metric is computed right before it is printed, in this order.
metric_table = (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
)
for metric_label, scorer, estimate in metric_table:
    print(f"{metric_label}: {scorer(y_test, estimate):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix plus false-positive / false-negative rates
# (row = true class, column = predicted class).
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
false_pos, true_neg = cm[0, 1], cm[0, 0]
false_neg, true_pos = cm[1, 0], cm[1, 1]
fp_rate = false_pos / (false_pos + true_neg)
fn_rate = false_neg / (false_neg + true_pos)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC1 CH1: (398, 38)
✅ Loaded Probe2 ADC1 CH2: (398, 38)
✅ Loaded Probe3 ADC1 CH1: (398, 38)
✅ Loaded Probe3 ADC1 CH2: (398, 38)

Top Important Features:
Max_H: Importance 0.0600
Mean_H: Importance 0.0600
Std_H: Importance 0.0800
Mean Deviation_H: Importance 0.0200
RMS_H: Importance 0.0800
Entropy_H: Importance 0.0400
Mean_Freq_H: Importance 0.0400
Irregularity_H: Importance 0.0400
Min_L: Importance 0.0600
Mean_L: Importance 0.0800
Std_L: Importance 0.0800
Mean Deviation_L: Importance 0.0600
RMS_L: Importance 0.0800
Entropy_L: Importance 0.0600
Mean_Freq_L: Importance 0.0800
Variance_L: Importance 0.0800

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0009
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       398
         CH2       1.00      1.00      1.00       398

    accuracy                          

#### **Sampling Rate 300MSPS**
This section contains data collected using a sampling rate of **300MSPS**. The model is trained on **Probe 2** and tested on **Probe 3**.



In [59]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ------------------------------------------------------------------
# Experiment configuration (fixed for this cell)
# ------------------------------------------------------------------
# Root folder containing the per-probe feature directories.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC index, train/test probe numbers and sampling rate (MSPS).
adc_num, train_probe_dataset, test_probe_dataset, sample_rate = 1, 2, 3, 300

# Probe names follow directly from the dataset numbers above.
train_probe, test_probe = (
    f"Probe{number}" for number in (train_probe_dataset, test_probe_dataset)
)

# Feature names, ordered as (Time High, Freq High, Time Low, Freq Low).
# The "_H" and "_L" halves share the same 19 descriptors, so the stems are
# listed once and expanded with each suffix.
_feature_stems = [
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
]
feature_names = [
    f"{stem}_{suffix}" for suffix in ("H", "L") for stem in _feature_stems
]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe/ADC/channel combination.

    The file is looked up below the module-level ``base_directory`` as
    ``P<id>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``,
    where ``sample_rate`` is the module-level setting and ``<id>`` is the
    numeric suffix of ``probe_name``. No content validation is performed;
    the array is returned exactly as stored.

    Args:
        probe_name: Probe label ending in its number, e.g. ``"Probe2"``.
        adc_num: ADC index used in the file name.
        channel: Channel index used in the file name.

    Returns:
        The array read from the ``.npy`` file by ``np.load``.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Take the whole trailing number so a multi-digit probe ("Probe12") maps
    # to "P12_fd" instead of "P2_fd". Names without trailing digits fall back
    # to their last character, which is what the lookup did before.
    name_stem = probe_name.rstrip("0123456789")
    probe_id = probe_name[len(name_stem):] or probe_name[-1]

    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ------------------------------------------------------------------
# Load channel 1 / channel 2 features for the train and test probes
# ------------------------------------------------------------------
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Stack both channels row-wise; channel 1 rows get label 0, channel 2 rows label 1.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.hstack((np.zeros(train_ch0.shape[0]), np.ones(train_ch1.shape[0])))
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.hstack((np.zeros(test_ch0.shape[0]), np.ones(test_ch1.shape[0])))

# ------------------------------------------------------------------
# Standardise features (statistics come from the training set only)
# ------------------------------------------------------------------
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ------------------------------------------------------------------
# First forest: trained on all features to rank them
# ------------------------------------------------------------------
# Both forests use the same settings, so they are declared once.
rf_hyperparams = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_hyperparams).fit(X_train_scaled, y_train)

# Keep the features whose importance lies strictly above the median.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.nonzero(feature_importances > importance_cutoff)[0]
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features (in column order) with their importance.
print("\nTop Important Features:")
for feature_idx in important_features:
    selected_name = feature_names[feature_idx]
    selected_weight = feature_importances[feature_idx]
    print(f"{selected_name}: Importance {selected_weight:.4f}")

# ------------------------------------------------------------------
# Second forest: retrained on the selected features only
# ------------------------------------------------------------------
rf_selected = RandomForestClassifier(**rf_hyperparams).fit(X_train_selected, y_train)

# Output folder is unique per probe pair, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected column indices.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and the predicted probability of class 1.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ------------------------------------------------------------------
# Test-set metrics for the feature-selected model
# ------------------------------------------------------------------
print("\nModel Performance After Feature Selection:")
# Each metric is computed right before it is printed, in this order.
metric_table = (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
)
for metric_label, scorer, estimate in metric_table:
    print(f"{metric_label}: {scorer(y_test, estimate):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix plus false-positive / false-negative rates
# (row = true class, column = predicted class).
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
false_pos, true_neg = cm[0, 1], cm[0, 0]
false_neg, true_pos = cm[1, 0], cm[1, 1]
fp_rate = false_pos / (false_pos + true_neg)
fn_rate = false_neg / (false_neg + true_pos)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC1 CH1: (332, 38)
✅ Loaded Probe2 ADC1 CH2: (332, 38)
✅ Loaded Probe3 ADC1 CH1: (332, 38)
✅ Loaded Probe3 ADC1 CH2: (331, 38)

Top Important Features:
Max_H: Importance 0.1000
Mean_H: Importance 0.0600
Std_H: Importance 0.0800
Mean Deviation_H: Importance 0.0200
RMS_H: Importance 0.0800
Entropy_H: Importance 0.0400
Mean_Freq_H: Importance 0.0600
Kurtosis_Freq_H: Importance 0.0174
Min_L: Importance 0.0024
Mean_L: Importance 0.1000
Std_L: Importance 0.0800
Mean Deviation_L: Importance 0.0600
RMS_L: Importance 0.0800
Entropy_L: Importance 0.0600
Mean_Freq_L: Importance 0.0800
Variance_L: Importance 0.0802

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0037
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       332
         CH2       1.00      1.00      1.00       331

    accuracy                         

### Training **Probe 2** data and Testing on **Probe 1**
To evaluate how well a model trained on Probe 2 generalizes, it is tested on data from Probe 1.

#### **Sampling Rate 10MSPS**
This section contains data collected using a sampling rate of **10MSPS**. The model is trained on **Probe 2** and tested on **Probe 1**.



In [60]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ------------------------------------------------------------------
# Experiment configuration (fixed for this cell)
# ------------------------------------------------------------------
# Root folder containing the per-probe feature directories.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC index, train/test probe numbers and sampling rate (MSPS).
adc_num, train_probe_dataset, test_probe_dataset, sample_rate = 1, 2, 1, 10

# Probe names follow directly from the dataset numbers above.
train_probe, test_probe = (
    f"Probe{number}" for number in (train_probe_dataset, test_probe_dataset)
)

# Feature names, ordered as (Time High, Freq High, Time Low, Freq Low).
# The "_H" and "_L" halves share the same 19 descriptors, so the stems are
# listed once and expanded with each suffix.
_feature_stems = [
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
]
feature_names = [
    f"{stem}_{suffix}" for suffix in ("H", "L") for stem in _feature_stems
]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe/ADC/channel combination.

    The file is looked up below the module-level ``base_directory`` as
    ``P<id>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``,
    where ``sample_rate`` is the module-level setting and ``<id>`` is the
    numeric suffix of ``probe_name``. No content validation is performed;
    the array is returned exactly as stored.

    Args:
        probe_name: Probe label ending in its number, e.g. ``"Probe2"``.
        adc_num: ADC index used in the file name.
        channel: Channel index used in the file name.

    Returns:
        The array read from the ``.npy`` file by ``np.load``.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Take the whole trailing number so a multi-digit probe ("Probe12") maps
    # to "P12_fd" instead of "P2_fd". Names without trailing digits fall back
    # to their last character, which is what the lookup did before.
    name_stem = probe_name.rstrip("0123456789")
    probe_id = probe_name[len(name_stem):] or probe_name[-1]

    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ------------------------------------------------------------------
# Load channel 1 / channel 2 features for the train and test probes
# ------------------------------------------------------------------
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Stack both channels row-wise; channel 1 rows get label 0, channel 2 rows label 1.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.hstack((np.zeros(train_ch0.shape[0]), np.ones(train_ch1.shape[0])))
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.hstack((np.zeros(test_ch0.shape[0]), np.ones(test_ch1.shape[0])))

# ------------------------------------------------------------------
# Standardise features (statistics come from the training set only)
# ------------------------------------------------------------------
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ------------------------------------------------------------------
# First forest: trained on all features to rank them
# ------------------------------------------------------------------
# Both forests use the same settings, so they are declared once.
rf_hyperparams = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_hyperparams).fit(X_train_scaled, y_train)

# Keep the features whose importance lies strictly above the median.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.nonzero(feature_importances > importance_cutoff)[0]
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features (in column order) with their importance.
print("\nTop Important Features:")
for feature_idx in important_features:
    selected_name = feature_names[feature_idx]
    selected_weight = feature_importances[feature_idx]
    print(f"{selected_name}: Importance {selected_weight:.4f}")

# ------------------------------------------------------------------
# Second forest: retrained on the selected features only
# ------------------------------------------------------------------
rf_selected = RandomForestClassifier(**rf_hyperparams).fit(X_train_selected, y_train)

# Output folder is unique per probe pair, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected column indices.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and the predicted probability of class 1.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ------------------------------------------------------------------
# Test-set metrics for the feature-selected model
# ------------------------------------------------------------------
print("\nModel Performance After Feature Selection:")
# Each metric is computed right before it is printed, in this order.
metric_table = (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
)
for metric_label, scorer, estimate in metric_table:
    print(f"{metric_label}: {scorer(y_test, estimate):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix plus false-positive / false-negative rates
# (row = true class, column = predicted class).
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
false_pos, true_neg = cm[0, 1], cm[0, 0]
false_neg, true_pos = cm[1, 0], cm[1, 1]
fp_rate = false_pos / (false_pos + true_neg)
fn_rate = false_neg / (false_neg + true_pos)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC1 CH1: (9998, 38)
✅ Loaded Probe2 ADC1 CH2: (9998, 38)
✅ Loaded Probe1 ADC1 CH1: (9998, 38)
✅ Loaded Probe1 ADC1 CH2: (9998, 38)

Top Important Features:
Min_H: Importance 0.0178
Mean_H: Importance 0.0563
Std_H: Importance 0.0118
Mean Deviation_H: Importance 0.0153
RMS_H: Importance 0.1167
Centroid_H: Importance 0.0096
Entropy_H: Importance 0.0320
Mean_Freq_H: Importance 0.0574
Irregularity_H: Importance 0.0143
Variance_H: Importance 0.0354
Max_L: Importance 0.0187
Min_L: Importance 0.0724
Mean_L: Importance 0.1135
RMS_L: Importance 0.1859
Skewness_L: Importance 0.0125
Centroid_L: Importance 0.0103
Entropy_L: Importance 0.0303
Mean_Freq_L: Importance 0.0399
Variance_L: Importance 0.0820

Model Performance After Feature Selection:
Test Accuracy: 0.9386
Balanced Accuracy: 0.9386
MCC: 0.8802
Log Loss: 0.1438
F1 Score: 0.9411
Recall: 0.9796
Precision: 0.9054
              precision    recall  f1-score   support

         CH1       0.98      0.90      0.94      9998
     

#### **Sampling Rate 20MSPS**
This section contains data collected using a sampling rate of **20MSPS**. The model is trained on **Probe 2** and tested on **Probe 1**.



In [61]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ------------------------------------------------------------------
# Experiment configuration (fixed for this cell)
# ------------------------------------------------------------------
# Root folder containing the per-probe feature directories.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC index, train/test probe numbers and sampling rate (MSPS).
adc_num, train_probe_dataset, test_probe_dataset, sample_rate = 1, 2, 1, 20

# Probe names follow directly from the dataset numbers above.
train_probe, test_probe = (
    f"Probe{number}" for number in (train_probe_dataset, test_probe_dataset)
)

# Feature names, ordered as (Time High, Freq High, Time Low, Freq Low).
# The "_H" and "_L" halves share the same 19 descriptors, so the stems are
# listed once and expanded with each suffix.
_feature_stems = [
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
]
feature_names = [
    f"{stem}_{suffix}" for suffix in ("H", "L") for stem in _feature_stems
]

def load_channel_data(probe_name, adc_num, channel, *, sample_rate_msps=None, root_dir=None):
    """Load the feature array of one probe/ADC/channel from its ``.npy`` file.

    The file is expected at
    ``<root_dir>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``
    where ``<n>`` is the trailing number of ``probe_name``.

    Only the existence of the file is checked; the loaded data itself is not
    validated in any way.

    Args:
        probe_name: Probe identifier such as ``"Probe1"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.
        sample_rate_msps: Sampling rate used in the file name. When omitted,
            the notebook-level ``sample_rate`` is used.
        root_dir: Folder that contains the ``P<n>_fd`` sub-folders. When
            omitted, the notebook-level ``base_directory`` is used.

    Returns:
        The object returned by ``np.load`` for the resolved file.

    Raises:
        FileNotFoundError: If the resolved file does not exist.
    """
    # Explicit arguments avoid picking up stale notebook globals when cells
    # are executed out of order; the globals remain the default.
    if sample_rate_msps is None:
        sample_rate_msps = sample_rate
    if root_dir is None:
        root_dir = base_directory

    # Take the whole trailing number so "Probe10" maps to "P10_fd" rather than
    # "P0_fd"; names without trailing digits keep the last-character behavior.
    # NOTE(review): assumes multi-digit probes use "P<number>_fd" folders.
    name_stem = probe_name.rstrip("0123456789")
    probe_id = probe_name[len(name_stem):] or probe_name[-1]

    folder = os.path.join(root_dir, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate_msps}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Load Training & Test Data (label 0 = channel 1, label 1 = channel 2)
# ==================================================================
train_ch0, train_ch1 = [load_channel_data(train_probe, adc_num, channel) for channel in (1, 2)]
test_ch0, test_ch1 = [load_channel_data(test_probe, adc_num, channel) for channel in (1, 2)]

# Stack both channels row-wise and build the matching 0/1 label vectors.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.concatenate((np.zeros(len(train_ch0)), np.ones(len(train_ch1))))
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.concatenate((np.zeros(len(test_ch0)), np.ones(len(test_ch1))))

# ==================================================================
# Standardize Features (statistics come from the training set only)
# ==================================================================
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# First Forest: rank features by importance
# ==================================================================
# Both forests share exactly the same hyperparameters.
rf_settings = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_settings).fit(X_train_scaled, y_train)

# Keep the features whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features with their importances.
print("\nTop Important Features:")
for feature_idx, importance in zip(important_features, feature_importances[important_features]):
    print(f"{feature_names[feature_idx]}: Importance {importance:.4f}")

# ==================================================================
# Second Forest: retrain on the retained features only
# ==================================================================
rf_selected = RandomForestClassifier(**rf_settings).fit(X_train_selected, y_train)

# Output folder name encodes train/test probe, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the retained feature indices.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and the predicted probability of label 1.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# Log loss is scored on probabilities; every other metric on hard predictions.
for metric_label, metric_fn, metric_input in (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
):
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix (rows = true label, columns = predicted label), FPR and FNR.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC1 CH1: (4998, 38)
✅ Loaded Probe2 ADC1 CH2: (4998, 38)
✅ Loaded Probe1 ADC1 CH1: (4998, 38)
✅ Loaded Probe1 ADC1 CH2: (4998, 38)

Top Important Features:
Max_H: Importance 0.1194
Min_H: Importance 0.0143
Mean_H: Importance 0.1108
Mean Deviation_H: Importance 0.0075
RMS_H: Importance 0.2006
Entropy_H: Importance 0.0357
Mean_Freq_H: Importance 0.0776
Variance_H: Importance 0.0614
Max_L: Importance 0.0141
Min_L: Importance 0.0410
Mean_L: Importance 0.1045
Std_L: Importance 0.0008
RMS_L: Importance 0.1372
Skewness_L: Importance 0.0021
Kurtosis_L: Importance 0.0013
Peak-to-Peak_L: Importance 0.0006
Entropy_L: Importance 0.0486
Mean_Freq_L: Importance 0.0137
Variance_L: Importance 0.0078

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0004
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      4998
         C

#### **Sampling Rate 30MSPS**
This section contains data collected using a sampling rate of **30MSPS**. The model is trained on **Probe 2** and tested on **Probe 1**.



In [62]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Experiment Configuration (fixed for this cell)
# ==================================================================
# Root folder that holds the per-probe feature folders.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC number, probe numbers used for training/testing, and sampling rate (MSPS).
adc_num, train_probe_dataset, test_probe_dataset = 1, 2, 1
sample_rate = 30

# Probe names follow directly from the dataset numbers above.
train_probe = "Probe{}".format(train_probe_dataset)
test_probe = "Probe{}".format(test_probe_dataset)

# Feature names (Time High, Freq High, Time Low, Freq Low): the same 19
# descriptors appear once with an "_H" suffix and once with an "_L" suffix.
_descriptor_names = (
    # Time-domain descriptors
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    # Frequency-domain descriptors
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [
    f"{descriptor}_{suffix}"
    for suffix in ("H", "L")
    for descriptor in _descriptor_names
]

def load_channel_data(probe_name, adc_num, channel, *, sample_rate_msps=None, root_dir=None):
    """Load the feature array of one probe/ADC/channel from its ``.npy`` file.

    The file is expected at
    ``<root_dir>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``
    where ``<n>`` is the trailing number of ``probe_name``.

    Only the existence of the file is checked; the loaded data itself is not
    validated in any way.

    Args:
        probe_name: Probe identifier such as ``"Probe1"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.
        sample_rate_msps: Sampling rate used in the file name. When omitted,
            the notebook-level ``sample_rate`` is used.
        root_dir: Folder that contains the ``P<n>_fd`` sub-folders. When
            omitted, the notebook-level ``base_directory`` is used.

    Returns:
        The object returned by ``np.load`` for the resolved file.

    Raises:
        FileNotFoundError: If the resolved file does not exist.
    """
    # Explicit arguments avoid picking up stale notebook globals when cells
    # are executed out of order; the globals remain the default.
    if sample_rate_msps is None:
        sample_rate_msps = sample_rate
    if root_dir is None:
        root_dir = base_directory

    # Take the whole trailing number so "Probe10" maps to "P10_fd" rather than
    # "P0_fd"; names without trailing digits keep the last-character behavior.
    # NOTE(review): assumes multi-digit probes use "P<number>_fd" folders.
    name_stem = probe_name.rstrip("0123456789")
    probe_id = probe_name[len(name_stem):] or probe_name[-1]

    folder = os.path.join(root_dir, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate_msps}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Load Training & Test Data (label 0 = channel 1, label 1 = channel 2)
# ==================================================================
train_ch0, train_ch1 = [load_channel_data(train_probe, adc_num, channel) for channel in (1, 2)]
test_ch0, test_ch1 = [load_channel_data(test_probe, adc_num, channel) for channel in (1, 2)]

# Stack both channels row-wise and build the matching 0/1 label vectors.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.concatenate((np.zeros(len(train_ch0)), np.ones(len(train_ch1))))
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.concatenate((np.zeros(len(test_ch0)), np.ones(len(test_ch1))))

# ==================================================================
# Standardize Features (statistics come from the training set only)
# ==================================================================
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# First Forest: rank features by importance
# ==================================================================
# Both forests share exactly the same hyperparameters.
rf_settings = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_settings).fit(X_train_scaled, y_train)

# Keep the features whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features with their importances.
print("\nTop Important Features:")
for feature_idx, importance in zip(important_features, feature_importances[important_features]):
    print(f"{feature_names[feature_idx]}: Importance {importance:.4f}")

# ==================================================================
# Second Forest: retrain on the retained features only
# ==================================================================
rf_selected = RandomForestClassifier(**rf_settings).fit(X_train_selected, y_train)

# Output folder name encodes train/test probe, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the retained feature indices.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and the predicted probability of label 1.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# Log loss is scored on probabilities; every other metric on hard predictions.
for metric_label, metric_fn, metric_input in (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
):
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix (rows = true label, columns = predicted label), FPR and FNR.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC1 CH1: (3332, 38)
✅ Loaded Probe2 ADC1 CH2: (3331, 38)
✅ Loaded Probe1 ADC1 CH1: (3332, 38)
✅ Loaded Probe1 ADC1 CH2: (3332, 38)

Top Important Features:
Max_H: Importance 0.0676
Min_H: Importance 0.0269
Mean_H: Importance 0.1364
Mean Deviation_H: Importance 0.0034
RMS_H: Importance 0.2181
Centroid_H: Importance 0.0021
Entropy_H: Importance 0.0171
Mean_Freq_H: Importance 0.0392
Irregularity_H: Importance 0.0009
Variance_H: Importance 0.0507
Max_L: Importance 0.0018
Min_L: Importance 0.0394
Mean_L: Importance 0.1537
RMS_L: Importance 0.1082
Kurtosis_L: Importance 0.0019
Entropy_L: Importance 0.0264
Mean_Freq_L: Importance 0.0095
Irregularity_L: Importance 0.0008
Variance_L: Importance 0.0949

Model Performance After Feature Selection:
Test Accuracy: 0.9998
Balanced Accuracy: 0.9998
MCC: 0.9997
Log Loss: 0.0003
F1 Score: 0.9998
Recall: 1.0000
Precision: 0.9997
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      3332
 

#### **Sampling Rate 40MSPS**
This section contains data collected using a sampling rate of **40MSPS**. The model is trained on **Probe 2** and tested on **Probe 1**.



In [63]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Experiment Configuration (fixed for this cell)
# ==================================================================
# Root folder that holds the per-probe feature folders.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC number, probe numbers used for training/testing, and sampling rate (MSPS).
adc_num, train_probe_dataset, test_probe_dataset = 1, 2, 1
sample_rate = 40

# Probe names follow directly from the dataset numbers above.
train_probe = "Probe{}".format(train_probe_dataset)
test_probe = "Probe{}".format(test_probe_dataset)

# Feature names (Time High, Freq High, Time Low, Freq Low): the same 19
# descriptors appear once with an "_H" suffix and once with an "_L" suffix.
_descriptor_names = (
    # Time-domain descriptors
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    # Frequency-domain descriptors
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [
    f"{descriptor}_{suffix}"
    for suffix in ("H", "L")
    for descriptor in _descriptor_names
]

def load_channel_data(probe_name, adc_num, channel, *, sample_rate_msps=None, root_dir=None):
    """Load the feature array of one probe/ADC/channel from its ``.npy`` file.

    The file is expected at
    ``<root_dir>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``
    where ``<n>`` is the trailing number of ``probe_name``.

    Only the existence of the file is checked; the loaded data itself is not
    validated in any way.

    Args:
        probe_name: Probe identifier such as ``"Probe1"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.
        sample_rate_msps: Sampling rate used in the file name. When omitted,
            the notebook-level ``sample_rate`` is used.
        root_dir: Folder that contains the ``P<n>_fd`` sub-folders. When
            omitted, the notebook-level ``base_directory`` is used.

    Returns:
        The object returned by ``np.load`` for the resolved file.

    Raises:
        FileNotFoundError: If the resolved file does not exist.
    """
    # Explicit arguments avoid picking up stale notebook globals when cells
    # are executed out of order; the globals remain the default.
    if sample_rate_msps is None:
        sample_rate_msps = sample_rate
    if root_dir is None:
        root_dir = base_directory

    # Take the whole trailing number so "Probe10" maps to "P10_fd" rather than
    # "P0_fd"; names without trailing digits keep the last-character behavior.
    # NOTE(review): assumes multi-digit probes use "P<number>_fd" folders.
    name_stem = probe_name.rstrip("0123456789")
    probe_id = probe_name[len(name_stem):] or probe_name[-1]

    folder = os.path.join(root_dir, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate_msps}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Load Training & Test Data (label 0 = channel 1, label 1 = channel 2)
# ==================================================================
train_ch0, train_ch1 = [load_channel_data(train_probe, adc_num, channel) for channel in (1, 2)]
test_ch0, test_ch1 = [load_channel_data(test_probe, adc_num, channel) for channel in (1, 2)]

# Stack both channels row-wise and build the matching 0/1 label vectors.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.concatenate((np.zeros(len(train_ch0)), np.ones(len(train_ch1))))
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.concatenate((np.zeros(len(test_ch0)), np.ones(len(test_ch1))))

# ==================================================================
# Standardize Features (statistics come from the training set only)
# ==================================================================
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# First Forest: rank features by importance
# ==================================================================
# Both forests share exactly the same hyperparameters.
rf_settings = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_settings).fit(X_train_scaled, y_train)

# Keep the features whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features with their importances.
print("\nTop Important Features:")
for feature_idx, importance in zip(important_features, feature_importances[important_features]):
    print(f"{feature_names[feature_idx]}: Importance {importance:.4f}")

# ==================================================================
# Second Forest: retrain on the retained features only
# ==================================================================
rf_selected = RandomForestClassifier(**rf_settings).fit(X_train_selected, y_train)

# Output folder name encodes train/test probe, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the retained feature indices.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and the predicted probability of label 1.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# Log loss is scored on probabilities; every other metric on hard predictions.
for metric_label, metric_fn, metric_input in (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
):
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix (rows = true label, columns = predicted label), FPR and FNR.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC1 CH1: (2481, 38)
✅ Loaded Probe2 ADC1 CH2: (2481, 38)
✅ Loaded Probe1 ADC1 CH1: (2481, 38)
✅ Loaded Probe1 ADC1 CH2: (2481, 38)

Top Important Features:
Max_H: Importance 0.0664
Min_H: Importance 0.0285
Mean_H: Importance 0.0956
Mean Deviation_H: Importance 0.0158
RMS_H: Importance 0.1320
Centroid_H: Importance 0.0000
Entropy_H: Importance 0.0139
Mean_Freq_H: Importance 0.0567
Irregularity_H: Importance 0.0000
Variance_H: Importance 0.0610
Max_L: Importance 0.0006
Min_L: Importance 0.0283
Mean_L: Importance 0.2098
Mean Deviation_L: Importance 0.0000
RMS_L: Importance 0.1206
Entropy_L: Importance 0.0309
Mean_Freq_L: Importance 0.0122
Variance_L: Importance 0.1276

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0001
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      2481
         CH2       1.00      

#### **Sampling Rate 50MSPS**
This section contains data collected using a sampling rate of **50MSPS**. The model is trained on **Probe 2** and tested on **Probe 1**.



In [64]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Experiment Configuration (fixed for this cell)
# ==================================================================
# Root folder that holds the per-probe feature folders.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC number, probe numbers used for training/testing, and sampling rate (MSPS).
adc_num, train_probe_dataset, test_probe_dataset = 1, 2, 1
sample_rate = 50

# Probe names follow directly from the dataset numbers above.
train_probe = "Probe{}".format(train_probe_dataset)
test_probe = "Probe{}".format(test_probe_dataset)

# Feature names (Time High, Freq High, Time Low, Freq Low): the same 19
# descriptors appear once with an "_H" suffix and once with an "_L" suffix.
_descriptor_names = (
    # Time-domain descriptors
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    # Frequency-domain descriptors
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [
    f"{descriptor}_{suffix}"
    for suffix in ("H", "L")
    for descriptor in _descriptor_names
]

def load_channel_data(probe_name, adc_num, channel, *, sample_rate_msps=None, root_dir=None):
    """Load the feature array of one probe/ADC/channel from its ``.npy`` file.

    The file is expected at
    ``<root_dir>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``
    where ``<n>`` is the trailing number of ``probe_name``.

    Only the existence of the file is checked; the loaded data itself is not
    validated in any way.

    Args:
        probe_name: Probe identifier such as ``"Probe1"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.
        sample_rate_msps: Sampling rate used in the file name. When omitted,
            the notebook-level ``sample_rate`` is used.
        root_dir: Folder that contains the ``P<n>_fd`` sub-folders. When
            omitted, the notebook-level ``base_directory`` is used.

    Returns:
        The object returned by ``np.load`` for the resolved file.

    Raises:
        FileNotFoundError: If the resolved file does not exist.
    """
    # Explicit arguments avoid picking up stale notebook globals when cells
    # are executed out of order; the globals remain the default.
    if sample_rate_msps is None:
        sample_rate_msps = sample_rate
    if root_dir is None:
        root_dir = base_directory

    # Take the whole trailing number so "Probe10" maps to "P10_fd" rather than
    # "P0_fd"; names without trailing digits keep the last-character behavior.
    # NOTE(review): assumes multi-digit probes use "P<number>_fd" folders.
    name_stem = probe_name.rstrip("0123456789")
    probe_id = probe_name[len(name_stem):] or probe_name[-1]

    folder = os.path.join(root_dir, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate_msps}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Load Training & Test Data (label 0 = channel 1, label 1 = channel 2)
# ==================================================================
train_ch0, train_ch1 = [load_channel_data(train_probe, adc_num, channel) for channel in (1, 2)]
test_ch0, test_ch1 = [load_channel_data(test_probe, adc_num, channel) for channel in (1, 2)]

# Stack both channels row-wise and build the matching 0/1 label vectors.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.concatenate((np.zeros(len(train_ch0)), np.ones(len(train_ch1))))
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.concatenate((np.zeros(len(test_ch0)), np.ones(len(test_ch1))))

# ==================================================================
# Standardize Features (statistics come from the training set only)
# ==================================================================
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# First Forest: rank features by importance
# ==================================================================
# Both forests share exactly the same hyperparameters.
rf_settings = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_settings).fit(X_train_scaled, y_train)

# Keep the features whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features with their importances.
print("\nTop Important Features:")
for feature_idx, importance in zip(important_features, feature_importances[important_features]):
    print(f"{feature_names[feature_idx]}: Importance {importance:.4f}")

# ==================================================================
# Second Forest: retrain on the retained features only
# ==================================================================
rf_selected = RandomForestClassifier(**rf_settings).fit(X_train_selected, y_train)

# Output folder name encodes train/test probe, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the retained feature indices.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and the predicted probability of label 1.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# Log loss is scored on probabilities; every other metric on hard predictions.
for metric_label, metric_fn, metric_input in (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
):
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix (rows = true label, columns = predicted label), FPR and FNR.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC1 CH1: (1998, 38)
✅ Loaded Probe2 ADC1 CH2: (1998, 38)
✅ Loaded Probe1 ADC1 CH1: (1998, 38)
✅ Loaded Probe1 ADC1 CH2: (1998, 38)

Top Important Features:
Max_H: Importance 0.0682
Min_H: Importance 0.0077
Mean_H: Importance 0.0993
Std_H: Importance 0.0100
Mean Deviation_H: Importance 0.0238
RMS_H: Importance 0.1287
Centroid_H: Importance 0.0006
Entropy_H: Importance 0.0283
Spread_H: Importance 0.0072
Mean_Freq_H: Importance 0.0567
Variance_H: Importance 0.0656
Max_L: Importance 0.0006
Min_L: Importance 0.0133
Mean_L: Importance 0.1748
Mean Deviation_L: Importance 0.0109
RMS_L: Importance 0.1363
Entropy_L: Importance 0.0447
Mean_Freq_L: Importance 0.0143
Variance_L: Importance 0.1081

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0001
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      1998
         C

#### **Sampling Rate 100MSPS**
This section contains data collected using a sampling rate of **100MSPS**. The model is trained on **Probe 2** and tested on **Probe 1**.



In [65]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Experiment Configuration (fixed for this cell)
# ==================================================================
# Root folder that holds the per-probe feature folders.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC number, probe numbers used for training/testing, and sampling rate (MSPS).
adc_num, train_probe_dataset, test_probe_dataset = 1, 2, 1
sample_rate = 100

# Probe names follow directly from the dataset numbers above.
train_probe = "Probe{}".format(train_probe_dataset)
test_probe = "Probe{}".format(test_probe_dataset)

# Feature names (Time High, Freq High, Time Low, Freq Low): the same 19
# descriptors appear once with an "_H" suffix and once with an "_L" suffix.
_descriptor_names = (
    # Time-domain descriptors
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    # Frequency-domain descriptors
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [
    f"{descriptor}_{suffix}"
    for suffix in ("H", "L")
    for descriptor in _descriptor_names
]

def load_channel_data(probe_name, adc_num, channel, *, sample_rate_msps=None, root_dir=None):
    """Load the feature array of one probe/ADC/channel from its ``.npy`` file.

    The file is expected at
    ``<root_dir>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``
    where ``<n>`` is the trailing number of ``probe_name``.

    Only the existence of the file is checked; the loaded data itself is not
    validated in any way.

    Args:
        probe_name: Probe identifier such as ``"Probe1"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.
        sample_rate_msps: Sampling rate used in the file name. When omitted,
            the notebook-level ``sample_rate`` is used.
        root_dir: Folder that contains the ``P<n>_fd`` sub-folders. When
            omitted, the notebook-level ``base_directory`` is used.

    Returns:
        The object returned by ``np.load`` for the resolved file.

    Raises:
        FileNotFoundError: If the resolved file does not exist.
    """
    # Explicit arguments avoid picking up stale notebook globals when cells
    # are executed out of order; the globals remain the default.
    if sample_rate_msps is None:
        sample_rate_msps = sample_rate
    if root_dir is None:
        root_dir = base_directory

    # Take the whole trailing number so "Probe10" maps to "P10_fd" rather than
    # "P0_fd"; names without trailing digits keep the last-character behavior.
    # NOTE(review): assumes multi-digit probes use "P<number>_fd" folders.
    name_stem = probe_name.rstrip("0123456789")
    probe_id = probe_name[len(name_stem):] or probe_name[-1]

    folder = os.path.join(root_dir, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate_msps}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Load Training & Test Data (label 0 = channel 1, label 1 = channel 2)
# ==================================================================
train_ch0, train_ch1 = [load_channel_data(train_probe, adc_num, channel) for channel in (1, 2)]
test_ch0, test_ch1 = [load_channel_data(test_probe, adc_num, channel) for channel in (1, 2)]

# Stack both channels row-wise and build the matching 0/1 label vectors.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.concatenate((np.zeros(len(train_ch0)), np.ones(len(train_ch1))))
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.concatenate((np.zeros(len(test_ch0)), np.ones(len(test_ch1))))

# ==================================================================
# Standardize Features (statistics come from the training set only)
# ==================================================================
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# First Forest: rank features by importance
# ==================================================================
# Both forests share exactly the same hyperparameters.
rf_settings = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_settings).fit(X_train_scaled, y_train)

# Keep the features whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features with their importances.
print("\nTop Important Features:")
for feature_idx, importance in zip(important_features, feature_importances[important_features]):
    print(f"{feature_names[feature_idx]}: Importance {importance:.4f}")

# ==================================================================
# Second Forest: retrain on the retained features only
# ==================================================================
rf_selected = RandomForestClassifier(**rf_settings).fit(X_train_selected, y_train)

# Output folder name encodes train/test probe, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the retained feature indices.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and the predicted probability of label 1.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# Log loss is scored on probabilities; every other metric on hard predictions.
for metric_label, metric_fn, metric_input in (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
):
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix (rows = true label, columns = predicted label), FPR and FNR.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC1 CH1: (998, 38)
✅ Loaded Probe2 ADC1 CH2: (998, 38)
✅ Loaded Probe1 ADC1 CH1: (998, 38)
✅ Loaded Probe1 ADC1 CH2: (998, 38)

Top Important Features:
Max_H: Importance 0.1078
Min_H: Importance 0.0001
Mean_H: Importance 0.0924
Mean Deviation_H: Importance 0.0135
RMS_H: Importance 0.1310
Entropy_H: Importance 0.0146
Mean_Freq_H: Importance 0.0398
Kurtosis_Freq_H: Importance 0.0045
Irregularity_H: Importance 0.0001
Variance_H: Importance 0.0601
Max_L: Importance 0.0009
Min_L: Importance 0.0966
Mean_L: Importance 0.1804
Mean Deviation_L: Importance 0.0169
RMS_L: Importance 0.1265
Peak-to-Peak_L: Importance 0.0011
Entropy_L: Importance 0.0139
Mean_Freq_L: Importance 0.0126
Variance_L: Importance 0.0873

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0082
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00     

#### **Sampling Rate 150MSPS**
This section contains data collected using a sampling rate of **150MSPS**. The model is trained on **Probe 2** and tested on **Probe 1**.



In [66]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Experiment configuration for this run
# ==================================================================
# Root folder of the extracted-feature files.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC index, sampling rate (MSPS), and the probe numbers used for
# fitting and for evaluation.
adc_num, sample_rate = 1, 150
train_probe_dataset, test_probe_dataset = 2, 1

# Probe labels ("Probe<n>") derived from the dataset numbers.
train_probe = "Probe{}".format(train_probe_dataset)
test_probe = "Probe{}".format(test_probe_dataset)

# Feature column labels (Time High, Freq High, Time Low, Freq Low): the same
# 19 statistic names, first suffixed "_H" and then repeated suffixed "_L".
feature_names = [
    f"{stat}_{suffix}"
    for suffix in ("H", "L")
    for stat in (
        "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
        "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
        "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
        "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
    )
]

def load_channel_data(probe_name, adc_num, channel, *, rate=None, root=None):
    """Load the saved feature array for one probe / ADC / channel.

    The file is looked up at
    ``<root>/P<d>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``
    where ``<d>`` is the last character of ``probe_name``. Only the file's
    existence is checked; the array contents are not validated.

    Args:
        probe_name: Probe label such as ``"Probe1"``.
        adc_num: ADC index used in the file name.
        channel: Channel index used in the file name.
        rate: Sampling rate (MSPS) used in the file name. Defaults to the
            notebook-level ``sample_rate`` at call time.
        root: Directory containing the per-probe folders. Defaults to the
            notebook-level ``base_directory`` at call time.

    Returns:
        The array returned by ``np.load`` for the resolved file.

    Raises:
        FileNotFoundError: If the resolved file does not exist.
    """
    # Fall back to the notebook globals only when no explicit value is given,
    # so existing three-argument calls behave exactly as before.
    if rate is None:
        rate = sample_rate
    if root is None:
        root = base_directory

    # NOTE(review): only the final character is used as the probe id, so a
    # multi-digit probe number would resolve to the wrong folder - confirm
    # probe numbers stay single-digit.
    folder = os.path.join(root, f"P{probe_name[-1]}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Load channel 1 and channel 2 for the training probe and for the test probe.
# Note: the *_ch0 / *_ch1 variables hold channels 1 and 2 respectively.
train_ch0 = load_channel_data(train_probe, adc_num, 1)
train_ch1 = load_channel_data(train_probe, adc_num, 2)
test_ch0 = load_channel_data(test_probe, adc_num, 1)
test_ch1 = load_channel_data(test_probe, adc_num, 2)

# Stack the two channels row-wise; channel-1 rows get label 0 and
# channel-2 rows get label 1, in the same order as the stacked rows.
X_train = np.vstack([train_ch0, train_ch1])
y_train = np.concatenate([np.zeros(train_ch0.shape[0]), np.ones(train_ch1.shape[0])])
X_test = np.vstack([test_ch0, test_ch1])
y_test = np.concatenate([np.zeros(test_ch0.shape[0]), np.ones(test_ch1.shape[0])])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only; the test probe is
# transformed with those same training statistics.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# First forest is fitted on all scaled columns; it is only used to rank
# features via its feature_importances_.
rf = RandomForestClassifier(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42
)
rf.fit(X_train_scaled, y_train)

# Identify Important Features
# Keep the column indices whose importance is strictly greater than the
# median (50th percentile) importance, and slice train and test alike.
feature_importances = rf.feature_importances_
important_features = np.where(feature_importances > np.percentile(feature_importances, 50))[0]
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features
# Printed in column-index order (the order np.where returns), not sorted
# by importance.
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
# Same hyperparameters and seed as the initial forest, but fitted on the
# selected columns only.
rf_selected = RandomForestClassifier(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42
)
rf_selected.fit(X_train_selected, y_train)

# Create a unique folder name dynamically
# The name encodes train probe, test probe, ADC and sampling rate. The path
# is relative, so it resolves against the current working directory.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the Model and Scaler
# The selected column indices are saved too, alongside the forest and scaler.
joblib.dump(rf_selected, os.path.join(model_dir, "RandomForest_Model.pkl"))
joblib.dump(scaler, os.path.join(model_dir, "Scaler.pkl"))
joblib.dump(important_features, os.path.join(model_dir, "Selected_Features.pkl"))

# Predictions
# Hard class predictions, plus the predict_proba column at index 1, which is
# later passed to log_loss.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
# All metrics below compare the test-probe labels with the retrained
# forest's predictions; log_loss uses the index-1 probability column.
print("\nModel Performance After Feature Selection:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_selected):.4f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred_selected):.4f}")
print(f"MCC: {matthews_corrcoef(y_test, y_pred_selected):.4f}")
print(f"Log Loss: {log_loss(y_test, y_probs_selected):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred_selected):.4f}")
print(f"Recall: {recall_score(y_test, y_pred_selected):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_selected):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
# scikit-learn's confusion_matrix puts true labels on rows and predicted
# labels on columns, so cm[0] is the true-label-0 row and cm[1] the
# true-label-1 row.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# FPR: share of true-label-0 rows predicted as 1; FNR: share of
# true-label-1 rows predicted as 0.
fp_rate = cm[0][1] / (cm[0][1] + cm[0][0])
fn_rate = cm[1][0] / (cm[1][0] + cm[1][1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC1 CH1: (665, 38)
✅ Loaded Probe2 ADC1 CH2: (665, 38)
✅ Loaded Probe1 ADC1 CH1: (665, 38)
✅ Loaded Probe1 ADC1 CH2: (665, 38)

Top Important Features:
Max_H: Importance 0.1000
Mean_H: Importance 0.0848
Std_H: Importance 0.1000
Mean Deviation_H: Importance 0.0811
RMS_H: Importance 0.0635
Entropy_H: Importance 0.1200
Spread_H: Importance 0.0161
Mean_Freq_H: Importance 0.0600
Kurtosis_Freq_H: Importance 0.0154
Variance_H: Importance 0.0200
Mean_L: Importance 0.1000
Mean Deviation_L: Importance 0.0800
RMS_L: Importance 0.0802
Entropy_L: Importance 0.0189
Variance_L: Importance 0.0600

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0043
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       665
         CH2       1.00      1.00      1.00       665

    accuracy                           1.00      1330
   mac

#### **Sampling Rate 200MSPS**
This section contains data collected using a sampling rate of **200MSPS**. The model is trained on **Probe 2** and tested on **Probe 1**.



In [67]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Experiment configuration for this run
# ==================================================================
# Root folder of the extracted-feature files.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC index, sampling rate (MSPS), and the probe numbers used for
# fitting and for evaluation.
adc_num, sample_rate = 1, 200
train_probe_dataset, test_probe_dataset = 2, 1

# Probe labels ("Probe<n>") derived from the dataset numbers.
train_probe = "Probe{}".format(train_probe_dataset)
test_probe = "Probe{}".format(test_probe_dataset)

# Feature column labels (Time High, Freq High, Time Low, Freq Low): the same
# 19 statistic names, first suffixed "_H" and then repeated suffixed "_L".
feature_names = [
    f"{stat}_{suffix}"
    for suffix in ("H", "L")
    for stat in (
        "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
        "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
        "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
        "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
    )
]

def load_channel_data(probe_name, adc_num, channel, *, rate=None, root=None):
    """Load the saved feature array for one probe / ADC / channel.

    The file is looked up at
    ``<root>/P<d>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``
    where ``<d>`` is the last character of ``probe_name``. Only the file's
    existence is checked; the array contents are not validated.

    Args:
        probe_name: Probe label such as ``"Probe1"``.
        adc_num: ADC index used in the file name.
        channel: Channel index used in the file name.
        rate: Sampling rate (MSPS) used in the file name. Defaults to the
            notebook-level ``sample_rate`` at call time.
        root: Directory containing the per-probe folders. Defaults to the
            notebook-level ``base_directory`` at call time.

    Returns:
        The array returned by ``np.load`` for the resolved file.

    Raises:
        FileNotFoundError: If the resolved file does not exist.
    """
    # Fall back to the notebook globals only when no explicit value is given,
    # so existing three-argument calls behave exactly as before.
    if rate is None:
        rate = sample_rate
    if root is None:
        root = base_directory

    # NOTE(review): only the final character is used as the probe id, so a
    # multi-digit probe number would resolve to the wrong folder - confirm
    # probe numbers stay single-digit.
    folder = os.path.join(root, f"P{probe_name[-1]}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Load channel 1 and channel 2 for the training probe and for the test probe.
# Note: the *_ch0 / *_ch1 variables hold channels 1 and 2 respectively.
train_ch0 = load_channel_data(train_probe, adc_num, 1)
train_ch1 = load_channel_data(train_probe, adc_num, 2)
test_ch0 = load_channel_data(test_probe, adc_num, 1)
test_ch1 = load_channel_data(test_probe, adc_num, 2)

# Stack the two channels row-wise; channel-1 rows get label 0 and
# channel-2 rows get label 1, in the same order as the stacked rows.
X_train = np.vstack([train_ch0, train_ch1])
y_train = np.concatenate([np.zeros(train_ch0.shape[0]), np.ones(train_ch1.shape[0])])
X_test = np.vstack([test_ch0, test_ch1])
y_test = np.concatenate([np.zeros(test_ch0.shape[0]), np.ones(test_ch1.shape[0])])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only; the test probe is
# transformed with those same training statistics.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# First forest is fitted on all scaled columns; it is only used to rank
# features via its feature_importances_.
rf = RandomForestClassifier(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42
)
rf.fit(X_train_scaled, y_train)

# Identify Important Features
# Keep the column indices whose importance is strictly greater than the
# median (50th percentile) importance, and slice train and test alike.
feature_importances = rf.feature_importances_
important_features = np.where(feature_importances > np.percentile(feature_importances, 50))[0]
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features
# Printed in column-index order (the order np.where returns), not sorted
# by importance.
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
# Same hyperparameters and seed as the initial forest, but fitted on the
# selected columns only.
rf_selected = RandomForestClassifier(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42
)
rf_selected.fit(X_train_selected, y_train)

# Create a unique folder name dynamically
# The name encodes train probe, test probe, ADC and sampling rate. The path
# is relative, so it resolves against the current working directory.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the Model and Scaler
# The selected column indices are saved too, alongside the forest and scaler.
joblib.dump(rf_selected, os.path.join(model_dir, "RandomForest_Model.pkl"))
joblib.dump(scaler, os.path.join(model_dir, "Scaler.pkl"))
joblib.dump(important_features, os.path.join(model_dir, "Selected_Features.pkl"))

# Predictions
# Hard class predictions, plus the predict_proba column at index 1, which is
# later passed to log_loss.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
# All metrics below compare the test-probe labels with the retrained
# forest's predictions; log_loss uses the index-1 probability column.
print("\nModel Performance After Feature Selection:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_selected):.4f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred_selected):.4f}")
print(f"MCC: {matthews_corrcoef(y_test, y_pred_selected):.4f}")
print(f"Log Loss: {log_loss(y_test, y_probs_selected):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred_selected):.4f}")
print(f"Recall: {recall_score(y_test, y_pred_selected):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_selected):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
# scikit-learn's confusion_matrix puts true labels on rows and predicted
# labels on columns, so cm[0] is the true-label-0 row and cm[1] the
# true-label-1 row.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# FPR: share of true-label-0 rows predicted as 1; FNR: share of
# true-label-1 rows predicted as 0.
fp_rate = cm[0][1] / (cm[0][1] + cm[0][0])
fn_rate = cm[1][0] / (cm[1][0] + cm[1][1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC1 CH1: (498, 38)
✅ Loaded Probe2 ADC1 CH2: (498, 38)
✅ Loaded Probe1 ADC1 CH1: (498, 38)
✅ Loaded Probe1 ADC1 CH2: (498, 38)

Top Important Features:
Max_H: Importance 0.0800
Mean_H: Importance 0.0800
Mean Deviation_H: Importance 0.0600
RMS_H: Importance 0.1257
Entropy_H: Importance 0.0194
Mean_Freq_H: Importance 0.0184
Kurtosis_Freq_H: Importance 0.0080
Variance_H: Importance 0.0201
Min_L: Importance 0.0732
Mean_L: Importance 0.1800
Std_L: Importance 0.0200
Mean Deviation_L: Importance 0.0800
RMS_L: Importance 0.1216
Entropy_L: Importance 0.0199
Mean_Freq_L: Importance 0.0126
Variance_L: Importance 0.0810

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0059
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       498
         CH2       1.00      1.00      1.00       498

    accuracy                    

#### **Sampling Rate 250MSPS**
This section contains data collected using a sampling rate of **250MSPS**. The model is trained on **Probe 2** and tested on **Probe 1**.



In [68]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Experiment configuration for this run
# ==================================================================
# Root folder of the extracted-feature files.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC index, sampling rate (MSPS), and the probe numbers used for
# fitting and for evaluation.
adc_num, sample_rate = 1, 250
train_probe_dataset, test_probe_dataset = 2, 1

# Probe labels ("Probe<n>") derived from the dataset numbers.
train_probe = "Probe{}".format(train_probe_dataset)
test_probe = "Probe{}".format(test_probe_dataset)

# Feature column labels (Time High, Freq High, Time Low, Freq Low): the same
# 19 statistic names, first suffixed "_H" and then repeated suffixed "_L".
feature_names = [
    f"{stat}_{suffix}"
    for suffix in ("H", "L")
    for stat in (
        "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
        "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
        "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
        "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
    )
]

def load_channel_data(probe_name, adc_num, channel, *, rate=None, root=None):
    """Load the saved feature array for one probe / ADC / channel.

    The file is looked up at
    ``<root>/P<d>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``
    where ``<d>`` is the last character of ``probe_name``. Only the file's
    existence is checked; the array contents are not validated.

    Args:
        probe_name: Probe label such as ``"Probe1"``.
        adc_num: ADC index used in the file name.
        channel: Channel index used in the file name.
        rate: Sampling rate (MSPS) used in the file name. Defaults to the
            notebook-level ``sample_rate`` at call time.
        root: Directory containing the per-probe folders. Defaults to the
            notebook-level ``base_directory`` at call time.

    Returns:
        The array returned by ``np.load`` for the resolved file.

    Raises:
        FileNotFoundError: If the resolved file does not exist.
    """
    # Fall back to the notebook globals only when no explicit value is given,
    # so existing three-argument calls behave exactly as before.
    if rate is None:
        rate = sample_rate
    if root is None:
        root = base_directory

    # NOTE(review): only the final character is used as the probe id, so a
    # multi-digit probe number would resolve to the wrong folder - confirm
    # probe numbers stay single-digit.
    folder = os.path.join(root, f"P{probe_name[-1]}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Load channel 1 and channel 2 for the training probe and for the test probe.
# Note: the *_ch0 / *_ch1 variables hold channels 1 and 2 respectively.
train_ch0 = load_channel_data(train_probe, adc_num, 1)
train_ch1 = load_channel_data(train_probe, adc_num, 2)
test_ch0 = load_channel_data(test_probe, adc_num, 1)
test_ch1 = load_channel_data(test_probe, adc_num, 2)

# Stack the two channels row-wise; channel-1 rows get label 0 and
# channel-2 rows get label 1, in the same order as the stacked rows.
X_train = np.vstack([train_ch0, train_ch1])
y_train = np.concatenate([np.zeros(train_ch0.shape[0]), np.ones(train_ch1.shape[0])])
X_test = np.vstack([test_ch0, test_ch1])
y_test = np.concatenate([np.zeros(test_ch0.shape[0]), np.ones(test_ch1.shape[0])])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only; the test probe is
# transformed with those same training statistics.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# First forest is fitted on all scaled columns; it is only used to rank
# features via its feature_importances_.
rf = RandomForestClassifier(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42
)
rf.fit(X_train_scaled, y_train)

# Identify Important Features
# Keep the column indices whose importance is strictly greater than the
# median (50th percentile) importance, and slice train and test alike.
feature_importances = rf.feature_importances_
important_features = np.where(feature_importances > np.percentile(feature_importances, 50))[0]
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features
# Printed in column-index order (the order np.where returns), not sorted
# by importance.
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
# Same hyperparameters and seed as the initial forest, but fitted on the
# selected columns only.
rf_selected = RandomForestClassifier(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42
)
rf_selected.fit(X_train_selected, y_train)

# Create a unique folder name dynamically
# The name encodes train probe, test probe, ADC and sampling rate. The path
# is relative, so it resolves against the current working directory.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the Model and Scaler
# The selected column indices are saved too, alongside the forest and scaler.
joblib.dump(rf_selected, os.path.join(model_dir, "RandomForest_Model.pkl"))
joblib.dump(scaler, os.path.join(model_dir, "Scaler.pkl"))
joblib.dump(important_features, os.path.join(model_dir, "Selected_Features.pkl"))

# Predictions
# Hard class predictions, plus the predict_proba column at index 1, which is
# later passed to log_loss.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
# All metrics below compare the test-probe labels with the retrained
# forest's predictions; log_loss uses the index-1 probability column.
print("\nModel Performance After Feature Selection:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_selected):.4f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred_selected):.4f}")
print(f"MCC: {matthews_corrcoef(y_test, y_pred_selected):.4f}")
print(f"Log Loss: {log_loss(y_test, y_probs_selected):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred_selected):.4f}")
print(f"Recall: {recall_score(y_test, y_pred_selected):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_selected):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
# scikit-learn's confusion_matrix puts true labels on rows and predicted
# labels on columns, so cm[0] is the true-label-0 row and cm[1] the
# true-label-1 row.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# FPR: share of true-label-0 rows predicted as 1; FNR: share of
# true-label-1 rows predicted as 0.
fp_rate = cm[0][1] / (cm[0][1] + cm[0][0])
fn_rate = cm[1][0] / (cm[1][0] + cm[1][1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC1 CH1: (398, 38)
✅ Loaded Probe2 ADC1 CH2: (398, 38)
✅ Loaded Probe1 ADC1 CH1: (398, 38)
✅ Loaded Probe1 ADC1 CH2: (398, 38)

Top Important Features:
Max_H: Importance 0.0600
Mean_H: Importance 0.0600
Std_H: Importance 0.0800
Mean Deviation_H: Importance 0.0200
RMS_H: Importance 0.0800
Entropy_H: Importance 0.0400
Mean_Freq_H: Importance 0.0400
Irregularity_H: Importance 0.0400
Min_L: Importance 0.0600
Mean_L: Importance 0.0800
Std_L: Importance 0.0800
Mean Deviation_L: Importance 0.0600
RMS_L: Importance 0.0800
Entropy_L: Importance 0.0600
Mean_Freq_L: Importance 0.0800
Variance_L: Importance 0.0800

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0126
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       398
         CH2       1.00      1.00      1.00       398

    accuracy                          

#### **Sampling Rate 300MSPS**
This section contains data collected using a sampling rate of **300MSPS**. The model is trained on **Probe 2** and tested on **Probe 1**.



In [69]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Experiment configuration for this run
# ==================================================================
# Root folder of the extracted-feature files.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC index, sampling rate (MSPS), and the probe numbers used for
# fitting and for evaluation.
adc_num, sample_rate = 1, 300
train_probe_dataset, test_probe_dataset = 2, 1

# Probe labels ("Probe<n>") derived from the dataset numbers.
train_probe = "Probe{}".format(train_probe_dataset)
test_probe = "Probe{}".format(test_probe_dataset)

# Feature column labels (Time High, Freq High, Time Low, Freq Low): the same
# 19 statistic names, first suffixed "_H" and then repeated suffixed "_L".
feature_names = [
    f"{stat}_{suffix}"
    for suffix in ("H", "L")
    for stat in (
        "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
        "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
        "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
        "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
    )
]

def load_channel_data(probe_name, adc_num, channel, *, rate=None, root=None):
    """Load the saved feature array for one probe / ADC / channel.

    The file is looked up at
    ``<root>/P<d>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``
    where ``<d>`` is the last character of ``probe_name``. Only the file's
    existence is checked; the array contents are not validated.

    Args:
        probe_name: Probe label such as ``"Probe1"``.
        adc_num: ADC index used in the file name.
        channel: Channel index used in the file name.
        rate: Sampling rate (MSPS) used in the file name. Defaults to the
            notebook-level ``sample_rate`` at call time.
        root: Directory containing the per-probe folders. Defaults to the
            notebook-level ``base_directory`` at call time.

    Returns:
        The array returned by ``np.load`` for the resolved file.

    Raises:
        FileNotFoundError: If the resolved file does not exist.
    """
    # Fall back to the notebook globals only when no explicit value is given,
    # so existing three-argument calls behave exactly as before.
    if rate is None:
        rate = sample_rate
    if root is None:
        root = base_directory

    # NOTE(review): only the final character is used as the probe id, so a
    # multi-digit probe number would resolve to the wrong folder - confirm
    # probe numbers stay single-digit.
    folder = os.path.join(root, f"P{probe_name[-1]}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Load channel 1 and channel 2 for the training probe and for the test probe.
# Note: the *_ch0 / *_ch1 variables hold channels 1 and 2 respectively.
train_ch0 = load_channel_data(train_probe, adc_num, 1)
train_ch1 = load_channel_data(train_probe, adc_num, 2)
test_ch0 = load_channel_data(test_probe, adc_num, 1)
test_ch1 = load_channel_data(test_probe, adc_num, 2)

# Stack the two channels row-wise; channel-1 rows get label 0 and
# channel-2 rows get label 1, in the same order as the stacked rows.
X_train = np.vstack([train_ch0, train_ch1])
y_train = np.concatenate([np.zeros(train_ch0.shape[0]), np.ones(train_ch1.shape[0])])
X_test = np.vstack([test_ch0, test_ch1])
y_test = np.concatenate([np.zeros(test_ch0.shape[0]), np.ones(test_ch1.shape[0])])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only; the test probe is
# transformed with those same training statistics.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# First forest is fitted on all scaled columns; it is only used to rank
# features via its feature_importances_.
rf = RandomForestClassifier(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42
)
rf.fit(X_train_scaled, y_train)

# Identify Important Features
# Keep the column indices whose importance is strictly greater than the
# median (50th percentile) importance, and slice train and test alike.
feature_importances = rf.feature_importances_
important_features = np.where(feature_importances > np.percentile(feature_importances, 50))[0]
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features
# Printed in column-index order (the order np.where returns), not sorted
# by importance.
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
# Same hyperparameters and seed as the initial forest, but fitted on the
# selected columns only.
rf_selected = RandomForestClassifier(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42
)
rf_selected.fit(X_train_selected, y_train)

# Create a unique folder name dynamically
# The name encodes train probe, test probe, ADC and sampling rate. The path
# is relative, so it resolves against the current working directory.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the Model and Scaler
# The selected column indices are saved too, alongside the forest and scaler.
joblib.dump(rf_selected, os.path.join(model_dir, "RandomForest_Model.pkl"))
joblib.dump(scaler, os.path.join(model_dir, "Scaler.pkl"))
joblib.dump(important_features, os.path.join(model_dir, "Selected_Features.pkl"))

# Predictions
# Hard class predictions, plus the predict_proba column at index 1, which is
# later passed to log_loss.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
# All metrics below compare the test-probe labels with the retrained
# forest's predictions; log_loss uses the index-1 probability column.
print("\nModel Performance After Feature Selection:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_selected):.4f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred_selected):.4f}")
print(f"MCC: {matthews_corrcoef(y_test, y_pred_selected):.4f}")
print(f"Log Loss: {log_loss(y_test, y_probs_selected):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred_selected):.4f}")
print(f"Recall: {recall_score(y_test, y_pred_selected):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_selected):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
# scikit-learn's confusion_matrix puts true labels on rows and predicted
# labels on columns, so cm[0] is the true-label-0 row and cm[1] the
# true-label-1 row.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# FPR: share of true-label-0 rows predicted as 1; FNR: share of
# true-label-1 rows predicted as 0.
fp_rate = cm[0][1] / (cm[0][1] + cm[0][0])
fn_rate = cm[1][0] / (cm[1][0] + cm[1][1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC1 CH1: (332, 38)
✅ Loaded Probe2 ADC1 CH2: (332, 38)
✅ Loaded Probe1 ADC1 CH1: (332, 38)
✅ Loaded Probe1 ADC1 CH2: (332, 38)

Top Important Features:
Max_H: Importance 0.1000
Mean_H: Importance 0.0600
Std_H: Importance 0.0800
Mean Deviation_H: Importance 0.0200
RMS_H: Importance 0.0800
Entropy_H: Importance 0.0400
Mean_Freq_H: Importance 0.0600
Kurtosis_Freq_H: Importance 0.0174
Min_L: Importance 0.0024
Mean_L: Importance 0.1000
Std_L: Importance 0.0800
Mean Deviation_L: Importance 0.0600
RMS_L: Importance 0.0800
Entropy_L: Importance 0.0600
Mean_Freq_L: Importance 0.0800
Variance_L: Importance 0.0802

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0011
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       332
         CH2       1.00      1.00      1.00       332

    accuracy                         

## **Testing of Probe 3**
To conduct experiments, **ADC1** (data acquisition device) is used consistently throughout. **Probe 3** is kept constant, and the machine learning model is **always trained on Probe 3**. The model is then tested on **Probe 1** and **Probe 2** to evaluate its generalization.

The **sampling rate** is varied across different tests, ranging from **10 MSPS to 300 MSPS**.

### Training **Probe 3** data and Testing on **Probe 1**
To evaluate the model trained on Probe 3, the probe is changed to Probe 1 and the results on Probe 1 are evaluated.

#### **Sampling Rate 10MSPS**
This section contains data collected using a sampling rate of **10MSPS**. The model is trained on **Probe 3** and tested on **Probe 1**.



In [70]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Experiment configuration for this run
# ==================================================================
# Root folder of the extracted-feature files.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC index, sampling rate (MSPS), and the probe numbers used for
# fitting and for evaluation.
adc_num, sample_rate = 1, 10
train_probe_dataset, test_probe_dataset = 3, 1

# Probe labels ("Probe<n>") derived from the dataset numbers.
train_probe = "Probe{}".format(train_probe_dataset)
test_probe = "Probe{}".format(test_probe_dataset)

# Feature column labels (Time High, Freq High, Time Low, Freq Low): the same
# 19 statistic names, first suffixed "_H" and then repeated suffixed "_L".
feature_names = [
    f"{stat}_{suffix}"
    for suffix in ("H", "L")
    for stat in (
        "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
        "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
        "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
        "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
    )
]

def load_channel_data(probe_name, adc_num, channel, *, rate_msps=None, root_dir=None):
    """Load the saved feature array for one probe / ADC / channel combination.

    The file is looked up at
    ``<root_dir>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate_msps>_MSPS.npy``
    where ``<n>`` is the run of digits at the end of ``probe_name``.

    Args:
        probe_name: Probe label such as ``"Probe3"``.
        adc_num: ADC index used in the file name.
        channel: Channel index used in the file name.
        rate_msps: Sampling rate used in the file name. When omitted, the
            notebook-level ``sample_rate`` is read at call time.
        root_dir: Folder containing the ``P<n>_fd`` directories. When
            omitted, the notebook-level ``base_directory`` is read at call time.

    Returns:
        The array stored in the ``.npy`` file.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    if rate_msps is None:
        rate_msps = sample_rate
    if root_dir is None:
        root_dir = base_directory

    # Take every trailing digit so "Probe12" maps to "P12_fd" rather than
    # "P2_fd"; names without trailing digits keep the old last-character rule.
    stem_length = len(probe_name.rstrip("0123456789"))
    probe_id = probe_name[stem_length:] or probe_name[-1]

    folder = os.path.join(root_dir, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate_msps}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Explicit check keeps the error message pointing at the full path.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Load Channel Features and Build Train / Test Sets
# ==================================================================
# Load order (train CH1, train CH2, test CH1, test CH2) is kept so the
# progress messages come out in the same sequence.
train_ch0, train_ch1 = [load_channel_data(train_probe, adc_num, ch) for ch in (1, 2)]
test_ch0, test_ch1 = [load_channel_data(test_probe, adc_num, ch) for ch in (1, 2)]

# Rows loaded from channel 1 are labelled 0.0, rows from channel 2 are 1.0.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ==================================================================
# Standardize Features (statistics come from the training set only)
# ==================================================================
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# First Forest: Rank Features by Importance
# ==================================================================
# Both forests in this cell share the same hyper-parameters.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features together with their importance.
print("\nTop Important Features:")
for feature_idx, importance in zip(important_features, feature_importances[important_features]):
    print(f"{feature_names[feature_idx]}: Importance {importance:.4f}")

# ==================================================================
# Second Forest: Fit on the Retained Features Only
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# Output folder name encodes probes, ADC and sampling rate of this run.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the retained column indices.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and the probability assigned to label 1.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Test-Set Metrics for the Feature-Selected Model
# ==================================================================
print("\nModel Performance After Feature Selection:")
# Log loss is scored on probabilities; every other metric on hard labels.
metric_table = (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
)
for metric_label, metric_fn, metric_input in metric_table:
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix plus the error rates derived from its rows.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:", cm, sep="\n")
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC1 CH1: (9998, 38)
✅ Loaded Probe3 ADC1 CH2: (9998, 38)
✅ Loaded Probe1 ADC1 CH1: (9998, 38)
✅ Loaded Probe1 ADC1 CH2: (9998, 38)

Top Important Features:
Max_H: Importance 0.0062
Min_H: Importance 0.0174
Mean_H: Importance 0.0772
Mean Deviation_H: Importance 0.0048
RMS_H: Importance 0.1177
Skewness_H: Importance 0.0029
Entropy_H: Importance 0.0231
Spread_H: Importance 0.0037
Mean_Freq_H: Importance 0.0421
Irregularity_H: Importance 0.0030
Variance_H: Importance 0.0457
Max_L: Importance 0.0028
Min_L: Importance 0.1182
Mean_L: Importance 0.1648
RMS_L: Importance 0.2142
Skewness_L: Importance 0.0026
Entropy_L: Importance 0.0385
Mean_Freq_L: Importance 0.0161
Variance_L: Importance 0.0790

Model Performance After Feature Selection:
Test Accuracy: 0.9204
Balanced Accuracy: 0.9204
MCC: 0.8474
Log Loss: 0.1617
F1 Score: 0.9251
Recall: 0.9830
Precision: 0.8736
              precision    recall  f1-score   support

         CH1       0.98      0.86      0.92      9998
       

#### **Sampling Rate 20MSPS**
This section contains data collected using a sampling rate of **20MSPS**. The model is trained on **Probe 3** and tested on **Probe 1**.



In [71]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Run Settings (ADC, probes, sampling rate)
# ==================================================================
# Root folder that the feature files are loaded from.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC index and sampling rate (MSPS) used to pick the feature files.
adc_num, sample_rate = 1, 20

# Probe numbers: the model is fitted on the first and evaluated on the second.
train_probe_dataset, test_probe_dataset = 3, 1

# Probe labels are derived from the probe numbers above.
train_probe, test_probe = f"Probe{train_probe_dataset}", f"Probe{test_probe_dataset}"

# Feature names (Time High, Freq High, Time Low, Freq Low): every statistic
# below is listed once with the "_H" suffix and then once with "_L".
_feature_stems = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [f"{stem}_{band}" for band in ("H", "L") for stem in _feature_stems]

def load_channel_data(probe_name, adc_num, channel, *, rate_msps=None, root_dir=None):
    """Load the saved feature array for one probe / ADC / channel combination.

    The file is looked up at
    ``<root_dir>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate_msps>_MSPS.npy``
    where ``<n>`` is the run of digits at the end of ``probe_name``.

    Args:
        probe_name: Probe label such as ``"Probe3"``.
        adc_num: ADC index used in the file name.
        channel: Channel index used in the file name.
        rate_msps: Sampling rate used in the file name. When omitted, the
            notebook-level ``sample_rate`` is read at call time.
        root_dir: Folder containing the ``P<n>_fd`` directories. When
            omitted, the notebook-level ``base_directory`` is read at call time.

    Returns:
        The array stored in the ``.npy`` file.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    if rate_msps is None:
        rate_msps = sample_rate
    if root_dir is None:
        root_dir = base_directory

    # Take every trailing digit so "Probe12" maps to "P12_fd" rather than
    # "P2_fd"; names without trailing digits keep the old last-character rule.
    stem_length = len(probe_name.rstrip("0123456789"))
    probe_id = probe_name[stem_length:] or probe_name[-1]

    folder = os.path.join(root_dir, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate_msps}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Explicit check keeps the error message pointing at the full path.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Load Channel Features and Build Train / Test Sets
# ==================================================================
# Load order (train CH1, train CH2, test CH1, test CH2) is kept so the
# progress messages come out in the same sequence.
train_ch0, train_ch1 = [load_channel_data(train_probe, adc_num, ch) for ch in (1, 2)]
test_ch0, test_ch1 = [load_channel_data(test_probe, adc_num, ch) for ch in (1, 2)]

# Rows loaded from channel 1 are labelled 0.0, rows from channel 2 are 1.0.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ==================================================================
# Standardize Features (statistics come from the training set only)
# ==================================================================
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# First Forest: Rank Features by Importance
# ==================================================================
# Both forests in this cell share the same hyper-parameters.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features together with their importance.
print("\nTop Important Features:")
for feature_idx, importance in zip(important_features, feature_importances[important_features]):
    print(f"{feature_names[feature_idx]}: Importance {importance:.4f}")

# ==================================================================
# Second Forest: Fit on the Retained Features Only
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# Output folder name encodes probes, ADC and sampling rate of this run.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the retained column indices.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and the probability assigned to label 1.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Test-Set Metrics for the Feature-Selected Model
# ==================================================================
print("\nModel Performance After Feature Selection:")
# Log loss is scored on probabilities; every other metric on hard labels.
metric_table = (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
)
for metric_label, metric_fn, metric_input in metric_table:
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix plus the error rates derived from its rows.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:", cm, sep="\n")
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC1 CH1: (4998, 38)
✅ Loaded Probe3 ADC1 CH2: (4998, 38)
✅ Loaded Probe1 ADC1 CH1: (4998, 38)
✅ Loaded Probe1 ADC1 CH2: (4998, 38)

Top Important Features:
Max_H: Importance 0.1215
Min_H: Importance 0.0255
Mean_H: Importance 0.1520
Mean Deviation_H: Importance 0.0050
RMS_H: Importance 0.2228
Entropy_H: Importance 0.0175
Mean_Freq_H: Importance 0.0771
Variance_H: Importance 0.0366
Max_L: Importance 0.0011
Min_L: Importance 0.0233
Mean_L: Importance 0.1243
Std_L: Importance 0.0013
RMS_L: Importance 0.1203
Skewness_L: Importance 0.0012
Kurtosis_L: Importance 0.0030
Entropy_L: Importance 0.0282
Spread_L: Importance 0.0007
Mean_Freq_L: Importance 0.0117
Variance_L: Importance 0.0252

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0010
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      4998
         CH2    

#### **Sampling Rate 30MSPS**
This section contains data collected using a sampling rate of **30MSPS**. The model is trained on **Probe 3** and tested on **Probe 1**.



In [72]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Run Settings (ADC, probes, sampling rate)
# ==================================================================
# Root folder that the feature files are loaded from.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC index and sampling rate (MSPS) used to pick the feature files.
adc_num, sample_rate = 1, 30

# Probe numbers: the model is fitted on the first and evaluated on the second.
train_probe_dataset, test_probe_dataset = 3, 1

# Probe labels are derived from the probe numbers above.
train_probe, test_probe = f"Probe{train_probe_dataset}", f"Probe{test_probe_dataset}"

# Feature names (Time High, Freq High, Time Low, Freq Low): every statistic
# below is listed once with the "_H" suffix and then once with "_L".
_feature_stems = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [f"{stem}_{band}" for band in ("H", "L") for stem in _feature_stems]

def load_channel_data(probe_name, adc_num, channel, *, rate_msps=None, root_dir=None):
    """Load the saved feature array for one probe / ADC / channel combination.

    The file is looked up at
    ``<root_dir>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate_msps>_MSPS.npy``
    where ``<n>`` is the run of digits at the end of ``probe_name``.

    Args:
        probe_name: Probe label such as ``"Probe3"``.
        adc_num: ADC index used in the file name.
        channel: Channel index used in the file name.
        rate_msps: Sampling rate used in the file name. When omitted, the
            notebook-level ``sample_rate`` is read at call time.
        root_dir: Folder containing the ``P<n>_fd`` directories. When
            omitted, the notebook-level ``base_directory`` is read at call time.

    Returns:
        The array stored in the ``.npy`` file.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    if rate_msps is None:
        rate_msps = sample_rate
    if root_dir is None:
        root_dir = base_directory

    # Take every trailing digit so "Probe12" maps to "P12_fd" rather than
    # "P2_fd"; names without trailing digits keep the old last-character rule.
    stem_length = len(probe_name.rstrip("0123456789"))
    probe_id = probe_name[stem_length:] or probe_name[-1]

    folder = os.path.join(root_dir, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate_msps}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Explicit check keeps the error message pointing at the full path.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Load Channel Features and Build Train / Test Sets
# ==================================================================
# Load order (train CH1, train CH2, test CH1, test CH2) is kept so the
# progress messages come out in the same sequence.
train_ch0, train_ch1 = [load_channel_data(train_probe, adc_num, ch) for ch in (1, 2)]
test_ch0, test_ch1 = [load_channel_data(test_probe, adc_num, ch) for ch in (1, 2)]

# Rows loaded from channel 1 are labelled 0.0, rows from channel 2 are 1.0.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ==================================================================
# Standardize Features (statistics come from the training set only)
# ==================================================================
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# First Forest: Rank Features by Importance
# ==================================================================
# Both forests in this cell share the same hyper-parameters.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features together with their importance.
print("\nTop Important Features:")
for feature_idx, importance in zip(important_features, feature_importances[important_features]):
    print(f"{feature_names[feature_idx]}: Importance {importance:.4f}")

# ==================================================================
# Second Forest: Fit on the Retained Features Only
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# Output folder name encodes probes, ADC and sampling rate of this run.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the retained column indices.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and the probability assigned to label 1.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Test-Set Metrics for the Feature-Selected Model
# ==================================================================
print("\nModel Performance After Feature Selection:")
# Log loss is scored on probabilities; every other metric on hard labels.
metric_table = (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
)
for metric_label, metric_fn, metric_input in metric_table:
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix plus the error rates derived from its rows.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:", cm, sep="\n")
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC1 CH1: (3331, 38)
✅ Loaded Probe3 ADC1 CH2: (3332, 38)
✅ Loaded Probe1 ADC1 CH1: (3332, 38)
✅ Loaded Probe1 ADC1 CH2: (3332, 38)

Top Important Features:
Max_H: Importance 0.1332
Min_H: Importance 0.0077
Mean_H: Importance 0.1297
Mean Deviation_H: Importance 0.0056
RMS_H: Importance 0.2115
Peak-to-Peak_H: Importance 0.0100
Centroid_H: Importance 0.0061
Entropy_H: Importance 0.0443
Spread_H: Importance 0.0060
Mean_Freq_H: Importance 0.0810
Irregularity_H: Importance 0.0072
Variance_H: Importance 0.0425
Min_L: Importance 0.0276
Mean_L: Importance 0.0888
RMS_L: Importance 0.1151
Skewness_L: Importance 0.0093
Entropy_L: Importance 0.0251
Mean_Freq_L: Importance 0.0112
Variance_L: Importance 0.0249

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0008
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      333

#### **Sampling Rate 40MSPS**
This section contains data collected using a sampling rate of **40MSPS**. The model is trained on **Probe 3** and tested on **Probe 1**.



In [73]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Run Settings (ADC, probes, sampling rate)
# ==================================================================
# Root folder that the feature files are loaded from.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC index and sampling rate (MSPS) used to pick the feature files.
adc_num, sample_rate = 1, 40

# Probe numbers: the model is fitted on the first and evaluated on the second.
train_probe_dataset, test_probe_dataset = 3, 1

# Probe labels are derived from the probe numbers above.
train_probe, test_probe = f"Probe{train_probe_dataset}", f"Probe{test_probe_dataset}"

# Feature names (Time High, Freq High, Time Low, Freq Low): every statistic
# below is listed once with the "_H" suffix and then once with "_L".
_feature_stems = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [f"{stem}_{band}" for band in ("H", "L") for stem in _feature_stems]

def load_channel_data(probe_name, adc_num, channel, *, rate_msps=None, root_dir=None):
    """Load the saved feature array for one probe / ADC / channel combination.

    The file is looked up at
    ``<root_dir>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate_msps>_MSPS.npy``
    where ``<n>`` is the run of digits at the end of ``probe_name``.

    Args:
        probe_name: Probe label such as ``"Probe3"``.
        adc_num: ADC index used in the file name.
        channel: Channel index used in the file name.
        rate_msps: Sampling rate used in the file name. When omitted, the
            notebook-level ``sample_rate`` is read at call time.
        root_dir: Folder containing the ``P<n>_fd`` directories. When
            omitted, the notebook-level ``base_directory`` is read at call time.

    Returns:
        The array stored in the ``.npy`` file.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    if rate_msps is None:
        rate_msps = sample_rate
    if root_dir is None:
        root_dir = base_directory

    # Take every trailing digit so "Probe12" maps to "P12_fd" rather than
    # "P2_fd"; names without trailing digits keep the old last-character rule.
    stem_length = len(probe_name.rstrip("0123456789"))
    probe_id = probe_name[stem_length:] or probe_name[-1]

    folder = os.path.join(root_dir, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate_msps}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Explicit check keeps the error message pointing at the full path.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Load Channel Features and Build Train / Test Sets
# ==================================================================
# Load order (train CH1, train CH2, test CH1, test CH2) is kept so the
# progress messages come out in the same sequence.
train_ch0, train_ch1 = [load_channel_data(train_probe, adc_num, ch) for ch in (1, 2)]
test_ch0, test_ch1 = [load_channel_data(test_probe, adc_num, ch) for ch in (1, 2)]

# Rows loaded from channel 1 are labelled 0.0, rows from channel 2 are 1.0.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ==================================================================
# Standardize Features (statistics come from the training set only)
# ==================================================================
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# First Forest: Rank Features by Importance
# ==================================================================
# Both forests in this cell share the same hyper-parameters.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features together with their importance.
print("\nTop Important Features:")
for feature_idx, importance in zip(important_features, feature_importances[important_features]):
    print(f"{feature_names[feature_idx]}: Importance {importance:.4f}")

# ==================================================================
# Second Forest: Fit on the Retained Features Only
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# Output folder name encodes probes, ADC and sampling rate of this run.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the retained column indices.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and the probability assigned to label 1.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Test-Set Metrics for the Feature-Selected Model
# ==================================================================
print("\nModel Performance After Feature Selection:")
# Log loss is scored on probabilities; every other metric on hard labels.
metric_table = (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
)
for metric_label, metric_fn, metric_input in metric_table:
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix plus the error rates derived from its rows.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:", cm, sep="\n")
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC1 CH1: (2481, 38)
✅ Loaded Probe3 ADC1 CH2: (2481, 38)
✅ Loaded Probe1 ADC1 CH1: (2481, 38)
✅ Loaded Probe1 ADC1 CH2: (2481, 38)

Top Important Features:
Max_H: Importance 0.0636
Min_H: Importance 0.0150
Mean_H: Importance 0.0941
Mean Deviation_H: Importance 0.0277
RMS_H: Importance 0.1154
Peak-to-Peak_H: Importance 0.0033
Centroid_H: Importance 0.0002
Entropy_H: Importance 0.0142
Mean_Freq_H: Importance 0.0588
Variance_H: Importance 0.0632
Max_L: Importance 0.0138
Min_L: Importance 0.0464
Mean_L: Importance 0.1817
RMS_L: Importance 0.1361
Centroid_L: Importance 0.0001
Entropy_L: Importance 0.0457
Spread_L: Importance 0.0000
Mean_Freq_L: Importance 0.0130
Variance_L: Importance 0.1077

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0005
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      2481
       

#### **Sampling Rate 50MSPS**
This section contains data collected using a sampling rate of **50MSPS**. The model is trained on **Probe 3** and tested on **Probe 1**.



In [74]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Run Settings (ADC, probes, sampling rate)
# ==================================================================
# Root folder that the feature files are loaded from.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC index and sampling rate (MSPS) used to pick the feature files.
adc_num, sample_rate = 1, 50

# Probe numbers: the model is fitted on the first and evaluated on the second.
train_probe_dataset, test_probe_dataset = 3, 1

# Probe labels are derived from the probe numbers above.
train_probe, test_probe = f"Probe{train_probe_dataset}", f"Probe{test_probe_dataset}"

# Feature names (Time High, Freq High, Time Low, Freq Low): every statistic
# below is listed once with the "_H" suffix and then once with "_L".
_feature_stems = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [f"{stem}_{band}" for band in ("H", "L") for stem in _feature_stems]

def load_channel_data(probe_name, adc_num, channel, *, rate_msps=None, root_dir=None):
    """Load the saved feature array for one probe / ADC / channel combination.

    The file is looked up at
    ``<root_dir>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate_msps>_MSPS.npy``
    where ``<n>`` is the run of digits at the end of ``probe_name``.

    Args:
        probe_name: Probe label such as ``"Probe3"``.
        adc_num: ADC index used in the file name.
        channel: Channel index used in the file name.
        rate_msps: Sampling rate used in the file name. When omitted, the
            notebook-level ``sample_rate`` is read at call time.
        root_dir: Folder containing the ``P<n>_fd`` directories. When
            omitted, the notebook-level ``base_directory`` is read at call time.

    Returns:
        The array stored in the ``.npy`` file.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    if rate_msps is None:
        rate_msps = sample_rate
    if root_dir is None:
        root_dir = base_directory

    # Take every trailing digit so "Probe12" maps to "P12_fd" rather than
    # "P2_fd"; names without trailing digits keep the old last-character rule.
    stem_length = len(probe_name.rstrip("0123456789"))
    probe_id = probe_name[stem_length:] or probe_name[-1]

    folder = os.path.join(root_dir, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate_msps}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Explicit check keeps the error message pointing at the full path.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Load Channel Features and Build Train / Test Sets
# ==================================================================
# Load order (train CH1, train CH2, test CH1, test CH2) is kept so the
# progress messages come out in the same sequence.
train_ch0, train_ch1 = [load_channel_data(train_probe, adc_num, ch) for ch in (1, 2)]
test_ch0, test_ch1 = [load_channel_data(test_probe, adc_num, ch) for ch in (1, 2)]

# Rows loaded from channel 1 are labelled 0.0, rows from channel 2 are 1.0.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ==================================================================
# Standardize Features (statistics come from the training set only)
# ==================================================================
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# First Forest: Rank Features by Importance
# ==================================================================
# Both forests in this cell share the same hyper-parameters.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features together with their importance.
print("\nTop Important Features:")
for feature_idx, importance in zip(important_features, feature_importances[important_features]):
    print(f"{feature_names[feature_idx]}: Importance {importance:.4f}")

# ==================================================================
# Second Forest: Fit on the Retained Features Only
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# Output folder name encodes probes, ADC and sampling rate of this run.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the retained column indices.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and the probability assigned to label 1.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Test-Set Metrics for the Feature-Selected Model
# ==================================================================
print("\nModel Performance After Feature Selection:")
# Log loss is scored on probabilities; every other metric on hard labels.
metric_table = (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
)
for metric_label, metric_fn, metric_input in metric_table:
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix plus the error rates derived from its rows.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:", cm, sep="\n")
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC1 CH1: (1998, 38)
✅ Loaded Probe3 ADC1 CH2: (1998, 38)
✅ Loaded Probe1 ADC1 CH1: (1998, 38)
✅ Loaded Probe1 ADC1 CH2: (1998, 38)

Top Important Features:
Max_H: Importance 0.0636
Min_H: Importance 0.0094
Mean_H: Importance 0.0944
Mean Deviation_H: Importance 0.0007
RMS_H: Importance 0.1411
Peak-to-Peak_H: Importance 0.0051
Entropy_H: Importance 0.0147
Mean_Freq_H: Importance 0.0505
Variance_H: Importance 0.0836
Max_L: Importance 0.0092
Min_L: Importance 0.0402
Mean_L: Importance 0.1728
Std_L: Importance 0.0007
Mean Deviation_L: Importance 0.0123
RMS_L: Importance 0.1333
Kurtosis_L: Importance 0.0020
Entropy_L: Importance 0.0449
Mean_Freq_L: Importance 0.0113
Variance_L: Importance 0.1101

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0000
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      1998
    

#### **Sampling Rate 100MSPS**
This section contains data collected using a sampling rate of **100MSPS**. The model is trained on **Probe 3** and tested on **Probe 1**.



In [75]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root directory of the extracted feature files.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selection: ADC number, sampling rate (the number placed in
# front of "_MSPS" in the feature file names) and the probe datasets used
# for training and testing.
adc_num, sample_rate = 1, 100
train_probe_dataset, test_probe_dataset = 3, 1

# Probe names follow directly from the dataset numbers.
train_probe = f"Probe{train_probe_dataset}"
test_probe = f"Probe{test_probe_dataset}"

# Feature names, assembled from four groups in this order:
# Time High, Freq High, Time Low, Freq Low.
_time_high = [
    "Max_H", "Min_H", "Mean_H", "Std_H", "Mean Deviation_H", "RMS_H",
    "Skewness_H", "Kurtosis_H", "Peak-to-Peak_H", "Zero Crossing Rate_H",
]
_freq_high = [
    "Centroid_H", "Entropy_H", "Spread_H", "Skewness_Freq_H", "Mean_Freq_H",
    "Kurtosis_Freq_H", "Irregularity_H", "Variance_H", "Dominant_Freq_H",
]
_time_low = [
    "Max_L", "Min_L", "Mean_L", "Std_L", "Mean Deviation_L", "RMS_L",
    "Skewness_L", "Kurtosis_L", "Peak-to-Peak_L", "Zero Crossing Rate_L",
]
_freq_low = [
    "Centroid_L", "Entropy_L", "Spread_L", "Skewness_Freq_L", "Mean_Freq_L",
    "Kurtosis_Freq_L", "Irregularity_L", "Variance_L", "Dominant_Freq_L",
]
feature_names = _time_high + _freq_high + _time_low + _freq_low

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel.

    The file is read from
    ``<base_directory>/P<d>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``
    where ``<d>`` is the last character of ``probe_name`` and
    ``base_directory`` / ``sample_rate`` are the module-level configuration
    values. No consistency check is performed on the loaded data.

    Args:
        probe_name: Probe identifier such as ``"Probe3"``.
        adc_num: ADC number inserted into the file name.
        channel: Channel number inserted into the file name.

    Returns:
        The object returned by ``np.load`` for that file. Its ``shape`` is
        printed as a side effect.

    Raises:
        FileNotFoundError: If the expected ``.npy`` file does not exist.
    """
    # Only the last character selects the folder, so this assumes
    # single-digit probe numbers.
    folder = os.path.join(base_directory, f"P{probe_name[-1]}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Attempt the load directly instead of checking existence first; a
    # missing file is reported with the same exception type and message.
    try:
        data = np.load(file_path)
    except FileNotFoundError as err:
        raise FileNotFoundError(f"File not found: {file_path}") from err

    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
def _stack_channels(ch0, ch1):
    """Stack two channel arrays row-wise; rows of ch0 get label 0, rows of ch1 get label 1."""
    features = np.vstack([ch0, ch1])
    labels = np.concatenate([np.zeros(ch0.shape[0]), np.ones(ch1.shape[0])])
    return features, labels


# Load order is CH1 then CH2 for the training probe, followed by the same
# for the test probe, so the "Loaded ..." log lines keep that sequence.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

X_train, y_train = _stack_channels(train_ch0, train_ch1)
X_test, y_test = _stack_channels(test_ch0, test_ch1)

# ==================================================================
# Data Preprocessing
# ==================================================================
# Scaling statistics are estimated on the training probe only and then
# applied unchanged to the test probe.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests (feature ranking and final model) share these settings.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params)
rf.fit(X_train_scaled, y_train)

# Identify Important Features: keep the columns whose importance is
# strictly above the median importance.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected, X_test_selected = (
    X_train_scaled[:, important_features],
    X_test_scaled[:, important_features],
)

# Print Important Features
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params)
rf_selected.fit(X_train_selected, y_train)

# Create a unique folder name dynamically
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the Model, the Scaler and the selected column indices
artifacts = [
    ("RandomForest_Model.pkl", rf_selected),
    ("Scaler.pkl", scaler),
    ("Selected_Features.pkl", important_features),
]
for artifact_file, artifact in artifacts:
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Predictions: hard labels plus the probability of class 1 (CH2)
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_selected):.4f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred_selected):.4f}")
print(f"MCC: {matthews_corrcoef(y_test, y_pred_selected):.4f}")
print(f"Log Loss: {log_loss(y_test, y_probs_selected):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred_selected):.4f}")
print(f"Recall: {recall_score(y_test, y_pred_selected):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_selected):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Row-major order of the 2x2 matrix: [0][0], [0][1], [1][0], [1][1].
tn, fp, fn, tp = cm.ravel()
fp_rate = fp / (fp + tn)
fn_rate = fn / (fn + tp)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC1 CH1: (998, 38)
✅ Loaded Probe3 ADC1 CH2: (998, 38)
✅ Loaded Probe1 ADC1 CH1: (998, 38)
✅ Loaded Probe1 ADC1 CH2: (998, 38)

Top Important Features:
Max_H: Importance 0.0771
Min_H: Importance 0.0103
Mean_H: Importance 0.1020
Mean Deviation_H: Importance 0.0307
RMS_H: Importance 0.1455
Skewness_H: Importance 0.0126
Centroid_H: Importance 0.0021
Entropy_H: Importance 0.0105
Variance_H: Importance 0.0713
Max_L: Importance 0.0098
Min_L: Importance 0.0197
Mean_L: Importance 0.1949
Mean Deviation_L: Importance 0.0219
RMS_L: Importance 0.1386
Kurtosis_L: Importance 0.0161
Peak-to-Peak_L: Importance 0.0004
Skewness_Freq_L: Importance 0.0007
Mean_Freq_L: Importance 0.0024
Variance_L: Importance 0.1334

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0009
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       998

#### **Sampling Rate 150MSPS**
This section contains data collected using a sampling rate of **150MSPS**. The model is trained on **Probe 3** and tested on **Probe 1**.



In [76]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root directory of the extracted feature files.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selection: ADC number, sampling rate (the number placed in
# front of "_MSPS" in the feature file names) and the probe datasets used
# for training and testing.
adc_num, sample_rate = 1, 150
train_probe_dataset, test_probe_dataset = 3, 1

# Probe names follow directly from the dataset numbers.
train_probe = f"Probe{train_probe_dataset}"
test_probe = f"Probe{test_probe_dataset}"

# Feature names, assembled from four groups in this order:
# Time High, Freq High, Time Low, Freq Low.
_time_high = [
    "Max_H", "Min_H", "Mean_H", "Std_H", "Mean Deviation_H", "RMS_H",
    "Skewness_H", "Kurtosis_H", "Peak-to-Peak_H", "Zero Crossing Rate_H",
]
_freq_high = [
    "Centroid_H", "Entropy_H", "Spread_H", "Skewness_Freq_H", "Mean_Freq_H",
    "Kurtosis_Freq_H", "Irregularity_H", "Variance_H", "Dominant_Freq_H",
]
_time_low = [
    "Max_L", "Min_L", "Mean_L", "Std_L", "Mean Deviation_L", "RMS_L",
    "Skewness_L", "Kurtosis_L", "Peak-to-Peak_L", "Zero Crossing Rate_L",
]
_freq_low = [
    "Centroid_L", "Entropy_L", "Spread_L", "Skewness_Freq_L", "Mean_Freq_L",
    "Kurtosis_Freq_L", "Irregularity_L", "Variance_L", "Dominant_Freq_L",
]
feature_names = _time_high + _freq_high + _time_low + _freq_low

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel.

    The file is read from
    ``<base_directory>/P<d>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``
    where ``<d>`` is the last character of ``probe_name`` and
    ``base_directory`` / ``sample_rate`` are the module-level configuration
    values. No consistency check is performed on the loaded data.

    Args:
        probe_name: Probe identifier such as ``"Probe3"``.
        adc_num: ADC number inserted into the file name.
        channel: Channel number inserted into the file name.

    Returns:
        The object returned by ``np.load`` for that file. Its ``shape`` is
        printed as a side effect.

    Raises:
        FileNotFoundError: If the expected ``.npy`` file does not exist.
    """
    # Only the last character selects the folder, so this assumes
    # single-digit probe numbers.
    folder = os.path.join(base_directory, f"P{probe_name[-1]}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Attempt the load directly instead of checking existence first; a
    # missing file is reported with the same exception type and message.
    try:
        data = np.load(file_path)
    except FileNotFoundError as err:
        raise FileNotFoundError(f"File not found: {file_path}") from err

    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
def _stack_channels(ch0, ch1):
    """Stack two channel arrays row-wise; rows of ch0 get label 0, rows of ch1 get label 1."""
    features = np.vstack([ch0, ch1])
    labels = np.concatenate([np.zeros(ch0.shape[0]), np.ones(ch1.shape[0])])
    return features, labels


# Load order is CH1 then CH2 for the training probe, followed by the same
# for the test probe, so the "Loaded ..." log lines keep that sequence.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

X_train, y_train = _stack_channels(train_ch0, train_ch1)
X_test, y_test = _stack_channels(test_ch0, test_ch1)

# ==================================================================
# Data Preprocessing
# ==================================================================
# Scaling statistics are estimated on the training probe only and then
# applied unchanged to the test probe.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests (feature ranking and final model) share these settings.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params)
rf.fit(X_train_scaled, y_train)

# Identify Important Features: keep the columns whose importance is
# strictly above the median importance.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected, X_test_selected = (
    X_train_scaled[:, important_features],
    X_test_scaled[:, important_features],
)

# Print Important Features
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params)
rf_selected.fit(X_train_selected, y_train)

# Create a unique folder name dynamically
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the Model, the Scaler and the selected column indices
artifacts = [
    ("RandomForest_Model.pkl", rf_selected),
    ("Scaler.pkl", scaler),
    ("Selected_Features.pkl", important_features),
]
for artifact_file, artifact in artifacts:
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Predictions: hard labels plus the probability of class 1 (CH2)
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_selected):.4f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred_selected):.4f}")
print(f"MCC: {matthews_corrcoef(y_test, y_pred_selected):.4f}")
print(f"Log Loss: {log_loss(y_test, y_probs_selected):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred_selected):.4f}")
print(f"Recall: {recall_score(y_test, y_pred_selected):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_selected):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Row-major order of the 2x2 matrix: [0][0], [0][1], [1][0], [1][1].
tn, fp, fn, tp = cm.ravel()
fp_rate = fp / (fp + tn)
fn_rate = fn / (fn + tp)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC1 CH1: (665, 38)
✅ Loaded Probe3 ADC1 CH2: (665, 38)
✅ Loaded Probe1 ADC1 CH1: (665, 38)
✅ Loaded Probe1 ADC1 CH2: (665, 38)

Top Important Features:
Max_H: Importance 0.1009
Mean_H: Importance 0.0807
Mean Deviation_H: Importance 0.0430
RMS_H: Importance 0.1256
Entropy_H: Importance 0.0165
Mean_Freq_H: Importance 0.0323
Kurtosis_Freq_H: Importance 0.0108
Variance_H: Importance 0.0206
Min_L: Importance 0.0394
Mean_L: Importance 0.1805
Std_L: Importance 0.0005
Mean Deviation_L: Importance 0.1077
RMS_L: Importance 0.1231
Peak-to-Peak_L: Importance 0.0001
Entropy_L: Importance 0.0189
Skewness_Freq_L: Importance 0.0000
Mean_Freq_L: Importance 0.0155
Variance_L: Importance 0.0836

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0026
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       665
         CH2      

#### **Sampling Rate 200MSPS**
This section contains data collected using a sampling rate of **200MSPS**. The model is trained on **Probe 3** and tested on **Probe 1**.



In [77]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root directory of the extracted feature files.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selection: ADC number, sampling rate (the number placed in
# front of "_MSPS" in the feature file names) and the probe datasets used
# for training and testing.
adc_num, sample_rate = 1, 200
train_probe_dataset, test_probe_dataset = 3, 1

# Probe names follow directly from the dataset numbers.
train_probe = f"Probe{train_probe_dataset}"
test_probe = f"Probe{test_probe_dataset}"

# Feature names, assembled from four groups in this order:
# Time High, Freq High, Time Low, Freq Low.
_time_high = [
    "Max_H", "Min_H", "Mean_H", "Std_H", "Mean Deviation_H", "RMS_H",
    "Skewness_H", "Kurtosis_H", "Peak-to-Peak_H", "Zero Crossing Rate_H",
]
_freq_high = [
    "Centroid_H", "Entropy_H", "Spread_H", "Skewness_Freq_H", "Mean_Freq_H",
    "Kurtosis_Freq_H", "Irregularity_H", "Variance_H", "Dominant_Freq_H",
]
_time_low = [
    "Max_L", "Min_L", "Mean_L", "Std_L", "Mean Deviation_L", "RMS_L",
    "Skewness_L", "Kurtosis_L", "Peak-to-Peak_L", "Zero Crossing Rate_L",
]
_freq_low = [
    "Centroid_L", "Entropy_L", "Spread_L", "Skewness_Freq_L", "Mean_Freq_L",
    "Kurtosis_Freq_L", "Irregularity_L", "Variance_L", "Dominant_Freq_L",
]
feature_names = _time_high + _freq_high + _time_low + _freq_low

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel.

    The file is read from
    ``<base_directory>/P<d>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``
    where ``<d>`` is the last character of ``probe_name`` and
    ``base_directory`` / ``sample_rate`` are the module-level configuration
    values. No consistency check is performed on the loaded data.

    Args:
        probe_name: Probe identifier such as ``"Probe3"``.
        adc_num: ADC number inserted into the file name.
        channel: Channel number inserted into the file name.

    Returns:
        The object returned by ``np.load`` for that file. Its ``shape`` is
        printed as a side effect.

    Raises:
        FileNotFoundError: If the expected ``.npy`` file does not exist.
    """
    # Only the last character selects the folder, so this assumes
    # single-digit probe numbers.
    folder = os.path.join(base_directory, f"P{probe_name[-1]}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Attempt the load directly instead of checking existence first; a
    # missing file is reported with the same exception type and message.
    try:
        data = np.load(file_path)
    except FileNotFoundError as err:
        raise FileNotFoundError(f"File not found: {file_path}") from err

    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
def _stack_channels(ch0, ch1):
    """Stack two channel arrays row-wise; rows of ch0 get label 0, rows of ch1 get label 1."""
    features = np.vstack([ch0, ch1])
    labels = np.concatenate([np.zeros(ch0.shape[0]), np.ones(ch1.shape[0])])
    return features, labels


# Load order is CH1 then CH2 for the training probe, followed by the same
# for the test probe, so the "Loaded ..." log lines keep that sequence.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

X_train, y_train = _stack_channels(train_ch0, train_ch1)
X_test, y_test = _stack_channels(test_ch0, test_ch1)

# ==================================================================
# Data Preprocessing
# ==================================================================
# Scaling statistics are estimated on the training probe only and then
# applied unchanged to the test probe.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests (feature ranking and final model) share these settings.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params)
rf.fit(X_train_scaled, y_train)

# Identify Important Features: keep the columns whose importance is
# strictly above the median importance.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected, X_test_selected = (
    X_train_scaled[:, important_features],
    X_test_scaled[:, important_features],
)

# Print Important Features
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params)
rf_selected.fit(X_train_selected, y_train)

# Create a unique folder name dynamically
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the Model, the Scaler and the selected column indices
artifacts = [
    ("RandomForest_Model.pkl", rf_selected),
    ("Scaler.pkl", scaler),
    ("Selected_Features.pkl", important_features),
]
for artifact_file, artifact in artifacts:
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Predictions: hard labels plus the probability of class 1 (CH2)
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_selected):.4f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred_selected):.4f}")
print(f"MCC: {matthews_corrcoef(y_test, y_pred_selected):.4f}")
print(f"Log Loss: {log_loss(y_test, y_probs_selected):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred_selected):.4f}")
print(f"Recall: {recall_score(y_test, y_pred_selected):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_selected):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Row-major order of the 2x2 matrix: [0][0], [0][1], [1][0], [1][1].
tn, fp, fn, tp = cm.ravel()
fp_rate = fp / (fp + tn)
fn_rate = fn / (fn + tp)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC1 CH1: (498, 38)
✅ Loaded Probe3 ADC1 CH2: (498, 38)
✅ Loaded Probe1 ADC1 CH1: (498, 38)
✅ Loaded Probe1 ADC1 CH2: (498, 38)

Top Important Features:
Max_H: Importance 0.0600
Mean_H: Importance 0.0800
Mean Deviation_H: Importance 0.0601
RMS_H: Importance 0.1042
Entropy_H: Importance 0.0600
Mean_Freq_H: Importance 0.0186
Irregularity_H: Importance 0.0116
Variance_H: Importance 0.0200
Min_L: Importance 0.0858
Mean_L: Importance 0.1800
Std_L: Importance 0.0208
Mean Deviation_L: Importance 0.0800
RMS_L: Importance 0.1007
Kurtosis_L: Importance 0.0031
Centroid_L: Importance 0.0000
Entropy_L: Importance 0.0197
Mean_Freq_L: Importance 0.0154
Variance_L: Importance 0.0800

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0052
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       498
         CH2       1.00     

#### **Sampling Rate 250MSPS**
This section contains data collected using a sampling rate of **250MSPS**. The model is trained on **Probe 3** and tested on **Probe 1**.



In [78]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root directory of the extracted feature files.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selection: ADC number, sampling rate (the number placed in
# front of "_MSPS" in the feature file names) and the probe datasets used
# for training and testing.
adc_num, sample_rate = 1, 250
train_probe_dataset, test_probe_dataset = 3, 1

# Probe names follow directly from the dataset numbers.
train_probe = f"Probe{train_probe_dataset}"
test_probe = f"Probe{test_probe_dataset}"

# Feature names, assembled from four groups in this order:
# Time High, Freq High, Time Low, Freq Low.
_time_high = [
    "Max_H", "Min_H", "Mean_H", "Std_H", "Mean Deviation_H", "RMS_H",
    "Skewness_H", "Kurtosis_H", "Peak-to-Peak_H", "Zero Crossing Rate_H",
]
_freq_high = [
    "Centroid_H", "Entropy_H", "Spread_H", "Skewness_Freq_H", "Mean_Freq_H",
    "Kurtosis_Freq_H", "Irregularity_H", "Variance_H", "Dominant_Freq_H",
]
_time_low = [
    "Max_L", "Min_L", "Mean_L", "Std_L", "Mean Deviation_L", "RMS_L",
    "Skewness_L", "Kurtosis_L", "Peak-to-Peak_L", "Zero Crossing Rate_L",
]
_freq_low = [
    "Centroid_L", "Entropy_L", "Spread_L", "Skewness_Freq_L", "Mean_Freq_L",
    "Kurtosis_Freq_L", "Irregularity_L", "Variance_L", "Dominant_Freq_L",
]
feature_names = _time_high + _freq_high + _time_low + _freq_low

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel.

    The file is read from
    ``<base_directory>/P<d>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``
    where ``<d>`` is the last character of ``probe_name`` and
    ``base_directory`` / ``sample_rate`` are the module-level configuration
    values. No consistency check is performed on the loaded data.

    Args:
        probe_name: Probe identifier such as ``"Probe3"``.
        adc_num: ADC number inserted into the file name.
        channel: Channel number inserted into the file name.

    Returns:
        The object returned by ``np.load`` for that file. Its ``shape`` is
        printed as a side effect.

    Raises:
        FileNotFoundError: If the expected ``.npy`` file does not exist.
    """
    # Only the last character selects the folder, so this assumes
    # single-digit probe numbers.
    folder = os.path.join(base_directory, f"P{probe_name[-1]}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Attempt the load directly instead of checking existence first; a
    # missing file is reported with the same exception type and message.
    try:
        data = np.load(file_path)
    except FileNotFoundError as err:
        raise FileNotFoundError(f"File not found: {file_path}") from err

    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
def _stack_channels(ch0, ch1):
    """Stack two channel arrays row-wise; rows of ch0 get label 0, rows of ch1 get label 1."""
    features = np.vstack([ch0, ch1])
    labels = np.concatenate([np.zeros(ch0.shape[0]), np.ones(ch1.shape[0])])
    return features, labels


# Load order is CH1 then CH2 for the training probe, followed by the same
# for the test probe, so the "Loaded ..." log lines keep that sequence.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

X_train, y_train = _stack_channels(train_ch0, train_ch1)
X_test, y_test = _stack_channels(test_ch0, test_ch1)

# ==================================================================
# Data Preprocessing
# ==================================================================
# Scaling statistics are estimated on the training probe only and then
# applied unchanged to the test probe.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests (feature ranking and final model) share these settings.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params)
rf.fit(X_train_scaled, y_train)

# Identify Important Features: keep the columns whose importance is
# strictly above the median importance.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected, X_test_selected = (
    X_train_scaled[:, important_features],
    X_test_scaled[:, important_features],
)

# Print Important Features
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params)
rf_selected.fit(X_train_selected, y_train)

# Create a unique folder name dynamically
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the Model, the Scaler and the selected column indices
artifacts = [
    ("RandomForest_Model.pkl", rf_selected),
    ("Scaler.pkl", scaler),
    ("Selected_Features.pkl", important_features),
]
for artifact_file, artifact in artifacts:
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Predictions: hard labels plus the probability of class 1 (CH2)
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_selected):.4f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred_selected):.4f}")
print(f"MCC: {matthews_corrcoef(y_test, y_pred_selected):.4f}")
print(f"Log Loss: {log_loss(y_test, y_probs_selected):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred_selected):.4f}")
print(f"Recall: {recall_score(y_test, y_pred_selected):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_selected):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Row-major order of the 2x2 matrix: [0][0], [0][1], [1][0], [1][1].
tn, fp, fn, tp = cm.ravel()
fp_rate = fp / (fp + tn)
fn_rate = fn / (fn + tp)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC1 CH1: (398, 38)
✅ Loaded Probe3 ADC1 CH2: (398, 38)
✅ Loaded Probe1 ADC1 CH1: (398, 38)
✅ Loaded Probe1 ADC1 CH2: (398, 38)

Top Important Features:
Max_H: Importance 0.1000
Min_H: Importance 0.0014
Mean_H: Importance 0.0800
Std_H: Importance 0.0800
Mean Deviation_H: Importance 0.0200
RMS_H: Importance 0.0824
Entropy_H: Importance 0.1000
Mean_Freq_H: Importance 0.0600
Kurtosis_Freq_H: Importance 0.0163
Mean_L: Importance 0.1000
Std_L: Importance 0.0800
Mean Deviation_L: Importance 0.0600
RMS_L: Importance 0.0800
Entropy_L: Importance 0.0600
Variance_L: Importance 0.0800

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0133
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       398
         CH2       1.00      1.00      1.00       398

    accuracy                           1.00       796
   macro avg  

#### **Sampling Rate 300MSPS**
This section contains data collected using a sampling rate of **300MSPS**. The model is trained on **Probe 3** and tested on **Probe 1**.



In [79]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root directory of the extracted feature files.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selection: ADC number, sampling rate (the number placed in
# front of "_MSPS" in the feature file names) and the probe datasets used
# for training and testing.
adc_num, sample_rate = 1, 300
train_probe_dataset, test_probe_dataset = 3, 1

# Probe names follow directly from the dataset numbers.
train_probe = f"Probe{train_probe_dataset}"
test_probe = f"Probe{test_probe_dataset}"

# Feature names, assembled from four groups in this order:
# Time High, Freq High, Time Low, Freq Low.
_time_high = [
    "Max_H", "Min_H", "Mean_H", "Std_H", "Mean Deviation_H", "RMS_H",
    "Skewness_H", "Kurtosis_H", "Peak-to-Peak_H", "Zero Crossing Rate_H",
]
_freq_high = [
    "Centroid_H", "Entropy_H", "Spread_H", "Skewness_Freq_H", "Mean_Freq_H",
    "Kurtosis_Freq_H", "Irregularity_H", "Variance_H", "Dominant_Freq_H",
]
_time_low = [
    "Max_L", "Min_L", "Mean_L", "Std_L", "Mean Deviation_L", "RMS_L",
    "Skewness_L", "Kurtosis_L", "Peak-to-Peak_L", "Zero Crossing Rate_L",
]
_freq_low = [
    "Centroid_L", "Entropy_L", "Spread_L", "Skewness_Freq_L", "Mean_Freq_L",
    "Kurtosis_Freq_L", "Irregularity_L", "Variance_L", "Dominant_Freq_L",
]
feature_names = _time_high + _freq_high + _time_low + _freq_low

def load_channel_data(probe_name, adc_num, channel, rate_msps=None, data_root=None):
    """Load the saved feature array for one probe / ADC / channel.

    The file is looked up at
    ``<data_root>/P<d>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate_msps>_MSPS.npy``
    where ``<d>`` is the last character of ``probe_name``. No consistency
    check is performed on the loaded contents.

    Args:
        probe_name: Probe label such as ``"Probe1"``. Only its final character
            selects the folder, so a name like ``"Probe10"`` would map to
            ``P0_fd``.
        adc_num: ADC number embedded in the file name.
        channel: Channel number embedded in the file name.
        rate_msps: Sampling rate embedded in the file name. Defaults to the
            notebook-level ``sample_rate``.
        data_root: Folder containing the ``P<d>_fd`` sub-folders. Defaults to
            the notebook-level ``base_directory``.

    Returns:
        The array stored in the ``.npy`` file, as returned by ``np.load``.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Fall back to the notebook-level configuration so existing
    # three-argument calls behave exactly as before.
    if rate_msps is None:
        rate_msps = sample_rate
    if data_root is None:
        data_root = base_directory

    folder = os.path.join(data_root, f"P{probe_name[-1]}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate_msps}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path in the message.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Channel files 1 and 2 are loaded for each probe, training probe first.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Rows from the channel-1 file get label 0.0, rows from the channel-2 file 1.0.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and then applied
# unchanged to the test probe.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
rf = RandomForestClassifier(
    random_state=42,
    n_estimators=50,
    max_depth=10,
    max_features='sqrt',
    min_samples_split=10,
    min_samples_leaf=5,
).fit(X_train_scaled, y_train)

# Identify Important Features: keep the columns whose importance is strictly
# above the 50th percentile of all importances.
feature_importances = rf.feature_importances_
important_features = np.flatnonzero(
    feature_importances > np.percentile(feature_importances, 50)
)
X_train_selected, X_test_selected = (
    scaled[:, important_features] for scaled in (X_train_scaled, X_test_scaled)
)

# Print Important Features
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
# A fresh, unfitted forest with the same constructor parameters as `rf`,
# trained on the selected columns only.
rf_selected = RandomForestClassifier(**rf.get_params(deep=False))
rf_selected.fit(X_train_selected, y_train)

# Create a unique folder name dynamically
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the Model and Scaler.
# The scaler was fitted on all columns, so the saved column indices apply to
# data that has already been scaled.
joblib.dump(rf_selected, os.path.join(model_dir, "RandomForest_Model.pkl"))
joblib.dump(scaler, os.path.join(model_dir, "Scaler.pkl"))
joblib.dump(important_features, os.path.join(model_dir, "Selected_Features.pkl"))

# Predictions (hard labels, plus the probability of class 1 for the log loss)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]
y_pred_selected = rf_selected.predict(X_test_selected)

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_selected):.4f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred_selected):.4f}")
print(f"MCC: {matthews_corrcoef(y_test, y_pred_selected):.4f}")
print(f"Log Loss: {log_loss(y_test, y_probs_selected):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred_selected):.4f}")
print(f"Recall: {recall_score(y_test, y_pred_selected):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_selected):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
# Rows are true labels, columns are predicted labels; label 1.0 is "positive".
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0, 1] / cm[0].sum()  # true-0 rows predicted as 1
fn_rate = cm[1, 0] / cm[1].sum()  # true-1 rows predicted as 0
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC1 CH1: (332, 38)
✅ Loaded Probe3 ADC1 CH2: (331, 38)
✅ Loaded Probe1 ADC1 CH1: (332, 38)
✅ Loaded Probe1 ADC1 CH2: (332, 38)

Top Important Features:
Max_H: Importance 0.0600
Mean_H: Importance 0.0600
Std_H: Importance 0.0800
Mean Deviation_H: Importance 0.0200
RMS_H: Importance 0.0800
Entropy_H: Importance 0.0400
Mean_Freq_H: Importance 0.0400
Irregularity_H: Importance 0.0200
Min_L: Importance 0.0400
Mean_L: Importance 0.1000
Std_L: Importance 0.0600
Mean Deviation_L: Importance 0.0600
RMS_L: Importance 0.0600
Entropy_L: Importance 0.0600
Mean_Freq_L: Importance 0.0800
Irregularity_L: Importance 0.0600
Variance_L: Importance 0.0800

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0500
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       332
         CH2       1.00      1.00      1.00       332

    

### Training **Probe 3** data and Testing on **Probe 2**
The model trained on Probe 3 is evaluated on data from Probe 2 to test how well it generalizes when the probe is changed.

#### **Sampling Rate 10MSPS**
This section contains data collected using a sampling rate of **10MSPS**. The model is trained on **Probe 3** and tested on **Probe 2**.



In [80]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Folder that load_channel_data() searches for the per-probe "P<n>_fd" sub-folders.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selection for this cell.
adc_num = 1
sample_rate = 10  # appears in the file name as "<sample_rate>_MSPS"
train_probe_dataset, test_probe_dataset = 3, 2

# Automatically determine probe names based on dataset numbers
train_probe, test_probe = f"Probe{train_probe_dataset}", f"Probe{test_probe_dataset}"

# Feature Names (Time High, Freq High, Time Low, Freq Low).
# The same 19 descriptors are listed once and suffixed with "_H", then "_L",
# giving 38 names in the order H-block followed by L-block.
feature_names = [
    f"{stem}_{group}"
    for group in ("H", "L")
    for stem in (
        "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness", "Kurtosis",
        "Peak-to-Peak", "Zero Crossing Rate",
        "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq", "Kurtosis_Freq",
        "Irregularity", "Variance", "Dominant_Freq",
    )
]

def load_channel_data(probe_name, adc_num, channel, rate_msps=None, data_root=None):
    """Load the saved feature array for one probe / ADC / channel.

    The file is looked up at
    ``<data_root>/P<d>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate_msps>_MSPS.npy``
    where ``<d>`` is the last character of ``probe_name``. No consistency
    check is performed on the loaded contents.

    Args:
        probe_name: Probe label such as ``"Probe1"``. Only its final character
            selects the folder, so a name like ``"Probe10"`` would map to
            ``P0_fd``.
        adc_num: ADC number embedded in the file name.
        channel: Channel number embedded in the file name.
        rate_msps: Sampling rate embedded in the file name. Defaults to the
            notebook-level ``sample_rate``.
        data_root: Folder containing the ``P<d>_fd`` sub-folders. Defaults to
            the notebook-level ``base_directory``.

    Returns:
        The array stored in the ``.npy`` file, as returned by ``np.load``.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Fall back to the notebook-level configuration so existing
    # three-argument calls behave exactly as before.
    if rate_msps is None:
        rate_msps = sample_rate
    if data_root is None:
        data_root = base_directory

    folder = os.path.join(data_root, f"P{probe_name[-1]}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate_msps}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path in the message.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Channel files 1 and 2 are loaded for each probe, training probe first.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Rows from the channel-1 file get label 0.0, rows from the channel-2 file 1.0.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and then applied
# unchanged to the test probe.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
rf = RandomForestClassifier(
    random_state=42,
    n_estimators=50,
    max_depth=10,
    max_features='sqrt',
    min_samples_split=10,
    min_samples_leaf=5,
).fit(X_train_scaled, y_train)

# Identify Important Features: keep the columns whose importance is strictly
# above the 50th percentile of all importances.
feature_importances = rf.feature_importances_
important_features = np.flatnonzero(
    feature_importances > np.percentile(feature_importances, 50)
)
X_train_selected, X_test_selected = (
    scaled[:, important_features] for scaled in (X_train_scaled, X_test_scaled)
)

# Print Important Features
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
# A fresh, unfitted forest with the same constructor parameters as `rf`,
# trained on the selected columns only.
rf_selected = RandomForestClassifier(**rf.get_params(deep=False))
rf_selected.fit(X_train_selected, y_train)

# Create a unique folder name dynamically
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the Model and Scaler.
# The scaler was fitted on all columns, so the saved column indices apply to
# data that has already been scaled.
joblib.dump(rf_selected, os.path.join(model_dir, "RandomForest_Model.pkl"))
joblib.dump(scaler, os.path.join(model_dir, "Scaler.pkl"))
joblib.dump(important_features, os.path.join(model_dir, "Selected_Features.pkl"))

# Predictions (hard labels, plus the probability of class 1 for the log loss)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]
y_pred_selected = rf_selected.predict(X_test_selected)

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_selected):.4f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred_selected):.4f}")
print(f"MCC: {matthews_corrcoef(y_test, y_pred_selected):.4f}")
print(f"Log Loss: {log_loss(y_test, y_probs_selected):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred_selected):.4f}")
print(f"Recall: {recall_score(y_test, y_pred_selected):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_selected):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
# Rows are true labels, columns are predicted labels; label 1.0 is "positive".
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0, 1] / cm[0].sum()  # true-0 rows predicted as 1
fn_rate = cm[1, 0] / cm[1].sum()  # true-1 rows predicted as 0
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC1 CH1: (9998, 38)
✅ Loaded Probe3 ADC1 CH2: (9998, 38)
✅ Loaded Probe2 ADC1 CH1: (9998, 38)
✅ Loaded Probe2 ADC1 CH2: (9998, 38)

Top Important Features:
Max_H: Importance 0.0062
Min_H: Importance 0.0174
Mean_H: Importance 0.0772
Mean Deviation_H: Importance 0.0048
RMS_H: Importance 0.1177
Skewness_H: Importance 0.0029
Entropy_H: Importance 0.0231
Spread_H: Importance 0.0037
Mean_Freq_H: Importance 0.0421
Irregularity_H: Importance 0.0030
Variance_H: Importance 0.0457
Max_L: Importance 0.0028
Min_L: Importance 0.1182
Mean_L: Importance 0.1648
RMS_L: Importance 0.2142
Skewness_L: Importance 0.0026
Entropy_L: Importance 0.0385
Mean_Freq_L: Importance 0.0161
Variance_L: Importance 0.0790

Model Performance After Feature Selection:
Test Accuracy: 0.9643
Balanced Accuracy: 0.9643
MCC: 0.9292
Log Loss: 0.0969
F1 Score: 0.9649
Recall: 0.9825
Precision: 0.9480
              precision    recall  f1-score   support

         CH1       0.98      0.95      0.96      9998
       

#### **Sampling Rate 20MSPS**
This section contains data collected using a sampling rate of **20MSPS**. The model is trained on **Probe 3** and tested on **Probe 2**.



In [81]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Folder that load_channel_data() searches for the per-probe "P<n>_fd" sub-folders.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selection for this cell.
adc_num = 1
sample_rate = 20  # appears in the file name as "<sample_rate>_MSPS"
train_probe_dataset, test_probe_dataset = 3, 2

# Automatically determine probe names based on dataset numbers
train_probe, test_probe = f"Probe{train_probe_dataset}", f"Probe{test_probe_dataset}"

# Feature Names (Time High, Freq High, Time Low, Freq Low).
# The same 19 descriptors are listed once and suffixed with "_H", then "_L",
# giving 38 names in the order H-block followed by L-block.
feature_names = [
    f"{stem}_{group}"
    for group in ("H", "L")
    for stem in (
        "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness", "Kurtosis",
        "Peak-to-Peak", "Zero Crossing Rate",
        "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq", "Kurtosis_Freq",
        "Irregularity", "Variance", "Dominant_Freq",
    )
]

def load_channel_data(probe_name, adc_num, channel, rate_msps=None, data_root=None):
    """Load the saved feature array for one probe / ADC / channel.

    The file is looked up at
    ``<data_root>/P<d>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate_msps>_MSPS.npy``
    where ``<d>`` is the last character of ``probe_name``. No consistency
    check is performed on the loaded contents.

    Args:
        probe_name: Probe label such as ``"Probe1"``. Only its final character
            selects the folder, so a name like ``"Probe10"`` would map to
            ``P0_fd``.
        adc_num: ADC number embedded in the file name.
        channel: Channel number embedded in the file name.
        rate_msps: Sampling rate embedded in the file name. Defaults to the
            notebook-level ``sample_rate``.
        data_root: Folder containing the ``P<d>_fd`` sub-folders. Defaults to
            the notebook-level ``base_directory``.

    Returns:
        The array stored in the ``.npy`` file, as returned by ``np.load``.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Fall back to the notebook-level configuration so existing
    # three-argument calls behave exactly as before.
    if rate_msps is None:
        rate_msps = sample_rate
    if data_root is None:
        data_root = base_directory

    folder = os.path.join(data_root, f"P{probe_name[-1]}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate_msps}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path in the message.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Channel files 1 and 2 are loaded for each probe, training probe first.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Rows from the channel-1 file get label 0.0, rows from the channel-2 file 1.0.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and then applied
# unchanged to the test probe.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
rf = RandomForestClassifier(
    random_state=42,
    n_estimators=50,
    max_depth=10,
    max_features='sqrt',
    min_samples_split=10,
    min_samples_leaf=5,
).fit(X_train_scaled, y_train)

# Identify Important Features: keep the columns whose importance is strictly
# above the 50th percentile of all importances.
feature_importances = rf.feature_importances_
important_features = np.flatnonzero(
    feature_importances > np.percentile(feature_importances, 50)
)
X_train_selected, X_test_selected = (
    scaled[:, important_features] for scaled in (X_train_scaled, X_test_scaled)
)

# Print Important Features
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
# A fresh, unfitted forest with the same constructor parameters as `rf`,
# trained on the selected columns only.
rf_selected = RandomForestClassifier(**rf.get_params(deep=False))
rf_selected.fit(X_train_selected, y_train)

# Create a unique folder name dynamically
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the Model and Scaler.
# The scaler was fitted on all columns, so the saved column indices apply to
# data that has already been scaled.
joblib.dump(rf_selected, os.path.join(model_dir, "RandomForest_Model.pkl"))
joblib.dump(scaler, os.path.join(model_dir, "Scaler.pkl"))
joblib.dump(important_features, os.path.join(model_dir, "Selected_Features.pkl"))

# Predictions (hard labels, plus the probability of class 1 for the log loss)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]
y_pred_selected = rf_selected.predict(X_test_selected)

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_selected):.4f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred_selected):.4f}")
print(f"MCC: {matthews_corrcoef(y_test, y_pred_selected):.4f}")
print(f"Log Loss: {log_loss(y_test, y_probs_selected):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred_selected):.4f}")
print(f"Recall: {recall_score(y_test, y_pred_selected):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_selected):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
# Rows are true labels, columns are predicted labels; label 1.0 is "positive".
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0, 1] / cm[0].sum()  # true-0 rows predicted as 1
fn_rate = cm[1, 0] / cm[1].sum()  # true-1 rows predicted as 0
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC1 CH1: (4998, 38)
✅ Loaded Probe3 ADC1 CH2: (4998, 38)
✅ Loaded Probe2 ADC1 CH1: (4998, 38)
✅ Loaded Probe2 ADC1 CH2: (4998, 38)

Top Important Features:
Max_H: Importance 0.1215
Min_H: Importance 0.0255
Mean_H: Importance 0.1520
Mean Deviation_H: Importance 0.0050
RMS_H: Importance 0.2228
Entropy_H: Importance 0.0175
Mean_Freq_H: Importance 0.0771
Variance_H: Importance 0.0366
Max_L: Importance 0.0011
Min_L: Importance 0.0233
Mean_L: Importance 0.1243
Std_L: Importance 0.0013
RMS_L: Importance 0.1203
Skewness_L: Importance 0.0012
Kurtosis_L: Importance 0.0030
Entropy_L: Importance 0.0282
Spread_L: Importance 0.0007
Mean_Freq_L: Importance 0.0117
Variance_L: Importance 0.0252

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0010
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      4998
         CH2    

#### **Sampling Rate 30MSPS**
This section contains data collected using a sampling rate of **30MSPS**. The model is trained on **Probe 3** and tested on **Probe 2**.



In [82]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Folder that load_channel_data() searches for the per-probe "P<n>_fd" sub-folders.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selection for this cell.
adc_num = 1
sample_rate = 30  # appears in the file name as "<sample_rate>_MSPS"
train_probe_dataset, test_probe_dataset = 3, 2

# Automatically determine probe names based on dataset numbers
train_probe, test_probe = f"Probe{train_probe_dataset}", f"Probe{test_probe_dataset}"

# Feature Names (Time High, Freq High, Time Low, Freq Low).
# The same 19 descriptors are listed once and suffixed with "_H", then "_L",
# giving 38 names in the order H-block followed by L-block.
feature_names = [
    f"{stem}_{group}"
    for group in ("H", "L")
    for stem in (
        "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness", "Kurtosis",
        "Peak-to-Peak", "Zero Crossing Rate",
        "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq", "Kurtosis_Freq",
        "Irregularity", "Variance", "Dominant_Freq",
    )
]

def load_channel_data(probe_name, adc_num, channel, rate_msps=None, data_root=None):
    """Load the saved feature array for one probe / ADC / channel.

    The file is looked up at
    ``<data_root>/P<d>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate_msps>_MSPS.npy``
    where ``<d>`` is the last character of ``probe_name``. No consistency
    check is performed on the loaded contents.

    Args:
        probe_name: Probe label such as ``"Probe1"``. Only its final character
            selects the folder, so a name like ``"Probe10"`` would map to
            ``P0_fd``.
        adc_num: ADC number embedded in the file name.
        channel: Channel number embedded in the file name.
        rate_msps: Sampling rate embedded in the file name. Defaults to the
            notebook-level ``sample_rate``.
        data_root: Folder containing the ``P<d>_fd`` sub-folders. Defaults to
            the notebook-level ``base_directory``.

    Returns:
        The array stored in the ``.npy`` file, as returned by ``np.load``.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Fall back to the notebook-level configuration so existing
    # three-argument calls behave exactly as before.
    if rate_msps is None:
        rate_msps = sample_rate
    if data_root is None:
        data_root = base_directory

    folder = os.path.join(data_root, f"P{probe_name[-1]}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate_msps}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path in the message.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Channel files 1 and 2 are loaded for each probe, training probe first.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Rows from the channel-1 file get label 0.0, rows from the channel-2 file 1.0.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and then applied
# unchanged to the test probe.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
rf = RandomForestClassifier(
    random_state=42,
    n_estimators=50,
    max_depth=10,
    max_features='sqrt',
    min_samples_split=10,
    min_samples_leaf=5,
).fit(X_train_scaled, y_train)

# Identify Important Features: keep the columns whose importance is strictly
# above the 50th percentile of all importances.
feature_importances = rf.feature_importances_
important_features = np.flatnonzero(
    feature_importances > np.percentile(feature_importances, 50)
)
X_train_selected, X_test_selected = (
    scaled[:, important_features] for scaled in (X_train_scaled, X_test_scaled)
)

# Print Important Features
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
# A fresh, unfitted forest with the same constructor parameters as `rf`,
# trained on the selected columns only.
rf_selected = RandomForestClassifier(**rf.get_params(deep=False))
rf_selected.fit(X_train_selected, y_train)

# Create a unique folder name dynamically
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the Model and Scaler.
# The scaler was fitted on all columns, so the saved column indices apply to
# data that has already been scaled.
joblib.dump(rf_selected, os.path.join(model_dir, "RandomForest_Model.pkl"))
joblib.dump(scaler, os.path.join(model_dir, "Scaler.pkl"))
joblib.dump(important_features, os.path.join(model_dir, "Selected_Features.pkl"))

# Predictions (hard labels, plus the probability of class 1 for the log loss)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]
y_pred_selected = rf_selected.predict(X_test_selected)

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_selected):.4f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred_selected):.4f}")
print(f"MCC: {matthews_corrcoef(y_test, y_pred_selected):.4f}")
print(f"Log Loss: {log_loss(y_test, y_probs_selected):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred_selected):.4f}")
print(f"Recall: {recall_score(y_test, y_pred_selected):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_selected):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
# Rows are true labels, columns are predicted labels; label 1.0 is "positive".
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0, 1] / cm[0].sum()  # true-0 rows predicted as 1
fn_rate = cm[1, 0] / cm[1].sum()  # true-1 rows predicted as 0
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC1 CH1: (3331, 38)
✅ Loaded Probe3 ADC1 CH2: (3332, 38)
✅ Loaded Probe2 ADC1 CH1: (3332, 38)
✅ Loaded Probe2 ADC1 CH2: (3331, 38)

Top Important Features:
Max_H: Importance 0.1332
Min_H: Importance 0.0077
Mean_H: Importance 0.1297
Mean Deviation_H: Importance 0.0056
RMS_H: Importance 0.2115
Peak-to-Peak_H: Importance 0.0100
Centroid_H: Importance 0.0061
Entropy_H: Importance 0.0443
Spread_H: Importance 0.0060
Mean_Freq_H: Importance 0.0810
Irregularity_H: Importance 0.0072
Variance_H: Importance 0.0425
Min_L: Importance 0.0276
Mean_L: Importance 0.0888
RMS_L: Importance 0.1151
Skewness_L: Importance 0.0093
Entropy_L: Importance 0.0251
Mean_Freq_L: Importance 0.0112
Variance_L: Importance 0.0249

Model Performance After Feature Selection:
Test Accuracy: 0.9977
Balanced Accuracy: 0.9977
MCC: 0.9955
Log Loss: 0.0145
F1 Score: 0.9978
Recall: 1.0000
Precision: 0.9955
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      333

#### **Sampling Rate 40MSPS**
This section contains data collected using a sampling rate of **40MSPS**. The model is trained on **Probe 3** and tested on **Probe 2**.



In [83]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Folder that load_channel_data() searches for the per-probe "P<n>_fd" sub-folders.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selection for this cell.
adc_num = 1
sample_rate = 40  # appears in the file name as "<sample_rate>_MSPS"
train_probe_dataset, test_probe_dataset = 3, 2

# Automatically determine probe names based on dataset numbers
train_probe, test_probe = f"Probe{train_probe_dataset}", f"Probe{test_probe_dataset}"

# Feature Names (Time High, Freq High, Time Low, Freq Low).
# The same 19 descriptors are listed once and suffixed with "_H", then "_L",
# giving 38 names in the order H-block followed by L-block.
feature_names = [
    f"{stem}_{group}"
    for group in ("H", "L")
    for stem in (
        "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness", "Kurtosis",
        "Peak-to-Peak", "Zero Crossing Rate",
        "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq", "Kurtosis_Freq",
        "Irregularity", "Variance", "Dominant_Freq",
    )
]

def load_channel_data(probe_name, adc_num, channel, rate_msps=None, data_root=None):
    """Load the saved feature array for one probe / ADC / channel.

    The file is looked up at
    ``<data_root>/P<d>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate_msps>_MSPS.npy``
    where ``<d>`` is the last character of ``probe_name``. No consistency
    check is performed on the loaded contents.

    Args:
        probe_name: Probe label such as ``"Probe1"``. Only its final character
            selects the folder, so a name like ``"Probe10"`` would map to
            ``P0_fd``.
        adc_num: ADC number embedded in the file name.
        channel: Channel number embedded in the file name.
        rate_msps: Sampling rate embedded in the file name. Defaults to the
            notebook-level ``sample_rate``.
        data_root: Folder containing the ``P<d>_fd`` sub-folders. Defaults to
            the notebook-level ``base_directory``.

    Returns:
        The array stored in the ``.npy`` file, as returned by ``np.load``.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Fall back to the notebook-level configuration so existing
    # three-argument calls behave exactly as before.
    if rate_msps is None:
        rate_msps = sample_rate
    if data_root is None:
        data_root = base_directory

    folder = os.path.join(data_root, f"P{probe_name[-1]}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate_msps}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path in the message.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Channel files 1 and 2 are loaded for each probe, training probe first.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Rows from the channel-1 file get label 0.0, rows from the channel-2 file 1.0.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and then applied
# unchanged to the test probe.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
rf = RandomForestClassifier(
    random_state=42,
    n_estimators=50,
    max_depth=10,
    max_features='sqrt',
    min_samples_split=10,
    min_samples_leaf=5,
).fit(X_train_scaled, y_train)

# Identify Important Features: keep the columns whose importance is strictly
# above the 50th percentile of all importances.
feature_importances = rf.feature_importances_
important_features = np.flatnonzero(
    feature_importances > np.percentile(feature_importances, 50)
)
X_train_selected, X_test_selected = (
    scaled[:, important_features] for scaled in (X_train_scaled, X_test_scaled)
)

# Print Important Features
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
# A fresh, unfitted forest with the same constructor parameters as `rf`,
# trained on the selected columns only.
rf_selected = RandomForestClassifier(**rf.get_params(deep=False))
rf_selected.fit(X_train_selected, y_train)

# Create a unique folder name dynamically
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the Model and Scaler.
# The scaler was fitted on all columns, so the saved column indices apply to
# data that has already been scaled.
joblib.dump(rf_selected, os.path.join(model_dir, "RandomForest_Model.pkl"))
joblib.dump(scaler, os.path.join(model_dir, "Scaler.pkl"))
joblib.dump(important_features, os.path.join(model_dir, "Selected_Features.pkl"))

# Predictions (hard labels, plus the probability of class 1 for the log loss)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]
y_pred_selected = rf_selected.predict(X_test_selected)

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_selected):.4f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred_selected):.4f}")
print(f"MCC: {matthews_corrcoef(y_test, y_pred_selected):.4f}")
print(f"Log Loss: {log_loss(y_test, y_probs_selected):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred_selected):.4f}")
print(f"Recall: {recall_score(y_test, y_pred_selected):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_selected):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
# Rows are true labels, columns are predicted labels; label 1.0 is "positive".
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0, 1] / cm[0].sum()  # true-0 rows predicted as 1
fn_rate = cm[1, 0] / cm[1].sum()  # true-1 rows predicted as 0
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC1 CH1: (2481, 38)
✅ Loaded Probe3 ADC1 CH2: (2481, 38)
✅ Loaded Probe2 ADC1 CH1: (2481, 38)
✅ Loaded Probe2 ADC1 CH2: (2481, 38)

Top Important Features:
Max_H: Importance 0.0636
Min_H: Importance 0.0150
Mean_H: Importance 0.0941
Mean Deviation_H: Importance 0.0277
RMS_H: Importance 0.1154
Peak-to-Peak_H: Importance 0.0033
Centroid_H: Importance 0.0002
Entropy_H: Importance 0.0142
Mean_Freq_H: Importance 0.0588
Variance_H: Importance 0.0632
Max_L: Importance 0.0138
Min_L: Importance 0.0464
Mean_L: Importance 0.1817
RMS_L: Importance 0.1361
Centroid_L: Importance 0.0001
Entropy_L: Importance 0.0457
Spread_L: Importance 0.0000
Mean_Freq_L: Importance 0.0130
Variance_L: Importance 0.1077

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0010
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      2481
       

#### **Sampling Rate 50MSPS**
This section contains data collected using a sampling rate of **50MSPS**. The model is trained on **Probe 3** and tested on **Probe 2**.



In [84]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Experiment configuration
# ==================================================================
# Root folder that the per-probe feature files are resolved against.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

adc_num = 1              # ADC number used in the feature file names
train_probe_dataset = 3  # probe number used for training
test_probe_dataset = 2   # probe number used for testing
sample_rate = 50         # sampling rate tag used in the file names (MSPS)

# Probe labels are derived from the dataset numbers above.
train_probe = "Probe{}".format(train_probe_dataset)
test_probe = "Probe{}".format(test_probe_dataset)

# Feature names, ordered as: Time High, Freq High, Time Low, Freq Low.
# The "_H" and "_L" groups share the same statistics, so the list is
# generated from the statistic names instead of being spelled out twice.
_time_stats = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
)
_freq_stats = (
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [
    f"{stat}_{group}"
    for group in ("H", "L")
    for stat in _time_stats + _freq_stats
]

def load_channel_data(probe_name, adc_num, channel, rate_msps=None, root_dir=None):
    """Load the saved feature array for one probe / ADC / channel combination.

    The file is looked up at
    ``<root>/P<probe id>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``.
    Only the existence of that file is checked; the ADC number is not
    cross-checked against the file contents.

    Args:
        probe_name: Probe label such as ``"Probe3"``. Its trailing digits
            select the ``P<id>_fd`` sub-folder.
        adc_num: ADC number embedded in the file name.
        channel: Channel number embedded in the file name.
        rate_msps: Sampling-rate tag embedded in the file name. Defaults to
            the module-level ``sample_rate``.
        root_dir: Directory containing the ``P<id>_fd`` folders. Defaults to
            the module-level ``base_directory``.

    Returns:
        The array stored in the ``.npy`` file, as returned by ``np.load``.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Fall back to the notebook-level configuration when not given explicitly.
    rate = sample_rate if rate_msps is None else rate_msps
    root = base_directory if root_dir is None else root_dir

    # Use every trailing digit so "Probe12" maps to "P12_fd" rather than
    # "P2_fd"; names without trailing digits keep the last-character behaviour.
    prefix = probe_name.rstrip("0123456789")
    probe_id = probe_name[len(prefix):] or probe_name[-1]

    folder = os.path.join(root, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Assemble training / test sets (label 0 = CH1, label 1 = CH2)
# ==================================================================
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ==================================================================
# Standardise features (statistics come from the training set only)
# ==================================================================
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# First pass: fit on all features to rank them by importance
# ==================================================================
# Both forests use the same hyperparameters, so they are defined once.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Keep the features whose importance is strictly above the median.
feature_importances = rf.feature_importances_
median_importance = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > median_importance)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features (in column order, not ranked).
print("\nTop Important Features:")
for feature_idx in important_features:
    feat_label = feature_names[feature_idx]
    feat_score = feature_importances[feature_idx]
    print(f"{feat_label}: Importance {feat_score:.4f}")

# ==================================================================
# Second pass: refit on the selected features only
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# Output folder is named after the train/test probes, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected column indices.
for artifact, pkl_name in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, pkl_name))

# Hard predictions and class-1 probabilities on the test probe.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Evaluation on the test probe
# ==================================================================
print("\nModel Performance After Feature Selection:")
# (label, metric function, predictions to score); log loss takes probabilities.
scalar_metrics = (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
)
for metric_label, metric_fn, metric_input in scalar_metrics:
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix rows are true classes, columns are predicted classes.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
(true_neg, false_pos), (false_neg, true_pos) = cm
fp_rate = false_pos / (false_pos + true_neg)
fn_rate = false_neg / (false_neg + true_pos)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC1 CH1: (1998, 38)
✅ Loaded Probe3 ADC1 CH2: (1998, 38)
✅ Loaded Probe2 ADC1 CH1: (1998, 38)
✅ Loaded Probe2 ADC1 CH2: (1998, 38)

Top Important Features:
Max_H: Importance 0.0636
Min_H: Importance 0.0094
Mean_H: Importance 0.0944
Mean Deviation_H: Importance 0.0007
RMS_H: Importance 0.1411
Peak-to-Peak_H: Importance 0.0051
Entropy_H: Importance 0.0147
Mean_Freq_H: Importance 0.0505
Variance_H: Importance 0.0836
Max_L: Importance 0.0092
Min_L: Importance 0.0402
Mean_L: Importance 0.1728
Std_L: Importance 0.0007
Mean Deviation_L: Importance 0.0123
RMS_L: Importance 0.1333
Kurtosis_L: Importance 0.0020
Entropy_L: Importance 0.0449
Mean_Freq_L: Importance 0.0113
Variance_L: Importance 0.1101

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0004
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      1998
    

#### **Sampling Rate 100MSPS**
This section contains data collected using a sampling rate of **100MSPS**. The model is trained on **Probe 3** and tested on **Probe 2**.



In [85]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Experiment configuration
# ==================================================================
# Root folder that the per-probe feature files are resolved against.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

adc_num = 1              # ADC number used in the feature file names
train_probe_dataset = 3  # probe number used for training
test_probe_dataset = 2   # probe number used for testing
sample_rate = 100        # sampling rate tag used in the file names (MSPS)

# Probe labels are derived from the dataset numbers above.
train_probe = "Probe{}".format(train_probe_dataset)
test_probe = "Probe{}".format(test_probe_dataset)

# Feature names, ordered as: Time High, Freq High, Time Low, Freq Low.
# The "_H" and "_L" groups share the same statistics, so the list is
# generated from the statistic names instead of being spelled out twice.
_time_stats = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
)
_freq_stats = (
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [
    f"{stat}_{group}"
    for group in ("H", "L")
    for stat in _time_stats + _freq_stats
]

def load_channel_data(probe_name, adc_num, channel, rate_msps=None, root_dir=None):
    """Load the saved feature array for one probe / ADC / channel combination.

    The file is looked up at
    ``<root>/P<probe id>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``.
    Only the existence of that file is checked; the ADC number is not
    cross-checked against the file contents.

    Args:
        probe_name: Probe label such as ``"Probe3"``. Its trailing digits
            select the ``P<id>_fd`` sub-folder.
        adc_num: ADC number embedded in the file name.
        channel: Channel number embedded in the file name.
        rate_msps: Sampling-rate tag embedded in the file name. Defaults to
            the module-level ``sample_rate``.
        root_dir: Directory containing the ``P<id>_fd`` folders. Defaults to
            the module-level ``base_directory``.

    Returns:
        The array stored in the ``.npy`` file, as returned by ``np.load``.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Fall back to the notebook-level configuration when not given explicitly.
    rate = sample_rate if rate_msps is None else rate_msps
    root = base_directory if root_dir is None else root_dir

    # Use every trailing digit so "Probe12" maps to "P12_fd" rather than
    # "P2_fd"; names without trailing digits keep the last-character behaviour.
    prefix = probe_name.rstrip("0123456789")
    probe_id = probe_name[len(prefix):] or probe_name[-1]

    folder = os.path.join(root, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Assemble training / test sets (label 0 = CH1, label 1 = CH2)
# ==================================================================
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ==================================================================
# Standardise features (statistics come from the training set only)
# ==================================================================
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# First pass: fit on all features to rank them by importance
# ==================================================================
# Both forests use the same hyperparameters, so they are defined once.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Keep the features whose importance is strictly above the median.
feature_importances = rf.feature_importances_
median_importance = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > median_importance)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features (in column order, not ranked).
print("\nTop Important Features:")
for feature_idx in important_features:
    feat_label = feature_names[feature_idx]
    feat_score = feature_importances[feature_idx]
    print(f"{feat_label}: Importance {feat_score:.4f}")

# ==================================================================
# Second pass: refit on the selected features only
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# Output folder is named after the train/test probes, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected column indices.
for artifact, pkl_name in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, pkl_name))

# Hard predictions and class-1 probabilities on the test probe.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Evaluation on the test probe
# ==================================================================
print("\nModel Performance After Feature Selection:")
# (label, metric function, predictions to score); log loss takes probabilities.
scalar_metrics = (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
)
for metric_label, metric_fn, metric_input in scalar_metrics:
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix rows are true classes, columns are predicted classes.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
(true_neg, false_pos), (false_neg, true_pos) = cm
fp_rate = false_pos / (false_pos + true_neg)
fn_rate = false_neg / (false_neg + true_pos)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC1 CH1: (998, 38)
✅ Loaded Probe3 ADC1 CH2: (998, 38)
✅ Loaded Probe2 ADC1 CH1: (998, 38)
✅ Loaded Probe2 ADC1 CH2: (998, 38)

Top Important Features:
Max_H: Importance 0.0771
Min_H: Importance 0.0103
Mean_H: Importance 0.1020
Mean Deviation_H: Importance 0.0307
RMS_H: Importance 0.1455
Skewness_H: Importance 0.0126
Centroid_H: Importance 0.0021
Entropy_H: Importance 0.0105
Variance_H: Importance 0.0713
Max_L: Importance 0.0098
Min_L: Importance 0.0197
Mean_L: Importance 0.1949
Mean Deviation_L: Importance 0.0219
RMS_L: Importance 0.1386
Kurtosis_L: Importance 0.0161
Peak-to-Peak_L: Importance 0.0004
Skewness_Freq_L: Importance 0.0007
Mean_Freq_L: Importance 0.0024
Variance_L: Importance 0.1334

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0005
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       99

#### **Sampling Rate 150MSPS**
This section contains data collected using a sampling rate of **150MSPS**. The model is trained on **Probe 3** and tested on **Probe 2**.



In [86]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Experiment configuration
# ==================================================================
# Root folder that the per-probe feature files are resolved against.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

adc_num = 1              # ADC number used in the feature file names
train_probe_dataset = 3  # probe number used for training
test_probe_dataset = 2   # probe number used for testing
sample_rate = 150        # sampling rate tag used in the file names (MSPS)

# Probe labels are derived from the dataset numbers above.
train_probe = "Probe{}".format(train_probe_dataset)
test_probe = "Probe{}".format(test_probe_dataset)

# Feature names, ordered as: Time High, Freq High, Time Low, Freq Low.
# The "_H" and "_L" groups share the same statistics, so the list is
# generated from the statistic names instead of being spelled out twice.
_time_stats = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
)
_freq_stats = (
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [
    f"{stat}_{group}"
    for group in ("H", "L")
    for stat in _time_stats + _freq_stats
]

def load_channel_data(probe_name, adc_num, channel, rate_msps=None, root_dir=None):
    """Load the saved feature array for one probe / ADC / channel combination.

    The file is looked up at
    ``<root>/P<probe id>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``.
    Only the existence of that file is checked; the ADC number is not
    cross-checked against the file contents.

    Args:
        probe_name: Probe label such as ``"Probe3"``. Its trailing digits
            select the ``P<id>_fd`` sub-folder.
        adc_num: ADC number embedded in the file name.
        channel: Channel number embedded in the file name.
        rate_msps: Sampling-rate tag embedded in the file name. Defaults to
            the module-level ``sample_rate``.
        root_dir: Directory containing the ``P<id>_fd`` folders. Defaults to
            the module-level ``base_directory``.

    Returns:
        The array stored in the ``.npy`` file, as returned by ``np.load``.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Fall back to the notebook-level configuration when not given explicitly.
    rate = sample_rate if rate_msps is None else rate_msps
    root = base_directory if root_dir is None else root_dir

    # Use every trailing digit so "Probe12" maps to "P12_fd" rather than
    # "P2_fd"; names without trailing digits keep the last-character behaviour.
    prefix = probe_name.rstrip("0123456789")
    probe_id = probe_name[len(prefix):] or probe_name[-1]

    folder = os.path.join(root, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Assemble training / test sets (label 0 = CH1, label 1 = CH2)
# ==================================================================
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ==================================================================
# Standardise features (statistics come from the training set only)
# ==================================================================
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# First pass: fit on all features to rank them by importance
# ==================================================================
# Both forests use the same hyperparameters, so they are defined once.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Keep the features whose importance is strictly above the median.
feature_importances = rf.feature_importances_
median_importance = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > median_importance)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features (in column order, not ranked).
print("\nTop Important Features:")
for feature_idx in important_features:
    feat_label = feature_names[feature_idx]
    feat_score = feature_importances[feature_idx]
    print(f"{feat_label}: Importance {feat_score:.4f}")

# ==================================================================
# Second pass: refit on the selected features only
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# Output folder is named after the train/test probes, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected column indices.
for artifact, pkl_name in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, pkl_name))

# Hard predictions and class-1 probabilities on the test probe.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Evaluation on the test probe
# ==================================================================
print("\nModel Performance After Feature Selection:")
# (label, metric function, predictions to score); log loss takes probabilities.
scalar_metrics = (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
)
for metric_label, metric_fn, metric_input in scalar_metrics:
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix rows are true classes, columns are predicted classes.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
(true_neg, false_pos), (false_neg, true_pos) = cm
fp_rate = false_pos / (false_pos + true_neg)
fn_rate = false_neg / (false_neg + true_pos)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC1 CH1: (665, 38)
✅ Loaded Probe3 ADC1 CH2: (665, 38)
✅ Loaded Probe2 ADC1 CH1: (665, 38)
✅ Loaded Probe2 ADC1 CH2: (665, 38)

Top Important Features:
Max_H: Importance 0.1009
Mean_H: Importance 0.0807
Mean Deviation_H: Importance 0.0430
RMS_H: Importance 0.1256
Entropy_H: Importance 0.0165
Mean_Freq_H: Importance 0.0323
Kurtosis_Freq_H: Importance 0.0108
Variance_H: Importance 0.0206
Min_L: Importance 0.0394
Mean_L: Importance 0.1805
Std_L: Importance 0.0005
Mean Deviation_L: Importance 0.1077
RMS_L: Importance 0.1231
Peak-to-Peak_L: Importance 0.0001
Entropy_L: Importance 0.0189
Skewness_Freq_L: Importance 0.0000
Mean_Freq_L: Importance 0.0155
Variance_L: Importance 0.0836

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0008
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       665
         CH2      

#### **Sampling Rate 200MSPS**
This section contains data collected using a sampling rate of **200MSPS**. The model is trained on **Probe 3** and tested on **Probe 2**.



In [87]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Experiment configuration
# ==================================================================
# Root folder that the per-probe feature files are resolved against.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

adc_num = 1              # ADC number used in the feature file names
train_probe_dataset = 3  # probe number used for training
test_probe_dataset = 2   # probe number used for testing
sample_rate = 200        # sampling rate tag used in the file names (MSPS)

# Probe labels are derived from the dataset numbers above.
train_probe = "Probe{}".format(train_probe_dataset)
test_probe = "Probe{}".format(test_probe_dataset)

# Feature names, ordered as: Time High, Freq High, Time Low, Freq Low.
# The "_H" and "_L" groups share the same statistics, so the list is
# generated from the statistic names instead of being spelled out twice.
_time_stats = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
)
_freq_stats = (
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [
    f"{stat}_{group}"
    for group in ("H", "L")
    for stat in _time_stats + _freq_stats
]

def load_channel_data(probe_name, adc_num, channel, rate_msps=None, root_dir=None):
    """Load the saved feature array for one probe / ADC / channel combination.

    The file is looked up at
    ``<root>/P<probe id>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``.
    Only the existence of that file is checked; the ADC number is not
    cross-checked against the file contents.

    Args:
        probe_name: Probe label such as ``"Probe3"``. Its trailing digits
            select the ``P<id>_fd`` sub-folder.
        adc_num: ADC number embedded in the file name.
        channel: Channel number embedded in the file name.
        rate_msps: Sampling-rate tag embedded in the file name. Defaults to
            the module-level ``sample_rate``.
        root_dir: Directory containing the ``P<id>_fd`` folders. Defaults to
            the module-level ``base_directory``.

    Returns:
        The array stored in the ``.npy`` file, as returned by ``np.load``.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Fall back to the notebook-level configuration when not given explicitly.
    rate = sample_rate if rate_msps is None else rate_msps
    root = base_directory if root_dir is None else root_dir

    # Use every trailing digit so "Probe12" maps to "P12_fd" rather than
    # "P2_fd"; names without trailing digits keep the last-character behaviour.
    prefix = probe_name.rstrip("0123456789")
    probe_id = probe_name[len(prefix):] or probe_name[-1]

    folder = os.path.join(root, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Assemble training / test sets (label 0 = CH1, label 1 = CH2)
# ==================================================================
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ==================================================================
# Standardise features (statistics come from the training set only)
# ==================================================================
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# First pass: fit on all features to rank them by importance
# ==================================================================
# Both forests use the same hyperparameters, so they are defined once.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Keep the features whose importance is strictly above the median.
feature_importances = rf.feature_importances_
median_importance = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > median_importance)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features (in column order, not ranked).
print("\nTop Important Features:")
for feature_idx in important_features:
    feat_label = feature_names[feature_idx]
    feat_score = feature_importances[feature_idx]
    print(f"{feat_label}: Importance {feat_score:.4f}")

# ==================================================================
# Second pass: refit on the selected features only
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# Output folder is named after the train/test probes, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected column indices.
for artifact, pkl_name in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, pkl_name))

# Hard predictions and class-1 probabilities on the test probe.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Evaluation on the test probe
# ==================================================================
print("\nModel Performance After Feature Selection:")
# (label, metric function, predictions to score); log loss takes probabilities.
scalar_metrics = (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
)
for metric_label, metric_fn, metric_input in scalar_metrics:
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix rows are true classes, columns are predicted classes.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
(true_neg, false_pos), (false_neg, true_pos) = cm
fp_rate = false_pos / (false_pos + true_neg)
fn_rate = false_neg / (false_neg + true_pos)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC1 CH1: (498, 38)
✅ Loaded Probe3 ADC1 CH2: (498, 38)
✅ Loaded Probe2 ADC1 CH1: (498, 38)
✅ Loaded Probe2 ADC1 CH2: (498, 38)

Top Important Features:
Max_H: Importance 0.0600
Mean_H: Importance 0.0800
Mean Deviation_H: Importance 0.0601
RMS_H: Importance 0.1042
Entropy_H: Importance 0.0600
Mean_Freq_H: Importance 0.0186
Irregularity_H: Importance 0.0116
Variance_H: Importance 0.0200
Min_L: Importance 0.0858
Mean_L: Importance 0.1800
Std_L: Importance 0.0208
Mean Deviation_L: Importance 0.0800
RMS_L: Importance 0.1007
Kurtosis_L: Importance 0.0031
Centroid_L: Importance 0.0000
Entropy_L: Importance 0.0197
Mean_Freq_L: Importance 0.0154
Variance_L: Importance 0.0800

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0015
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       498
         CH2       1.00     

#### **Sampling Rate 250MSPS**
This section contains data collected using a sampling rate of **250MSPS**. The model is trained on **Probe 3** and tested on **Probe 2**.



In [88]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Experiment configuration
# ==================================================================
# Root folder that the per-probe feature files are resolved against.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

adc_num = 1              # ADC number used in the feature file names
train_probe_dataset = 3  # probe number used for training
test_probe_dataset = 2   # probe number used for testing
sample_rate = 250        # sampling rate tag used in the file names (MSPS)

# Probe labels are derived from the dataset numbers above.
train_probe = "Probe{}".format(train_probe_dataset)
test_probe = "Probe{}".format(test_probe_dataset)

# Feature names, ordered as: Time High, Freq High, Time Low, Freq Low.
# The "_H" and "_L" groups share the same statistics, so the list is
# generated from the statistic names instead of being spelled out twice.
_time_stats = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
)
_freq_stats = (
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [
    f"{stat}_{group}"
    for group in ("H", "L")
    for stat in _time_stats + _freq_stats
]

def load_channel_data(probe_name, adc_num, channel, rate=None, root=None):
    """Load the feature array saved for one probe / ADC / channel combination.

    The file is looked up at
    ``<root>/P<probe number>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``.

    Args:
        probe_name: Probe label such as ``"Probe3"``; everything after the
            ``"Probe"`` prefix is used as the folder number.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.
        rate: Sampling rate used in the file name. Defaults to the
            module-level ``sample_rate``.
        root: Directory holding the ``P<n>_fd`` folders. Defaults to the
            module-level ``base_directory``.

    Returns:
        The array returned by ``np.load`` for that file.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Fall back to the notebook-level settings only when the caller did not
    # pass explicit values, so existing three-argument calls keep working.
    if rate is None:
        rate = sample_rate
    if root is None:
        root = base_directory

    # Use the whole suffix after "Probe", not just the last character, so a
    # multi-digit probe such as "Probe10" maps to "P10_fd" instead of "P0_fd".
    marker = "Probe"
    if probe_name.startswith(marker):
        probe_id = probe_name[len(marker):]
    else:
        probe_id = probe_name[-1]

    folder = os.path.join(root, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Load training and test data
# ==================================================================
# Channels 1 and 2 are loaded for the training probe first, then for the test probe.
train_ch0 = load_channel_data(probe_name=train_probe, adc_num=adc_num, channel=1)
train_ch1 = load_channel_data(probe_name=train_probe, adc_num=adc_num, channel=2)
test_ch0 = load_channel_data(probe_name=test_probe, adc_num=adc_num, channel=1)
test_ch1 = load_channel_data(probe_name=test_probe, adc_num=adc_num, channel=2)

# Channel-1 rows are stacked above channel-2 rows; labels are 0.0 for
# channel 1 and 1.0 for channel 2.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.concatenate((np.zeros(len(train_ch0)), np.ones(len(train_ch1))))
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.concatenate((np.zeros(len(test_ch0)), np.ones(len(test_ch1))))

# ==================================================================
# Standardize features (statistics come from the training set only)
# ==================================================================
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# First forest: fitted on all features to rank them
# ==================================================================
rf = RandomForestClassifier(
    n_estimators=50, max_depth=10,
    min_samples_split=10, min_samples_leaf=5,
    max_features='sqrt', random_state=42,
)
rf.fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
important_features = np.flatnonzero(
    feature_importances > np.percentile(feature_importances, 50)
)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features together with their importance scores.
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Second forest: same hyperparameters, selected features only
# ==================================================================
rf_selected = RandomForestClassifier(
    n_estimators=50, max_depth=10,
    min_samples_split=10, min_samples_leaf=5,
    max_features='sqrt', random_state=42,
)
rf_selected.fit(X_train_selected, y_train)

# Output folder name encodes train probe, test probe, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected column indices, in that order.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and the predicted probability of class 1 on the test probe.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Test-set metrics for the feature-selected model
# ==================================================================
print("\nModel Performance After Feature Selection:")
# Each row: printed label, metric function, and the prediction array it scores
# (log loss uses probabilities; every other metric uses hard predictions).
for metric_label, metric_fn, metric_input in (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
):
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix (rows: true label, columns: predicted label) and the
# false-positive / false-negative rates derived from its rows.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC1 CH1: (398, 38)
✅ Loaded Probe3 ADC1 CH2: (398, 38)
✅ Loaded Probe2 ADC1 CH1: (398, 38)
✅ Loaded Probe2 ADC1 CH2: (398, 38)

Top Important Features:
Max_H: Importance 0.1000
Min_H: Importance 0.0014
Mean_H: Importance 0.0800
Std_H: Importance 0.0800
Mean Deviation_H: Importance 0.0200
RMS_H: Importance 0.0824
Entropy_H: Importance 0.1000
Mean_Freq_H: Importance 0.0600
Kurtosis_Freq_H: Importance 0.0163
Mean_L: Importance 0.1000
Std_L: Importance 0.0800
Mean Deviation_L: Importance 0.0600
RMS_L: Importance 0.0800
Entropy_L: Importance 0.0600
Variance_L: Importance 0.0800

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0000
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       398
         CH2       1.00      1.00      1.00       398

    accuracy                           1.00       796
   macro avg  

#### **Sampling Rate 300MSPS**
This section contains data collected using a sampling rate of **300MSPS**. The model is trained on **Probe 3** and tested on **Probe 2**.



In [89]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ------------------------------------------------------------------
# Experiment settings for this cell
# ------------------------------------------------------------------
# Root folder that holds the per-probe "P<n>_fd" feature directories.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

adc_num = 1               # ADC number used in the feature file names
train_probe_dataset = 3   # probe number the model is trained on
test_probe_dataset = 2    # probe number the model is tested on
sample_rate = 300         # sampling rate (MSPS) used in the feature file names

# Probe labels ("Probe<n>") are derived from the dataset numbers above.
train_probe = "Probe" + str(train_probe_dataset)
test_probe = "Probe" + str(test_probe_dataset)

# Column labels for the 38 features: the same 19 feature stems, emitted first
# with the "_H" suffix and then with the "_L" suffix.
feature_names = [
    f"{stem}_{band}"
    for band in ("H", "L")
    for stem in (
        "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
        "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
        "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
        "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
    )
]

def load_channel_data(probe_name, adc_num, channel, rate=None, root=None):
    """Load the feature array saved for one probe / ADC / channel combination.

    The file is looked up at
    ``<root>/P<probe number>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``.

    Args:
        probe_name: Probe label such as ``"Probe3"``; everything after the
            ``"Probe"`` prefix is used as the folder number.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.
        rate: Sampling rate used in the file name. Defaults to the
            module-level ``sample_rate``.
        root: Directory holding the ``P<n>_fd`` folders. Defaults to the
            module-level ``base_directory``.

    Returns:
        The array returned by ``np.load`` for that file.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Fall back to the notebook-level settings only when the caller did not
    # pass explicit values, so existing three-argument calls keep working.
    if rate is None:
        rate = sample_rate
    if root is None:
        root = base_directory

    # Use the whole suffix after "Probe", not just the last character, so a
    # multi-digit probe such as "Probe10" maps to "P10_fd" instead of "P0_fd".
    marker = "Probe"
    if probe_name.startswith(marker):
        probe_id = probe_name[len(marker):]
    else:
        probe_id = probe_name[-1]

    folder = os.path.join(root, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Load training and test data
# ==================================================================
# Channels 1 and 2 are loaded for the training probe first, then for the test probe.
train_ch0 = load_channel_data(probe_name=train_probe, adc_num=adc_num, channel=1)
train_ch1 = load_channel_data(probe_name=train_probe, adc_num=adc_num, channel=2)
test_ch0 = load_channel_data(probe_name=test_probe, adc_num=adc_num, channel=1)
test_ch1 = load_channel_data(probe_name=test_probe, adc_num=adc_num, channel=2)

# Channel-1 rows are stacked above channel-2 rows; labels are 0.0 for
# channel 1 and 1.0 for channel 2.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.concatenate((np.zeros(len(train_ch0)), np.ones(len(train_ch1))))
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.concatenate((np.zeros(len(test_ch0)), np.ones(len(test_ch1))))

# ==================================================================
# Standardize features (statistics come from the training set only)
# ==================================================================
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# First forest: fitted on all features to rank them
# ==================================================================
rf = RandomForestClassifier(
    n_estimators=50, max_depth=10,
    min_samples_split=10, min_samples_leaf=5,
    max_features='sqrt', random_state=42,
)
rf.fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
important_features = np.flatnonzero(
    feature_importances > np.percentile(feature_importances, 50)
)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features together with their importance scores.
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Second forest: same hyperparameters, selected features only
# ==================================================================
rf_selected = RandomForestClassifier(
    n_estimators=50, max_depth=10,
    min_samples_split=10, min_samples_leaf=5,
    max_features='sqrt', random_state=42,
)
rf_selected.fit(X_train_selected, y_train)

# Output folder name encodes train probe, test probe, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected column indices, in that order.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and the predicted probability of class 1 on the test probe.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Test-set metrics for the feature-selected model
# ==================================================================
print("\nModel Performance After Feature Selection:")
# Each row: printed label, metric function, and the prediction array it scores
# (log loss uses probabilities; every other metric uses hard predictions).
for metric_label, metric_fn, metric_input in (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
):
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix (rows: true label, columns: predicted label) and the
# false-positive / false-negative rates derived from its rows.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC1 CH1: (332, 38)
✅ Loaded Probe3 ADC1 CH2: (331, 38)
✅ Loaded Probe2 ADC1 CH1: (332, 38)
✅ Loaded Probe2 ADC1 CH2: (332, 38)

Top Important Features:
Max_H: Importance 0.0600
Mean_H: Importance 0.0600
Std_H: Importance 0.0800
Mean Deviation_H: Importance 0.0200
RMS_H: Importance 0.0800
Entropy_H: Importance 0.0400
Mean_Freq_H: Importance 0.0400
Irregularity_H: Importance 0.0200
Min_L: Importance 0.0400
Mean_L: Importance 0.1000
Std_L: Importance 0.0600
Mean Deviation_L: Importance 0.0600
RMS_L: Importance 0.0600
Entropy_L: Importance 0.0600
Mean_Freq_L: Importance 0.0800
Irregularity_L: Importance 0.0600
Variance_L: Importance 0.0800

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0675
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       332
         CH2       1.00      1.00      1.00       332

    

# ADC2

## **Testing of Probe 1**
To conduct experiments, **ADC2** (data acquisition device) is used consistently throughout. **Probe 1** is kept constant, and the machine learning model is **always trained on Probe 1**. The model is then tested on **Probe 2** and **Probe 3** to evaluate its generalization.

The **sampling rate** is varied across different tests, ranging from **10 MSPS to 300 MSPS**.

### Training on **Probe 1** data and testing on **Probe 2** data
The model trained on Probe 1 is evaluated on data recorded with a different probe (Probe 2 and Probe 3) to assess how well it transfers across probes.

#### **Sampling Rate 10MSPS**
This section contains data collected using a sampling rate of **10MSPS**. The model is trained on **Probe 1** and tested on **Probe 2**.

In [91]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ------------------------------------------------------------------
# Experiment settings for this cell
# ------------------------------------------------------------------
# Root folder that holds the per-probe "P<n>_fd" feature directories.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

adc_num = 2               # ADC number used in the feature file names
train_probe_dataset = 1   # probe number the model is trained on
test_probe_dataset = 2    # probe number the model is tested on
sample_rate = 10          # sampling rate (MSPS) used in the feature file names

# Probe labels ("Probe<n>") are derived from the dataset numbers above.
train_probe = "Probe" + str(train_probe_dataset)
test_probe = "Probe" + str(test_probe_dataset)

# Column labels for the 38 features: the same 19 feature stems, emitted first
# with the "_H" suffix and then with the "_L" suffix.
feature_names = [
    f"{stem}_{band}"
    for band in ("H", "L")
    for stem in (
        "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
        "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
        "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
        "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
    )
]

def load_channel_data(probe_name, adc_num, channel, rate=None, root=None):
    """Load the feature array saved for one probe / ADC / channel combination.

    The file is looked up at
    ``<root>/P<probe number>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``.

    Args:
        probe_name: Probe label such as ``"Probe3"``; everything after the
            ``"Probe"`` prefix is used as the folder number.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.
        rate: Sampling rate used in the file name. Defaults to the
            module-level ``sample_rate``.
        root: Directory holding the ``P<n>_fd`` folders. Defaults to the
            module-level ``base_directory``.

    Returns:
        The array returned by ``np.load`` for that file.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Fall back to the notebook-level settings only when the caller did not
    # pass explicit values, so existing three-argument calls keep working.
    if rate is None:
        rate = sample_rate
    if root is None:
        root = base_directory

    # Use the whole suffix after "Probe", not just the last character, so a
    # multi-digit probe such as "Probe10" maps to "P10_fd" instead of "P0_fd".
    marker = "Probe"
    if probe_name.startswith(marker):
        probe_id = probe_name[len(marker):]
    else:
        probe_id = probe_name[-1]

    folder = os.path.join(root, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Load training and test data
# ==================================================================
# Channels 1 and 2 are loaded for the training probe first, then for the test probe.
train_ch0 = load_channel_data(probe_name=train_probe, adc_num=adc_num, channel=1)
train_ch1 = load_channel_data(probe_name=train_probe, adc_num=adc_num, channel=2)
test_ch0 = load_channel_data(probe_name=test_probe, adc_num=adc_num, channel=1)
test_ch1 = load_channel_data(probe_name=test_probe, adc_num=adc_num, channel=2)

# Channel-1 rows are stacked above channel-2 rows; labels are 0.0 for
# channel 1 and 1.0 for channel 2.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.concatenate((np.zeros(len(train_ch0)), np.ones(len(train_ch1))))
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.concatenate((np.zeros(len(test_ch0)), np.ones(len(test_ch1))))

# ==================================================================
# Standardize features (statistics come from the training set only)
# ==================================================================
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# First forest: fitted on all features to rank them
# ==================================================================
rf = RandomForestClassifier(
    n_estimators=50, max_depth=10,
    min_samples_split=10, min_samples_leaf=5,
    max_features='sqrt', random_state=42,
)
rf.fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
important_features = np.flatnonzero(
    feature_importances > np.percentile(feature_importances, 50)
)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features together with their importance scores.
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Second forest: same hyperparameters, selected features only
# ==================================================================
rf_selected = RandomForestClassifier(
    n_estimators=50, max_depth=10,
    min_samples_split=10, min_samples_leaf=5,
    max_features='sqrt', random_state=42,
)
rf_selected.fit(X_train_selected, y_train)

# Output folder name encodes train probe, test probe, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected column indices, in that order.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and the predicted probability of class 1 on the test probe.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Test-set metrics for the feature-selected model
# ==================================================================
print("\nModel Performance After Feature Selection:")
# Each row: printed label, metric function, and the prediction array it scores
# (log loss uses probabilities; every other metric uses hard predictions).
for metric_label, metric_fn, metric_input in (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
):
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix (rows: true label, columns: predicted label) and the
# false-positive / false-negative rates derived from its rows.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC2 CH1: (9998, 38)
✅ Loaded Probe1 ADC2 CH2: (9998, 38)
✅ Loaded Probe2 ADC2 CH1: (9998, 38)
✅ Loaded Probe2 ADC2 CH2: (9998, 38)

Top Important Features:
Max_H: Importance 0.1456
Min_H: Importance 0.0167
Mean_H: Importance 0.0920
Mean Deviation_H: Importance 0.0084
RMS_H: Importance 0.1570
Skewness_H: Importance 0.0075
Kurtosis_H: Importance 0.0072
Entropy_H: Importance 0.1037
Mean_Freq_H: Importance 0.0742
Variance_H: Importance 0.0309
Max_L: Importance 0.0124
Min_L: Importance 0.0557
Mean_L: Importance 0.0467
RMS_L: Importance 0.0719
Skewness_L: Importance 0.0199
Kurtosis_L: Importance 0.0082
Entropy_L: Importance 0.0349
Mean_Freq_L: Importance 0.0153
Variance_L: Importance 0.0319

Model Performance After Feature Selection:
Test Accuracy: 0.9487
Balanced Accuracy: 0.9487
MCC: 0.9017
Log Loss: 0.1354
F1 Score: 0.9511
Recall: 0.9975
Precision: 0.9088
              precision    recall  f1-score   support

         CH1       1.00      0.90      0.95      9998
         

#### **Sampling Rate 20MSPS**
This section contains data collected using a sampling rate of **20MSPS**. The model is trained on **Probe 1** and tested on **Probe 2**.

In [92]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ------------------------------------------------------------------
# Experiment settings for this cell
# ------------------------------------------------------------------
# Root folder that holds the per-probe "P<n>_fd" feature directories.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

adc_num = 2               # ADC number used in the feature file names
train_probe_dataset = 1   # probe number the model is trained on
test_probe_dataset = 2    # probe number the model is tested on
sample_rate = 20          # sampling rate (MSPS) used in the feature file names

# Probe labels ("Probe<n>") are derived from the dataset numbers above.
train_probe = "Probe" + str(train_probe_dataset)
test_probe = "Probe" + str(test_probe_dataset)

# Column labels for the 38 features: the same 19 feature stems, emitted first
# with the "_H" suffix and then with the "_L" suffix.
feature_names = [
    f"{stem}_{band}"
    for band in ("H", "L")
    for stem in (
        "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
        "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
        "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
        "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
    )
]

def load_channel_data(probe_name, adc_num, channel, rate=None, root=None):
    """Load the feature array saved for one probe / ADC / channel combination.

    The file is looked up at
    ``<root>/P<probe number>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``.

    Args:
        probe_name: Probe label such as ``"Probe3"``; everything after the
            ``"Probe"`` prefix is used as the folder number.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.
        rate: Sampling rate used in the file name. Defaults to the
            module-level ``sample_rate``.
        root: Directory holding the ``P<n>_fd`` folders. Defaults to the
            module-level ``base_directory``.

    Returns:
        The array returned by ``np.load`` for that file.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Fall back to the notebook-level settings only when the caller did not
    # pass explicit values, so existing three-argument calls keep working.
    if rate is None:
        rate = sample_rate
    if root is None:
        root = base_directory

    # Use the whole suffix after "Probe", not just the last character, so a
    # multi-digit probe such as "Probe10" maps to "P10_fd" instead of "P0_fd".
    marker = "Probe"
    if probe_name.startswith(marker):
        probe_id = probe_name[len(marker):]
    else:
        probe_id = probe_name[-1]

    folder = os.path.join(root, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Load training and test data
# ==================================================================
# Channels 1 and 2 are loaded for the training probe first, then for the test probe.
train_ch0 = load_channel_data(probe_name=train_probe, adc_num=adc_num, channel=1)
train_ch1 = load_channel_data(probe_name=train_probe, adc_num=adc_num, channel=2)
test_ch0 = load_channel_data(probe_name=test_probe, adc_num=adc_num, channel=1)
test_ch1 = load_channel_data(probe_name=test_probe, adc_num=adc_num, channel=2)

# Channel-1 rows are stacked above channel-2 rows; labels are 0.0 for
# channel 1 and 1.0 for channel 2.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.concatenate((np.zeros(len(train_ch0)), np.ones(len(train_ch1))))
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.concatenate((np.zeros(len(test_ch0)), np.ones(len(test_ch1))))

# ==================================================================
# Standardize features (statistics come from the training set only)
# ==================================================================
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# First forest: fitted on all features to rank them
# ==================================================================
rf = RandomForestClassifier(
    n_estimators=50, max_depth=10,
    min_samples_split=10, min_samples_leaf=5,
    max_features='sqrt', random_state=42,
)
rf.fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
important_features = np.flatnonzero(
    feature_importances > np.percentile(feature_importances, 50)
)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features together with their importance scores.
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Second forest: same hyperparameters, selected features only
# ==================================================================
rf_selected = RandomForestClassifier(
    n_estimators=50, max_depth=10,
    min_samples_split=10, min_samples_leaf=5,
    max_features='sqrt', random_state=42,
)
rf_selected.fit(X_train_selected, y_train)

# Output folder name encodes train probe, test probe, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected column indices, in that order.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and the predicted probability of class 1 on the test probe.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Test-set metrics for the feature-selected model
# ==================================================================
print("\nModel Performance After Feature Selection:")
# Each row: printed label, metric function, and the prediction array it scores
# (log loss uses probabilities; every other metric uses hard predictions).
for metric_label, metric_fn, metric_input in (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
):
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix (rows: true label, columns: predicted label) and the
# false-positive / false-negative rates derived from its rows.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC2 CH1: (4998, 38)
✅ Loaded Probe1 ADC2 CH2: (4998, 38)
✅ Loaded Probe2 ADC2 CH1: (4998, 38)
✅ Loaded Probe2 ADC2 CH2: (4998, 38)

Top Important Features:
Max_H: Importance 0.0831
Min_H: Importance 0.0256
Mean_H: Importance 0.1231
Mean Deviation_H: Importance 0.0052
RMS_H: Importance 0.2181
Skewness_H: Importance 0.0008
Entropy_H: Importance 0.0228
Spread_H: Importance 0.0005
Mean_Freq_H: Importance 0.0733
Variance_H: Importance 0.0369
Min_L: Importance 0.0283
Mean_L: Importance 0.1224
Std_L: Importance 0.0017
RMS_L: Importance 0.1415
Kurtosis_L: Importance 0.0016
Entropy_L: Importance 0.0361
Spread_L: Importance 0.0008
Mean_Freq_L: Importance 0.0120
Variance_L: Importance 0.0648

Model Performance After Feature Selection:
Test Accuracy: 0.9897
Balanced Accuracy: 0.9897
MCC: 0.9796
Log Loss: 0.0200
F1 Score: 0.9896
Recall: 0.9794
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       0.98      1.00      0.99      4998
         CH2 

#### **Sampling Rate 30MSPS**
This section contains data collected using a sampling rate of **30MSPS**. The model is trained on **Probe 1** and tested on **Probe 2**.

In [93]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ------------------------------------------------------------------
# Experiment settings for this cell
# ------------------------------------------------------------------
# Root folder that holds the per-probe "P<n>_fd" feature directories.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

adc_num = 2               # ADC number used in the feature file names
train_probe_dataset = 1   # probe number the model is trained on
test_probe_dataset = 2    # probe number the model is tested on
sample_rate = 30          # sampling rate (MSPS) used in the feature file names

# Probe labels ("Probe<n>") are derived from the dataset numbers above.
train_probe = "Probe" + str(train_probe_dataset)
test_probe = "Probe" + str(test_probe_dataset)

# Column labels for the 38 features: the same 19 feature stems, emitted first
# with the "_H" suffix and then with the "_L" suffix.
feature_names = [
    f"{stem}_{band}"
    for band in ("H", "L")
    for stem in (
        "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
        "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
        "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
        "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
    )
]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe/ADC/channel combination.

    The file is expected at
    ``<base_directory>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``
    where ``<n>`` is the numeric suffix of ``probe_name``; ``base_directory``
    and ``sample_rate`` are read from the module-level configuration.

    Args:
        probe_name: Probe identifier such as ``"Probe1"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.

    Returns:
        The array produced by ``np.load`` for that file.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Use the whole trailing number so that multi-digit probes ("Probe10")
    # resolve to "P10_fd" instead of "P0_fd". Names without a numeric suffix
    # keep the previous behaviour of using their last character.
    name_stem = probe_name.rstrip("0123456789")
    probe_id = probe_name[len(name_stem):] or probe_name[-1]

    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Channel 1 supplies class 0 and channel 2 supplies class 1, for both probes.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Stack the two channels row-wise and build the matching 0/1 label vectors.
X_train = np.vstack((train_ch0, train_ch1))
X_test = np.vstack((test_ch0, test_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and then applied to both sets.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# One hyperparameter set shared by the selection model and the final model.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params)
rf.fit(X_train_scaled, y_train)

# Keep the features whose importance is strictly above the median importance.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features (listed in column order, not ranked).
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params)
rf_selected.fit(X_train_selected, y_train)

# Output folder is unique per train/test probe pair, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected column indices together.
artifacts = {
    "RandomForest_Model.pkl": rf_selected,
    "Scaler.pkl": scaler,
    "Selected_Features.pkl": important_features,
}
for artifact_name, artifact in artifacts.items():
    joblib.dump(artifact, os.path.join(model_dir, artifact_name))

# Hard predictions plus the predicted probability of class 1.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
test_accuracy = accuracy_score(y_test, y_pred_selected)
print(f"Test Accuracy: {test_accuracy:.4f}")
balanced_accuracy = balanced_accuracy_score(y_test, y_pred_selected)
print(f"Balanced Accuracy: {balanced_accuracy:.4f}")
mcc = matthews_corrcoef(y_test, y_pred_selected)
print(f"MCC: {mcc:.4f}")
test_log_loss = log_loss(y_test, y_probs_selected)
print(f"Log Loss: {test_log_loss:.4f}")
f1 = f1_score(y_test, y_pred_selected)
print(f"F1 Score: {f1:.4f}")
recall = recall_score(y_test, y_pred_selected)
print(f"Recall: {recall:.4f}")
precision = precision_score(y_test, y_pred_selected)
print(f"Precision: {precision:.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix: row 0 holds the true class-0 samples, row 1 the true class-1 samples.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
negative_row, positive_row = cm[0], cm[1]
fp_rate = negative_row[1] / (negative_row[1] + negative_row[0])
fn_rate = positive_row[0] / (positive_row[0] + positive_row[1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC2 CH1: (3331, 38)
✅ Loaded Probe1 ADC2 CH2: (3332, 38)
✅ Loaded Probe2 ADC2 CH1: (3331, 38)
✅ Loaded Probe2 ADC2 CH2: (3332, 38)

Top Important Features:
Max_H: Importance 0.0189
Min_H: Importance 0.0060
Mean_H: Importance 0.0989
Mean Deviation_H: Importance 0.0030
RMS_H: Importance 0.1652
Centroid_H: Importance 0.0021
Entropy_H: Importance 0.0099
Spread_H: Importance 0.0010
Mean_Freq_H: Importance 0.0180
Variance_H: Importance 0.0314
Max_L: Importance 0.0052
Min_L: Importance 0.1486
Mean_L: Importance 0.1112
RMS_L: Importance 0.2474
Peak-to-Peak_L: Importance 0.0012
Entropy_L: Importance 0.0229
Mean_Freq_L: Importance 0.0088
Irregularity_L: Importance 0.0010
Variance_L: Importance 0.0910

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0223
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      3331
   

#### **Sampling Rate 40MSPS**
This section presents results for data acquired with **ADC2** at a sampling rate of **40MSPS**. The model is trained on **Probe 1** and tested on **Probe 2**.

In [94]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder; the loader below looks for a "P<n>_fd" sub-folder per probe.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selectors: ADC number and sampling rate (both appear in the
# feature file names), plus the probe datasets used for training and testing.
adc_num, sample_rate = 2, 40
train_probe_dataset, test_probe_dataset = 1, 2

# Probe names are derived from the dataset numbers above.
train_probe, test_probe = (
    f"Probe{number}" for number in (train_probe_dataset, test_probe_dataset)
)

# Labels for the 38 feature columns, grouped as:
# time-domain high, frequency-domain high, time-domain low, frequency-domain low.
time_high_features = [
    "Max_H", "Min_H", "Mean_H", "Std_H", "Mean Deviation_H",
    "RMS_H", "Skewness_H", "Kurtosis_H", "Peak-to-Peak_H", "Zero Crossing Rate_H",
]
freq_high_features = [
    "Centroid_H", "Entropy_H", "Spread_H", "Skewness_Freq_H", "Mean_Freq_H",
    "Kurtosis_Freq_H", "Irregularity_H", "Variance_H", "Dominant_Freq_H",
]
time_low_features = [
    "Max_L", "Min_L", "Mean_L", "Std_L", "Mean Deviation_L",
    "RMS_L", "Skewness_L", "Kurtosis_L", "Peak-to-Peak_L", "Zero Crossing Rate_L",
]
freq_low_features = [
    "Centroid_L", "Entropy_L", "Spread_L", "Skewness_Freq_L", "Mean_Freq_L",
    "Kurtosis_Freq_L", "Irregularity_L", "Variance_L", "Dominant_Freq_L",
]
feature_names = (
    time_high_features + freq_high_features + time_low_features + freq_low_features
)

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe/ADC/channel combination.

    The file is expected at
    ``<base_directory>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``
    where ``<n>`` is the numeric suffix of ``probe_name``; ``base_directory``
    and ``sample_rate`` are read from the module-level configuration.

    Args:
        probe_name: Probe identifier such as ``"Probe1"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.

    Returns:
        The array produced by ``np.load`` for that file.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Use the whole trailing number so that multi-digit probes ("Probe10")
    # resolve to "P10_fd" instead of "P0_fd". Names without a numeric suffix
    # keep the previous behaviour of using their last character.
    name_stem = probe_name.rstrip("0123456789")
    probe_id = probe_name[len(name_stem):] or probe_name[-1]

    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Channel 1 supplies class 0 and channel 2 supplies class 1, for both probes.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Stack the two channels row-wise and build the matching 0/1 label vectors.
X_train = np.vstack((train_ch0, train_ch1))
X_test = np.vstack((test_ch0, test_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and then applied to both sets.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# One hyperparameter set shared by the selection model and the final model.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params)
rf.fit(X_train_scaled, y_train)

# Keep the features whose importance is strictly above the median importance.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features (listed in column order, not ranked).
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params)
rf_selected.fit(X_train_selected, y_train)

# Output folder is unique per train/test probe pair, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected column indices together.
artifacts = {
    "RandomForest_Model.pkl": rf_selected,
    "Scaler.pkl": scaler,
    "Selected_Features.pkl": important_features,
}
for artifact_name, artifact in artifacts.items():
    joblib.dump(artifact, os.path.join(model_dir, artifact_name))

# Hard predictions plus the predicted probability of class 1.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
test_accuracy = accuracy_score(y_test, y_pred_selected)
print(f"Test Accuracy: {test_accuracy:.4f}")
balanced_accuracy = balanced_accuracy_score(y_test, y_pred_selected)
print(f"Balanced Accuracy: {balanced_accuracy:.4f}")
mcc = matthews_corrcoef(y_test, y_pred_selected)
print(f"MCC: {mcc:.4f}")
test_log_loss = log_loss(y_test, y_probs_selected)
print(f"Log Loss: {test_log_loss:.4f}")
f1 = f1_score(y_test, y_pred_selected)
print(f"F1 Score: {f1:.4f}")
recall = recall_score(y_test, y_pred_selected)
print(f"Recall: {recall:.4f}")
precision = precision_score(y_test, y_pred_selected)
print(f"Precision: {precision:.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix: row 0 holds the true class-0 samples, row 1 the true class-1 samples.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
negative_row, positive_row = cm[0], cm[1]
fp_rate = negative_row[1] / (negative_row[1] + negative_row[0])
fn_rate = positive_row[0] / (positive_row[0] + positive_row[1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC2 CH1: (2481, 38)
✅ Loaded Probe1 ADC2 CH2: (2481, 38)
✅ Loaded Probe2 ADC2 CH1: (2481, 38)
✅ Loaded Probe2 ADC2 CH2: (2481, 38)

Top Important Features:
Max_H: Importance 0.0651
Min_H: Importance 0.0282
Mean_H: Importance 0.0975
Mean Deviation_H: Importance 0.0165
RMS_H: Importance 0.1401
Centroid_H: Importance 0.0000
Entropy_H: Importance 0.0282
Spread_H: Importance 0.0001
Mean_Freq_H: Importance 0.0574
Variance_H: Importance 0.0608
Max_L: Importance 0.0136
Min_L: Importance 0.0274
Mean_L: Importance 0.1900
RMS_L: Importance 0.1277
Skewness_L: Importance 0.0002
Entropy_L: Importance 0.0345
Mean_Freq_L: Importance 0.0111
Variance_L: Importance 0.1015

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0018
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      2481
         CH2       1.00      1.00      1.

#### **Sampling Rate 50MSPS**
This section presents results for data acquired with **ADC2** at a sampling rate of **50MSPS**. The model is trained on **Probe 1** and tested on **Probe 2**.

In [95]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder; the loader below looks for a "P<n>_fd" sub-folder per probe.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selectors: ADC number and sampling rate (both appear in the
# feature file names), plus the probe datasets used for training and testing.
adc_num, sample_rate = 2, 50
train_probe_dataset, test_probe_dataset = 1, 2

# Probe names are derived from the dataset numbers above.
train_probe, test_probe = (
    f"Probe{number}" for number in (train_probe_dataset, test_probe_dataset)
)

# Labels for the 38 feature columns, grouped as:
# time-domain high, frequency-domain high, time-domain low, frequency-domain low.
time_high_features = [
    "Max_H", "Min_H", "Mean_H", "Std_H", "Mean Deviation_H",
    "RMS_H", "Skewness_H", "Kurtosis_H", "Peak-to-Peak_H", "Zero Crossing Rate_H",
]
freq_high_features = [
    "Centroid_H", "Entropy_H", "Spread_H", "Skewness_Freq_H", "Mean_Freq_H",
    "Kurtosis_Freq_H", "Irregularity_H", "Variance_H", "Dominant_Freq_H",
]
time_low_features = [
    "Max_L", "Min_L", "Mean_L", "Std_L", "Mean Deviation_L",
    "RMS_L", "Skewness_L", "Kurtosis_L", "Peak-to-Peak_L", "Zero Crossing Rate_L",
]
freq_low_features = [
    "Centroid_L", "Entropy_L", "Spread_L", "Skewness_Freq_L", "Mean_Freq_L",
    "Kurtosis_Freq_L", "Irregularity_L", "Variance_L", "Dominant_Freq_L",
]
feature_names = (
    time_high_features + freq_high_features + time_low_features + freq_low_features
)

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe/ADC/channel combination.

    The file is expected at
    ``<base_directory>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``
    where ``<n>`` is the numeric suffix of ``probe_name``; ``base_directory``
    and ``sample_rate`` are read from the module-level configuration.

    Args:
        probe_name: Probe identifier such as ``"Probe1"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.

    Returns:
        The array produced by ``np.load`` for that file.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Use the whole trailing number so that multi-digit probes ("Probe10")
    # resolve to "P10_fd" instead of "P0_fd". Names without a numeric suffix
    # keep the previous behaviour of using their last character.
    name_stem = probe_name.rstrip("0123456789")
    probe_id = probe_name[len(name_stem):] or probe_name[-1]

    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Channel 1 supplies class 0 and channel 2 supplies class 1, for both probes.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Stack the two channels row-wise and build the matching 0/1 label vectors.
X_train = np.vstack((train_ch0, train_ch1))
X_test = np.vstack((test_ch0, test_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and then applied to both sets.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# One hyperparameter set shared by the selection model and the final model.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params)
rf.fit(X_train_scaled, y_train)

# Keep the features whose importance is strictly above the median importance.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features (listed in column order, not ranked).
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params)
rf_selected.fit(X_train_selected, y_train)

# Output folder is unique per train/test probe pair, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected column indices together.
artifacts = {
    "RandomForest_Model.pkl": rf_selected,
    "Scaler.pkl": scaler,
    "Selected_Features.pkl": important_features,
}
for artifact_name, artifact in artifacts.items():
    joblib.dump(artifact, os.path.join(model_dir, artifact_name))

# Hard predictions plus the predicted probability of class 1.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
test_accuracy = accuracy_score(y_test, y_pred_selected)
print(f"Test Accuracy: {test_accuracy:.4f}")
balanced_accuracy = balanced_accuracy_score(y_test, y_pred_selected)
print(f"Balanced Accuracy: {balanced_accuracy:.4f}")
mcc = matthews_corrcoef(y_test, y_pred_selected)
print(f"MCC: {mcc:.4f}")
test_log_loss = log_loss(y_test, y_probs_selected)
print(f"Log Loss: {test_log_loss:.4f}")
f1 = f1_score(y_test, y_pred_selected)
print(f"F1 Score: {f1:.4f}")
recall = recall_score(y_test, y_pred_selected)
print(f"Recall: {recall:.4f}")
precision = precision_score(y_test, y_pred_selected)
print(f"Precision: {precision:.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix: row 0 holds the true class-0 samples, row 1 the true class-1 samples.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
negative_row, positive_row = cm[0], cm[1]
fp_rate = negative_row[1] / (negative_row[1] + negative_row[0])
fn_rate = positive_row[0] / (positive_row[0] + positive_row[1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC2 CH1: (1998, 38)
✅ Loaded Probe1 ADC2 CH2: (1998, 38)
✅ Loaded Probe2 ADC2 CH1: (1998, 38)
✅ Loaded Probe2 ADC2 CH2: (1998, 38)

Top Important Features:
Max_H: Importance 0.0565
Min_H: Importance 0.0079
Mean_H: Importance 0.0818
Std_H: Importance 0.0270
Mean Deviation_H: Importance 0.0008
RMS_H: Importance 0.1352
Centroid_H: Importance 0.0004
Entropy_H: Importance 0.0402
Mean_Freq_H: Importance 0.0817
Variance_H: Importance 0.0861
Max_L: Importance 0.0042
Min_L: Importance 0.0007
Mean_L: Importance 0.1734
Mean Deviation_L: Importance 0.0106
RMS_L: Importance 0.1227
Skewness_L: Importance 0.0001
Entropy_L: Importance 0.0432
Spread_L: Importance 0.0002
Variance_L: Importance 0.1273

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0034
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      1998
         CH

#### **Sampling Rate 100MSPS**
This section presents results for data acquired with **ADC2** at a sampling rate of **100MSPS**. The model is trained on **Probe 1** and tested on **Probe 2**.

In [96]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder; the loader below looks for a "P<n>_fd" sub-folder per probe.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selectors: ADC number and sampling rate (both appear in the
# feature file names), plus the probe datasets used for training and testing.
adc_num, sample_rate = 2, 100
train_probe_dataset, test_probe_dataset = 1, 2

# Probe names are derived from the dataset numbers above.
train_probe, test_probe = (
    f"Probe{number}" for number in (train_probe_dataset, test_probe_dataset)
)

# Labels for the 38 feature columns, grouped as:
# time-domain high, frequency-domain high, time-domain low, frequency-domain low.
time_high_features = [
    "Max_H", "Min_H", "Mean_H", "Std_H", "Mean Deviation_H",
    "RMS_H", "Skewness_H", "Kurtosis_H", "Peak-to-Peak_H", "Zero Crossing Rate_H",
]
freq_high_features = [
    "Centroid_H", "Entropy_H", "Spread_H", "Skewness_Freq_H", "Mean_Freq_H",
    "Kurtosis_Freq_H", "Irregularity_H", "Variance_H", "Dominant_Freq_H",
]
time_low_features = [
    "Max_L", "Min_L", "Mean_L", "Std_L", "Mean Deviation_L",
    "RMS_L", "Skewness_L", "Kurtosis_L", "Peak-to-Peak_L", "Zero Crossing Rate_L",
]
freq_low_features = [
    "Centroid_L", "Entropy_L", "Spread_L", "Skewness_Freq_L", "Mean_Freq_L",
    "Kurtosis_Freq_L", "Irregularity_L", "Variance_L", "Dominant_Freq_L",
]
feature_names = (
    time_high_features + freq_high_features + time_low_features + freq_low_features
)

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe/ADC/channel combination.

    The file is expected at
    ``<base_directory>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``
    where ``<n>`` is the numeric suffix of ``probe_name``; ``base_directory``
    and ``sample_rate`` are read from the module-level configuration.

    Args:
        probe_name: Probe identifier such as ``"Probe1"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.

    Returns:
        The array produced by ``np.load`` for that file.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Use the whole trailing number so that multi-digit probes ("Probe10")
    # resolve to "P10_fd" instead of "P0_fd". Names without a numeric suffix
    # keep the previous behaviour of using their last character.
    name_stem = probe_name.rstrip("0123456789")
    probe_id = probe_name[len(name_stem):] or probe_name[-1]

    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Channel 1 supplies class 0 and channel 2 supplies class 1, for both probes.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Stack the two channels row-wise and build the matching 0/1 label vectors.
X_train = np.vstack((train_ch0, train_ch1))
X_test = np.vstack((test_ch0, test_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and then applied to both sets.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# One hyperparameter set shared by the selection model and the final model.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params)
rf.fit(X_train_scaled, y_train)

# Keep the features whose importance is strictly above the median importance.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features (listed in column order, not ranked).
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params)
rf_selected.fit(X_train_selected, y_train)

# Output folder is unique per train/test probe pair, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected column indices together.
artifacts = {
    "RandomForest_Model.pkl": rf_selected,
    "Scaler.pkl": scaler,
    "Selected_Features.pkl": important_features,
}
for artifact_name, artifact in artifacts.items():
    joblib.dump(artifact, os.path.join(model_dir, artifact_name))

# Hard predictions plus the predicted probability of class 1.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
test_accuracy = accuracy_score(y_test, y_pred_selected)
print(f"Test Accuracy: {test_accuracy:.4f}")
balanced_accuracy = balanced_accuracy_score(y_test, y_pred_selected)
print(f"Balanced Accuracy: {balanced_accuracy:.4f}")
mcc = matthews_corrcoef(y_test, y_pred_selected)
print(f"MCC: {mcc:.4f}")
test_log_loss = log_loss(y_test, y_probs_selected)
print(f"Log Loss: {test_log_loss:.4f}")
f1 = f1_score(y_test, y_pred_selected)
print(f"F1 Score: {f1:.4f}")
recall = recall_score(y_test, y_pred_selected)
print(f"Recall: {recall:.4f}")
precision = precision_score(y_test, y_pred_selected)
print(f"Precision: {precision:.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix: row 0 holds the true class-0 samples, row 1 the true class-1 samples.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
negative_row, positive_row = cm[0], cm[1]
fp_rate = negative_row[1] / (negative_row[1] + negative_row[0])
fn_rate = positive_row[0] / (positive_row[0] + positive_row[1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC2 CH1: (998, 38)
✅ Loaded Probe1 ADC2 CH2: (998, 38)
✅ Loaded Probe2 ADC2 CH1: (998, 38)
✅ Loaded Probe2 ADC2 CH2: (998, 38)

Top Important Features:
Max_H: Importance 0.1009
Mean_H: Importance 0.0800
Std_H: Importance 0.0398
Mean Deviation_H: Importance 0.0428
RMS_H: Importance 0.0640
Peak-to-Peak_H: Importance 0.0010
Entropy_H: Importance 0.1402
Spread_H: Importance 0.0399
Mean_Freq_H: Importance 0.1013
Kurtosis_Freq_H: Importance 0.0125
Variance_H: Importance 0.0800
Min_L: Importance 0.0183
Mean_L: Importance 0.1006
RMS_L: Importance 0.0802
Skewness_L: Importance 0.0001
Entropy_L: Importance 0.0379
Mean_Freq_L: Importance 0.0001
Variance_L: Importance 0.0604

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.1017
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       998
         CH2       1.00      1.

#### **Sampling Rate 150MSPS**
This section presents results for data acquired with **ADC2** at a sampling rate of **150MSPS**. The model is trained on **Probe 1** and tested on **Probe 2**.



In [97]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder; the loader below looks for a "P<n>_fd" sub-folder per probe.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selectors: ADC number and sampling rate (both appear in the
# feature file names), plus the probe datasets used for training and testing.
adc_num, sample_rate = 2, 150
train_probe_dataset, test_probe_dataset = 1, 2

# Probe names are derived from the dataset numbers above.
train_probe, test_probe = (
    f"Probe{number}" for number in (train_probe_dataset, test_probe_dataset)
)

# Labels for the 38 feature columns, grouped as:
# time-domain high, frequency-domain high, time-domain low, frequency-domain low.
time_high_features = [
    "Max_H", "Min_H", "Mean_H", "Std_H", "Mean Deviation_H",
    "RMS_H", "Skewness_H", "Kurtosis_H", "Peak-to-Peak_H", "Zero Crossing Rate_H",
]
freq_high_features = [
    "Centroid_H", "Entropy_H", "Spread_H", "Skewness_Freq_H", "Mean_Freq_H",
    "Kurtosis_Freq_H", "Irregularity_H", "Variance_H", "Dominant_Freq_H",
]
time_low_features = [
    "Max_L", "Min_L", "Mean_L", "Std_L", "Mean Deviation_L",
    "RMS_L", "Skewness_L", "Kurtosis_L", "Peak-to-Peak_L", "Zero Crossing Rate_L",
]
freq_low_features = [
    "Centroid_L", "Entropy_L", "Spread_L", "Skewness_Freq_L", "Mean_Freq_L",
    "Kurtosis_Freq_L", "Irregularity_L", "Variance_L", "Dominant_Freq_L",
]
feature_names = (
    time_high_features + freq_high_features + time_low_features + freq_low_features
)

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe/ADC/channel combination.

    The file is expected at
    ``<base_directory>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``
    where ``<n>`` is the numeric suffix of ``probe_name``; ``base_directory``
    and ``sample_rate`` are read from the module-level configuration.

    Args:
        probe_name: Probe identifier such as ``"Probe1"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.

    Returns:
        The array produced by ``np.load`` for that file.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Use the whole trailing number so that multi-digit probes ("Probe10")
    # resolve to "P10_fd" instead of "P0_fd". Names without a numeric suffix
    # keep the previous behaviour of using their last character.
    name_stem = probe_name.rstrip("0123456789")
    probe_id = probe_name[len(name_stem):] or probe_name[-1]

    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Channel 1 supplies class 0 and channel 2 supplies class 1, for both probes.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Stack the two channels row-wise and build the matching 0/1 label vectors.
X_train = np.vstack((train_ch0, train_ch1))
X_test = np.vstack((test_ch0, test_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and then applied to both sets.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# One hyperparameter set shared by the selection model and the final model.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params)
rf.fit(X_train_scaled, y_train)

# Keep the features whose importance is strictly above the median importance.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features (listed in column order, not ranked).
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params)
rf_selected.fit(X_train_selected, y_train)

# Output folder is unique per train/test probe pair, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected column indices together.
artifacts = {
    "RandomForest_Model.pkl": rf_selected,
    "Scaler.pkl": scaler,
    "Selected_Features.pkl": important_features,
}
for artifact_name, artifact in artifacts.items():
    joblib.dump(artifact, os.path.join(model_dir, artifact_name))

# Hard predictions plus the predicted probability of class 1.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
test_accuracy = accuracy_score(y_test, y_pred_selected)
print(f"Test Accuracy: {test_accuracy:.4f}")
balanced_accuracy = balanced_accuracy_score(y_test, y_pred_selected)
print(f"Balanced Accuracy: {balanced_accuracy:.4f}")
mcc = matthews_corrcoef(y_test, y_pred_selected)
print(f"MCC: {mcc:.4f}")
test_log_loss = log_loss(y_test, y_probs_selected)
print(f"Log Loss: {test_log_loss:.4f}")
f1 = f1_score(y_test, y_pred_selected)
print(f"F1 Score: {f1:.4f}")
recall = recall_score(y_test, y_pred_selected)
print(f"Recall: {recall:.4f}")
precision = precision_score(y_test, y_pred_selected)
print(f"Precision: {precision:.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix: row 0 holds the true class-0 samples, row 1 the true class-1 samples.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
negative_row, positive_row = cm[0], cm[1]
fp_rate = negative_row[1] / (negative_row[1] + negative_row[0])
fn_rate = positive_row[0] / (positive_row[0] + positive_row[1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC2 CH1: (665, 38)
✅ Loaded Probe1 ADC2 CH2: (665, 38)
✅ Loaded Probe2 ADC2 CH1: (665, 38)
✅ Loaded Probe2 ADC2 CH2: (665, 38)

Top Important Features:
Max_H: Importance 0.1000
Mean_H: Importance 0.0800
Std_H: Importance 0.0800
Mean Deviation_H: Importance 0.0400
RMS_H: Importance 0.0831
Entropy_H: Importance 0.1000
Mean_Freq_H: Importance 0.0800
Kurtosis_Freq_H: Importance 0.0169
Variance_H: Importance 0.0200
Mean_L: Importance 0.1000
Mean Deviation_L: Importance 0.0800
RMS_L: Importance 0.0800
Entropy_L: Importance 0.0600
Variance_L: Importance 0.0800

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0046
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       665
         CH2       1.00      1.00      1.00       665

    accuracy                           1.00      1330
   macro avg       1.00      1.00 

#### **Sampling Rate 200MSPS**
This section presents results for data acquired with **ADC2** at a sampling rate of **200MSPS**. The model is trained on **Probe 1** and tested on **Probe 2**.



In [98]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Experiment Settings
# ==================================================================
# Root folder that contains the per-probe "P<n>_fd" feature directories.
base_directory = r"C:\Users\awm21\Documents\Probe_Vari\Neural_Networks\Features_Vitis"

# ADC number, probe numbers and sampling rate used to build the file names.
adc_num = 2
train_probe_dataset = 1
test_probe_dataset = 2
sample_rate = 200

# Probe labels follow directly from the dataset numbers above.
train_probe = "Probe" + str(train_probe_dataset)
test_probe = "Probe" + str(test_probe_dataset)

# Feature names (Time High, Freq High, Time Low, Freq Low): the same 19
# descriptors are listed once with the "_H" suffix and once with "_L".
_descriptor_names = (
    # time-domain descriptors
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    # frequency-domain descriptors
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [
    f"{descriptor}_{band}"
    for band in ("H", "L")
    for descriptor in _descriptor_names
]

def load_channel_data(probe_name, adc_num, channel, *, rate=None, root=None):
    """Load the saved feature array for one probe / ADC / channel.

    The array is read from
    ``<root>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``
    where ``<n>`` is the trailing number of ``probe_name``. The file contents
    are not cross-checked against ``adc_num``; only the file name is used.

    Parameters
    ----------
    probe_name : str
        Probe label ending in its number, e.g. ``"Probe1"``.
    adc_num : int
        ADC number used in the file name.
    channel : int
        Channel number used in the file name.
    rate : optional
        Sampling-rate token used in the file name. Defaults to the
        notebook-level ``sample_rate``.
    root : str, optional
        Directory that contains the ``P<n>_fd`` folders. Defaults to the
        notebook-level ``base_directory``.

    Returns
    -------
    numpy.ndarray
        The array returned by ``np.load`` for the resolved file.

    Raises
    ------
    FileNotFoundError
        If the resolved file does not exist.
    """
    if rate is None:
        rate = sample_rate
    if root is None:
        root = base_directory

    # Keep the whole trailing number so "Probe12" resolves to "P12_fd" and
    # not "P2_fd"; fall back to the last character when there are no digits.
    stem = probe_name.rstrip("0123456789")
    probe_id = probe_name[len(stem):] or probe_name[-1:]

    folder = os.path.join(root, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Build Train / Test Sets
# ==================================================================
# CH1 rows are labelled 0 and CH2 rows are labelled 1 for both probes.
train_ch0 = load_channel_data(train_probe, adc_num, 1)
train_ch1 = load_channel_data(train_probe, adc_num, 2)
test_ch0 = load_channel_data(test_probe, adc_num, 1)
test_ch1 = load_channel_data(test_probe, adc_num, 2)

X_train = np.vstack((train_ch0, train_ch1))
X_test = np.vstack((test_ch0, test_ch1))
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Standardize Features
# ==================================================================
# Scaling statistics come from the training probe only and are then
# applied unchanged to the test probe.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# First Forest: Rank Features
# ==================================================================
# Both forests in this cell share the same hyperparameters and seed.
_forest_settings = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**_forest_settings)
rf.fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
_importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > _importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the kept columns in column order.
print("\nTop Important Features:")
for feature_idx, _weight in zip(important_features, feature_importances[important_features]):
    print(f"{feature_names[feature_idx]}: Importance {_weight:.4f}")

# ==================================================================
# Second Forest: Train on the Selected Columns
# ==================================================================
rf_selected = RandomForestClassifier(**_forest_settings)
rf_selected.fit(X_train_selected, y_train)

# One output folder per (train probe, test probe, ADC, sampling rate).
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model together with the scaler and the selected column
# indices it was trained with.
for _artifact, _file_name in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(_artifact, os.path.join(model_dir, _file_name))

# Hard labels and the probability of class 1 for the test probe.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Evaluate on the Test Probe
# ==================================================================
print("\nModel Performance After Feature Selection:")
# Log loss is scored on probabilities; every other metric on hard labels.
for _label, _metric, _scores in (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
):
    print(f"{_label}: {_metric(y_test, _scores):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix rows are true labels, columns are predicted labels.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC2 CH1: (498, 38)
✅ Loaded Probe1 ADC2 CH2: (498, 38)
✅ Loaded Probe2 ADC2 CH1: (498, 38)
✅ Loaded Probe2 ADC2 CH2: (498, 38)

Top Important Features:
Max_H: Importance 0.1000
Min_H: Importance 0.0002
Mean_H: Importance 0.0804
Std_H: Importance 0.0142
Mean Deviation_H: Importance 0.0400
RMS_H: Importance 0.1212
Entropy_H: Importance 0.0399
Skewness_Freq_H: Importance 0.0000
Mean_Freq_H: Importance 0.0566
Kurtosis_Freq_H: Importance 0.0142
Variance_H: Importance 0.0217
Min_L: Importance 0.0252
Mean_L: Importance 0.1820
Std_L: Importance 0.0007
Mean Deviation_L: Importance 0.1047
RMS_L: Importance 0.1006
Entropy_L: Importance 0.0185
Variance_L: Importance 0.0800

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0077
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       498
         CH2       1.00      1.00

#### **Sampling Rate 250MSPS**
This section contains data collected using a sampling rate of **250MSPS**. The model is trained on **Probe 1** and tested on **Probe 2**.



In [99]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Experiment Settings
# ==================================================================
# Root folder that contains the per-probe "P<n>_fd" feature directories.
base_directory = r"C:\Users\awm21\Documents\Probe_Vari\Neural_Networks\Features_Vitis"

# ADC number, probe numbers and sampling rate used to build the file names.
adc_num = 2
train_probe_dataset = 1
test_probe_dataset = 2
sample_rate = 250

# Probe labels follow directly from the dataset numbers above.
train_probe = "Probe" + str(train_probe_dataset)
test_probe = "Probe" + str(test_probe_dataset)

# Feature names (Time High, Freq High, Time Low, Freq Low): the same 19
# descriptors are listed once with the "_H" suffix and once with "_L".
_descriptor_names = (
    # time-domain descriptors
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    # frequency-domain descriptors
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [
    f"{descriptor}_{band}"
    for band in ("H", "L")
    for descriptor in _descriptor_names
]

def load_channel_data(probe_name, adc_num, channel, *, rate=None, root=None):
    """Load the saved feature array for one probe / ADC / channel.

    The array is read from
    ``<root>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``
    where ``<n>`` is the trailing number of ``probe_name``. The file contents
    are not cross-checked against ``adc_num``; only the file name is used.

    Parameters
    ----------
    probe_name : str
        Probe label ending in its number, e.g. ``"Probe1"``.
    adc_num : int
        ADC number used in the file name.
    channel : int
        Channel number used in the file name.
    rate : optional
        Sampling-rate token used in the file name. Defaults to the
        notebook-level ``sample_rate``.
    root : str, optional
        Directory that contains the ``P<n>_fd`` folders. Defaults to the
        notebook-level ``base_directory``.

    Returns
    -------
    numpy.ndarray
        The array returned by ``np.load`` for the resolved file.

    Raises
    ------
    FileNotFoundError
        If the resolved file does not exist.
    """
    if rate is None:
        rate = sample_rate
    if root is None:
        root = base_directory

    # Keep the whole trailing number so "Probe12" resolves to "P12_fd" and
    # not "P2_fd"; fall back to the last character when there are no digits.
    stem = probe_name.rstrip("0123456789")
    probe_id = probe_name[len(stem):] or probe_name[-1:]

    folder = os.path.join(root, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Build Train / Test Sets
# ==================================================================
# CH1 rows are labelled 0 and CH2 rows are labelled 1 for both probes.
train_ch0 = load_channel_data(train_probe, adc_num, 1)
train_ch1 = load_channel_data(train_probe, adc_num, 2)
test_ch0 = load_channel_data(test_probe, adc_num, 1)
test_ch1 = load_channel_data(test_probe, adc_num, 2)

X_train = np.vstack((train_ch0, train_ch1))
X_test = np.vstack((test_ch0, test_ch1))
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Standardize Features
# ==================================================================
# Scaling statistics come from the training probe only and are then
# applied unchanged to the test probe.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# First Forest: Rank Features
# ==================================================================
# Both forests in this cell share the same hyperparameters and seed.
_forest_settings = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**_forest_settings)
rf.fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
_importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > _importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the kept columns in column order.
print("\nTop Important Features:")
for feature_idx, _weight in zip(important_features, feature_importances[important_features]):
    print(f"{feature_names[feature_idx]}: Importance {_weight:.4f}")

# ==================================================================
# Second Forest: Train on the Selected Columns
# ==================================================================
rf_selected = RandomForestClassifier(**_forest_settings)
rf_selected.fit(X_train_selected, y_train)

# One output folder per (train probe, test probe, ADC, sampling rate).
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model together with the scaler and the selected column
# indices it was trained with.
for _artifact, _file_name in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(_artifact, os.path.join(model_dir, _file_name))

# Hard labels and the probability of class 1 for the test probe.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Evaluate on the Test Probe
# ==================================================================
print("\nModel Performance After Feature Selection:")
# Log loss is scored on probabilities; every other metric on hard labels.
for _label, _metric, _scores in (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
):
    print(f"{_label}: {_metric(y_test, _scores):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix rows are true labels, columns are predicted labels.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC2 CH1: (398, 38)
✅ Loaded Probe1 ADC2 CH2: (398, 38)
✅ Loaded Probe2 ADC2 CH1: (398, 38)
✅ Loaded Probe2 ADC2 CH2: (398, 38)

Top Important Features:
Max_H: Importance 0.1000
Mean_H: Importance 0.0600
Std_H: Importance 0.0800
Mean Deviation_H: Importance 0.0200
RMS_H: Importance 0.0615
Peak-to-Peak_H: Importance 0.0200
Entropy_H: Importance 0.0800
Mean_Freq_H: Importance 0.0600
Irregularity_H: Importance 0.0185
Mean_L: Importance 0.1000
Std_L: Importance 0.0800
Mean Deviation_L: Importance 0.0600
RMS_L: Importance 0.0800
Entropy_L: Importance 0.0600
Mean_Freq_L: Importance 0.0400
Variance_L: Importance 0.0800

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0111
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       398
         CH2       1.00      1.00      1.00       398

    accuracy                 

#### **Sampling Rate 300MSPS**
This section contains data collected using a sampling rate of **300MSPS**. The model is trained on **Probe 1** and tested on **Probe 2**.



In [100]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Experiment Settings
# ==================================================================
# Root folder that contains the per-probe "P<n>_fd" feature directories.
base_directory = r"C:\Users\awm21\Documents\Probe_Vari\Neural_Networks\Features_Vitis"

# ADC number, probe numbers and sampling rate used to build the file names.
adc_num = 2
train_probe_dataset = 1
test_probe_dataset = 2
sample_rate = 300

# Probe labels follow directly from the dataset numbers above.
train_probe = "Probe" + str(train_probe_dataset)
test_probe = "Probe" + str(test_probe_dataset)

# Feature names (Time High, Freq High, Time Low, Freq Low): the same 19
# descriptors are listed once with the "_H" suffix and once with "_L".
_descriptor_names = (
    # time-domain descriptors
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    # frequency-domain descriptors
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [
    f"{descriptor}_{band}"
    for band in ("H", "L")
    for descriptor in _descriptor_names
]

def load_channel_data(probe_name, adc_num, channel, *, rate=None, root=None):
    """Load the saved feature array for one probe / ADC / channel.

    The array is read from
    ``<root>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``
    where ``<n>`` is the trailing number of ``probe_name``. The file contents
    are not cross-checked against ``adc_num``; only the file name is used.

    Parameters
    ----------
    probe_name : str
        Probe label ending in its number, e.g. ``"Probe1"``.
    adc_num : int
        ADC number used in the file name.
    channel : int
        Channel number used in the file name.
    rate : optional
        Sampling-rate token used in the file name. Defaults to the
        notebook-level ``sample_rate``.
    root : str, optional
        Directory that contains the ``P<n>_fd`` folders. Defaults to the
        notebook-level ``base_directory``.

    Returns
    -------
    numpy.ndarray
        The array returned by ``np.load`` for the resolved file.

    Raises
    ------
    FileNotFoundError
        If the resolved file does not exist.
    """
    if rate is None:
        rate = sample_rate
    if root is None:
        root = base_directory

    # Keep the whole trailing number so "Probe12" resolves to "P12_fd" and
    # not "P2_fd"; fall back to the last character when there are no digits.
    stem = probe_name.rstrip("0123456789")
    probe_id = probe_name[len(stem):] or probe_name[-1:]

    folder = os.path.join(root, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Build Train / Test Sets
# ==================================================================
# CH1 rows are labelled 0 and CH2 rows are labelled 1 for both probes.
train_ch0 = load_channel_data(train_probe, adc_num, 1)
train_ch1 = load_channel_data(train_probe, adc_num, 2)
test_ch0 = load_channel_data(test_probe, adc_num, 1)
test_ch1 = load_channel_data(test_probe, adc_num, 2)

X_train = np.vstack((train_ch0, train_ch1))
X_test = np.vstack((test_ch0, test_ch1))
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Standardize Features
# ==================================================================
# Scaling statistics come from the training probe only and are then
# applied unchanged to the test probe.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# First Forest: Rank Features
# ==================================================================
# Both forests in this cell share the same hyperparameters and seed.
_forest_settings = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**_forest_settings)
rf.fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
_importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > _importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the kept columns in column order.
print("\nTop Important Features:")
for feature_idx, _weight in zip(important_features, feature_importances[important_features]):
    print(f"{feature_names[feature_idx]}: Importance {_weight:.4f}")

# ==================================================================
# Second Forest: Train on the Selected Columns
# ==================================================================
rf_selected = RandomForestClassifier(**_forest_settings)
rf_selected.fit(X_train_selected, y_train)

# One output folder per (train probe, test probe, ADC, sampling rate).
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model together with the scaler and the selected column
# indices it was trained with.
for _artifact, _file_name in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(_artifact, os.path.join(model_dir, _file_name))

# Hard labels and the probability of class 1 for the test probe.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Evaluate on the Test Probe
# ==================================================================
print("\nModel Performance After Feature Selection:")
# Log loss is scored on probabilities; every other metric on hard labels.
for _label, _metric, _scores in (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
):
    print(f"{_label}: {_metric(y_test, _scores):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix rows are true labels, columns are predicted labels.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC2 CH1: (332, 38)
✅ Loaded Probe1 ADC2 CH2: (332, 38)
✅ Loaded Probe2 ADC2 CH1: (331, 38)
✅ Loaded Probe2 ADC2 CH2: (331, 38)

Top Important Features:
Max_H: Importance 0.1022
Mean_H: Importance 0.0804
Std_H: Importance 0.0008
Mean Deviation_H: Importance 0.0001
RMS_H: Importance 0.1231
Entropy_H: Importance 0.0188
Skewness_Freq_H: Importance 0.0000
Mean_Freq_H: Importance 0.0021
Irregularity_H: Importance 0.0344
Variance_H: Importance 0.0200
Min_L: Importance 0.0205
Mean_L: Importance 0.1809
Std_L: Importance 0.0606
Mean Deviation_L: Importance 0.1000
RMS_L: Importance 0.1215
Entropy_L: Importance 0.0196
Mean_Freq_L: Importance 0.0171
Kurtosis_Freq_L: Importance 0.0170
Variance_L: Importance 0.0808

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0394
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00    

### Training **Probe 1** data and Testing on **Probe 3** data
To evaluate how well the model trained on Probe 1 generalizes to a different probe, it is tested here on Probe 3 data.

#### **Sampling Rate 10MSPS**
This section contains data collected using a sampling rate of **10MSPS**. The model is trained on **Probe 1** and tested on **Probe 3**.

In [101]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Experiment Settings
# ==================================================================
# Root folder that contains the per-probe "P<n>_fd" feature directories.
base_directory = r"C:\Users\awm21\Documents\Probe_Vari\Neural_Networks\Features_Vitis"

# ADC number, probe numbers and sampling rate used to build the file names.
adc_num = 2
train_probe_dataset = 1
test_probe_dataset = 3
sample_rate = 10

# Probe labels follow directly from the dataset numbers above.
train_probe = "Probe" + str(train_probe_dataset)
test_probe = "Probe" + str(test_probe_dataset)

# Feature names (Time High, Freq High, Time Low, Freq Low): the same 19
# descriptors are listed once with the "_H" suffix and once with "_L".
_descriptor_names = (
    # time-domain descriptors
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    # frequency-domain descriptors
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [
    f"{descriptor}_{band}"
    for band in ("H", "L")
    for descriptor in _descriptor_names
]

def load_channel_data(probe_name, adc_num, channel, *, rate=None, root=None):
    """Load the saved feature array for one probe / ADC / channel.

    The array is read from
    ``<root>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``
    where ``<n>`` is the trailing number of ``probe_name``. The file contents
    are not cross-checked against ``adc_num``; only the file name is used.

    Parameters
    ----------
    probe_name : str
        Probe label ending in its number, e.g. ``"Probe1"``.
    adc_num : int
        ADC number used in the file name.
    channel : int
        Channel number used in the file name.
    rate : optional
        Sampling-rate token used in the file name. Defaults to the
        notebook-level ``sample_rate``.
    root : str, optional
        Directory that contains the ``P<n>_fd`` folders. Defaults to the
        notebook-level ``base_directory``.

    Returns
    -------
    numpy.ndarray
        The array returned by ``np.load`` for the resolved file.

    Raises
    ------
    FileNotFoundError
        If the resolved file does not exist.
    """
    if rate is None:
        rate = sample_rate
    if root is None:
        root = base_directory

    # Keep the whole trailing number so "Probe12" resolves to "P12_fd" and
    # not "P2_fd"; fall back to the last character when there are no digits.
    stem = probe_name.rstrip("0123456789")
    probe_id = probe_name[len(stem):] or probe_name[-1:]

    folder = os.path.join(root, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Build Train / Test Sets
# ==================================================================
# CH1 rows are labelled 0 and CH2 rows are labelled 1 for both probes.
train_ch0 = load_channel_data(train_probe, adc_num, 1)
train_ch1 = load_channel_data(train_probe, adc_num, 2)
test_ch0 = load_channel_data(test_probe, adc_num, 1)
test_ch1 = load_channel_data(test_probe, adc_num, 2)

X_train = np.vstack((train_ch0, train_ch1))
X_test = np.vstack((test_ch0, test_ch1))
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Standardize Features
# ==================================================================
# Scaling statistics come from the training probe only and are then
# applied unchanged to the test probe.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# First Forest: Rank Features
# ==================================================================
# Both forests in this cell share the same hyperparameters and seed.
_forest_settings = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**_forest_settings)
rf.fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
_importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > _importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the kept columns in column order.
print("\nTop Important Features:")
for feature_idx, _weight in zip(important_features, feature_importances[important_features]):
    print(f"{feature_names[feature_idx]}: Importance {_weight:.4f}")

# ==================================================================
# Second Forest: Train on the Selected Columns
# ==================================================================
rf_selected = RandomForestClassifier(**_forest_settings)
rf_selected.fit(X_train_selected, y_train)

# One output folder per (train probe, test probe, ADC, sampling rate).
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model together with the scaler and the selected column
# indices it was trained with.
for _artifact, _file_name in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(_artifact, os.path.join(model_dir, _file_name))

# Hard labels and the probability of class 1 for the test probe.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Evaluate on the Test Probe
# ==================================================================
print("\nModel Performance After Feature Selection:")
# Log loss is scored on probabilities; every other metric on hard labels.
for _label, _metric, _scores in (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
):
    print(f"{_label}: {_metric(y_test, _scores):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix rows are true labels, columns are predicted labels.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC2 CH1: (9998, 38)
✅ Loaded Probe1 ADC2 CH2: (9998, 38)
✅ Loaded Probe3 ADC2 CH1: (9998, 38)
✅ Loaded Probe3 ADC2 CH2: (9998, 38)

Top Important Features:
Max_H: Importance 0.1456
Min_H: Importance 0.0167
Mean_H: Importance 0.0920
Mean Deviation_H: Importance 0.0084
RMS_H: Importance 0.1570
Skewness_H: Importance 0.0075
Kurtosis_H: Importance 0.0072
Entropy_H: Importance 0.1037
Mean_Freq_H: Importance 0.0742
Variance_H: Importance 0.0309
Max_L: Importance 0.0124
Min_L: Importance 0.0557
Mean_L: Importance 0.0467
RMS_L: Importance 0.0719
Skewness_L: Importance 0.0199
Kurtosis_L: Importance 0.0082
Entropy_L: Importance 0.0349
Mean_Freq_L: Importance 0.0153
Variance_L: Importance 0.0319

Model Performance After Feature Selection:
Test Accuracy: 0.9954
Balanced Accuracy: 0.9954
MCC: 0.9908
Log Loss: 0.0212
F1 Score: 0.9954
Recall: 0.9935
Precision: 0.9973
              precision    recall  f1-score   support

         CH1       0.99      1.00      1.00      9998
         

#### **Sampling Rate 20MSPS**
This section contains data collected using a sampling rate of **20MSPS**. The model is trained on **Probe 1** and tested on **Probe 3**.



In [102]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Experiment Settings
# ==================================================================
# Root folder that contains the per-probe "P<n>_fd" feature directories.
base_directory = r"C:\Users\awm21\Documents\Probe_Vari\Neural_Networks\Features_Vitis"

# ADC number, probe numbers and sampling rate used to build the file names.
adc_num = 2
train_probe_dataset = 1
test_probe_dataset = 3
sample_rate = 20

# Probe labels follow directly from the dataset numbers above.
train_probe = "Probe" + str(train_probe_dataset)
test_probe = "Probe" + str(test_probe_dataset)

# Feature names (Time High, Freq High, Time Low, Freq Low): the same 19
# descriptors are listed once with the "_H" suffix and once with "_L".
_descriptor_names = (
    # time-domain descriptors
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    # frequency-domain descriptors
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [
    f"{descriptor}_{band}"
    for band in ("H", "L")
    for descriptor in _descriptor_names
]

def load_channel_data(probe_name, adc_num, channel, *, rate=None, root=None):
    """Load the saved feature array for one probe / ADC / channel.

    The file is looked up at
    ``<root>/P<id>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``
    where ``<id>`` is the trailing number of ``probe_name``.

    Args:
        probe_name: Probe label such as ``"Probe1"``.
        adc_num: ADC index used in the file name.
        channel: Channel index used in the file name.
        rate: Sampling rate used in the file name. Defaults to the
            module-level ``sample_rate``.
        root: Directory holding the ``P<id>_fd`` folders. Defaults to the
            module-level ``base_directory``.

    Returns:
        The array stored in the ``.npy`` file.

    Raises:
        FileNotFoundError: If the expected file does not exist.

    Note:
        The loaded data is not cross-checked against ``adc_num``; the ADC
        index only selects which file is read.
    """
    if rate is None:
        rate = sample_rate
    if root is None:
        root = base_directory

    # Use the whole trailing number so a multi-digit probe ("Probe10") maps
    # to "P10_fd" instead of "P0_fd". Names without trailing digits keep the
    # previous behaviour of using their last character.
    stem = probe_name.rstrip("0123456789")
    probe_id = probe_name[len(stem):] or probe_name[-1]

    file_path = os.path.join(
        root,
        f"P{probe_id}_fd",
        f"fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate}_MSPS.npy",
    )
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ------------------------------------------------------------------
# Load features: channel 1 is labelled 0, channel 2 is labelled 1
# ------------------------------------------------------------------
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Stack both channels row-wise; the label vectors follow the same order.
X_train = np.vstack((train_ch0, train_ch1))
X_test = np.vstack((test_ch0, test_ch1))
y_train = np.concatenate((np.zeros(len(train_ch0)), np.ones(len(train_ch1))))
y_test = np.concatenate((np.zeros(len(test_ch0)), np.ones(len(test_ch1))))

# ------------------------------------------------------------------
# Standardise using statistics fitted on the training data only
# ------------------------------------------------------------------
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ------------------------------------------------------------------
# First pass: forest on all features, providing feature importances
# ------------------------------------------------------------------
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

print("\nTop Important Features:")
for feature_idx in important_features:
    importance = feature_importances[feature_idx]
    print(f"{feature_names[feature_idx]}: Importance {importance:.4f}")

# ------------------------------------------------------------------
# Second pass: same hyper-parameters, selected columns only
# ------------------------------------------------------------------
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# Output folder is named after the probes, ADC and sampling rate of the run.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected column indices.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and the predicted probability of class 1.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ------------------------------------------------------------------
# Evaluation on the test probe
# ------------------------------------------------------------------
print("\nModel Performance After Feature Selection:")
test_accuracy = accuracy_score(y_test, y_pred_selected)
print(f"Test Accuracy: {test_accuracy:.4f}")
balanced_accuracy = balanced_accuracy_score(y_test, y_pred_selected)
print(f"Balanced Accuracy: {balanced_accuracy:.4f}")
mcc = matthews_corrcoef(y_test, y_pred_selected)
print(f"MCC: {mcc:.4f}")
test_log_loss = log_loss(y_test, y_probs_selected)
print(f"Log Loss: {test_log_loss:.4f}")
f1 = f1_score(y_test, y_pred_selected)
print(f"F1 Score: {f1:.4f}")
recall = recall_score(y_test, y_pred_selected)
print(f"Recall: {recall:.4f}")
precision = precision_score(y_test, y_pred_selected)
print(f"Precision: {precision:.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix plus the false-positive / false-negative rates
# computed from its first and second row respectively.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC2 CH1: (4998, 38)
✅ Loaded Probe1 ADC2 CH2: (4998, 38)
✅ Loaded Probe3 ADC2 CH1: (4998, 38)
✅ Loaded Probe3 ADC2 CH2: (4998, 38)

Top Important Features:
Max_H: Importance 0.0831
Min_H: Importance 0.0256
Mean_H: Importance 0.1231
Mean Deviation_H: Importance 0.0052
RMS_H: Importance 0.2181
Skewness_H: Importance 0.0008
Entropy_H: Importance 0.0228
Spread_H: Importance 0.0005
Mean_Freq_H: Importance 0.0733
Variance_H: Importance 0.0369
Min_L: Importance 0.0283
Mean_L: Importance 0.1224
Std_L: Importance 0.0017
RMS_L: Importance 0.1415
Kurtosis_L: Importance 0.0016
Entropy_L: Importance 0.0361
Spread_L: Importance 0.0008
Mean_Freq_L: Importance 0.0120
Variance_L: Importance 0.0648

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0002
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      4998
         CH2 

#### **Sampling Rate 30MSPS**
This section contains data collected using a sampling rate of **30MSPS**. The model is trained on **Probe 1** and tested on **Probe 3**.



In [103]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ------------------------------------------------------------------
# Run configuration
# ------------------------------------------------------------------
# Root folder that is searched for the feature files of each probe.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC index, probe numbers used for training / testing, and the sampling
# rate of this run.
adc_num, train_probe_dataset, test_probe_dataset, sample_rate = 2, 1, 3, 30

# Probe labels derived from the dataset numbers above.
train_probe, test_probe = (
    "Probe" + str(train_probe_dataset),
    "Probe" + str(test_probe_dataset),
)

# Feature names, grouped in the order:
# time (high), frequency (high), time (low), frequency (low).
feature_names = (
    # Time features, "_H" group
    [
        "Max_H", "Min_H", "Mean_H", "Std_H", "Mean Deviation_H",
        "RMS_H", "Skewness_H", "Kurtosis_H", "Peak-to-Peak_H",
        "Zero Crossing Rate_H",
    ]
    # Frequency features, "_H" group
    + [
        "Centroid_H", "Entropy_H", "Spread_H", "Skewness_Freq_H",
        "Mean_Freq_H", "Kurtosis_Freq_H", "Irregularity_H", "Variance_H",
        "Dominant_Freq_H",
    ]
    # Time features, "_L" group
    + [
        "Max_L", "Min_L", "Mean_L", "Std_L", "Mean Deviation_L",
        "RMS_L", "Skewness_L", "Kurtosis_L", "Peak-to-Peak_L",
        "Zero Crossing Rate_L",
    ]
    # Frequency features, "_L" group
    + [
        "Centroid_L", "Entropy_L", "Spread_L", "Skewness_Freq_L",
        "Mean_Freq_L", "Kurtosis_Freq_L", "Irregularity_L", "Variance_L",
        "Dominant_Freq_L",
    ]
)

def load_channel_data(probe_name, adc_num, channel, *, rate=None, root=None):
    """Load the saved feature array for one probe / ADC / channel.

    The file is looked up at
    ``<root>/P<id>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``
    where ``<id>`` is the trailing number of ``probe_name``.

    Args:
        probe_name: Probe label such as ``"Probe1"``.
        adc_num: ADC index used in the file name.
        channel: Channel index used in the file name.
        rate: Sampling rate used in the file name. Defaults to the
            module-level ``sample_rate``.
        root: Directory holding the ``P<id>_fd`` folders. Defaults to the
            module-level ``base_directory``.

    Returns:
        The array stored in the ``.npy`` file.

    Raises:
        FileNotFoundError: If the expected file does not exist.

    Note:
        The loaded data is not cross-checked against ``adc_num``; the ADC
        index only selects which file is read.
    """
    if rate is None:
        rate = sample_rate
    if root is None:
        root = base_directory

    # Use the whole trailing number so a multi-digit probe ("Probe10") maps
    # to "P10_fd" instead of "P0_fd". Names without trailing digits keep the
    # previous behaviour of using their last character.
    stem = probe_name.rstrip("0123456789")
    probe_id = probe_name[len(stem):] or probe_name[-1]

    file_path = os.path.join(
        root,
        f"P{probe_id}_fd",
        f"fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate}_MSPS.npy",
    )
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ------------------------------------------------------------------
# Load features: channel 1 is labelled 0, channel 2 is labelled 1
# ------------------------------------------------------------------
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Stack both channels row-wise; the label vectors follow the same order.
X_train = np.vstack((train_ch0, train_ch1))
X_test = np.vstack((test_ch0, test_ch1))
y_train = np.concatenate((np.zeros(len(train_ch0)), np.ones(len(train_ch1))))
y_test = np.concatenate((np.zeros(len(test_ch0)), np.ones(len(test_ch1))))

# ------------------------------------------------------------------
# Standardise using statistics fitted on the training data only
# ------------------------------------------------------------------
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ------------------------------------------------------------------
# First pass: forest on all features, providing feature importances
# ------------------------------------------------------------------
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

print("\nTop Important Features:")
for feature_idx in important_features:
    importance = feature_importances[feature_idx]
    print(f"{feature_names[feature_idx]}: Importance {importance:.4f}")

# ------------------------------------------------------------------
# Second pass: same hyper-parameters, selected columns only
# ------------------------------------------------------------------
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# Output folder is named after the probes, ADC and sampling rate of the run.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected column indices.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and the predicted probability of class 1.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ------------------------------------------------------------------
# Evaluation on the test probe
# ------------------------------------------------------------------
print("\nModel Performance After Feature Selection:")
test_accuracy = accuracy_score(y_test, y_pred_selected)
print(f"Test Accuracy: {test_accuracy:.4f}")
balanced_accuracy = balanced_accuracy_score(y_test, y_pred_selected)
print(f"Balanced Accuracy: {balanced_accuracy:.4f}")
mcc = matthews_corrcoef(y_test, y_pred_selected)
print(f"MCC: {mcc:.4f}")
test_log_loss = log_loss(y_test, y_probs_selected)
print(f"Log Loss: {test_log_loss:.4f}")
f1 = f1_score(y_test, y_pred_selected)
print(f"F1 Score: {f1:.4f}")
recall = recall_score(y_test, y_pred_selected)
print(f"Recall: {recall:.4f}")
precision = precision_score(y_test, y_pred_selected)
print(f"Precision: {precision:.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix plus the false-positive / false-negative rates
# computed from its first and second row respectively.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC2 CH1: (3331, 38)
✅ Loaded Probe1 ADC2 CH2: (3332, 38)
✅ Loaded Probe3 ADC2 CH1: (3331, 38)
✅ Loaded Probe3 ADC2 CH2: (3332, 38)

Top Important Features:
Max_H: Importance 0.0189
Min_H: Importance 0.0060
Mean_H: Importance 0.0989
Mean Deviation_H: Importance 0.0030
RMS_H: Importance 0.1652
Centroid_H: Importance 0.0021
Entropy_H: Importance 0.0099
Spread_H: Importance 0.0010
Mean_Freq_H: Importance 0.0180
Variance_H: Importance 0.0314
Max_L: Importance 0.0052
Min_L: Importance 0.1486
Mean_L: Importance 0.1112
RMS_L: Importance 0.2474
Peak-to-Peak_L: Importance 0.0012
Entropy_L: Importance 0.0229
Mean_Freq_L: Importance 0.0088
Irregularity_L: Importance 0.0010
Variance_L: Importance 0.0910

Model Performance After Feature Selection:
Test Accuracy: 0.9868
Balanced Accuracy: 0.9868
MCC: 0.9739
Log Loss: 0.0370
F1 Score: 0.9870
Recall: 1.0000
Precision: 0.9743
              precision    recall  f1-score   support

         CH1       1.00      0.97      0.99      3331
   

#### **Sampling Rate 40MSPS**
This section contains data collected using a sampling rate of **40MSPS**. The model is trained on **Probe 1** and tested on **Probe 3**.



In [104]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ------------------------------------------------------------------
# Run configuration
# ------------------------------------------------------------------
# Root folder that is searched for the feature files of each probe.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC index, probe numbers used for training / testing, and the sampling
# rate of this run.
adc_num, train_probe_dataset, test_probe_dataset, sample_rate = 2, 1, 3, 40

# Probe labels derived from the dataset numbers above.
train_probe, test_probe = (
    "Probe" + str(train_probe_dataset),
    "Probe" + str(test_probe_dataset),
)

# Feature names, grouped in the order:
# time (high), frequency (high), time (low), frequency (low).
feature_names = (
    # Time features, "_H" group
    [
        "Max_H", "Min_H", "Mean_H", "Std_H", "Mean Deviation_H",
        "RMS_H", "Skewness_H", "Kurtosis_H", "Peak-to-Peak_H",
        "Zero Crossing Rate_H",
    ]
    # Frequency features, "_H" group
    + [
        "Centroid_H", "Entropy_H", "Spread_H", "Skewness_Freq_H",
        "Mean_Freq_H", "Kurtosis_Freq_H", "Irregularity_H", "Variance_H",
        "Dominant_Freq_H",
    ]
    # Time features, "_L" group
    + [
        "Max_L", "Min_L", "Mean_L", "Std_L", "Mean Deviation_L",
        "RMS_L", "Skewness_L", "Kurtosis_L", "Peak-to-Peak_L",
        "Zero Crossing Rate_L",
    ]
    # Frequency features, "_L" group
    + [
        "Centroid_L", "Entropy_L", "Spread_L", "Skewness_Freq_L",
        "Mean_Freq_L", "Kurtosis_Freq_L", "Irregularity_L", "Variance_L",
        "Dominant_Freq_L",
    ]
)

def load_channel_data(probe_name, adc_num, channel, *, rate=None, root=None):
    """Load the saved feature array for one probe / ADC / channel.

    The file is looked up at
    ``<root>/P<id>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``
    where ``<id>`` is the trailing number of ``probe_name``.

    Args:
        probe_name: Probe label such as ``"Probe1"``.
        adc_num: ADC index used in the file name.
        channel: Channel index used in the file name.
        rate: Sampling rate used in the file name. Defaults to the
            module-level ``sample_rate``.
        root: Directory holding the ``P<id>_fd`` folders. Defaults to the
            module-level ``base_directory``.

    Returns:
        The array stored in the ``.npy`` file.

    Raises:
        FileNotFoundError: If the expected file does not exist.

    Note:
        The loaded data is not cross-checked against ``adc_num``; the ADC
        index only selects which file is read.
    """
    if rate is None:
        rate = sample_rate
    if root is None:
        root = base_directory

    # Use the whole trailing number so a multi-digit probe ("Probe10") maps
    # to "P10_fd" instead of "P0_fd". Names without trailing digits keep the
    # previous behaviour of using their last character.
    stem = probe_name.rstrip("0123456789")
    probe_id = probe_name[len(stem):] or probe_name[-1]

    file_path = os.path.join(
        root,
        f"P{probe_id}_fd",
        f"fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate}_MSPS.npy",
    )
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ------------------------------------------------------------------
# Load features: channel 1 is labelled 0, channel 2 is labelled 1
# ------------------------------------------------------------------
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Stack both channels row-wise; the label vectors follow the same order.
X_train = np.vstack((train_ch0, train_ch1))
X_test = np.vstack((test_ch0, test_ch1))
y_train = np.concatenate((np.zeros(len(train_ch0)), np.ones(len(train_ch1))))
y_test = np.concatenate((np.zeros(len(test_ch0)), np.ones(len(test_ch1))))

# ------------------------------------------------------------------
# Standardise using statistics fitted on the training data only
# ------------------------------------------------------------------
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ------------------------------------------------------------------
# First pass: forest on all features, providing feature importances
# ------------------------------------------------------------------
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

print("\nTop Important Features:")
for feature_idx in important_features:
    importance = feature_importances[feature_idx]
    print(f"{feature_names[feature_idx]}: Importance {importance:.4f}")

# ------------------------------------------------------------------
# Second pass: same hyper-parameters, selected columns only
# ------------------------------------------------------------------
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# Output folder is named after the probes, ADC and sampling rate of the run.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected column indices.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and the predicted probability of class 1.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ------------------------------------------------------------------
# Evaluation on the test probe
# ------------------------------------------------------------------
print("\nModel Performance After Feature Selection:")
test_accuracy = accuracy_score(y_test, y_pred_selected)
print(f"Test Accuracy: {test_accuracy:.4f}")
balanced_accuracy = balanced_accuracy_score(y_test, y_pred_selected)
print(f"Balanced Accuracy: {balanced_accuracy:.4f}")
mcc = matthews_corrcoef(y_test, y_pred_selected)
print(f"MCC: {mcc:.4f}")
test_log_loss = log_loss(y_test, y_probs_selected)
print(f"Log Loss: {test_log_loss:.4f}")
f1 = f1_score(y_test, y_pred_selected)
print(f"F1 Score: {f1:.4f}")
recall = recall_score(y_test, y_pred_selected)
print(f"Recall: {recall:.4f}")
precision = precision_score(y_test, y_pred_selected)
print(f"Precision: {precision:.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix plus the false-positive / false-negative rates
# computed from its first and second row respectively.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC2 CH1: (2481, 38)
✅ Loaded Probe1 ADC2 CH2: (2481, 38)
✅ Loaded Probe3 ADC2 CH1: (2481, 38)
✅ Loaded Probe3 ADC2 CH2: (2481, 38)

Top Important Features:
Max_H: Importance 0.0651
Min_H: Importance 0.0282
Mean_H: Importance 0.0975
Mean Deviation_H: Importance 0.0165
RMS_H: Importance 0.1401
Centroid_H: Importance 0.0000
Entropy_H: Importance 0.0282
Spread_H: Importance 0.0001
Mean_Freq_H: Importance 0.0574
Variance_H: Importance 0.0608
Max_L: Importance 0.0136
Min_L: Importance 0.0274
Mean_L: Importance 0.1900
RMS_L: Importance 0.1277
Skewness_L: Importance 0.0002
Entropy_L: Importance 0.0345
Mean_Freq_L: Importance 0.0111
Variance_L: Importance 0.1015

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0015
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      2481
         CH2       1.00      1.00      1.

#### **Sampling Rate 50MSPS**
This section contains data collected using a sampling rate of **50MSPS**. The model is trained on **Probe 1** and tested on **Probe 3**.



In [105]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ------------------------------------------------------------------
# Run configuration
# ------------------------------------------------------------------
# Root folder that is searched for the feature files of each probe.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC index, probe numbers used for training / testing, and the sampling
# rate of this run.
adc_num, train_probe_dataset, test_probe_dataset, sample_rate = 2, 1, 3, 50

# Probe labels derived from the dataset numbers above.
train_probe, test_probe = (
    "Probe" + str(train_probe_dataset),
    "Probe" + str(test_probe_dataset),
)

# Feature names, grouped in the order:
# time (high), frequency (high), time (low), frequency (low).
feature_names = (
    # Time features, "_H" group
    [
        "Max_H", "Min_H", "Mean_H", "Std_H", "Mean Deviation_H",
        "RMS_H", "Skewness_H", "Kurtosis_H", "Peak-to-Peak_H",
        "Zero Crossing Rate_H",
    ]
    # Frequency features, "_H" group
    + [
        "Centroid_H", "Entropy_H", "Spread_H", "Skewness_Freq_H",
        "Mean_Freq_H", "Kurtosis_Freq_H", "Irregularity_H", "Variance_H",
        "Dominant_Freq_H",
    ]
    # Time features, "_L" group
    + [
        "Max_L", "Min_L", "Mean_L", "Std_L", "Mean Deviation_L",
        "RMS_L", "Skewness_L", "Kurtosis_L", "Peak-to-Peak_L",
        "Zero Crossing Rate_L",
    ]
    # Frequency features, "_L" group
    + [
        "Centroid_L", "Entropy_L", "Spread_L", "Skewness_Freq_L",
        "Mean_Freq_L", "Kurtosis_Freq_L", "Irregularity_L", "Variance_L",
        "Dominant_Freq_L",
    ]
)

def load_channel_data(probe_name, adc_num, channel, *, rate=None, root=None):
    """Load the saved feature array for one probe / ADC / channel.

    The file is looked up at
    ``<root>/P<id>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``
    where ``<id>`` is the trailing number of ``probe_name``.

    Args:
        probe_name: Probe label such as ``"Probe1"``.
        adc_num: ADC index used in the file name.
        channel: Channel index used in the file name.
        rate: Sampling rate used in the file name. Defaults to the
            module-level ``sample_rate``.
        root: Directory holding the ``P<id>_fd`` folders. Defaults to the
            module-level ``base_directory``.

    Returns:
        The array stored in the ``.npy`` file.

    Raises:
        FileNotFoundError: If the expected file does not exist.

    Note:
        The loaded data is not cross-checked against ``adc_num``; the ADC
        index only selects which file is read.
    """
    if rate is None:
        rate = sample_rate
    if root is None:
        root = base_directory

    # Use the whole trailing number so a multi-digit probe ("Probe10") maps
    # to "P10_fd" instead of "P0_fd". Names without trailing digits keep the
    # previous behaviour of using their last character.
    stem = probe_name.rstrip("0123456789")
    probe_id = probe_name[len(stem):] or probe_name[-1]

    file_path = os.path.join(
        root,
        f"P{probe_id}_fd",
        f"fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate}_MSPS.npy",
    )
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ------------------------------------------------------------------
# Load features: channel 1 is labelled 0, channel 2 is labelled 1
# ------------------------------------------------------------------
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Stack both channels row-wise; the label vectors follow the same order.
X_train = np.vstack((train_ch0, train_ch1))
X_test = np.vstack((test_ch0, test_ch1))
y_train = np.concatenate((np.zeros(len(train_ch0)), np.ones(len(train_ch1))))
y_test = np.concatenate((np.zeros(len(test_ch0)), np.ones(len(test_ch1))))

# ------------------------------------------------------------------
# Standardise using statistics fitted on the training data only
# ------------------------------------------------------------------
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ------------------------------------------------------------------
# First pass: forest on all features, providing feature importances
# ------------------------------------------------------------------
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

print("\nTop Important Features:")
for feature_idx in important_features:
    importance = feature_importances[feature_idx]
    print(f"{feature_names[feature_idx]}: Importance {importance:.4f}")

# ------------------------------------------------------------------
# Second pass: same hyper-parameters, selected columns only
# ------------------------------------------------------------------
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# Output folder is named after the probes, ADC and sampling rate of the run.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected column indices.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and the predicted probability of class 1.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ------------------------------------------------------------------
# Evaluation on the test probe
# ------------------------------------------------------------------
print("\nModel Performance After Feature Selection:")
test_accuracy = accuracy_score(y_test, y_pred_selected)
print(f"Test Accuracy: {test_accuracy:.4f}")
balanced_accuracy = balanced_accuracy_score(y_test, y_pred_selected)
print(f"Balanced Accuracy: {balanced_accuracy:.4f}")
mcc = matthews_corrcoef(y_test, y_pred_selected)
print(f"MCC: {mcc:.4f}")
test_log_loss = log_loss(y_test, y_probs_selected)
print(f"Log Loss: {test_log_loss:.4f}")
f1 = f1_score(y_test, y_pred_selected)
print(f"F1 Score: {f1:.4f}")
recall = recall_score(y_test, y_pred_selected)
print(f"Recall: {recall:.4f}")
precision = precision_score(y_test, y_pred_selected)
print(f"Precision: {precision:.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix plus the false-positive / false-negative rates
# computed from its first and second row respectively.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC2 CH1: (1998, 38)
✅ Loaded Probe1 ADC2 CH2: (1998, 38)
✅ Loaded Probe3 ADC2 CH1: (1998, 38)
✅ Loaded Probe3 ADC2 CH2: (1998, 38)

Top Important Features:
Max_H: Importance 0.0565
Min_H: Importance 0.0079
Mean_H: Importance 0.0818
Std_H: Importance 0.0270
Mean Deviation_H: Importance 0.0008
RMS_H: Importance 0.1352
Centroid_H: Importance 0.0004
Entropy_H: Importance 0.0402
Mean_Freq_H: Importance 0.0817
Variance_H: Importance 0.0861
Max_L: Importance 0.0042
Min_L: Importance 0.0007
Mean_L: Importance 0.1734
Mean Deviation_L: Importance 0.0106
RMS_L: Importance 0.1227
Skewness_L: Importance 0.0001
Entropy_L: Importance 0.0432
Spread_L: Importance 0.0002
Variance_L: Importance 0.1273

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0001
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      1998
         CH

#### **Sampling Rate 100MSPS**
This section contains data collected using a sampling rate of **100MSPS**. The model is trained on **Probe 1** and tested on **Probe 3**.



In [106]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ------------------------------------------------------------------
# Run configuration
# ------------------------------------------------------------------
# Root folder that is searched for the feature files of each probe.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC index, probe numbers used for training / testing, and the sampling
# rate of this run.
adc_num, train_probe_dataset, test_probe_dataset, sample_rate = 2, 1, 3, 100

# Probe labels derived from the dataset numbers above.
train_probe, test_probe = (
    "Probe" + str(train_probe_dataset),
    "Probe" + str(test_probe_dataset),
)

# Feature names, grouped in the order:
# time (high), frequency (high), time (low), frequency (low).
feature_names = (
    # Time features, "_H" group
    [
        "Max_H", "Min_H", "Mean_H", "Std_H", "Mean Deviation_H",
        "RMS_H", "Skewness_H", "Kurtosis_H", "Peak-to-Peak_H",
        "Zero Crossing Rate_H",
    ]
    # Frequency features, "_H" group
    + [
        "Centroid_H", "Entropy_H", "Spread_H", "Skewness_Freq_H",
        "Mean_Freq_H", "Kurtosis_Freq_H", "Irregularity_H", "Variance_H",
        "Dominant_Freq_H",
    ]
    # Time features, "_L" group
    + [
        "Max_L", "Min_L", "Mean_L", "Std_L", "Mean Deviation_L",
        "RMS_L", "Skewness_L", "Kurtosis_L", "Peak-to-Peak_L",
        "Zero Crossing Rate_L",
    ]
    # Frequency features, "_L" group
    + [
        "Centroid_L", "Entropy_L", "Spread_L", "Skewness_Freq_L",
        "Mean_Freq_L", "Kurtosis_Freq_L", "Irregularity_L", "Variance_L",
        "Dominant_Freq_L",
    ]
)

def load_channel_data(probe_name, adc_num, channel, *, rate=None, root=None):
    """Load the saved feature array for one probe / ADC / channel.

    The file is looked up at
    ``<root>/P<id>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``
    where ``<id>`` is the trailing number of ``probe_name``.

    Args:
        probe_name: Probe label such as ``"Probe1"``.
        adc_num: ADC index used in the file name.
        channel: Channel index used in the file name.
        rate: Sampling rate used in the file name. Defaults to the
            module-level ``sample_rate``.
        root: Directory holding the ``P<id>_fd`` folders. Defaults to the
            module-level ``base_directory``.

    Returns:
        The array stored in the ``.npy`` file.

    Raises:
        FileNotFoundError: If the expected file does not exist.

    Note:
        The loaded data is not cross-checked against ``adc_num``; the ADC
        index only selects which file is read.
    """
    if rate is None:
        rate = sample_rate
    if root is None:
        root = base_directory

    # Use the whole trailing number so a multi-digit probe ("Probe10") maps
    # to "P10_fd" instead of "P0_fd". Names without trailing digits keep the
    # previous behaviour of using their last character.
    stem = probe_name.rstrip("0123456789")
    probe_id = probe_name[len(stem):] or probe_name[-1]

    file_path = os.path.join(
        root,
        f"P{probe_id}_fd",
        f"fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate}_MSPS.npy",
    )
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ------------------------------------------------------------------
# Load features: channel 1 is labelled 0, channel 2 is labelled 1
# ------------------------------------------------------------------
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Stack both channels row-wise; the label vectors follow the same order.
X_train = np.vstack((train_ch0, train_ch1))
X_test = np.vstack((test_ch0, test_ch1))
y_train = np.concatenate((np.zeros(len(train_ch0)), np.ones(len(train_ch1))))
y_test = np.concatenate((np.zeros(len(test_ch0)), np.ones(len(test_ch1))))

# ------------------------------------------------------------------
# Standardise using statistics fitted on the training data only
# ------------------------------------------------------------------
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ------------------------------------------------------------------
# First pass: forest on all features, providing feature importances
# ------------------------------------------------------------------
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

print("\nTop Important Features:")
for feature_idx in important_features:
    importance = feature_importances[feature_idx]
    print(f"{feature_names[feature_idx]}: Importance {importance:.4f}")

# ------------------------------------------------------------------
# Second pass: same hyper-parameters, selected columns only
# ------------------------------------------------------------------
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# Output folder is named after the probes, ADC and sampling rate of the run.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected column indices.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and the predicted probability of class 1.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ------------------------------------------------------------------
# Evaluation on the test probe
# ------------------------------------------------------------------
print("\nModel Performance After Feature Selection:")
test_accuracy = accuracy_score(y_test, y_pred_selected)
print(f"Test Accuracy: {test_accuracy:.4f}")
balanced_accuracy = balanced_accuracy_score(y_test, y_pred_selected)
print(f"Balanced Accuracy: {balanced_accuracy:.4f}")
mcc = matthews_corrcoef(y_test, y_pred_selected)
print(f"MCC: {mcc:.4f}")
test_log_loss = log_loss(y_test, y_probs_selected)
print(f"Log Loss: {test_log_loss:.4f}")
f1 = f1_score(y_test, y_pred_selected)
print(f"F1 Score: {f1:.4f}")
recall = recall_score(y_test, y_pred_selected)
print(f"Recall: {recall:.4f}")
precision = precision_score(y_test, y_pred_selected)
print(f"Precision: {precision:.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix plus the false-positive / false-negative rates
# computed from its first and second row respectively.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC2 CH1: (998, 38)
✅ Loaded Probe1 ADC2 CH2: (998, 38)
✅ Loaded Probe3 ADC2 CH1: (998, 38)
✅ Loaded Probe3 ADC2 CH2: (998, 38)

Top Important Features:
Max_H: Importance 0.1009
Mean_H: Importance 0.0800
Std_H: Importance 0.0398
Mean Deviation_H: Importance 0.0428
RMS_H: Importance 0.0640
Peak-to-Peak_H: Importance 0.0010
Entropy_H: Importance 0.1402
Spread_H: Importance 0.0399
Mean_Freq_H: Importance 0.1013
Kurtosis_Freq_H: Importance 0.0125
Variance_H: Importance 0.0800
Min_L: Importance 0.0183
Mean_L: Importance 0.1006
RMS_L: Importance 0.0802
Skewness_L: Importance 0.0001
Entropy_L: Importance 0.0379
Mean_Freq_L: Importance 0.0001
Variance_L: Importance 0.0604

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0256
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       998
         CH2       1.00      1.

#### **Sampling Rate 150MSPS**
This section contains data collected using a sampling rate of **150MSPS**. The model is trained on **Probe 1** and tested on **Probe 3**.



In [107]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder that holds one per-probe feature directory (e.g. "P1_fd").
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selection: ADC index, sampling rate (MSPS), training/testing probe numbers.
adc_num, sample_rate = 2, 150
train_probe_dataset, test_probe_dataset = 1, 3

# Probe labels are derived from the dataset numbers above.
train_probe = "Probe" + str(train_probe_dataset)
test_probe = "Probe" + str(test_probe_dataset)

# Feature names, ordered as Time High, Freq High, Time Low, Freq Low.
time_domain_stats = [
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
]
freq_domain_stats = [
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
]
# Every statistic appears once with the "_H" suffix and once with "_L".
feature_names = [
    f"{stat}_{band}"
    for band in ("H", "L")
    for stat in time_domain_stats + freq_domain_stats
]

def load_channel_data(probe_name, adc_num, channel):
    """Load the feature array stored for one probe / ADC / channel combination.

    The file path is built from the module-level ``base_directory`` and
    ``sample_rate`` settings: a per-probe folder ``P{id}_fd`` containing
    ``fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy``,
    where ``{id}`` is the numeric suffix of ``probe_name``.

    Only the file name encodes the ADC; the loaded contents are not
    cross-checked against ``adc_num``.

    Args:
        probe_name: Probe label such as ``"Probe1"``.
        adc_num: ADC index used in the file name.
        channel: Channel index used in the file name.

    Returns:
        The array returned by ``np.load`` for that file.

    Raises:
        FileNotFoundError: If the expected ``.npy`` file does not exist.
    """
    # Use the whole trailing digit run so "Probe10" maps to "P10_fd" rather
    # than "P0_fd"; names without trailing digits keep the last character.
    probe_id = probe_name[len(probe_name.rstrip("0123456789")):] or probe_name[-1]
    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path so a missing dataset is easy to locate.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Load CH1 then CH2 for the training probe, followed by the test probe.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Stack both channels; CH1 rows are labelled 0 and CH2 rows are labelled 1.
X_train = np.vstack((train_ch0, train_ch1))
X_test = np.vstack((test_ch0, test_ch1))
y_train = np.concatenate((np.zeros(train_ch0.shape[0]), np.ones(train_ch1.shape[0])))
y_test = np.concatenate((np.zeros(test_ch0.shape[0]), np.ones(test_ch1.shape[0])))

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and reused for the test probe.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests (all features / selected features) share these hyperparameters.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Keep the features whose importance is strictly above the median importance.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features with their importance scores.
print("\nTop Important Features:")
for feature_idx in important_features:
    feature_label = feature_names[feature_idx]
    feature_score = feature_importances[feature_idx]
    print(f"{feature_label}: Importance {feature_score:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# One output folder per probe pair / ADC / sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected feature indices.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and the predicted probability of label 1 (CH2).
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# (label, metric function, model output it is computed from), in print order.
metric_rows = (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
)
for metric_label, metric_fn, metric_input in metric_rows:
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix plus the false-positive / false-negative rates derived from it.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Row 0 holds the true-label-0 samples, row 1 the true-label-1 samples.
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC2 CH1: (665, 38)
✅ Loaded Probe1 ADC2 CH2: (665, 38)
✅ Loaded Probe3 ADC2 CH1: (665, 38)
✅ Loaded Probe3 ADC2 CH2: (665, 38)

Top Important Features:
Max_H: Importance 0.1000
Mean_H: Importance 0.0800
Std_H: Importance 0.0800
Mean Deviation_H: Importance 0.0400
RMS_H: Importance 0.0831
Entropy_H: Importance 0.1000
Mean_Freq_H: Importance 0.0800
Kurtosis_Freq_H: Importance 0.0169
Variance_H: Importance 0.0200
Mean_L: Importance 0.1000
Mean Deviation_L: Importance 0.0800
RMS_L: Importance 0.0800
Entropy_L: Importance 0.0600
Variance_L: Importance 0.0800

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0692
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       665
         CH2       1.00      1.00      1.00       665

    accuracy                           1.00      1330
   macro avg       1.00      1.00 

#### **Sampling Rate 200MSPS**
This section contains data collected using a sampling rate of **200MSPS**. The model is trained on **Probe 1** and tested on **Probe 3**.



In [108]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder that holds one per-probe feature directory (e.g. "P1_fd").
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selection: ADC index, sampling rate (MSPS), training/testing probe numbers.
adc_num, sample_rate = 2, 200
train_probe_dataset, test_probe_dataset = 1, 3

# Probe labels are derived from the dataset numbers above.
train_probe = "Probe" + str(train_probe_dataset)
test_probe = "Probe" + str(test_probe_dataset)

# Feature names, ordered as Time High, Freq High, Time Low, Freq Low.
time_domain_stats = [
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
]
freq_domain_stats = [
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
]
# Every statistic appears once with the "_H" suffix and once with "_L".
feature_names = [
    f"{stat}_{band}"
    for band in ("H", "L")
    for stat in time_domain_stats + freq_domain_stats
]

def load_channel_data(probe_name, adc_num, channel):
    """Load the feature array stored for one probe / ADC / channel combination.

    The file path is built from the module-level ``base_directory`` and
    ``sample_rate`` settings: a per-probe folder ``P{id}_fd`` containing
    ``fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy``,
    where ``{id}`` is the numeric suffix of ``probe_name``.

    Only the file name encodes the ADC; the loaded contents are not
    cross-checked against ``adc_num``.

    Args:
        probe_name: Probe label such as ``"Probe1"``.
        adc_num: ADC index used in the file name.
        channel: Channel index used in the file name.

    Returns:
        The array returned by ``np.load`` for that file.

    Raises:
        FileNotFoundError: If the expected ``.npy`` file does not exist.
    """
    # Use the whole trailing digit run so "Probe10" maps to "P10_fd" rather
    # than "P0_fd"; names without trailing digits keep the last character.
    probe_id = probe_name[len(probe_name.rstrip("0123456789")):] or probe_name[-1]
    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path so a missing dataset is easy to locate.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Load CH1 then CH2 for the training probe, followed by the test probe.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Stack both channels; CH1 rows are labelled 0 and CH2 rows are labelled 1.
X_train = np.vstack((train_ch0, train_ch1))
X_test = np.vstack((test_ch0, test_ch1))
y_train = np.concatenate((np.zeros(train_ch0.shape[0]), np.ones(train_ch1.shape[0])))
y_test = np.concatenate((np.zeros(test_ch0.shape[0]), np.ones(test_ch1.shape[0])))

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and reused for the test probe.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests (all features / selected features) share these hyperparameters.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Keep the features whose importance is strictly above the median importance.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features with their importance scores.
print("\nTop Important Features:")
for feature_idx in important_features:
    feature_label = feature_names[feature_idx]
    feature_score = feature_importances[feature_idx]
    print(f"{feature_label}: Importance {feature_score:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# One output folder per probe pair / ADC / sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected feature indices.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and the predicted probability of label 1 (CH2).
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# (label, metric function, model output it is computed from), in print order.
metric_rows = (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
)
for metric_label, metric_fn, metric_input in metric_rows:
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix plus the false-positive / false-negative rates derived from it.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Row 0 holds the true-label-0 samples, row 1 the true-label-1 samples.
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC2 CH1: (498, 38)
✅ Loaded Probe1 ADC2 CH2: (498, 38)
✅ Loaded Probe3 ADC2 CH1: (498, 38)
✅ Loaded Probe3 ADC2 CH2: (498, 38)

Top Important Features:
Max_H: Importance 0.1000
Min_H: Importance 0.0002
Mean_H: Importance 0.0804
Std_H: Importance 0.0142
Mean Deviation_H: Importance 0.0400
RMS_H: Importance 0.1212
Entropy_H: Importance 0.0399
Skewness_Freq_H: Importance 0.0000
Mean_Freq_H: Importance 0.0566
Kurtosis_Freq_H: Importance 0.0142
Variance_H: Importance 0.0217
Min_L: Importance 0.0252
Mean_L: Importance 0.1820
Std_L: Importance 0.0007
Mean Deviation_L: Importance 0.1047
RMS_L: Importance 0.1006
Entropy_L: Importance 0.0185
Variance_L: Importance 0.0800

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0112
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       498
         CH2       1.00      1.00

#### **Sampling Rate 250MSPS**
This section contains data collected using a sampling rate of **250MSPS**. The model is trained on **Probe 1** and tested on **Probe 3**.



In [109]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder that holds one per-probe feature directory (e.g. "P1_fd").
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selection: ADC index, sampling rate (MSPS), training/testing probe numbers.
adc_num, sample_rate = 2, 250
train_probe_dataset, test_probe_dataset = 1, 3

# Probe labels are derived from the dataset numbers above.
train_probe = "Probe" + str(train_probe_dataset)
test_probe = "Probe" + str(test_probe_dataset)

# Feature names, ordered as Time High, Freq High, Time Low, Freq Low.
time_domain_stats = [
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
]
freq_domain_stats = [
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
]
# Every statistic appears once with the "_H" suffix and once with "_L".
feature_names = [
    f"{stat}_{band}"
    for band in ("H", "L")
    for stat in time_domain_stats + freq_domain_stats
]

def load_channel_data(probe_name, adc_num, channel):
    """Load the feature array stored for one probe / ADC / channel combination.

    The file path is built from the module-level ``base_directory`` and
    ``sample_rate`` settings: a per-probe folder ``P{id}_fd`` containing
    ``fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy``,
    where ``{id}`` is the numeric suffix of ``probe_name``.

    Only the file name encodes the ADC; the loaded contents are not
    cross-checked against ``adc_num``.

    Args:
        probe_name: Probe label such as ``"Probe1"``.
        adc_num: ADC index used in the file name.
        channel: Channel index used in the file name.

    Returns:
        The array returned by ``np.load`` for that file.

    Raises:
        FileNotFoundError: If the expected ``.npy`` file does not exist.
    """
    # Use the whole trailing digit run so "Probe10" maps to "P10_fd" rather
    # than "P0_fd"; names without trailing digits keep the last character.
    probe_id = probe_name[len(probe_name.rstrip("0123456789")):] or probe_name[-1]
    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path so a missing dataset is easy to locate.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Load CH1 then CH2 for the training probe, followed by the test probe.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Stack both channels; CH1 rows are labelled 0 and CH2 rows are labelled 1.
X_train = np.vstack((train_ch0, train_ch1))
X_test = np.vstack((test_ch0, test_ch1))
y_train = np.concatenate((np.zeros(train_ch0.shape[0]), np.ones(train_ch1.shape[0])))
y_test = np.concatenate((np.zeros(test_ch0.shape[0]), np.ones(test_ch1.shape[0])))

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and reused for the test probe.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests (all features / selected features) share these hyperparameters.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Keep the features whose importance is strictly above the median importance.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features with their importance scores.
print("\nTop Important Features:")
for feature_idx in important_features:
    feature_label = feature_names[feature_idx]
    feature_score = feature_importances[feature_idx]
    print(f"{feature_label}: Importance {feature_score:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# One output folder per probe pair / ADC / sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected feature indices.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and the predicted probability of label 1 (CH2).
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# (label, metric function, model output it is computed from), in print order.
metric_rows = (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
)
for metric_label, metric_fn, metric_input in metric_rows:
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix plus the false-positive / false-negative rates derived from it.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Row 0 holds the true-label-0 samples, row 1 the true-label-1 samples.
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC2 CH1: (398, 38)
✅ Loaded Probe1 ADC2 CH2: (398, 38)
✅ Loaded Probe3 ADC2 CH1: (398, 38)
✅ Loaded Probe3 ADC2 CH2: (398, 38)

Top Important Features:
Max_H: Importance 0.1000
Mean_H: Importance 0.0600
Std_H: Importance 0.0800
Mean Deviation_H: Importance 0.0200
RMS_H: Importance 0.0615
Peak-to-Peak_H: Importance 0.0200
Entropy_H: Importance 0.0800
Mean_Freq_H: Importance 0.0600
Irregularity_H: Importance 0.0185
Mean_L: Importance 0.1000
Std_L: Importance 0.0800
Mean Deviation_L: Importance 0.0600
RMS_L: Importance 0.0800
Entropy_L: Importance 0.0600
Mean_Freq_L: Importance 0.0400
Variance_L: Importance 0.0800

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0416
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       398
         CH2       1.00      1.00      1.00       398

    accuracy                 

#### **Sampling Rate 300MSPS**
This section contains data collected using a sampling rate of **300MSPS**. The model is trained on **Probe 1** and tested on **Probe 3**.



In [110]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder that holds one per-probe feature directory (e.g. "P1_fd").
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selection: ADC index, sampling rate (MSPS), training/testing probe numbers.
adc_num, sample_rate = 2, 300
train_probe_dataset, test_probe_dataset = 1, 3

# Probe labels are derived from the dataset numbers above.
train_probe = "Probe" + str(train_probe_dataset)
test_probe = "Probe" + str(test_probe_dataset)

# Feature names, ordered as Time High, Freq High, Time Low, Freq Low.
time_domain_stats = [
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
]
freq_domain_stats = [
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
]
# Every statistic appears once with the "_H" suffix and once with "_L".
feature_names = [
    f"{stat}_{band}"
    for band in ("H", "L")
    for stat in time_domain_stats + freq_domain_stats
]

def load_channel_data(probe_name, adc_num, channel):
    """Load the feature array stored for one probe / ADC / channel combination.

    The file path is built from the module-level ``base_directory`` and
    ``sample_rate`` settings: a per-probe folder ``P{id}_fd`` containing
    ``fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy``,
    where ``{id}`` is the numeric suffix of ``probe_name``.

    Only the file name encodes the ADC; the loaded contents are not
    cross-checked against ``adc_num``.

    Args:
        probe_name: Probe label such as ``"Probe1"``.
        adc_num: ADC index used in the file name.
        channel: Channel index used in the file name.

    Returns:
        The array returned by ``np.load`` for that file.

    Raises:
        FileNotFoundError: If the expected ``.npy`` file does not exist.
    """
    # Use the whole trailing digit run so "Probe10" maps to "P10_fd" rather
    # than "P0_fd"; names without trailing digits keep the last character.
    probe_id = probe_name[len(probe_name.rstrip("0123456789")):] or probe_name[-1]
    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path so a missing dataset is easy to locate.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Load CH1 then CH2 for the training probe, followed by the test probe.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Stack both channels; CH1 rows are labelled 0 and CH2 rows are labelled 1.
X_train = np.vstack((train_ch0, train_ch1))
X_test = np.vstack((test_ch0, test_ch1))
y_train = np.concatenate((np.zeros(train_ch0.shape[0]), np.ones(train_ch1.shape[0])))
y_test = np.concatenate((np.zeros(test_ch0.shape[0]), np.ones(test_ch1.shape[0])))

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and reused for the test probe.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests (all features / selected features) share these hyperparameters.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Keep the features whose importance is strictly above the median importance.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features with their importance scores.
print("\nTop Important Features:")
for feature_idx in important_features:
    feature_label = feature_names[feature_idx]
    feature_score = feature_importances[feature_idx]
    print(f"{feature_label}: Importance {feature_score:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# One output folder per probe pair / ADC / sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected feature indices.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and the predicted probability of label 1 (CH2).
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# (label, metric function, model output it is computed from), in print order.
metric_rows = (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
)
for metric_label, metric_fn, metric_input in metric_rows:
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix plus the false-positive / false-negative rates derived from it.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Row 0 holds the true-label-0 samples, row 1 the true-label-1 samples.
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC2 CH1: (332, 38)
✅ Loaded Probe1 ADC2 CH2: (332, 38)
✅ Loaded Probe3 ADC2 CH1: (331, 38)
✅ Loaded Probe3 ADC2 CH2: (332, 38)

Top Important Features:
Max_H: Importance 0.1022
Mean_H: Importance 0.0804
Std_H: Importance 0.0008
Mean Deviation_H: Importance 0.0001
RMS_H: Importance 0.1231
Entropy_H: Importance 0.0188
Skewness_Freq_H: Importance 0.0000
Mean_Freq_H: Importance 0.0021
Irregularity_H: Importance 0.0344
Variance_H: Importance 0.0200
Min_L: Importance 0.0205
Mean_L: Importance 0.1809
Std_L: Importance 0.0606
Mean Deviation_L: Importance 0.1000
RMS_L: Importance 0.1215
Entropy_L: Importance 0.0196
Mean_Freq_L: Importance 0.0171
Kurtosis_Freq_L: Importance 0.0170
Variance_L: Importance 0.0808

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0039
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00    

## **Testing of Probe 2**
To conduct experiments, **ADC2** (data acquisition device) is used consistently throughout. **Probe 2** is kept constant, and the machine learning model is **always trained on Probe 2**. The model is then tested on **Probe 3** and **Probe 1** to evaluate its generalization.

The **sampling rate** is varied across different tests, ranging from **10 MSPS to 300 MSPS**.


### Training **Probe 2** data and Testing on **Probe 3**
The model trained on Probe 2 is evaluated on data from Probe 3 to assess how well it handles a change of probe.

#### **Sampling Rate 10MSPS**
This section contains data collected using a sampling rate of **10MSPS**. The model is trained on **Probe 2** and tested on **Probe 3**.



In [111]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder that holds one per-probe feature directory (e.g. "P2_fd").
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selection: ADC index, sampling rate (MSPS), training/testing probe numbers.
adc_num, sample_rate = 2, 10
train_probe_dataset, test_probe_dataset = 2, 3

# Probe labels are derived from the dataset numbers above.
train_probe = "Probe" + str(train_probe_dataset)
test_probe = "Probe" + str(test_probe_dataset)

# Feature names, ordered as Time High, Freq High, Time Low, Freq Low.
time_domain_stats = [
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
]
freq_domain_stats = [
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
]
# Every statistic appears once with the "_H" suffix and once with "_L".
feature_names = [
    f"{stat}_{band}"
    for band in ("H", "L")
    for stat in time_domain_stats + freq_domain_stats
]

def load_channel_data(probe_name, adc_num, channel, rate=None, base_dir=None):
    """Load the saved feature array for one probe / ADC / channel combination.

    The file is looked up at
    ``<base_dir>/P<d>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``
    where ``<d>`` is the last character of ``probe_name``. No consistency
    check is performed on the loaded contents; only the file's existence is
    verified.

    Args:
        probe_name: Probe label such as ``"Probe2"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.
        rate: Sampling-rate token used in the file name. Defaults to the
            notebook-level ``sample_rate``.
        base_dir: Root directory of the feature files. Defaults to the
            notebook-level ``base_directory``.

    Returns:
        The array stored in the file, as returned by ``np.load``.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Fall back to the notebook globals only when no explicit value is given,
    # so existing three-argument calls behave exactly as before.
    if rate is None:
        rate = sample_rate
    if base_dir is None:
        base_dir = base_directory

    folder = os.path.join(base_dir, f"P{probe_name[-1]}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path in the message.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data (channel 1 -> label 0, channel 2 -> label 1)
# ==================================================================
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Stack both channels row-wise and build the matching 0/1 label vectors.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and then applied to the
# test probe.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests (all features / selected features) share these settings.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Keep the features whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features (in column order, not ranked).
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# One output folder per (train probe, test probe, ADC, sampling rate).
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected column indices.
for artifact_file, artifact in (
    ("RandomForest_Model.pkl", rf_selected),
    ("Scaler.pkl", scaler),
    ("Selected_Features.pkl", important_features),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and class-1 probabilities on the test probe
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_selected):.4f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred_selected):.4f}")
print(f"MCC: {matthews_corrcoef(y_test, y_pred_selected):.4f}")
print(f"Log Loss: {log_loss(y_test, y_probs_selected):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred_selected):.4f}")
print(f"Recall: {recall_score(y_test, y_pred_selected):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_selected):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix plus false-positive / false-negative rates.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Rows are true labels, columns are predicted labels.
false_pos, true_neg = cm[0, 1], cm[0, 0]
false_neg, true_pos = cm[1, 0], cm[1, 1]
fp_rate = false_pos / (false_pos + true_neg)
fn_rate = false_neg / (false_neg + true_pos)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC2 CH1: (9998, 38)
✅ Loaded Probe2 ADC2 CH2: (9998, 38)
✅ Loaded Probe3 ADC2 CH1: (9998, 38)
✅ Loaded Probe3 ADC2 CH2: (9998, 38)

Top Important Features:
Min_H: Importance 0.0161
Mean_H: Importance 0.0768
RMS_H: Importance 0.1119
Skewness_H: Importance 0.0067
Entropy_H: Importance 0.0116
Mean_Freq_H: Importance 0.0421
Variance_H: Importance 0.0360
Max_L: Importance 0.0059
Min_L: Importance 0.1096
Mean_L: Importance 0.1263
Std_L: Importance 0.0059
RMS_L: Importance 0.2135
Skewness_L: Importance 0.0179
Peak-to-Peak_L: Importance 0.0129
Entropy_L: Importance 0.0403
Spread_L: Importance 0.0075
Mean_Freq_L: Importance 0.0380
Irregularity_L: Importance 0.0114
Variance_L: Importance 0.0734

Model Performance After Feature Selection:
Test Accuracy: 0.9256
Balanced Accuracy: 0.9256
MCC: 0.8528
Log Loss: 0.1925
F1 Score: 0.9278
Recall: 0.9562
Precision: 0.9010
              precision    recall  f1-score   support

         CH1       0.95      0.89      0.92      9998
         

#### **Sampling Rate 20MSPS**
This section contains data collected using a sampling rate of **20MSPS**. The model is trained on **Probe 2** and tested on **Probe 3**.



In [112]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder of the saved feature files.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC number, train/test probe dataset numbers, and sampling rate (MSPS).
adc_num, train_probe_dataset, test_probe_dataset, sample_rate = 2, 2, 3, 20

# Probe labels derived from the dataset numbers above
train_probe = f"Probe{train_probe_dataset}"
test_probe = f"Probe{test_probe_dataset}"

# Feature names, one per column of the loaded feature arrays, in the order
# Time High, Freq High, Time Low, Freq Low.
time_features_high = [
    "Max_H", "Min_H", "Mean_H", "Std_H", "Mean Deviation_H", "RMS_H",
    "Skewness_H", "Kurtosis_H", "Peak-to-Peak_H", "Zero Crossing Rate_H",
]
freq_features_high = [
    "Centroid_H", "Entropy_H", "Spread_H", "Skewness_Freq_H", "Mean_Freq_H",
    "Kurtosis_Freq_H", "Irregularity_H", "Variance_H", "Dominant_Freq_H",
]
time_features_low = [
    "Max_L", "Min_L", "Mean_L", "Std_L", "Mean Deviation_L", "RMS_L",
    "Skewness_L", "Kurtosis_L", "Peak-to-Peak_L", "Zero Crossing Rate_L",
]
freq_features_low = [
    "Centroid_L", "Entropy_L", "Spread_L", "Skewness_Freq_L", "Mean_Freq_L",
    "Kurtosis_Freq_L", "Irregularity_L", "Variance_L", "Dominant_Freq_L",
]
feature_names = [
    *time_features_high,
    *freq_features_high,
    *time_features_low,
    *freq_features_low,
]

def load_channel_data(probe_name, adc_num, channel, rate=None, base_dir=None):
    """Load the saved feature array for one probe / ADC / channel combination.

    The file is looked up at
    ``<base_dir>/P<d>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``
    where ``<d>`` is the last character of ``probe_name``. No consistency
    check is performed on the loaded contents; only the file's existence is
    verified.

    Args:
        probe_name: Probe label such as ``"Probe2"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.
        rate: Sampling-rate token used in the file name. Defaults to the
            notebook-level ``sample_rate``.
        base_dir: Root directory of the feature files. Defaults to the
            notebook-level ``base_directory``.

    Returns:
        The array stored in the file, as returned by ``np.load``.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Fall back to the notebook globals only when no explicit value is given,
    # so existing three-argument calls behave exactly as before.
    if rate is None:
        rate = sample_rate
    if base_dir is None:
        base_dir = base_directory

    folder = os.path.join(base_dir, f"P{probe_name[-1]}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path in the message.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data (channel 1 -> label 0, channel 2 -> label 1)
# ==================================================================
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Stack both channels row-wise and build the matching 0/1 label vectors.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and then applied to the
# test probe.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests (all features / selected features) share these settings.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Keep the features whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features (in column order, not ranked).
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# One output folder per (train probe, test probe, ADC, sampling rate).
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected column indices.
for artifact_file, artifact in (
    ("RandomForest_Model.pkl", rf_selected),
    ("Scaler.pkl", scaler),
    ("Selected_Features.pkl", important_features),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and class-1 probabilities on the test probe
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_selected):.4f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred_selected):.4f}")
print(f"MCC: {matthews_corrcoef(y_test, y_pred_selected):.4f}")
print(f"Log Loss: {log_loss(y_test, y_probs_selected):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred_selected):.4f}")
print(f"Recall: {recall_score(y_test, y_pred_selected):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_selected):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix plus false-positive / false-negative rates.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Rows are true labels, columns are predicted labels.
false_pos, true_neg = cm[0, 1], cm[0, 0]
false_neg, true_pos = cm[1, 0], cm[1, 1]
fp_rate = false_pos / (false_pos + true_neg)
fn_rate = false_neg / (false_neg + true_pos)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC2 CH1: (4998, 38)
✅ Loaded Probe2 ADC2 CH2: (4998, 38)
✅ Loaded Probe3 ADC2 CH1: (4998, 38)
✅ Loaded Probe3 ADC2 CH2: (4998, 38)

Top Important Features:
Max_H: Importance 0.1139
Min_H: Importance 0.0350
Mean_H: Importance 0.0743
Mean Deviation_H: Importance 0.0067
RMS_H: Importance 0.1520
Peak-to-Peak_H: Importance 0.0048
Centroid_H: Importance 0.0013
Entropy_H: Importance 0.0346
Spread_H: Importance 0.0016
Mean_Freq_H: Importance 0.0758
Variance_H: Importance 0.0625
Max_L: Importance 0.0140
Min_L: Importance 0.0245
Mean_L: Importance 0.1090
RMS_L: Importance 0.2406
Entropy_L: Importance 0.0278
Skewness_Freq_L: Importance 0.0013
Mean_Freq_L: Importance 0.0078
Variance_L: Importance 0.0092

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0004
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      4998
  

#### **Sampling Rate 30MSPS**
This section contains data collected using a sampling rate of **30MSPS**. The model is trained on **Probe 2** and tested on **Probe 3**.



In [113]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder of the saved feature files.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC number, train/test probe dataset numbers, and sampling rate (MSPS).
adc_num, train_probe_dataset, test_probe_dataset, sample_rate = 2, 2, 3, 30

# Probe labels derived from the dataset numbers above
train_probe = f"Probe{train_probe_dataset}"
test_probe = f"Probe{test_probe_dataset}"

# Feature names, one per column of the loaded feature arrays, in the order
# Time High, Freq High, Time Low, Freq Low.
time_features_high = [
    "Max_H", "Min_H", "Mean_H", "Std_H", "Mean Deviation_H", "RMS_H",
    "Skewness_H", "Kurtosis_H", "Peak-to-Peak_H", "Zero Crossing Rate_H",
]
freq_features_high = [
    "Centroid_H", "Entropy_H", "Spread_H", "Skewness_Freq_H", "Mean_Freq_H",
    "Kurtosis_Freq_H", "Irregularity_H", "Variance_H", "Dominant_Freq_H",
]
time_features_low = [
    "Max_L", "Min_L", "Mean_L", "Std_L", "Mean Deviation_L", "RMS_L",
    "Skewness_L", "Kurtosis_L", "Peak-to-Peak_L", "Zero Crossing Rate_L",
]
freq_features_low = [
    "Centroid_L", "Entropy_L", "Spread_L", "Skewness_Freq_L", "Mean_Freq_L",
    "Kurtosis_Freq_L", "Irregularity_L", "Variance_L", "Dominant_Freq_L",
]
feature_names = [
    *time_features_high,
    *freq_features_high,
    *time_features_low,
    *freq_features_low,
]

def load_channel_data(probe_name, adc_num, channel, rate=None, base_dir=None):
    """Load the saved feature array for one probe / ADC / channel combination.

    The file is looked up at
    ``<base_dir>/P<d>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``
    where ``<d>`` is the last character of ``probe_name``. No consistency
    check is performed on the loaded contents; only the file's existence is
    verified.

    Args:
        probe_name: Probe label such as ``"Probe2"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.
        rate: Sampling-rate token used in the file name. Defaults to the
            notebook-level ``sample_rate``.
        base_dir: Root directory of the feature files. Defaults to the
            notebook-level ``base_directory``.

    Returns:
        The array stored in the file, as returned by ``np.load``.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Fall back to the notebook globals only when no explicit value is given,
    # so existing three-argument calls behave exactly as before.
    if rate is None:
        rate = sample_rate
    if base_dir is None:
        base_dir = base_directory

    folder = os.path.join(base_dir, f"P{probe_name[-1]}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path in the message.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data (channel 1 -> label 0, channel 2 -> label 1)
# ==================================================================
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Stack both channels row-wise and build the matching 0/1 label vectors.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and then applied to the
# test probe.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests (all features / selected features) share these settings.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Keep the features whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features (in column order, not ranked).
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# One output folder per (train probe, test probe, ADC, sampling rate).
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected column indices.
for artifact_file, artifact in (
    ("RandomForest_Model.pkl", rf_selected),
    ("Scaler.pkl", scaler),
    ("Selected_Features.pkl", important_features),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and class-1 probabilities on the test probe
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_selected):.4f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred_selected):.4f}")
print(f"MCC: {matthews_corrcoef(y_test, y_pred_selected):.4f}")
print(f"Log Loss: {log_loss(y_test, y_probs_selected):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred_selected):.4f}")
print(f"Recall: {recall_score(y_test, y_pred_selected):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_selected):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix plus false-positive / false-negative rates.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Rows are true labels, columns are predicted labels.
false_pos, true_neg = cm[0, 1], cm[0, 0]
false_neg, true_pos = cm[1, 0], cm[1, 1]
fp_rate = false_pos / (false_pos + true_neg)
fn_rate = false_neg / (false_neg + true_pos)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC2 CH1: (3331, 38)
✅ Loaded Probe2 ADC2 CH2: (3332, 38)
✅ Loaded Probe3 ADC2 CH1: (3331, 38)
✅ Loaded Probe3 ADC2 CH2: (3332, 38)

Top Important Features:
Max_H: Importance 0.1627
Min_H: Importance 0.0243
Mean_H: Importance 0.0740
Mean Deviation_H: Importance 0.0027
RMS_H: Importance 0.1712
Entropy_H: Importance 0.0154
Spread_H: Importance 0.0019
Skewness_Freq_H: Importance 0.0019
Mean_Freq_H: Importance 0.0451
Variance_H: Importance 0.0392
Min_L: Importance 0.0149
Mean_L: Importance 0.1459
RMS_L: Importance 0.2334
Skewness_L: Importance 0.0181
Kurtosis_L: Importance 0.0035
Peak-to-Peak_L: Importance 0.0027
Entropy_L: Importance 0.0114
Mean_Freq_L: Importance 0.0071
Variance_L: Importance 0.0216

Model Performance After Feature Selection:
Test Accuracy: 0.9935
Balanced Accuracy: 0.9935
MCC: 0.9872
Log Loss: 0.0370
F1 Score: 0.9936
Recall: 1.0000
Precision: 0.9873
              precision    recall  f1-score   support

         CH1       1.00      0.99      0.99      33

#### **Sampling Rate 40MSPS**
This section contains data collected using a sampling rate of **40MSPS**. The model is trained on **Probe 2** and tested on **Probe 3**.



In [114]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder of the saved feature files.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC number, train/test probe dataset numbers, and sampling rate (MSPS).
adc_num, train_probe_dataset, test_probe_dataset, sample_rate = 2, 2, 3, 40

# Probe labels derived from the dataset numbers above
train_probe = f"Probe{train_probe_dataset}"
test_probe = f"Probe{test_probe_dataset}"

# Feature names, one per column of the loaded feature arrays, in the order
# Time High, Freq High, Time Low, Freq Low.
time_features_high = [
    "Max_H", "Min_H", "Mean_H", "Std_H", "Mean Deviation_H", "RMS_H",
    "Skewness_H", "Kurtosis_H", "Peak-to-Peak_H", "Zero Crossing Rate_H",
]
freq_features_high = [
    "Centroid_H", "Entropy_H", "Spread_H", "Skewness_Freq_H", "Mean_Freq_H",
    "Kurtosis_Freq_H", "Irregularity_H", "Variance_H", "Dominant_Freq_H",
]
time_features_low = [
    "Max_L", "Min_L", "Mean_L", "Std_L", "Mean Deviation_L", "RMS_L",
    "Skewness_L", "Kurtosis_L", "Peak-to-Peak_L", "Zero Crossing Rate_L",
]
freq_features_low = [
    "Centroid_L", "Entropy_L", "Spread_L", "Skewness_Freq_L", "Mean_Freq_L",
    "Kurtosis_Freq_L", "Irregularity_L", "Variance_L", "Dominant_Freq_L",
]
feature_names = [
    *time_features_high,
    *freq_features_high,
    *time_features_low,
    *freq_features_low,
]

def load_channel_data(probe_name, adc_num, channel, rate=None, base_dir=None):
    """Load the saved feature array for one probe / ADC / channel combination.

    The file is looked up at
    ``<base_dir>/P<d>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``
    where ``<d>`` is the last character of ``probe_name``. No consistency
    check is performed on the loaded contents; only the file's existence is
    verified.

    Args:
        probe_name: Probe label such as ``"Probe2"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.
        rate: Sampling-rate token used in the file name. Defaults to the
            notebook-level ``sample_rate``.
        base_dir: Root directory of the feature files. Defaults to the
            notebook-level ``base_directory``.

    Returns:
        The array stored in the file, as returned by ``np.load``.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Fall back to the notebook globals only when no explicit value is given,
    # so existing three-argument calls behave exactly as before.
    if rate is None:
        rate = sample_rate
    if base_dir is None:
        base_dir = base_directory

    folder = os.path.join(base_dir, f"P{probe_name[-1]}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path in the message.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data (channel 1 -> label 0, channel 2 -> label 1)
# ==================================================================
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Stack both channels row-wise and build the matching 0/1 label vectors.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and then applied to the
# test probe.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests (all features / selected features) share these settings.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Keep the features whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features (in column order, not ranked).
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# One output folder per (train probe, test probe, ADC, sampling rate).
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected column indices.
for artifact_file, artifact in (
    ("RandomForest_Model.pkl", rf_selected),
    ("Scaler.pkl", scaler),
    ("Selected_Features.pkl", important_features),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and class-1 probabilities on the test probe
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_selected):.4f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred_selected):.4f}")
print(f"MCC: {matthews_corrcoef(y_test, y_pred_selected):.4f}")
print(f"Log Loss: {log_loss(y_test, y_probs_selected):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred_selected):.4f}")
print(f"Recall: {recall_score(y_test, y_pred_selected):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_selected):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix plus false-positive / false-negative rates.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Rows are true labels, columns are predicted labels.
false_pos, true_neg = cm[0, 1], cm[0, 0]
false_neg, true_pos = cm[1, 0], cm[1, 1]
fp_rate = false_pos / (false_pos + true_neg)
fn_rate = false_neg / (false_neg + true_pos)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC2 CH1: (2481, 38)
✅ Loaded Probe2 ADC2 CH2: (2481, 38)
✅ Loaded Probe3 ADC2 CH1: (2481, 38)
✅ Loaded Probe3 ADC2 CH2: (2481, 38)

Top Important Features:
Max_H: Importance 0.0663
Min_H: Importance 0.0261
Mean_H: Importance 0.0912
Std_H: Importance 0.0218
Mean Deviation_H: Importance 0.0151
RMS_H: Importance 0.1408
Centroid_H: Importance 0.0027
Entropy_H: Importance 0.0139
Mean_Freq_H: Importance 0.0598
Variance_H: Importance 0.0671
Max_L: Importance 0.0001
Min_L: Importance 0.0154
Mean_L: Importance 0.1973
RMS_L: Importance 0.1267
Entropy_L: Importance 0.0290
Kurtosis_Freq_L: Importance 0.0000
Variance_L: Importance 0.1266

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0011
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      2481
         CH2       1.00      1.00      1.00      2481

    accuracy   

#### **Sampling Rate 50MSPS**
This section contains data collected using a sampling rate of **50MSPS**. The model is trained on **Probe 2** and tested on **Probe 3**.



In [115]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder of the saved feature files.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC number, train/test probe dataset numbers, and sampling rate (MSPS).
adc_num, train_probe_dataset, test_probe_dataset, sample_rate = 2, 2, 3, 50

# Probe labels derived from the dataset numbers above
train_probe = f"Probe{train_probe_dataset}"
test_probe = f"Probe{test_probe_dataset}"

# Feature names, one per column of the loaded feature arrays, in the order
# Time High, Freq High, Time Low, Freq Low.
time_features_high = [
    "Max_H", "Min_H", "Mean_H", "Std_H", "Mean Deviation_H", "RMS_H",
    "Skewness_H", "Kurtosis_H", "Peak-to-Peak_H", "Zero Crossing Rate_H",
]
freq_features_high = [
    "Centroid_H", "Entropy_H", "Spread_H", "Skewness_Freq_H", "Mean_Freq_H",
    "Kurtosis_Freq_H", "Irregularity_H", "Variance_H", "Dominant_Freq_H",
]
time_features_low = [
    "Max_L", "Min_L", "Mean_L", "Std_L", "Mean Deviation_L", "RMS_L",
    "Skewness_L", "Kurtosis_L", "Peak-to-Peak_L", "Zero Crossing Rate_L",
]
freq_features_low = [
    "Centroid_L", "Entropy_L", "Spread_L", "Skewness_Freq_L", "Mean_Freq_L",
    "Kurtosis_Freq_L", "Irregularity_L", "Variance_L", "Dominant_Freq_L",
]
feature_names = [
    *time_features_high,
    *freq_features_high,
    *time_features_low,
    *freq_features_low,
]

def load_channel_data(probe_name, adc_num, channel, rate=None, base_dir=None):
    """Load the saved feature array for one probe / ADC / channel combination.

    The file is looked up at
    ``<base_dir>/P<d>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``
    where ``<d>`` is the last character of ``probe_name``. No consistency
    check is performed on the loaded contents; only the file's existence is
    verified.

    Args:
        probe_name: Probe label such as ``"Probe2"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.
        rate: Sampling-rate token used in the file name. Defaults to the
            notebook-level ``sample_rate``.
        base_dir: Root directory of the feature files. Defaults to the
            notebook-level ``base_directory``.

    Returns:
        The array stored in the file, as returned by ``np.load``.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Fall back to the notebook globals only when no explicit value is given,
    # so existing three-argument calls behave exactly as before.
    if rate is None:
        rate = sample_rate
    if base_dir is None:
        base_dir = base_directory

    folder = os.path.join(base_dir, f"P{probe_name[-1]}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path in the message.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data (channel 1 -> label 0, channel 2 -> label 1)
# ==================================================================
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Stack both channels row-wise and build the matching 0/1 label vectors.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and then applied to the
# test probe.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests (all features / selected features) share these settings.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Keep the features whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features (in column order, not ranked).
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# One output folder per (train probe, test probe, ADC, sampling rate).
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected column indices.
for artifact_file, artifact in (
    ("RandomForest_Model.pkl", rf_selected),
    ("Scaler.pkl", scaler),
    ("Selected_Features.pkl", important_features),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and class-1 probabilities on the test probe
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_selected):.4f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred_selected):.4f}")
print(f"MCC: {matthews_corrcoef(y_test, y_pred_selected):.4f}")
print(f"Log Loss: {log_loss(y_test, y_probs_selected):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred_selected):.4f}")
print(f"Recall: {recall_score(y_test, y_pred_selected):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_selected):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix plus false-positive / false-negative rates.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Rows are true labels, columns are predicted labels.
false_pos, true_neg = cm[0, 1], cm[0, 0]
false_neg, true_pos = cm[1, 0], cm[1, 1]
fp_rate = false_pos / (false_pos + true_neg)
fn_rate = false_neg / (false_neg + true_pos)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC2 CH1: (1998, 38)
✅ Loaded Probe2 ADC2 CH2: (1998, 38)
✅ Loaded Probe3 ADC2 CH1: (1998, 38)
✅ Loaded Probe3 ADC2 CH2: (1998, 38)

Top Important Features:
Max_H: Importance 0.0615
Min_H: Importance 0.0399
Mean_H: Importance 0.0919
Mean Deviation_H: Importance 0.0249
RMS_H: Importance 0.1047
Peak-to-Peak_H: Importance 0.0002
Centroid_H: Importance 0.0003
Entropy_H: Importance 0.0158
Mean_Freq_H: Importance 0.0409
Variance_H: Importance 0.0604
Max_L: Importance 0.0273
Min_L: Importance 0.0908
Mean_L: Importance 0.1409
Mean Deviation_L: Importance 0.0036
RMS_L: Importance 0.1265
Kurtosis_L: Importance 0.0036
Entropy_L: Importance 0.0326
Mean_Freq_L: Importance 0.0139
Variance_L: Importance 0.1202

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0111
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      1998

#### **Sampling Rate 100MSPS**
This section contains data collected using a sampling rate of **100MSPS**. The model is trained on **Probe 2** and tested on **Probe 3**.



In [116]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder holding the per-probe feature files (absolute Windows path).
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selectors: ADC number, sampling-rate tag used in the file names,
# and the probe datasets used for training and testing.
adc_num = 2
sample_rate = 100
train_probe_dataset = 2
test_probe_dataset = 3

# Probe labels are derived from the dataset numbers, e.g. 2 -> "Probe2".
train_probe = "Probe{}".format(train_probe_dataset)
test_probe = "Probe{}".format(test_probe_dataset)

# Feature Names (Time High, Freq High, Time Low, Freq Low).
# The same 19 base names are emitted first with an "_H" suffix and then with
# an "_L" suffix, giving 38 names in total.
_time_feature_names = [
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness", "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
]
_freq_feature_names = [
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq", "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
]
feature_names = [
    f"{base}_{band}"
    for band in ("H", "L")
    for base in _time_feature_names + _freq_feature_names
]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel.

    The file is expected at
    ``<base_directory>/P<id>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``
    where ``<id>`` is the numeric suffix of ``probe_name`` and
    ``base_directory`` / ``sample_rate`` are the module-level configuration
    values. No cross-checking of the loaded contents is performed.

    Args:
        probe_name: Probe label such as ``"Probe2"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.

    Returns:
        The array stored in the ``.npy`` file, as returned by ``np.load``.

    Raises:
        FileNotFoundError: If nothing exists at the expected path.
    """
    # Use the whole trailing number so "Probe10" maps to "P10_fd" rather than
    # "P0_fd"; labels without a numeric suffix fall back to their last character.
    probe_id = probe_name[len(probe_name.rstrip("0123456789")):] or probe_name[-1]
    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path so a wrong configuration is easy to spot.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Channel 1 rows are stacked first (label 0) and channel 2 rows second (label 1).
train_ch0, train_ch1 = [load_channel_data(train_probe, adc_num, ch) for ch in (1, 2)]
test_ch0, test_ch1 = [load_channel_data(test_probe, adc_num, ch) for ch in (1, 2)]

X_train = np.vstack((train_ch0, train_ch1))
y_train = np.concatenate((np.zeros(train_ch0.shape[0]), np.ones(train_ch1.shape[0])))
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.concatenate((np.zeros(test_ch0.shape[0]), np.ones(test_ch1.shape[0])))

# ==================================================================
# Data Preprocessing
# ==================================================================
# Scaling statistics are fitted on the training probe only and reused for the test probe.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# One hyperparameter set shared by the feature-ranking forest and the final forest.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params)
rf.fit(X_train_scaled, y_train)

# Identify Important Features: keep the columns whose importance is strictly
# above the median importance.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features (in column order, not sorted by importance)
print("\nTop Important Features:")
for feature_idx in important_features:
    feat_name = feature_names[feature_idx]
    feat_score = feature_importances[feature_idx]
    print(f"{feat_name}: Importance {feat_score:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params)
rf_selected.fit(X_train_selected, y_train)

# Create a unique folder name dynamically
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the Model, the Scaler and the selected column indices
saved_artifacts = (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
)
for artifact, artifact_file in saved_artifacts:
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Predictions: hard labels plus the probability of class 1 (channel 2)
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# (label, scoring function, model output it is computed from); log loss is
# the only metric that takes probabilities instead of hard labels.
metric_rows = [
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
]
for metric_label, metric_fn, metric_input in metric_rows:
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Rows are true classes, columns are predicted classes.
false_pos, true_neg = cm[0][1], cm[0][0]
false_neg, true_pos = cm[1][0], cm[1][1]
fp_rate = false_pos / (false_pos + true_neg)
fn_rate = false_neg / (false_neg + true_pos)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC2 CH1: (998, 38)
✅ Loaded Probe2 ADC2 CH2: (998, 38)
✅ Loaded Probe3 ADC2 CH1: (998, 38)
✅ Loaded Probe3 ADC2 CH2: (998, 38)

Top Important Features:
Max_H: Importance 0.1176
Min_H: Importance 0.0039
Mean_H: Importance 0.1050
Mean Deviation_H: Importance 0.0073
RMS_H: Importance 0.1557
Centroid_H: Importance 0.0071
Entropy_H: Importance 0.0103
Spread_H: Importance 0.0064
Mean_Freq_H: Importance 0.0152
Variance_H: Importance 0.0967
Max_L: Importance 0.0013
Min_L: Importance 0.0797
Mean_L: Importance 0.1486
Mean Deviation_L: Importance 0.0202
RMS_L: Importance 0.1366
Kurtosis_L: Importance 0.0018
Entropy_L: Importance 0.0088
Mean_Freq_L: Importance 0.0069
Variance_L: Importance 0.0689

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0000
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       998
         

#### **Sampling Rate 150MSPS**
This section contains data collected using a sampling rate of **150MSPS**. The model is trained on **Probe 2** and tested on **Probe 3**.



In [117]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder holding the per-probe feature files (absolute Windows path).
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selectors: ADC number, sampling-rate tag used in the file names,
# and the probe datasets used for training and testing.
adc_num = 2
sample_rate = 150
train_probe_dataset = 2
test_probe_dataset = 3

# Probe labels are derived from the dataset numbers, e.g. 2 -> "Probe2".
train_probe = "Probe{}".format(train_probe_dataset)
test_probe = "Probe{}".format(test_probe_dataset)

# Feature Names (Time High, Freq High, Time Low, Freq Low).
# The same 19 base names are emitted first with an "_H" suffix and then with
# an "_L" suffix, giving 38 names in total.
_time_feature_names = [
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness", "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
]
_freq_feature_names = [
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq", "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
]
feature_names = [
    f"{base}_{band}"
    for band in ("H", "L")
    for base in _time_feature_names + _freq_feature_names
]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel.

    The file is expected at
    ``<base_directory>/P<id>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``
    where ``<id>`` is the numeric suffix of ``probe_name`` and
    ``base_directory`` / ``sample_rate`` are the module-level configuration
    values. No cross-checking of the loaded contents is performed.

    Args:
        probe_name: Probe label such as ``"Probe2"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.

    Returns:
        The array stored in the ``.npy`` file, as returned by ``np.load``.

    Raises:
        FileNotFoundError: If nothing exists at the expected path.
    """
    # Use the whole trailing number so "Probe10" maps to "P10_fd" rather than
    # "P0_fd"; labels without a numeric suffix fall back to their last character.
    probe_id = probe_name[len(probe_name.rstrip("0123456789")):] or probe_name[-1]
    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path so a wrong configuration is easy to spot.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Channel 1 rows are stacked first (label 0) and channel 2 rows second (label 1).
train_ch0, train_ch1 = [load_channel_data(train_probe, adc_num, ch) for ch in (1, 2)]
test_ch0, test_ch1 = [load_channel_data(test_probe, adc_num, ch) for ch in (1, 2)]

X_train = np.vstack((train_ch0, train_ch1))
y_train = np.concatenate((np.zeros(train_ch0.shape[0]), np.ones(train_ch1.shape[0])))
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.concatenate((np.zeros(test_ch0.shape[0]), np.ones(test_ch1.shape[0])))

# ==================================================================
# Data Preprocessing
# ==================================================================
# Scaling statistics are fitted on the training probe only and reused for the test probe.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# One hyperparameter set shared by the feature-ranking forest and the final forest.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params)
rf.fit(X_train_scaled, y_train)

# Identify Important Features: keep the columns whose importance is strictly
# above the median importance.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features (in column order, not sorted by importance)
print("\nTop Important Features:")
for feature_idx in important_features:
    feat_name = feature_names[feature_idx]
    feat_score = feature_importances[feature_idx]
    print(f"{feat_name}: Importance {feat_score:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params)
rf_selected.fit(X_train_selected, y_train)

# Create a unique folder name dynamically
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the Model, the Scaler and the selected column indices
saved_artifacts = (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
)
for artifact, artifact_file in saved_artifacts:
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Predictions: hard labels plus the probability of class 1 (channel 2)
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# (label, scoring function, model output it is computed from); log loss is
# the only metric that takes probabilities instead of hard labels.
metric_rows = [
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
]
for metric_label, metric_fn, metric_input in metric_rows:
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Rows are true classes, columns are predicted classes.
false_pos, true_neg = cm[0][1], cm[0][0]
false_neg, true_pos = cm[1][0], cm[1][1]
fp_rate = false_pos / (false_pos + true_neg)
fn_rate = false_neg / (false_neg + true_pos)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC2 CH1: (665, 38)
✅ Loaded Probe2 ADC2 CH2: (665, 38)
✅ Loaded Probe3 ADC2 CH1: (665, 38)
✅ Loaded Probe3 ADC2 CH2: (665, 38)

Top Important Features:
Max_H: Importance 0.0800
Mean_H: Importance 0.0600
Std_H: Importance 0.0800
Mean Deviation_H: Importance 0.0223
RMS_H: Importance 0.0814
Entropy_H: Importance 0.0400
Mean_Freq_H: Importance 0.0600
Kurtosis_Freq_H: Importance 0.0163
Min_L: Importance 0.0200
Mean_L: Importance 0.1000
Std_L: Importance 0.0800
Mean Deviation_L: Importance 0.0600
RMS_L: Importance 0.0800
Entropy_L: Importance 0.0600
Mean_Freq_L: Importance 0.0800
Variance_L: Importance 0.0800

Model Performance After Feature Selection:
Test Accuracy: 0.9609
Balanced Accuracy: 0.9609
MCC: 0.9246
Log Loss: 0.1764
F1 Score: 0.9624
Recall: 1.0000
Precision: 0.9275
              precision    recall  f1-score   support

         CH1       1.00      0.92      0.96       665
         CH2       0.93      1.00      0.96       665

    accuracy                         

#### **Sampling Rate 200MSPS**
This section contains data collected using a sampling rate of **200MSPS**. The model is trained on **Probe 2** and tested on **Probe 3**.



In [118]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder holding the per-probe feature files (absolute Windows path).
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selectors: ADC number, sampling-rate tag used in the file names,
# and the probe datasets used for training and testing.
adc_num = 2
sample_rate = 200
train_probe_dataset = 2
test_probe_dataset = 3

# Probe labels are derived from the dataset numbers, e.g. 2 -> "Probe2".
train_probe = "Probe{}".format(train_probe_dataset)
test_probe = "Probe{}".format(test_probe_dataset)

# Feature Names (Time High, Freq High, Time Low, Freq Low).
# The same 19 base names are emitted first with an "_H" suffix and then with
# an "_L" suffix, giving 38 names in total.
_time_feature_names = [
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness", "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
]
_freq_feature_names = [
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq", "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
]
feature_names = [
    f"{base}_{band}"
    for band in ("H", "L")
    for base in _time_feature_names + _freq_feature_names
]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel.

    The file is expected at
    ``<base_directory>/P<id>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``
    where ``<id>`` is the numeric suffix of ``probe_name`` and
    ``base_directory`` / ``sample_rate`` are the module-level configuration
    values. No cross-checking of the loaded contents is performed.

    Args:
        probe_name: Probe label such as ``"Probe2"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.

    Returns:
        The array stored in the ``.npy`` file, as returned by ``np.load``.

    Raises:
        FileNotFoundError: If nothing exists at the expected path.
    """
    # Use the whole trailing number so "Probe10" maps to "P10_fd" rather than
    # "P0_fd"; labels without a numeric suffix fall back to their last character.
    probe_id = probe_name[len(probe_name.rstrip("0123456789")):] or probe_name[-1]
    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path so a wrong configuration is easy to spot.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Channel 1 rows are stacked first (label 0) and channel 2 rows second (label 1).
train_ch0, train_ch1 = [load_channel_data(train_probe, adc_num, ch) for ch in (1, 2)]
test_ch0, test_ch1 = [load_channel_data(test_probe, adc_num, ch) for ch in (1, 2)]

X_train = np.vstack((train_ch0, train_ch1))
y_train = np.concatenate((np.zeros(train_ch0.shape[0]), np.ones(train_ch1.shape[0])))
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.concatenate((np.zeros(test_ch0.shape[0]), np.ones(test_ch1.shape[0])))

# ==================================================================
# Data Preprocessing
# ==================================================================
# Scaling statistics are fitted on the training probe only and reused for the test probe.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# One hyperparameter set shared by the feature-ranking forest and the final forest.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params)
rf.fit(X_train_scaled, y_train)

# Identify Important Features: keep the columns whose importance is strictly
# above the median importance.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features (in column order, not sorted by importance)
print("\nTop Important Features:")
for feature_idx in important_features:
    feat_name = feature_names[feature_idx]
    feat_score = feature_importances[feature_idx]
    print(f"{feat_name}: Importance {feat_score:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params)
rf_selected.fit(X_train_selected, y_train)

# Create a unique folder name dynamically
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the Model, the Scaler and the selected column indices
saved_artifacts = (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
)
for artifact, artifact_file in saved_artifacts:
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Predictions: hard labels plus the probability of class 1 (channel 2)
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# (label, scoring function, model output it is computed from); log loss is
# the only metric that takes probabilities instead of hard labels.
metric_rows = [
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
]
for metric_label, metric_fn, metric_input in metric_rows:
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Rows are true classes, columns are predicted classes.
false_pos, true_neg = cm[0][1], cm[0][0]
false_neg, true_pos = cm[1][0], cm[1][1]
fp_rate = false_pos / (false_pos + true_neg)
fn_rate = false_neg / (false_neg + true_pos)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC2 CH1: (498, 38)
✅ Loaded Probe2 ADC2 CH2: (498, 38)
✅ Loaded Probe3 ADC2 CH1: (498, 38)
✅ Loaded Probe3 ADC2 CH2: (498, 38)

Top Important Features:
Max_H: Importance 0.0800
Mean_H: Importance 0.0826
Std_H: Importance 0.1000
Mean Deviation_H: Importance 0.0600
RMS_H: Importance 0.0834
Entropy_H: Importance 0.1200
Spread_H: Importance 0.0175
Mean_Freq_H: Importance 0.0399
Kurtosis_Freq_H: Importance 0.0166
Irregularity_H: Importance 0.0001
Variance_H: Importance 0.0200
Min_L: Importance 0.0200
Mean_L: Importance 0.1000
Std_L: Importance 0.0200
Mean Deviation_L: Importance 0.0800
RMS_L: Importance 0.0800
Entropy_L: Importance 0.0199
Variance_L: Importance 0.0600

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0177
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       498
         CH2       1.00      1.

#### **Sampling Rate 250MSPS**
This section contains data collected using a sampling rate of **250MSPS**. The model is trained on **Probe 2** and tested on **Probe 3**.



In [119]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder holding the per-probe feature files (absolute Windows path).
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selectors: ADC number, sampling-rate tag used in the file names,
# and the probe datasets used for training and testing.
adc_num = 2
sample_rate = 250
train_probe_dataset = 2
test_probe_dataset = 3

# Probe labels are derived from the dataset numbers, e.g. 2 -> "Probe2".
train_probe = "Probe{}".format(train_probe_dataset)
test_probe = "Probe{}".format(test_probe_dataset)

# Feature Names (Time High, Freq High, Time Low, Freq Low).
# The same 19 base names are emitted first with an "_H" suffix and then with
# an "_L" suffix, giving 38 names in total.
_time_feature_names = [
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness", "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
]
_freq_feature_names = [
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq", "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
]
feature_names = [
    f"{base}_{band}"
    for band in ("H", "L")
    for base in _time_feature_names + _freq_feature_names
]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel.

    The file is expected at
    ``<base_directory>/P<id>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``
    where ``<id>`` is the numeric suffix of ``probe_name`` and
    ``base_directory`` / ``sample_rate`` are the module-level configuration
    values. No cross-checking of the loaded contents is performed.

    Args:
        probe_name: Probe label such as ``"Probe2"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.

    Returns:
        The array stored in the ``.npy`` file, as returned by ``np.load``.

    Raises:
        FileNotFoundError: If nothing exists at the expected path.
    """
    # Use the whole trailing number so "Probe10" maps to "P10_fd" rather than
    # "P0_fd"; labels without a numeric suffix fall back to their last character.
    probe_id = probe_name[len(probe_name.rstrip("0123456789")):] or probe_name[-1]
    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path so a wrong configuration is easy to spot.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Channel 1 rows are stacked first (label 0) and channel 2 rows second (label 1).
train_ch0, train_ch1 = [load_channel_data(train_probe, adc_num, ch) for ch in (1, 2)]
test_ch0, test_ch1 = [load_channel_data(test_probe, adc_num, ch) for ch in (1, 2)]

X_train = np.vstack((train_ch0, train_ch1))
y_train = np.concatenate((np.zeros(train_ch0.shape[0]), np.ones(train_ch1.shape[0])))
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.concatenate((np.zeros(test_ch0.shape[0]), np.ones(test_ch1.shape[0])))

# ==================================================================
# Data Preprocessing
# ==================================================================
# Scaling statistics are fitted on the training probe only and reused for the test probe.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# One hyperparameter set shared by the feature-ranking forest and the final forest.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params)
rf.fit(X_train_scaled, y_train)

# Identify Important Features: keep the columns whose importance is strictly
# above the median importance.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features (in column order, not sorted by importance)
print("\nTop Important Features:")
for feature_idx in important_features:
    feat_name = feature_names[feature_idx]
    feat_score = feature_importances[feature_idx]
    print(f"{feat_name}: Importance {feat_score:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params)
rf_selected.fit(X_train_selected, y_train)

# Create a unique folder name dynamically
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the Model, the Scaler and the selected column indices
saved_artifacts = (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
)
for artifact, artifact_file in saved_artifacts:
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Predictions: hard labels plus the probability of class 1 (channel 2)
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# (label, scoring function, model output it is computed from); log loss is
# the only metric that takes probabilities instead of hard labels.
metric_rows = [
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
]
for metric_label, metric_fn, metric_input in metric_rows:
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Rows are true classes, columns are predicted classes.
false_pos, true_neg = cm[0][1], cm[0][0]
false_neg, true_pos = cm[1][0], cm[1][1]
fp_rate = false_pos / (false_pos + true_neg)
fn_rate = false_neg / (false_neg + true_pos)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC2 CH1: (398, 38)
✅ Loaded Probe2 ADC2 CH2: (398, 38)
✅ Loaded Probe3 ADC2 CH1: (398, 38)
✅ Loaded Probe3 ADC2 CH2: (398, 38)

Top Important Features:
Max_H: Importance 0.0600
Min_H: Importance 0.0189
Mean_H: Importance 0.0800
Std_H: Importance 0.0800
Mean Deviation_H: Importance 0.0600
RMS_H: Importance 0.0811
Entropy_H: Importance 0.1000
Mean_Freq_H: Importance 0.0800
Min_L: Importance 0.0400
Mean_L: Importance 0.1200
Std_L: Importance 0.0400
Mean Deviation_L: Importance 0.0600
RMS_L: Importance 0.0800
Entropy_L: Importance 0.0200
Variance_L: Importance 0.0800

Model Performance After Feature Selection:
Test Accuracy: 0.9899
Balanced Accuracy: 0.9899
MCC: 0.9801
Log Loss: 0.1417
F1 Score: 0.9900
Recall: 1.0000
Precision: 0.9803
              precision    recall  f1-score   support

         CH1       1.00      0.98      0.99       398
         CH2       0.98      1.00      0.99       398

    accuracy                           0.99       796
   macro avg       0.99 

#### **Sampling Rate 300MSPS**
This section contains data collected using a sampling rate of **300MSPS**. The model is trained on **Probe 2** and tested on **Probe 3**.



In [120]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder holding the per-probe feature files (absolute Windows path).
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selectors: ADC number, sampling-rate tag used in the file names,
# and the probe datasets used for training and testing.
adc_num = 2
sample_rate = 300
train_probe_dataset = 2
test_probe_dataset = 3

# Probe labels are derived from the dataset numbers, e.g. 2 -> "Probe2".
train_probe = "Probe{}".format(train_probe_dataset)
test_probe = "Probe{}".format(test_probe_dataset)

# Feature Names (Time High, Freq High, Time Low, Freq Low).
# The same 19 base names are emitted first with an "_H" suffix and then with
# an "_L" suffix, giving 38 names in total.
_time_feature_names = [
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness", "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
]
_freq_feature_names = [
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq", "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
]
feature_names = [
    f"{base}_{band}"
    for band in ("H", "L")
    for base in _time_feature_names + _freq_feature_names
]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel combination.

    The file is expected at
    ``<base_directory>/P<d>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``,
    where ``<d>`` is the last character of ``probe_name`` and ``base_directory`` /
    ``sample_rate`` are read from module level.

    Args:
        probe_name: Probe label such as ``"Probe2"``; only its last character
            is used to pick the sub-folder.
        adc_num: ADC index inserted into the file name.
        channel: Channel index inserted into the file name.

    Returns:
        The array returned by ``np.load`` for that file.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    probe_folder = f"P{probe_name[-1]}_fd"
    npy_name = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    npy_path = os.path.join(base_directory, probe_folder, npy_name)

    if os.path.exists(npy_path):
        features = np.load(npy_path)
        print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {features.shape}")
        return features

    # Report the full path so a wrong config value is easy to spot.
    raise FileNotFoundError(f"File not found: {npy_path}")

# ==================================================================
# Training & Test Data
# ==================================================================
# Files are loaded in the order: train CH1, train CH2, test CH1, test CH2.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Channel 1 rows are labelled 0.0 and channel 2 rows are labelled 1.0.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and then applied to both sets.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests (before and after feature selection) share these hyper-parameters.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Identify Important Features: keep columns whose importance is strictly above the median
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features
print("\nTop Important Features:")
for feature_idx in important_features:
    label, weight = feature_names[feature_idx], feature_importances[feature_idx]
    print(f"{label}: Importance {weight:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# Create a unique folder name dynamically
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the model, the scaler and the selected column indices
for artifact, pkl_name in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, pkl_name))

# Predictions (hard labels, and the probability of class 1)
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# (label, metric function, predictions it is given); log loss is the only one fed probabilities
metric_rows = (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
)
for metric_label, metric_fn, metric_input in metric_rows:
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Row 0 holds the true class-0 samples, row 1 the true class-1 samples.
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC2 CH1: (331, 38)
✅ Loaded Probe2 ADC2 CH2: (331, 38)
✅ Loaded Probe3 ADC2 CH1: (331, 38)
✅ Loaded Probe3 ADC2 CH2: (332, 38)

Top Important Features:
Max_H: Importance 0.0600
Mean_H: Importance 0.0838
Std_H: Importance 0.0200
Mean Deviation_H: Importance 0.0600
RMS_H: Importance 0.0800
Entropy_H: Importance 0.1000
Skewness_Freq_H: Importance 0.0400
Mean_Freq_H: Importance 0.0800
Kurtosis_Freq_H: Importance 0.0400
Variance_H: Importance 0.0229
Min_L: Importance 0.0600
Mean_L: Importance 0.1400
Mean Deviation_L: Importance 0.0600
RMS_L: Importance 0.0600
Entropy_L: Importance 0.0133
Variance_L: Importance 0.0800

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0406
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       331
         CH2       1.00      1.00      1.00       332

    accuracy                

### Training **Probe 2** data and Testing on **Probe 1**
To evaluate generalization, the model trained on Probe 2 is tested after changing the probe to Probe 1, and the results are reported below.

#### **Sampling Rate 10MSPS**
This section contains data collected using a sampling rate of **10MSPS**. The model is trained on **Probe 2** and tested on **Probe 1**.



In [121]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder of the feature files; load_channel_data() looks inside its "P<n>_fd" sub-folders.
base_directory = r"C:\Users\awm21\Documents\Probe_Vari\Neural_Networks\Features_Vitis"

adc_num = 2              # ADC index used in the feature file names
train_probe_dataset = 2  # probe number the model is trained on
test_probe_dataset = 1   # probe number the model is tested on
sample_rate = 10         # sampling rate in MSPS, as written in the file names

# Probe labels ("Probe<n>") derived from the dataset numbers above
train_probe = "Probe" + str(train_probe_dataset)
test_probe = "Probe" + str(test_probe_dataset)

# Feature Names (Time High, Freq High, Time Low, Freq Low)
# The same 19 stems are used twice: first with the "_H" suffix, then with "_L".
_feature_stems = (
    # time-domain stems
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness", "Kurtosis",
    "Peak-to-Peak", "Zero Crossing Rate",
    # frequency-domain stems
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq", "Kurtosis_Freq",
    "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [f"{stem}_{band}" for band in ("H", "L") for stem in _feature_stems]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel combination.

    The file is expected at
    ``<base_directory>/P<d>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``,
    where ``<d>`` is the last character of ``probe_name`` and ``base_directory`` /
    ``sample_rate`` are read from module level.

    Args:
        probe_name: Probe label such as ``"Probe2"``; only its last character
            is used to pick the sub-folder.
        adc_num: ADC index inserted into the file name.
        channel: Channel index inserted into the file name.

    Returns:
        The array returned by ``np.load`` for that file.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    probe_folder = f"P{probe_name[-1]}_fd"
    npy_name = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    npy_path = os.path.join(base_directory, probe_folder, npy_name)

    if os.path.exists(npy_path):
        features = np.load(npy_path)
        print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {features.shape}")
        return features

    # Report the full path so a wrong config value is easy to spot.
    raise FileNotFoundError(f"File not found: {npy_path}")

# ==================================================================
# Training & Test Data
# ==================================================================
# Files are loaded in the order: train CH1, train CH2, test CH1, test CH2.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Channel 1 rows are labelled 0.0 and channel 2 rows are labelled 1.0.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and then applied to both sets.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests (before and after feature selection) share these hyper-parameters.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Identify Important Features: keep columns whose importance is strictly above the median
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features
print("\nTop Important Features:")
for feature_idx in important_features:
    label, weight = feature_names[feature_idx], feature_importances[feature_idx]
    print(f"{label}: Importance {weight:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# Create a unique folder name dynamically
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the model, the scaler and the selected column indices
for artifact, pkl_name in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, pkl_name))

# Predictions (hard labels, and the probability of class 1)
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# (label, metric function, predictions it is given); log loss is the only one fed probabilities
metric_rows = (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
)
for metric_label, metric_fn, metric_input in metric_rows:
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Row 0 holds the true class-0 samples, row 1 the true class-1 samples.
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC2 CH1: (9998, 38)
✅ Loaded Probe2 ADC2 CH2: (9998, 38)
✅ Loaded Probe1 ADC2 CH1: (9998, 38)
✅ Loaded Probe1 ADC2 CH2: (9998, 38)

Top Important Features:
Min_H: Importance 0.0161
Mean_H: Importance 0.0768
RMS_H: Importance 0.1119
Skewness_H: Importance 0.0067
Entropy_H: Importance 0.0116
Mean_Freq_H: Importance 0.0421
Variance_H: Importance 0.0360
Max_L: Importance 0.0059
Min_L: Importance 0.1096
Mean_L: Importance 0.1263
Std_L: Importance 0.0059
RMS_L: Importance 0.2135
Skewness_L: Importance 0.0179
Peak-to-Peak_L: Importance 0.0129
Entropy_L: Importance 0.0403
Spread_L: Importance 0.0075
Mean_Freq_L: Importance 0.0380
Irregularity_L: Importance 0.0114
Variance_L: Importance 0.0734

Model Performance After Feature Selection:
Test Accuracy: 0.8808
Balanced Accuracy: 0.8808
MCC: 0.7684
Log Loss: 0.3835
F1 Score: 0.8882
Recall: 0.9475
Precision: 0.8360
              precision    recall  f1-score   support

         CH1       0.94      0.81      0.87      9998
         

#### **Sampling Rate 20MSPS**
This section contains data collected using a sampling rate of **20MSPS**. The model is trained on **Probe 2** and tested on **Probe 1**.



In [122]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder of the feature files; load_channel_data() looks inside its "P<n>_fd" sub-folders.
base_directory = r"C:\Users\awm21\Documents\Probe_Vari\Neural_Networks\Features_Vitis"

adc_num = 2              # ADC index used in the feature file names
train_probe_dataset = 2  # probe number the model is trained on
test_probe_dataset = 1   # probe number the model is tested on
sample_rate = 20         # sampling rate in MSPS, as written in the file names

# Probe labels ("Probe<n>") derived from the dataset numbers above
train_probe = "Probe" + str(train_probe_dataset)
test_probe = "Probe" + str(test_probe_dataset)

# Feature Names (Time High, Freq High, Time Low, Freq Low)
# The same 19 stems are used twice: first with the "_H" suffix, then with "_L".
_feature_stems = (
    # time-domain stems
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness", "Kurtosis",
    "Peak-to-Peak", "Zero Crossing Rate",
    # frequency-domain stems
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq", "Kurtosis_Freq",
    "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [f"{stem}_{band}" for band in ("H", "L") for stem in _feature_stems]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel combination.

    The file is expected at
    ``<base_directory>/P<d>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``,
    where ``<d>`` is the last character of ``probe_name`` and ``base_directory`` /
    ``sample_rate`` are read from module level.

    Args:
        probe_name: Probe label such as ``"Probe2"``; only its last character
            is used to pick the sub-folder.
        adc_num: ADC index inserted into the file name.
        channel: Channel index inserted into the file name.

    Returns:
        The array returned by ``np.load`` for that file.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    probe_folder = f"P{probe_name[-1]}_fd"
    npy_name = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    npy_path = os.path.join(base_directory, probe_folder, npy_name)

    if os.path.exists(npy_path):
        features = np.load(npy_path)
        print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {features.shape}")
        return features

    # Report the full path so a wrong config value is easy to spot.
    raise FileNotFoundError(f"File not found: {npy_path}")

# ==================================================================
# Training & Test Data
# ==================================================================
# Files are loaded in the order: train CH1, train CH2, test CH1, test CH2.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Channel 1 rows are labelled 0.0 and channel 2 rows are labelled 1.0.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and then applied to both sets.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests (before and after feature selection) share these hyper-parameters.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Identify Important Features: keep columns whose importance is strictly above the median
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features
print("\nTop Important Features:")
for feature_idx in important_features:
    label, weight = feature_names[feature_idx], feature_importances[feature_idx]
    print(f"{label}: Importance {weight:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# Create a unique folder name dynamically
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the model, the scaler and the selected column indices
for artifact, pkl_name in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, pkl_name))

# Predictions (hard labels, and the probability of class 1)
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# (label, metric function, predictions it is given); log loss is the only one fed probabilities
metric_rows = (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
)
for metric_label, metric_fn, metric_input in metric_rows:
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Row 0 holds the true class-0 samples, row 1 the true class-1 samples.
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC2 CH1: (4998, 38)
✅ Loaded Probe2 ADC2 CH2: (4998, 38)
✅ Loaded Probe1 ADC2 CH1: (4998, 38)
✅ Loaded Probe1 ADC2 CH2: (4998, 38)

Top Important Features:
Max_H: Importance 0.1139
Min_H: Importance 0.0350
Mean_H: Importance 0.0743
Mean Deviation_H: Importance 0.0067
RMS_H: Importance 0.1520
Peak-to-Peak_H: Importance 0.0048
Centroid_H: Importance 0.0013
Entropy_H: Importance 0.0346
Spread_H: Importance 0.0016
Mean_Freq_H: Importance 0.0758
Variance_H: Importance 0.0625
Max_L: Importance 0.0140
Min_L: Importance 0.0245
Mean_L: Importance 0.1090
RMS_L: Importance 0.2406
Entropy_L: Importance 0.0278
Skewness_Freq_L: Importance 0.0013
Mean_Freq_L: Importance 0.0078
Variance_L: Importance 0.0092

Model Performance After Feature Selection:
Test Accuracy: 0.9999
Balanced Accuracy: 0.9999
MCC: 0.9998
Log Loss: 0.0035
F1 Score: 0.9999
Recall: 1.0000
Precision: 0.9998
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      4998
  

#### **Sampling Rate 30MSPS**
This section contains data collected using a sampling rate of **30MSPS**. The model is trained on **Probe 2** and tested on **Probe 1**.



In [124]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder of the feature files; load_channel_data() looks inside its "P<n>_fd" sub-folders.
base_directory = r"C:\Users\awm21\Documents\Probe_Vari\Neural_Networks\Features_Vitis"

adc_num = 2              # ADC index used in the feature file names
train_probe_dataset = 2  # probe number the model is trained on
test_probe_dataset = 1   # probe number the model is tested on
sample_rate = 30         # sampling rate in MSPS, as written in the file names

# Probe labels ("Probe<n>") derived from the dataset numbers above
train_probe = "Probe" + str(train_probe_dataset)
test_probe = "Probe" + str(test_probe_dataset)

# Feature Names (Time High, Freq High, Time Low, Freq Low)
# The same 19 stems are used twice: first with the "_H" suffix, then with "_L".
_feature_stems = (
    # time-domain stems
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness", "Kurtosis",
    "Peak-to-Peak", "Zero Crossing Rate",
    # frequency-domain stems
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq", "Kurtosis_Freq",
    "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [f"{stem}_{band}" for band in ("H", "L") for stem in _feature_stems]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel combination.

    The file is expected at
    ``<base_directory>/P<d>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``,
    where ``<d>`` is the last character of ``probe_name`` and ``base_directory`` /
    ``sample_rate`` are read from module level.

    Args:
        probe_name: Probe label such as ``"Probe2"``; only its last character
            is used to pick the sub-folder.
        adc_num: ADC index inserted into the file name.
        channel: Channel index inserted into the file name.

    Returns:
        The array returned by ``np.load`` for that file.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    probe_folder = f"P{probe_name[-1]}_fd"
    npy_name = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    npy_path = os.path.join(base_directory, probe_folder, npy_name)

    if os.path.exists(npy_path):
        features = np.load(npy_path)
        print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {features.shape}")
        return features

    # Report the full path so a wrong config value is easy to spot.
    raise FileNotFoundError(f"File not found: {npy_path}")

# ==================================================================
# Training & Test Data
# ==================================================================
# Files are loaded in the order: train CH1, train CH2, test CH1, test CH2.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Channel 1 rows are labelled 0.0 and channel 2 rows are labelled 1.0.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and then applied to both sets.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests (before and after feature selection) share these hyper-parameters.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Identify Important Features: keep columns whose importance is strictly above the median
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features
print("\nTop Important Features:")
for feature_idx in important_features:
    label, weight = feature_names[feature_idx], feature_importances[feature_idx]
    print(f"{label}: Importance {weight:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# Create a unique folder name dynamically
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the model, the scaler and the selected column indices
for artifact, pkl_name in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, pkl_name))

# Predictions (hard labels, and the probability of class 1)
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# (label, metric function, predictions it is given); log loss is the only one fed probabilities
metric_rows = (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
)
for metric_label, metric_fn, metric_input in metric_rows:
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Row 0 holds the true class-0 samples, row 1 the true class-1 samples.
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC2 CH1: (3331, 38)
✅ Loaded Probe2 ADC2 CH2: (3332, 38)
✅ Loaded Probe1 ADC2 CH1: (3331, 38)
✅ Loaded Probe1 ADC2 CH2: (3332, 38)

Top Important Features:
Max_H: Importance 0.1627
Min_H: Importance 0.0243
Mean_H: Importance 0.0740
Mean Deviation_H: Importance 0.0027
RMS_H: Importance 0.1712
Entropy_H: Importance 0.0154
Spread_H: Importance 0.0019
Skewness_Freq_H: Importance 0.0019
Mean_Freq_H: Importance 0.0451
Variance_H: Importance 0.0392
Min_L: Importance 0.0149
Mean_L: Importance 0.1459
RMS_L: Importance 0.2334
Skewness_L: Importance 0.0181
Kurtosis_L: Importance 0.0035
Peak-to-Peak_L: Importance 0.0027
Entropy_L: Importance 0.0114
Mean_Freq_L: Importance 0.0071
Variance_L: Importance 0.0216

Model Performance After Feature Selection:
Test Accuracy: 0.9920
Balanced Accuracy: 0.9920
MCC: 0.9842
Log Loss: 0.0443
F1 Score: 0.9920
Recall: 0.9847
Precision: 0.9994
              precision    recall  f1-score   support

         CH1       0.98      1.00      0.99      33

#### **Sampling Rate 40MSPS**
This section contains data collected using a sampling rate of **40MSPS**. The model is trained on **Probe 2** and tested on **Probe 1**.



In [125]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder of the feature files; load_channel_data() looks inside its "P<n>_fd" sub-folders.
base_directory = r"C:\Users\awm21\Documents\Probe_Vari\Neural_Networks\Features_Vitis"

adc_num = 2              # ADC index used in the feature file names
train_probe_dataset = 2  # probe number the model is trained on
test_probe_dataset = 1   # probe number the model is tested on
sample_rate = 40         # sampling rate in MSPS, as written in the file names

# Probe labels ("Probe<n>") derived from the dataset numbers above
train_probe = "Probe" + str(train_probe_dataset)
test_probe = "Probe" + str(test_probe_dataset)

# Feature Names (Time High, Freq High, Time Low, Freq Low)
# The same 19 stems are used twice: first with the "_H" suffix, then with "_L".
_feature_stems = (
    # time-domain stems
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness", "Kurtosis",
    "Peak-to-Peak", "Zero Crossing Rate",
    # frequency-domain stems
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq", "Kurtosis_Freq",
    "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [f"{stem}_{band}" for band in ("H", "L") for stem in _feature_stems]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel combination.

    The file is expected at
    ``<base_directory>/P<d>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``,
    where ``<d>`` is the last character of ``probe_name`` and ``base_directory`` /
    ``sample_rate`` are read from module level.

    Args:
        probe_name: Probe label such as ``"Probe2"``; only its last character
            is used to pick the sub-folder.
        adc_num: ADC index inserted into the file name.
        channel: Channel index inserted into the file name.

    Returns:
        The array returned by ``np.load`` for that file.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    probe_folder = f"P{probe_name[-1]}_fd"
    npy_name = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    npy_path = os.path.join(base_directory, probe_folder, npy_name)

    if os.path.exists(npy_path):
        features = np.load(npy_path)
        print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {features.shape}")
        return features

    # Report the full path so a wrong config value is easy to spot.
    raise FileNotFoundError(f"File not found: {npy_path}")

# ==================================================================
# Training & Test Data
# ==================================================================
# Files are loaded in the order: train CH1, train CH2, test CH1, test CH2.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Channel 1 rows are labelled 0.0 and channel 2 rows are labelled 1.0.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and then applied to both sets.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests (before and after feature selection) share these hyper-parameters.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Identify Important Features: keep columns whose importance is strictly above the median
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features
print("\nTop Important Features:")
for feature_idx in important_features:
    label, weight = feature_names[feature_idx], feature_importances[feature_idx]
    print(f"{label}: Importance {weight:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# Create a unique folder name dynamically
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the model, the scaler and the selected column indices
for artifact, pkl_name in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, pkl_name))

# Predictions (hard labels, and the probability of class 1)
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# (label, metric function, predictions it is given); log loss is the only one fed probabilities
metric_rows = (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
)
for metric_label, metric_fn, metric_input in metric_rows:
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Row 0 holds the true class-0 samples, row 1 the true class-1 samples.
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC2 CH1: (2481, 38)
✅ Loaded Probe2 ADC2 CH2: (2481, 38)
✅ Loaded Probe1 ADC2 CH1: (2481, 38)
✅ Loaded Probe1 ADC2 CH2: (2481, 38)

Top Important Features:
Max_H: Importance 0.0663
Min_H: Importance 0.0261
Mean_H: Importance 0.0912
Std_H: Importance 0.0218
Mean Deviation_H: Importance 0.0151
RMS_H: Importance 0.1408
Centroid_H: Importance 0.0027
Entropy_H: Importance 0.0139
Mean_Freq_H: Importance 0.0598
Variance_H: Importance 0.0671
Max_L: Importance 0.0001
Min_L: Importance 0.0154
Mean_L: Importance 0.1973
RMS_L: Importance 0.1267
Entropy_L: Importance 0.0290
Kurtosis_Freq_L: Importance 0.0000
Variance_L: Importance 0.1266

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0013
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      2481
         CH2       1.00      1.00      1.00      2481

    accuracy   

#### **Sampling Rate 50MSPS**
This section contains data collected using a sampling rate of **50MSPS**. The model is trained on **Probe 2** and tested on **Probe 1**.



In [126]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Experiment settings for this cell
# ==================================================================
# Root folder that holds the per-probe feature folders.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC index and sampling rate (both appear in the feature file names).
adc_num, sample_rate = 2, 50

# Dataset numbers of the probe used for training and of the probe used for testing.
train_probe_dataset, test_probe_dataset = 2, 1

# Probe labels are built from the dataset numbers above.
train_probe, test_probe = (
    f"Probe{number}" for number in (train_probe_dataset, test_probe_dataset)
)

# Column labels, kept in the original four groups
# (Time High, Freq High, Time Low, Freq Low).
_time_high = [
    "Max_H", "Min_H", "Mean_H", "Std_H", "Mean Deviation_H",
    "RMS_H", "Skewness_H", "Kurtosis_H", "Peak-to-Peak_H", "Zero Crossing Rate_H",
]
_freq_high = [
    "Centroid_H", "Entropy_H", "Spread_H", "Skewness_Freq_H", "Mean_Freq_H",
    "Kurtosis_Freq_H", "Irregularity_H", "Variance_H", "Dominant_Freq_H",
]
_time_low = [
    "Max_L", "Min_L", "Mean_L", "Std_L", "Mean Deviation_L",
    "RMS_L", "Skewness_L", "Kurtosis_L", "Peak-to-Peak_L", "Zero Crossing Rate_L",
]
_freq_low = [
    "Centroid_L", "Entropy_L", "Spread_L", "Skewness_Freq_L", "Mean_Freq_L",
    "Kurtosis_Freq_L", "Irregularity_L", "Variance_L", "Dominant_Freq_L",
]
feature_names = [*_time_high, *_freq_high, *_time_low, *_freq_low]

def load_channel_data(probe_name, adc_num, channel):
    """Return the feature array stored for one probe / ADC / channel.

    The file is looked up below the module-level ``base_directory`` in the
    folder ``P<last character of probe_name>_fd``; its name is built from
    ``probe_name``, ``adc_num``, ``channel`` and the module-level
    ``sample_rate``. The shape of the loaded array is printed.

    No check of the file contents against ``adc_num`` is performed here.

    Raises:
        FileNotFoundError: if the expected ``.npy`` file does not exist.
    """
    file_path = os.path.join(
        base_directory,
        f"P{probe_name[-1]}_fd",
        f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy',
    )

    # Happy path first; a missing file falls through to the explicit error.
    if os.path.exists(file_path):
        data = np.load(file_path)
        print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
        return data

    raise FileNotFoundError(f"File not found: {file_path}")

# ==================================================================
# Assemble the training and test sets
# ==================================================================
# Channels 1 and 2 are loaded in that order, first for the training probe and
# then for the test probe (every load prints the array shape).
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Channel-1 rows come first and are labelled 0.0; channel-2 rows follow and
# are labelled 1.0.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Standardize features (scaler is fitted on the training set only)
# ==================================================================
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# First forest: trained on every feature to obtain importances
# ==================================================================
# Both forests in this cell use the same hyperparameters and seed.
forest_settings = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**forest_settings).fit(X_train_scaled, y_train)

# Keep the column indices whose importance is strictly above the median.
feature_importances = rf.feature_importances_
median_importance = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > median_importance)
X_train_selected, X_test_selected = (
    scaled[:, important_features] for scaled in (X_train_scaled, X_test_scaled)
)

# Report the retained features in column order
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Second forest: trained on the retained columns only
# ==================================================================
rf_selected = RandomForestClassifier(**forest_settings).fit(X_train_selected, y_train)

# Output folder is named after the probes, ADC and sampling rate of this run
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the retained column indices
for artifact, pkl_name in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, pkl_name))

# Hard predictions and the probability of class 1 for the test probe
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Test-set metrics of the second forest
# ==================================================================
print("\nModel Performance After Feature Selection:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_selected):.4f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred_selected):.4f}")
print(f"MCC: {matthews_corrcoef(y_test, y_pred_selected):.4f}")
print(f"Log Loss: {log_loss(y_test, y_probs_selected):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred_selected):.4f}")
print(f"Recall: {recall_score(y_test, y_pred_selected):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_selected):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix (rows: true label, columns: predicted label) and error rates
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
true_neg, false_pos = cm[0, 0], cm[0, 1]
false_neg, true_pos = cm[1, 0], cm[1, 1]
fp_rate = false_pos / (false_pos + true_neg)
fn_rate = false_neg / (false_neg + true_pos)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC2 CH1: (1998, 38)
✅ Loaded Probe2 ADC2 CH2: (1998, 38)
✅ Loaded Probe1 ADC2 CH1: (1998, 38)
✅ Loaded Probe1 ADC2 CH2: (1998, 38)

Top Important Features:
Max_H: Importance 0.0615
Min_H: Importance 0.0399
Mean_H: Importance 0.0919
Mean Deviation_H: Importance 0.0249
RMS_H: Importance 0.1047
Peak-to-Peak_H: Importance 0.0002
Centroid_H: Importance 0.0003
Entropy_H: Importance 0.0158
Mean_Freq_H: Importance 0.0409
Variance_H: Importance 0.0604
Max_L: Importance 0.0273
Min_L: Importance 0.0908
Mean_L: Importance 0.1409
Mean Deviation_L: Importance 0.0036
RMS_L: Importance 0.1265
Kurtosis_L: Importance 0.0036
Entropy_L: Importance 0.0326
Mean_Freq_L: Importance 0.0139
Variance_L: Importance 0.1202

Model Performance After Feature Selection:
Test Accuracy: 0.9997
Balanced Accuracy: 0.9997
MCC: 0.9995
Log Loss: 0.0119
F1 Score: 0.9997
Recall: 0.9995
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      1998

#### **Sampling Rate 100MSPS**
This section contains data collected using a sampling rate of **100MSPS**. The model is trained on **Probe 2** and tested on **Probe 1**.



In [127]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Experiment settings for this cell
# ==================================================================
# Root folder that holds the per-probe feature folders.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC index and sampling rate (both appear in the feature file names).
adc_num, sample_rate = 2, 100

# Dataset numbers of the probe used for training and of the probe used for testing.
train_probe_dataset, test_probe_dataset = 2, 1

# Probe labels are built from the dataset numbers above.
train_probe, test_probe = (
    f"Probe{number}" for number in (train_probe_dataset, test_probe_dataset)
)

# Column labels, kept in the original four groups
# (Time High, Freq High, Time Low, Freq Low).
_time_high = [
    "Max_H", "Min_H", "Mean_H", "Std_H", "Mean Deviation_H",
    "RMS_H", "Skewness_H", "Kurtosis_H", "Peak-to-Peak_H", "Zero Crossing Rate_H",
]
_freq_high = [
    "Centroid_H", "Entropy_H", "Spread_H", "Skewness_Freq_H", "Mean_Freq_H",
    "Kurtosis_Freq_H", "Irregularity_H", "Variance_H", "Dominant_Freq_H",
]
_time_low = [
    "Max_L", "Min_L", "Mean_L", "Std_L", "Mean Deviation_L",
    "RMS_L", "Skewness_L", "Kurtosis_L", "Peak-to-Peak_L", "Zero Crossing Rate_L",
]
_freq_low = [
    "Centroid_L", "Entropy_L", "Spread_L", "Skewness_Freq_L", "Mean_Freq_L",
    "Kurtosis_Freq_L", "Irregularity_L", "Variance_L", "Dominant_Freq_L",
]
feature_names = [*_time_high, *_freq_high, *_time_low, *_freq_low]

def load_channel_data(probe_name, adc_num, channel):
    """Return the feature array stored for one probe / ADC / channel.

    The file is looked up below the module-level ``base_directory`` in the
    folder ``P<last character of probe_name>_fd``; its name is built from
    ``probe_name``, ``adc_num``, ``channel`` and the module-level
    ``sample_rate``. The shape of the loaded array is printed.

    No check of the file contents against ``adc_num`` is performed here.

    Raises:
        FileNotFoundError: if the expected ``.npy`` file does not exist.
    """
    file_path = os.path.join(
        base_directory,
        f"P{probe_name[-1]}_fd",
        f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy',
    )

    # Happy path first; a missing file falls through to the explicit error.
    if os.path.exists(file_path):
        data = np.load(file_path)
        print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
        return data

    raise FileNotFoundError(f"File not found: {file_path}")

# ==================================================================
# Assemble the training and test sets
# ==================================================================
# Channels 1 and 2 are loaded in that order, first for the training probe and
# then for the test probe (every load prints the array shape).
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Channel-1 rows come first and are labelled 0.0; channel-2 rows follow and
# are labelled 1.0.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Standardize features (scaler is fitted on the training set only)
# ==================================================================
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# First forest: trained on every feature to obtain importances
# ==================================================================
# Both forests in this cell use the same hyperparameters and seed.
forest_settings = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**forest_settings).fit(X_train_scaled, y_train)

# Keep the column indices whose importance is strictly above the median.
feature_importances = rf.feature_importances_
median_importance = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > median_importance)
X_train_selected, X_test_selected = (
    scaled[:, important_features] for scaled in (X_train_scaled, X_test_scaled)
)

# Report the retained features in column order
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Second forest: trained on the retained columns only
# ==================================================================
rf_selected = RandomForestClassifier(**forest_settings).fit(X_train_selected, y_train)

# Output folder is named after the probes, ADC and sampling rate of this run
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the retained column indices
for artifact, pkl_name in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, pkl_name))

# Hard predictions and the probability of class 1 for the test probe
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Test-set metrics of the second forest
# ==================================================================
print("\nModel Performance After Feature Selection:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_selected):.4f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred_selected):.4f}")
print(f"MCC: {matthews_corrcoef(y_test, y_pred_selected):.4f}")
print(f"Log Loss: {log_loss(y_test, y_probs_selected):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred_selected):.4f}")
print(f"Recall: {recall_score(y_test, y_pred_selected):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_selected):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix (rows: true label, columns: predicted label) and error rates
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
true_neg, false_pos = cm[0, 0], cm[0, 1]
false_neg, true_pos = cm[1, 0], cm[1, 1]
fp_rate = false_pos / (false_pos + true_neg)
fn_rate = false_neg / (false_neg + true_pos)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC2 CH1: (998, 38)
✅ Loaded Probe2 ADC2 CH2: (998, 38)
✅ Loaded Probe1 ADC2 CH1: (998, 38)
✅ Loaded Probe1 ADC2 CH2: (998, 38)

Top Important Features:
Max_H: Importance 0.1176
Min_H: Importance 0.0039
Mean_H: Importance 0.1050
Mean Deviation_H: Importance 0.0073
RMS_H: Importance 0.1557
Centroid_H: Importance 0.0071
Entropy_H: Importance 0.0103
Spread_H: Importance 0.0064
Mean_Freq_H: Importance 0.0152
Variance_H: Importance 0.0967
Max_L: Importance 0.0013
Min_L: Importance 0.0797
Mean_L: Importance 0.1486
Mean Deviation_L: Importance 0.0202
RMS_L: Importance 0.1366
Kurtosis_L: Importance 0.0018
Entropy_L: Importance 0.0088
Mean_Freq_L: Importance 0.0069
Variance_L: Importance 0.0689

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0000
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       998
         

#### **Sampling Rate 150MSPS**
This section contains data collected using a sampling rate of **150MSPS**. The model is trained on **Probe 2** and tested on **Probe 1**.



In [128]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Experiment settings for this cell
# ==================================================================
# Root folder that holds the per-probe feature folders.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC index and sampling rate (both appear in the feature file names).
adc_num, sample_rate = 2, 150

# Dataset numbers of the probe used for training and of the probe used for testing.
train_probe_dataset, test_probe_dataset = 2, 1

# Probe labels are built from the dataset numbers above.
train_probe, test_probe = (
    f"Probe{number}" for number in (train_probe_dataset, test_probe_dataset)
)

# Column labels, kept in the original four groups
# (Time High, Freq High, Time Low, Freq Low).
_time_high = [
    "Max_H", "Min_H", "Mean_H", "Std_H", "Mean Deviation_H",
    "RMS_H", "Skewness_H", "Kurtosis_H", "Peak-to-Peak_H", "Zero Crossing Rate_H",
]
_freq_high = [
    "Centroid_H", "Entropy_H", "Spread_H", "Skewness_Freq_H", "Mean_Freq_H",
    "Kurtosis_Freq_H", "Irregularity_H", "Variance_H", "Dominant_Freq_H",
]
_time_low = [
    "Max_L", "Min_L", "Mean_L", "Std_L", "Mean Deviation_L",
    "RMS_L", "Skewness_L", "Kurtosis_L", "Peak-to-Peak_L", "Zero Crossing Rate_L",
]
_freq_low = [
    "Centroid_L", "Entropy_L", "Spread_L", "Skewness_Freq_L", "Mean_Freq_L",
    "Kurtosis_Freq_L", "Irregularity_L", "Variance_L", "Dominant_Freq_L",
]
feature_names = [*_time_high, *_freq_high, *_time_low, *_freq_low]

def load_channel_data(probe_name, adc_num, channel):
    """Return the feature array stored for one probe / ADC / channel.

    The file is looked up below the module-level ``base_directory`` in the
    folder ``P<last character of probe_name>_fd``; its name is built from
    ``probe_name``, ``adc_num``, ``channel`` and the module-level
    ``sample_rate``. The shape of the loaded array is printed.

    No check of the file contents against ``adc_num`` is performed here.

    Raises:
        FileNotFoundError: if the expected ``.npy`` file does not exist.
    """
    file_path = os.path.join(
        base_directory,
        f"P{probe_name[-1]}_fd",
        f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy',
    )

    # Happy path first; a missing file falls through to the explicit error.
    if os.path.exists(file_path):
        data = np.load(file_path)
        print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
        return data

    raise FileNotFoundError(f"File not found: {file_path}")

# ==================================================================
# Assemble the training and test sets
# ==================================================================
# Channels 1 and 2 are loaded in that order, first for the training probe and
# then for the test probe (every load prints the array shape).
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Channel-1 rows come first and are labelled 0.0; channel-2 rows follow and
# are labelled 1.0.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Standardize features (scaler is fitted on the training set only)
# ==================================================================
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# First forest: trained on every feature to obtain importances
# ==================================================================
# Both forests in this cell use the same hyperparameters and seed.
forest_settings = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**forest_settings).fit(X_train_scaled, y_train)

# Keep the column indices whose importance is strictly above the median.
feature_importances = rf.feature_importances_
median_importance = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > median_importance)
X_train_selected, X_test_selected = (
    scaled[:, important_features] for scaled in (X_train_scaled, X_test_scaled)
)

# Report the retained features in column order
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Second forest: trained on the retained columns only
# ==================================================================
rf_selected = RandomForestClassifier(**forest_settings).fit(X_train_selected, y_train)

# Output folder is named after the probes, ADC and sampling rate of this run
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the retained column indices
for artifact, pkl_name in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, pkl_name))

# Hard predictions and the probability of class 1 for the test probe
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Test-set metrics of the second forest
# ==================================================================
print("\nModel Performance After Feature Selection:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_selected):.4f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred_selected):.4f}")
print(f"MCC: {matthews_corrcoef(y_test, y_pred_selected):.4f}")
print(f"Log Loss: {log_loss(y_test, y_probs_selected):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred_selected):.4f}")
print(f"Recall: {recall_score(y_test, y_pred_selected):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_selected):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix (rows: true label, columns: predicted label) and error rates
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
true_neg, false_pos = cm[0, 0], cm[0, 1]
false_neg, true_pos = cm[1, 0], cm[1, 1]
fp_rate = false_pos / (false_pos + true_neg)
fn_rate = false_neg / (false_neg + true_pos)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC2 CH1: (665, 38)
✅ Loaded Probe2 ADC2 CH2: (665, 38)
✅ Loaded Probe1 ADC2 CH1: (665, 38)
✅ Loaded Probe1 ADC2 CH2: (665, 38)

Top Important Features:
Max_H: Importance 0.0800
Mean_H: Importance 0.0600
Std_H: Importance 0.0800
Mean Deviation_H: Importance 0.0223
RMS_H: Importance 0.0814
Entropy_H: Importance 0.0400
Mean_Freq_H: Importance 0.0600
Kurtosis_Freq_H: Importance 0.0163
Min_L: Importance 0.0200
Mean_L: Importance 0.1000
Std_L: Importance 0.0800
Mean Deviation_L: Importance 0.0600
RMS_L: Importance 0.0800
Entropy_L: Importance 0.0600
Mean_Freq_L: Importance 0.0800
Variance_L: Importance 0.0800

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0306
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       665
         CH2       1.00      1.00      1.00       665

    accuracy                         

#### **Sampling Rate 200MSPS**
This section contains data collected using a sampling rate of **200MSPS**. The model is trained on **Probe 2** and tested on **Probe 1**.



In [129]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Experiment settings for this cell
# ==================================================================
# Root folder that holds the per-probe feature folders.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC index and sampling rate (both appear in the feature file names).
adc_num, sample_rate = 2, 200

# Dataset numbers of the probe used for training and of the probe used for testing.
train_probe_dataset, test_probe_dataset = 2, 1

# Probe labels are built from the dataset numbers above.
train_probe, test_probe = (
    f"Probe{number}" for number in (train_probe_dataset, test_probe_dataset)
)

# Column labels, kept in the original four groups
# (Time High, Freq High, Time Low, Freq Low).
_time_high = [
    "Max_H", "Min_H", "Mean_H", "Std_H", "Mean Deviation_H",
    "RMS_H", "Skewness_H", "Kurtosis_H", "Peak-to-Peak_H", "Zero Crossing Rate_H",
]
_freq_high = [
    "Centroid_H", "Entropy_H", "Spread_H", "Skewness_Freq_H", "Mean_Freq_H",
    "Kurtosis_Freq_H", "Irregularity_H", "Variance_H", "Dominant_Freq_H",
]
_time_low = [
    "Max_L", "Min_L", "Mean_L", "Std_L", "Mean Deviation_L",
    "RMS_L", "Skewness_L", "Kurtosis_L", "Peak-to-Peak_L", "Zero Crossing Rate_L",
]
_freq_low = [
    "Centroid_L", "Entropy_L", "Spread_L", "Skewness_Freq_L", "Mean_Freq_L",
    "Kurtosis_Freq_L", "Irregularity_L", "Variance_L", "Dominant_Freq_L",
]
feature_names = [*_time_high, *_freq_high, *_time_low, *_freq_low]

def load_channel_data(probe_name, adc_num, channel):
    """Return the feature array stored for one probe / ADC / channel.

    The file is looked up below the module-level ``base_directory`` in the
    folder ``P<last character of probe_name>_fd``; its name is built from
    ``probe_name``, ``adc_num``, ``channel`` and the module-level
    ``sample_rate``. The shape of the loaded array is printed.

    No check of the file contents against ``adc_num`` is performed here.

    Raises:
        FileNotFoundError: if the expected ``.npy`` file does not exist.
    """
    file_path = os.path.join(
        base_directory,
        f"P{probe_name[-1]}_fd",
        f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy',
    )

    # Happy path first; a missing file falls through to the explicit error.
    if os.path.exists(file_path):
        data = np.load(file_path)
        print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
        return data

    raise FileNotFoundError(f"File not found: {file_path}")

# ==================================================================
# Assemble the training and test sets
# ==================================================================
# Channels 1 and 2 are loaded in that order, first for the training probe and
# then for the test probe (every load prints the array shape).
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Channel-1 rows come first and are labelled 0.0; channel-2 rows follow and
# are labelled 1.0.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Standardize features (scaler is fitted on the training set only)
# ==================================================================
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# First forest: trained on every feature to obtain importances
# ==================================================================
# Both forests in this cell use the same hyperparameters and seed.
forest_settings = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**forest_settings).fit(X_train_scaled, y_train)

# Keep the column indices whose importance is strictly above the median.
feature_importances = rf.feature_importances_
median_importance = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > median_importance)
X_train_selected, X_test_selected = (
    scaled[:, important_features] for scaled in (X_train_scaled, X_test_scaled)
)

# Report the retained features in column order
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Second forest: trained on the retained columns only
# ==================================================================
rf_selected = RandomForestClassifier(**forest_settings).fit(X_train_selected, y_train)

# Output folder is named after the probes, ADC and sampling rate of this run
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the retained column indices
for artifact, pkl_name in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, pkl_name))

# Hard predictions and the probability of class 1 for the test probe
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Test-set metrics of the second forest
# ==================================================================
print("\nModel Performance After Feature Selection:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_selected):.4f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred_selected):.4f}")
print(f"MCC: {matthews_corrcoef(y_test, y_pred_selected):.4f}")
print(f"Log Loss: {log_loss(y_test, y_probs_selected):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred_selected):.4f}")
print(f"Recall: {recall_score(y_test, y_pred_selected):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_selected):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix (rows: true label, columns: predicted label) and error rates
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
true_neg, false_pos = cm[0, 0], cm[0, 1]
false_neg, true_pos = cm[1, 0], cm[1, 1]
fp_rate = false_pos / (false_pos + true_neg)
fn_rate = false_neg / (false_neg + true_pos)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC2 CH1: (498, 38)
✅ Loaded Probe2 ADC2 CH2: (498, 38)
✅ Loaded Probe1 ADC2 CH1: (498, 38)
✅ Loaded Probe1 ADC2 CH2: (498, 38)

Top Important Features:
Max_H: Importance 0.0800
Mean_H: Importance 0.0826
Std_H: Importance 0.1000
Mean Deviation_H: Importance 0.0600
RMS_H: Importance 0.0834
Entropy_H: Importance 0.1200
Spread_H: Importance 0.0175
Mean_Freq_H: Importance 0.0399
Kurtosis_Freq_H: Importance 0.0166
Irregularity_H: Importance 0.0001
Variance_H: Importance 0.0200
Min_L: Importance 0.0200
Mean_L: Importance 0.1000
Std_L: Importance 0.0200
Mean Deviation_L: Importance 0.0800
RMS_L: Importance 0.0800
Entropy_L: Importance 0.0199
Variance_L: Importance 0.0600

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.1023
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       498
         CH2       1.00      1.

#### **Sampling Rate 250MSPS**
This section contains data collected using a sampling rate of **250MSPS**. The model is trained on **Probe 2** and tested on **Probe 1**.



In [130]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Experiment settings for this cell
# ==================================================================
# Root folder that holds the per-probe feature folders.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC index and sampling rate (both appear in the feature file names).
adc_num, sample_rate = 2, 250

# Dataset numbers of the probe used for training and of the probe used for testing.
train_probe_dataset, test_probe_dataset = 2, 1

# Probe labels are built from the dataset numbers above.
train_probe, test_probe = (
    f"Probe{number}" for number in (train_probe_dataset, test_probe_dataset)
)

# Column labels, kept in the original four groups
# (Time High, Freq High, Time Low, Freq Low).
_time_high = [
    "Max_H", "Min_H", "Mean_H", "Std_H", "Mean Deviation_H",
    "RMS_H", "Skewness_H", "Kurtosis_H", "Peak-to-Peak_H", "Zero Crossing Rate_H",
]
_freq_high = [
    "Centroid_H", "Entropy_H", "Spread_H", "Skewness_Freq_H", "Mean_Freq_H",
    "Kurtosis_Freq_H", "Irregularity_H", "Variance_H", "Dominant_Freq_H",
]
_time_low = [
    "Max_L", "Min_L", "Mean_L", "Std_L", "Mean Deviation_L",
    "RMS_L", "Skewness_L", "Kurtosis_L", "Peak-to-Peak_L", "Zero Crossing Rate_L",
]
_freq_low = [
    "Centroid_L", "Entropy_L", "Spread_L", "Skewness_Freq_L", "Mean_Freq_L",
    "Kurtosis_Freq_L", "Irregularity_L", "Variance_L", "Dominant_Freq_L",
]
feature_names = [*_time_high, *_freq_high, *_time_low, *_freq_low]

def load_channel_data(probe_name, adc_num, channel, *, rate_msps=None, data_root=None):
    """Load the saved feature array for one probe / ADC / channel combination.

    The file is looked up at
    ``<data_root>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate_msps>_MSPS.npy``
    where ``<n>`` is the trailing number of ``probe_name``.

    Note: this function does not verify ADC consistency; it only checks that
    the file exists, loads it and prints the loaded shape.

    Args:
        probe_name: Probe label such as ``"Probe2"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.
        rate_msps: Sampling rate used in the file name. When omitted, the
            module-level ``sample_rate`` is used.
        data_root: Directory holding the ``P<n>_fd`` folders. When omitted,
            the module-level ``base_directory`` is used.

    Returns:
        The array returned by ``np.load`` for the matching file.

    Raises:
        FileNotFoundError: If the expected ``.npy`` file does not exist.
    """
    # Fall back to the notebook-level configuration so existing three-argument
    # calls keep working unchanged.
    if rate_msps is None:
        rate_msps = sample_rate
    if data_root is None:
        data_root = base_directory

    # Use the whole trailing number so a multi-digit probe ("Probe12") maps to
    # "P12_fd" instead of "P2_fd"; names without trailing digits keep the
    # previous behaviour of using the last character.
    probe_id = probe_name[len(probe_name.rstrip("0123456789")):] or probe_name[-1]

    folder = os.path.join(data_root, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate_msps}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path so a missing export is easy to diagnose.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Channels 1 and 2 are loaded (in that order) for the training probe and then
# for the test probe; the *_ch0 / *_ch1 names hold channel 1 / channel 2.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Stack both channels row-wise; channel 1 rows get label 0, channel 2 rows label 1.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.concatenate((np.zeros(len(train_ch0)), np.ones(len(train_ch1))))
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.concatenate((np.zeros(len(test_ch0)), np.ones(len(test_ch1))))

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and reused for the test probe.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests (selection and final) share the same hyperparameters.
rf_params = {
    "n_estimators": 50,
    "max_depth": 10,
    "min_samples_split": 10,
    "min_samples_leaf": 5,
    "max_features": 'sqrt',
    "random_state": 42,
}
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Identify Important Features: keep columns whose importance is strictly
# above the median importance.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features (in column order, not sorted by importance)
print("\nTop Important Features:")
for feature_idx, feature_importance in zip(
    important_features, feature_importances[important_features]
):
    print(f"{feature_names[feature_idx]}: Importance {feature_importance:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# Create a unique folder name dynamically
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the model, the scaler and the selected column indices side by side.
saved_artifacts = {
    "RandomForest_Model.pkl": rf_selected,
    "Scaler.pkl": scaler,
    "Selected_Features.pkl": important_features,
}
for artifact_file, artifact_obj in saved_artifacts.items():
    joblib.dump(artifact_obj, os.path.join(model_dir, artifact_file))

# Predictions: hard labels plus the probability of class 1 (channel 2).
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# (label, metric function, prediction input); log loss uses probabilities,
# every other metric uses the hard labels.
scalar_metrics = (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
)
for metric_label, metric_fn, metric_input in scalar_metrics:
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Row 0 holds the true class-0 samples, row 1 the true class-1 samples.
fp_rate = cm[0, 1] / cm[0].sum()
fn_rate = cm[1, 0] / cm[1].sum()
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC2 CH1: (398, 38)
✅ Loaded Probe2 ADC2 CH2: (398, 38)
✅ Loaded Probe1 ADC2 CH1: (398, 38)
✅ Loaded Probe1 ADC2 CH2: (398, 38)

Top Important Features:
Max_H: Importance 0.0600
Min_H: Importance 0.0189
Mean_H: Importance 0.0800
Std_H: Importance 0.0800
Mean Deviation_H: Importance 0.0600
RMS_H: Importance 0.0811
Entropy_H: Importance 0.1000
Mean_Freq_H: Importance 0.0800
Min_L: Importance 0.0400
Mean_L: Importance 0.1200
Std_L: Importance 0.0400
Mean Deviation_L: Importance 0.0600
RMS_L: Importance 0.0800
Entropy_L: Importance 0.0200
Variance_L: Importance 0.0800

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0113
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       398
         CH2       1.00      1.00      1.00       398

    accuracy                           1.00       796
   macro avg       1.00 

#### **Sampling Rate 300MSPS**
This section contains data collected using a sampling rate of **300MSPS**. The model is trained on **Probe 2** and tested on **Probe 1**.



In [131]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder that contains the per-probe "P<n>_fd" feature directories.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

adc_num = 2                # ADC number used in the feature file names
train_probe_dataset = 2    # probe number the model is trained on
test_probe_dataset = 1     # probe number the model is evaluated on
sample_rate = 300          # sampling rate value used in the file names (MSPS)

# Probe labels are derived from the dataset numbers above.
train_probe, test_probe = (
    f"Probe{number}" for number in (train_probe_dataset, test_probe_dataset)
)

# Feature Names (Time High, Freq High, Time Low, Freq Low).
# The 19 base names are listed once; the full 38-entry list is all of them
# with the "_H" suffix followed by all of them with the "_L" suffix.
_time_feature_bases = [
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
]
_freq_feature_bases = [
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
]
feature_names = [
    f"{base}_{band}"
    for band in ("H", "L")
    for base in _time_feature_bases + _freq_feature_bases
]

def load_channel_data(probe_name, adc_num, channel, *, rate_msps=None, data_root=None):
    """Load the saved feature array for one probe / ADC / channel combination.

    The file is looked up at
    ``<data_root>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate_msps>_MSPS.npy``
    where ``<n>`` is the trailing number of ``probe_name``.

    Note: this function does not verify ADC consistency; it only checks that
    the file exists, loads it and prints the loaded shape.

    Args:
        probe_name: Probe label such as ``"Probe2"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.
        rate_msps: Sampling rate used in the file name. When omitted, the
            module-level ``sample_rate`` is used.
        data_root: Directory holding the ``P<n>_fd`` folders. When omitted,
            the module-level ``base_directory`` is used.

    Returns:
        The array returned by ``np.load`` for the matching file.

    Raises:
        FileNotFoundError: If the expected ``.npy`` file does not exist.
    """
    # Fall back to the notebook-level configuration so existing three-argument
    # calls keep working unchanged.
    if rate_msps is None:
        rate_msps = sample_rate
    if data_root is None:
        data_root = base_directory

    # Use the whole trailing number so a multi-digit probe ("Probe12") maps to
    # "P12_fd" instead of "P2_fd"; names without trailing digits keep the
    # previous behaviour of using the last character.
    probe_id = probe_name[len(probe_name.rstrip("0123456789")):] or probe_name[-1]

    folder = os.path.join(data_root, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate_msps}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path so a missing export is easy to diagnose.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Channels 1 and 2 are loaded (in that order) for the training probe and then
# for the test probe; the *_ch0 / *_ch1 names hold channel 1 / channel 2.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Stack both channels row-wise; channel 1 rows get label 0, channel 2 rows label 1.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.concatenate((np.zeros(len(train_ch0)), np.ones(len(train_ch1))))
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.concatenate((np.zeros(len(test_ch0)), np.ones(len(test_ch1))))

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and reused for the test probe.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests (selection and final) share the same hyperparameters.
rf_params = {
    "n_estimators": 50,
    "max_depth": 10,
    "min_samples_split": 10,
    "min_samples_leaf": 5,
    "max_features": 'sqrt',
    "random_state": 42,
}
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Identify Important Features: keep columns whose importance is strictly
# above the median importance.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features (in column order, not sorted by importance)
print("\nTop Important Features:")
for feature_idx, feature_importance in zip(
    important_features, feature_importances[important_features]
):
    print(f"{feature_names[feature_idx]}: Importance {feature_importance:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# Create a unique folder name dynamically
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the model, the scaler and the selected column indices side by side.
saved_artifacts = {
    "RandomForest_Model.pkl": rf_selected,
    "Scaler.pkl": scaler,
    "Selected_Features.pkl": important_features,
}
for artifact_file, artifact_obj in saved_artifacts.items():
    joblib.dump(artifact_obj, os.path.join(model_dir, artifact_file))

# Predictions: hard labels plus the probability of class 1 (channel 2).
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# (label, metric function, prediction input); log loss uses probabilities,
# every other metric uses the hard labels.
scalar_metrics = (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
)
for metric_label, metric_fn, metric_input in scalar_metrics:
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Row 0 holds the true class-0 samples, row 1 the true class-1 samples.
fp_rate = cm[0, 1] / cm[0].sum()
fn_rate = cm[1, 0] / cm[1].sum()
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC2 CH1: (331, 38)
✅ Loaded Probe2 ADC2 CH2: (331, 38)
✅ Loaded Probe1 ADC2 CH1: (332, 38)
✅ Loaded Probe1 ADC2 CH2: (332, 38)

Top Important Features:
Max_H: Importance 0.0600
Mean_H: Importance 0.0838
Std_H: Importance 0.0200
Mean Deviation_H: Importance 0.0600
RMS_H: Importance 0.0800
Entropy_H: Importance 0.1000
Skewness_Freq_H: Importance 0.0400
Mean_Freq_H: Importance 0.0800
Kurtosis_Freq_H: Importance 0.0400
Variance_H: Importance 0.0229
Min_L: Importance 0.0600
Mean_L: Importance 0.1400
Mean Deviation_L: Importance 0.0600
RMS_L: Importance 0.0600
Entropy_L: Importance 0.0133
Variance_L: Importance 0.0800

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.1438
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       332
         CH2       1.00      1.00      1.00       332

    accuracy                

## **Testing of Probe 3**
To conduct experiments, **ADC2** (data acquisition device) is used consistently throughout. **Probe 3** is kept constant, and the machine learning model is **always trained on Probe 3**. The model is then tested on **Probe 1** and **Probe 2** to evaluate its generalization.

The **sampling rate** is varied across different tests, ranging from **10 MSPS to 300 MSPS**.

### Training **Probe 3** data and Testing on **Probe 1**
The model trained on Probe 3 is evaluated on data acquired with Probe 1, and the results are reported below.

#### **Sampling Rate 10MSPS**
This section contains data collected using a sampling rate of **10MSPS**. The model is trained on **Probe 3** and tested on **Probe 1**.



In [132]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder that contains the per-probe "P<n>_fd" feature directories.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

adc_num = 2                # ADC number used in the feature file names
train_probe_dataset = 3    # probe number the model is trained on
test_probe_dataset = 1     # probe number the model is evaluated on
sample_rate = 10           # sampling rate value used in the file names (MSPS)

# Probe labels are derived from the dataset numbers above.
train_probe, test_probe = (
    f"Probe{number}" for number in (train_probe_dataset, test_probe_dataset)
)

# Feature Names (Time High, Freq High, Time Low, Freq Low).
# The 19 base names are listed once; the full 38-entry list is all of them
# with the "_H" suffix followed by all of them with the "_L" suffix.
_time_feature_bases = [
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
]
_freq_feature_bases = [
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
]
feature_names = [
    f"{base}_{band}"
    for band in ("H", "L")
    for base in _time_feature_bases + _freq_feature_bases
]

def load_channel_data(probe_name, adc_num, channel, *, rate_msps=None, data_root=None):
    """Load the saved feature array for one probe / ADC / channel combination.

    The file is looked up at
    ``<data_root>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate_msps>_MSPS.npy``
    where ``<n>`` is the trailing number of ``probe_name``.

    Note: this function does not verify ADC consistency; it only checks that
    the file exists, loads it and prints the loaded shape.

    Args:
        probe_name: Probe label such as ``"Probe2"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.
        rate_msps: Sampling rate used in the file name. When omitted, the
            module-level ``sample_rate`` is used.
        data_root: Directory holding the ``P<n>_fd`` folders. When omitted,
            the module-level ``base_directory`` is used.

    Returns:
        The array returned by ``np.load`` for the matching file.

    Raises:
        FileNotFoundError: If the expected ``.npy`` file does not exist.
    """
    # Fall back to the notebook-level configuration so existing three-argument
    # calls keep working unchanged.
    if rate_msps is None:
        rate_msps = sample_rate
    if data_root is None:
        data_root = base_directory

    # Use the whole trailing number so a multi-digit probe ("Probe12") maps to
    # "P12_fd" instead of "P2_fd"; names without trailing digits keep the
    # previous behaviour of using the last character.
    probe_id = probe_name[len(probe_name.rstrip("0123456789")):] or probe_name[-1]

    folder = os.path.join(data_root, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate_msps}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path so a missing export is easy to diagnose.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Channels 1 and 2 are loaded (in that order) for the training probe and then
# for the test probe; the *_ch0 / *_ch1 names hold channel 1 / channel 2.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Stack both channels row-wise; channel 1 rows get label 0, channel 2 rows label 1.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.concatenate((np.zeros(len(train_ch0)), np.ones(len(train_ch1))))
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.concatenate((np.zeros(len(test_ch0)), np.ones(len(test_ch1))))

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and reused for the test probe.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests (selection and final) share the same hyperparameters.
rf_params = {
    "n_estimators": 50,
    "max_depth": 10,
    "min_samples_split": 10,
    "min_samples_leaf": 5,
    "max_features": 'sqrt',
    "random_state": 42,
}
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Identify Important Features: keep columns whose importance is strictly
# above the median importance.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features (in column order, not sorted by importance)
print("\nTop Important Features:")
for feature_idx, feature_importance in zip(
    important_features, feature_importances[important_features]
):
    print(f"{feature_names[feature_idx]}: Importance {feature_importance:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# Create a unique folder name dynamically
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the model, the scaler and the selected column indices side by side.
saved_artifacts = {
    "RandomForest_Model.pkl": rf_selected,
    "Scaler.pkl": scaler,
    "Selected_Features.pkl": important_features,
}
for artifact_file, artifact_obj in saved_artifacts.items():
    joblib.dump(artifact_obj, os.path.join(model_dir, artifact_file))

# Predictions: hard labels plus the probability of class 1 (channel 2).
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# (label, metric function, prediction input); log loss uses probabilities,
# every other metric uses the hard labels.
scalar_metrics = (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
)
for metric_label, metric_fn, metric_input in scalar_metrics:
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Row 0 holds the true class-0 samples, row 1 the true class-1 samples.
fp_rate = cm[0, 1] / cm[0].sum()
fn_rate = cm[1, 0] / cm[1].sum()
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC2 CH1: (9998, 38)
✅ Loaded Probe3 ADC2 CH2: (9998, 38)
✅ Loaded Probe1 ADC2 CH1: (9998, 38)
✅ Loaded Probe1 ADC2 CH2: (9998, 38)

Top Important Features:
Max_H: Importance 0.1694
Min_H: Importance 0.0161
Mean_H: Importance 0.0882
Mean Deviation_H: Importance 0.0057
RMS_H: Importance 0.1780
Skewness_H: Importance 0.0048
Peak-to-Peak_H: Importance 0.0044
Entropy_H: Importance 0.1111
Spread_H: Importance 0.0043
Mean_Freq_H: Importance 0.0611
Variance_H: Importance 0.0211
Max_L: Importance 0.0040
Min_L: Importance 0.0971
Mean_L: Importance 0.0526
RMS_L: Importance 0.0720
Skewness_L: Importance 0.0055
Entropy_L: Importance 0.0421
Mean_Freq_L: Importance 0.0188
Variance_L: Importance 0.0180

Model Performance After Feature Selection:
Test Accuracy: 0.9697
Balanced Accuracy: 0.9697
MCC: 0.9402
Log Loss: 0.1001
F1 Score: 0.9703
Recall: 0.9898
Precision: 0.9516
              precision    recall  f1-score   support

         CH1       0.99      0.95      0.97      9998
       

#### **Sampling Rate 20MSPS**
This section contains data collected using a sampling rate of **20MSPS**. The model is trained on **Probe 3** and tested on **Probe 1**.



In [133]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder that contains the per-probe "P<n>_fd" feature directories.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

adc_num = 2                # ADC number used in the feature file names
train_probe_dataset = 3    # probe number the model is trained on
test_probe_dataset = 1     # probe number the model is evaluated on
sample_rate = 20           # sampling rate value used in the file names (MSPS)

# Probe labels are derived from the dataset numbers above.
train_probe, test_probe = (
    f"Probe{number}" for number in (train_probe_dataset, test_probe_dataset)
)

# Feature Names (Time High, Freq High, Time Low, Freq Low).
# The 19 base names are listed once; the full 38-entry list is all of them
# with the "_H" suffix followed by all of them with the "_L" suffix.
_time_feature_bases = [
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
]
_freq_feature_bases = [
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
]
feature_names = [
    f"{base}_{band}"
    for band in ("H", "L")
    for base in _time_feature_bases + _freq_feature_bases
]

def load_channel_data(probe_name, adc_num, channel, *, rate_msps=None, data_root=None):
    """Load the saved feature array for one probe / ADC / channel combination.

    The file is looked up at
    ``<data_root>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate_msps>_MSPS.npy``
    where ``<n>`` is the trailing number of ``probe_name``.

    Note: this function does not verify ADC consistency; it only checks that
    the file exists, loads it and prints the loaded shape.

    Args:
        probe_name: Probe label such as ``"Probe2"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.
        rate_msps: Sampling rate used in the file name. When omitted, the
            module-level ``sample_rate`` is used.
        data_root: Directory holding the ``P<n>_fd`` folders. When omitted,
            the module-level ``base_directory`` is used.

    Returns:
        The array returned by ``np.load`` for the matching file.

    Raises:
        FileNotFoundError: If the expected ``.npy`` file does not exist.
    """
    # Fall back to the notebook-level configuration so existing three-argument
    # calls keep working unchanged.
    if rate_msps is None:
        rate_msps = sample_rate
    if data_root is None:
        data_root = base_directory

    # Use the whole trailing number so a multi-digit probe ("Probe12") maps to
    # "P12_fd" instead of "P2_fd"; names without trailing digits keep the
    # previous behaviour of using the last character.
    probe_id = probe_name[len(probe_name.rstrip("0123456789")):] or probe_name[-1]

    folder = os.path.join(data_root, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate_msps}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path so a missing export is easy to diagnose.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Channels 1 and 2 are loaded (in that order) for the training probe and then
# for the test probe; the *_ch0 / *_ch1 names hold channel 1 / channel 2.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Stack both channels row-wise; channel 1 rows get label 0, channel 2 rows label 1.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.concatenate((np.zeros(len(train_ch0)), np.ones(len(train_ch1))))
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.concatenate((np.zeros(len(test_ch0)), np.ones(len(test_ch1))))

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and reused for the test probe.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests (selection and final) share the same hyperparameters.
rf_params = {
    "n_estimators": 50,
    "max_depth": 10,
    "min_samples_split": 10,
    "min_samples_leaf": 5,
    "max_features": 'sqrt',
    "random_state": 42,
}
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Identify Important Features: keep columns whose importance is strictly
# above the median importance.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features (in column order, not sorted by importance)
print("\nTop Important Features:")
for feature_idx, feature_importance in zip(
    important_features, feature_importances[important_features]
):
    print(f"{feature_names[feature_idx]}: Importance {feature_importance:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# Create a unique folder name dynamically
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the model, the scaler and the selected column indices side by side.
saved_artifacts = {
    "RandomForest_Model.pkl": rf_selected,
    "Scaler.pkl": scaler,
    "Selected_Features.pkl": important_features,
}
for artifact_file, artifact_obj in saved_artifacts.items():
    joblib.dump(artifact_obj, os.path.join(model_dir, artifact_file))

# Predictions: hard labels plus the probability of class 1 (channel 2).
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# (label, metric function, prediction input); log loss uses probabilities,
# every other metric uses the hard labels.
scalar_metrics = (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
)
for metric_label, metric_fn, metric_input in scalar_metrics:
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Row 0 holds the true class-0 samples, row 1 the true class-1 samples.
fp_rate = cm[0, 1] / cm[0].sum()
fn_rate = cm[1, 0] / cm[1].sum()
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC2 CH1: (4998, 38)
✅ Loaded Probe3 ADC2 CH2: (4998, 38)
✅ Loaded Probe1 ADC2 CH1: (4998, 38)
✅ Loaded Probe1 ADC2 CH2: (4998, 38)

Top Important Features:
Max_H: Importance 0.0596
Min_H: Importance 0.0351
Mean_H: Importance 0.0945
Mean Deviation_H: Importance 0.0063
RMS_H: Importance 0.1270
Centroid_H: Importance 0.0036
Entropy_H: Importance 0.0172
Spread_H: Importance 0.0005
Mean_Freq_H: Importance 0.0650
Variance_H: Importance 0.0612
Max_L: Importance 0.0004
Min_L: Importance 0.0258
Mean_L: Importance 0.2093
RMS_L: Importance 0.1217
Skewness_L: Importance 0.0000
Kurtosis_L: Importance 0.0065
Entropy_L: Importance 0.0241
Mean_Freq_L: Importance 0.0091
Variance_L: Importance 0.1331

Model Performance After Feature Selection:
Test Accuracy: 0.9724
Balanced Accuracy: 0.9724
MCC: 0.9462
Log Loss: 0.0407
F1 Score: 0.9716
Recall: 0.9448
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       0.95      1.00      0.97      4998
         CH

#### **Sampling Rate 30MSPS**
This section contains data collected using a sampling rate of **30MSPS**. The model is trained on **Probe 3** and tested on **Probe 1**.



In [134]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder that contains the per-probe "P<n>_fd" feature directories.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

adc_num = 2                # ADC number used in the feature file names
train_probe_dataset = 3    # probe number the model is trained on
test_probe_dataset = 1     # probe number the model is evaluated on
sample_rate = 30           # sampling rate value used in the file names (MSPS)

# Probe labels are derived from the dataset numbers above.
train_probe, test_probe = (
    f"Probe{number}" for number in (train_probe_dataset, test_probe_dataset)
)

# Feature Names (Time High, Freq High, Time Low, Freq Low).
# The 19 base names are listed once; the full 38-entry list is all of them
# with the "_H" suffix followed by all of them with the "_L" suffix.
_time_feature_bases = [
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
]
_freq_feature_bases = [
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
]
feature_names = [
    f"{base}_{band}"
    for band in ("H", "L")
    for base in _time_feature_bases + _freq_feature_bases
]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature matrix for one probe / ADC / channel.

    The file is expected at
    ``<base_directory>/P<id>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``,
    where ``base_directory`` and ``sample_rate`` are the module-level settings
    of this cell and ``<id>`` is the trailing number of ``probe_name``.
    No cross-check of the file contents against ``adc_num`` is performed.

    Args:
        probe_name: Probe label such as ``"Probe3"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.

    Returns:
        The array stored in the ``.npy`` file, as returned by ``np.load``.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Use the whole trailing number so "Probe12" maps to "P12_fd" rather than
    # "P2_fd"; names without trailing digits keep the old last-character rule.
    trailing_digits = probe_name[len(probe_name.rstrip("0123456789")):]
    probe_id = trailing_digits or probe_name[-1]

    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ------------------------------------------------------------------
# Load both channels of the training probe, then of the test probe
# ------------------------------------------------------------------
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Stack the two channels row-wise; rows from the first file get label 0.0
# and rows from the second file get label 1.0.
n_train_ch0, n_train_ch1 = train_ch0.shape[0], train_ch1.shape[0]
n_test_ch0, n_test_ch1 = test_ch0.shape[0], test_ch1.shape[0]
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.concatenate((np.zeros(n_train_ch0), np.ones(n_train_ch1)))
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.concatenate((np.zeros(n_test_ch0), np.ones(n_test_ch1)))

# ------------------------------------------------------------------
# Standardise features (statistics come from the training set only)
# ------------------------------------------------------------------
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ------------------------------------------------------------------
# First forest: trained on all features to rank them
# ------------------------------------------------------------------
# Both forests in this cell share these hyperparameters and seed.
forest_settings = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**forest_settings)
rf.fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.where(feature_importances > importance_cutoff)[0]
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the kept features (listed in column order, not ranked).
print("\nTop Important Features:")
for kept_idx in important_features:
    kept_name = feature_names[kept_idx]
    kept_score = feature_importances[kept_idx]
    print(f"{kept_name}: Importance {kept_score:.4f}")

# ------------------------------------------------------------------
# Second forest: same settings, selected features only
# ------------------------------------------------------------------
rf_selected = RandomForestClassifier(**forest_settings)
rf_selected.fit(X_train_selected, y_train)

# Output folder is named after the train/test probes, ADC and sample rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected column indices.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and the probability of the label-1 class on the test set.
y_pred_selected = rf_selected.predict(X_test_selected)
class_probabilities = rf_selected.predict_proba(X_test_selected)
y_probs_selected = class_probabilities[:, 1]

# ------------------------------------------------------------------
# Test-set metrics for the model trained on the selected features
# ------------------------------------------------------------------
print("\nModel Performance After Feature Selection:")
accuracy_value = accuracy_score(y_test, y_pred_selected)
print(f"Test Accuracy: {accuracy_value:.4f}")
balanced_value = balanced_accuracy_score(y_test, y_pred_selected)
print(f"Balanced Accuracy: {balanced_value:.4f}")
mcc_value = matthews_corrcoef(y_test, y_pred_selected)
print(f"MCC: {mcc_value:.4f}")
log_loss_value = log_loss(y_test, y_probs_selected)
print(f"Log Loss: {log_loss_value:.4f}")
f1_value = f1_score(y_test, y_pred_selected)
print(f"F1 Score: {f1_value:.4f}")
recall_value = recall_score(y_test, y_pred_selected)
print(f"Recall: {recall_value:.4f}")
precision_value = precision_score(y_test, y_pred_selected)
print(f"Precision: {precision_value:.4f}")
report_text = classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2'])
print(report_text)

# Confusion matrix (rows: true label, columns: predicted label) and the
# false-positive / false-negative rates derived from it.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
false_pos, true_neg = cm[0, 1], cm[0, 0]
false_neg, true_pos = cm[1, 0], cm[1, 1]
fp_rate = false_pos / (false_pos + true_neg)
fn_rate = false_neg / (false_neg + true_pos)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC2 CH1: (3331, 38)
✅ Loaded Probe3 ADC2 CH2: (3332, 38)
✅ Loaded Probe1 ADC2 CH1: (3331, 38)
✅ Loaded Probe1 ADC2 CH2: (3332, 38)

Top Important Features:
Max_H: Importance 0.0055
Min_H: Importance 0.0148
Mean_H: Importance 0.1371
Mean Deviation_H: Importance 0.0022
RMS_H: Importance 0.2265
Skewness_H: Importance 0.0004
Centroid_H: Importance 0.0022
Entropy_H: Importance 0.0114
Spread_H: Importance 0.0003
Mean_Freq_H: Importance 0.0249
Variance_H: Importance 0.0565
Max_L: Importance 0.0005
Min_L: Importance 0.0935
Mean_L: Importance 0.1321
RMS_L: Importance 0.1545
Skewness_L: Importance 0.0003
Entropy_L: Importance 0.0235
Mean_Freq_L: Importance 0.0073
Variance_L: Importance 0.1044

Model Performance After Feature Selection:
Test Accuracy: 0.9668
Balanced Accuracy: 0.9668
MCC: 0.9357
Log Loss: 0.0699
F1 Score: 0.9657
Recall: 0.9337
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       0.94      1.00      0.97      3331
         CH

#### **Sampling Rate 40MSPS**
This section reports results for data collected on **ADC2** at a sampling rate of **40MSPS**. The model is trained on **Probe 3** and tested on **Probe 1**.



In [135]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ------------------------------------------------------------------
# Experiment settings for this cell
# ------------------------------------------------------------------
# Root folder that holds the per-probe feature directories.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC number, train/test probe dataset numbers, and the sample rate
# (the value that appears before "_MSPS" in the feature file names).
adc_num, train_probe_dataset, test_probe_dataset, sample_rate = 2, 3, 1, 40

# Probe labels follow directly from the dataset numbers above.
train_probe, test_probe = (
    f"Probe{dataset_number}"
    for dataset_number in (train_probe_dataset, test_probe_dataset)
)

# Column names of the feature matrix, in the order
# Time High, Freq High, Time Low, Freq Low.
time_features_high = [
    "Max_H", "Min_H", "Mean_H", "Std_H", "Mean Deviation_H",
    "RMS_H", "Skewness_H", "Kurtosis_H", "Peak-to-Peak_H", "Zero Crossing Rate_H",
]
freq_features_high = [
    "Centroid_H", "Entropy_H", "Spread_H", "Skewness_Freq_H", "Mean_Freq_H",
    "Kurtosis_Freq_H", "Irregularity_H", "Variance_H", "Dominant_Freq_H",
]
time_features_low = [
    "Max_L", "Min_L", "Mean_L", "Std_L", "Mean Deviation_L",
    "RMS_L", "Skewness_L", "Kurtosis_L", "Peak-to-Peak_L", "Zero Crossing Rate_L",
]
freq_features_low = [
    "Centroid_L", "Entropy_L", "Spread_L", "Skewness_Freq_L", "Mean_Freq_L",
    "Kurtosis_Freq_L", "Irregularity_L", "Variance_L", "Dominant_Freq_L",
]
feature_names = (
    time_features_high + freq_features_high + time_features_low + freq_features_low
)

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature matrix for one probe / ADC / channel.

    The file is expected at
    ``<base_directory>/P<id>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``,
    where ``base_directory`` and ``sample_rate`` are the module-level settings
    of this cell and ``<id>`` is the trailing number of ``probe_name``.
    No cross-check of the file contents against ``adc_num`` is performed.

    Args:
        probe_name: Probe label such as ``"Probe3"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.

    Returns:
        The array stored in the ``.npy`` file, as returned by ``np.load``.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Use the whole trailing number so "Probe12" maps to "P12_fd" rather than
    # "P2_fd"; names without trailing digits keep the old last-character rule.
    trailing_digits = probe_name[len(probe_name.rstrip("0123456789")):]
    probe_id = trailing_digits or probe_name[-1]

    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ------------------------------------------------------------------
# Load both channels of the training probe, then of the test probe
# ------------------------------------------------------------------
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Stack the two channels row-wise; rows from the first file get label 0.0
# and rows from the second file get label 1.0.
n_train_ch0, n_train_ch1 = train_ch0.shape[0], train_ch1.shape[0]
n_test_ch0, n_test_ch1 = test_ch0.shape[0], test_ch1.shape[0]
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.concatenate((np.zeros(n_train_ch0), np.ones(n_train_ch1)))
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.concatenate((np.zeros(n_test_ch0), np.ones(n_test_ch1)))

# ------------------------------------------------------------------
# Standardise features (statistics come from the training set only)
# ------------------------------------------------------------------
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ------------------------------------------------------------------
# First forest: trained on all features to rank them
# ------------------------------------------------------------------
# Both forests in this cell share these hyperparameters and seed.
forest_settings = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**forest_settings)
rf.fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.where(feature_importances > importance_cutoff)[0]
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the kept features (listed in column order, not ranked).
print("\nTop Important Features:")
for kept_idx in important_features:
    kept_name = feature_names[kept_idx]
    kept_score = feature_importances[kept_idx]
    print(f"{kept_name}: Importance {kept_score:.4f}")

# ------------------------------------------------------------------
# Second forest: same settings, selected features only
# ------------------------------------------------------------------
rf_selected = RandomForestClassifier(**forest_settings)
rf_selected.fit(X_train_selected, y_train)

# Output folder is named after the train/test probes, ADC and sample rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected column indices.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and the probability of the label-1 class on the test set.
y_pred_selected = rf_selected.predict(X_test_selected)
class_probabilities = rf_selected.predict_proba(X_test_selected)
y_probs_selected = class_probabilities[:, 1]

# ------------------------------------------------------------------
# Test-set metrics for the model trained on the selected features
# ------------------------------------------------------------------
print("\nModel Performance After Feature Selection:")
accuracy_value = accuracy_score(y_test, y_pred_selected)
print(f"Test Accuracy: {accuracy_value:.4f}")
balanced_value = balanced_accuracy_score(y_test, y_pred_selected)
print(f"Balanced Accuracy: {balanced_value:.4f}")
mcc_value = matthews_corrcoef(y_test, y_pred_selected)
print(f"MCC: {mcc_value:.4f}")
log_loss_value = log_loss(y_test, y_probs_selected)
print(f"Log Loss: {log_loss_value:.4f}")
f1_value = f1_score(y_test, y_pred_selected)
print(f"F1 Score: {f1_value:.4f}")
recall_value = recall_score(y_test, y_pred_selected)
print(f"Recall: {recall_value:.4f}")
precision_value = precision_score(y_test, y_pred_selected)
print(f"Precision: {precision_value:.4f}")
report_text = classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2'])
print(report_text)

# Confusion matrix (rows: true label, columns: predicted label) and the
# false-positive / false-negative rates derived from it.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
false_pos, true_neg = cm[0, 1], cm[0, 0]
false_neg, true_pos = cm[1, 0], cm[1, 1]
fp_rate = false_pos / (false_pos + true_neg)
fn_rate = false_neg / (false_neg + true_pos)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC2 CH1: (2481, 38)
✅ Loaded Probe3 ADC2 CH2: (2481, 38)
✅ Loaded Probe1 ADC2 CH1: (2481, 38)
✅ Loaded Probe1 ADC2 CH2: (2481, 38)

Top Important Features:
Max_H: Importance 0.0648
Min_H: Importance 0.0288
Mean_H: Importance 0.0963
Mean Deviation_H: Importance 0.0137
RMS_H: Importance 0.1324
Skewness_H: Importance 0.0001
Entropy_H: Importance 0.0138
Mean_Freq_H: Importance 0.0559
Variance_H: Importance 0.0613
Max_L: Importance 0.0003
Min_L: Importance 0.0224
Mean_L: Importance 0.2112
RMS_L: Importance 0.1272
Skewness_L: Importance 0.0005
Kurtosis_L: Importance 0.0004
Centroid_L: Importance 0.0002
Entropy_L: Importance 0.0292
Mean_Freq_L: Importance 0.0115
Variance_L: Importance 0.1301

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0000
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      2481
         

#### **Sampling Rate 50MSPS**
This section reports results for data collected on **ADC2** at a sampling rate of **50MSPS**. The model is trained on **Probe 3** and tested on **Probe 1**.



In [136]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ------------------------------------------------------------------
# Experiment settings for this cell
# ------------------------------------------------------------------
# Root folder that holds the per-probe feature directories.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC number, train/test probe dataset numbers, and the sample rate
# (the value that appears before "_MSPS" in the feature file names).
adc_num, train_probe_dataset, test_probe_dataset, sample_rate = 2, 3, 1, 50

# Probe labels follow directly from the dataset numbers above.
train_probe, test_probe = (
    f"Probe{dataset_number}"
    for dataset_number in (train_probe_dataset, test_probe_dataset)
)

# Column names of the feature matrix, in the order
# Time High, Freq High, Time Low, Freq Low.
time_features_high = [
    "Max_H", "Min_H", "Mean_H", "Std_H", "Mean Deviation_H",
    "RMS_H", "Skewness_H", "Kurtosis_H", "Peak-to-Peak_H", "Zero Crossing Rate_H",
]
freq_features_high = [
    "Centroid_H", "Entropy_H", "Spread_H", "Skewness_Freq_H", "Mean_Freq_H",
    "Kurtosis_Freq_H", "Irregularity_H", "Variance_H", "Dominant_Freq_H",
]
time_features_low = [
    "Max_L", "Min_L", "Mean_L", "Std_L", "Mean Deviation_L",
    "RMS_L", "Skewness_L", "Kurtosis_L", "Peak-to-Peak_L", "Zero Crossing Rate_L",
]
freq_features_low = [
    "Centroid_L", "Entropy_L", "Spread_L", "Skewness_Freq_L", "Mean_Freq_L",
    "Kurtosis_Freq_L", "Irregularity_L", "Variance_L", "Dominant_Freq_L",
]
feature_names = (
    time_features_high + freq_features_high + time_features_low + freq_features_low
)

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature matrix for one probe / ADC / channel.

    The file is expected at
    ``<base_directory>/P<id>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``,
    where ``base_directory`` and ``sample_rate`` are the module-level settings
    of this cell and ``<id>`` is the trailing number of ``probe_name``.
    No cross-check of the file contents against ``adc_num`` is performed.

    Args:
        probe_name: Probe label such as ``"Probe3"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.

    Returns:
        The array stored in the ``.npy`` file, as returned by ``np.load``.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Use the whole trailing number so "Probe12" maps to "P12_fd" rather than
    # "P2_fd"; names without trailing digits keep the old last-character rule.
    trailing_digits = probe_name[len(probe_name.rstrip("0123456789")):]
    probe_id = trailing_digits or probe_name[-1]

    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ------------------------------------------------------------------
# Load both channels of the training probe, then of the test probe
# ------------------------------------------------------------------
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Stack the two channels row-wise; rows from the first file get label 0.0
# and rows from the second file get label 1.0.
n_train_ch0, n_train_ch1 = train_ch0.shape[0], train_ch1.shape[0]
n_test_ch0, n_test_ch1 = test_ch0.shape[0], test_ch1.shape[0]
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.concatenate((np.zeros(n_train_ch0), np.ones(n_train_ch1)))
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.concatenate((np.zeros(n_test_ch0), np.ones(n_test_ch1)))

# ------------------------------------------------------------------
# Standardise features (statistics come from the training set only)
# ------------------------------------------------------------------
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ------------------------------------------------------------------
# First forest: trained on all features to rank them
# ------------------------------------------------------------------
# Both forests in this cell share these hyperparameters and seed.
forest_settings = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**forest_settings)
rf.fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.where(feature_importances > importance_cutoff)[0]
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the kept features (listed in column order, not ranked).
print("\nTop Important Features:")
for kept_idx in important_features:
    kept_name = feature_names[kept_idx]
    kept_score = feature_importances[kept_idx]
    print(f"{kept_name}: Importance {kept_score:.4f}")

# ------------------------------------------------------------------
# Second forest: same settings, selected features only
# ------------------------------------------------------------------
rf_selected = RandomForestClassifier(**forest_settings)
rf_selected.fit(X_train_selected, y_train)

# Output folder is named after the train/test probes, ADC and sample rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected column indices.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and the probability of the label-1 class on the test set.
y_pred_selected = rf_selected.predict(X_test_selected)
class_probabilities = rf_selected.predict_proba(X_test_selected)
y_probs_selected = class_probabilities[:, 1]

# ------------------------------------------------------------------
# Test-set metrics for the model trained on the selected features
# ------------------------------------------------------------------
print("\nModel Performance After Feature Selection:")
accuracy_value = accuracy_score(y_test, y_pred_selected)
print(f"Test Accuracy: {accuracy_value:.4f}")
balanced_value = balanced_accuracy_score(y_test, y_pred_selected)
print(f"Balanced Accuracy: {balanced_value:.4f}")
mcc_value = matthews_corrcoef(y_test, y_pred_selected)
print(f"MCC: {mcc_value:.4f}")
log_loss_value = log_loss(y_test, y_probs_selected)
print(f"Log Loss: {log_loss_value:.4f}")
f1_value = f1_score(y_test, y_pred_selected)
print(f"F1 Score: {f1_value:.4f}")
recall_value = recall_score(y_test, y_pred_selected)
print(f"Recall: {recall_value:.4f}")
precision_value = precision_score(y_test, y_pred_selected)
print(f"Precision: {precision_value:.4f}")
report_text = classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2'])
print(report_text)

# Confusion matrix (rows: true label, columns: predicted label) and the
# false-positive / false-negative rates derived from it.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
false_pos, true_neg = cm[0, 1], cm[0, 0]
false_neg, true_pos = cm[1, 0], cm[1, 1]
fp_rate = false_pos / (false_pos + true_neg)
fn_rate = false_neg / (false_neg + true_pos)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC2 CH1: (1998, 38)
✅ Loaded Probe3 ADC2 CH2: (1998, 38)
✅ Loaded Probe1 ADC2 CH1: (1998, 38)
✅ Loaded Probe1 ADC2 CH2: (1998, 38)

Top Important Features:
Max_H: Importance 0.0599
Min_H: Importance 0.0000
Mean_H: Importance 0.0892
Std_H: Importance 0.0296
Mean Deviation_H: Importance 0.0339
RMS_H: Importance 0.1279
Centroid_H: Importance 0.0005
Entropy_H: Importance 0.0245
Mean_Freq_H: Importance 0.0806
Irregularity_H: Importance 0.0090
Variance_H: Importance 0.0656
Max_L: Importance 0.0022
Min_L: Importance 0.0006
Mean_L: Importance 0.1905
Mean Deviation_L: Importance 0.0232
RMS_L: Importance 0.1217
Entropy_L: Importance 0.0160
Mean_Freq_L: Importance 0.0001
Variance_L: Importance 0.1250

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0006
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      1998
    

#### **Sampling Rate 100MSPS**
This section reports results for data collected on **ADC2** at a sampling rate of **100MSPS**. The model is trained on **Probe 3** and tested on **Probe 1**.



In [137]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ------------------------------------------------------------------
# Experiment settings for this cell
# ------------------------------------------------------------------
# Root folder that holds the per-probe feature directories.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC number, train/test probe dataset numbers, and the sample rate
# (the value that appears before "_MSPS" in the feature file names).
adc_num, train_probe_dataset, test_probe_dataset, sample_rate = 2, 3, 1, 100

# Probe labels follow directly from the dataset numbers above.
train_probe, test_probe = (
    f"Probe{dataset_number}"
    for dataset_number in (train_probe_dataset, test_probe_dataset)
)

# Column names of the feature matrix, in the order
# Time High, Freq High, Time Low, Freq Low.
time_features_high = [
    "Max_H", "Min_H", "Mean_H", "Std_H", "Mean Deviation_H",
    "RMS_H", "Skewness_H", "Kurtosis_H", "Peak-to-Peak_H", "Zero Crossing Rate_H",
]
freq_features_high = [
    "Centroid_H", "Entropy_H", "Spread_H", "Skewness_Freq_H", "Mean_Freq_H",
    "Kurtosis_Freq_H", "Irregularity_H", "Variance_H", "Dominant_Freq_H",
]
time_features_low = [
    "Max_L", "Min_L", "Mean_L", "Std_L", "Mean Deviation_L",
    "RMS_L", "Skewness_L", "Kurtosis_L", "Peak-to-Peak_L", "Zero Crossing Rate_L",
]
freq_features_low = [
    "Centroid_L", "Entropy_L", "Spread_L", "Skewness_Freq_L", "Mean_Freq_L",
    "Kurtosis_Freq_L", "Irregularity_L", "Variance_L", "Dominant_Freq_L",
]
feature_names = (
    time_features_high + freq_features_high + time_features_low + freq_features_low
)

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature matrix for one probe / ADC / channel.

    The file is expected at
    ``<base_directory>/P<id>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``,
    where ``base_directory`` and ``sample_rate`` are the module-level settings
    of this cell and ``<id>`` is the trailing number of ``probe_name``.
    No cross-check of the file contents against ``adc_num`` is performed.

    Args:
        probe_name: Probe label such as ``"Probe3"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.

    Returns:
        The array stored in the ``.npy`` file, as returned by ``np.load``.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Use the whole trailing number so "Probe12" maps to "P12_fd" rather than
    # "P2_fd"; names without trailing digits keep the old last-character rule.
    trailing_digits = probe_name[len(probe_name.rstrip("0123456789")):]
    probe_id = trailing_digits or probe_name[-1]

    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ------------------------------------------------------------------
# Load both channels of the training probe, then of the test probe
# ------------------------------------------------------------------
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Stack the two channels row-wise; rows from the first file get label 0.0
# and rows from the second file get label 1.0.
n_train_ch0, n_train_ch1 = train_ch0.shape[0], train_ch1.shape[0]
n_test_ch0, n_test_ch1 = test_ch0.shape[0], test_ch1.shape[0]
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.concatenate((np.zeros(n_train_ch0), np.ones(n_train_ch1)))
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.concatenate((np.zeros(n_test_ch0), np.ones(n_test_ch1)))

# ------------------------------------------------------------------
# Standardise features (statistics come from the training set only)
# ------------------------------------------------------------------
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ------------------------------------------------------------------
# First forest: trained on all features to rank them
# ------------------------------------------------------------------
# Both forests in this cell share these hyperparameters and seed.
forest_settings = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**forest_settings)
rf.fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.where(feature_importances > importance_cutoff)[0]
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the kept features (listed in column order, not ranked).
print("\nTop Important Features:")
for kept_idx in important_features:
    kept_name = feature_names[kept_idx]
    kept_score = feature_importances[kept_idx]
    print(f"{kept_name}: Importance {kept_score:.4f}")

# ------------------------------------------------------------------
# Second forest: same settings, selected features only
# ------------------------------------------------------------------
rf_selected = RandomForestClassifier(**forest_settings)
rf_selected.fit(X_train_selected, y_train)

# Output folder is named after the train/test probes, ADC and sample rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected column indices.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and the probability of the label-1 class on the test set.
y_pred_selected = rf_selected.predict(X_test_selected)
class_probabilities = rf_selected.predict_proba(X_test_selected)
y_probs_selected = class_probabilities[:, 1]

# ------------------------------------------------------------------
# Test-set metrics for the model trained on the selected features
# ------------------------------------------------------------------
print("\nModel Performance After Feature Selection:")
accuracy_value = accuracy_score(y_test, y_pred_selected)
print(f"Test Accuracy: {accuracy_value:.4f}")
balanced_value = balanced_accuracy_score(y_test, y_pred_selected)
print(f"Balanced Accuracy: {balanced_value:.4f}")
mcc_value = matthews_corrcoef(y_test, y_pred_selected)
print(f"MCC: {mcc_value:.4f}")
log_loss_value = log_loss(y_test, y_probs_selected)
print(f"Log Loss: {log_loss_value:.4f}")
f1_value = f1_score(y_test, y_pred_selected)
print(f"F1 Score: {f1_value:.4f}")
recall_value = recall_score(y_test, y_pred_selected)
print(f"Recall: {recall_value:.4f}")
precision_value = precision_score(y_test, y_pred_selected)
print(f"Precision: {precision_value:.4f}")
report_text = classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2'])
print(report_text)

# Confusion matrix (rows: true label, columns: predicted label) and the
# false-positive / false-negative rates derived from it.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
false_pos, true_neg = cm[0, 1], cm[0, 0]
false_neg, true_pos = cm[1, 0], cm[1, 1]
fp_rate = false_pos / (false_pos + true_neg)
fn_rate = false_neg / (false_neg + true_pos)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC2 CH1: (998, 38)
✅ Loaded Probe3 ADC2 CH2: (998, 38)
✅ Loaded Probe1 ADC2 CH1: (998, 38)
✅ Loaded Probe1 ADC2 CH2: (998, 38)

Top Important Features:
Max_H: Importance 0.1024
Mean_H: Importance 0.0815
Std_H: Importance 0.0167
Mean Deviation_H: Importance 0.0179
RMS_H: Importance 0.1285
Entropy_H: Importance 0.0198
Spread_H: Importance 0.0001
Mean_Freq_H: Importance 0.0578
Kurtosis_Freq_H: Importance 0.0084
Variance_H: Importance 0.0604
Min_L: Importance 0.0837
Mean_L: Importance 0.1815
Std_L: Importance 0.0001
RMS_L: Importance 0.1216
Skewness_L: Importance 0.0002
Kurtosis_L: Importance 0.0002
Entropy_L: Importance 0.0373
Mean_Freq_L: Importance 0.0001
Variance_L: Importance 0.0819

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0006
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       998
         C

#### **Sampling Rate 150MSPS**
This section reports results for data collected on **ADC2** at a sampling rate of **150MSPS**. The model is trained on **Probe 3** and tested on **Probe 1**.



In [138]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ------------------------------------------------------------------
# Experiment settings for this cell
# ------------------------------------------------------------------
# Root folder that holds the per-probe feature directories.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC number, train/test probe dataset numbers, and the sample rate
# (the value that appears before "_MSPS" in the feature file names).
adc_num, train_probe_dataset, test_probe_dataset, sample_rate = 2, 3, 1, 150

# Probe labels follow directly from the dataset numbers above.
train_probe, test_probe = (
    f"Probe{dataset_number}"
    for dataset_number in (train_probe_dataset, test_probe_dataset)
)

# Column names of the feature matrix, in the order
# Time High, Freq High, Time Low, Freq Low.
time_features_high = [
    "Max_H", "Min_H", "Mean_H", "Std_H", "Mean Deviation_H",
    "RMS_H", "Skewness_H", "Kurtosis_H", "Peak-to-Peak_H", "Zero Crossing Rate_H",
]
freq_features_high = [
    "Centroid_H", "Entropy_H", "Spread_H", "Skewness_Freq_H", "Mean_Freq_H",
    "Kurtosis_Freq_H", "Irregularity_H", "Variance_H", "Dominant_Freq_H",
]
time_features_low = [
    "Max_L", "Min_L", "Mean_L", "Std_L", "Mean Deviation_L",
    "RMS_L", "Skewness_L", "Kurtosis_L", "Peak-to-Peak_L", "Zero Crossing Rate_L",
]
freq_features_low = [
    "Centroid_L", "Entropy_L", "Spread_L", "Skewness_Freq_L", "Mean_Freq_L",
    "Kurtosis_Freq_L", "Irregularity_L", "Variance_L", "Dominant_Freq_L",
]
feature_names = (
    time_features_high + freq_features_high + time_features_low + freq_features_low
)

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature matrix for one probe / ADC / channel.

    The file is expected at
    ``<base_directory>/P<id>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``,
    where ``base_directory`` and ``sample_rate`` are the module-level settings
    of this cell and ``<id>`` is the trailing number of ``probe_name``.
    No cross-check of the file contents against ``adc_num`` is performed.

    Args:
        probe_name: Probe label such as ``"Probe3"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.

    Returns:
        The array stored in the ``.npy`` file, as returned by ``np.load``.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Use the whole trailing number so "Probe12" maps to "P12_fd" rather than
    # "P2_fd"; names without trailing digits keep the old last-character rule.
    trailing_digits = probe_name[len(probe_name.rstrip("0123456789")):]
    probe_id = trailing_digits or probe_name[-1]

    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ------------------------------------------------------------------
# Load both channels of the training probe, then of the test probe
# ------------------------------------------------------------------
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Stack the two channels row-wise; rows from the first file get label 0.0
# and rows from the second file get label 1.0.
n_train_ch0, n_train_ch1 = train_ch0.shape[0], train_ch1.shape[0]
n_test_ch0, n_test_ch1 = test_ch0.shape[0], test_ch1.shape[0]
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.concatenate((np.zeros(n_train_ch0), np.ones(n_train_ch1)))
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.concatenate((np.zeros(n_test_ch0), np.ones(n_test_ch1)))

# ------------------------------------------------------------------
# Standardise features (statistics come from the training set only)
# ------------------------------------------------------------------
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ------------------------------------------------------------------
# First forest: trained on all features to rank them
# ------------------------------------------------------------------
# Both forests in this cell share these hyperparameters and seed.
forest_settings = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**forest_settings)
rf.fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.where(feature_importances > importance_cutoff)[0]
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the kept features (listed in column order, not ranked).
print("\nTop Important Features:")
for kept_idx in important_features:
    kept_name = feature_names[kept_idx]
    kept_score = feature_importances[kept_idx]
    print(f"{kept_name}: Importance {kept_score:.4f}")

# ------------------------------------------------------------------
# Second forest: same settings, selected features only
# ------------------------------------------------------------------
rf_selected = RandomForestClassifier(**forest_settings)
rf_selected.fit(X_train_selected, y_train)

# Output folder is named after the train/test probes, ADC and sample rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected column indices.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and the probability of the label-1 class on the test set.
y_pred_selected = rf_selected.predict(X_test_selected)
class_probabilities = rf_selected.predict_proba(X_test_selected)
y_probs_selected = class_probabilities[:, 1]

# ------------------------------------------------------------------
# Test-set metrics for the model trained on the selected features
# ------------------------------------------------------------------
print("\nModel Performance After Feature Selection:")
accuracy_value = accuracy_score(y_test, y_pred_selected)
print(f"Test Accuracy: {accuracy_value:.4f}")
balanced_value = balanced_accuracy_score(y_test, y_pred_selected)
print(f"Balanced Accuracy: {balanced_value:.4f}")
mcc_value = matthews_corrcoef(y_test, y_pred_selected)
print(f"MCC: {mcc_value:.4f}")
log_loss_value = log_loss(y_test, y_probs_selected)
print(f"Log Loss: {log_loss_value:.4f}")
f1_value = f1_score(y_test, y_pred_selected)
print(f"F1 Score: {f1_value:.4f}")
recall_value = recall_score(y_test, y_pred_selected)
print(f"Recall: {recall_value:.4f}")
precision_value = precision_score(y_test, y_pred_selected)
print(f"Precision: {precision_value:.4f}")
report_text = classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2'])
print(report_text)

# Confusion matrix (rows: true label, columns: predicted label) and the
# false-positive / false-negative rates derived from it.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
false_pos, true_neg = cm[0, 1], cm[0, 0]
false_neg, true_pos = cm[1, 0], cm[1, 1]
fp_rate = false_pos / (false_pos + true_neg)
fn_rate = false_neg / (false_neg + true_pos)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC2 CH1: (665, 38)
✅ Loaded Probe3 ADC2 CH2: (665, 38)
✅ Loaded Probe1 ADC2 CH1: (665, 38)
✅ Loaded Probe1 ADC2 CH2: (665, 38)

Top Important Features:
Max_H: Importance 0.1038
Min_H: Importance 0.0006
Mean_H: Importance 0.1025
Std_H: Importance 0.0110
Mean Deviation_H: Importance 0.0415
RMS_H: Importance 0.1069
Entropy_H: Importance 0.0120
Spread_H: Importance 0.0049
Mean_Freq_H: Importance 0.0215
Variance_H: Importance 0.0687
Min_L: Importance 0.0630
Mean_L: Importance 0.1443
Mean Deviation_L: Importance 0.0921
RMS_L: Importance 0.1328
Skewness_L: Importance 0.0007
Entropy_L: Importance 0.0111
Mean_Freq_L: Importance 0.0071
Irregularity_L: Importance 0.0063
Variance_L: Importance 0.0690

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0000
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       665
     

#### **Sampling Rate 200MSPS**
This section reports results for data collected on **ADC2** at a sampling rate of **200MSPS**. The model is trained on **Probe 3** and tested on **Probe 1**.



In [139]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder of the saved feature files; load_channel_data() looks for
# per-probe sub-folders named P<n>_fd inside it.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# adc_num and sample_rate are inserted into the feature file names
# (..._ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy).
# train_probe_dataset / test_probe_dataset are the probe numbers used for
# training and for testing.
adc_num = 2
train_probe_dataset = 3
test_probe_dataset = 1
sample_rate = 200

# Automatically determine probe names based on dataset numbers
train_probe = f"Probe{train_probe_dataset}"
test_probe = f"Probe{test_probe_dataset}"

# Feature Names (Time High, Freq High, Time Low, Freq Low)
# 38 labels in total: 10 + 9 with the _H suffix followed by 10 + 9 with the
# _L suffix. They are looked up by column index when importances are printed,
# so the order is assumed to match the columns of the loaded .npy arrays
# (TODO confirm against the feature-extraction code).
feature_names = [
    "Max_H", "Min_H", "Mean_H", "Std_H", "Mean Deviation_H", "RMS_H", "Skewness_H", "Kurtosis_H", "Peak-to-Peak_H", "Zero Crossing Rate_H",
    "Centroid_H", "Entropy_H", "Spread_H", "Skewness_Freq_H", "Mean_Freq_H", "Kurtosis_Freq_H", "Irregularity_H", "Variance_H", "Dominant_Freq_H",
    "Max_L", "Min_L", "Mean_L", "Std_L", "Mean Deviation_L", "RMS_L", "Skewness_L", "Kurtosis_L", "Peak-to-Peak_L", "Zero Crossing Rate_L",
    "Centroid_L", "Entropy_L", "Spread_L", "Skewness_Freq_L", "Mean_Freq_L", "Kurtosis_Freq_L", "Irregularity_L", "Variance_L", "Dominant_Freq_L"
]

def load_channel_data(probe_name, adc_num, channel):
    """Read the saved feature matrix for one probe / ADC / channel.

    The file is searched for under the module-level ``base_directory`` in the
    folder ``P<x>_fd``, where ``<x>`` is the last character of ``probe_name``.
    Its name is built from the probe name, ADC number, channel number and the
    module-level ``sample_rate``.

    Args:
        probe_name: Probe label such as ``"Probe1"``.
        adc_num: ADC number inserted into the file name.
        channel: Channel number inserted into the file name.

    Returns:
        The array returned by ``np.load`` for that file.

    Raises:
        FileNotFoundError: If the expected ``.npy`` file does not exist.
    """
    npy_path = os.path.join(
        base_directory,
        f"P{probe_name[-1]}_fd",
        f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy',
    )

    # Report the full path so a missing capture is easy to locate.
    if os.path.exists(npy_path):
        features = np.load(npy_path)
        print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {features.shape}")
        return features

    raise FileNotFoundError(f"File not found: {npy_path}")

# ==================================================================
# Training & Test Data
# ==================================================================
# NOTE: the *_ch0 / *_ch1 variables hold the files for channel 1 and
# channel 2 respectively (third argument of load_channel_data).
train_ch0 = load_channel_data(train_probe, adc_num, 1)
train_ch1 = load_channel_data(train_probe, adc_num, 2)
test_ch0 = load_channel_data(test_probe, adc_num, 1)
test_ch1 = load_channel_data(test_probe, adc_num, 2)

# Stack the two channels row-wise and build matching labels:
# 0.0 for every channel-1 row, 1.0 for every channel-2 row.
X_train = np.vstack([train_ch0, train_ch1])
y_train = np.concatenate([np.zeros(train_ch0.shape[0]), np.ones(train_ch1.shape[0])])
X_test = np.vstack([test_ch0, test_ch1])
y_test = np.concatenate([np.zeros(test_ch0.shape[0]), np.ones(test_ch1.shape[0])])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only; the test probe is
# transformed with those same statistics.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# First forest is fitted on all columns; in this cell it is used only to
# obtain feature importances.
rf = RandomForestClassifier(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42
)
rf.fit(X_train_scaled, y_train)

# Identify Important Features
# Keep the column indices whose importance is strictly greater than the
# median (50th percentile) importance, then slice both matrices with them.
feature_importances = rf.feature_importances_
important_features = np.where(feature_importances > np.percentile(feature_importances, 50))[0]
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features
# Listed in column order (ascending index), not sorted by importance.
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
# Second forest: same hyper-parameters and seed, fitted on the selected
# columns only.
rf_selected = RandomForestClassifier(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42
)
rf_selected.fit(X_train_selected, y_train)

# Create a unique folder name dynamically
# The path is relative, so the folder is created under the current working
# directory; exist_ok=True lets a re-run reuse an existing folder.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the Model and Scaler
# Selected_Features.pkl holds the selected column indices (important_features).
joblib.dump(rf_selected, os.path.join(model_dir, "RandomForest_Model.pkl"))
joblib.dump(scaler, os.path.join(model_dir, "Scaler.pkl"))
joblib.dump(important_features, os.path.join(model_dir, "Selected_Features.pkl"))

# Predictions
# predict_proba columns follow rf_selected.classes_; column 1 is therefore the
# probability of label 1.0 (channel 2).
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
# F1, recall and precision are called with scikit-learn's defaults (binary
# averaging, positive label 1), so they describe channel 2 (label 1).
print("\nModel Performance After Feature Selection:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_selected):.4f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred_selected):.4f}")
print(f"MCC: {matthews_corrcoef(y_test, y_pred_selected):.4f}")
print(f"Log Loss: {log_loss(y_test, y_probs_selected):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred_selected):.4f}")
print(f"Recall: {recall_score(y_test, y_pred_selected):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_selected):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
# scikit-learn's confusion matrix has true labels on rows and predictions on
# columns, so cm[0][1] counts label-0 rows predicted as 1 (false positives)
# and cm[1][0] counts label-1 rows predicted as 0 (false negatives).
# Each rate is normalised by its row total.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0][1] / (cm[0][1] + cm[0][0])
fn_rate = cm[1][0] / (cm[1][0] + cm[1][1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC2 CH1: (498, 38)
✅ Loaded Probe3 ADC2 CH2: (498, 38)
✅ Loaded Probe1 ADC2 CH1: (498, 38)
✅ Loaded Probe1 ADC2 CH2: (498, 38)

Top Important Features:
Max_H: Importance 0.1004
Mean_H: Importance 0.0807
Std_H: Importance 0.0191
Mean Deviation_H: Importance 0.0641
RMS_H: Importance 0.1223
Centroid_H: Importance 0.0009
Entropy_H: Importance 0.1000
Mean_Freq_H: Importance 0.0390
Kurtosis_Freq_H: Importance 0.0137
Variance_H: Importance 0.0200
Min_L: Importance 0.0392
Mean_L: Importance 0.1410
Mean Deviation_L: Importance 0.0800
RMS_L: Importance 0.0808
Entropy_L: Importance 0.0189
Variance_L: Importance 0.0800

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0100
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       498
         CH2       1.00      1.00      1.00       498

    accuracy                     

#### **Sampling Rate 250MSPS**
This section contains data collected using a sampling rate of **250MSPS**. The model is trained on **Probe 3** and tested on **Probe 1**.



In [140]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder of the saved feature files; load_channel_data() looks for
# per-probe sub-folders named P<n>_fd inside it.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# adc_num and sample_rate are inserted into the feature file names
# (..._ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy).
# train_probe_dataset / test_probe_dataset are the probe numbers used for
# training and for testing.
adc_num = 2
train_probe_dataset = 3
test_probe_dataset = 1
sample_rate = 250

# Automatically determine probe names based on dataset numbers
train_probe = f"Probe{train_probe_dataset}"
test_probe = f"Probe{test_probe_dataset}"

# Feature Names (Time High, Freq High, Time Low, Freq Low)
# 38 labels in total: 10 + 9 with the _H suffix followed by 10 + 9 with the
# _L suffix. They are looked up by column index when importances are printed,
# so the order is assumed to match the columns of the loaded .npy arrays
# (TODO confirm against the feature-extraction code).
feature_names = [
    "Max_H", "Min_H", "Mean_H", "Std_H", "Mean Deviation_H", "RMS_H", "Skewness_H", "Kurtosis_H", "Peak-to-Peak_H", "Zero Crossing Rate_H",
    "Centroid_H", "Entropy_H", "Spread_H", "Skewness_Freq_H", "Mean_Freq_H", "Kurtosis_Freq_H", "Irregularity_H", "Variance_H", "Dominant_Freq_H",
    "Max_L", "Min_L", "Mean_L", "Std_L", "Mean Deviation_L", "RMS_L", "Skewness_L", "Kurtosis_L", "Peak-to-Peak_L", "Zero Crossing Rate_L",
    "Centroid_L", "Entropy_L", "Spread_L", "Skewness_Freq_L", "Mean_Freq_L", "Kurtosis_Freq_L", "Irregularity_L", "Variance_L", "Dominant_Freq_L"
]

def load_channel_data(probe_name, adc_num, channel):
    """Read the saved feature matrix for one probe / ADC / channel.

    The file is searched for under the module-level ``base_directory`` in the
    folder ``P<x>_fd``, where ``<x>`` is the last character of ``probe_name``.
    Its name is built from the probe name, ADC number, channel number and the
    module-level ``sample_rate``.

    Args:
        probe_name: Probe label such as ``"Probe1"``.
        adc_num: ADC number inserted into the file name.
        channel: Channel number inserted into the file name.

    Returns:
        The array returned by ``np.load`` for that file.

    Raises:
        FileNotFoundError: If the expected ``.npy`` file does not exist.
    """
    npy_path = os.path.join(
        base_directory,
        f"P{probe_name[-1]}_fd",
        f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy',
    )

    # Report the full path so a missing capture is easy to locate.
    if os.path.exists(npy_path):
        features = np.load(npy_path)
        print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {features.shape}")
        return features

    raise FileNotFoundError(f"File not found: {npy_path}")

# ==================================================================
# Training & Test Data
# ==================================================================
# NOTE: the *_ch0 / *_ch1 variables hold the files for channel 1 and
# channel 2 respectively (third argument of load_channel_data).
train_ch0 = load_channel_data(train_probe, adc_num, 1)
train_ch1 = load_channel_data(train_probe, adc_num, 2)
test_ch0 = load_channel_data(test_probe, adc_num, 1)
test_ch1 = load_channel_data(test_probe, adc_num, 2)

# Stack the two channels row-wise and build matching labels:
# 0.0 for every channel-1 row, 1.0 for every channel-2 row.
X_train = np.vstack([train_ch0, train_ch1])
y_train = np.concatenate([np.zeros(train_ch0.shape[0]), np.ones(train_ch1.shape[0])])
X_test = np.vstack([test_ch0, test_ch1])
y_test = np.concatenate([np.zeros(test_ch0.shape[0]), np.ones(test_ch1.shape[0])])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only; the test probe is
# transformed with those same statistics.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# First forest is fitted on all columns; in this cell it is used only to
# obtain feature importances.
rf = RandomForestClassifier(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42
)
rf.fit(X_train_scaled, y_train)

# Identify Important Features
# Keep the column indices whose importance is strictly greater than the
# median (50th percentile) importance, then slice both matrices with them.
feature_importances = rf.feature_importances_
important_features = np.where(feature_importances > np.percentile(feature_importances, 50))[0]
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features
# Listed in column order (ascending index), not sorted by importance.
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
# Second forest: same hyper-parameters and seed, fitted on the selected
# columns only.
rf_selected = RandomForestClassifier(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42
)
rf_selected.fit(X_train_selected, y_train)

# Create a unique folder name dynamically
# The path is relative, so the folder is created under the current working
# directory; exist_ok=True lets a re-run reuse an existing folder.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the Model and Scaler
# Selected_Features.pkl holds the selected column indices (important_features).
joblib.dump(rf_selected, os.path.join(model_dir, "RandomForest_Model.pkl"))
joblib.dump(scaler, os.path.join(model_dir, "Scaler.pkl"))
joblib.dump(important_features, os.path.join(model_dir, "Selected_Features.pkl"))

# Predictions
# predict_proba columns follow rf_selected.classes_; column 1 is therefore the
# probability of label 1.0 (channel 2).
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
# F1, recall and precision are called with scikit-learn's defaults (binary
# averaging, positive label 1), so they describe channel 2 (label 1).
print("\nModel Performance After Feature Selection:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_selected):.4f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred_selected):.4f}")
print(f"MCC: {matthews_corrcoef(y_test, y_pred_selected):.4f}")
print(f"Log Loss: {log_loss(y_test, y_probs_selected):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred_selected):.4f}")
print(f"Recall: {recall_score(y_test, y_pred_selected):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_selected):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
# scikit-learn's confusion matrix has true labels on rows and predictions on
# columns, so cm[0][1] counts label-0 rows predicted as 1 (false positives)
# and cm[1][0] counts label-1 rows predicted as 0 (false negatives).
# Each rate is normalised by its row total.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0][1] / (cm[0][1] + cm[0][0])
fn_rate = cm[1][0] / (cm[1][0] + cm[1][1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC2 CH1: (398, 38)
✅ Loaded Probe3 ADC2 CH2: (398, 38)
✅ Loaded Probe1 ADC2 CH1: (398, 38)
✅ Loaded Probe1 ADC2 CH2: (398, 38)

Top Important Features:
Max_H: Importance 0.1002
Mean_H: Importance 0.0800
Std_H: Importance 0.0191
Mean Deviation_H: Importance 0.0206
RMS_H: Importance 0.1009
Entropy_H: Importance 0.1000
Skewness_Freq_H: Importance 0.0007
Mean_Freq_H: Importance 0.0199
Kurtosis_Freq_H: Importance 0.0185
Irregularity_H: Importance 0.0001
Min_L: Importance 0.0190
Mean_L: Importance 0.1208
Std_L: Importance 0.1202
Mean Deviation_L: Importance 0.0600
RMS_L: Importance 0.0800
Entropy_L: Importance 0.0600
Variance_L: Importance 0.0800

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0052
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       398
         CH2       1.00      1.00      1.00       398


#### **Sampling Rate 300MSPS**
This section contains data collected using a sampling rate of **300MSPS**. The model is trained on **Probe 3** and tested on **Probe 1**.



In [141]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder of the saved feature files; load_channel_data() looks for
# per-probe sub-folders named P<n>_fd inside it.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# adc_num and sample_rate are inserted into the feature file names
# (..._ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy).
# train_probe_dataset / test_probe_dataset are the probe numbers used for
# training and for testing.
adc_num = 2
train_probe_dataset = 3
test_probe_dataset = 1
sample_rate = 300

# Automatically determine probe names based on dataset numbers
train_probe = f"Probe{train_probe_dataset}"
test_probe = f"Probe{test_probe_dataset}"

# Feature Names (Time High, Freq High, Time Low, Freq Low)
# 38 labels in total: 10 + 9 with the _H suffix followed by 10 + 9 with the
# _L suffix. They are looked up by column index when importances are printed,
# so the order is assumed to match the columns of the loaded .npy arrays
# (TODO confirm against the feature-extraction code).
feature_names = [
    "Max_H", "Min_H", "Mean_H", "Std_H", "Mean Deviation_H", "RMS_H", "Skewness_H", "Kurtosis_H", "Peak-to-Peak_H", "Zero Crossing Rate_H",
    "Centroid_H", "Entropy_H", "Spread_H", "Skewness_Freq_H", "Mean_Freq_H", "Kurtosis_Freq_H", "Irregularity_H", "Variance_H", "Dominant_Freq_H",
    "Max_L", "Min_L", "Mean_L", "Std_L", "Mean Deviation_L", "RMS_L", "Skewness_L", "Kurtosis_L", "Peak-to-Peak_L", "Zero Crossing Rate_L",
    "Centroid_L", "Entropy_L", "Spread_L", "Skewness_Freq_L", "Mean_Freq_L", "Kurtosis_Freq_L", "Irregularity_L", "Variance_L", "Dominant_Freq_L"
]

def load_channel_data(probe_name, adc_num, channel):
    """Read the saved feature matrix for one probe / ADC / channel.

    The file is searched for under the module-level ``base_directory`` in the
    folder ``P<x>_fd``, where ``<x>`` is the last character of ``probe_name``.
    Its name is built from the probe name, ADC number, channel number and the
    module-level ``sample_rate``.

    Args:
        probe_name: Probe label such as ``"Probe1"``.
        adc_num: ADC number inserted into the file name.
        channel: Channel number inserted into the file name.

    Returns:
        The array returned by ``np.load`` for that file.

    Raises:
        FileNotFoundError: If the expected ``.npy`` file does not exist.
    """
    npy_path = os.path.join(
        base_directory,
        f"P{probe_name[-1]}_fd",
        f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy',
    )

    # Report the full path so a missing capture is easy to locate.
    if os.path.exists(npy_path):
        features = np.load(npy_path)
        print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {features.shape}")
        return features

    raise FileNotFoundError(f"File not found: {npy_path}")

# ==================================================================
# Training & Test Data
# ==================================================================
# NOTE: the *_ch0 / *_ch1 variables hold the files for channel 1 and
# channel 2 respectively (third argument of load_channel_data).
train_ch0 = load_channel_data(train_probe, adc_num, 1)
train_ch1 = load_channel_data(train_probe, adc_num, 2)
test_ch0 = load_channel_data(test_probe, adc_num, 1)
test_ch1 = load_channel_data(test_probe, adc_num, 2)

# Stack the two channels row-wise and build matching labels:
# 0.0 for every channel-1 row, 1.0 for every channel-2 row.
X_train = np.vstack([train_ch0, train_ch1])
y_train = np.concatenate([np.zeros(train_ch0.shape[0]), np.ones(train_ch1.shape[0])])
X_test = np.vstack([test_ch0, test_ch1])
y_test = np.concatenate([np.zeros(test_ch0.shape[0]), np.ones(test_ch1.shape[0])])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only; the test probe is
# transformed with those same statistics.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# First forest is fitted on all columns; in this cell it is used only to
# obtain feature importances.
rf = RandomForestClassifier(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42
)
rf.fit(X_train_scaled, y_train)

# Identify Important Features
# Keep the column indices whose importance is strictly greater than the
# median (50th percentile) importance, then slice both matrices with them.
feature_importances = rf.feature_importances_
important_features = np.where(feature_importances > np.percentile(feature_importances, 50))[0]
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features
# Listed in column order (ascending index), not sorted by importance.
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
# Second forest: same hyper-parameters and seed, fitted on the selected
# columns only.
rf_selected = RandomForestClassifier(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42
)
rf_selected.fit(X_train_selected, y_train)

# Create a unique folder name dynamically
# The path is relative, so the folder is created under the current working
# directory; exist_ok=True lets a re-run reuse an existing folder.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the Model and Scaler
# Selected_Features.pkl holds the selected column indices (important_features).
joblib.dump(rf_selected, os.path.join(model_dir, "RandomForest_Model.pkl"))
joblib.dump(scaler, os.path.join(model_dir, "Scaler.pkl"))
joblib.dump(important_features, os.path.join(model_dir, "Selected_Features.pkl"))

# Predictions
# predict_proba columns follow rf_selected.classes_; column 1 is therefore the
# probability of label 1.0 (channel 2).
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
# F1, recall and precision are called with scikit-learn's defaults (binary
# averaging, positive label 1), so they describe channel 2 (label 1).
print("\nModel Performance After Feature Selection:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_selected):.4f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred_selected):.4f}")
print(f"MCC: {matthews_corrcoef(y_test, y_pred_selected):.4f}")
print(f"Log Loss: {log_loss(y_test, y_probs_selected):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred_selected):.4f}")
print(f"Recall: {recall_score(y_test, y_pred_selected):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_selected):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
# scikit-learn's confusion matrix has true labels on rows and predictions on
# columns, so cm[0][1] counts label-0 rows predicted as 1 (false positives)
# and cm[1][0] counts label-1 rows predicted as 0 (false negatives).
# Each rate is normalised by its row total.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0][1] / (cm[0][1] + cm[0][0])
fn_rate = cm[1][0] / (cm[1][0] + cm[1][1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC2 CH1: (331, 38)
✅ Loaded Probe3 ADC2 CH2: (332, 38)
✅ Loaded Probe1 ADC2 CH1: (332, 38)
✅ Loaded Probe1 ADC2 CH2: (332, 38)

Top Important Features:
Max_H: Importance 0.1000
Mean_H: Importance 0.0850
Mean Deviation_H: Importance 0.0800
RMS_H: Importance 0.0856
Skewness_H: Importance 0.0000
Centroid_H: Importance 0.0000
Entropy_H: Importance 0.1400
Spread_H: Importance 0.0157
Mean_Freq_H: Importance 0.0405
Kurtosis_Freq_H: Importance 0.0142
Variance_H: Importance 0.0200
Min_L: Importance 0.0398
Mean_L: Importance 0.1200
Mean Deviation_L: Importance 0.0800
RMS_L: Importance 0.0800
Peak-to-Peak_L: Importance 0.0003
Entropy_L: Importance 0.0193
Mean_Freq_L: Importance 0.0196
Variance_L: Importance 0.0600

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0265
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00 

### Training **Probe 3** data and Testing on **Probe 2**
The model trained on Probe 3 is evaluated on Probe 2 to test how well it generalizes when the probe is changed.

#### **Sampling Rate 10MSPS**
This section contains data collected using a sampling rate of **10MSPS**. The model is trained on **Probe 3** and tested on **Probe 2**.



In [142]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder of the saved feature files; load_channel_data() looks for
# per-probe sub-folders named P<n>_fd inside it.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# adc_num and sample_rate are inserted into the feature file names
# (..._ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy).
# train_probe_dataset / test_probe_dataset are the probe numbers used for
# training and for testing.
adc_num = 2
train_probe_dataset = 3
test_probe_dataset = 2
sample_rate = 10

# Automatically determine probe names based on dataset numbers
train_probe = f"Probe{train_probe_dataset}"
test_probe = f"Probe{test_probe_dataset}"

# Feature Names (Time High, Freq High, Time Low, Freq Low)
# 38 labels in total: 10 + 9 with the _H suffix followed by 10 + 9 with the
# _L suffix. They are looked up by column index when importances are printed,
# so the order is assumed to match the columns of the loaded .npy arrays
# (TODO confirm against the feature-extraction code).
feature_names = [
    "Max_H", "Min_H", "Mean_H", "Std_H", "Mean Deviation_H", "RMS_H", "Skewness_H", "Kurtosis_H", "Peak-to-Peak_H", "Zero Crossing Rate_H",
    "Centroid_H", "Entropy_H", "Spread_H", "Skewness_Freq_H", "Mean_Freq_H", "Kurtosis_Freq_H", "Irregularity_H", "Variance_H", "Dominant_Freq_H",
    "Max_L", "Min_L", "Mean_L", "Std_L", "Mean Deviation_L", "RMS_L", "Skewness_L", "Kurtosis_L", "Peak-to-Peak_L", "Zero Crossing Rate_L",
    "Centroid_L", "Entropy_L", "Spread_L", "Skewness_Freq_L", "Mean_Freq_L", "Kurtosis_Freq_L", "Irregularity_L", "Variance_L", "Dominant_Freq_L"
]

def load_channel_data(probe_name, adc_num, channel):
    """Read the saved feature matrix for one probe / ADC / channel.

    The file is searched for under the module-level ``base_directory`` in the
    folder ``P<x>_fd``, where ``<x>`` is the last character of ``probe_name``.
    Its name is built from the probe name, ADC number, channel number and the
    module-level ``sample_rate``.

    Args:
        probe_name: Probe label such as ``"Probe1"``.
        adc_num: ADC number inserted into the file name.
        channel: Channel number inserted into the file name.

    Returns:
        The array returned by ``np.load`` for that file.

    Raises:
        FileNotFoundError: If the expected ``.npy`` file does not exist.
    """
    npy_path = os.path.join(
        base_directory,
        f"P{probe_name[-1]}_fd",
        f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy',
    )

    # Report the full path so a missing capture is easy to locate.
    if os.path.exists(npy_path):
        features = np.load(npy_path)
        print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {features.shape}")
        return features

    raise FileNotFoundError(f"File not found: {npy_path}")

# ==================================================================
# Training & Test Data
# ==================================================================
# NOTE: the *_ch0 / *_ch1 variables hold the files for channel 1 and
# channel 2 respectively (third argument of load_channel_data).
train_ch0 = load_channel_data(train_probe, adc_num, 1)
train_ch1 = load_channel_data(train_probe, adc_num, 2)
test_ch0 = load_channel_data(test_probe, adc_num, 1)
test_ch1 = load_channel_data(test_probe, adc_num, 2)

# Stack the two channels row-wise and build matching labels:
# 0.0 for every channel-1 row, 1.0 for every channel-2 row.
X_train = np.vstack([train_ch0, train_ch1])
y_train = np.concatenate([np.zeros(train_ch0.shape[0]), np.ones(train_ch1.shape[0])])
X_test = np.vstack([test_ch0, test_ch1])
y_test = np.concatenate([np.zeros(test_ch0.shape[0]), np.ones(test_ch1.shape[0])])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only; the test probe is
# transformed with those same statistics.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# First forest is fitted on all columns; in this cell it is used only to
# obtain feature importances.
rf = RandomForestClassifier(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42
)
rf.fit(X_train_scaled, y_train)

# Identify Important Features
# Keep the column indices whose importance is strictly greater than the
# median (50th percentile) importance, then slice both matrices with them.
feature_importances = rf.feature_importances_
important_features = np.where(feature_importances > np.percentile(feature_importances, 50))[0]
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features
# Listed in column order (ascending index), not sorted by importance.
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
# Second forest: same hyper-parameters and seed, fitted on the selected
# columns only.
rf_selected = RandomForestClassifier(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42
)
rf_selected.fit(X_train_selected, y_train)

# Create a unique folder name dynamically
# The path is relative, so the folder is created under the current working
# directory; exist_ok=True lets a re-run reuse an existing folder.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the Model and Scaler
# Selected_Features.pkl holds the selected column indices (important_features).
joblib.dump(rf_selected, os.path.join(model_dir, "RandomForest_Model.pkl"))
joblib.dump(scaler, os.path.join(model_dir, "Scaler.pkl"))
joblib.dump(important_features, os.path.join(model_dir, "Selected_Features.pkl"))

# Predictions
# predict_proba columns follow rf_selected.classes_; column 1 is therefore the
# probability of label 1.0 (channel 2).
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
# F1, recall and precision are called with scikit-learn's defaults (binary
# averaging, positive label 1), so they describe channel 2 (label 1).
print("\nModel Performance After Feature Selection:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_selected):.4f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred_selected):.4f}")
print(f"MCC: {matthews_corrcoef(y_test, y_pred_selected):.4f}")
print(f"Log Loss: {log_loss(y_test, y_probs_selected):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred_selected):.4f}")
print(f"Recall: {recall_score(y_test, y_pred_selected):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_selected):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
# scikit-learn's confusion matrix has true labels on rows and predictions on
# columns, so cm[0][1] counts label-0 rows predicted as 1 (false positives)
# and cm[1][0] counts label-1 rows predicted as 0 (false negatives).
# Each rate is normalised by its row total.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0][1] / (cm[0][1] + cm[0][0])
fn_rate = cm[1][0] / (cm[1][0] + cm[1][1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC2 CH1: (9998, 38)
✅ Loaded Probe3 ADC2 CH2: (9998, 38)
✅ Loaded Probe2 ADC2 CH1: (9998, 38)
✅ Loaded Probe2 ADC2 CH2: (9998, 38)

Top Important Features:
Max_H: Importance 0.1694
Min_H: Importance 0.0161
Mean_H: Importance 0.0882
Mean Deviation_H: Importance 0.0057
RMS_H: Importance 0.1780
Skewness_H: Importance 0.0048
Peak-to-Peak_H: Importance 0.0044
Entropy_H: Importance 0.1111
Spread_H: Importance 0.0043
Mean_Freq_H: Importance 0.0611
Variance_H: Importance 0.0211
Max_L: Importance 0.0040
Min_L: Importance 0.0971
Mean_L: Importance 0.0526
RMS_L: Importance 0.0720
Skewness_L: Importance 0.0055
Entropy_L: Importance 0.0421
Mean_Freq_L: Importance 0.0188
Variance_L: Importance 0.0180

Model Performance After Feature Selection:
Test Accuracy: 0.9408
Balanced Accuracy: 0.9408
MCC: 0.8863
Log Loss: 0.1304
F1 Score: 0.9437
Recall: 0.9924
Precision: 0.8995
              precision    recall  f1-score   support

         CH1       0.99      0.89      0.94      9998
       

#### **Sampling Rate 20MSPS**
This section contains data collected using a sampling rate of **20MSPS**. The model is trained on **Probe 3** and tested on **Probe 2**.



In [143]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder of the saved feature files; load_channel_data() looks for
# per-probe sub-folders named P<n>_fd inside it.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# adc_num and sample_rate are inserted into the feature file names
# (..._ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy).
# train_probe_dataset / test_probe_dataset are the probe numbers used for
# training and for testing.
adc_num = 2
train_probe_dataset = 3
test_probe_dataset = 2
sample_rate = 20

# Automatically determine probe names based on dataset numbers
train_probe = f"Probe{train_probe_dataset}"
test_probe = f"Probe{test_probe_dataset}"

# Feature Names (Time High, Freq High, Time Low, Freq Low)
# 38 labels in total: 10 + 9 with the _H suffix followed by 10 + 9 with the
# _L suffix. They are looked up by column index when importances are printed,
# so the order is assumed to match the columns of the loaded .npy arrays
# (TODO confirm against the feature-extraction code).
feature_names = [
    "Max_H", "Min_H", "Mean_H", "Std_H", "Mean Deviation_H", "RMS_H", "Skewness_H", "Kurtosis_H", "Peak-to-Peak_H", "Zero Crossing Rate_H",
    "Centroid_H", "Entropy_H", "Spread_H", "Skewness_Freq_H", "Mean_Freq_H", "Kurtosis_Freq_H", "Irregularity_H", "Variance_H", "Dominant_Freq_H",
    "Max_L", "Min_L", "Mean_L", "Std_L", "Mean Deviation_L", "RMS_L", "Skewness_L", "Kurtosis_L", "Peak-to-Peak_L", "Zero Crossing Rate_L",
    "Centroid_L", "Entropy_L", "Spread_L", "Skewness_Freq_L", "Mean_Freq_L", "Kurtosis_Freq_L", "Irregularity_L", "Variance_L", "Dominant_Freq_L"
]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature matrix for one probe / ADC / channel.

    The file is looked up at
    ``<base_directory>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``
    where ``<n>`` is the numeric suffix of ``probe_name``. ``base_directory``
    and ``sample_rate`` are read from the module-level configuration at call
    time. The array contents are not validated in any way.

    Args:
        probe_name: Probe label ending in its number, e.g. ``"Probe3"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.

    Returns:
        The array stored in the ``.npy`` file, as returned by ``np.load``.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Use the whole trailing number rather than only the last character, so a
    # multi-digit probe such as "Probe10" maps to "P10_fd" and not "P0_fd".
    # Names without a numeric suffix keep the previous last-character rule.
    stem = probe_name.rstrip("0123456789")
    probe_id = probe_name[len(stem):] or probe_name[-1]

    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path in the message.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Cross-probe experiment: fit on train_probe, evaluate on test_probe, using the
# same ADC for both.
# NOTE: the *_ch0 / *_ch1 variables hold channels 1 and 2 respectively; the
# 0/1 suffix matches the class label assigned below, not the channel number.
train_ch0 = load_channel_data(train_probe, adc_num, 1)
train_ch1 = load_channel_data(train_probe, adc_num, 2)
test_ch0 = load_channel_data(test_probe, adc_num, 1)
test_ch1 = load_channel_data(test_probe, adc_num, 2)

# Stack the two channels row-wise; label 0 = channel 1 rows, label 1 = channel 2
# rows. The label vectors are built in the same order as the vstack.
X_train = np.vstack([train_ch0, train_ch1])
y_train = np.concatenate([np.zeros(train_ch0.shape[0]), np.ones(train_ch1.shape[0])])
X_test = np.vstack([test_ch0, test_ch1])
y_test = np.concatenate([np.zeros(test_ch0.shape[0]), np.ones(test_ch1.shape[0])])

# ==================================================================
# Data Preprocessing
# ==================================================================
# Scaling statistics are fitted on the training probe only and then applied
# unchanged to the test probe.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# First pass on all columns; this forest is only used to rank the features.
rf = RandomForestClassifier(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42
)
rf.fit(X_train_scaled, y_train)

# Identify Important Features
# Keep the column indices whose importance is strictly above the median
# importance of all columns.
feature_importances = rf.feature_importances_
important_features = np.where(feature_importances > np.percentile(feature_importances, 50))[0]
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features
# The list is printed in column order (ascending index), not sorted by
# importance.
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
# Second forest with the same hyperparameters and seed, fitted only on the
# selected columns.
rf_selected = RandomForestClassifier(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42
)
rf_selected.fit(X_train_selected, y_train)

# Create a unique folder name dynamically
# Relative path, so the folder is created under the current working directory.
# The name encodes train/test probe, ADC and sample rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the Model and Scaler
# Selected_Features.pkl holds the column indices into the scaled feature
# matrix that rf_selected was fitted on.
joblib.dump(rf_selected, os.path.join(model_dir, "RandomForest_Model.pkl"))
joblib.dump(scaler, os.path.join(model_dir, "Scaler.pkl"))
joblib.dump(important_features, os.path.join(model_dir, "Selected_Features.pkl"))

# Predictions
# Column 1 of predict_proba is the probability of the second entry of
# rf_selected.classes_, i.e. label 1 (channel 2) given the 0/1 labels above.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
# F1, recall and precision use scikit-learn's default pos_label=1, so label 1
# (channel 2) is treated as the positive class.
print("\nModel Performance After Feature Selection:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_selected):.4f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred_selected):.4f}")
print(f"MCC: {matthews_corrcoef(y_test, y_pred_selected):.4f}")
print(f"Log Loss: {log_loss(y_test, y_probs_selected):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred_selected):.4f}")
print(f"Recall: {recall_score(y_test, y_pred_selected):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_selected):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
# scikit-learn layout: rows are true labels, columns are predicted labels.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# FPR = FP / (FP + TN) and FNR = FN / (FN + TP), with label 1 as positive.
fp_rate = cm[0][1] / (cm[0][1] + cm[0][0])
fn_rate = cm[1][0] / (cm[1][0] + cm[1][1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC2 CH1: (4998, 38)
✅ Loaded Probe3 ADC2 CH2: (4998, 38)
✅ Loaded Probe2 ADC2 CH1: (4998, 38)
✅ Loaded Probe2 ADC2 CH2: (4998, 38)

Top Important Features:
Max_H: Importance 0.0596
Min_H: Importance 0.0351
Mean_H: Importance 0.0945
Mean Deviation_H: Importance 0.0063
RMS_H: Importance 0.1270
Centroid_H: Importance 0.0036
Entropy_H: Importance 0.0172
Spread_H: Importance 0.0005
Mean_Freq_H: Importance 0.0650
Variance_H: Importance 0.0612
Max_L: Importance 0.0004
Min_L: Importance 0.0258
Mean_L: Importance 0.2093
RMS_L: Importance 0.1217
Skewness_L: Importance 0.0000
Kurtosis_L: Importance 0.0065
Entropy_L: Importance 0.0241
Mean_Freq_L: Importance 0.0091
Variance_L: Importance 0.1331

Model Performance After Feature Selection:
Test Accuracy: 0.9902
Balanced Accuracy: 0.9902
MCC: 0.9806
Log Loss: 0.0552
F1 Score: 0.9901
Recall: 0.9804
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       0.98      1.00      0.99      4998
         CH

#### **Sampling Rate 30MSPS**
This section contains data collected using a sampling rate of **30MSPS**. The model is trained on **Probe 3** and tested on **Probe 2**.



In [144]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Experiment configuration (fixed for this cell)
# ==================================================================
# Root folder that contains the per-probe feature directories.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC under test and sampling rate in MSPS.
adc_num, sample_rate = 2, 30
# Probe numbers: the model is fitted on the first and evaluated on the second.
train_probe_dataset, test_probe_dataset = 3, 2

# Probe labels derived from the probe numbers above.
test_probe = f"Probe{test_probe_dataset}"
train_probe = f"Probe{train_probe_dataset}"

# Column labels of the feature matrix, in column order. The four groups follow
# the original layout: time features (high), frequency features (high),
# time features (low), frequency features (low).
feature_names = (
    # Time, high
    [
        "Max_H", "Min_H", "Mean_H", "Std_H", "Mean Deviation_H",
        "RMS_H", "Skewness_H", "Kurtosis_H", "Peak-to-Peak_H",
        "Zero Crossing Rate_H",
    ]
    # Frequency, high
    + [
        "Centroid_H", "Entropy_H", "Spread_H", "Skewness_Freq_H",
        "Mean_Freq_H", "Kurtosis_Freq_H", "Irregularity_H", "Variance_H",
        "Dominant_Freq_H",
    ]
    # Time, low
    + [
        "Max_L", "Min_L", "Mean_L", "Std_L", "Mean Deviation_L",
        "RMS_L", "Skewness_L", "Kurtosis_L", "Peak-to-Peak_L",
        "Zero Crossing Rate_L",
    ]
    # Frequency, low
    + [
        "Centroid_L", "Entropy_L", "Spread_L", "Skewness_Freq_L",
        "Mean_Freq_L", "Kurtosis_Freq_L", "Irregularity_L", "Variance_L",
        "Dominant_Freq_L",
    ]
)

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature matrix for one probe / ADC / channel.

    The file is looked up at
    ``<base_directory>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``
    where ``<n>`` is the numeric suffix of ``probe_name``. ``base_directory``
    and ``sample_rate`` are read from the module-level configuration at call
    time. The array contents are not validated in any way.

    Args:
        probe_name: Probe label ending in its number, e.g. ``"Probe3"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.

    Returns:
        The array stored in the ``.npy`` file, as returned by ``np.load``.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Use the whole trailing number rather than only the last character, so a
    # multi-digit probe such as "Probe10" maps to "P10_fd" and not "P0_fd".
    # Names without a numeric suffix keep the previous last-character rule.
    stem = probe_name.rstrip("0123456789")
    probe_id = probe_name[len(stem):] or probe_name[-1]

    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path in the message.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Cross-probe experiment: fit on train_probe, evaluate on test_probe, using the
# same ADC for both.
# NOTE: the *_ch0 / *_ch1 variables hold channels 1 and 2 respectively; the
# 0/1 suffix matches the class label assigned below, not the channel number.
train_ch0 = load_channel_data(train_probe, adc_num, 1)
train_ch1 = load_channel_data(train_probe, adc_num, 2)
test_ch0 = load_channel_data(test_probe, adc_num, 1)
test_ch1 = load_channel_data(test_probe, adc_num, 2)

# Stack the two channels row-wise; label 0 = channel 1 rows, label 1 = channel 2
# rows. The label vectors are built in the same order as the vstack.
X_train = np.vstack([train_ch0, train_ch1])
y_train = np.concatenate([np.zeros(train_ch0.shape[0]), np.ones(train_ch1.shape[0])])
X_test = np.vstack([test_ch0, test_ch1])
y_test = np.concatenate([np.zeros(test_ch0.shape[0]), np.ones(test_ch1.shape[0])])

# ==================================================================
# Data Preprocessing
# ==================================================================
# Scaling statistics are fitted on the training probe only and then applied
# unchanged to the test probe.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# First pass on all columns; this forest is only used to rank the features.
rf = RandomForestClassifier(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42
)
rf.fit(X_train_scaled, y_train)

# Identify Important Features
# Keep the column indices whose importance is strictly above the median
# importance of all columns.
feature_importances = rf.feature_importances_
important_features = np.where(feature_importances > np.percentile(feature_importances, 50))[0]
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features
# The list is printed in column order (ascending index), not sorted by
# importance.
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
# Second forest with the same hyperparameters and seed, fitted only on the
# selected columns.
rf_selected = RandomForestClassifier(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42
)
rf_selected.fit(X_train_selected, y_train)

# Create a unique folder name dynamically
# Relative path, so the folder is created under the current working directory.
# The name encodes train/test probe, ADC and sample rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the Model and Scaler
# Selected_Features.pkl holds the column indices into the scaled feature
# matrix that rf_selected was fitted on.
joblib.dump(rf_selected, os.path.join(model_dir, "RandomForest_Model.pkl"))
joblib.dump(scaler, os.path.join(model_dir, "Scaler.pkl"))
joblib.dump(important_features, os.path.join(model_dir, "Selected_Features.pkl"))

# Predictions
# Column 1 of predict_proba is the probability of the second entry of
# rf_selected.classes_, i.e. label 1 (channel 2) given the 0/1 labels above.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
# F1, recall and precision use scikit-learn's default pos_label=1, so label 1
# (channel 2) is treated as the positive class.
print("\nModel Performance After Feature Selection:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_selected):.4f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred_selected):.4f}")
print(f"MCC: {matthews_corrcoef(y_test, y_pred_selected):.4f}")
print(f"Log Loss: {log_loss(y_test, y_probs_selected):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred_selected):.4f}")
print(f"Recall: {recall_score(y_test, y_pred_selected):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_selected):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
# scikit-learn layout: rows are true labels, columns are predicted labels.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# FPR = FP / (FP + TN) and FNR = FN / (FN + TP), with label 1 as positive.
fp_rate = cm[0][1] / (cm[0][1] + cm[0][0])
fn_rate = cm[1][0] / (cm[1][0] + cm[1][1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC2 CH1: (3331, 38)
✅ Loaded Probe3 ADC2 CH2: (3332, 38)
✅ Loaded Probe2 ADC2 CH1: (3331, 38)
✅ Loaded Probe2 ADC2 CH2: (3332, 38)

Top Important Features:
Max_H: Importance 0.0055
Min_H: Importance 0.0148
Mean_H: Importance 0.1371
Mean Deviation_H: Importance 0.0022
RMS_H: Importance 0.2265
Skewness_H: Importance 0.0004
Centroid_H: Importance 0.0022
Entropy_H: Importance 0.0114
Spread_H: Importance 0.0003
Mean_Freq_H: Importance 0.0249
Variance_H: Importance 0.0565
Max_L: Importance 0.0005
Min_L: Importance 0.0935
Mean_L: Importance 0.1321
RMS_L: Importance 0.1545
Skewness_L: Importance 0.0003
Entropy_L: Importance 0.0235
Mean_Freq_L: Importance 0.0073
Variance_L: Importance 0.1044

Model Performance After Feature Selection:
Test Accuracy: 0.9986
Balanced Accuracy: 0.9986
MCC: 0.9973
Log Loss: 0.0255
F1 Score: 0.9986
Recall: 0.9973
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      3331
         CH

#### **Sampling Rate 40MSPS**
This section contains data collected using a sampling rate of **40MSPS**. The model is trained on **Probe 3** and tested on **Probe 2**.



In [145]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Experiment configuration (fixed for this cell)
# ==================================================================
# Root folder that contains the per-probe feature directories.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC under test and sampling rate in MSPS.
adc_num, sample_rate = 2, 40
# Probe numbers: the model is fitted on the first and evaluated on the second.
train_probe_dataset, test_probe_dataset = 3, 2

# Probe labels derived from the probe numbers above.
test_probe = f"Probe{test_probe_dataset}"
train_probe = f"Probe{train_probe_dataset}"

# Column labels of the feature matrix, in column order. The four groups follow
# the original layout: time features (high), frequency features (high),
# time features (low), frequency features (low).
feature_names = (
    # Time, high
    [
        "Max_H", "Min_H", "Mean_H", "Std_H", "Mean Deviation_H",
        "RMS_H", "Skewness_H", "Kurtosis_H", "Peak-to-Peak_H",
        "Zero Crossing Rate_H",
    ]
    # Frequency, high
    + [
        "Centroid_H", "Entropy_H", "Spread_H", "Skewness_Freq_H",
        "Mean_Freq_H", "Kurtosis_Freq_H", "Irregularity_H", "Variance_H",
        "Dominant_Freq_H",
    ]
    # Time, low
    + [
        "Max_L", "Min_L", "Mean_L", "Std_L", "Mean Deviation_L",
        "RMS_L", "Skewness_L", "Kurtosis_L", "Peak-to-Peak_L",
        "Zero Crossing Rate_L",
    ]
    # Frequency, low
    + [
        "Centroid_L", "Entropy_L", "Spread_L", "Skewness_Freq_L",
        "Mean_Freq_L", "Kurtosis_Freq_L", "Irregularity_L", "Variance_L",
        "Dominant_Freq_L",
    ]
)

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature matrix for one probe / ADC / channel.

    The file is looked up at
    ``<base_directory>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``
    where ``<n>`` is the numeric suffix of ``probe_name``. ``base_directory``
    and ``sample_rate`` are read from the module-level configuration at call
    time. The array contents are not validated in any way.

    Args:
        probe_name: Probe label ending in its number, e.g. ``"Probe3"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.

    Returns:
        The array stored in the ``.npy`` file, as returned by ``np.load``.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Use the whole trailing number rather than only the last character, so a
    # multi-digit probe such as "Probe10" maps to "P10_fd" and not "P0_fd".
    # Names without a numeric suffix keep the previous last-character rule.
    stem = probe_name.rstrip("0123456789")
    probe_id = probe_name[len(stem):] or probe_name[-1]

    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path in the message.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Cross-probe experiment: fit on train_probe, evaluate on test_probe, using the
# same ADC for both.
# NOTE: the *_ch0 / *_ch1 variables hold channels 1 and 2 respectively; the
# 0/1 suffix matches the class label assigned below, not the channel number.
train_ch0 = load_channel_data(train_probe, adc_num, 1)
train_ch1 = load_channel_data(train_probe, adc_num, 2)
test_ch0 = load_channel_data(test_probe, adc_num, 1)
test_ch1 = load_channel_data(test_probe, adc_num, 2)

# Stack the two channels row-wise; label 0 = channel 1 rows, label 1 = channel 2
# rows. The label vectors are built in the same order as the vstack.
X_train = np.vstack([train_ch0, train_ch1])
y_train = np.concatenate([np.zeros(train_ch0.shape[0]), np.ones(train_ch1.shape[0])])
X_test = np.vstack([test_ch0, test_ch1])
y_test = np.concatenate([np.zeros(test_ch0.shape[0]), np.ones(test_ch1.shape[0])])

# ==================================================================
# Data Preprocessing
# ==================================================================
# Scaling statistics are fitted on the training probe only and then applied
# unchanged to the test probe.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# First pass on all columns; this forest is only used to rank the features.
rf = RandomForestClassifier(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42
)
rf.fit(X_train_scaled, y_train)

# Identify Important Features
# Keep the column indices whose importance is strictly above the median
# importance of all columns.
feature_importances = rf.feature_importances_
important_features = np.where(feature_importances > np.percentile(feature_importances, 50))[0]
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features
# The list is printed in column order (ascending index), not sorted by
# importance.
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
# Second forest with the same hyperparameters and seed, fitted only on the
# selected columns.
rf_selected = RandomForestClassifier(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42
)
rf_selected.fit(X_train_selected, y_train)

# Create a unique folder name dynamically
# Relative path, so the folder is created under the current working directory.
# The name encodes train/test probe, ADC and sample rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the Model and Scaler
# Selected_Features.pkl holds the column indices into the scaled feature
# matrix that rf_selected was fitted on.
joblib.dump(rf_selected, os.path.join(model_dir, "RandomForest_Model.pkl"))
joblib.dump(scaler, os.path.join(model_dir, "Scaler.pkl"))
joblib.dump(important_features, os.path.join(model_dir, "Selected_Features.pkl"))

# Predictions
# Column 1 of predict_proba is the probability of the second entry of
# rf_selected.classes_, i.e. label 1 (channel 2) given the 0/1 labels above.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
# F1, recall and precision use scikit-learn's default pos_label=1, so label 1
# (channel 2) is treated as the positive class.
print("\nModel Performance After Feature Selection:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_selected):.4f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred_selected):.4f}")
print(f"MCC: {matthews_corrcoef(y_test, y_pred_selected):.4f}")
print(f"Log Loss: {log_loss(y_test, y_probs_selected):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred_selected):.4f}")
print(f"Recall: {recall_score(y_test, y_pred_selected):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_selected):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
# scikit-learn layout: rows are true labels, columns are predicted labels.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# FPR = FP / (FP + TN) and FNR = FN / (FN + TP), with label 1 as positive.
fp_rate = cm[0][1] / (cm[0][1] + cm[0][0])
fn_rate = cm[1][0] / (cm[1][0] + cm[1][1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC2 CH1: (2481, 38)
✅ Loaded Probe3 ADC2 CH2: (2481, 38)
✅ Loaded Probe2 ADC2 CH1: (2481, 38)
✅ Loaded Probe2 ADC2 CH2: (2481, 38)

Top Important Features:
Max_H: Importance 0.0648
Min_H: Importance 0.0288
Mean_H: Importance 0.0963
Mean Deviation_H: Importance 0.0137
RMS_H: Importance 0.1324
Skewness_H: Importance 0.0001
Entropy_H: Importance 0.0138
Mean_Freq_H: Importance 0.0559
Variance_H: Importance 0.0613
Max_L: Importance 0.0003
Min_L: Importance 0.0224
Mean_L: Importance 0.2112
RMS_L: Importance 0.1272
Skewness_L: Importance 0.0005
Kurtosis_L: Importance 0.0004
Centroid_L: Importance 0.0002
Entropy_L: Importance 0.0292
Mean_Freq_L: Importance 0.0115
Variance_L: Importance 0.1301

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0002
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      2481
         

#### **Sampling Rate 50MSPS**
This section contains data collected using a sampling rate of **50MSPS**. The model is trained on **Probe 3** and tested on **Probe 2**.



In [146]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Experiment configuration (fixed for this cell)
# ==================================================================
# Root folder that contains the per-probe feature directories.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC under test and sampling rate in MSPS.
adc_num, sample_rate = 2, 50
# Probe numbers: the model is fitted on the first and evaluated on the second.
train_probe_dataset, test_probe_dataset = 3, 2

# Probe labels derived from the probe numbers above.
test_probe = f"Probe{test_probe_dataset}"
train_probe = f"Probe{train_probe_dataset}"

# Column labels of the feature matrix, in column order. The four groups follow
# the original layout: time features (high), frequency features (high),
# time features (low), frequency features (low).
feature_names = (
    # Time, high
    [
        "Max_H", "Min_H", "Mean_H", "Std_H", "Mean Deviation_H",
        "RMS_H", "Skewness_H", "Kurtosis_H", "Peak-to-Peak_H",
        "Zero Crossing Rate_H",
    ]
    # Frequency, high
    + [
        "Centroid_H", "Entropy_H", "Spread_H", "Skewness_Freq_H",
        "Mean_Freq_H", "Kurtosis_Freq_H", "Irregularity_H", "Variance_H",
        "Dominant_Freq_H",
    ]
    # Time, low
    + [
        "Max_L", "Min_L", "Mean_L", "Std_L", "Mean Deviation_L",
        "RMS_L", "Skewness_L", "Kurtosis_L", "Peak-to-Peak_L",
        "Zero Crossing Rate_L",
    ]
    # Frequency, low
    + [
        "Centroid_L", "Entropy_L", "Spread_L", "Skewness_Freq_L",
        "Mean_Freq_L", "Kurtosis_Freq_L", "Irregularity_L", "Variance_L",
        "Dominant_Freq_L",
    ]
)

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature matrix for one probe / ADC / channel.

    The file is looked up at
    ``<base_directory>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``
    where ``<n>`` is the numeric suffix of ``probe_name``. ``base_directory``
    and ``sample_rate`` are read from the module-level configuration at call
    time. The array contents are not validated in any way.

    Args:
        probe_name: Probe label ending in its number, e.g. ``"Probe3"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.

    Returns:
        The array stored in the ``.npy`` file, as returned by ``np.load``.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Use the whole trailing number rather than only the last character, so a
    # multi-digit probe such as "Probe10" maps to "P10_fd" and not "P0_fd".
    # Names without a numeric suffix keep the previous last-character rule.
    stem = probe_name.rstrip("0123456789")
    probe_id = probe_name[len(stem):] or probe_name[-1]

    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path in the message.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Cross-probe experiment: fit on train_probe, evaluate on test_probe, using the
# same ADC for both.
# NOTE: the *_ch0 / *_ch1 variables hold channels 1 and 2 respectively; the
# 0/1 suffix matches the class label assigned below, not the channel number.
train_ch0 = load_channel_data(train_probe, adc_num, 1)
train_ch1 = load_channel_data(train_probe, adc_num, 2)
test_ch0 = load_channel_data(test_probe, adc_num, 1)
test_ch1 = load_channel_data(test_probe, adc_num, 2)

# Stack the two channels row-wise; label 0 = channel 1 rows, label 1 = channel 2
# rows. The label vectors are built in the same order as the vstack.
X_train = np.vstack([train_ch0, train_ch1])
y_train = np.concatenate([np.zeros(train_ch0.shape[0]), np.ones(train_ch1.shape[0])])
X_test = np.vstack([test_ch0, test_ch1])
y_test = np.concatenate([np.zeros(test_ch0.shape[0]), np.ones(test_ch1.shape[0])])

# ==================================================================
# Data Preprocessing
# ==================================================================
# Scaling statistics are fitted on the training probe only and then applied
# unchanged to the test probe.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# First pass on all columns; this forest is only used to rank the features.
rf = RandomForestClassifier(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42
)
rf.fit(X_train_scaled, y_train)

# Identify Important Features
# Keep the column indices whose importance is strictly above the median
# importance of all columns.
feature_importances = rf.feature_importances_
important_features = np.where(feature_importances > np.percentile(feature_importances, 50))[0]
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features
# The list is printed in column order (ascending index), not sorted by
# importance.
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
# Second forest with the same hyperparameters and seed, fitted only on the
# selected columns.
rf_selected = RandomForestClassifier(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42
)
rf_selected.fit(X_train_selected, y_train)

# Create a unique folder name dynamically
# Relative path, so the folder is created under the current working directory.
# The name encodes train/test probe, ADC and sample rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the Model and Scaler
# Selected_Features.pkl holds the column indices into the scaled feature
# matrix that rf_selected was fitted on.
joblib.dump(rf_selected, os.path.join(model_dir, "RandomForest_Model.pkl"))
joblib.dump(scaler, os.path.join(model_dir, "Scaler.pkl"))
joblib.dump(important_features, os.path.join(model_dir, "Selected_Features.pkl"))

# Predictions
# Column 1 of predict_proba is the probability of the second entry of
# rf_selected.classes_, i.e. label 1 (channel 2) given the 0/1 labels above.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
# F1, recall and precision use scikit-learn's default pos_label=1, so label 1
# (channel 2) is treated as the positive class.
print("\nModel Performance After Feature Selection:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_selected):.4f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred_selected):.4f}")
print(f"MCC: {matthews_corrcoef(y_test, y_pred_selected):.4f}")
print(f"Log Loss: {log_loss(y_test, y_probs_selected):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred_selected):.4f}")
print(f"Recall: {recall_score(y_test, y_pred_selected):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_selected):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
# scikit-learn layout: rows are true labels, columns are predicted labels.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# FPR = FP / (FP + TN) and FNR = FN / (FN + TP), with label 1 as positive.
fp_rate = cm[0][1] / (cm[0][1] + cm[0][0])
fn_rate = cm[1][0] / (cm[1][0] + cm[1][1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC2 CH1: (1998, 38)
✅ Loaded Probe3 ADC2 CH2: (1998, 38)
✅ Loaded Probe2 ADC2 CH1: (1998, 38)
✅ Loaded Probe2 ADC2 CH2: (1998, 38)

Top Important Features:
Max_H: Importance 0.0599
Min_H: Importance 0.0000
Mean_H: Importance 0.0892
Std_H: Importance 0.0296
Mean Deviation_H: Importance 0.0339
RMS_H: Importance 0.1279
Centroid_H: Importance 0.0005
Entropy_H: Importance 0.0245
Mean_Freq_H: Importance 0.0806
Irregularity_H: Importance 0.0090
Variance_H: Importance 0.0656
Max_L: Importance 0.0022
Min_L: Importance 0.0006
Mean_L: Importance 0.1905
Mean Deviation_L: Importance 0.0232
RMS_L: Importance 0.1217
Entropy_L: Importance 0.0160
Mean_Freq_L: Importance 0.0001
Variance_L: Importance 0.1250

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0063
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      1998
    

#### **Sampling Rate 100MSPS**
This section contains data collected using a sampling rate of **100MSPS**. The model is trained on **Probe 3** and tested on **Probe 2**.



In [147]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Experiment configuration (fixed for this cell)
# ==================================================================
# Root folder that contains the per-probe feature directories.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC under test and sampling rate in MSPS.
adc_num, sample_rate = 2, 100
# Probe numbers: the model is fitted on the first and evaluated on the second.
train_probe_dataset, test_probe_dataset = 3, 2

# Probe labels derived from the probe numbers above.
test_probe = f"Probe{test_probe_dataset}"
train_probe = f"Probe{train_probe_dataset}"

# Column labels of the feature matrix, in column order. The four groups follow
# the original layout: time features (high), frequency features (high),
# time features (low), frequency features (low).
feature_names = (
    # Time, high
    [
        "Max_H", "Min_H", "Mean_H", "Std_H", "Mean Deviation_H",
        "RMS_H", "Skewness_H", "Kurtosis_H", "Peak-to-Peak_H",
        "Zero Crossing Rate_H",
    ]
    # Frequency, high
    + [
        "Centroid_H", "Entropy_H", "Spread_H", "Skewness_Freq_H",
        "Mean_Freq_H", "Kurtosis_Freq_H", "Irregularity_H", "Variance_H",
        "Dominant_Freq_H",
    ]
    # Time, low
    + [
        "Max_L", "Min_L", "Mean_L", "Std_L", "Mean Deviation_L",
        "RMS_L", "Skewness_L", "Kurtosis_L", "Peak-to-Peak_L",
        "Zero Crossing Rate_L",
    ]
    # Frequency, low
    + [
        "Centroid_L", "Entropy_L", "Spread_L", "Skewness_Freq_L",
        "Mean_Freq_L", "Kurtosis_Freq_L", "Irregularity_L", "Variance_L",
        "Dominant_Freq_L",
    ]
)

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature matrix for one probe / ADC / channel.

    The file is looked up at
    ``<base_directory>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``
    where ``<n>`` is the numeric suffix of ``probe_name``. ``base_directory``
    and ``sample_rate`` are read from the module-level configuration at call
    time. The array contents are not validated in any way.

    Args:
        probe_name: Probe label ending in its number, e.g. ``"Probe3"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.

    Returns:
        The array stored in the ``.npy`` file, as returned by ``np.load``.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Use the whole trailing number rather than only the last character, so a
    # multi-digit probe such as "Probe10" maps to "P10_fd" and not "P0_fd".
    # Names without a numeric suffix keep the previous last-character rule.
    stem = probe_name.rstrip("0123456789")
    probe_id = probe_name[len(stem):] or probe_name[-1]

    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path in the message.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Cross-probe experiment: fit on train_probe, evaluate on test_probe, using the
# same ADC for both.
# NOTE: the *_ch0 / *_ch1 variables hold channels 1 and 2 respectively; the
# 0/1 suffix matches the class label assigned below, not the channel number.
train_ch0 = load_channel_data(train_probe, adc_num, 1)
train_ch1 = load_channel_data(train_probe, adc_num, 2)
test_ch0 = load_channel_data(test_probe, adc_num, 1)
test_ch1 = load_channel_data(test_probe, adc_num, 2)

# Stack the two channels row-wise; label 0 = channel 1 rows, label 1 = channel 2
# rows. The label vectors are built in the same order as the vstack.
X_train = np.vstack([train_ch0, train_ch1])
y_train = np.concatenate([np.zeros(train_ch0.shape[0]), np.ones(train_ch1.shape[0])])
X_test = np.vstack([test_ch0, test_ch1])
y_test = np.concatenate([np.zeros(test_ch0.shape[0]), np.ones(test_ch1.shape[0])])

# ==================================================================
# Data Preprocessing
# ==================================================================
# Scaling statistics are fitted on the training probe only and then applied
# unchanged to the test probe.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# First pass on all columns; this forest is only used to rank the features.
rf = RandomForestClassifier(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42
)
rf.fit(X_train_scaled, y_train)

# Identify Important Features
# Keep the column indices whose importance is strictly above the median
# importance of all columns.
feature_importances = rf.feature_importances_
important_features = np.where(feature_importances > np.percentile(feature_importances, 50))[0]
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features
# The list is printed in column order (ascending index), not sorted by
# importance.
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
# Second forest with the same hyperparameters and seed, fitted only on the
# selected columns.
rf_selected = RandomForestClassifier(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42
)
rf_selected.fit(X_train_selected, y_train)

# Create a unique folder name dynamically
# Relative path, so the folder is created under the current working directory.
# The name encodes train/test probe, ADC and sample rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the Model and Scaler
# Selected_Features.pkl holds the column indices into the scaled feature
# matrix that rf_selected was fitted on.
joblib.dump(rf_selected, os.path.join(model_dir, "RandomForest_Model.pkl"))
joblib.dump(scaler, os.path.join(model_dir, "Scaler.pkl"))
joblib.dump(important_features, os.path.join(model_dir, "Selected_Features.pkl"))

# Predictions
# Column 1 of predict_proba is the probability of the second entry of
# rf_selected.classes_, i.e. label 1 (channel 2) given the 0/1 labels above.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
# F1, recall and precision use scikit-learn's default pos_label=1, so label 1
# (channel 2) is treated as the positive class.
print("\nModel Performance After Feature Selection:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_selected):.4f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred_selected):.4f}")
print(f"MCC: {matthews_corrcoef(y_test, y_pred_selected):.4f}")
print(f"Log Loss: {log_loss(y_test, y_probs_selected):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred_selected):.4f}")
print(f"Recall: {recall_score(y_test, y_pred_selected):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_selected):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
# scikit-learn layout: rows are true labels, columns are predicted labels.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# FPR = FP / (FP + TN) and FNR = FN / (FN + TP), with label 1 as positive.
fp_rate = cm[0][1] / (cm[0][1] + cm[0][0])
fn_rate = cm[1][0] / (cm[1][0] + cm[1][1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC2 CH1: (998, 38)
✅ Loaded Probe3 ADC2 CH2: (998, 38)
✅ Loaded Probe2 ADC2 CH1: (998, 38)
✅ Loaded Probe2 ADC2 CH2: (998, 38)

Top Important Features:
Max_H: Importance 0.1024
Mean_H: Importance 0.0815
Std_H: Importance 0.0167
Mean Deviation_H: Importance 0.0179
RMS_H: Importance 0.1285
Entropy_H: Importance 0.0198
Spread_H: Importance 0.0001
Mean_Freq_H: Importance 0.0578
Kurtosis_Freq_H: Importance 0.0084
Variance_H: Importance 0.0604
Min_L: Importance 0.0837
Mean_L: Importance 0.1815
Std_L: Importance 0.0001
RMS_L: Importance 0.1216
Skewness_L: Importance 0.0002
Kurtosis_L: Importance 0.0002
Entropy_L: Importance 0.0373
Mean_Freq_L: Importance 0.0001
Variance_L: Importance 0.0819

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0040
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       998
         C

#### **Sampling Rate 150MSPS**
This section contains data collected using a sampling rate of **150MSPS**. The model is trained on **Probe 3** and tested on **Probe 2**.



In [148]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder that contains the per-probe feature folders.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selection: ADC index, training probe, test probe and the
# sampling rate that appears in the feature file names (MSPS).
adc_num, train_probe_dataset, test_probe_dataset, sample_rate = 2, 3, 2, 150

# Probe names follow directly from the dataset numbers.
train_probe = f"Probe{train_probe_dataset}"
test_probe = f"Probe{test_probe_dataset}"

# Feature Names (Time High, Freq High, Time Low, Freq Low): the same 19
# descriptors are listed first with the "_H" suffix, then with the "_L" suffix.
_descriptor_names = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness", "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq", "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [f"{name}_{suffix}" for suffix in ("H", "L") for name in _descriptor_names]

def load_channel_data(probe_name, adc_num, channel):
    """Return the feature array stored for one probe / ADC / channel.

    The file is looked up under the module-level ``base_directory`` in the
    folder ``P<last character of probe_name>_fd``. Its name is built from
    ``probe_name``, ``adc_num``, ``channel`` and the module-level
    ``sample_rate``. The shape of the loaded array is printed before the
    array is returned.

    Raises:
        FileNotFoundError: if the expected ``.npy`` file does not exist.
    """
    npy_name = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    npy_path = os.path.join(base_directory, f"P{probe_name[-1]}_fd", npy_name)

    if os.path.exists(npy_path):
        features = np.load(npy_path)
        print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {features.shape}")
        return features

    # Nothing on disk for this combination: report the full path that was tried.
    raise FileNotFoundError(f"File not found: {npy_path}")

# ==================================================================
# Training & Test Data
# ==================================================================
# Load channel 1 and channel 2 for the training probe, then for the test probe.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Stack the two channels row-wise; channel 1 rows get label 0, channel 2 rows label 1.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and then applied to both sets.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests (initial and retrained) share these hyperparameters.
rf_settings = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_settings).fit(X_train_scaled, y_train)

# Keep the columns whose importance lies strictly above the median importance.
feature_importances = rf.feature_importances_
important_features = np.flatnonzero(feature_importances > np.percentile(feature_importances, 50))
X_train_selected, X_test_selected = (
    scaled[:, important_features] for scaled in (X_train_scaled, X_test_scaled)
)

# Report the retained features in column order.
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_settings).fit(X_train_selected, y_train)

# One output folder per train/test probe pair, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the retrained model, the scaler and the selected column indices.
for artifact, pkl_name in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, pkl_name))

# Hard predictions, plus the predicted probability of label 1 (channel 2).
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_selected):.4f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred_selected):.4f}")
print(f"MCC: {matthews_corrcoef(y_test, y_pred_selected):.4f}")
print(f"Log Loss: {log_loss(y_test, y_probs_selected):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred_selected):.4f}")
print(f"Recall: {recall_score(y_test, y_pred_selected):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_selected):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix; FPR is computed from row 0 (label 0 / CH1) and FNR from
# row 1 (label 1 / CH2).
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC2 CH1: (665, 38)
✅ Loaded Probe3 ADC2 CH2: (665, 38)
✅ Loaded Probe2 ADC2 CH1: (665, 38)
✅ Loaded Probe2 ADC2 CH2: (665, 38)

Top Important Features:
Max_H: Importance 0.1038
Min_H: Importance 0.0006
Mean_H: Importance 0.1025
Std_H: Importance 0.0110
Mean Deviation_H: Importance 0.0415
RMS_H: Importance 0.1069
Entropy_H: Importance 0.0120
Spread_H: Importance 0.0049
Mean_Freq_H: Importance 0.0215
Variance_H: Importance 0.0687
Min_L: Importance 0.0630
Mean_L: Importance 0.1443
Mean Deviation_L: Importance 0.0921
RMS_L: Importance 0.1328
Skewness_L: Importance 0.0007
Entropy_L: Importance 0.0111
Mean_Freq_L: Importance 0.0071
Irregularity_L: Importance 0.0063
Variance_L: Importance 0.0690

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0157
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       665
     

#### **Sampling Rate 200MSPS**
This section contains data collected using a sampling rate of **200MSPS**. The model is trained on **Probe 3** and tested on **Probe 2**.



In [149]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder that contains the per-probe feature folders.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selection: ADC index, training probe, test probe and the
# sampling rate that appears in the feature file names (MSPS).
adc_num, train_probe_dataset, test_probe_dataset, sample_rate = 2, 3, 2, 200

# Probe names follow directly from the dataset numbers.
train_probe = f"Probe{train_probe_dataset}"
test_probe = f"Probe{test_probe_dataset}"

# Feature Names (Time High, Freq High, Time Low, Freq Low): the same 19
# descriptors are listed first with the "_H" suffix, then with the "_L" suffix.
_descriptor_names = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness", "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq", "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [f"{name}_{suffix}" for suffix in ("H", "L") for name in _descriptor_names]

def load_channel_data(probe_name, adc_num, channel):
    """Return the feature array stored for one probe / ADC / channel.

    The file is looked up under the module-level ``base_directory`` in the
    folder ``P<last character of probe_name>_fd``. Its name is built from
    ``probe_name``, ``adc_num``, ``channel`` and the module-level
    ``sample_rate``. The shape of the loaded array is printed before the
    array is returned.

    Raises:
        FileNotFoundError: if the expected ``.npy`` file does not exist.
    """
    npy_name = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    npy_path = os.path.join(base_directory, f"P{probe_name[-1]}_fd", npy_name)

    if os.path.exists(npy_path):
        features = np.load(npy_path)
        print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {features.shape}")
        return features

    # Nothing on disk for this combination: report the full path that was tried.
    raise FileNotFoundError(f"File not found: {npy_path}")

# ==================================================================
# Training & Test Data
# ==================================================================
# Load channel 1 and channel 2 for the training probe, then for the test probe.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Stack the two channels row-wise; channel 1 rows get label 0, channel 2 rows label 1.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and then applied to both sets.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests (initial and retrained) share these hyperparameters.
rf_settings = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_settings).fit(X_train_scaled, y_train)

# Keep the columns whose importance lies strictly above the median importance.
feature_importances = rf.feature_importances_
important_features = np.flatnonzero(feature_importances > np.percentile(feature_importances, 50))
X_train_selected, X_test_selected = (
    scaled[:, important_features] for scaled in (X_train_scaled, X_test_scaled)
)

# Report the retained features in column order.
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_settings).fit(X_train_selected, y_train)

# One output folder per train/test probe pair, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the retrained model, the scaler and the selected column indices.
for artifact, pkl_name in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, pkl_name))

# Hard predictions, plus the predicted probability of label 1 (channel 2).
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_selected):.4f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred_selected):.4f}")
print(f"MCC: {matthews_corrcoef(y_test, y_pred_selected):.4f}")
print(f"Log Loss: {log_loss(y_test, y_probs_selected):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred_selected):.4f}")
print(f"Recall: {recall_score(y_test, y_pred_selected):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_selected):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix; FPR is computed from row 0 (label 0 / CH1) and FNR from
# row 1 (label 1 / CH2).
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC2 CH1: (498, 38)
✅ Loaded Probe3 ADC2 CH2: (498, 38)
✅ Loaded Probe2 ADC2 CH1: (498, 38)
✅ Loaded Probe2 ADC2 CH2: (498, 38)

Top Important Features:
Max_H: Importance 0.1004
Mean_H: Importance 0.0807
Std_H: Importance 0.0191
Mean Deviation_H: Importance 0.0641
RMS_H: Importance 0.1223
Centroid_H: Importance 0.0009
Entropy_H: Importance 0.1000
Mean_Freq_H: Importance 0.0390
Kurtosis_Freq_H: Importance 0.0137
Variance_H: Importance 0.0200
Min_L: Importance 0.0392
Mean_L: Importance 0.1410
Mean Deviation_L: Importance 0.0800
RMS_L: Importance 0.0808
Entropy_L: Importance 0.0189
Variance_L: Importance 0.0800

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0049
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       498
         CH2       1.00      1.00      1.00       498

    accuracy                     

#### **Sampling Rate 250MSPS**
This section contains data collected using a sampling rate of **250MSPS**. The model is trained on **Probe 3** and tested on **Probe 2**.



In [150]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder that contains the per-probe feature folders.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selection: ADC index, training probe, test probe and the
# sampling rate that appears in the feature file names (MSPS).
adc_num, train_probe_dataset, test_probe_dataset, sample_rate = 2, 3, 2, 250

# Probe names follow directly from the dataset numbers.
train_probe = f"Probe{train_probe_dataset}"
test_probe = f"Probe{test_probe_dataset}"

# Feature Names (Time High, Freq High, Time Low, Freq Low): the same 19
# descriptors are listed first with the "_H" suffix, then with the "_L" suffix.
_descriptor_names = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness", "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq", "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [f"{name}_{suffix}" for suffix in ("H", "L") for name in _descriptor_names]

def load_channel_data(probe_name, adc_num, channel):
    """Return the feature array stored for one probe / ADC / channel.

    The file is looked up under the module-level ``base_directory`` in the
    folder ``P<last character of probe_name>_fd``. Its name is built from
    ``probe_name``, ``adc_num``, ``channel`` and the module-level
    ``sample_rate``. The shape of the loaded array is printed before the
    array is returned.

    Raises:
        FileNotFoundError: if the expected ``.npy`` file does not exist.
    """
    npy_name = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    npy_path = os.path.join(base_directory, f"P{probe_name[-1]}_fd", npy_name)

    if os.path.exists(npy_path):
        features = np.load(npy_path)
        print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {features.shape}")
        return features

    # Nothing on disk for this combination: report the full path that was tried.
    raise FileNotFoundError(f"File not found: {npy_path}")

# ==================================================================
# Training & Test Data
# ==================================================================
# Load channel 1 and channel 2 for the training probe, then for the test probe.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Stack the two channels row-wise; channel 1 rows get label 0, channel 2 rows label 1.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and then applied to both sets.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests (initial and retrained) share these hyperparameters.
rf_settings = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_settings).fit(X_train_scaled, y_train)

# Keep the columns whose importance lies strictly above the median importance.
feature_importances = rf.feature_importances_
important_features = np.flatnonzero(feature_importances > np.percentile(feature_importances, 50))
X_train_selected, X_test_selected = (
    scaled[:, important_features] for scaled in (X_train_scaled, X_test_scaled)
)

# Report the retained features in column order.
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_settings).fit(X_train_selected, y_train)

# One output folder per train/test probe pair, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the retrained model, the scaler and the selected column indices.
for artifact, pkl_name in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, pkl_name))

# Hard predictions, plus the predicted probability of label 1 (channel 2).
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_selected):.4f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred_selected):.4f}")
print(f"MCC: {matthews_corrcoef(y_test, y_pred_selected):.4f}")
print(f"Log Loss: {log_loss(y_test, y_probs_selected):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred_selected):.4f}")
print(f"Recall: {recall_score(y_test, y_pred_selected):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_selected):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix; FPR is computed from row 0 (label 0 / CH1) and FNR from
# row 1 (label 1 / CH2).
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC2 CH1: (398, 38)
✅ Loaded Probe3 ADC2 CH2: (398, 38)
✅ Loaded Probe2 ADC2 CH1: (398, 38)
✅ Loaded Probe2 ADC2 CH2: (398, 38)

Top Important Features:
Max_H: Importance 0.1002
Mean_H: Importance 0.0800
Std_H: Importance 0.0191
Mean Deviation_H: Importance 0.0206
RMS_H: Importance 0.1009
Entropy_H: Importance 0.1000
Skewness_Freq_H: Importance 0.0007
Mean_Freq_H: Importance 0.0199
Kurtosis_Freq_H: Importance 0.0185
Irregularity_H: Importance 0.0001
Min_L: Importance 0.0190
Mean_L: Importance 0.1208
Std_L: Importance 0.1202
Mean Deviation_L: Importance 0.0600
RMS_L: Importance 0.0800
Entropy_L: Importance 0.0600
Variance_L: Importance 0.0800

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0610
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       398
         CH2       1.00      1.00      1.00       398


#### **Sampling Rate 300MSPS**
This section contains data collected using a sampling rate of **300MSPS**. The model is trained on **Probe 3** and tested on **Probe 2**.



In [151]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder that contains the per-probe feature folders.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selection: ADC index, training probe, test probe and the
# sampling rate that appears in the feature file names (MSPS).
adc_num, train_probe_dataset, test_probe_dataset, sample_rate = 2, 3, 2, 300

# Probe names follow directly from the dataset numbers.
train_probe = f"Probe{train_probe_dataset}"
test_probe = f"Probe{test_probe_dataset}"

# Feature Names (Time High, Freq High, Time Low, Freq Low): the same 19
# descriptors are listed first with the "_H" suffix, then with the "_L" suffix.
_descriptor_names = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness", "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq", "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [f"{name}_{suffix}" for suffix in ("H", "L") for name in _descriptor_names]

def load_channel_data(probe_name, adc_num, channel):
    """Return the feature array stored for one probe / ADC / channel.

    The file is looked up under the module-level ``base_directory`` in the
    folder ``P<last character of probe_name>_fd``. Its name is built from
    ``probe_name``, ``adc_num``, ``channel`` and the module-level
    ``sample_rate``. The shape of the loaded array is printed before the
    array is returned.

    Raises:
        FileNotFoundError: if the expected ``.npy`` file does not exist.
    """
    npy_name = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    npy_path = os.path.join(base_directory, f"P{probe_name[-1]}_fd", npy_name)

    if os.path.exists(npy_path):
        features = np.load(npy_path)
        print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {features.shape}")
        return features

    # Nothing on disk for this combination: report the full path that was tried.
    raise FileNotFoundError(f"File not found: {npy_path}")

# ==================================================================
# Training & Test Data
# ==================================================================
# Load channel 1 and channel 2 for the training probe, then for the test probe.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Stack the two channels row-wise; channel 1 rows get label 0, channel 2 rows label 1.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and then applied to both sets.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests (initial and retrained) share these hyperparameters.
rf_settings = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_settings).fit(X_train_scaled, y_train)

# Keep the columns whose importance lies strictly above the median importance.
feature_importances = rf.feature_importances_
important_features = np.flatnonzero(feature_importances > np.percentile(feature_importances, 50))
X_train_selected, X_test_selected = (
    scaled[:, important_features] for scaled in (X_train_scaled, X_test_scaled)
)

# Report the retained features in column order.
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_settings).fit(X_train_selected, y_train)

# One output folder per train/test probe pair, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the retrained model, the scaler and the selected column indices.
for artifact, pkl_name in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, pkl_name))

# Hard predictions, plus the predicted probability of label 1 (channel 2).
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_selected):.4f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred_selected):.4f}")
print(f"MCC: {matthews_corrcoef(y_test, y_pred_selected):.4f}")
print(f"Log Loss: {log_loss(y_test, y_probs_selected):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred_selected):.4f}")
print(f"Recall: {recall_score(y_test, y_pred_selected):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_selected):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix; FPR is computed from row 0 (label 0 / CH1) and FNR from
# row 1 (label 1 / CH2).
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC2 CH1: (331, 38)
✅ Loaded Probe3 ADC2 CH2: (332, 38)
✅ Loaded Probe2 ADC2 CH1: (331, 38)
✅ Loaded Probe2 ADC2 CH2: (331, 38)

Top Important Features:
Max_H: Importance 0.1000
Mean_H: Importance 0.0850
Mean Deviation_H: Importance 0.0800
RMS_H: Importance 0.0856
Skewness_H: Importance 0.0000
Centroid_H: Importance 0.0000
Entropy_H: Importance 0.1400
Spread_H: Importance 0.0157
Mean_Freq_H: Importance 0.0405
Kurtosis_Freq_H: Importance 0.0142
Variance_H: Importance 0.0200
Min_L: Importance 0.0398
Mean_L: Importance 0.1200
Mean Deviation_L: Importance 0.0800
RMS_L: Importance 0.0800
Peak-to-Peak_L: Importance 0.0003
Entropy_L: Importance 0.0193
Mean_Freq_L: Importance 0.0196
Variance_L: Importance 0.0600

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0447
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00 

# ADC3

## **Testing of Probe 1**
To conduct experiments, **ADC3** (data acquisition device) is used consistently throughout. **Probe 1** is kept constant, and the machine learning model is **always trained on Probe 1**. The model is then tested on **Probe 2** and **Probe 3** to evaluate its generalization.

The **sampling rate** is varied across different tests, ranging from **10 MSPS to 300 MSPS**.

### Training on **Probe 1** data and Testing on **Probe 2** data
The model trained on Probe 1 is evaluated on Probe 2 and Probe 3 to see how it behaves when the probe is changed.

#### **Sampling Rate 10MSPS**
This section contains data collected using a sampling rate of **10MSPS**. The model is trained on **Probe 1** and tested on **Probe 2**.

In [152]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder that contains the per-probe feature folders.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selection: ADC index, training probe, test probe and the
# sampling rate that appears in the feature file names (MSPS).
adc_num, train_probe_dataset, test_probe_dataset, sample_rate = 3, 1, 2, 10

# Probe names follow directly from the dataset numbers.
train_probe = f"Probe{train_probe_dataset}"
test_probe = f"Probe{test_probe_dataset}"

# Feature Names (Time High, Freq High, Time Low, Freq Low): the same 19
# descriptors are listed first with the "_H" suffix, then with the "_L" suffix.
_descriptor_names = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness", "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq", "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [f"{name}_{suffix}" for suffix in ("H", "L") for name in _descriptor_names]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel combination.

    The file is looked up under the module-level ``base_directory`` in the
    folder ``P<last character of probe_name>_fd`` and its name also embeds the
    module-level ``sample_rate``. No cross-checking of the loaded contents is
    performed; the array is returned exactly as ``np.load`` produced it.

    Args:
        probe_name: Probe label such as ``"Probe1"``; only its last character
            is used to build the folder name.
        adc_num: ADC number placed in the file name.
        channel: Channel number placed in the file name.

    Returns:
        The array stored in the ``.npy`` file.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    npy_name = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    npy_path = os.path.join(base_directory, f"P{probe_name[-1]}_fd", npy_name)

    if os.path.exists(npy_path):
        features = np.load(npy_path)
        # Echo the shape so the notebook output records what was loaded.
        print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {features.shape}")
        return features

    raise FileNotFoundError(f"File not found: {npy_path}")

# ==================================================================
# Training & Test Data
# ==================================================================
# Channels 1 and 2 are loaded for the training probe first and then for the
# test probe, so the "Loaded ..." lines appear in that order.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Rows from the channel-1 file get label 0.0, rows from the channel-2 file 1.0.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and then applied to both sets.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests (feature ranking and final model) share these hyper-parameters.
_rf_settings = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**_rf_settings)
rf.fit(X_train_scaled, y_train)

# Identify Important Features: keep columns whose importance is strictly
# above the 50th percentile of all importances.
feature_importances = rf.feature_importances_
_importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > _importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features (in column order, not sorted by importance)
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**_rf_settings)
rf_selected.fit(X_train_selected, y_train)

# Create a unique folder name dynamically
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the Model, the Scaler and the selected column indices
for _artifact_file, _artifact in (
    ("RandomForest_Model.pkl", rf_selected),
    ("Scaler.pkl", scaler),
    ("Selected_Features.pkl", important_features),
):
    joblib.dump(_artifact, os.path.join(model_dir, _artifact_file))

# Predictions: hard labels plus the probability of class 1
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# Each row: printed label, metric function, and the predictions it is given
# (log loss uses the class-1 probabilities, everything else the hard labels).
for _label, _score_fn, _y_hat in (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
):
    print(f"{_label}: {_score_fn(y_test, _y_hat):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Row 0 holds the true-label-0 samples, row 1 the true-label-1 samples.
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC3 CH1: (9998, 38)
✅ Loaded Probe1 ADC3 CH2: (9998, 38)
✅ Loaded Probe2 ADC3 CH1: (9998, 38)
✅ Loaded Probe2 ADC3 CH2: (9998, 38)

Top Important Features:
Max_H: Importance 0.0487
Min_H: Importance 0.0563
Mean_H: Importance 0.0667
Mean Deviation_H: Importance 0.0065
RMS_H: Importance 0.1228
Skewness_H: Importance 0.0020
Peak-to-Peak_H: Importance 0.0022
Entropy_H: Importance 0.2617
Spread_H: Importance 0.0036
Mean_Freq_H: Importance 0.0801
Variance_H: Importance 0.0383
Max_L: Importance 0.0552
Min_L: Importance 0.0363
Mean_L: Importance 0.0386
RMS_L: Importance 0.0186
Peak-to-Peak_L: Importance 0.0031
Entropy_L: Importance 0.0350
Mean_Freq_L: Importance 0.0191
Variance_L: Importance 0.0967

Model Performance After Feature Selection:
Test Accuracy: 0.9996
Balanced Accuracy: 0.9996
MCC: 0.9992
Log Loss: 0.0041
F1 Score: 0.9996
Recall: 0.9993
Precision: 0.9999
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      9998
   

#### **Sampling Rate 20MSPS**
This section contains data collected using a sampling rate of **20MSPS**. The model is trained on **Probe 1** and tested on **Probe 2**.

In [153]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder that contains the per-probe feature folders (machine-specific path).
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selectors: ADC number, train/test probe dataset numbers, and the
# sampling-rate value that appears in the feature file names (MSPS).
adc_num, train_probe_dataset, test_probe_dataset, sample_rate = 3, 1, 2, 20

# Probe names are derived from the dataset numbers above.
train_probe = "Probe{}".format(train_probe_dataset)
test_probe = "Probe{}".format(test_probe_dataset)

# Feature names. The same 19 stems are listed once and expanded with the "_H"
# suffix first and the "_L" suffix second, giving 38 names in column order
# (original layout note: Time High, Freq High, Time Low, Freq Low).
_feature_stems = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness", "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq", "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [f"{stem}_{band}" for band in ("H", "L") for stem in _feature_stems]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel combination.

    The file is looked up under the module-level ``base_directory`` in the
    folder ``P<last character of probe_name>_fd`` and its name also embeds the
    module-level ``sample_rate``. No cross-checking of the loaded contents is
    performed; the array is returned exactly as ``np.load`` produced it.

    Args:
        probe_name: Probe label such as ``"Probe1"``; only its last character
            is used to build the folder name.
        adc_num: ADC number placed in the file name.
        channel: Channel number placed in the file name.

    Returns:
        The array stored in the ``.npy`` file.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    npy_name = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    npy_path = os.path.join(base_directory, f"P{probe_name[-1]}_fd", npy_name)

    if os.path.exists(npy_path):
        features = np.load(npy_path)
        # Echo the shape so the notebook output records what was loaded.
        print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {features.shape}")
        return features

    raise FileNotFoundError(f"File not found: {npy_path}")

# ==================================================================
# Training & Test Data
# ==================================================================
# Channels 1 and 2 are loaded for the training probe first and then for the
# test probe, so the "Loaded ..." lines appear in that order.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Rows from the channel-1 file get label 0.0, rows from the channel-2 file 1.0.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and then applied to both sets.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests (feature ranking and final model) share these hyper-parameters.
_rf_settings = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**_rf_settings)
rf.fit(X_train_scaled, y_train)

# Identify Important Features: keep columns whose importance is strictly
# above the 50th percentile of all importances.
feature_importances = rf.feature_importances_
_importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > _importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features (in column order, not sorted by importance)
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**_rf_settings)
rf_selected.fit(X_train_selected, y_train)

# Create a unique folder name dynamically
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the Model, the Scaler and the selected column indices
for _artifact_file, _artifact in (
    ("RandomForest_Model.pkl", rf_selected),
    ("Scaler.pkl", scaler),
    ("Selected_Features.pkl", important_features),
):
    joblib.dump(_artifact, os.path.join(model_dir, _artifact_file))

# Predictions: hard labels plus the probability of class 1
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# Each row: printed label, metric function, and the predictions it is given
# (log loss uses the class-1 probabilities, everything else the hard labels).
for _label, _score_fn, _y_hat in (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
):
    print(f"{_label}: {_score_fn(y_test, _y_hat):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Row 0 holds the true-label-0 samples, row 1 the true-label-1 samples.
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC3 CH1: (4998, 38)
✅ Loaded Probe1 ADC3 CH2: (4998, 38)
✅ Loaded Probe2 ADC3 CH1: (4998, 38)
✅ Loaded Probe2 ADC3 CH2: (4998, 38)

Top Important Features:
Max_H: Importance 0.1472
Min_H: Importance 0.0303
Mean_H: Importance 0.1141
Mean Deviation_H: Importance 0.0155
RMS_H: Importance 0.1951
Peak-to-Peak_H: Importance 0.0046
Entropy_H: Importance 0.0420
Mean_Freq_H: Importance 0.0746
Variance_H: Importance 0.0437
Max_L: Importance 0.0014
Min_L: Importance 0.0472
Mean_L: Importance 0.0907
Std_L: Importance 0.0006
RMS_L: Importance 0.1313
Peak-to-Peak_L: Importance 0.0010
Entropy_L: Importance 0.0283
Spread_L: Importance 0.0009
Mean_Freq_L: Importance 0.0126
Variance_L: Importance 0.0184

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0019
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      4998
        

#### **Sampling Rate 30MSPS**
This section contains data collected using a sampling rate of **30MSPS**. The model is trained on **Probe 1** and tested on **Probe 2**.

In [154]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder that contains the per-probe feature folders (machine-specific path).
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selectors: ADC number, train/test probe dataset numbers, and the
# sampling-rate value that appears in the feature file names (MSPS).
adc_num, train_probe_dataset, test_probe_dataset, sample_rate = 3, 1, 2, 30

# Probe names are derived from the dataset numbers above.
train_probe = "Probe{}".format(train_probe_dataset)
test_probe = "Probe{}".format(test_probe_dataset)

# Feature names. The same 19 stems are listed once and expanded with the "_H"
# suffix first and the "_L" suffix second, giving 38 names in column order
# (original layout note: Time High, Freq High, Time Low, Freq Low).
_feature_stems = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness", "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq", "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [f"{stem}_{band}" for band in ("H", "L") for stem in _feature_stems]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel combination.

    The file is looked up under the module-level ``base_directory`` in the
    folder ``P<last character of probe_name>_fd`` and its name also embeds the
    module-level ``sample_rate``. No cross-checking of the loaded contents is
    performed; the array is returned exactly as ``np.load`` produced it.

    Args:
        probe_name: Probe label such as ``"Probe1"``; only its last character
            is used to build the folder name.
        adc_num: ADC number placed in the file name.
        channel: Channel number placed in the file name.

    Returns:
        The array stored in the ``.npy`` file.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    npy_name = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    npy_path = os.path.join(base_directory, f"P{probe_name[-1]}_fd", npy_name)

    if os.path.exists(npy_path):
        features = np.load(npy_path)
        # Echo the shape so the notebook output records what was loaded.
        print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {features.shape}")
        return features

    raise FileNotFoundError(f"File not found: {npy_path}")

# ==================================================================
# Training & Test Data
# ==================================================================
# Channels 1 and 2 are loaded for the training probe first and then for the
# test probe, so the "Loaded ..." lines appear in that order.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Rows from the channel-1 file get label 0.0, rows from the channel-2 file 1.0.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and then applied to both sets.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests (feature ranking and final model) share these hyper-parameters.
_rf_settings = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**_rf_settings)
rf.fit(X_train_scaled, y_train)

# Identify Important Features: keep columns whose importance is strictly
# above the 50th percentile of all importances.
feature_importances = rf.feature_importances_
_importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > _importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features (in column order, not sorted by importance)
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**_rf_settings)
rf_selected.fit(X_train_selected, y_train)

# Create a unique folder name dynamically
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the Model, the Scaler and the selected column indices
for _artifact_file, _artifact in (
    ("RandomForest_Model.pkl", rf_selected),
    ("Scaler.pkl", scaler),
    ("Selected_Features.pkl", important_features),
):
    joblib.dump(_artifact, os.path.join(model_dir, _artifact_file))

# Predictions: hard labels plus the probability of class 1
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# Each row: printed label, metric function, and the predictions it is given
# (log loss uses the class-1 probabilities, everything else the hard labels).
for _label, _score_fn, _y_hat in (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
):
    print(f"{_label}: {_score_fn(y_test, _y_hat):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Row 0 holds the true-label-0 samples, row 1 the true-label-1 samples.
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC3 CH1: (3331, 38)
✅ Loaded Probe1 ADC3 CH2: (3332, 38)
✅ Loaded Probe2 ADC3 CH1: (3332, 38)
✅ Loaded Probe2 ADC3 CH2: (3332, 38)

Top Important Features:
Max_H: Importance 0.1463
Min_H: Importance 0.0133
Mean_H: Importance 0.1364
Std_H: Importance 0.0115
Mean Deviation_H: Importance 0.0126
RMS_H: Importance 0.1499
Entropy_H: Importance 0.0166
Mean_Freq_H: Importance 0.0692
Variance_H: Importance 0.0625
Max_L: Importance 0.0024
Min_L: Importance 0.0475
Mean_L: Importance 0.1051
Std_L: Importance 0.0005
RMS_L: Importance 0.1668
Kurtosis_L: Importance 0.0018
Peak-to-Peak_L: Importance 0.0006
Centroid_L: Importance 0.0004
Entropy_L: Importance 0.0293
Variance_L: Importance 0.0270

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0008
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      3332
         CH2    

#### **Sampling Rate 40MSPS**
This section contains data collected using a sampling rate of **40MSPS**. The model is trained on **Probe 1** and tested on **Probe 2**.

In [155]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder that contains the per-probe feature folders (machine-specific path).
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selectors: ADC number, train/test probe dataset numbers, and the
# sampling-rate value that appears in the feature file names (MSPS).
adc_num, train_probe_dataset, test_probe_dataset, sample_rate = 3, 1, 2, 40

# Probe names are derived from the dataset numbers above.
train_probe = "Probe{}".format(train_probe_dataset)
test_probe = "Probe{}".format(test_probe_dataset)

# Feature names. The same 19 stems are listed once and expanded with the "_H"
# suffix first and the "_L" suffix second, giving 38 names in column order
# (original layout note: Time High, Freq High, Time Low, Freq Low).
_feature_stems = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness", "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq", "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [f"{stem}_{band}" for band in ("H", "L") for stem in _feature_stems]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel combination.

    The file is looked up under the module-level ``base_directory`` in the
    folder ``P<last character of probe_name>_fd`` and its name also embeds the
    module-level ``sample_rate``. No cross-checking of the loaded contents is
    performed; the array is returned exactly as ``np.load`` produced it.

    Args:
        probe_name: Probe label such as ``"Probe1"``; only its last character
            is used to build the folder name.
        adc_num: ADC number placed in the file name.
        channel: Channel number placed in the file name.

    Returns:
        The array stored in the ``.npy`` file.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    npy_name = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    npy_path = os.path.join(base_directory, f"P{probe_name[-1]}_fd", npy_name)

    if os.path.exists(npy_path):
        features = np.load(npy_path)
        # Echo the shape so the notebook output records what was loaded.
        print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {features.shape}")
        return features

    raise FileNotFoundError(f"File not found: {npy_path}")

# ==================================================================
# Training & Test Data
# ==================================================================
# Channels 1 and 2 are loaded for the training probe first and then for the
# test probe, so the "Loaded ..." lines appear in that order.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Rows from the channel-1 file get label 0.0, rows from the channel-2 file 1.0.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and then applied to both sets.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests (feature ranking and final model) share these hyper-parameters.
_rf_settings = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**_rf_settings)
rf.fit(X_train_scaled, y_train)

# Identify Important Features: keep columns whose importance is strictly
# above the 50th percentile of all importances.
feature_importances = rf.feature_importances_
_importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > _importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features (in column order, not sorted by importance)
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**_rf_settings)
rf_selected.fit(X_train_selected, y_train)

# Create a unique folder name dynamically
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the Model, the Scaler and the selected column indices
for _artifact_file, _artifact in (
    ("RandomForest_Model.pkl", rf_selected),
    ("Scaler.pkl", scaler),
    ("Selected_Features.pkl", important_features),
):
    joblib.dump(_artifact, os.path.join(model_dir, _artifact_file))

# Predictions: hard labels plus the probability of class 1
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# Each row: printed label, metric function, and the predictions it is given
# (log loss uses the class-1 probabilities, everything else the hard labels).
for _label, _score_fn, _y_hat in (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
):
    print(f"{_label}: {_score_fn(y_test, _y_hat):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Row 0 holds the true-label-0 samples, row 1 the true-label-1 samples.
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC3 CH1: (2481, 38)
✅ Loaded Probe1 ADC3 CH2: (2481, 38)
✅ Loaded Probe2 ADC3 CH1: (2481, 38)
✅ Loaded Probe2 ADC3 CH2: (2481, 38)

Top Important Features:
Max_H: Importance 0.1030
Min_H: Importance 0.0296
Mean_H: Importance 0.0901
Std_H: Importance 0.0272
Mean Deviation_H: Importance 0.0172
RMS_H: Importance 0.1376
Kurtosis_H: Importance 0.0000
Centroid_H: Importance 0.0028
Entropy_H: Importance 0.0139
Mean_Freq_H: Importance 0.0570
Variance_H: Importance 0.0667
Min_L: Importance 0.0154
Mean_L: Importance 0.1909
Std_L: Importance 0.0000
RMS_L: Importance 0.1324
Kurtosis_L: Importance 0.0000
Entropy_L: Importance 0.0299
Variance_L: Importance 0.0863

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0008
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      2481
         CH2       1.00      1.00      1.00  

#### **Sampling Rate 50MSPS**
This section contains data collected using a sampling rate of **50MSPS**. The model is trained on **Probe 1** and tested on **Probe 2**.

In [156]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder that contains the per-probe feature folders (machine-specific path).
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selectors: ADC number, train/test probe dataset numbers, and the
# sampling-rate value that appears in the feature file names (MSPS).
adc_num, train_probe_dataset, test_probe_dataset, sample_rate = 3, 1, 2, 50

# Probe names are derived from the dataset numbers above.
train_probe = "Probe{}".format(train_probe_dataset)
test_probe = "Probe{}".format(test_probe_dataset)

# Feature names. The same 19 stems are listed once and expanded with the "_H"
# suffix first and the "_L" suffix second, giving 38 names in column order
# (original layout note: Time High, Freq High, Time Low, Freq Low).
_feature_stems = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness", "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq", "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [f"{stem}_{band}" for band in ("H", "L") for stem in _feature_stems]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel combination.

    The file is looked up under the module-level ``base_directory`` in the
    folder ``P<last character of probe_name>_fd`` and its name also embeds the
    module-level ``sample_rate``. No cross-checking of the loaded contents is
    performed; the array is returned exactly as ``np.load`` produced it.

    Args:
        probe_name: Probe label such as ``"Probe1"``; only its last character
            is used to build the folder name.
        adc_num: ADC number placed in the file name.
        channel: Channel number placed in the file name.

    Returns:
        The array stored in the ``.npy`` file.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    npy_name = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    npy_path = os.path.join(base_directory, f"P{probe_name[-1]}_fd", npy_name)

    if os.path.exists(npy_path):
        features = np.load(npy_path)
        # Echo the shape so the notebook output records what was loaded.
        print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {features.shape}")
        return features

    raise FileNotFoundError(f"File not found: {npy_path}")

# ==================================================================
# Training & Test Data
# ==================================================================
# Channels 1 and 2 are loaded for the training probe first and then for the
# test probe, so the "Loaded ..." lines appear in that order.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Rows from the channel-1 file get label 0.0, rows from the channel-2 file 1.0.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and then applied to both sets.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests (feature ranking and final model) share these hyper-parameters.
_rf_settings = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**_rf_settings)
rf.fit(X_train_scaled, y_train)

# Identify Important Features: keep columns whose importance is strictly
# above the 50th percentile of all importances.
feature_importances = rf.feature_importances_
_importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > _importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features (in column order, not sorted by importance)
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**_rf_settings)
rf_selected.fit(X_train_selected, y_train)

# Create a unique folder name dynamically
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the Model, the Scaler and the selected column indices
for _artifact_file, _artifact in (
    ("RandomForest_Model.pkl", rf_selected),
    ("Scaler.pkl", scaler),
    ("Selected_Features.pkl", important_features),
):
    joblib.dump(_artifact, os.path.join(model_dir, _artifact_file))

# Predictions: hard labels plus the probability of class 1
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# Each row: printed label, metric function, and the predictions it is given
# (log loss uses the class-1 probabilities, everything else the hard labels).
for _label, _score_fn, _y_hat in (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
):
    print(f"{_label}: {_score_fn(y_test, _y_hat):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Row 0 holds the true-label-0 samples, row 1 the true-label-1 samples.
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC3 CH1: (1998, 38)
✅ Loaded Probe1 ADC3 CH2: (1998, 38)
✅ Loaded Probe2 ADC3 CH1: (1998, 38)
✅ Loaded Probe2 ADC3 CH2: (1998, 38)



Top Important Features:
Max_H: Importance 0.1081
Min_H: Importance 0.0196
Mean_H: Importance 0.1027
RMS_H: Importance 0.1425
Peak-to-Peak_H: Importance 0.0076
Centroid_H: Importance 0.0005
Entropy_H: Importance 0.0288
Mean_Freq_H: Importance 0.0501
Variance_H: Importance 0.0835
Max_L: Importance 0.0024
Min_L: Importance 0.0441
Mean_L: Importance 0.1598
Std_L: Importance 0.0001
RMS_L: Importance 0.1222
Kurtosis_L: Importance 0.0018
Entropy_L: Importance 0.0277
Spread_L: Importance 0.0002
Mean_Freq_L: Importance 0.0098
Variance_L: Importance 0.0884

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0027
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      1998
         CH2       1.00      1.00      1.00      1998

    accuracy                           1.00      3996
   macro avg       1.00      1.00      1.00      3996
wei

#### **Sampling Rate 100MSPS**
This section contains data collected using a sampling rate of **100MSPS**. The model is trained on **Probe 1** and tested on **Probe 2**.

In [157]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ------------------------------------------------------------------
# Experiment configuration for this cell
# ------------------------------------------------------------------
# Root folder that holds the per-probe "P<n>_fd" feature directories.
base_directory = r"C:\Users\awm21\Documents\Probe_Vari\Neural_Networks\Features_Vitis"

# ADC number and sampling rate are substituted into the feature file names;
# the two dataset numbers pick the probe used for training and for testing.
adc_num = 3
train_probe_dataset = 1
test_probe_dataset = 2
sample_rate = 100

# Probe labels are derived from the dataset numbers above.
train_probe = "Probe{}".format(train_probe_dataset)
test_probe = "Probe{}".format(test_probe_dataset)

# Feature names (Time High, Freq High, Time Low, Freq Low): the same 19
# statistics appear twice, first with the "_H" suffix and then with "_L".
_feature_stems = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [
    f"{stem}_{suffix}" for suffix in ("H", "L") for stem in _feature_stems
]

def load_channel_data(probe_name, adc_num, channel, rate=None, root=None):
    """Load the feature array stored for one probe / ADC / channel.

    The file is looked up at
    ``<root>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``
    where ``<n>`` is the run of digits at the end of ``probe_name``. The
    function only locates and loads the file; it does not validate the
    contents of the array.

    Args:
        probe_name: Probe label ending in its number, e.g. ``"Probe1"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.
        rate: Sampling-rate value used in the file name. Defaults to the
            module-level ``sample_rate``.
        root: Directory containing the ``P<n>_fd`` folders. Defaults to the
            module-level ``base_directory``.

    Returns:
        The array returned by ``np.load`` for the file.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Fall back to the notebook-level configuration at call time so existing
    # three-argument calls behave exactly as before.
    if rate is None:
        rate = sample_rate
    if root is None:
        root = base_directory

    # Take every trailing digit so "Probe10" maps to "P10_fd" instead of
    # "P0_fd"; labels without trailing digits keep the old last-character rule.
    label_stem = probe_name.rstrip("0123456789")
    probe_id = probe_name[len(label_stem):] or probe_name[-1]

    folder = os.path.join(root, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ------------------------------------------------------------------
# Assemble training and test sets
# ------------------------------------------------------------------
# Channel 1 files supply class 0 and channel 2 files supply class 1; the
# training probe is loaded first, then the test probe.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ------------------------------------------------------------------
# Standardise features
# ------------------------------------------------------------------
# The scaler is fitted on the training data only and then applied to both sets.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ------------------------------------------------------------------
# First forest: rank features by importance
# ------------------------------------------------------------------
# Both forests in this cell are built with the same hyper-parameters.
forest_settings = {
    "n_estimators": 50,
    "max_depth": 10,
    "min_samples_split": 10,
    "min_samples_leaf": 5,
    "max_features": 'sqrt',
    "random_state": 42,
}
rf = RandomForestClassifier(**forest_settings)
rf.fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.nonzero(feature_importances > importance_cutoff)[0]
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features together with their importance.
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ------------------------------------------------------------------
# Second forest: retrain on the retained columns only
# ------------------------------------------------------------------
rf_selected = RandomForestClassifier(**forest_settings)
rf_selected.fit(X_train_selected, y_train)

# Output folder is named after the probe pair, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected column indices.
for artifact_file, artifact in (
    ("RandomForest_Model.pkl", rf_selected),
    ("Scaler.pkl", scaler),
    ("Selected_Features.pkl", important_features),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and the predicted probability of class 1 on the test probe.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ------------------------------------------------------------------
# Evaluation on the test probe
# ------------------------------------------------------------------
print("\nModel Performance After Feature Selection:")
# Log loss is scored on probabilities; every other metric on hard predictions.
for metric_label, metric_fn, metric_input in (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
):
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix plus the error rates taken from its first and second rows.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC3 CH1: (998, 38)
✅ Loaded Probe1 ADC3 CH2: (998, 38)
✅ Loaded Probe2 ADC3 CH1: (998, 38)
✅ Loaded Probe2 ADC3 CH2: (998, 38)

Top Important Features:
Max_H: Importance 0.1108
Min_H: Importance 0.0052
Mean_H: Importance 0.0933
Std_H: Importance 0.0222
Mean Deviation_H: Importance 0.0288
RMS_H: Importance 0.1419
Skewness_H: Importance 0.0005
Entropy_H: Importance 0.0160
Spread_H: Importance 0.0007
Mean_Freq_H: Importance 0.0336
Irregularity_H: Importance 0.0012
Variance_H: Importance 0.0700
Max_L: Importance 0.0024
Min_L: Importance 0.0122
Mean_L: Importance 0.1918
Mean Deviation_L: Importance 0.0417
RMS_L: Importance 0.1277
Entropy_L: Importance 0.0091
Variance_L: Importance 0.0905

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0000
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       998
         CH

#### **Sampling Rate 150MSPS**
This section contains data collected using a sampling rate of **150MSPS**. The model is trained on **Probe 1** and tested on **Probe 2**.



In [158]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ------------------------------------------------------------------
# Experiment configuration for this cell
# ------------------------------------------------------------------
# Root folder that holds the per-probe "P<n>_fd" feature directories.
base_directory = r"C:\Users\awm21\Documents\Probe_Vari\Neural_Networks\Features_Vitis"

# ADC number and sampling rate are substituted into the feature file names;
# the two dataset numbers pick the probe used for training and for testing.
adc_num = 3
train_probe_dataset = 1
test_probe_dataset = 2
sample_rate = 150

# Probe labels are derived from the dataset numbers above.
train_probe = "Probe{}".format(train_probe_dataset)
test_probe = "Probe{}".format(test_probe_dataset)

# Feature names (Time High, Freq High, Time Low, Freq Low): the same 19
# statistics appear twice, first with the "_H" suffix and then with "_L".
_feature_stems = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [
    f"{stem}_{suffix}" for suffix in ("H", "L") for stem in _feature_stems
]

def load_channel_data(probe_name, adc_num, channel, rate=None, root=None):
    """Load the feature array stored for one probe / ADC / channel.

    The file is looked up at
    ``<root>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``
    where ``<n>`` is the run of digits at the end of ``probe_name``. The
    function only locates and loads the file; it does not validate the
    contents of the array.

    Args:
        probe_name: Probe label ending in its number, e.g. ``"Probe1"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.
        rate: Sampling-rate value used in the file name. Defaults to the
            module-level ``sample_rate``.
        root: Directory containing the ``P<n>_fd`` folders. Defaults to the
            module-level ``base_directory``.

    Returns:
        The array returned by ``np.load`` for the file.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Fall back to the notebook-level configuration at call time so existing
    # three-argument calls behave exactly as before.
    if rate is None:
        rate = sample_rate
    if root is None:
        root = base_directory

    # Take every trailing digit so "Probe10" maps to "P10_fd" instead of
    # "P0_fd"; labels without trailing digits keep the old last-character rule.
    label_stem = probe_name.rstrip("0123456789")
    probe_id = probe_name[len(label_stem):] or probe_name[-1]

    folder = os.path.join(root, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ------------------------------------------------------------------
# Assemble training and test sets
# ------------------------------------------------------------------
# Channel 1 files supply class 0 and channel 2 files supply class 1; the
# training probe is loaded first, then the test probe.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ------------------------------------------------------------------
# Standardise features
# ------------------------------------------------------------------
# The scaler is fitted on the training data only and then applied to both sets.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ------------------------------------------------------------------
# First forest: rank features by importance
# ------------------------------------------------------------------
# Both forests in this cell are built with the same hyper-parameters.
forest_settings = {
    "n_estimators": 50,
    "max_depth": 10,
    "min_samples_split": 10,
    "min_samples_leaf": 5,
    "max_features": 'sqrt',
    "random_state": 42,
}
rf = RandomForestClassifier(**forest_settings)
rf.fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.nonzero(feature_importances > importance_cutoff)[0]
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features together with their importance.
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ------------------------------------------------------------------
# Second forest: retrain on the retained columns only
# ------------------------------------------------------------------
rf_selected = RandomForestClassifier(**forest_settings)
rf_selected.fit(X_train_selected, y_train)

# Output folder is named after the probe pair, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected column indices.
for artifact_file, artifact in (
    ("RandomForest_Model.pkl", rf_selected),
    ("Scaler.pkl", scaler),
    ("Selected_Features.pkl", important_features),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and the predicted probability of class 1 on the test probe.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ------------------------------------------------------------------
# Evaluation on the test probe
# ------------------------------------------------------------------
print("\nModel Performance After Feature Selection:")
# Log loss is scored on probabilities; every other metric on hard predictions.
for metric_label, metric_fn, metric_input in (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
):
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix plus the error rates taken from its first and second rows.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC3 CH1: (665, 38)
✅ Loaded Probe1 ADC3 CH2: (665, 38)
✅ Loaded Probe2 ADC3 CH1: (665, 38)
✅ Loaded Probe2 ADC3 CH2: (665, 38)

Top Important Features:
Max_H: Importance 0.1001
Mean_H: Importance 0.0833
Std_H: Importance 0.0339
Mean Deviation_H: Importance 0.0691
RMS_H: Importance 0.1260
Entropy_H: Importance 0.0161
Mean_Freq_H: Importance 0.0159
Irregularity_H: Importance 0.0117
Variance_H: Importance 0.0214
Min_L: Importance 0.0386
Mean_L: Importance 0.1804
Mean Deviation_L: Importance 0.0800
RMS_L: Importance 0.1241
Entropy_L: Importance 0.0163
Variance_L: Importance 0.0832

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0447
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       665
         CH2       1.00      1.00      1.00       665

    accuracy                           1.00      1330
   macro a

#### **Sampling Rate 200MSPS**
This section contains data collected using a sampling rate of **200MSPS**. The model is trained on **Probe 1** and tested on **Probe 2**.



In [159]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ------------------------------------------------------------------
# Experiment configuration for this cell
# ------------------------------------------------------------------
# Root folder that holds the per-probe "P<n>_fd" feature directories.
base_directory = r"C:\Users\awm21\Documents\Probe_Vari\Neural_Networks\Features_Vitis"

# ADC number and sampling rate are substituted into the feature file names;
# the two dataset numbers pick the probe used for training and for testing.
adc_num = 3
train_probe_dataset = 1
test_probe_dataset = 2
sample_rate = 200

# Probe labels are derived from the dataset numbers above.
train_probe = "Probe{}".format(train_probe_dataset)
test_probe = "Probe{}".format(test_probe_dataset)

# Feature names (Time High, Freq High, Time Low, Freq Low): the same 19
# statistics appear twice, first with the "_H" suffix and then with "_L".
_feature_stems = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [
    f"{stem}_{suffix}" for suffix in ("H", "L") for stem in _feature_stems
]

def load_channel_data(probe_name, adc_num, channel, rate=None, root=None):
    """Load the feature array stored for one probe / ADC / channel.

    The file is looked up at
    ``<root>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``
    where ``<n>`` is the run of digits at the end of ``probe_name``. The
    function only locates and loads the file; it does not validate the
    contents of the array.

    Args:
        probe_name: Probe label ending in its number, e.g. ``"Probe1"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.
        rate: Sampling-rate value used in the file name. Defaults to the
            module-level ``sample_rate``.
        root: Directory containing the ``P<n>_fd`` folders. Defaults to the
            module-level ``base_directory``.

    Returns:
        The array returned by ``np.load`` for the file.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Fall back to the notebook-level configuration at call time so existing
    # three-argument calls behave exactly as before.
    if rate is None:
        rate = sample_rate
    if root is None:
        root = base_directory

    # Take every trailing digit so "Probe10" maps to "P10_fd" instead of
    # "P0_fd"; labels without trailing digits keep the old last-character rule.
    label_stem = probe_name.rstrip("0123456789")
    probe_id = probe_name[len(label_stem):] or probe_name[-1]

    folder = os.path.join(root, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ------------------------------------------------------------------
# Assemble training and test sets
# ------------------------------------------------------------------
# Channel 1 files supply class 0 and channel 2 files supply class 1; the
# training probe is loaded first, then the test probe.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ------------------------------------------------------------------
# Standardise features
# ------------------------------------------------------------------
# The scaler is fitted on the training data only and then applied to both sets.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ------------------------------------------------------------------
# First forest: rank features by importance
# ------------------------------------------------------------------
# Both forests in this cell are built with the same hyper-parameters.
forest_settings = {
    "n_estimators": 50,
    "max_depth": 10,
    "min_samples_split": 10,
    "min_samples_leaf": 5,
    "max_features": 'sqrt',
    "random_state": 42,
}
rf = RandomForestClassifier(**forest_settings)
rf.fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.nonzero(feature_importances > importance_cutoff)[0]
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features together with their importance.
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ------------------------------------------------------------------
# Second forest: retrain on the retained columns only
# ------------------------------------------------------------------
rf_selected = RandomForestClassifier(**forest_settings)
rf_selected.fit(X_train_selected, y_train)

# Output folder is named after the probe pair, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected column indices.
for artifact_file, artifact in (
    ("RandomForest_Model.pkl", rf_selected),
    ("Scaler.pkl", scaler),
    ("Selected_Features.pkl", important_features),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and the predicted probability of class 1 on the test probe.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ------------------------------------------------------------------
# Evaluation on the test probe
# ------------------------------------------------------------------
print("\nModel Performance After Feature Selection:")
# Log loss is scored on probabilities; every other metric on hard predictions.
for metric_label, metric_fn, metric_input in (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
):
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix plus the error rates taken from its first and second rows.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC3 CH1: (498, 38)
✅ Loaded Probe1 ADC3 CH2: (498, 38)
✅ Loaded Probe2 ADC3 CH1: (498, 38)
✅ Loaded Probe2 ADC3 CH2: (498, 38)

Top Important Features:
Max_H: Importance 0.0600
Mean_H: Importance 0.0801
Mean Deviation_H: Importance 0.0601
RMS_H: Importance 0.1258
Entropy_H: Importance 0.0176
Mean_Freq_H: Importance 0.0185
Kurtosis_Freq_H: Importance 0.0109
Variance_H: Importance 0.0200
Min_L: Importance 0.0881
Mean_L: Importance 0.1800
Std_L: Importance 0.0199
Mean Deviation_L: Importance 0.0800
RMS_L: Importance 0.1215
Entropy_L: Importance 0.0199
Mean_Freq_L: Importance 0.0144
Variance_L: Importance 0.0833

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0002
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       498
         CH2       1.00      1.00      1.00       498

    accuracy                    

#### **Sampling Rate 250MSPS**
This section contains data collected using a sampling rate of **250MSPS**. The model is trained on **Probe 1** and tested on **Probe 2**.



In [160]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ------------------------------------------------------------------
# Experiment configuration for this cell
# ------------------------------------------------------------------
# Root folder that holds the per-probe "P<n>_fd" feature directories.
base_directory = r"C:\Users\awm21\Documents\Probe_Vari\Neural_Networks\Features_Vitis"

# ADC number and sampling rate are substituted into the feature file names;
# the two dataset numbers pick the probe used for training and for testing.
adc_num = 3
train_probe_dataset = 1
test_probe_dataset = 2
sample_rate = 250

# Probe labels are derived from the dataset numbers above.
train_probe = "Probe{}".format(train_probe_dataset)
test_probe = "Probe{}".format(test_probe_dataset)

# Feature names (Time High, Freq High, Time Low, Freq Low): the same 19
# statistics appear twice, first with the "_H" suffix and then with "_L".
_feature_stems = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [
    f"{stem}_{suffix}" for suffix in ("H", "L") for stem in _feature_stems
]

def load_channel_data(probe_name, adc_num, channel, rate=None, root=None):
    """Load the feature array stored for one probe / ADC / channel.

    The file is looked up at
    ``<root>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``
    where ``<n>`` is the run of digits at the end of ``probe_name``. The
    function only locates and loads the file; it does not validate the
    contents of the array.

    Args:
        probe_name: Probe label ending in its number, e.g. ``"Probe1"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.
        rate: Sampling-rate value used in the file name. Defaults to the
            module-level ``sample_rate``.
        root: Directory containing the ``P<n>_fd`` folders. Defaults to the
            module-level ``base_directory``.

    Returns:
        The array returned by ``np.load`` for the file.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Fall back to the notebook-level configuration at call time so existing
    # three-argument calls behave exactly as before.
    if rate is None:
        rate = sample_rate
    if root is None:
        root = base_directory

    # Take every trailing digit so "Probe10" maps to "P10_fd" instead of
    # "P0_fd"; labels without trailing digits keep the old last-character rule.
    label_stem = probe_name.rstrip("0123456789")
    probe_id = probe_name[len(label_stem):] or probe_name[-1]

    folder = os.path.join(root, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ------------------------------------------------------------------
# Assemble training and test sets
# ------------------------------------------------------------------
# Channel 1 files supply class 0 and channel 2 files supply class 1; the
# training probe is loaded first, then the test probe.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ------------------------------------------------------------------
# Standardise features
# ------------------------------------------------------------------
# The scaler is fitted on the training data only and then applied to both sets.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ------------------------------------------------------------------
# First forest: rank features by importance
# ------------------------------------------------------------------
# Both forests in this cell are built with the same hyper-parameters.
forest_settings = {
    "n_estimators": 50,
    "max_depth": 10,
    "min_samples_split": 10,
    "min_samples_leaf": 5,
    "max_features": 'sqrt',
    "random_state": 42,
}
rf = RandomForestClassifier(**forest_settings)
rf.fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.nonzero(feature_importances > importance_cutoff)[0]
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features together with their importance.
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ------------------------------------------------------------------
# Second forest: retrain on the retained columns only
# ------------------------------------------------------------------
rf_selected = RandomForestClassifier(**forest_settings)
rf_selected.fit(X_train_selected, y_train)

# Output folder is named after the probe pair, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected column indices.
for artifact_file, artifact in (
    ("RandomForest_Model.pkl", rf_selected),
    ("Scaler.pkl", scaler),
    ("Selected_Features.pkl", important_features),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and the predicted probability of class 1 on the test probe.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ------------------------------------------------------------------
# Evaluation on the test probe
# ------------------------------------------------------------------
print("\nModel Performance After Feature Selection:")
# Log loss is scored on probabilities; every other metric on hard predictions.
for metric_label, metric_fn, metric_input in (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
):
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix plus the error rates taken from its first and second rows.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC3 CH1: (398, 38)
✅ Loaded Probe1 ADC3 CH2: (398, 38)
✅ Loaded Probe2 ADC3 CH1: (398, 38)
✅ Loaded Probe2 ADC3 CH2: (398, 38)

Top Important Features:
Max_H: Importance 0.0805
Mean_H: Importance 0.0828
Mean Deviation_H: Importance 0.0800
RMS_H: Importance 0.1045
Kurtosis_H: Importance 0.0001
Peak-to-Peak_H: Importance 0.0000
Mean_Freq_H: Importance 0.0187
Kurtosis_Freq_H: Importance 0.0324
Variance_H: Importance 0.0200
Min_L: Importance 0.0796
Mean_L: Importance 0.1604
Std_L: Importance 0.0202
Mean Deviation_L: Importance 0.0802
RMS_L: Importance 0.1219
Centroid_L: Importance 0.0006
Entropy_L: Importance 0.0198
Mean_Freq_L: Importance 0.0381
Variance_L: Importance 0.0602

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0039
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       398
         CH2       1.0

#### **Sampling Rate 300MSPS**
This section contains data collected using a sampling rate of **300MSPS**. The model is trained on **Probe 1** and tested on **Probe 2**.



In [161]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ------------------------------------------------------------------
# Experiment configuration for this cell
# ------------------------------------------------------------------
# Root folder that holds the per-probe "P<n>_fd" feature directories.
base_directory = r"C:\Users\awm21\Documents\Probe_Vari\Neural_Networks\Features_Vitis"

# ADC number and sampling rate are substituted into the feature file names;
# the two dataset numbers pick the probe used for training and for testing.
adc_num = 3
train_probe_dataset = 1
test_probe_dataset = 2
sample_rate = 300

# Probe labels are derived from the dataset numbers above.
train_probe = "Probe{}".format(train_probe_dataset)
test_probe = "Probe{}".format(test_probe_dataset)

# Feature names (Time High, Freq High, Time Low, Freq Low): the same 19
# statistics appear twice, first with the "_H" suffix and then with "_L".
_feature_stems = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [
    f"{stem}_{suffix}" for suffix in ("H", "L") for stem in _feature_stems
]

def load_channel_data(probe_name, adc_num, channel, rate_msps=None, root=None):
    """Load the saved feature array for one probe / ADC / channel.

    The file is looked up at
    ``<root>/P<probe number>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``.
    Only the existence of that file is checked; the loaded array itself is
    not validated.

    Args:
        probe_name: Probe identifier such as ``"Probe1"``; its trailing digits
            select the ``P<n>_fd`` folder.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.
        rate_msps: Sampling rate used in the file name. Defaults to the
            module-level ``sample_rate``.
        root: Directory containing the per-probe folders. Defaults to the
            module-level ``base_directory``.

    Returns:
        The array stored in the ``.npy`` file, as returned by ``np.load``.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    if rate_msps is None:
        rate_msps = sample_rate
    if root is None:
        root = base_directory

    # Use every trailing digit so a multi-digit probe ("Probe10") maps to
    # "P10_fd" rather than "P0_fd". Fall back to the last character when the
    # name has no trailing digits, which matches the previous lookup.
    probe_id = probe_name[len(probe_name.rstrip("0123456789")):] or probe_name[-1]
    folder = os.path.join(root, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate_msps}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Channels 1 and 2 are loaded for the training probe first, then for the
# test probe.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Channel 1 rows come first in each stack, so label 0.0 marks channel 1 and
# label 1.0 marks channel 2.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training data only and applied to both sets.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# One hyper-parameter set shared by the initial and the retrained forest.
rf_params = {
    "n_estimators": 50,
    "max_depth": 10,
    "min_samples_split": 10,
    "min_samples_leaf": 5,
    "max_features": "sqrt",
    "random_state": 42,
}
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Identify Important Features: keep the columns whose importance is strictly
# above the median (50th percentile) importance.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# Create a unique folder name dynamically
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the model, the scaler and the selected column indices side by side.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Predictions: hard labels plus the predicted probability of class 1.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_selected):.4f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred_selected):.4f}")
print(f"MCC: {matthews_corrcoef(y_test, y_pred_selected):.4f}")
print(f"Log Loss: {log_loss(y_test, y_probs_selected):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred_selected):.4f}")
print(f"Recall: {recall_score(y_test, y_pred_selected):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_selected):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Row 0 holds the true class-0 samples, row 1 the true class-1 samples.
true_neg, false_pos = cm[0, 0], cm[0, 1]
false_neg, true_pos = cm[1, 0], cm[1, 1]
fp_rate = false_pos / (false_pos + true_neg)
fn_rate = false_neg / (false_neg + true_pos)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC3 CH1: (332, 38)
✅ Loaded Probe1 ADC3 CH2: (332, 38)
✅ Loaded Probe2 ADC3 CH1: (332, 38)
✅ Loaded Probe2 ADC3 CH2: (332, 38)

Top Important Features:
Max_H: Importance 0.0801
Mean_H: Importance 0.0800
Mean Deviation_H: Importance 0.0599
RMS_H: Importance 0.1244
Peak-to-Peak_H: Importance 0.0001
Entropy_H: Importance 0.0399
Mean_Freq_H: Importance 0.0181
Kurtosis_Freq_H: Importance 0.0157
Irregularity_H: Importance 0.0007
Variance_H: Importance 0.0200
Min_L: Importance 0.0401
Mean_L: Importance 0.1600
Std_L: Importance 0.0411
Mean Deviation_L: Importance 0.0800
RMS_L: Importance 0.1200
Entropy_L: Importance 0.0200
Mean_Freq_L: Importance 0.0199
Irregularity_L: Importance 0.0000
Variance_L: Importance 0.0800

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0001
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      

### Training **Probe 1** data and Testing on **Probe 3** data
The model trained on Probe 1 is evaluated on Probe 3 data to test how well it generalizes when the probe is changed.

#### **Sampling Rate 10MSPS**
This section contains data collected using a sampling rate of **10MSPS**. The model is trained on **Probe 1** and tested on **Probe 3**.

In [162]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root directory under which the per-probe "P<n>_fd" feature folders are looked up.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selection: ADC number, sampling rate (the "<rate>_MSPS" part of
# the file names) and the dataset numbers of the training and test probes.
adc_num = 3
sample_rate = 10
train_probe_dataset = 1
test_probe_dataset = 3

# Probe names follow directly from the dataset numbers above.
train_probe = f"Probe{train_probe_dataset}"
test_probe = f"Probe{test_probe_dataset}"

# Feature names, indexed by feature column, in four consecutive groups.
feature_names = (
    # Time High
    ["Max_H", "Min_H", "Mean_H", "Std_H", "Mean Deviation_H", "RMS_H", "Skewness_H", "Kurtosis_H",
     "Peak-to-Peak_H", "Zero Crossing Rate_H"]
    # Freq High
    + ["Centroid_H", "Entropy_H", "Spread_H", "Skewness_Freq_H", "Mean_Freq_H", "Kurtosis_Freq_H",
       "Irregularity_H", "Variance_H", "Dominant_Freq_H"]
    # Time Low
    + ["Max_L", "Min_L", "Mean_L", "Std_L", "Mean Deviation_L", "RMS_L", "Skewness_L", "Kurtosis_L",
       "Peak-to-Peak_L", "Zero Crossing Rate_L"]
    # Freq Low
    + ["Centroid_L", "Entropy_L", "Spread_L", "Skewness_Freq_L", "Mean_Freq_L", "Kurtosis_Freq_L",
       "Irregularity_L", "Variance_L", "Dominant_Freq_L"]
)

def load_channel_data(probe_name, adc_num, channel, rate_msps=None, root=None):
    """Load the saved feature array for one probe / ADC / channel.

    The file is looked up at
    ``<root>/P<probe number>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``.
    Only the existence of that file is checked; the loaded array itself is
    not validated.

    Args:
        probe_name: Probe identifier such as ``"Probe1"``; its trailing digits
            select the ``P<n>_fd`` folder.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.
        rate_msps: Sampling rate used in the file name. Defaults to the
            module-level ``sample_rate``.
        root: Directory containing the per-probe folders. Defaults to the
            module-level ``base_directory``.

    Returns:
        The array stored in the ``.npy`` file, as returned by ``np.load``.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    if rate_msps is None:
        rate_msps = sample_rate
    if root is None:
        root = base_directory

    # Use every trailing digit so a multi-digit probe ("Probe10") maps to
    # "P10_fd" rather than "P0_fd". Fall back to the last character when the
    # name has no trailing digits, which matches the previous lookup.
    probe_id = probe_name[len(probe_name.rstrip("0123456789")):] or probe_name[-1]
    folder = os.path.join(root, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate_msps}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Channels 1 and 2 are loaded for the training probe first, then for the
# test probe.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Channel 1 rows come first in each stack, so label 0.0 marks channel 1 and
# label 1.0 marks channel 2.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training data only and applied to both sets.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# One hyper-parameter set shared by the initial and the retrained forest.
rf_params = {
    "n_estimators": 50,
    "max_depth": 10,
    "min_samples_split": 10,
    "min_samples_leaf": 5,
    "max_features": "sqrt",
    "random_state": 42,
}
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Identify Important Features: keep the columns whose importance is strictly
# above the median (50th percentile) importance.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# Create a unique folder name dynamically
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the model, the scaler and the selected column indices side by side.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Predictions: hard labels plus the predicted probability of class 1.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_selected):.4f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred_selected):.4f}")
print(f"MCC: {matthews_corrcoef(y_test, y_pred_selected):.4f}")
print(f"Log Loss: {log_loss(y_test, y_probs_selected):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred_selected):.4f}")
print(f"Recall: {recall_score(y_test, y_pred_selected):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_selected):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Row 0 holds the true class-0 samples, row 1 the true class-1 samples.
true_neg, false_pos = cm[0, 0], cm[0, 1]
false_neg, true_pos = cm[1, 0], cm[1, 1]
fp_rate = false_pos / (false_pos + true_neg)
fn_rate = false_neg / (false_neg + true_pos)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC3 CH1: (9998, 38)
✅ Loaded Probe1 ADC3 CH2: (9998, 38)
✅ Loaded Probe3 ADC3 CH1: (9998, 38)
✅ Loaded Probe3 ADC3 CH2: (9998, 38)

Top Important Features:
Max_H: Importance 0.0487
Min_H: Importance 0.0563
Mean_H: Importance 0.0667
Mean Deviation_H: Importance 0.0065
RMS_H: Importance 0.1228
Skewness_H: Importance 0.0020
Peak-to-Peak_H: Importance 0.0022
Entropy_H: Importance 0.2617
Spread_H: Importance 0.0036
Mean_Freq_H: Importance 0.0801
Variance_H: Importance 0.0383
Max_L: Importance 0.0552
Min_L: Importance 0.0363
Mean_L: Importance 0.0386
RMS_L: Importance 0.0186
Peak-to-Peak_L: Importance 0.0031
Entropy_L: Importance 0.0350
Mean_Freq_L: Importance 0.0191
Variance_L: Importance 0.0967

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0019
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      9998
   

#### **Sampling Rate 20MSPS**
This section contains data collected using a sampling rate of **20MSPS**. The model is trained on **Probe 1** and tested on **Probe 3**.



In [163]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root directory under which the per-probe "P<n>_fd" feature folders are looked up.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selection: ADC number, sampling rate (the "<rate>_MSPS" part of
# the file names) and the dataset numbers of the training and test probes.
adc_num = 3
sample_rate = 20
train_probe_dataset = 1
test_probe_dataset = 3

# Probe names follow directly from the dataset numbers above.
train_probe = f"Probe{train_probe_dataset}"
test_probe = f"Probe{test_probe_dataset}"

# Feature names, indexed by feature column, in four consecutive groups.
feature_names = (
    # Time High
    ["Max_H", "Min_H", "Mean_H", "Std_H", "Mean Deviation_H", "RMS_H", "Skewness_H", "Kurtosis_H",
     "Peak-to-Peak_H", "Zero Crossing Rate_H"]
    # Freq High
    + ["Centroid_H", "Entropy_H", "Spread_H", "Skewness_Freq_H", "Mean_Freq_H", "Kurtosis_Freq_H",
       "Irregularity_H", "Variance_H", "Dominant_Freq_H"]
    # Time Low
    + ["Max_L", "Min_L", "Mean_L", "Std_L", "Mean Deviation_L", "RMS_L", "Skewness_L", "Kurtosis_L",
       "Peak-to-Peak_L", "Zero Crossing Rate_L"]
    # Freq Low
    + ["Centroid_L", "Entropy_L", "Spread_L", "Skewness_Freq_L", "Mean_Freq_L", "Kurtosis_Freq_L",
       "Irregularity_L", "Variance_L", "Dominant_Freq_L"]
)

def load_channel_data(probe_name, adc_num, channel, rate_msps=None, root=None):
    """Load the saved feature array for one probe / ADC / channel.

    The file is looked up at
    ``<root>/P<probe number>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``.
    Only the existence of that file is checked; the loaded array itself is
    not validated.

    Args:
        probe_name: Probe identifier such as ``"Probe1"``; its trailing digits
            select the ``P<n>_fd`` folder.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.
        rate_msps: Sampling rate used in the file name. Defaults to the
            module-level ``sample_rate``.
        root: Directory containing the per-probe folders. Defaults to the
            module-level ``base_directory``.

    Returns:
        The array stored in the ``.npy`` file, as returned by ``np.load``.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    if rate_msps is None:
        rate_msps = sample_rate
    if root is None:
        root = base_directory

    # Use every trailing digit so a multi-digit probe ("Probe10") maps to
    # "P10_fd" rather than "P0_fd". Fall back to the last character when the
    # name has no trailing digits, which matches the previous lookup.
    probe_id = probe_name[len(probe_name.rstrip("0123456789")):] or probe_name[-1]
    folder = os.path.join(root, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate_msps}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Channels 1 and 2 are loaded for the training probe first, then for the
# test probe.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Channel 1 rows come first in each stack, so label 0.0 marks channel 1 and
# label 1.0 marks channel 2.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training data only and applied to both sets.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# One hyper-parameter set shared by the initial and the retrained forest.
rf_params = {
    "n_estimators": 50,
    "max_depth": 10,
    "min_samples_split": 10,
    "min_samples_leaf": 5,
    "max_features": "sqrt",
    "random_state": 42,
}
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Identify Important Features: keep the columns whose importance is strictly
# above the median (50th percentile) importance.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# Create a unique folder name dynamically
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the model, the scaler and the selected column indices side by side.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Predictions: hard labels plus the predicted probability of class 1.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_selected):.4f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred_selected):.4f}")
print(f"MCC: {matthews_corrcoef(y_test, y_pred_selected):.4f}")
print(f"Log Loss: {log_loss(y_test, y_probs_selected):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred_selected):.4f}")
print(f"Recall: {recall_score(y_test, y_pred_selected):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_selected):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Row 0 holds the true class-0 samples, row 1 the true class-1 samples.
true_neg, false_pos = cm[0, 0], cm[0, 1]
false_neg, true_pos = cm[1, 0], cm[1, 1]
fp_rate = false_pos / (false_pos + true_neg)
fn_rate = false_neg / (false_neg + true_pos)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC3 CH1: (4998, 38)
✅ Loaded Probe1 ADC3 CH2: (4998, 38)
✅ Loaded Probe3 ADC3 CH1: (4998, 38)
✅ Loaded Probe3 ADC3 CH2: (4998, 38)

Top Important Features:
Max_H: Importance 0.1472
Min_H: Importance 0.0303
Mean_H: Importance 0.1141
Mean Deviation_H: Importance 0.0155
RMS_H: Importance 0.1951
Peak-to-Peak_H: Importance 0.0046
Entropy_H: Importance 0.0420
Mean_Freq_H: Importance 0.0746
Variance_H: Importance 0.0437
Max_L: Importance 0.0014
Min_L: Importance 0.0472
Mean_L: Importance 0.0907
Std_L: Importance 0.0006
RMS_L: Importance 0.1313
Peak-to-Peak_L: Importance 0.0010
Entropy_L: Importance 0.0283
Spread_L: Importance 0.0009
Mean_Freq_L: Importance 0.0126
Variance_L: Importance 0.0184

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0009
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      4998
        

#### **Sampling Rate 30MSPS**
This section contains data collected using a sampling rate of **30MSPS**. The model is trained on **Probe 1** and tested on **Probe 3**.



In [164]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root directory under which the per-probe "P<n>_fd" feature folders are looked up.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selection: ADC number, sampling rate (the "<rate>_MSPS" part of
# the file names) and the dataset numbers of the training and test probes.
adc_num = 3
sample_rate = 30
train_probe_dataset = 1
test_probe_dataset = 3

# Probe names follow directly from the dataset numbers above.
train_probe = f"Probe{train_probe_dataset}"
test_probe = f"Probe{test_probe_dataset}"

# Feature names, indexed by feature column, in four consecutive groups.
feature_names = (
    # Time High
    ["Max_H", "Min_H", "Mean_H", "Std_H", "Mean Deviation_H", "RMS_H", "Skewness_H", "Kurtosis_H",
     "Peak-to-Peak_H", "Zero Crossing Rate_H"]
    # Freq High
    + ["Centroid_H", "Entropy_H", "Spread_H", "Skewness_Freq_H", "Mean_Freq_H", "Kurtosis_Freq_H",
       "Irregularity_H", "Variance_H", "Dominant_Freq_H"]
    # Time Low
    + ["Max_L", "Min_L", "Mean_L", "Std_L", "Mean Deviation_L", "RMS_L", "Skewness_L", "Kurtosis_L",
       "Peak-to-Peak_L", "Zero Crossing Rate_L"]
    # Freq Low
    + ["Centroid_L", "Entropy_L", "Spread_L", "Skewness_Freq_L", "Mean_Freq_L", "Kurtosis_Freq_L",
       "Irregularity_L", "Variance_L", "Dominant_Freq_L"]
)

def load_channel_data(probe_name, adc_num, channel, rate_msps=None, root=None):
    """Load the saved feature array for one probe / ADC / channel.

    The file is looked up at
    ``<root>/P<probe number>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``.
    Only the existence of that file is checked; the loaded array itself is
    not validated.

    Args:
        probe_name: Probe identifier such as ``"Probe1"``; its trailing digits
            select the ``P<n>_fd`` folder.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.
        rate_msps: Sampling rate used in the file name. Defaults to the
            module-level ``sample_rate``.
        root: Directory containing the per-probe folders. Defaults to the
            module-level ``base_directory``.

    Returns:
        The array stored in the ``.npy`` file, as returned by ``np.load``.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    if rate_msps is None:
        rate_msps = sample_rate
    if root is None:
        root = base_directory

    # Use every trailing digit so a multi-digit probe ("Probe10") maps to
    # "P10_fd" rather than "P0_fd". Fall back to the last character when the
    # name has no trailing digits, which matches the previous lookup.
    probe_id = probe_name[len(probe_name.rstrip("0123456789")):] or probe_name[-1]
    folder = os.path.join(root, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate_msps}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Channels 1 and 2 are loaded for the training probe first, then for the
# test probe.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Channel 1 rows come first in each stack, so label 0.0 marks channel 1 and
# label 1.0 marks channel 2.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training data only and applied to both sets.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# One hyper-parameter set shared by the initial and the retrained forest.
rf_params = {
    "n_estimators": 50,
    "max_depth": 10,
    "min_samples_split": 10,
    "min_samples_leaf": 5,
    "max_features": "sqrt",
    "random_state": 42,
}
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Identify Important Features: keep the columns whose importance is strictly
# above the median (50th percentile) importance.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# Create a unique folder name dynamically
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the model, the scaler and the selected column indices side by side.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Predictions: hard labels plus the predicted probability of class 1.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_selected):.4f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred_selected):.4f}")
print(f"MCC: {matthews_corrcoef(y_test, y_pred_selected):.4f}")
print(f"Log Loss: {log_loss(y_test, y_probs_selected):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred_selected):.4f}")
print(f"Recall: {recall_score(y_test, y_pred_selected):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_selected):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Row 0 holds the true class-0 samples, row 1 the true class-1 samples.
true_neg, false_pos = cm[0, 0], cm[0, 1]
false_neg, true_pos = cm[1, 0], cm[1, 1]
fp_rate = false_pos / (false_pos + true_neg)
fn_rate = false_neg / (false_neg + true_pos)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC3 CH1: (3331, 38)
✅ Loaded Probe1 ADC3 CH2: (3332, 38)
✅ Loaded Probe3 ADC3 CH1: (3332, 38)
✅ Loaded Probe3 ADC3 CH2: (3331, 38)

Top Important Features:
Max_H: Importance 0.1463
Min_H: Importance 0.0133
Mean_H: Importance 0.1364
Std_H: Importance 0.0115
Mean Deviation_H: Importance 0.0126
RMS_H: Importance 0.1499
Entropy_H: Importance 0.0166
Mean_Freq_H: Importance 0.0692
Variance_H: Importance 0.0625
Max_L: Importance 0.0024
Min_L: Importance 0.0475
Mean_L: Importance 0.1051
Std_L: Importance 0.0005
RMS_L: Importance 0.1668
Kurtosis_L: Importance 0.0018
Peak-to-Peak_L: Importance 0.0006
Centroid_L: Importance 0.0004
Entropy_L: Importance 0.0293
Variance_L: Importance 0.0270

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0001
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      3332
         CH2    

#### **Sampling Rate 40MSPS**
This section contains data collected using a sampling rate of **40MSPS**. The model is trained on **Probe 1** and tested on **Probe 3**.



In [165]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root directory under which the per-probe "P<n>_fd" feature folders are looked up.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selection: ADC number, sampling rate (the "<rate>_MSPS" part of
# the file names) and the dataset numbers of the training and test probes.
adc_num = 3
sample_rate = 40
train_probe_dataset = 1
test_probe_dataset = 3

# Probe names follow directly from the dataset numbers above.
train_probe = f"Probe{train_probe_dataset}"
test_probe = f"Probe{test_probe_dataset}"

# Feature names, indexed by feature column, in four consecutive groups.
feature_names = (
    # Time High
    ["Max_H", "Min_H", "Mean_H", "Std_H", "Mean Deviation_H", "RMS_H", "Skewness_H", "Kurtosis_H",
     "Peak-to-Peak_H", "Zero Crossing Rate_H"]
    # Freq High
    + ["Centroid_H", "Entropy_H", "Spread_H", "Skewness_Freq_H", "Mean_Freq_H", "Kurtosis_Freq_H",
       "Irregularity_H", "Variance_H", "Dominant_Freq_H"]
    # Time Low
    + ["Max_L", "Min_L", "Mean_L", "Std_L", "Mean Deviation_L", "RMS_L", "Skewness_L", "Kurtosis_L",
       "Peak-to-Peak_L", "Zero Crossing Rate_L"]
    # Freq Low
    + ["Centroid_L", "Entropy_L", "Spread_L", "Skewness_Freq_L", "Mean_Freq_L", "Kurtosis_Freq_L",
       "Irregularity_L", "Variance_L", "Dominant_Freq_L"]
)

def load_channel_data(probe_name, adc_num, channel, rate_msps=None, root=None):
    """Load the saved feature array for one probe / ADC / channel.

    The file is looked up at
    ``<root>/P<probe number>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``.
    Only the existence of that file is checked; the loaded array itself is
    not validated.

    Args:
        probe_name: Probe identifier such as ``"Probe1"``; its trailing digits
            select the ``P<n>_fd`` folder.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.
        rate_msps: Sampling rate used in the file name. Defaults to the
            module-level ``sample_rate``.
        root: Directory containing the per-probe folders. Defaults to the
            module-level ``base_directory``.

    Returns:
        The array stored in the ``.npy`` file, as returned by ``np.load``.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    if rate_msps is None:
        rate_msps = sample_rate
    if root is None:
        root = base_directory

    # Use every trailing digit so a multi-digit probe ("Probe10") maps to
    # "P10_fd" rather than "P0_fd". Fall back to the last character when the
    # name has no trailing digits, which matches the previous lookup.
    probe_id = probe_name[len(probe_name.rstrip("0123456789")):] or probe_name[-1]
    folder = os.path.join(root, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate_msps}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Channels 1 and 2 are loaded for the training probe first, then for the
# test probe.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Channel 1 rows come first in each stack, so label 0.0 marks channel 1 and
# label 1.0 marks channel 2.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training data only and applied to both sets.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# One hyper-parameter set shared by the initial and the retrained forest.
rf_params = {
    "n_estimators": 50,
    "max_depth": 10,
    "min_samples_split": 10,
    "min_samples_leaf": 5,
    "max_features": "sqrt",
    "random_state": 42,
}
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Identify Important Features: keep the columns whose importance is strictly
# above the median (50th percentile) importance.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# Create a unique folder name dynamically
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the model, the scaler and the selected column indices side by side.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Predictions: hard labels plus the predicted probability of class 1.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_selected):.4f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred_selected):.4f}")
print(f"MCC: {matthews_corrcoef(y_test, y_pred_selected):.4f}")
print(f"Log Loss: {log_loss(y_test, y_probs_selected):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred_selected):.4f}")
print(f"Recall: {recall_score(y_test, y_pred_selected):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_selected):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Row 0 holds the true class-0 samples, row 1 the true class-1 samples.
true_neg, false_pos = cm[0, 0], cm[0, 1]
false_neg, true_pos = cm[1, 0], cm[1, 1]
fp_rate = false_pos / (false_pos + true_neg)
fn_rate = false_neg / (false_neg + true_pos)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC3 CH1: (2481, 38)
✅ Loaded Probe1 ADC3 CH2: (2481, 38)
✅ Loaded Probe3 ADC3 CH1: (2481, 38)
✅ Loaded Probe3 ADC3 CH2: (2481, 38)

Top Important Features:
Max_H: Importance 0.1030
Min_H: Importance 0.0296
Mean_H: Importance 0.0901
Std_H: Importance 0.0272
Mean Deviation_H: Importance 0.0172
RMS_H: Importance 0.1376
Kurtosis_H: Importance 0.0000
Centroid_H: Importance 0.0028
Entropy_H: Importance 0.0139
Mean_Freq_H: Importance 0.0570
Variance_H: Importance 0.0667
Min_L: Importance 0.0154
Mean_L: Importance 0.1909
Std_L: Importance 0.0000
RMS_L: Importance 0.1324
Kurtosis_L: Importance 0.0000
Entropy_L: Importance 0.0299
Variance_L: Importance 0.0863

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0001
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      2481
         CH2       1.00      1.00      1.00  

#### **Sampling Rate 50MSPS**
This section contains data collected using a sampling rate of **50MSPS**. The model is trained on **Probe 1** and tested on **Probe 3**.



In [166]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder that holds the per-probe feature folders.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC index, training-probe number, test-probe number and the sampling-rate
# value that appears in the feature file names.
adc_num, train_probe_dataset, test_probe_dataset, sample_rate = 3, 1, 3, 50

# Probe names are derived from the dataset numbers above.
train_probe, test_probe = (
    "Probe{}".format(number)
    for number in (train_probe_dataset, test_probe_dataset)
)

# Feature Names (Time High, Freq High, Time Low, Freq Low): the same 19
# stems are used twice, first with the "_H" suffix and then with "_L".
_FEATURE_STEMS = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [
    f"{stem}_{suffix}" for suffix in ("H", "L") for stem in _FEATURE_STEMS
]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel.

    The file is read from
    ``<base_directory>/P<id>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``
    where ``base_directory`` and ``sample_rate`` are notebook-level globals
    and ``<id>`` is the numeric suffix of ``probe_name``.

    Args:
        probe_name: Probe label such as ``"Probe1"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.

    Returns:
        The array returned by ``np.load`` for that file.

    Raises:
        FileNotFoundError: If the expected file does not exist.

    Note:
        No ADC consistency check is performed here; the ADC number only
        selects which file is opened.
    """
    # Use the whole trailing number so that "Probe12" maps to "P12_fd"
    # instead of "P2_fd". Names without trailing digits keep the previous
    # behaviour of using the last character.
    prefix = probe_name.rstrip("0123456789")
    probe_id = probe_name[len(prefix):] or probe_name[-1]

    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Load order: train CH1, train CH2, test CH1, test CH2.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# CH1 rows come first and are labelled 0.0; CH2 rows follow, labelled 1.0.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and reused for the test probe.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests (all features / selected features) share these settings.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the kept features in column order.
print("\nTop Important Features:")
for feature_idx in important_features:
    selected_name = feature_names[feature_idx]
    selected_score = feature_importances[feature_idx]
    print(f"{selected_name}: Importance {selected_score:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# Output folder name encodes probes, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected column indices.
for artifact, file_name in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, file_name))

# Hard predictions and the predicted probability of label 1.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# (label, metric function, what it is scored on); log loss uses the
# probabilities, every other metric uses the hard predictions.
for label, scorer, scored in (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
):
    print(f"{label}: {scorer(y_test, scored):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix plus the rates derived from its rows
# (row 0: true label 0, row 1: true label 1).
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC3 CH1: (1998, 38)
✅ Loaded Probe1 ADC3 CH2: (1998, 38)
✅ Loaded Probe3 ADC3 CH1: (1998, 38)
✅ Loaded Probe3 ADC3 CH2: (1998, 38)

Top Important Features:
Max_H: Importance 0.1081
Min_H: Importance 0.0196
Mean_H: Importance 0.1027
RMS_H: Importance 0.1425
Peak-to-Peak_H: Importance 0.0076
Centroid_H: Importance 0.0005
Entropy_H: Importance 0.0288
Mean_Freq_H: Importance 0.0501
Variance_H: Importance 0.0835
Max_L: Importance 0.0024
Min_L: Importance 0.0441
Mean_L: Importance 0.1598
Std_L: Importance 0.0001
RMS_L: Importance 0.1222
Kurtosis_L: Importance 0.0018
Entropy_L: Importance 0.0277
Spread_L: Importance 0.0002
Mean_Freq_L: Importance 0.0098
Variance_L: Importance 0.0884

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0004
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      1998
         CH2      

#### **Sampling Rate 100MSPS**
This section contains data collected using a sampling rate of **100MSPS**. The model is trained on **Probe 1** and tested on **Probe 3**.



In [167]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder that holds the per-probe feature folders.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC index, training-probe number, test-probe number and the sampling-rate
# value that appears in the feature file names.
adc_num, train_probe_dataset, test_probe_dataset, sample_rate = 3, 1, 3, 100

# Probe names are derived from the dataset numbers above.
train_probe, test_probe = (
    "Probe{}".format(number)
    for number in (train_probe_dataset, test_probe_dataset)
)

# Feature Names (Time High, Freq High, Time Low, Freq Low): the same 19
# stems are used twice, first with the "_H" suffix and then with "_L".
_FEATURE_STEMS = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [
    f"{stem}_{suffix}" for suffix in ("H", "L") for stem in _FEATURE_STEMS
]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel.

    The file is read from
    ``<base_directory>/P<id>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``
    where ``base_directory`` and ``sample_rate`` are notebook-level globals
    and ``<id>`` is the numeric suffix of ``probe_name``.

    Args:
        probe_name: Probe label such as ``"Probe1"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.

    Returns:
        The array returned by ``np.load`` for that file.

    Raises:
        FileNotFoundError: If the expected file does not exist.

    Note:
        No ADC consistency check is performed here; the ADC number only
        selects which file is opened.
    """
    # Use the whole trailing number so that "Probe12" maps to "P12_fd"
    # instead of "P2_fd". Names without trailing digits keep the previous
    # behaviour of using the last character.
    prefix = probe_name.rstrip("0123456789")
    probe_id = probe_name[len(prefix):] or probe_name[-1]

    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Load order: train CH1, train CH2, test CH1, test CH2.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# CH1 rows come first and are labelled 0.0; CH2 rows follow, labelled 1.0.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and reused for the test probe.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests (all features / selected features) share these settings.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the kept features in column order.
print("\nTop Important Features:")
for feature_idx in important_features:
    selected_name = feature_names[feature_idx]
    selected_score = feature_importances[feature_idx]
    print(f"{selected_name}: Importance {selected_score:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# Output folder name encodes probes, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected column indices.
for artifact, file_name in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, file_name))

# Hard predictions and the predicted probability of label 1.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# (label, metric function, what it is scored on); log loss uses the
# probabilities, every other metric uses the hard predictions.
for label, scorer, scored in (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
):
    print(f"{label}: {scorer(y_test, scored):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix plus the rates derived from its rows
# (row 0: true label 0, row 1: true label 1).
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC3 CH1: (998, 38)
✅ Loaded Probe1 ADC3 CH2: (998, 38)
✅ Loaded Probe3 ADC3 CH1: (998, 38)
✅ Loaded Probe3 ADC3 CH2: (998, 38)

Top Important Features:
Max_H: Importance 0.1108
Min_H: Importance 0.0052
Mean_H: Importance 0.0933
Std_H: Importance 0.0222
Mean Deviation_H: Importance 0.0288
RMS_H: Importance 0.1419
Skewness_H: Importance 0.0005
Entropy_H: Importance 0.0160
Spread_H: Importance 0.0007
Mean_Freq_H: Importance 0.0336
Irregularity_H: Importance 0.0012
Variance_H: Importance 0.0700
Max_L: Importance 0.0024
Min_L: Importance 0.0122
Mean_L: Importance 0.1918
Mean Deviation_L: Importance 0.0417
RMS_L: Importance 0.1277
Entropy_L: Importance 0.0091
Variance_L: Importance 0.0905

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0000
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       998
         CH

#### **Sampling Rate 150MSPS**
This section contains data collected using a sampling rate of **150MSPS**. The model is trained on **Probe 1** and tested on **Probe 3**.



In [168]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder that holds the per-probe feature folders.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC index, training-probe number, test-probe number and the sampling-rate
# value that appears in the feature file names.
adc_num, train_probe_dataset, test_probe_dataset, sample_rate = 3, 1, 3, 150

# Probe names are derived from the dataset numbers above.
train_probe, test_probe = (
    "Probe{}".format(number)
    for number in (train_probe_dataset, test_probe_dataset)
)

# Feature Names (Time High, Freq High, Time Low, Freq Low): the same 19
# stems are used twice, first with the "_H" suffix and then with "_L".
_FEATURE_STEMS = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [
    f"{stem}_{suffix}" for suffix in ("H", "L") for stem in _FEATURE_STEMS
]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel.

    The file is read from
    ``<base_directory>/P<id>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``
    where ``base_directory`` and ``sample_rate`` are notebook-level globals
    and ``<id>`` is the numeric suffix of ``probe_name``.

    Args:
        probe_name: Probe label such as ``"Probe1"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.

    Returns:
        The array returned by ``np.load`` for that file.

    Raises:
        FileNotFoundError: If the expected file does not exist.

    Note:
        No ADC consistency check is performed here; the ADC number only
        selects which file is opened.
    """
    # Use the whole trailing number so that "Probe12" maps to "P12_fd"
    # instead of "P2_fd". Names without trailing digits keep the previous
    # behaviour of using the last character.
    prefix = probe_name.rstrip("0123456789")
    probe_id = probe_name[len(prefix):] or probe_name[-1]

    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Load order: train CH1, train CH2, test CH1, test CH2.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# CH1 rows come first and are labelled 0.0; CH2 rows follow, labelled 1.0.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and reused for the test probe.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests (all features / selected features) share these settings.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the kept features in column order.
print("\nTop Important Features:")
for feature_idx in important_features:
    selected_name = feature_names[feature_idx]
    selected_score = feature_importances[feature_idx]
    print(f"{selected_name}: Importance {selected_score:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# Output folder name encodes probes, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected column indices.
for artifact, file_name in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, file_name))

# Hard predictions and the predicted probability of label 1.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# (label, metric function, what it is scored on); log loss uses the
# probabilities, every other metric uses the hard predictions.
for label, scorer, scored in (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
):
    print(f"{label}: {scorer(y_test, scored):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix plus the rates derived from its rows
# (row 0: true label 0, row 1: true label 1).
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC3 CH1: (665, 38)
✅ Loaded Probe1 ADC3 CH2: (665, 38)
✅ Loaded Probe3 ADC3 CH1: (665, 38)
✅ Loaded Probe3 ADC3 CH2: (665, 38)

Top Important Features:
Max_H: Importance 0.1001
Mean_H: Importance 0.0833
Std_H: Importance 0.0339
Mean Deviation_H: Importance 0.0691
RMS_H: Importance 0.1260
Entropy_H: Importance 0.0161
Mean_Freq_H: Importance 0.0159
Irregularity_H: Importance 0.0117
Variance_H: Importance 0.0214
Min_L: Importance 0.0386
Mean_L: Importance 0.1804
Mean Deviation_L: Importance 0.0800
RMS_L: Importance 0.1241
Entropy_L: Importance 0.0163
Variance_L: Importance 0.0832

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0174
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       665
         CH2       1.00      1.00      1.00       665

    accuracy                           1.00      1330
   macro a

#### **Sampling Rate 200MSPS**
This section contains data collected using a sampling rate of **200MSPS**. The model is trained on **Probe 1** and tested on **Probe 3**.



In [169]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder that holds the per-probe feature folders.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC index, training-probe number, test-probe number and the sampling-rate
# value that appears in the feature file names.
adc_num, train_probe_dataset, test_probe_dataset, sample_rate = 3, 1, 3, 200

# Probe names are derived from the dataset numbers above.
train_probe, test_probe = (
    "Probe{}".format(number)
    for number in (train_probe_dataset, test_probe_dataset)
)

# Feature Names (Time High, Freq High, Time Low, Freq Low): the same 19
# stems are used twice, first with the "_H" suffix and then with "_L".
_FEATURE_STEMS = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [
    f"{stem}_{suffix}" for suffix in ("H", "L") for stem in _FEATURE_STEMS
]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel.

    The file is read from
    ``<base_directory>/P<id>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``
    where ``base_directory`` and ``sample_rate`` are notebook-level globals
    and ``<id>`` is the numeric suffix of ``probe_name``.

    Args:
        probe_name: Probe label such as ``"Probe1"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.

    Returns:
        The array returned by ``np.load`` for that file.

    Raises:
        FileNotFoundError: If the expected file does not exist.

    Note:
        No ADC consistency check is performed here; the ADC number only
        selects which file is opened.
    """
    # Use the whole trailing number so that "Probe12" maps to "P12_fd"
    # instead of "P2_fd". Names without trailing digits keep the previous
    # behaviour of using the last character.
    prefix = probe_name.rstrip("0123456789")
    probe_id = probe_name[len(prefix):] or probe_name[-1]

    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Load order: train CH1, train CH2, test CH1, test CH2.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# CH1 rows come first and are labelled 0.0; CH2 rows follow, labelled 1.0.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and reused for the test probe.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests (all features / selected features) share these settings.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the kept features in column order.
print("\nTop Important Features:")
for feature_idx in important_features:
    selected_name = feature_names[feature_idx]
    selected_score = feature_importances[feature_idx]
    print(f"{selected_name}: Importance {selected_score:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# Output folder name encodes probes, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected column indices.
for artifact, file_name in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, file_name))

# Hard predictions and the predicted probability of label 1.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# (label, metric function, what it is scored on); log loss uses the
# probabilities, every other metric uses the hard predictions.
for label, scorer, scored in (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
):
    print(f"{label}: {scorer(y_test, scored):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix plus the rates derived from its rows
# (row 0: true label 0, row 1: true label 1).
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC3 CH1: (498, 38)
✅ Loaded Probe1 ADC3 CH2: (498, 38)
✅ Loaded Probe3 ADC3 CH1: (498, 38)
✅ Loaded Probe3 ADC3 CH2: (498, 38)

Top Important Features:
Max_H: Importance 0.0600
Mean_H: Importance 0.0801
Mean Deviation_H: Importance 0.0601
RMS_H: Importance 0.1258
Entropy_H: Importance 0.0176
Mean_Freq_H: Importance 0.0185
Kurtosis_Freq_H: Importance 0.0109
Variance_H: Importance 0.0200
Min_L: Importance 0.0881
Mean_L: Importance 0.1800
Std_L: Importance 0.0199
Mean Deviation_L: Importance 0.0800
RMS_L: Importance 0.1215
Entropy_L: Importance 0.0199
Mean_Freq_L: Importance 0.0144
Variance_L: Importance 0.0833

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0000
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       498
         CH2       1.00      1.00      1.00       498

    accuracy                    

#### **Sampling Rate 250MSPS**
This section contains data collected using a sampling rate of **250MSPS**. The model is trained on **Probe 1** and tested on **Probe 3**.



In [170]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder that holds the per-probe feature folders.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC index, training-probe number, test-probe number and the sampling-rate
# value that appears in the feature file names.
adc_num, train_probe_dataset, test_probe_dataset, sample_rate = 3, 1, 3, 250

# Probe names are derived from the dataset numbers above.
train_probe, test_probe = (
    "Probe{}".format(number)
    for number in (train_probe_dataset, test_probe_dataset)
)

# Feature Names (Time High, Freq High, Time Low, Freq Low): the same 19
# stems are used twice, first with the "_H" suffix and then with "_L".
_FEATURE_STEMS = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [
    f"{stem}_{suffix}" for suffix in ("H", "L") for stem in _FEATURE_STEMS
]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel combination.

    The file is looked up at
    ``<base_directory>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``
    where ``<n>`` is the numeric suffix of ``probe_name``. ``base_directory``
    and ``sample_rate`` are read from the module-level configuration.

    The ADC number only selects which file is opened; the loaded contents are
    not checked against it.

    Args:
        probe_name: Probe label such as ``"Probe1"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.

    Returns:
        Whatever ``np.load`` returns for the matching ``.npy`` file.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Take the whole trailing number so a multi-digit probe ("Probe10") is not
    # truncated to its last digit. When the name has no numeric suffix, fall
    # back to the last character, which is what the lookup always used.
    # NOTE(review): assumes multi-digit probes follow the same "P<n>_fd"
    # folder pattern - confirm against the data directory.
    probe_id = probe_name[len(probe_name.rstrip("0123456789")):] or probe_name[-1]
    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path so a missing file is easy to locate.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Assemble Training & Test Sets
# ==================================================================
# Channel 1 files are labelled class 0 and channel 2 files class 1.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

X_train = np.vstack((train_ch0, train_ch1))
X_test = np.vstack((test_ch0, test_ch1))
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Standardize Features
# ==================================================================
# The scaler is fitted on the training probe only and reused for the test probe.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# First Pass: Rank Features With a Random Forest
# ==================================================================
# Both forests (ranking and final) share the same hyperparameters.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected, X_test_selected = (
    scaled[:, important_features] for scaled in (X_train_scaled, X_test_scaled)
)

print("\nTop Important Features:")
for feature_idx in important_features:
    importance = feature_importances[feature_idx]
    print(f"{feature_names[feature_idx]}: Importance {importance:.4f}")

# ==================================================================
# Second Pass: Refit on the Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# One output folder per train-probe / test-probe / ADC / sample-rate combination.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the fitted scaler and the selected column indices.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions plus the predicted probability of class 1.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Report Metrics for the Feature-Selected Model
# ==================================================================
print("\nModel Performance After Feature Selection:")
# Log loss is scored on probabilities; every other metric on hard predictions.
for metric_label, metric_fn, estimate in (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
):
    print(f"{metric_label}: {metric_fn(y_test, estimate):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix rows are the true classes, columns the predicted classes.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
true_class0_row, true_class1_row = cm[0], cm[1]
fp_rate = true_class0_row[1] / (true_class0_row[1] + true_class0_row[0])
fn_rate = true_class1_row[0] / (true_class1_row[0] + true_class1_row[1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC3 CH1: (398, 38)
✅ Loaded Probe1 ADC3 CH2: (398, 38)
✅ Loaded Probe3 ADC3 CH1: (398, 38)
✅ Loaded Probe3 ADC3 CH2: (398, 38)

Top Important Features:
Max_H: Importance 0.0805
Mean_H: Importance 0.0828
Mean Deviation_H: Importance 0.0800
RMS_H: Importance 0.1045
Kurtosis_H: Importance 0.0001
Peak-to-Peak_H: Importance 0.0000
Mean_Freq_H: Importance 0.0187
Kurtosis_Freq_H: Importance 0.0324
Variance_H: Importance 0.0200
Min_L: Importance 0.0796
Mean_L: Importance 0.1604
Std_L: Importance 0.0202
Mean Deviation_L: Importance 0.0802
RMS_L: Importance 0.1219
Centroid_L: Importance 0.0006
Entropy_L: Importance 0.0198
Mean_Freq_L: Importance 0.0381
Variance_L: Importance 0.0602

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0060
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       398
         CH2       1.0

#### **Sampling Rate 300MSPS**
This section contains data collected using a sampling rate of **300MSPS**. The model is trained on **Probe 1** and tested on **Probe 3**.



In [171]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Experiment Configuration
# ==================================================================
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC number, training probe number, test probe number and sampling rate
# (MSPS); together these pick the feature files that get loaded.
adc_num, train_probe_dataset, test_probe_dataset, sample_rate = 3, 1, 3, 300

# Probe labels are derived from the dataset numbers above.
train_probe, test_probe = (
    f"Probe{dataset}" for dataset in (train_probe_dataset, test_probe_dataset)
)

# Column labels, ordered as: time (high), freq (high), time (low), freq (low).
# Each base name is emitted once with the "_H" suffix and once with "_L".
_time_feature_bases = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
)
_freq_feature_bases = (
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [
    f"{base}_{suffix}"
    for suffix in ("H", "L")
    for base in _time_feature_bases + _freq_feature_bases
]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel combination.

    The file is looked up at
    ``<base_directory>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``
    where ``<n>`` is the numeric suffix of ``probe_name``. ``base_directory``
    and ``sample_rate`` are read from the module-level configuration.

    The ADC number only selects which file is opened; the loaded contents are
    not checked against it.

    Args:
        probe_name: Probe label such as ``"Probe1"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.

    Returns:
        Whatever ``np.load`` returns for the matching ``.npy`` file.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Take the whole trailing number so a multi-digit probe ("Probe10") is not
    # truncated to its last digit. When the name has no numeric suffix, fall
    # back to the last character, which is what the lookup always used.
    # NOTE(review): assumes multi-digit probes follow the same "P<n>_fd"
    # folder pattern - confirm against the data directory.
    probe_id = probe_name[len(probe_name.rstrip("0123456789")):] or probe_name[-1]
    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path so a missing file is easy to locate.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Assemble Training & Test Sets
# ==================================================================
# Channel 1 files are labelled class 0 and channel 2 files class 1.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

X_train = np.vstack((train_ch0, train_ch1))
X_test = np.vstack((test_ch0, test_ch1))
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Standardize Features
# ==================================================================
# The scaler is fitted on the training probe only and reused for the test probe.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# First Pass: Rank Features With a Random Forest
# ==================================================================
# Both forests (ranking and final) share the same hyperparameters.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected, X_test_selected = (
    scaled[:, important_features] for scaled in (X_train_scaled, X_test_scaled)
)

print("\nTop Important Features:")
for feature_idx in important_features:
    importance = feature_importances[feature_idx]
    print(f"{feature_names[feature_idx]}: Importance {importance:.4f}")

# ==================================================================
# Second Pass: Refit on the Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# One output folder per train-probe / test-probe / ADC / sample-rate combination.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the fitted scaler and the selected column indices.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions plus the predicted probability of class 1.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Report Metrics for the Feature-Selected Model
# ==================================================================
print("\nModel Performance After Feature Selection:")
# Log loss is scored on probabilities; every other metric on hard predictions.
for metric_label, metric_fn, estimate in (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
):
    print(f"{metric_label}: {metric_fn(y_test, estimate):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix rows are the true classes, columns the predicted classes.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
true_class0_row, true_class1_row = cm[0], cm[1]
fp_rate = true_class0_row[1] / (true_class0_row[1] + true_class0_row[0])
fn_rate = true_class1_row[0] / (true_class1_row[0] + true_class1_row[1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC3 CH1: (332, 38)
✅ Loaded Probe1 ADC3 CH2: (332, 38)
✅ Loaded Probe3 ADC3 CH1: (331, 38)
✅ Loaded Probe3 ADC3 CH2: (332, 38)

Top Important Features:
Max_H: Importance 0.0801
Mean_H: Importance 0.0800
Mean Deviation_H: Importance 0.0599
RMS_H: Importance 0.1244
Peak-to-Peak_H: Importance 0.0001
Entropy_H: Importance 0.0399
Mean_Freq_H: Importance 0.0181
Kurtosis_Freq_H: Importance 0.0157
Irregularity_H: Importance 0.0007
Variance_H: Importance 0.0200
Min_L: Importance 0.0401
Mean_L: Importance 0.1600
Std_L: Importance 0.0411
Mean Deviation_L: Importance 0.0800
RMS_L: Importance 0.1200
Entropy_L: Importance 0.0200
Mean_Freq_L: Importance 0.0199
Irregularity_L: Importance 0.0000
Variance_L: Importance 0.0800

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0087
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      

## **Testing of Probe 2**
To conduct experiments, **ADC3** (data acquisition device) is used consistently throughout. **Probe 2** is kept constant, and the machine learning model is **always trained on Probe 2**. The model is then tested on **Probe 3** and **Probe 1** to evaluate its generalization.

The **sampling rate** is varied across different tests, ranging from **10 MSPS to 300 MSPS**.


### Training **Probe 2** data and Testing on **Probe 3**
The model trained on Probe 2 is evaluated on Probe 3 data to test how well it handles a change of probe.

#### **Sampling Rate 10MSPS**
This section contains data collected using a sampling rate of **10MSPS**. The model is trained on **Probe 2** and tested on **Probe 3**.



In [172]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Experiment Configuration
# ==================================================================
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC number, training probe number, test probe number and sampling rate
# (MSPS); together these pick the feature files that get loaded.
adc_num, train_probe_dataset, test_probe_dataset, sample_rate = 3, 2, 3, 10

# Probe labels are derived from the dataset numbers above.
train_probe, test_probe = (
    f"Probe{dataset}" for dataset in (train_probe_dataset, test_probe_dataset)
)

# Column labels, ordered as: time (high), freq (high), time (low), freq (low).
# Each base name is emitted once with the "_H" suffix and once with "_L".
_time_feature_bases = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
)
_freq_feature_bases = (
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [
    f"{base}_{suffix}"
    for suffix in ("H", "L")
    for base in _time_feature_bases + _freq_feature_bases
]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel combination.

    The file is looked up at
    ``<base_directory>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``
    where ``<n>`` is the numeric suffix of ``probe_name``. ``base_directory``
    and ``sample_rate`` are read from the module-level configuration.

    The ADC number only selects which file is opened; the loaded contents are
    not checked against it.

    Args:
        probe_name: Probe label such as ``"Probe1"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.

    Returns:
        Whatever ``np.load`` returns for the matching ``.npy`` file.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Take the whole trailing number so a multi-digit probe ("Probe10") is not
    # truncated to its last digit. When the name has no numeric suffix, fall
    # back to the last character, which is what the lookup always used.
    # NOTE(review): assumes multi-digit probes follow the same "P<n>_fd"
    # folder pattern - confirm against the data directory.
    probe_id = probe_name[len(probe_name.rstrip("0123456789")):] or probe_name[-1]
    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path so a missing file is easy to locate.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Assemble Training & Test Sets
# ==================================================================
# Channel 1 files are labelled class 0 and channel 2 files class 1.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

X_train = np.vstack((train_ch0, train_ch1))
X_test = np.vstack((test_ch0, test_ch1))
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Standardize Features
# ==================================================================
# The scaler is fitted on the training probe only and reused for the test probe.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# First Pass: Rank Features With a Random Forest
# ==================================================================
# Both forests (ranking and final) share the same hyperparameters.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected, X_test_selected = (
    scaled[:, important_features] for scaled in (X_train_scaled, X_test_scaled)
)

print("\nTop Important Features:")
for feature_idx in important_features:
    importance = feature_importances[feature_idx]
    print(f"{feature_names[feature_idx]}: Importance {importance:.4f}")

# ==================================================================
# Second Pass: Refit on the Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# One output folder per train-probe / test-probe / ADC / sample-rate combination.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the fitted scaler and the selected column indices.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions plus the predicted probability of class 1.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Report Metrics for the Feature-Selected Model
# ==================================================================
print("\nModel Performance After Feature Selection:")
# Log loss is scored on probabilities; every other metric on hard predictions.
for metric_label, metric_fn, estimate in (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
):
    print(f"{metric_label}: {metric_fn(y_test, estimate):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix rows are the true classes, columns the predicted classes.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
true_class0_row, true_class1_row = cm[0], cm[1]
fp_rate = true_class0_row[1] / (true_class0_row[1] + true_class0_row[0])
fn_rate = true_class1_row[0] / (true_class1_row[0] + true_class1_row[1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC3 CH1: (9998, 38)
✅ Loaded Probe2 ADC3 CH2: (9998, 38)
✅ Loaded Probe3 ADC3 CH1: (9998, 38)
✅ Loaded Probe3 ADC3 CH2: (9998, 38)

Top Important Features:
Max_H: Importance 0.0320
Min_H: Importance 0.0522
Mean_H: Importance 0.1101
Mean Deviation_H: Importance 0.0019
RMS_H: Importance 0.1822
Peak-to-Peak_H: Importance 0.0037
Entropy_H: Importance 0.1499
Spread_H: Importance 0.0031
Mean_Freq_H: Importance 0.0801
Variance_H: Importance 0.0613
Max_L: Importance 0.0194
Min_L: Importance 0.0472
Mean_L: Importance 0.0582
RMS_L: Importance 0.0378
Skewness_L: Importance 0.0034
Entropy_L: Importance 0.0394
Mean_Freq_L: Importance 0.0220
Irregularity_L: Importance 0.0045
Variance_L: Importance 0.0828

Model Performance After Feature Selection:
Test Accuracy: 0.9999
Balanced Accuracy: 0.9999
MCC: 0.9998
Log Loss: 0.0025
F1 Score: 0.9999
Recall: 1.0000
Precision: 0.9998
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      9998
   

#### **Sampling Rate 20MSPS**
This section contains data collected using a sampling rate of **20MSPS**. The model is trained on **Probe 2** and tested on **Probe 3**.



In [173]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Experiment Configuration
# ==================================================================
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC number, training probe number, test probe number and sampling rate
# (MSPS); together these pick the feature files that get loaded.
adc_num, train_probe_dataset, test_probe_dataset, sample_rate = 3, 2, 3, 20

# Probe labels are derived from the dataset numbers above.
train_probe, test_probe = (
    f"Probe{dataset}" for dataset in (train_probe_dataset, test_probe_dataset)
)

# Column labels, ordered as: time (high), freq (high), time (low), freq (low).
# Each base name is emitted once with the "_H" suffix and once with "_L".
_time_feature_bases = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
)
_freq_feature_bases = (
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [
    f"{base}_{suffix}"
    for suffix in ("H", "L")
    for base in _time_feature_bases + _freq_feature_bases
]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel combination.

    The file is looked up at
    ``<base_directory>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``
    where ``<n>`` is the numeric suffix of ``probe_name``. ``base_directory``
    and ``sample_rate`` are read from the module-level configuration.

    The ADC number only selects which file is opened; the loaded contents are
    not checked against it.

    Args:
        probe_name: Probe label such as ``"Probe1"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.

    Returns:
        Whatever ``np.load`` returns for the matching ``.npy`` file.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Take the whole trailing number so a multi-digit probe ("Probe10") is not
    # truncated to its last digit. When the name has no numeric suffix, fall
    # back to the last character, which is what the lookup always used.
    # NOTE(review): assumes multi-digit probes follow the same "P<n>_fd"
    # folder pattern - confirm against the data directory.
    probe_id = probe_name[len(probe_name.rstrip("0123456789")):] or probe_name[-1]
    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path so a missing file is easy to locate.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Assemble Training & Test Sets
# ==================================================================
# Channel 1 files are labelled class 0 and channel 2 files class 1.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

X_train = np.vstack((train_ch0, train_ch1))
X_test = np.vstack((test_ch0, test_ch1))
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Standardize Features
# ==================================================================
# The scaler is fitted on the training probe only and reused for the test probe.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# First Pass: Rank Features With a Random Forest
# ==================================================================
# Both forests (ranking and final) share the same hyperparameters.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected, X_test_selected = (
    scaled[:, important_features] for scaled in (X_train_scaled, X_test_scaled)
)

print("\nTop Important Features:")
for feature_idx in important_features:
    importance = feature_importances[feature_idx]
    print(f"{feature_names[feature_idx]}: Importance {importance:.4f}")

# ==================================================================
# Second Pass: Refit on the Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# One output folder per train-probe / test-probe / ADC / sample-rate combination.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the fitted scaler and the selected column indices.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions plus the predicted probability of class 1.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Report Metrics for the Feature-Selected Model
# ==================================================================
print("\nModel Performance After Feature Selection:")
# Log loss is scored on probabilities; every other metric on hard predictions.
for metric_label, metric_fn, estimate in (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
):
    print(f"{metric_label}: {metric_fn(y_test, estimate):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix rows are the true classes, columns the predicted classes.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
true_class0_row, true_class1_row = cm[0], cm[1]
fp_rate = true_class0_row[1] / (true_class0_row[1] + true_class0_row[0])
fn_rate = true_class1_row[0] / (true_class1_row[0] + true_class1_row[1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC3 CH1: (4998, 38)
✅ Loaded Probe2 ADC3 CH2: (4998, 38)
✅ Loaded Probe3 ADC3 CH1: (4998, 38)
✅ Loaded Probe3 ADC3 CH2: (4998, 38)

Top Important Features:
Max_H: Importance 0.0825
Min_H: Importance 0.0317
Mean_H: Importance 0.1433
Mean Deviation_H: Importance 0.0046
RMS_H: Importance 0.2186
Centroid_H: Importance 0.0004
Entropy_H: Importance 0.0184
Mean_Freq_H: Importance 0.0679
Variance_H: Importance 0.0391
Max_L: Importance 0.0008
Min_L: Importance 0.0281
Mean_L: Importance 0.1291
Std_L: Importance 0.0007
RMS_L: Importance 0.1220
Kurtosis_L: Importance 0.0018
Peak-to-Peak_L: Importance 0.0025
Entropy_L: Importance 0.0305
Mean_Freq_L: Importance 0.0160
Variance_L: Importance 0.0615

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0003
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      4998
         C

#### **Sampling Rate 30MSPS**
This section contains data collected using a sampling rate of **30MSPS**. The model is trained on **Probe 2** and tested on **Probe 3**.



In [174]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Experiment Configuration
# ==================================================================
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC number, training probe number, test probe number and sampling rate
# (MSPS); together these pick the feature files that get loaded.
adc_num, train_probe_dataset, test_probe_dataset, sample_rate = 3, 2, 3, 30

# Probe labels are derived from the dataset numbers above.
train_probe, test_probe = (
    f"Probe{dataset}" for dataset in (train_probe_dataset, test_probe_dataset)
)

# Column labels, ordered as: time (high), freq (high), time (low), freq (low).
# Each base name is emitted once with the "_H" suffix and once with "_L".
_time_feature_bases = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
)
_freq_feature_bases = (
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [
    f"{base}_{suffix}"
    for suffix in ("H", "L")
    for base in _time_feature_bases + _freq_feature_bases
]

def load_channel_data(probe_name, adc_num, channel):
    """Read the saved feature array for one probe / ADC / channel combination.

    The file is looked up at
    ``<base_directory>/P<c>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``
    where ``<c>`` is the last character of ``probe_name`` and ``base_directory``
    and ``sample_rate`` are the module-level settings of this cell.

    Prints the shape of the loaded array and returns the array.

    Raises:
        FileNotFoundError: if the computed path does not exist.
    """
    npy_path = os.path.join(
        base_directory,
        f"P{probe_name[-1]}_fd",
        f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy',
    )

    if os.path.exists(npy_path):
        features = np.load(npy_path)
        print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {features.shape}")
        return features

    raise FileNotFoundError(f"File not found: {npy_path}")

# ==================================================================
# Training & Test Data
# ==================================================================
# Channel 1 files supply class 0 and channel 2 files supply class 1.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

X_train = np.vstack((train_ch0, train_ch1))
X_test = np.vstack((test_ch0, test_ch1))
# One float label per row: zeros for the first stack member, ones for the second.
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and reused for the test probe.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests (full feature set and reduced feature set) share these settings.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained columns (in column order) with their importances.
print("\nTop Important Features:")
for feature_idx, weight in zip(important_features, feature_importances[important_features]):
    print(f"{feature_names[feature_idx]}: Importance {weight:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# Output folder name encodes the probes, ADC and sampling rate of this run.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected column indices.
for artifact, pkl_name in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, pkl_name))

# Hard predictions plus the probability assigned to class 1.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# (label, metric function, estimate passed as second argument); log loss is
# the only metric fed the class-1 probabilities instead of the hard labels.
metric_rows = (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
)
for metric_label, metric_fn, estimate in metric_rows:
    print(f"{metric_label}: {metric_fn(y_test, estimate):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix, then FPR from row 0 and FNR from row 1.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
false_pos, true_neg = cm[0, 1], cm[0, 0]
false_neg, true_pos = cm[1, 0], cm[1, 1]
fp_rate = false_pos / (false_pos + true_neg)
fn_rate = false_neg / (false_neg + true_pos)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC3 CH1: (3332, 38)
✅ Loaded Probe2 ADC3 CH2: (3332, 38)
✅ Loaded Probe3 ADC3 CH1: (3332, 38)
✅ Loaded Probe3 ADC3 CH2: (3331, 38)

Top Important Features:
Max_H: Importance 0.1070
Min_H: Importance 0.0266
Mean_H: Importance 0.1418
Mean Deviation_H: Importance 0.0104
RMS_H: Importance 0.1501
Centroid_H: Importance 0.0001
Entropy_H: Importance 0.0169
Spread_H: Importance 0.0005
Mean_Freq_H: Importance 0.0627
Variance_H: Importance 0.0667
Min_L: Importance 0.0279
Mean_L: Importance 0.1151
Std_L: Importance 0.0008
RMS_L: Importance 0.1620
Kurtosis_L: Importance 0.0018
Peak-to-Peak_L: Importance 0.0006
Entropy_L: Importance 0.0302
Mean_Freq_L: Importance 0.0102
Variance_L: Importance 0.0685

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0001
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      3332
       

#### **Sampling Rate 40MSPS**
This section contains data collected using a sampling rate of **40MSPS**. The model is trained on **Probe 2** and tested on **Probe 3**.



In [175]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder that load_channel_data() joins the per-probe sub-folder onto.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selection for this cell.
adc_num = 3               # ADC index used in the feature file names
train_probe_dataset = 2   # probe number the model is fitted on
test_probe_dataset = 3    # probe number the model is evaluated on
sample_rate = 40          # sampling rate used in the feature file names (MSPS)

# Probe labels are derived from the dataset numbers above.
train_probe = "Probe{}".format(train_probe_dataset)
test_probe = "Probe{}".format(test_probe_dataset)

# Column labels of the 38-wide feature matrix: the same 19 statistics are
# listed once with an "_H" suffix and once with an "_L" suffix (original
# ordering: Time High, Freq High, Time Low, Freq Low).
_time_stat_names = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
)
_freq_stat_names = (
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [
    f"{stat}_{suffix}"
    for suffix in ("H", "L")
    for stat in _time_stat_names + _freq_stat_names
]

def load_channel_data(probe_name, adc_num, channel):
    """Read the saved feature array for one probe / ADC / channel combination.

    The file is looked up at
    ``<base_directory>/P<c>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``
    where ``<c>`` is the last character of ``probe_name`` and ``base_directory``
    and ``sample_rate`` are the module-level settings of this cell.

    Prints the shape of the loaded array and returns the array.

    Raises:
        FileNotFoundError: if the computed path does not exist.
    """
    npy_path = os.path.join(
        base_directory,
        f"P{probe_name[-1]}_fd",
        f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy',
    )

    if os.path.exists(npy_path):
        features = np.load(npy_path)
        print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {features.shape}")
        return features

    raise FileNotFoundError(f"File not found: {npy_path}")

# ==================================================================
# Training & Test Data
# ==================================================================
# Channel 1 files supply class 0 and channel 2 files supply class 1.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

X_train = np.vstack((train_ch0, train_ch1))
X_test = np.vstack((test_ch0, test_ch1))
# One float label per row: zeros for the first stack member, ones for the second.
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and reused for the test probe.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests (full feature set and reduced feature set) share these settings.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained columns (in column order) with their importances.
print("\nTop Important Features:")
for feature_idx, weight in zip(important_features, feature_importances[important_features]):
    print(f"{feature_names[feature_idx]}: Importance {weight:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# Output folder name encodes the probes, ADC and sampling rate of this run.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected column indices.
for artifact, pkl_name in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, pkl_name))

# Hard predictions plus the probability assigned to class 1.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# (label, metric function, estimate passed as second argument); log loss is
# the only metric fed the class-1 probabilities instead of the hard labels.
metric_rows = (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
)
for metric_label, metric_fn, estimate in metric_rows:
    print(f"{metric_label}: {metric_fn(y_test, estimate):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix, then FPR from row 0 and FNR from row 1.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
false_pos, true_neg = cm[0, 1], cm[0, 0]
false_neg, true_pos = cm[1, 0], cm[1, 1]
fp_rate = false_pos / (false_pos + true_neg)
fn_rate = false_neg / (false_neg + true_pos)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC3 CH1: (2481, 38)
✅ Loaded Probe2 ADC3 CH2: (2481, 38)
✅ Loaded Probe3 ADC3 CH1: (2481, 38)
✅ Loaded Probe3 ADC3 CH2: (2481, 38)

Top Important Features:
Max_H: Importance 0.0634
Min_H: Importance 0.0298
Mean_H: Importance 0.0915
Std_H: Importance 0.0221
Mean Deviation_H: Importance 0.0164
RMS_H: Importance 0.1400
Centroid_H: Importance 0.0000
Entropy_H: Importance 0.0141
Mean_Freq_H: Importance 0.0578
Variance_H: Importance 0.0671
Max_L: Importance 0.0004
Min_L: Importance 0.0183
Mean_L: Importance 0.1975
RMS_L: Importance 0.1205
Peak-to-Peak_L: Importance 0.0002
Entropy_L: Importance 0.0293
Mean_Freq_L: Importance 0.0045
Variance_L: Importance 0.1270

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0001
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      2481
         CH2       1.00      1.00      1

#### **Sampling Rate 50MSPS**
This section contains data collected using a sampling rate of **50MSPS**. The model is trained on **Probe 2** and tested on **Probe 3**.



In [176]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder that load_channel_data() joins the per-probe sub-folder onto.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selection for this cell.
adc_num = 3               # ADC index used in the feature file names
train_probe_dataset = 2   # probe number the model is fitted on
test_probe_dataset = 3    # probe number the model is evaluated on
sample_rate = 50          # sampling rate used in the feature file names (MSPS)

# Probe labels are derived from the dataset numbers above.
train_probe = "Probe{}".format(train_probe_dataset)
test_probe = "Probe{}".format(test_probe_dataset)

# Column labels of the 38-wide feature matrix: the same 19 statistics are
# listed once with an "_H" suffix and once with an "_L" suffix (original
# ordering: Time High, Freq High, Time Low, Freq Low).
_time_stat_names = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
)
_freq_stat_names = (
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [
    f"{stat}_{suffix}"
    for suffix in ("H", "L")
    for stat in _time_stat_names + _freq_stat_names
]

def load_channel_data(probe_name, adc_num, channel):
    """Read the saved feature array for one probe / ADC / channel combination.

    The file is looked up at
    ``<base_directory>/P<c>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``
    where ``<c>`` is the last character of ``probe_name`` and ``base_directory``
    and ``sample_rate`` are the module-level settings of this cell.

    Prints the shape of the loaded array and returns the array.

    Raises:
        FileNotFoundError: if the computed path does not exist.
    """
    npy_path = os.path.join(
        base_directory,
        f"P{probe_name[-1]}_fd",
        f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy',
    )

    if os.path.exists(npy_path):
        features = np.load(npy_path)
        print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {features.shape}")
        return features

    raise FileNotFoundError(f"File not found: {npy_path}")

# ==================================================================
# Training & Test Data
# ==================================================================
# Channel 1 files supply class 0 and channel 2 files supply class 1.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

X_train = np.vstack((train_ch0, train_ch1))
X_test = np.vstack((test_ch0, test_ch1))
# One float label per row: zeros for the first stack member, ones for the second.
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and reused for the test probe.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests (full feature set and reduced feature set) share these settings.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained columns (in column order) with their importances.
print("\nTop Important Features:")
for feature_idx, weight in zip(important_features, feature_importances[important_features]):
    print(f"{feature_names[feature_idx]}: Importance {weight:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# Output folder name encodes the probes, ADC and sampling rate of this run.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected column indices.
for artifact, pkl_name in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, pkl_name))

# Hard predictions plus the probability assigned to class 1.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# (label, metric function, estimate passed as second argument); log loss is
# the only metric fed the class-1 probabilities instead of the hard labels.
metric_rows = (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
)
for metric_label, metric_fn, estimate in metric_rows:
    print(f"{metric_label}: {metric_fn(y_test, estimate):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix, then FPR from row 0 and FNR from row 1.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
false_pos, true_neg = cm[0, 1], cm[0, 0]
false_neg, true_pos = cm[1, 0], cm[1, 1]
fp_rate = false_pos / (false_pos + true_neg)
fn_rate = false_neg / (false_neg + true_pos)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC3 CH1: (1998, 38)
✅ Loaded Probe2 ADC3 CH2: (1998, 38)
✅ Loaded Probe3 ADC3 CH1: (1998, 38)
✅ Loaded Probe3 ADC3 CH2: (1998, 38)

Top Important Features:
Max_H: Importance 0.0669
Min_H: Importance 0.0105
Mean_H: Importance 0.0930
Std_H: Importance 0.0183
RMS_H: Importance 0.1493
Centroid_H: Importance 0.0006
Entropy_H: Importance 0.0313
Mean_Freq_H: Importance 0.0492
Variance_H: Importance 0.0832
Max_L: Importance 0.0121
Min_L: Importance 0.0222
Mean_L: Importance 0.1501
Mean Deviation_L: Importance 0.0117
RMS_L: Importance 0.1342
Kurtosis_L: Importance 0.0022
Centroid_L: Importance 0.0014
Entropy_L: Importance 0.0419
Mean_Freq_L: Importance 0.0112
Variance_L: Importance 0.1101

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0001
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      1998
         CH2  

#### **Sampling Rate 100MSPS**
This section contains data collected using a sampling rate of **100MSPS**. The model is trained on **Probe 2** and tested on **Probe 3**.



In [177]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder that load_channel_data() joins the per-probe sub-folder onto.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selection for this cell.
adc_num = 3               # ADC index used in the feature file names
train_probe_dataset = 2   # probe number the model is fitted on
test_probe_dataset = 3    # probe number the model is evaluated on
sample_rate = 100         # sampling rate used in the feature file names (MSPS)

# Probe labels are derived from the dataset numbers above.
train_probe = "Probe{}".format(train_probe_dataset)
test_probe = "Probe{}".format(test_probe_dataset)

# Column labels of the 38-wide feature matrix: the same 19 statistics are
# listed once with an "_H" suffix and once with an "_L" suffix (original
# ordering: Time High, Freq High, Time Low, Freq Low).
_time_stat_names = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
)
_freq_stat_names = (
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [
    f"{stat}_{suffix}"
    for suffix in ("H", "L")
    for stat in _time_stat_names + _freq_stat_names
]

def load_channel_data(probe_name, adc_num, channel):
    """Read the saved feature array for one probe / ADC / channel combination.

    The file is looked up at
    ``<base_directory>/P<c>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``
    where ``<c>`` is the last character of ``probe_name`` and ``base_directory``
    and ``sample_rate`` are the module-level settings of this cell.

    Prints the shape of the loaded array and returns the array.

    Raises:
        FileNotFoundError: if the computed path does not exist.
    """
    npy_path = os.path.join(
        base_directory,
        f"P{probe_name[-1]}_fd",
        f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy',
    )

    if os.path.exists(npy_path):
        features = np.load(npy_path)
        print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {features.shape}")
        return features

    raise FileNotFoundError(f"File not found: {npy_path}")

# ==================================================================
# Training & Test Data
# ==================================================================
# Channel 1 files supply class 0 and channel 2 files supply class 1.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

X_train = np.vstack((train_ch0, train_ch1))
X_test = np.vstack((test_ch0, test_ch1))
# One float label per row: zeros for the first stack member, ones for the second.
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and reused for the test probe.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests (full feature set and reduced feature set) share these settings.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained columns (in column order) with their importances.
print("\nTop Important Features:")
for feature_idx, weight in zip(important_features, feature_importances[important_features]):
    print(f"{feature_names[feature_idx]}: Importance {weight:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# Output folder name encodes the probes, ADC and sampling rate of this run.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected column indices.
for artifact, pkl_name in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, pkl_name))

# Hard predictions plus the probability assigned to class 1.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# (label, metric function, estimate passed as second argument); log loss is
# the only metric fed the class-1 probabilities instead of the hard labels.
metric_rows = (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
)
for metric_label, metric_fn, estimate in metric_rows:
    print(f"{metric_label}: {metric_fn(y_test, estimate):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix, then FPR from row 0 and FNR from row 1.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
false_pos, true_neg = cm[0, 1], cm[0, 0]
false_neg, true_pos = cm[1, 0], cm[1, 1]
fp_rate = false_pos / (false_pos + true_neg)
fn_rate = false_neg / (false_neg + true_pos)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC3 CH1: (998, 38)
✅ Loaded Probe2 ADC3 CH2: (998, 38)
✅ Loaded Probe3 ADC3 CH1: (998, 38)
✅ Loaded Probe3 ADC3 CH2: (998, 38)

Top Important Features:
Max_H: Importance 0.1139
Min_H: Importance 0.0042
Mean_H: Importance 0.1130
Std_H: Importance 0.0109
Mean Deviation_H: Importance 0.0261
RMS_H: Importance 0.1467
Entropy_H: Importance 0.0170
Spread_H: Importance 0.0052
Mean_Freq_H: Importance 0.0232
Variance_H: Importance 0.0819
Max_L: Importance 0.0041
Min_L: Importance 0.0677
Mean_L: Importance 0.1654
Mean Deviation_L: Importance 0.0099
RMS_L: Importance 0.1270
Peak-to-Peak_L: Importance 0.0016
Centroid_L: Importance 0.0013
Entropy_L: Importance 0.0100
Variance_L: Importance 0.0709

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0000
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       998
         CH

#### **Sampling Rate 150MSPS**
This section contains data collected using a sampling rate of **150MSPS**. The model is trained on **Probe 2** and tested on **Probe 3**.



In [178]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder that load_channel_data() joins the per-probe sub-folder onto.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selection for this cell.
adc_num = 3               # ADC index used in the feature file names
train_probe_dataset = 2   # probe number the model is fitted on
test_probe_dataset = 3    # probe number the model is evaluated on
sample_rate = 150         # sampling rate used in the feature file names (MSPS)

# Probe labels are derived from the dataset numbers above.
train_probe = "Probe{}".format(train_probe_dataset)
test_probe = "Probe{}".format(test_probe_dataset)

# Column labels of the 38-wide feature matrix: the same 19 statistics are
# listed once with an "_H" suffix and once with an "_L" suffix (original
# ordering: Time High, Freq High, Time Low, Freq Low).
_time_stat_names = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
)
_freq_stat_names = (
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [
    f"{stat}_{suffix}"
    for suffix in ("H", "L")
    for stat in _time_stat_names + _freq_stat_names
]

def load_channel_data(probe_name, adc_num, channel):
    """Read the saved feature array for one probe / ADC / channel combination.

    The file is looked up at
    ``<base_directory>/P<c>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``
    where ``<c>`` is the last character of ``probe_name`` and ``base_directory``
    and ``sample_rate`` are the module-level settings of this cell.

    Prints the shape of the loaded array and returns the array.

    Raises:
        FileNotFoundError: if the computed path does not exist.
    """
    npy_path = os.path.join(
        base_directory,
        f"P{probe_name[-1]}_fd",
        f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy',
    )

    if os.path.exists(npy_path):
        features = np.load(npy_path)
        print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {features.shape}")
        return features

    raise FileNotFoundError(f"File not found: {npy_path}")

# ==================================================================
# Training & Test Data
# ==================================================================
# Channel 1 files supply class 0 and channel 2 files supply class 1.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

X_train = np.vstack((train_ch0, train_ch1))
X_test = np.vstack((test_ch0, test_ch1))
# One float label per row: zeros for the first stack member, ones for the second.
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and reused for the test probe.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests (full feature set and reduced feature set) share these settings.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained columns (in column order) with their importances.
print("\nTop Important Features:")
for feature_idx, weight in zip(important_features, feature_importances[important_features]):
    print(f"{feature_names[feature_idx]}: Importance {weight:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# Output folder name encodes the probes, ADC and sampling rate of this run.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected column indices.
for artifact, pkl_name in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, pkl_name))

# Hard predictions plus the probability assigned to class 1.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# (label, metric function, estimate passed as second argument); log loss is
# the only metric fed the class-1 probabilities instead of the hard labels.
metric_rows = (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
)
for metric_label, metric_fn, estimate in metric_rows:
    print(f"{metric_label}: {metric_fn(y_test, estimate):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix, then FPR from row 0 and FNR from row 1.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
false_pos, true_neg = cm[0, 1], cm[0, 0]
false_neg, true_pos = cm[1, 0], cm[1, 1]
fp_rate = false_pos / (false_pos + true_neg)
fn_rate = false_neg / (false_neg + true_pos)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC3 CH1: (665, 38)
✅ Loaded Probe2 ADC3 CH2: (665, 38)
✅ Loaded Probe3 ADC3 CH1: (665, 38)
✅ Loaded Probe3 ADC3 CH2: (665, 38)

Top Important Features:
Max_H: Importance 0.1035
Mean_H: Importance 0.0925
Std_H: Importance 0.0121
Mean Deviation_H: Importance 0.0165
RMS_H: Importance 0.1071
Entropy_H: Importance 0.0152
Skewness_Freq_H: Importance 0.0012
Mean_Freq_H: Importance 0.0122
Irregularity_H: Importance 0.0736
Variance_H: Importance 0.0608
Max_L: Importance 0.0000
Min_L: Importance 0.0375
Mean_L: Importance 0.1700
Mean Deviation_L: Importance 0.0522
RMS_L: Importance 0.1300
Entropy_L: Importance 0.0094
Irregularity_L: Importance 0.0174
Variance_L: Importance 0.0889

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0084
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       665
         CH2       1.00  

#### **Sampling Rate 200MSPS**
This section contains data collected using a sampling rate of **200MSPS**. The model is trained on **Probe 2** and tested on **Probe 3**.



In [179]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder holding the per-probe feature files read by load_channel_data.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selectors: ADC number, probe used for training, probe used for
# testing, and the sampling rate (MSPS) that appears in the feature file names.
adc_num, train_probe_dataset, test_probe_dataset, sample_rate = 3, 2, 3, 200

# Probe labels are derived from the dataset numbers ("Probe<N>").
train_probe, test_probe = [
    f"Probe{dataset}" for dataset in (train_probe_dataset, test_probe_dataset)
]

# Feature Names (Time High, Freq High, Time Low, Freq Low)
# The same 19 feature stems exist for both bands; the suffix is "_H" for the
# high band (listed first) and "_L" for the low band.
_feature_stems = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness", "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq", "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [f"{stem}_{band}" for band in ("H", "L") for stem in _feature_stems]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel combination.

    The file is looked up at
    ``<base_directory>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``
    where ``<n>`` is the probe number at the end of ``probe_name`` and
    ``base_directory`` / ``sample_rate`` are the module-level settings of
    this cell. No consistency check is performed on the loaded contents.

    Args:
        probe_name: Probe label such as ``"Probe2"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.

    Returns:
        The array stored in the ``.npy`` file, as returned by ``np.load``.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Take every trailing digit so that multi-digit probes (e.g. "Probe10")
    # resolve to "P10_fd" rather than "P0_fd"; names without trailing digits
    # keep the previous behaviour of using the last character.
    stem_length = len(probe_name.rstrip("0123456789"))
    probe_id = probe_name[stem_length:] or probe_name[-1]

    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path in the message so a wrong config is obvious.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Channel 1 is loaded before channel 2, training probe before test probe.
train_ch0, train_ch1 = [load_channel_data(train_probe, adc_num, ch) for ch in (1, 2)]
test_ch0, test_ch1 = [load_channel_data(test_probe, adc_num, ch) for ch in (1, 2)]

# Rows of both channels are stacked; label 0.0 marks channel-1 rows and
# label 1.0 marks channel-2 rows.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and then applied to both sets.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
rf = RandomForestClassifier(
    random_state=42,
    n_estimators=50,
    max_depth=10,
    max_features='sqrt',
    min_samples_split=10,
    min_samples_leaf=5,
).fit(X_train_scaled, y_train)

# Identify Important Features
# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features (in column order, not sorted by importance)
print("\nTop Important Features:")
for idx in important_features:
    print(f"{feature_names[idx]}: Importance {feature_importances[idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
# Same hyper-parameters as the initial forest, fitted on the reduced columns.
rf_selected = RandomForestClassifier(
    random_state=42,
    n_estimators=50,
    max_depth=10,
    max_features='sqrt',
    min_samples_split=10,
    min_samples_leaf=5,
).fit(X_train_selected, y_train)

# Create a unique folder name dynamically (probe pair, ADC and sampling rate)
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the Model, the Scaler and the selected column indices
for artifact_file, artifact in (
    ("RandomForest_Model.pkl", rf_selected),
    ("Scaler.pkl", scaler),
    ("Selected_Features.pkl", important_features),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Predictions: hard labels plus the probability column for label 1
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# (label, scoring function, model output it is scored on), in print order.
# Log loss is scored on the label-1 probabilities, everything else on the hard labels.
metric_rows = (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
)
for metric_label, metric_fn, model_output in metric_rows:
    print(f"{metric_label}: {metric_fn(y_test, model_output):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Rows are true labels, columns are predicted labels.
label0_row, label1_row = cm[0], cm[1]
fp_rate = label0_row[1] / (label0_row[1] + label0_row[0])
fn_rate = label1_row[0] / (label1_row[0] + label1_row[1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC3 CH1: (498, 38)
✅ Loaded Probe2 ADC3 CH2: (498, 38)
✅ Loaded Probe3 ADC3 CH1: (498, 38)
✅ Loaded Probe3 ADC3 CH2: (498, 38)

Top Important Features:
Max_H: Importance 0.0600
Mean_H: Importance 0.0832
Mean Deviation_H: Importance 0.0400
RMS_H: Importance 0.1259
Entropy_H: Importance 0.0180
Spread_H: Importance 0.0102
Mean_Freq_H: Importance 0.0180
Kurtosis_Freq_H: Importance 0.0083
Min_L: Importance 0.1283
Mean_L: Importance 0.1000
Std_L: Importance 0.0806
Mean Deviation_L: Importance 0.0600
RMS_L: Importance 0.1280
Entropy_L: Importance 0.0600
Mean_Freq_L: Importance 0.0154
Variance_L: Importance 0.0640

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0004
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       498
         CH2       1.00      1.00      1.00       498

    accuracy                      

#### **Sampling Rate 250MSPS**
This section contains data collected using a sampling rate of **250MSPS**. The model is trained on **Probe 2** and tested on **Probe 3**.



In [180]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder holding the per-probe feature files read by load_channel_data.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selectors: ADC number, probe used for training, probe used for
# testing, and the sampling rate (MSPS) that appears in the feature file names.
adc_num, train_probe_dataset, test_probe_dataset, sample_rate = 3, 2, 3, 250

# Probe labels are derived from the dataset numbers ("Probe<N>").
train_probe, test_probe = [
    f"Probe{dataset}" for dataset in (train_probe_dataset, test_probe_dataset)
]

# Feature Names (Time High, Freq High, Time Low, Freq Low)
# The same 19 feature stems exist for both bands; the suffix is "_H" for the
# high band (listed first) and "_L" for the low band.
_feature_stems = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness", "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq", "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [f"{stem}_{band}" for band in ("H", "L") for stem in _feature_stems]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel combination.

    The file is looked up at
    ``<base_directory>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``
    where ``<n>`` is the probe number at the end of ``probe_name`` and
    ``base_directory`` / ``sample_rate`` are the module-level settings of
    this cell. No consistency check is performed on the loaded contents.

    Args:
        probe_name: Probe label such as ``"Probe2"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.

    Returns:
        The array stored in the ``.npy`` file, as returned by ``np.load``.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Take every trailing digit so that multi-digit probes (e.g. "Probe10")
    # resolve to "P10_fd" rather than "P0_fd"; names without trailing digits
    # keep the previous behaviour of using the last character.
    stem_length = len(probe_name.rstrip("0123456789"))
    probe_id = probe_name[stem_length:] or probe_name[-1]

    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path in the message so a wrong config is obvious.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Channel 1 is loaded before channel 2, training probe before test probe.
train_ch0, train_ch1 = [load_channel_data(train_probe, adc_num, ch) for ch in (1, 2)]
test_ch0, test_ch1 = [load_channel_data(test_probe, adc_num, ch) for ch in (1, 2)]

# Rows of both channels are stacked; label 0.0 marks channel-1 rows and
# label 1.0 marks channel-2 rows.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and then applied to both sets.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
rf = RandomForestClassifier(
    random_state=42,
    n_estimators=50,
    max_depth=10,
    max_features='sqrt',
    min_samples_split=10,
    min_samples_leaf=5,
).fit(X_train_scaled, y_train)

# Identify Important Features
# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features (in column order, not sorted by importance)
print("\nTop Important Features:")
for idx in important_features:
    print(f"{feature_names[idx]}: Importance {feature_importances[idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
# Same hyper-parameters as the initial forest, fitted on the reduced columns.
rf_selected = RandomForestClassifier(
    random_state=42,
    n_estimators=50,
    max_depth=10,
    max_features='sqrt',
    min_samples_split=10,
    min_samples_leaf=5,
).fit(X_train_selected, y_train)

# Create a unique folder name dynamically (probe pair, ADC and sampling rate)
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the Model, the Scaler and the selected column indices
for artifact_file, artifact in (
    ("RandomForest_Model.pkl", rf_selected),
    ("Scaler.pkl", scaler),
    ("Selected_Features.pkl", important_features),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Predictions: hard labels plus the probability column for label 1
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# (label, scoring function, model output it is scored on), in print order.
# Log loss is scored on the label-1 probabilities, everything else on the hard labels.
metric_rows = (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
)
for metric_label, metric_fn, model_output in metric_rows:
    print(f"{metric_label}: {metric_fn(y_test, model_output):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Rows are true labels, columns are predicted labels.
label0_row, label1_row = cm[0], cm[1]
fp_rate = label0_row[1] / (label0_row[1] + label0_row[0])
fn_rate = label1_row[0] / (label1_row[0] + label1_row[1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC3 CH1: (398, 38)
✅ Loaded Probe2 ADC3 CH2: (398, 38)
✅ Loaded Probe3 ADC3 CH1: (398, 38)
✅ Loaded Probe3 ADC3 CH2: (398, 38)

Top Important Features:
Max_H: Importance 0.0600
Mean_H: Importance 0.0800
Mean Deviation_H: Importance 0.0606
RMS_H: Importance 0.1258
Entropy_H: Importance 0.0175
Mean_Freq_H: Importance 0.0202
Kurtosis_Freq_H: Importance 0.0161
Variance_H: Importance 0.0400
Min_L: Importance 0.1006
Mean_L: Importance 0.1600
Std_L: Importance 0.0194
Mean Deviation_L: Importance 0.0600
RMS_L: Importance 0.1219
Centroid_L: Importance 0.0001
Entropy_L: Importance 0.0199
Mean_Freq_L: Importance 0.0162
Variance_L: Importance 0.0817

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0002
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       398
         CH2       1.00      1.00      1.00       398

  

#### **Sampling Rate 300MSPS**
This section contains data collected using a sampling rate of **300MSPS**. The model is trained on **Probe 2** and tested on **Probe 3**.



In [181]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder holding the per-probe feature files read by load_channel_data.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selectors: ADC number, probe used for training, probe used for
# testing, and the sampling rate (MSPS) that appears in the feature file names.
adc_num, train_probe_dataset, test_probe_dataset, sample_rate = 3, 2, 3, 300

# Probe labels are derived from the dataset numbers ("Probe<N>").
train_probe, test_probe = [
    f"Probe{dataset}" for dataset in (train_probe_dataset, test_probe_dataset)
]

# Feature Names (Time High, Freq High, Time Low, Freq Low)
# The same 19 feature stems exist for both bands; the suffix is "_H" for the
# high band (listed first) and "_L" for the low band.
_feature_stems = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness", "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq", "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [f"{stem}_{band}" for band in ("H", "L") for stem in _feature_stems]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel combination.

    The file is looked up at
    ``<base_directory>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``
    where ``<n>`` is the probe number at the end of ``probe_name`` and
    ``base_directory`` / ``sample_rate`` are the module-level settings of
    this cell. No consistency check is performed on the loaded contents.

    Args:
        probe_name: Probe label such as ``"Probe2"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.

    Returns:
        The array stored in the ``.npy`` file, as returned by ``np.load``.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Take every trailing digit so that multi-digit probes (e.g. "Probe10")
    # resolve to "P10_fd" rather than "P0_fd"; names without trailing digits
    # keep the previous behaviour of using the last character.
    stem_length = len(probe_name.rstrip("0123456789"))
    probe_id = probe_name[stem_length:] or probe_name[-1]

    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path in the message so a wrong config is obvious.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Channel 1 is loaded before channel 2, training probe before test probe.
train_ch0, train_ch1 = [load_channel_data(train_probe, adc_num, ch) for ch in (1, 2)]
test_ch0, test_ch1 = [load_channel_data(test_probe, adc_num, ch) for ch in (1, 2)]

# Rows of both channels are stacked; label 0.0 marks channel-1 rows and
# label 1.0 marks channel-2 rows.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and then applied to both sets.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
rf = RandomForestClassifier(
    random_state=42,
    n_estimators=50,
    max_depth=10,
    max_features='sqrt',
    min_samples_split=10,
    min_samples_leaf=5,
).fit(X_train_scaled, y_train)

# Identify Important Features
# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features (in column order, not sorted by importance)
print("\nTop Important Features:")
for idx in important_features:
    print(f"{feature_names[idx]}: Importance {feature_importances[idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
# Same hyper-parameters as the initial forest, fitted on the reduced columns.
rf_selected = RandomForestClassifier(
    random_state=42,
    n_estimators=50,
    max_depth=10,
    max_features='sqrt',
    min_samples_split=10,
    min_samples_leaf=5,
).fit(X_train_selected, y_train)

# Create a unique folder name dynamically (probe pair, ADC and sampling rate)
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the Model, the Scaler and the selected column indices
for artifact_file, artifact in (
    ("RandomForest_Model.pkl", rf_selected),
    ("Scaler.pkl", scaler),
    ("Selected_Features.pkl", important_features),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Predictions: hard labels plus the probability column for label 1
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# (label, scoring function, model output it is scored on), in print order.
# Log loss is scored on the label-1 probabilities, everything else on the hard labels.
metric_rows = (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
)
for metric_label, metric_fn, model_output in metric_rows:
    print(f"{metric_label}: {metric_fn(y_test, model_output):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Rows are true labels, columns are predicted labels.
label0_row, label1_row = cm[0], cm[1]
fp_rate = label0_row[1] / (label0_row[1] + label0_row[0])
fn_rate = label1_row[0] / (label1_row[0] + label1_row[1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC3 CH1: (332, 38)
✅ Loaded Probe2 ADC3 CH2: (332, 38)
✅ Loaded Probe3 ADC3 CH1: (331, 38)
✅ Loaded Probe3 ADC3 CH2: (332, 38)

Top Important Features:
Max_H: Importance 0.0600
Mean_H: Importance 0.0625
Mean Deviation_H: Importance 0.0200
RMS_H: Importance 0.1022
Entropy_H: Importance 0.0400
Mean_Freq_H: Importance 0.0199
Kurtosis_Freq_H: Importance 0.0353
Min_L: Importance 0.0600
Mean_L: Importance 0.1200
Std_L: Importance 0.1200
Mean Deviation_L: Importance 0.0600
RMS_L: Importance 0.0801
Entropy_L: Importance 0.0600
Mean_Freq_L: Importance 0.1000
Variance_L: Importance 0.0600

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0131
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       331
         CH2       1.00      1.00      1.00       332

    accuracy                           1.00       663
   macro

### Training **Probe 2** data and Testing on **Probe 1**
Here the model is trained on Probe 2 and evaluated on Probe 1 to check how well it generalizes when the probe is changed.

#### **Sampling Rate 10MSPS**
This section contains data collected using a sampling rate of **10MSPS**. The model is trained on **Probe 2** and tested on **Probe 1**.



In [182]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder holding the per-probe feature files read by load_channel_data.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selectors: ADC number, probe used for training, probe used for
# testing, and the sampling rate (MSPS) that appears in the feature file names.
adc_num, train_probe_dataset, test_probe_dataset, sample_rate = 3, 2, 1, 10

# Probe labels are derived from the dataset numbers ("Probe<N>").
train_probe, test_probe = [
    f"Probe{dataset}" for dataset in (train_probe_dataset, test_probe_dataset)
]

# Feature Names (Time High, Freq High, Time Low, Freq Low)
# The same 19 feature stems exist for both bands; the suffix is "_H" for the
# high band (listed first) and "_L" for the low band.
_feature_stems = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness", "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq", "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [f"{stem}_{band}" for band in ("H", "L") for stem in _feature_stems]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel combination.

    The file is looked up at
    ``<base_directory>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``
    where ``<n>`` is the probe number at the end of ``probe_name`` and
    ``base_directory`` / ``sample_rate`` are the module-level settings of
    this cell. No consistency check is performed on the loaded contents.

    Args:
        probe_name: Probe label such as ``"Probe2"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.

    Returns:
        The array stored in the ``.npy`` file, as returned by ``np.load``.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Take every trailing digit so that multi-digit probes (e.g. "Probe10")
    # resolve to "P10_fd" rather than "P0_fd"; names without trailing digits
    # keep the previous behaviour of using the last character.
    stem_length = len(probe_name.rstrip("0123456789"))
    probe_id = probe_name[stem_length:] or probe_name[-1]

    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path in the message so a wrong config is obvious.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Channel 1 is loaded before channel 2, training probe before test probe.
train_ch0, train_ch1 = [load_channel_data(train_probe, adc_num, ch) for ch in (1, 2)]
test_ch0, test_ch1 = [load_channel_data(test_probe, adc_num, ch) for ch in (1, 2)]

# Rows of both channels are stacked; label 0.0 marks channel-1 rows and
# label 1.0 marks channel-2 rows.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and then applied to both sets.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
rf = RandomForestClassifier(
    random_state=42,
    n_estimators=50,
    max_depth=10,
    max_features='sqrt',
    min_samples_split=10,
    min_samples_leaf=5,
).fit(X_train_scaled, y_train)

# Identify Important Features
# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features (in column order, not sorted by importance)
print("\nTop Important Features:")
for idx in important_features:
    print(f"{feature_names[idx]}: Importance {feature_importances[idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
# Same hyper-parameters as the initial forest, fitted on the reduced columns.
rf_selected = RandomForestClassifier(
    random_state=42,
    n_estimators=50,
    max_depth=10,
    max_features='sqrt',
    min_samples_split=10,
    min_samples_leaf=5,
).fit(X_train_selected, y_train)

# Create a unique folder name dynamically (probe pair, ADC and sampling rate)
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the Model, the Scaler and the selected column indices
for artifact_file, artifact in (
    ("RandomForest_Model.pkl", rf_selected),
    ("Scaler.pkl", scaler),
    ("Selected_Features.pkl", important_features),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Predictions: hard labels plus the probability column for label 1
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# (label, scoring function, model output it is scored on), in print order.
# Log loss is scored on the label-1 probabilities, everything else on the hard labels.
metric_rows = (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
)
for metric_label, metric_fn, model_output in metric_rows:
    print(f"{metric_label}: {metric_fn(y_test, model_output):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Rows are true labels, columns are predicted labels.
label0_row, label1_row = cm[0], cm[1]
fp_rate = label0_row[1] / (label0_row[1] + label0_row[0])
fn_rate = label1_row[0] / (label1_row[0] + label1_row[1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC3 CH1: (9998, 38)
✅ Loaded Probe2 ADC3 CH2: (9998, 38)
✅ Loaded Probe1 ADC3 CH1: (9998, 38)
✅ Loaded Probe1 ADC3 CH2: (9998, 38)

Top Important Features:
Max_H: Importance 0.0320
Min_H: Importance 0.0522
Mean_H: Importance 0.1101
Mean Deviation_H: Importance 0.0019
RMS_H: Importance 0.1822
Peak-to-Peak_H: Importance 0.0037
Entropy_H: Importance 0.1499
Spread_H: Importance 0.0031
Mean_Freq_H: Importance 0.0801
Variance_H: Importance 0.0613
Max_L: Importance 0.0194
Min_L: Importance 0.0472
Mean_L: Importance 0.0582
RMS_L: Importance 0.0378
Skewness_L: Importance 0.0034
Entropy_L: Importance 0.0394
Mean_Freq_L: Importance 0.0220
Irregularity_L: Importance 0.0045
Variance_L: Importance 0.0828

Model Performance After Feature Selection:
Test Accuracy: 0.9998
Balanced Accuracy: 0.9998
MCC: 0.9996
Log Loss: 0.0028
F1 Score: 0.9998
Recall: 1.0000
Precision: 0.9996
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      9998
   

#### **Sampling Rate 20MSPS**
This section contains data collected using a sampling rate of **20MSPS**. The model is trained on **Probe 2** and tested on **Probe 1**.



In [183]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder holding the per-probe feature files read by load_channel_data.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selectors: ADC number, probe used for training, probe used for
# testing, and the sampling rate (MSPS) that appears in the feature file names.
adc_num, train_probe_dataset, test_probe_dataset, sample_rate = 3, 2, 1, 20

# Probe labels are derived from the dataset numbers ("Probe<N>").
train_probe, test_probe = [
    f"Probe{dataset}" for dataset in (train_probe_dataset, test_probe_dataset)
]

# Feature Names (Time High, Freq High, Time Low, Freq Low)
# The same 19 feature stems exist for both bands; the suffix is "_H" for the
# high band (listed first) and "_L" for the low band.
_feature_stems = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness", "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq", "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [f"{stem}_{band}" for band in ("H", "L") for stem in _feature_stems]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe/ADC/channel combination.

    The file is looked up at
    ``<base_directory>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``
    where ``<n>`` is the probe number taken from ``probe_name`` and
    ``base_directory`` / ``sample_rate`` are the module-level settings.
    The array contents are not validated; only the file's existence is checked.

    Args:
        probe_name: Probe label such as ``"Probe2"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.

    Returns:
        The array stored in the ``.npy`` file, as returned by ``np.load``.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Take everything after the "Probe" prefix so that multi-digit probe
    # numbers (e.g. "Probe10") resolve to the right folder; for any other
    # label use its last character.
    prefix = "Probe"
    if probe_name.startswith(prefix) and len(probe_name) > len(prefix):
        probe_id = probe_name[len(prefix):]
    else:
        probe_id = probe_name[-1]

    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path so a missing export is easy to locate.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ------------------------------------------------------------------
# Assemble training and test sets
# ------------------------------------------------------------------
# Channel 1 rows get label 0 and channel 2 rows get label 1.
train_ch0, train_ch1 = [load_channel_data(train_probe, adc_num, ch) for ch in (1, 2)]
test_ch0, test_ch1 = [load_channel_data(test_probe, adc_num, ch) for ch in (1, 2)]

X_train = np.vstack((train_ch0, train_ch1))
X_test = np.vstack((test_ch0, test_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ------------------------------------------------------------------
# Standardise features (scaler is fitted on the training set only)
# ------------------------------------------------------------------
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ------------------------------------------------------------------
# First forest: rank features on the full feature set
# ------------------------------------------------------------------
# Both forests share the same hyper-parameters.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params)
rf.fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

print("\nTop Important Features:")
for feature_idx, weight in zip(important_features, feature_importances[important_features]):
    print(f"{feature_names[feature_idx]}: Importance {weight:.4f}")

# ------------------------------------------------------------------
# Second forest: refit on the selected columns only
# ------------------------------------------------------------------
rf_selected = RandomForestClassifier(**rf_params)
rf_selected.fit(X_train_selected, y_train)

# Persist the model, the scaler and the selected column indices in a
# folder named after the train/test probes, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)
for artifact, file_name in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, file_name))

# Hard predictions and class-1 probabilities on the test probe.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ------------------------------------------------------------------
# Report metrics for the feature-selected model
# ------------------------------------------------------------------
print("\nModel Performance After Feature Selection:")
# (label, metric function, model output it is computed from), in print order.
metric_table = (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
)
for metric_label, metric_fn, model_output in metric_table:
    print(f"{metric_label}: {metric_fn(y_test, model_output):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix rows are true labels, columns are predicted labels.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
(true_neg, false_pos), (false_neg, true_pos) = cm
fp_rate = false_pos / (false_pos + true_neg)
fn_rate = false_neg / (false_neg + true_pos)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC3 CH1: (4998, 38)
✅ Loaded Probe2 ADC3 CH2: (4998, 38)
✅ Loaded Probe1 ADC3 CH1: (4998, 38)
✅ Loaded Probe1 ADC3 CH2: (4998, 38)

Top Important Features:
Max_H: Importance 0.0825
Min_H: Importance 0.0317
Mean_H: Importance 0.1433
Mean Deviation_H: Importance 0.0046
RMS_H: Importance 0.2186
Centroid_H: Importance 0.0004
Entropy_H: Importance 0.0184
Mean_Freq_H: Importance 0.0679
Variance_H: Importance 0.0391
Max_L: Importance 0.0008
Min_L: Importance 0.0281
Mean_L: Importance 0.1291
Std_L: Importance 0.0007
RMS_L: Importance 0.1220
Kurtosis_L: Importance 0.0018
Peak-to-Peak_L: Importance 0.0025
Entropy_L: Importance 0.0305
Mean_Freq_L: Importance 0.0160
Variance_L: Importance 0.0615

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0002
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      4998
         C

#### **Sampling Rate 30MSPS**
This section reports results for data acquired with **ADC3** at a sampling rate of **30 MSPS**. The model is trained on **Probe 2** and tested on **Probe 1**.



In [184]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ------------------------------------------------------------------
# Run settings
# ------------------------------------------------------------------
# Root folder that holds the per-probe feature directories.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC number and sampling rate (MSPS) used to build the feature file names.
adc_num, sample_rate = 3, 30

# Probe numbers: the model is fitted on the first and evaluated on the second.
train_probe_dataset, test_probe_dataset = 2, 1

# Probe labels derived from the probe numbers above.
train_probe, test_probe = (
    f"Probe{train_probe_dataset}",
    f"Probe{test_probe_dataset}",
)

# Labels for the 38 feature columns, in column order:
# time-domain (H), frequency-domain (H), time-domain (L), frequency-domain (L).
feature_names = (
    [
        "Max_H", "Min_H", "Mean_H", "Std_H", "Mean Deviation_H",
        "RMS_H", "Skewness_H", "Kurtosis_H", "Peak-to-Peak_H",
        "Zero Crossing Rate_H",
    ]
    + [
        "Centroid_H", "Entropy_H", "Spread_H", "Skewness_Freq_H",
        "Mean_Freq_H", "Kurtosis_Freq_H", "Irregularity_H", "Variance_H",
        "Dominant_Freq_H",
    ]
    + [
        "Max_L", "Min_L", "Mean_L", "Std_L", "Mean Deviation_L",
        "RMS_L", "Skewness_L", "Kurtosis_L", "Peak-to-Peak_L",
        "Zero Crossing Rate_L",
    ]
    + [
        "Centroid_L", "Entropy_L", "Spread_L", "Skewness_Freq_L",
        "Mean_Freq_L", "Kurtosis_Freq_L", "Irregularity_L", "Variance_L",
        "Dominant_Freq_L",
    ]
)

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe/ADC/channel combination.

    The file is looked up at
    ``<base_directory>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``
    where ``<n>`` is the probe number taken from ``probe_name`` and
    ``base_directory`` / ``sample_rate`` are the module-level settings.
    The array contents are not validated; only the file's existence is checked.

    Args:
        probe_name: Probe label such as ``"Probe2"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.

    Returns:
        The array stored in the ``.npy`` file, as returned by ``np.load``.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Take everything after the "Probe" prefix so that multi-digit probe
    # numbers (e.g. "Probe10") resolve to the right folder; for any other
    # label use its last character.
    prefix = "Probe"
    if probe_name.startswith(prefix) and len(probe_name) > len(prefix):
        probe_id = probe_name[len(prefix):]
    else:
        probe_id = probe_name[-1]

    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path so a missing export is easy to locate.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ------------------------------------------------------------------
# Assemble training and test sets
# ------------------------------------------------------------------
# Channel 1 rows get label 0 and channel 2 rows get label 1.
train_ch0, train_ch1 = [load_channel_data(train_probe, adc_num, ch) for ch in (1, 2)]
test_ch0, test_ch1 = [load_channel_data(test_probe, adc_num, ch) for ch in (1, 2)]

X_train = np.vstack((train_ch0, train_ch1))
X_test = np.vstack((test_ch0, test_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ------------------------------------------------------------------
# Standardise features (scaler is fitted on the training set only)
# ------------------------------------------------------------------
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ------------------------------------------------------------------
# First forest: rank features on the full feature set
# ------------------------------------------------------------------
# Both forests share the same hyper-parameters.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params)
rf.fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

print("\nTop Important Features:")
for feature_idx, weight in zip(important_features, feature_importances[important_features]):
    print(f"{feature_names[feature_idx]}: Importance {weight:.4f}")

# ------------------------------------------------------------------
# Second forest: refit on the selected columns only
# ------------------------------------------------------------------
rf_selected = RandomForestClassifier(**rf_params)
rf_selected.fit(X_train_selected, y_train)

# Persist the model, the scaler and the selected column indices in a
# folder named after the train/test probes, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)
for artifact, file_name in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, file_name))

# Hard predictions and class-1 probabilities on the test probe.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ------------------------------------------------------------------
# Report metrics for the feature-selected model
# ------------------------------------------------------------------
print("\nModel Performance After Feature Selection:")
# (label, metric function, model output it is computed from), in print order.
metric_table = (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
)
for metric_label, metric_fn, model_output in metric_table:
    print(f"{metric_label}: {metric_fn(y_test, model_output):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix rows are true labels, columns are predicted labels.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
(true_neg, false_pos), (false_neg, true_pos) = cm
fp_rate = false_pos / (false_pos + true_neg)
fn_rate = false_neg / (false_neg + true_pos)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC3 CH1: (3332, 38)
✅ Loaded Probe2 ADC3 CH2: (3332, 38)
✅ Loaded Probe1 ADC3 CH1: (3331, 38)
✅ Loaded Probe1 ADC3 CH2: (3332, 38)

Top Important Features:
Max_H: Importance 0.1070
Min_H: Importance 0.0266
Mean_H: Importance 0.1418
Mean Deviation_H: Importance 0.0104
RMS_H: Importance 0.1501
Centroid_H: Importance 0.0001
Entropy_H: Importance 0.0169
Spread_H: Importance 0.0005
Mean_Freq_H: Importance 0.0627
Variance_H: Importance 0.0667
Min_L: Importance 0.0279
Mean_L: Importance 0.1151
Std_L: Importance 0.0008
RMS_L: Importance 0.1620
Kurtosis_L: Importance 0.0018
Peak-to-Peak_L: Importance 0.0006
Entropy_L: Importance 0.0302
Mean_Freq_L: Importance 0.0102
Variance_L: Importance 0.0685

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0001
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      3331
       

#### **Sampling Rate 40MSPS**
This section reports results for data acquired with **ADC3** at a sampling rate of **40 MSPS**. The model is trained on **Probe 2** and tested on **Probe 1**.



In [185]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ------------------------------------------------------------------
# Run settings
# ------------------------------------------------------------------
# Root folder that holds the per-probe feature directories.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC number and sampling rate (MSPS) used to build the feature file names.
adc_num, sample_rate = 3, 40

# Probe numbers: the model is fitted on the first and evaluated on the second.
train_probe_dataset, test_probe_dataset = 2, 1

# Probe labels derived from the probe numbers above.
train_probe, test_probe = (
    f"Probe{train_probe_dataset}",
    f"Probe{test_probe_dataset}",
)

# Labels for the 38 feature columns, in column order:
# time-domain (H), frequency-domain (H), time-domain (L), frequency-domain (L).
feature_names = (
    [
        "Max_H", "Min_H", "Mean_H", "Std_H", "Mean Deviation_H",
        "RMS_H", "Skewness_H", "Kurtosis_H", "Peak-to-Peak_H",
        "Zero Crossing Rate_H",
    ]
    + [
        "Centroid_H", "Entropy_H", "Spread_H", "Skewness_Freq_H",
        "Mean_Freq_H", "Kurtosis_Freq_H", "Irregularity_H", "Variance_H",
        "Dominant_Freq_H",
    ]
    + [
        "Max_L", "Min_L", "Mean_L", "Std_L", "Mean Deviation_L",
        "RMS_L", "Skewness_L", "Kurtosis_L", "Peak-to-Peak_L",
        "Zero Crossing Rate_L",
    ]
    + [
        "Centroid_L", "Entropy_L", "Spread_L", "Skewness_Freq_L",
        "Mean_Freq_L", "Kurtosis_Freq_L", "Irregularity_L", "Variance_L",
        "Dominant_Freq_L",
    ]
)

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe/ADC/channel combination.

    The file is looked up at
    ``<base_directory>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``
    where ``<n>`` is the probe number taken from ``probe_name`` and
    ``base_directory`` / ``sample_rate`` are the module-level settings.
    The array contents are not validated; only the file's existence is checked.

    Args:
        probe_name: Probe label such as ``"Probe2"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.

    Returns:
        The array stored in the ``.npy`` file, as returned by ``np.load``.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Take everything after the "Probe" prefix so that multi-digit probe
    # numbers (e.g. "Probe10") resolve to the right folder; for any other
    # label use its last character.
    prefix = "Probe"
    if probe_name.startswith(prefix) and len(probe_name) > len(prefix):
        probe_id = probe_name[len(prefix):]
    else:
        probe_id = probe_name[-1]

    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path so a missing export is easy to locate.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ------------------------------------------------------------------
# Assemble training and test sets
# ------------------------------------------------------------------
# Channel 1 rows get label 0 and channel 2 rows get label 1.
train_ch0, train_ch1 = [load_channel_data(train_probe, adc_num, ch) for ch in (1, 2)]
test_ch0, test_ch1 = [load_channel_data(test_probe, adc_num, ch) for ch in (1, 2)]

X_train = np.vstack((train_ch0, train_ch1))
X_test = np.vstack((test_ch0, test_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ------------------------------------------------------------------
# Standardise features (scaler is fitted on the training set only)
# ------------------------------------------------------------------
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ------------------------------------------------------------------
# First forest: rank features on the full feature set
# ------------------------------------------------------------------
# Both forests share the same hyper-parameters.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params)
rf.fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

print("\nTop Important Features:")
for feature_idx, weight in zip(important_features, feature_importances[important_features]):
    print(f"{feature_names[feature_idx]}: Importance {weight:.4f}")

# ------------------------------------------------------------------
# Second forest: refit on the selected columns only
# ------------------------------------------------------------------
rf_selected = RandomForestClassifier(**rf_params)
rf_selected.fit(X_train_selected, y_train)

# Persist the model, the scaler and the selected column indices in a
# folder named after the train/test probes, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)
for artifact, file_name in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, file_name))

# Hard predictions and class-1 probabilities on the test probe.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ------------------------------------------------------------------
# Report metrics for the feature-selected model
# ------------------------------------------------------------------
print("\nModel Performance After Feature Selection:")
# (label, metric function, model output it is computed from), in print order.
metric_table = (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
)
for metric_label, metric_fn, model_output in metric_table:
    print(f"{metric_label}: {metric_fn(y_test, model_output):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix rows are true labels, columns are predicted labels.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
(true_neg, false_pos), (false_neg, true_pos) = cm
fp_rate = false_pos / (false_pos + true_neg)
fn_rate = false_neg / (false_neg + true_pos)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC3 CH1: (2481, 38)
✅ Loaded Probe2 ADC3 CH2: (2481, 38)
✅ Loaded Probe1 ADC3 CH1: (2481, 38)
✅ Loaded Probe1 ADC3 CH2: (2481, 38)

Top Important Features:
Max_H: Importance 0.0634
Min_H: Importance 0.0298
Mean_H: Importance 0.0915
Std_H: Importance 0.0221
Mean Deviation_H: Importance 0.0164
RMS_H: Importance 0.1400
Centroid_H: Importance 0.0000
Entropy_H: Importance 0.0141
Mean_Freq_H: Importance 0.0578
Variance_H: Importance 0.0671
Max_L: Importance 0.0004
Min_L: Importance 0.0183
Mean_L: Importance 0.1975
RMS_L: Importance 0.1205
Peak-to-Peak_L: Importance 0.0002
Entropy_L: Importance 0.0293
Mean_Freq_L: Importance 0.0045
Variance_L: Importance 0.1270

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0002
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      2481
         CH2       1.00      1.00      1

#### **Sampling Rate 50MSPS**
This section reports results for data acquired with **ADC3** at a sampling rate of **50 MSPS**. The model is trained on **Probe 2** and tested on **Probe 1**.



In [186]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ------------------------------------------------------------------
# Run settings
# ------------------------------------------------------------------
# Root folder that holds the per-probe feature directories.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC number and sampling rate (MSPS) used to build the feature file names.
adc_num, sample_rate = 3, 50

# Probe numbers: the model is fitted on the first and evaluated on the second.
train_probe_dataset, test_probe_dataset = 2, 1

# Probe labels derived from the probe numbers above.
train_probe, test_probe = (
    f"Probe{train_probe_dataset}",
    f"Probe{test_probe_dataset}",
)

# Labels for the 38 feature columns, in column order:
# time-domain (H), frequency-domain (H), time-domain (L), frequency-domain (L).
feature_names = (
    [
        "Max_H", "Min_H", "Mean_H", "Std_H", "Mean Deviation_H",
        "RMS_H", "Skewness_H", "Kurtosis_H", "Peak-to-Peak_H",
        "Zero Crossing Rate_H",
    ]
    + [
        "Centroid_H", "Entropy_H", "Spread_H", "Skewness_Freq_H",
        "Mean_Freq_H", "Kurtosis_Freq_H", "Irregularity_H", "Variance_H",
        "Dominant_Freq_H",
    ]
    + [
        "Max_L", "Min_L", "Mean_L", "Std_L", "Mean Deviation_L",
        "RMS_L", "Skewness_L", "Kurtosis_L", "Peak-to-Peak_L",
        "Zero Crossing Rate_L",
    ]
    + [
        "Centroid_L", "Entropy_L", "Spread_L", "Skewness_Freq_L",
        "Mean_Freq_L", "Kurtosis_Freq_L", "Irregularity_L", "Variance_L",
        "Dominant_Freq_L",
    ]
)

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe/ADC/channel combination.

    The file is looked up at
    ``<base_directory>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``
    where ``<n>`` is the probe number taken from ``probe_name`` and
    ``base_directory`` / ``sample_rate`` are the module-level settings.
    The array contents are not validated; only the file's existence is checked.

    Args:
        probe_name: Probe label such as ``"Probe2"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.

    Returns:
        The array stored in the ``.npy`` file, as returned by ``np.load``.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Take everything after the "Probe" prefix so that multi-digit probe
    # numbers (e.g. "Probe10") resolve to the right folder; for any other
    # label use its last character.
    prefix = "Probe"
    if probe_name.startswith(prefix) and len(probe_name) > len(prefix):
        probe_id = probe_name[len(prefix):]
    else:
        probe_id = probe_name[-1]

    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path so a missing export is easy to locate.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ------------------------------------------------------------------
# Assemble training and test sets
# ------------------------------------------------------------------
# Channel 1 rows get label 0 and channel 2 rows get label 1.
train_ch0, train_ch1 = [load_channel_data(train_probe, adc_num, ch) for ch in (1, 2)]
test_ch0, test_ch1 = [load_channel_data(test_probe, adc_num, ch) for ch in (1, 2)]

X_train = np.vstack((train_ch0, train_ch1))
X_test = np.vstack((test_ch0, test_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ------------------------------------------------------------------
# Standardise features (scaler is fitted on the training set only)
# ------------------------------------------------------------------
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ------------------------------------------------------------------
# First forest: rank features on the full feature set
# ------------------------------------------------------------------
# Both forests share the same hyper-parameters.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params)
rf.fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

print("\nTop Important Features:")
for feature_idx, weight in zip(important_features, feature_importances[important_features]):
    print(f"{feature_names[feature_idx]}: Importance {weight:.4f}")

# ------------------------------------------------------------------
# Second forest: refit on the selected columns only
# ------------------------------------------------------------------
rf_selected = RandomForestClassifier(**rf_params)
rf_selected.fit(X_train_selected, y_train)

# Persist the model, the scaler and the selected column indices in a
# folder named after the train/test probes, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)
for artifact, file_name in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, file_name))

# Hard predictions and class-1 probabilities on the test probe.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ------------------------------------------------------------------
# Report metrics for the feature-selected model
# ------------------------------------------------------------------
print("\nModel Performance After Feature Selection:")
# (label, metric function, model output it is computed from), in print order.
metric_table = (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
)
for metric_label, metric_fn, model_output in metric_table:
    print(f"{metric_label}: {metric_fn(y_test, model_output):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix rows are true labels, columns are predicted labels.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
(true_neg, false_pos), (false_neg, true_pos) = cm
fp_rate = false_pos / (false_pos + true_neg)
fn_rate = false_neg / (false_neg + true_pos)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC3 CH1: (1998, 38)
✅ Loaded Probe2 ADC3 CH2: (1998, 38)
✅ Loaded Probe1 ADC3 CH1: (1998, 38)
✅ Loaded Probe1 ADC3 CH2: (1998, 38)

Top Important Features:
Max_H: Importance 0.0669
Min_H: Importance 0.0105
Mean_H: Importance 0.0930
Std_H: Importance 0.0183
RMS_H: Importance 0.1493
Centroid_H: Importance 0.0006
Entropy_H: Importance 0.0313
Mean_Freq_H: Importance 0.0492
Variance_H: Importance 0.0832
Max_L: Importance 0.0121
Min_L: Importance 0.0222
Mean_L: Importance 0.1501
Mean Deviation_L: Importance 0.0117
RMS_L: Importance 0.1342
Kurtosis_L: Importance 0.0022
Centroid_L: Importance 0.0014
Entropy_L: Importance 0.0419
Mean_Freq_L: Importance 0.0112
Variance_L: Importance 0.1101

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0001
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      1998
         CH2  

#### **Sampling Rate 100MSPS**
This section reports results for data acquired with **ADC3** at a sampling rate of **100 MSPS**. The model is trained on **Probe 2** and tested on **Probe 1**.



In [187]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ------------------------------------------------------------------
# Run settings
# ------------------------------------------------------------------
# Root folder that holds the per-probe feature directories.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC number and sampling rate (MSPS) used to build the feature file names.
adc_num, sample_rate = 3, 100

# Probe numbers: the model is fitted on the first and evaluated on the second.
train_probe_dataset, test_probe_dataset = 2, 1

# Probe labels derived from the probe numbers above.
train_probe, test_probe = (
    f"Probe{train_probe_dataset}",
    f"Probe{test_probe_dataset}",
)

# Labels for the 38 feature columns, in column order:
# time-domain (H), frequency-domain (H), time-domain (L), frequency-domain (L).
feature_names = (
    [
        "Max_H", "Min_H", "Mean_H", "Std_H", "Mean Deviation_H",
        "RMS_H", "Skewness_H", "Kurtosis_H", "Peak-to-Peak_H",
        "Zero Crossing Rate_H",
    ]
    + [
        "Centroid_H", "Entropy_H", "Spread_H", "Skewness_Freq_H",
        "Mean_Freq_H", "Kurtosis_Freq_H", "Irregularity_H", "Variance_H",
        "Dominant_Freq_H",
    ]
    + [
        "Max_L", "Min_L", "Mean_L", "Std_L", "Mean Deviation_L",
        "RMS_L", "Skewness_L", "Kurtosis_L", "Peak-to-Peak_L",
        "Zero Crossing Rate_L",
    ]
    + [
        "Centroid_L", "Entropy_L", "Spread_L", "Skewness_Freq_L",
        "Mean_Freq_L", "Kurtosis_Freq_L", "Irregularity_L", "Variance_L",
        "Dominant_Freq_L",
    ]
)

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe/ADC/channel combination.

    The file is looked up at
    ``<base_directory>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``
    where ``<n>`` is the probe number taken from ``probe_name`` and
    ``base_directory`` / ``sample_rate`` are the module-level settings.
    The array contents are not validated; only the file's existence is checked.

    Args:
        probe_name: Probe label such as ``"Probe2"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.

    Returns:
        The array stored in the ``.npy`` file, as returned by ``np.load``.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Take everything after the "Probe" prefix so that multi-digit probe
    # numbers (e.g. "Probe10") resolve to the right folder; for any other
    # label use its last character.
    prefix = "Probe"
    if probe_name.startswith(prefix) and len(probe_name) > len(prefix):
        probe_id = probe_name[len(prefix):]
    else:
        probe_id = probe_name[-1]

    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path so a missing export is easy to locate.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ------------------------------------------------------------------
# Assemble training and test sets
# ------------------------------------------------------------------
# Channel 1 rows get label 0 and channel 2 rows get label 1.
train_ch0, train_ch1 = [load_channel_data(train_probe, adc_num, ch) for ch in (1, 2)]
test_ch0, test_ch1 = [load_channel_data(test_probe, adc_num, ch) for ch in (1, 2)]

X_train = np.vstack((train_ch0, train_ch1))
X_test = np.vstack((test_ch0, test_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ------------------------------------------------------------------
# Standardise features (scaler is fitted on the training set only)
# ------------------------------------------------------------------
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ------------------------------------------------------------------
# First forest: rank features on the full feature set
# ------------------------------------------------------------------
# Both forests share the same hyper-parameters.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params)
rf.fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

print("\nTop Important Features:")
for feature_idx, weight in zip(important_features, feature_importances[important_features]):
    print(f"{feature_names[feature_idx]}: Importance {weight:.4f}")

# ------------------------------------------------------------------
# Second forest: refit on the selected columns only
# ------------------------------------------------------------------
rf_selected = RandomForestClassifier(**rf_params)
rf_selected.fit(X_train_selected, y_train)

# Persist the model, the scaler and the selected column indices in a
# folder named after the train/test probes, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)
for artifact, file_name in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, file_name))

# Hard predictions and class-1 probabilities on the test probe.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ------------------------------------------------------------------
# Report metrics for the feature-selected model
# ------------------------------------------------------------------
print("\nModel Performance After Feature Selection:")
# (label, metric function, model output it is computed from), in print order.
metric_table = (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
)
for metric_label, metric_fn, model_output in metric_table:
    print(f"{metric_label}: {metric_fn(y_test, model_output):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix rows are true labels, columns are predicted labels.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
(true_neg, false_pos), (false_neg, true_pos) = cm
fp_rate = false_pos / (false_pos + true_neg)
fn_rate = false_neg / (false_neg + true_pos)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC3 CH1: (998, 38)
✅ Loaded Probe2 ADC3 CH2: (998, 38)
✅ Loaded Probe1 ADC3 CH1: (998, 38)
✅ Loaded Probe1 ADC3 CH2: (998, 38)

Top Important Features:
Max_H: Importance 0.1139
Min_H: Importance 0.0042
Mean_H: Importance 0.1130
Std_H: Importance 0.0109
Mean Deviation_H: Importance 0.0261
RMS_H: Importance 0.1467
Entropy_H: Importance 0.0170
Spread_H: Importance 0.0052
Mean_Freq_H: Importance 0.0232
Variance_H: Importance 0.0819
Max_L: Importance 0.0041
Min_L: Importance 0.0677
Mean_L: Importance 0.1654
Mean Deviation_L: Importance 0.0099
RMS_L: Importance 0.1270
Peak-to-Peak_L: Importance 0.0016
Centroid_L: Importance 0.0013
Entropy_L: Importance 0.0100
Variance_L: Importance 0.0709

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0000
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       998
         CH

#### **Sampling Rate 150MSPS**
This section reports results for data acquired with **ADC3** at a sampling rate of **150 MSPS**. The model is trained on **Probe 2** and tested on **Probe 1**.



In [188]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Experiment Settings for This Cell
# ==================================================================
# Root folder that holds the per-probe feature sub-folders.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC number, training probe number, test probe number, sampling rate (MSPS).
adc_num, train_probe_dataset, test_probe_dataset, sample_rate = 3, 2, 1, 150

# Probe labels ("Probe<n>") derived from the dataset numbers above.
train_probe, test_probe = (
    f"Probe{probe_number}"
    for probe_number in (train_probe_dataset, test_probe_dataset)
)

# Names of the 38 feature columns: the same 19 statistics, first with the
# "_H" suffix and then with the "_L" suffix (time-domain entries followed by
# frequency-domain entries within each group).
feature_names = [
    f"{statistic}_{suffix}"
    for suffix in ("H", "L")
    for statistic in (
        "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS",
        "Skewness", "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
        "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
        "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
    )
]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel.

    The file is looked up at
    ``<base_directory>/P<d>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``
    where ``<d>`` is the last character of ``probe_name``. ``base_directory``
    and ``sample_rate`` are read from module-level variables.

    Args:
        probe_name: Probe label such as ``"Probe1"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.

    Returns:
        The array returned by ``np.load`` for that file.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    probe_folder = os.path.join(base_directory, f"P{probe_name[-1]}_fd")
    npy_path = os.path.join(
        probe_folder,
        f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy',
    )

    if os.path.exists(npy_path):
        features = np.load(npy_path)
        print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {features.shape}")
        return features

    raise FileNotFoundError(f"File not found: {npy_path}")

# ==================================================================
# Load Features and Build Labelled Train / Test Sets
# ==================================================================
# Channel 1 files become class 0 and channel 2 files become class 1.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ==================================================================
# Standardise Features (statistics come from the training set only)
# ==================================================================
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# First Forest: Rank All Features by Importance
# ==================================================================
# Both forests in this cell are built from the same hyperparameters.
forest_params = dict(
    n_estimators=50, max_depth=10, min_samples_split=10,
    min_samples_leaf=5, max_features='sqrt', random_state=42,
)
rf = RandomForestClassifier(**forest_params)
rf.fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features together with their importances.
print("\nTop Important Features:")
for feature_idx, importance in zip(important_features, feature_importances[important_features]):
    print(f"{feature_names[feature_idx]}: Importance {importance:.4f}")

# ==================================================================
# Second Forest: Retrain on the Selected Features Only
# ==================================================================
rf_selected = RandomForestClassifier(**forest_params)
rf_selected.fit(X_train_selected, y_train)

# Output folder name encodes train probe, test probe, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected column indices.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and class-1 probabilities on the test probe.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Evaluate the Retrained Model on the Test Probe
# ==================================================================
print("\nModel Performance After Feature Selection:")
for metric_label, metric_value in (
    ("Test Accuracy", accuracy_score(y_test, y_pred_selected)),
    ("Balanced Accuracy", balanced_accuracy_score(y_test, y_pred_selected)),
    ("MCC", matthews_corrcoef(y_test, y_pred_selected)),
    ("Log Loss", log_loss(y_test, y_probs_selected)),
    ("F1 Score", f1_score(y_test, y_pred_selected)),
    ("Recall", recall_score(y_test, y_pred_selected)),
    ("Precision", precision_score(y_test, y_pred_selected)),
):
    print(f"{metric_label}: {metric_value:.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix rows are true classes, columns are predicted classes.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
(true_neg, false_pos), (false_neg, true_pos) = cm
fp_rate = false_pos / (false_pos + true_neg)
fn_rate = false_neg / (false_neg + true_pos)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC3 CH1: (665, 38)
✅ Loaded Probe2 ADC3 CH2: (665, 38)
✅ Loaded Probe1 ADC3 CH1: (665, 38)
✅ Loaded Probe1 ADC3 CH2: (665, 38)

Top Important Features:
Max_H: Importance 0.1035
Mean_H: Importance 0.0925
Std_H: Importance 0.0121
Mean Deviation_H: Importance 0.0165
RMS_H: Importance 0.1071
Entropy_H: Importance 0.0152
Skewness_Freq_H: Importance 0.0012
Mean_Freq_H: Importance 0.0122
Irregularity_H: Importance 0.0736
Variance_H: Importance 0.0608
Max_L: Importance 0.0000
Min_L: Importance 0.0375
Mean_L: Importance 0.1700
Mean Deviation_L: Importance 0.0522
RMS_L: Importance 0.1300
Entropy_L: Importance 0.0094
Irregularity_L: Importance 0.0174
Variance_L: Importance 0.0889

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0077
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       665
         CH2       1.00  

#### **Sampling Rate 200MSPS**
This section contains data collected using a sampling rate of **200MSPS**. The model is trained on **Probe 2** and tested on **Probe 1**.



In [189]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Experiment Settings for This Cell
# ==================================================================
# Root folder that holds the per-probe feature sub-folders.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC number, training probe number, test probe number, sampling rate (MSPS).
adc_num, train_probe_dataset, test_probe_dataset, sample_rate = 3, 2, 1, 200

# Probe labels ("Probe<n>") derived from the dataset numbers above.
train_probe, test_probe = (
    f"Probe{probe_number}"
    for probe_number in (train_probe_dataset, test_probe_dataset)
)

# Names of the 38 feature columns: the same 19 statistics, first with the
# "_H" suffix and then with the "_L" suffix (time-domain entries followed by
# frequency-domain entries within each group).
feature_names = [
    f"{statistic}_{suffix}"
    for suffix in ("H", "L")
    for statistic in (
        "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS",
        "Skewness", "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
        "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
        "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
    )
]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel.

    The file is looked up at
    ``<base_directory>/P<d>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``
    where ``<d>`` is the last character of ``probe_name``. ``base_directory``
    and ``sample_rate`` are read from module-level variables.

    Args:
        probe_name: Probe label such as ``"Probe1"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.

    Returns:
        The array returned by ``np.load`` for that file.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    probe_folder = os.path.join(base_directory, f"P{probe_name[-1]}_fd")
    npy_path = os.path.join(
        probe_folder,
        f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy',
    )

    if os.path.exists(npy_path):
        features = np.load(npy_path)
        print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {features.shape}")
        return features

    raise FileNotFoundError(f"File not found: {npy_path}")

# ==================================================================
# Load Features and Build Labelled Train / Test Sets
# ==================================================================
# Channel 1 files become class 0 and channel 2 files become class 1.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ==================================================================
# Standardise Features (statistics come from the training set only)
# ==================================================================
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# First Forest: Rank All Features by Importance
# ==================================================================
# Both forests in this cell are built from the same hyperparameters.
forest_params = dict(
    n_estimators=50, max_depth=10, min_samples_split=10,
    min_samples_leaf=5, max_features='sqrt', random_state=42,
)
rf = RandomForestClassifier(**forest_params)
rf.fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features together with their importances.
print("\nTop Important Features:")
for feature_idx, importance in zip(important_features, feature_importances[important_features]):
    print(f"{feature_names[feature_idx]}: Importance {importance:.4f}")

# ==================================================================
# Second Forest: Retrain on the Selected Features Only
# ==================================================================
rf_selected = RandomForestClassifier(**forest_params)
rf_selected.fit(X_train_selected, y_train)

# Output folder name encodes train probe, test probe, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected column indices.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and class-1 probabilities on the test probe.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Evaluate the Retrained Model on the Test Probe
# ==================================================================
print("\nModel Performance After Feature Selection:")
for metric_label, metric_value in (
    ("Test Accuracy", accuracy_score(y_test, y_pred_selected)),
    ("Balanced Accuracy", balanced_accuracy_score(y_test, y_pred_selected)),
    ("MCC", matthews_corrcoef(y_test, y_pred_selected)),
    ("Log Loss", log_loss(y_test, y_probs_selected)),
    ("F1 Score", f1_score(y_test, y_pred_selected)),
    ("Recall", recall_score(y_test, y_pred_selected)),
    ("Precision", precision_score(y_test, y_pred_selected)),
):
    print(f"{metric_label}: {metric_value:.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix rows are true classes, columns are predicted classes.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
(true_neg, false_pos), (false_neg, true_pos) = cm
fp_rate = false_pos / (false_pos + true_neg)
fn_rate = false_neg / (false_neg + true_pos)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC3 CH1: (498, 38)
✅ Loaded Probe2 ADC3 CH2: (498, 38)
✅ Loaded Probe1 ADC3 CH1: (498, 38)
✅ Loaded Probe1 ADC3 CH2: (498, 38)

Top Important Features:
Max_H: Importance 0.0600
Mean_H: Importance 0.0832
Mean Deviation_H: Importance 0.0400
RMS_H: Importance 0.1259
Entropy_H: Importance 0.0180
Spread_H: Importance 0.0102
Mean_Freq_H: Importance 0.0180
Kurtosis_Freq_H: Importance 0.0083
Min_L: Importance 0.1283
Mean_L: Importance 0.1000
Std_L: Importance 0.0806
Mean Deviation_L: Importance 0.0600
RMS_L: Importance 0.1280
Entropy_L: Importance 0.0600
Mean_Freq_L: Importance 0.0154
Variance_L: Importance 0.0640

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0009
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       498
         CH2       1.00      1.00      1.00       498

    accuracy                      

#### **Sampling Rate 250MSPS**
This section contains data collected using a sampling rate of **250MSPS**. The model is trained on **Probe 2** and tested on **Probe 1**.



In [190]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Experiment Settings for This Cell
# ==================================================================
# Root folder that holds the per-probe feature sub-folders.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC number, training probe number, test probe number, sampling rate (MSPS).
adc_num, train_probe_dataset, test_probe_dataset, sample_rate = 3, 2, 1, 250

# Probe labels ("Probe<n>") derived from the dataset numbers above.
train_probe, test_probe = (
    f"Probe{probe_number}"
    for probe_number in (train_probe_dataset, test_probe_dataset)
)

# Names of the 38 feature columns: the same 19 statistics, first with the
# "_H" suffix and then with the "_L" suffix (time-domain entries followed by
# frequency-domain entries within each group).
feature_names = [
    f"{statistic}_{suffix}"
    for suffix in ("H", "L")
    for statistic in (
        "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS",
        "Skewness", "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
        "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
        "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
    )
]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel.

    The file is looked up at
    ``<base_directory>/P<d>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``
    where ``<d>`` is the last character of ``probe_name``. ``base_directory``
    and ``sample_rate`` are read from module-level variables.

    Args:
        probe_name: Probe label such as ``"Probe1"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.

    Returns:
        The array returned by ``np.load`` for that file.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    probe_folder = os.path.join(base_directory, f"P{probe_name[-1]}_fd")
    npy_path = os.path.join(
        probe_folder,
        f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy',
    )

    if os.path.exists(npy_path):
        features = np.load(npy_path)
        print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {features.shape}")
        return features

    raise FileNotFoundError(f"File not found: {npy_path}")

# ==================================================================
# Load Features and Build Labelled Train / Test Sets
# ==================================================================
# Channel 1 files become class 0 and channel 2 files become class 1.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ==================================================================
# Standardise Features (statistics come from the training set only)
# ==================================================================
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# First Forest: Rank All Features by Importance
# ==================================================================
# Both forests in this cell are built from the same hyperparameters.
forest_params = dict(
    n_estimators=50, max_depth=10, min_samples_split=10,
    min_samples_leaf=5, max_features='sqrt', random_state=42,
)
rf = RandomForestClassifier(**forest_params)
rf.fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features together with their importances.
print("\nTop Important Features:")
for feature_idx, importance in zip(important_features, feature_importances[important_features]):
    print(f"{feature_names[feature_idx]}: Importance {importance:.4f}")

# ==================================================================
# Second Forest: Retrain on the Selected Features Only
# ==================================================================
rf_selected = RandomForestClassifier(**forest_params)
rf_selected.fit(X_train_selected, y_train)

# Output folder name encodes train probe, test probe, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected column indices.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and class-1 probabilities on the test probe.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Evaluate the Retrained Model on the Test Probe
# ==================================================================
print("\nModel Performance After Feature Selection:")
for metric_label, metric_value in (
    ("Test Accuracy", accuracy_score(y_test, y_pred_selected)),
    ("Balanced Accuracy", balanced_accuracy_score(y_test, y_pred_selected)),
    ("MCC", matthews_corrcoef(y_test, y_pred_selected)),
    ("Log Loss", log_loss(y_test, y_probs_selected)),
    ("F1 Score", f1_score(y_test, y_pred_selected)),
    ("Recall", recall_score(y_test, y_pred_selected)),
    ("Precision", precision_score(y_test, y_pred_selected)),
):
    print(f"{metric_label}: {metric_value:.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix rows are true classes, columns are predicted classes.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
(true_neg, false_pos), (false_neg, true_pos) = cm
fp_rate = false_pos / (false_pos + true_neg)
fn_rate = false_neg / (false_neg + true_pos)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC3 CH1: (398, 38)
✅ Loaded Probe2 ADC3 CH2: (398, 38)
✅ Loaded Probe1 ADC3 CH1: (398, 38)
✅ Loaded Probe1 ADC3 CH2: (398, 38)

Top Important Features:
Max_H: Importance 0.0600
Mean_H: Importance 0.0800
Mean Deviation_H: Importance 0.0606
RMS_H: Importance 0.1258
Entropy_H: Importance 0.0175
Mean_Freq_H: Importance 0.0202
Kurtosis_Freq_H: Importance 0.0161
Variance_H: Importance 0.0400
Min_L: Importance 0.1006
Mean_L: Importance 0.1600
Std_L: Importance 0.0194
Mean Deviation_L: Importance 0.0600
RMS_L: Importance 0.1219
Centroid_L: Importance 0.0001
Entropy_L: Importance 0.0199
Mean_Freq_L: Importance 0.0162
Variance_L: Importance 0.0817

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0053
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       398
         CH2       1.00      1.00      1.00       398

  

#### **Sampling Rate 300MSPS**
This section contains data collected using a sampling rate of **300MSPS**. The model is trained on **Probe 2** and tested on **Probe 1**.



In [191]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Experiment Settings for This Cell
# ==================================================================
# Root folder that holds the per-probe feature sub-folders.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC number, training probe number, test probe number, sampling rate (MSPS).
adc_num, train_probe_dataset, test_probe_dataset, sample_rate = 3, 2, 1, 300

# Probe labels ("Probe<n>") derived from the dataset numbers above.
train_probe, test_probe = (
    f"Probe{probe_number}"
    for probe_number in (train_probe_dataset, test_probe_dataset)
)

# Names of the 38 feature columns: the same 19 statistics, first with the
# "_H" suffix and then with the "_L" suffix (time-domain entries followed by
# frequency-domain entries within each group).
feature_names = [
    f"{statistic}_{suffix}"
    for suffix in ("H", "L")
    for statistic in (
        "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS",
        "Skewness", "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
        "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
        "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
    )
]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel.

    The file is looked up at
    ``<base_directory>/P<d>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``
    where ``<d>`` is the last character of ``probe_name``. ``base_directory``
    and ``sample_rate`` are read from module-level variables.

    Args:
        probe_name: Probe label such as ``"Probe1"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.

    Returns:
        The array returned by ``np.load`` for that file.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    probe_folder = os.path.join(base_directory, f"P{probe_name[-1]}_fd")
    npy_path = os.path.join(
        probe_folder,
        f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy',
    )

    if os.path.exists(npy_path):
        features = np.load(npy_path)
        print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {features.shape}")
        return features

    raise FileNotFoundError(f"File not found: {npy_path}")

# ==================================================================
# Load Features and Build Labelled Train / Test Sets
# ==================================================================
# Channel 1 files become class 0 and channel 2 files become class 1.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ==================================================================
# Standardise Features (statistics come from the training set only)
# ==================================================================
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# First Forest: Rank All Features by Importance
# ==================================================================
# Both forests in this cell are built from the same hyperparameters.
forest_params = dict(
    n_estimators=50, max_depth=10, min_samples_split=10,
    min_samples_leaf=5, max_features='sqrt', random_state=42,
)
rf = RandomForestClassifier(**forest_params)
rf.fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the retained features together with their importances.
print("\nTop Important Features:")
for feature_idx, importance in zip(important_features, feature_importances[important_features]):
    print(f"{feature_names[feature_idx]}: Importance {importance:.4f}")

# ==================================================================
# Second Forest: Retrain on the Selected Features Only
# ==================================================================
rf_selected = RandomForestClassifier(**forest_params)
rf_selected.fit(X_train_selected, y_train)

# Output folder name encodes train probe, test probe, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected column indices.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and class-1 probabilities on the test probe.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Evaluate the Retrained Model on the Test Probe
# ==================================================================
print("\nModel Performance After Feature Selection:")
for metric_label, metric_value in (
    ("Test Accuracy", accuracy_score(y_test, y_pred_selected)),
    ("Balanced Accuracy", balanced_accuracy_score(y_test, y_pred_selected)),
    ("MCC", matthews_corrcoef(y_test, y_pred_selected)),
    ("Log Loss", log_loss(y_test, y_probs_selected)),
    ("F1 Score", f1_score(y_test, y_pred_selected)),
    ("Recall", recall_score(y_test, y_pred_selected)),
    ("Precision", precision_score(y_test, y_pred_selected)),
):
    print(f"{metric_label}: {metric_value:.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix rows are true classes, columns are predicted classes.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
(true_neg, false_pos), (false_neg, true_pos) = cm
fp_rate = false_pos / (false_pos + true_neg)
fn_rate = false_neg / (false_neg + true_pos)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe2 ADC3 CH1: (332, 38)
✅ Loaded Probe2 ADC3 CH2: (332, 38)
✅ Loaded Probe1 ADC3 CH1: (332, 38)
✅ Loaded Probe1 ADC3 CH2: (332, 38)

Top Important Features:
Max_H: Importance 0.0600
Mean_H: Importance 0.0625
Mean Deviation_H: Importance 0.0200
RMS_H: Importance 0.1022
Entropy_H: Importance 0.0400
Mean_Freq_H: Importance 0.0199
Kurtosis_Freq_H: Importance 0.0353
Min_L: Importance 0.0600
Mean_L: Importance 0.1200
Std_L: Importance 0.1200
Mean Deviation_L: Importance 0.0600
RMS_L: Importance 0.0801
Entropy_L: Importance 0.0600
Mean_Freq_L: Importance 0.1000
Variance_L: Importance 0.0600

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0092
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       332
         CH2       1.00      1.00      1.00       332

    accuracy                           1.00       664
   macro

## **Testing of Probe 3**
To conduct experiments, **ADC3** (data acquisition device) is used consistently throughout. **Probe 3** is kept constant, and the machine learning model is **always trained on Probe 3**. The model is then tested on **Probe 1** and **Probe 2** to evaluate its generalization.

The **sampling rate** is varied across different tests, ranging from **10 MSPS to 300 MSPS**.

### Training **Probe 3** data and Testing on **Probe 1**
To evaluate the effect of changing the probe, the model trained on Probe 3 is tested on Probe 1 data.

#### **Sampling Rate 10MSPS**
This section contains data collected using a sampling rate of **10MSPS**. The model is trained on **Probe 3** and tested on **Probe 1**.



In [192]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Experiment Settings for This Cell
# ==================================================================
# Root folder that holds the per-probe feature sub-folders.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC number, training probe number, test probe number, sampling rate (MSPS).
adc_num, train_probe_dataset, test_probe_dataset, sample_rate = 3, 3, 1, 10

# Probe labels ("Probe<n>") derived from the dataset numbers above.
train_probe, test_probe = (
    f"Probe{probe_number}"
    for probe_number in (train_probe_dataset, test_probe_dataset)
)

# Names of the 38 feature columns: the same 19 statistics, first with the
# "_H" suffix and then with the "_L" suffix (time-domain entries followed by
# frequency-domain entries within each group).
feature_names = [
    f"{statistic}_{suffix}"
    for suffix in ("H", "L")
    for statistic in (
        "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS",
        "Skewness", "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
        "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
        "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
    )
]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel combination.

    The file is looked up under the module-level ``base_directory`` in the
    sub-folder ``P<last character of probe_name>_fd``; its name also encodes
    the module-level ``sample_rate``.  ``adc_num`` only selects the file name;
    the loaded contents are not checked against it.

    Args:
        probe_name: Probe label such as ``"Probe3"``; its final character
            picks the sub-folder.
        adc_num: ADC number embedded in the file name.
        channel: Channel number embedded in the file name.

    Returns:
        The array returned by ``np.load`` for the matching ``.npy`` file.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    probe_suffix = probe_name[-1]
    npy_path = os.path.join(
        base_directory,
        f"P{probe_suffix}_fd",
        f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy',
    )

    # Report the full path on failure so a wrong configuration is easy to spot.
    if os.path.exists(npy_path):
        features = np.load(npy_path)
        print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {features.shape}")
        return features

    raise FileNotFoundError(f"File not found: {npy_path}")

# ==================================================================
# Training & Test Data
# ==================================================================
# The "ch0"/"ch1" variables hold the channel-1 and channel-2 files
# respectively; channel 1 is labelled 0 and channel 2 is labelled 1.
train_ch0, train_ch1 = [load_channel_data(train_probe, adc_num, ch) for ch in (1, 2)]
test_ch0, test_ch1 = [load_channel_data(test_probe, adc_num, ch) for ch in (1, 2)]

X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ==================================================================
# Data Preprocessing
# ==================================================================
# Scaling statistics come from the training probe only and are then
# applied unchanged to the test probe.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests (initial and retrained) share the same hyper-parameters.
rf_params = {
    "n_estimators": 50,
    "max_depth": 10,
    "min_samples_split": 10,
    "min_samples_leaf": 5,
    "max_features": 'sqrt',
    "random_state": 42,
}
rf = RandomForestClassifier(**rf_params)
rf.fit(X_train_scaled, y_train)

# Identify Important Features: keep the columns whose importance is
# strictly above the median importance of the initial forest.
feature_importances = rf.feature_importances_
importance_median = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_median)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features (in column order, not sorted by importance)
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params)
rf_selected.fit(X_train_selected, y_train)

# Create a unique folder name dynamically
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the Model, the Scaler and the selected column indices
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Predictions (hard labels and the predicted probability of class 1)
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# Each row: printed label, metric function, and the prediction array it
# consumes (log loss needs probabilities, the others need hard labels).
metric_rows = (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
)
for metric_label, metric_fn, metric_input in metric_rows:
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Row 0 holds the true class-0 samples, row 1 the true class-1 samples.
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC3 CH1: (9998, 38)
✅ Loaded Probe3 ADC3 CH2: (9998, 38)
✅ Loaded Probe1 ADC3 CH1: (9998, 38)
✅ Loaded Probe1 ADC3 CH2: (9998, 38)

Top Important Features:
Max_H: Importance 0.0117
Min_H: Importance 0.0313
Mean_H: Importance 0.0859
Mean Deviation_H: Importance 0.0079
RMS_H: Importance 0.1399
Skewness_H: Importance 0.0035
Peak-to-Peak_H: Importance 0.0054
Centroid_H: Importance 0.0052
Entropy_H: Importance 0.2163
Spread_H: Importance 0.0080
Mean_Freq_H: Importance 0.0864
Variance_H: Importance 0.0453
Max_L: Importance 0.0042
Min_L: Importance 0.0804
Mean_L: Importance 0.0361
RMS_L: Importance 0.0507
Entropy_L: Importance 0.0519
Mean_Freq_L: Importance 0.0253
Variance_L: Importance 0.0882

Model Performance After Feature Selection:
Test Accuracy: 0.9999
Balanced Accuracy: 0.9999
MCC: 0.9998
Log Loss: 0.0007
F1 Score: 0.9999
Recall: 1.0000
Precision: 0.9998
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      9998
       

#### **Sampling Rate 20MSPS**
This section contains data collected using a sampling rate of **20MSPS**. The model is trained on **Probe 3** and tested on **Probe 1**.



In [193]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder that holds one "P<n>_fd" sub-folder of feature files per probe.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selection for this cell.
adc_num = 3                # ADC number used in the feature-file names
train_probe_dataset = 3    # probe whose data trains the model
test_probe_dataset = 1     # probe whose data is used for testing
sample_rate = 20           # sampling rate (MSPS), also part of the file names

# Probe labels ("Probe<n>") derived from the dataset numbers above
train_probe, test_probe = (
    f"Probe{number}" for number in (train_probe_dataset, test_probe_dataset)
)

# Feature Names (Time High, Freq High, Time Low, Freq Low): the same 19
# descriptors appear twice, first with an "_H" suffix and then with "_L".
_feature_stems = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness", "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq", "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [f"{stem}_{band}" for band in ("H", "L") for stem in _feature_stems]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel combination.

    The file is looked up under the module-level ``base_directory`` in the
    sub-folder ``P<last character of probe_name>_fd``; its name also encodes
    the module-level ``sample_rate``.  ``adc_num`` only selects the file name;
    the loaded contents are not checked against it.

    Args:
        probe_name: Probe label such as ``"Probe3"``; its final character
            picks the sub-folder.
        adc_num: ADC number embedded in the file name.
        channel: Channel number embedded in the file name.

    Returns:
        The array returned by ``np.load`` for the matching ``.npy`` file.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    probe_suffix = probe_name[-1]
    npy_path = os.path.join(
        base_directory,
        f"P{probe_suffix}_fd",
        f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy',
    )

    # Report the full path on failure so a wrong configuration is easy to spot.
    if os.path.exists(npy_path):
        features = np.load(npy_path)
        print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {features.shape}")
        return features

    raise FileNotFoundError(f"File not found: {npy_path}")

# ==================================================================
# Training & Test Data
# ==================================================================
# The "ch0"/"ch1" variables hold the channel-1 and channel-2 files
# respectively; channel 1 is labelled 0 and channel 2 is labelled 1.
train_ch0, train_ch1 = [load_channel_data(train_probe, adc_num, ch) for ch in (1, 2)]
test_ch0, test_ch1 = [load_channel_data(test_probe, adc_num, ch) for ch in (1, 2)]

X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ==================================================================
# Data Preprocessing
# ==================================================================
# Scaling statistics come from the training probe only and are then
# applied unchanged to the test probe.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests (initial and retrained) share the same hyper-parameters.
rf_params = {
    "n_estimators": 50,
    "max_depth": 10,
    "min_samples_split": 10,
    "min_samples_leaf": 5,
    "max_features": 'sqrt',
    "random_state": 42,
}
rf = RandomForestClassifier(**rf_params)
rf.fit(X_train_scaled, y_train)

# Identify Important Features: keep the columns whose importance is
# strictly above the median importance of the initial forest.
feature_importances = rf.feature_importances_
importance_median = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_median)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features (in column order, not sorted by importance)
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params)
rf_selected.fit(X_train_selected, y_train)

# Create a unique folder name dynamically
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the Model, the Scaler and the selected column indices
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Predictions (hard labels and the predicted probability of class 1)
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# Each row: printed label, metric function, and the prediction array it
# consumes (log loss needs probabilities, the others need hard labels).
metric_rows = (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
)
for metric_label, metric_fn, metric_input in metric_rows:
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Row 0 holds the true class-0 samples, row 1 the true class-1 samples.
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC3 CH1: (4998, 38)
✅ Loaded Probe3 ADC3 CH2: (4998, 38)
✅ Loaded Probe1 ADC3 CH1: (4998, 38)
✅ Loaded Probe1 ADC3 CH2: (4998, 38)

Top Important Features:
Max_H: Importance 0.1189
Min_H: Importance 0.0311
Mean_H: Importance 0.1143
RMS_H: Importance 0.2099
Centroid_H: Importance 0.0002
Entropy_H: Importance 0.0368
Mean_Freq_H: Importance 0.0777
Variance_H: Importance 0.0597
Max_L: Importance 0.0157
Min_L: Importance 0.0274
Mean_L: Importance 0.1254
Std_L: Importance 0.0009
RMS_L: Importance 0.1253
Kurtosis_L: Importance 0.0016
Peak-to-Peak_L: Importance 0.0004
Entropy_L: Importance 0.0339
Spread_L: Importance 0.0004
Mean_Freq_L: Importance 0.0127
Variance_L: Importance 0.0070

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0001
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      4998
         CH2      

#### **Sampling Rate 30MSPS**
This section contains data collected using a sampling rate of **30MSPS**. The model is trained on **Probe 3** and tested on **Probe 1**.



In [194]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder that holds one "P<n>_fd" sub-folder of feature files per probe.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selection for this cell.
adc_num = 3                # ADC number used in the feature-file names
train_probe_dataset = 3    # probe whose data trains the model
test_probe_dataset = 1     # probe whose data is used for testing
sample_rate = 30           # sampling rate (MSPS), also part of the file names

# Probe labels ("Probe<n>") derived from the dataset numbers above
train_probe, test_probe = (
    f"Probe{number}" for number in (train_probe_dataset, test_probe_dataset)
)

# Feature Names (Time High, Freq High, Time Low, Freq Low): the same 19
# descriptors appear twice, first with an "_H" suffix and then with "_L".
_feature_stems = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness", "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq", "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [f"{stem}_{band}" for band in ("H", "L") for stem in _feature_stems]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel combination.

    The file is looked up under the module-level ``base_directory`` in the
    sub-folder ``P<last character of probe_name>_fd``; its name also encodes
    the module-level ``sample_rate``.  ``adc_num`` only selects the file name;
    the loaded contents are not checked against it.

    Args:
        probe_name: Probe label such as ``"Probe3"``; its final character
            picks the sub-folder.
        adc_num: ADC number embedded in the file name.
        channel: Channel number embedded in the file name.

    Returns:
        The array returned by ``np.load`` for the matching ``.npy`` file.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    probe_suffix = probe_name[-1]
    npy_path = os.path.join(
        base_directory,
        f"P{probe_suffix}_fd",
        f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy',
    )

    # Report the full path on failure so a wrong configuration is easy to spot.
    if os.path.exists(npy_path):
        features = np.load(npy_path)
        print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {features.shape}")
        return features

    raise FileNotFoundError(f"File not found: {npy_path}")

# ==================================================================
# Training & Test Data
# ==================================================================
# The "ch0"/"ch1" variables hold the channel-1 and channel-2 files
# respectively; channel 1 is labelled 0 and channel 2 is labelled 1.
train_ch0, train_ch1 = [load_channel_data(train_probe, adc_num, ch) for ch in (1, 2)]
test_ch0, test_ch1 = [load_channel_data(test_probe, adc_num, ch) for ch in (1, 2)]

X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ==================================================================
# Data Preprocessing
# ==================================================================
# Scaling statistics come from the training probe only and are then
# applied unchanged to the test probe.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests (initial and retrained) share the same hyper-parameters.
rf_params = {
    "n_estimators": 50,
    "max_depth": 10,
    "min_samples_split": 10,
    "min_samples_leaf": 5,
    "max_features": 'sqrt',
    "random_state": 42,
}
rf = RandomForestClassifier(**rf_params)
rf.fit(X_train_scaled, y_train)

# Identify Important Features: keep the columns whose importance is
# strictly above the median importance of the initial forest.
feature_importances = rf.feature_importances_
importance_median = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_median)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features (in column order, not sorted by importance)
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params)
rf_selected.fit(X_train_selected, y_train)

# Create a unique folder name dynamically
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the Model, the Scaler and the selected column indices
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Predictions (hard labels and the predicted probability of class 1)
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# Each row: printed label, metric function, and the prediction array it
# consumes (log loss needs probabilities, the others need hard labels).
metric_rows = (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
)
for metric_label, metric_fn, metric_input in metric_rows:
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Row 0 holds the true class-0 samples, row 1 the true class-1 samples.
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC3 CH1: (3332, 38)
✅ Loaded Probe3 ADC3 CH2: (3331, 38)
✅ Loaded Probe1 ADC3 CH1: (3331, 38)
✅ Loaded Probe1 ADC3 CH2: (3332, 38)

Top Important Features:
Max_H: Importance 0.1478
Min_H: Importance 0.0273
Mean_H: Importance 0.1356
Mean Deviation_H: Importance 0.0106
RMS_H: Importance 0.1502
Entropy_H: Importance 0.0193
Spread_H: Importance 0.0010
Mean_Freq_H: Importance 0.0682
Variance_H: Importance 0.0617
Max_L: Importance 0.0018
Min_L: Importance 0.0353
Mean_L: Importance 0.1086
Std_L: Importance 0.0005
RMS_L: Importance 0.1615
Kurtosis_L: Importance 0.0016
Peak-to-Peak_L: Importance 0.0004
Entropy_L: Importance 0.0305
Mean_Freq_L: Importance 0.0107
Variance_L: Importance 0.0273

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0000
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      3331
         CH2

#### **Sampling Rate 40MSPS**
This section contains data collected using a sampling rate of **40MSPS**. The model is trained on **Probe 3** and tested on **Probe 1**.



In [195]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder that holds one "P<n>_fd" sub-folder of feature files per probe.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selection for this cell.
adc_num = 3                # ADC number used in the feature-file names
train_probe_dataset = 3    # probe whose data trains the model
test_probe_dataset = 1     # probe whose data is used for testing
sample_rate = 40           # sampling rate (MSPS), also part of the file names

# Probe labels ("Probe<n>") derived from the dataset numbers above
train_probe, test_probe = (
    f"Probe{number}" for number in (train_probe_dataset, test_probe_dataset)
)

# Feature Names (Time High, Freq High, Time Low, Freq Low): the same 19
# descriptors appear twice, first with an "_H" suffix and then with "_L".
_feature_stems = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness", "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq", "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [f"{stem}_{band}" for band in ("H", "L") for stem in _feature_stems]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel combination.

    The file is looked up under the module-level ``base_directory`` in the
    sub-folder ``P<last character of probe_name>_fd``; its name also encodes
    the module-level ``sample_rate``.  ``adc_num`` only selects the file name;
    the loaded contents are not checked against it.

    Args:
        probe_name: Probe label such as ``"Probe3"``; its final character
            picks the sub-folder.
        adc_num: ADC number embedded in the file name.
        channel: Channel number embedded in the file name.

    Returns:
        The array returned by ``np.load`` for the matching ``.npy`` file.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    probe_suffix = probe_name[-1]
    npy_path = os.path.join(
        base_directory,
        f"P{probe_suffix}_fd",
        f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy',
    )

    # Report the full path on failure so a wrong configuration is easy to spot.
    if os.path.exists(npy_path):
        features = np.load(npy_path)
        print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {features.shape}")
        return features

    raise FileNotFoundError(f"File not found: {npy_path}")

# ==================================================================
# Training & Test Data
# ==================================================================
# The "ch0"/"ch1" variables hold the channel-1 and channel-2 files
# respectively; channel 1 is labelled 0 and channel 2 is labelled 1.
train_ch0, train_ch1 = [load_channel_data(train_probe, adc_num, ch) for ch in (1, 2)]
test_ch0, test_ch1 = [load_channel_data(test_probe, adc_num, ch) for ch in (1, 2)]

X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ==================================================================
# Data Preprocessing
# ==================================================================
# Scaling statistics come from the training probe only and are then
# applied unchanged to the test probe.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests (initial and retrained) share the same hyper-parameters.
rf_params = {
    "n_estimators": 50,
    "max_depth": 10,
    "min_samples_split": 10,
    "min_samples_leaf": 5,
    "max_features": 'sqrt',
    "random_state": 42,
}
rf = RandomForestClassifier(**rf_params)
rf.fit(X_train_scaled, y_train)

# Identify Important Features: keep the columns whose importance is
# strictly above the median importance of the initial forest.
feature_importances = rf.feature_importances_
importance_median = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_median)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features (in column order, not sorted by importance)
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params)
rf_selected.fit(X_train_selected, y_train)

# Create a unique folder name dynamically
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the Model, the Scaler and the selected column indices
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Predictions (hard labels and the predicted probability of class 1)
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# Each row: printed label, metric function, and the prediction array it
# consumes (log loss needs probabilities, the others need hard labels).
metric_rows = (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
)
for metric_label, metric_fn, metric_input in metric_rows:
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Row 0 holds the true class-0 samples, row 1 the true class-1 samples.
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC3 CH1: (2481, 38)
✅ Loaded Probe3 ADC3 CH2: (2481, 38)
✅ Loaded Probe1 ADC3 CH1: (2481, 38)
✅ Loaded Probe1 ADC3 CH2: (2481, 38)

Top Important Features:
Max_H: Importance 0.1009
Min_H: Importance 0.0300
Mean_H: Importance 0.0895
Std_H: Importance 0.0259
Mean Deviation_H: Importance 0.0172
RMS_H: Importance 0.1371
Centroid_H: Importance 0.0030
Entropy_H: Importance 0.0139
Mean_Freq_H: Importance 0.0570
Variance_H: Importance 0.0665
Max_L: Importance 0.0005
Min_L: Importance 0.0183
Mean_L: Importance 0.1919
Std_L: Importance 0.0000
RMS_L: Importance 0.1319
Entropy_L: Importance 0.0301
Kurtosis_Freq_L: Importance 0.0000
Variance_L: Importance 0.0862

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0003
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      2481
         CH2       1.00      1.00      1.00  

#### **Sampling Rate 50MSPS**
This section contains data collected using a sampling rate of **50MSPS**. The model is trained on **Probe 3** and tested on **Probe 1**.



In [197]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder that holds one "P<n>_fd" sub-folder of feature files per probe.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selection for this cell.
adc_num = 3                # ADC number used in the feature-file names
train_probe_dataset = 3    # probe whose data trains the model
test_probe_dataset = 1     # probe whose data is used for testing
sample_rate = 50           # sampling rate (MSPS), also part of the file names

# Probe labels ("Probe<n>") derived from the dataset numbers above
train_probe, test_probe = (
    f"Probe{number}" for number in (train_probe_dataset, test_probe_dataset)
)

# Feature Names (Time High, Freq High, Time Low, Freq Low): the same 19
# descriptors appear twice, first with an "_H" suffix and then with "_L".
_feature_stems = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness", "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq", "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [f"{stem}_{band}" for band in ("H", "L") for stem in _feature_stems]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel combination.

    The file is looked up under the module-level ``base_directory`` in the
    sub-folder ``P<last character of probe_name>_fd``; its name also encodes
    the module-level ``sample_rate``.  ``adc_num`` only selects the file name;
    the loaded contents are not checked against it.

    Args:
        probe_name: Probe label such as ``"Probe3"``; its final character
            picks the sub-folder.
        adc_num: ADC number embedded in the file name.
        channel: Channel number embedded in the file name.

    Returns:
        The array returned by ``np.load`` for the matching ``.npy`` file.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    probe_suffix = probe_name[-1]
    npy_path = os.path.join(
        base_directory,
        f"P{probe_suffix}_fd",
        f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy',
    )

    # Report the full path on failure so a wrong configuration is easy to spot.
    if os.path.exists(npy_path):
        features = np.load(npy_path)
        print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {features.shape}")
        return features

    raise FileNotFoundError(f"File not found: {npy_path}")

# ==================================================================
# Training & Test Data
# ==================================================================
# The "ch0"/"ch1" variables hold the channel-1 and channel-2 files
# respectively; channel 1 is labelled 0 and channel 2 is labelled 1.
train_ch0, train_ch1 = [load_channel_data(train_probe, adc_num, ch) for ch in (1, 2)]
test_ch0, test_ch1 = [load_channel_data(test_probe, adc_num, ch) for ch in (1, 2)]

X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ==================================================================
# Data Preprocessing
# ==================================================================
# Scaling statistics come from the training probe only and are then
# applied unchanged to the test probe.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests (initial and retrained) share the same hyper-parameters.
rf_params = {
    "n_estimators": 50,
    "max_depth": 10,
    "min_samples_split": 10,
    "min_samples_leaf": 5,
    "max_features": 'sqrt',
    "random_state": 42,
}
rf = RandomForestClassifier(**rf_params)
rf.fit(X_train_scaled, y_train)

# Identify Important Features: keep the columns whose importance is
# strictly above the median importance of the initial forest.
feature_importances = rf.feature_importances_
importance_median = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_median)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features (in column order, not sorted by importance)
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params)
rf_selected.fit(X_train_selected, y_train)

# Create a unique folder name dynamically
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the Model, the Scaler and the selected column indices
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Predictions (hard labels and the predicted probability of class 1)
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# Each row: printed label, metric function, and the prediction array it
# consumes (log loss needs probabilities, the others need hard labels).
metric_rows = (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
)
for metric_label, metric_fn, metric_input in metric_rows:
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Row 0 holds the true class-0 samples, row 1 the true class-1 samples.
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC3 CH1: (1998, 38)
✅ Loaded Probe3 ADC3 CH2: (1998, 38)
✅ Loaded Probe1 ADC3 CH1: (1998, 38)
✅ Loaded Probe1 ADC3 CH2: (1998, 38)

Top Important Features:
Max_H: Importance 0.0918
Min_H: Importance 0.0211
Mean_H: Importance 0.0998
Std_H: Importance 0.0101
Mean Deviation_H: Importance 0.0110
RMS_H: Importance 0.1347
Centroid_H: Importance 0.0005
Entropy_H: Importance 0.0176
Mean_Freq_H: Importance 0.0520
Variance_H: Importance 0.0640
Max_L: Importance 0.0028
Min_L: Importance 0.0187
Mean_L: Importance 0.1890
Mean Deviation_L: Importance 0.0100
RMS_L: Importance 0.1280
Kurtosis_L: Importance 0.0015
Entropy_L: Importance 0.0286
Mean_Freq_L: Importance 0.0113
Variance_L: Importance 0.1073

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0000
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      1998
        

#### **Sampling Rate 100MSPS**
This section contains data collected using a sampling rate of **100MSPS**. The model is trained on **Probe 3** and tested on **Probe 1**.



In [198]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ------------------------------------------------------------------
# Experiment settings for this run
# ------------------------------------------------------------------
# Root folder that the loader below searches for feature files.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Probe datasets: the model is fitted on the first and evaluated on the second.
train_probe_dataset = 3
test_probe_dataset = 1
train_probe = f"Probe{train_probe_dataset}"
test_probe = f"Probe{test_probe_dataset}"

# ADC number and sampling rate; both are substituted into the file names.
adc_num = 3
sample_rate = 100

# Column labels for the 38 features: the same 19 statistics listed twice,
# first suffixed "_H" and then "_L" (original note: Time High, Freq High,
# Time Low, Freq Low).
_base_feature_names = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness", "Kurtosis",
    "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq", "Kurtosis_Freq",
    "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [
    f"{_name}_{_suffix}" for _suffix in ("H", "L") for _name in _base_feature_names
]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel combination.

    The file is looked up at
    ``<base_directory>/P<id>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``
    where ``base_directory`` and ``sample_rate`` are the module-level settings
    and ``<id>`` is the numeric suffix of ``probe_name``. The array is returned
    as loaded; no cross-check of the ADC number against the data is performed.

    Args:
        probe_name: Probe label ending in its number, e.g. ``"Probe3"``.
        adc_num: ADC number substituted into the file name.
        channel: Channel number substituted into the file name.

    Returns:
        The array read by ``np.load``.

    Raises:
        FileNotFoundError: If the expected ``.npy`` file does not exist.
    """
    # Take the whole trailing digit run so a multi-digit probe ("Probe10")
    # maps to "P10_fd" instead of "P0_fd". Names without trailing digits fall
    # back to their last character, which is what the code did before.
    probe_id = probe_name[len(probe_name.rstrip("0123456789")):] or probe_name[-1]
    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path so a misconfigured run is easy to diagnose.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ------------------------------------------------------------------
# Load features: channel 1 -> label 0, channel 2 -> label 1
# ------------------------------------------------------------------
train_ch0, train_ch1 = [load_channel_data(train_probe, adc_num, _ch) for _ch in (1, 2)]
test_ch0, test_ch1 = [load_channel_data(test_probe, adc_num, _ch) for _ch in (1, 2)]

X_train = np.vstack([train_ch0, train_ch1])
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
X_test = np.vstack([test_ch0, test_ch1])
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ------------------------------------------------------------------
# Standardise using statistics from the training probe only
# ------------------------------------------------------------------
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ------------------------------------------------------------------
# First forest: used only to rank the features
# ------------------------------------------------------------------
# Both forests share the same hyperparameters.
_rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**_rf_params)
rf.fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the median importance.
feature_importances = rf.feature_importances_
_importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > _importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

print("\nTop Important Features:")
for feature_idx in important_features:
    _importance = feature_importances[feature_idx]
    print(f"{feature_names[feature_idx]}: Importance {_importance:.4f}")

# ------------------------------------------------------------------
# Second forest: trained on the selected columns only
# ------------------------------------------------------------------
rf_selected = RandomForestClassifier(**_rf_params)
rf_selected.fit(X_train_selected, y_train)

# Persist the model, scaler and selected column indices in a per-run folder.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)
for _artifact, _filename in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(_artifact, os.path.join(model_dir, _filename))

# Hard predictions plus the predicted probability of label 1.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ------------------------------------------------------------------
# Evaluation on the test probe
# ------------------------------------------------------------------
print("\nModel Performance After Feature Selection:")
for _label, _metric, _scores in (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),  # scored on probabilities
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
):
    print(f"{_label}: {_metric(y_test, _scores):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix rows are true labels, columns are predicted labels.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
_tn, _fp = cm[0, 0], cm[0, 1]
_fn, _tp = cm[1, 0], cm[1, 1]
fp_rate = _fp / (_fp + _tn)
fn_rate = _fn / (_fn + _tp)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC3 CH1: (998, 38)
✅ Loaded Probe3 ADC3 CH2: (998, 38)
✅ Loaded Probe1 ADC3 CH1: (998, 38)
✅ Loaded Probe1 ADC3 CH2: (998, 38)

Top Important Features:
Max_H: Importance 0.1196
Min_H: Importance 0.0044
Mean_H: Importance 0.0928
Std_H: Importance 0.0110
Mean Deviation_H: Importance 0.0270
RMS_H: Importance 0.1247
Centroid_H: Importance 0.0000
Entropy_H: Importance 0.0333
Spread_H: Importance 0.0015
Mean_Freq_H: Importance 0.0496
Variance_H: Importance 0.0980
Max_L: Importance 0.0087
Min_L: Importance 0.0298
Mean_L: Importance 0.1661
Mean Deviation_L: Importance 0.0101
RMS_L: Importance 0.1245
Entropy_L: Importance 0.0075
Variance_L: Importance 0.0911

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0000
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       998
         CH2       1.00      1.00      1.00  

#### **Sampling Rate 150MSPS**
This section contains data collected using a sampling rate of **150MSPS**. The model is trained on **Probe 3** and tested on **Probe 1**.



In [199]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ------------------------------------------------------------------
# Experiment settings for this run
# ------------------------------------------------------------------
# Root folder that the loader below searches for feature files.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Probe datasets: the model is fitted on the first and evaluated on the second.
train_probe_dataset = 3
test_probe_dataset = 1
train_probe = f"Probe{train_probe_dataset}"
test_probe = f"Probe{test_probe_dataset}"

# ADC number and sampling rate; both are substituted into the file names.
adc_num = 3
sample_rate = 150

# Column labels for the 38 features: the same 19 statistics listed twice,
# first suffixed "_H" and then "_L" (original note: Time High, Freq High,
# Time Low, Freq Low).
_base_feature_names = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness", "Kurtosis",
    "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq", "Kurtosis_Freq",
    "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [
    f"{_name}_{_suffix}" for _suffix in ("H", "L") for _name in _base_feature_names
]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel combination.

    The file is looked up at
    ``<base_directory>/P<id>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``
    where ``base_directory`` and ``sample_rate`` are the module-level settings
    and ``<id>`` is the numeric suffix of ``probe_name``. The array is returned
    as loaded; no cross-check of the ADC number against the data is performed.

    Args:
        probe_name: Probe label ending in its number, e.g. ``"Probe3"``.
        adc_num: ADC number substituted into the file name.
        channel: Channel number substituted into the file name.

    Returns:
        The array read by ``np.load``.

    Raises:
        FileNotFoundError: If the expected ``.npy`` file does not exist.
    """
    # Take the whole trailing digit run so a multi-digit probe ("Probe10")
    # maps to "P10_fd" instead of "P0_fd". Names without trailing digits fall
    # back to their last character, which is what the code did before.
    probe_id = probe_name[len(probe_name.rstrip("0123456789")):] or probe_name[-1]
    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path so a misconfigured run is easy to diagnose.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ------------------------------------------------------------------
# Load features: channel 1 -> label 0, channel 2 -> label 1
# ------------------------------------------------------------------
train_ch0, train_ch1 = [load_channel_data(train_probe, adc_num, _ch) for _ch in (1, 2)]
test_ch0, test_ch1 = [load_channel_data(test_probe, adc_num, _ch) for _ch in (1, 2)]

X_train = np.vstack([train_ch0, train_ch1])
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
X_test = np.vstack([test_ch0, test_ch1])
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ------------------------------------------------------------------
# Standardise using statistics from the training probe only
# ------------------------------------------------------------------
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ------------------------------------------------------------------
# First forest: used only to rank the features
# ------------------------------------------------------------------
# Both forests share the same hyperparameters.
_rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**_rf_params)
rf.fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the median importance.
feature_importances = rf.feature_importances_
_importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > _importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

print("\nTop Important Features:")
for feature_idx in important_features:
    _importance = feature_importances[feature_idx]
    print(f"{feature_names[feature_idx]}: Importance {_importance:.4f}")

# ------------------------------------------------------------------
# Second forest: trained on the selected columns only
# ------------------------------------------------------------------
rf_selected = RandomForestClassifier(**_rf_params)
rf_selected.fit(X_train_selected, y_train)

# Persist the model, scaler and selected column indices in a per-run folder.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)
for _artifact, _filename in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(_artifact, os.path.join(model_dir, _filename))

# Hard predictions plus the predicted probability of label 1.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ------------------------------------------------------------------
# Evaluation on the test probe
# ------------------------------------------------------------------
print("\nModel Performance After Feature Selection:")
for _label, _metric, _scores in (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),  # scored on probabilities
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
):
    print(f"{_label}: {_metric(y_test, _scores):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix rows are true labels, columns are predicted labels.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
_tn, _fp = cm[0, 0], cm[0, 1]
_fn, _tp = cm[1, 0], cm[1, 1]
fp_rate = _fp / (_fp + _tn)
fn_rate = _fn / (_fn + _tp)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC3 CH1: (665, 38)
✅ Loaded Probe3 ADC3 CH2: (665, 38)
✅ Loaded Probe1 ADC3 CH1: (665, 38)
✅ Loaded Probe1 ADC3 CH2: (665, 38)

Top Important Features:
Max_H: Importance 0.1017
Mean_H: Importance 0.0875
Std_H: Importance 0.0299
Mean Deviation_H: Importance 0.0443
RMS_H: Importance 0.1300
Entropy_H: Importance 0.0151
Mean_Freq_H: Importance 0.0147
Irregularity_H: Importance 0.0056
Variance_H: Importance 0.0216
Max_L: Importance 0.0000
Min_L: Importance 0.0409
Mean_L: Importance 0.1808
Std_L: Importance 0.0041
Mean Deviation_L: Importance 0.1105
RMS_L: Importance 0.1201
Entropy_L: Importance 0.0124
Variance_L: Importance 0.0810

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0020
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       665
         CH2       1.00      1.00      1.00       665

    accuracy  

#### **Sampling Rate 200MSPS**
This section contains data collected using a sampling rate of **200MSPS**. The model is trained on **Probe 3** and tested on **Probe 1**.



In [200]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ------------------------------------------------------------------
# Experiment settings for this run
# ------------------------------------------------------------------
# Root folder that the loader below searches for feature files.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Probe datasets: the model is fitted on the first and evaluated on the second.
train_probe_dataset = 3
test_probe_dataset = 1
train_probe = f"Probe{train_probe_dataset}"
test_probe = f"Probe{test_probe_dataset}"

# ADC number and sampling rate; both are substituted into the file names.
adc_num = 3
sample_rate = 200

# Column labels for the 38 features: the same 19 statistics listed twice,
# first suffixed "_H" and then "_L" (original note: Time High, Freq High,
# Time Low, Freq Low).
_base_feature_names = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness", "Kurtosis",
    "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq", "Kurtosis_Freq",
    "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [
    f"{_name}_{_suffix}" for _suffix in ("H", "L") for _name in _base_feature_names
]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel combination.

    The file is looked up at
    ``<base_directory>/P<id>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``
    where ``base_directory`` and ``sample_rate`` are the module-level settings
    and ``<id>`` is the numeric suffix of ``probe_name``. The array is returned
    as loaded; no cross-check of the ADC number against the data is performed.

    Args:
        probe_name: Probe label ending in its number, e.g. ``"Probe3"``.
        adc_num: ADC number substituted into the file name.
        channel: Channel number substituted into the file name.

    Returns:
        The array read by ``np.load``.

    Raises:
        FileNotFoundError: If the expected ``.npy`` file does not exist.
    """
    # Take the whole trailing digit run so a multi-digit probe ("Probe10")
    # maps to "P10_fd" instead of "P0_fd". Names without trailing digits fall
    # back to their last character, which is what the code did before.
    probe_id = probe_name[len(probe_name.rstrip("0123456789")):] or probe_name[-1]
    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path so a misconfigured run is easy to diagnose.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ------------------------------------------------------------------
# Load features: channel 1 -> label 0, channel 2 -> label 1
# ------------------------------------------------------------------
train_ch0, train_ch1 = [load_channel_data(train_probe, adc_num, _ch) for _ch in (1, 2)]
test_ch0, test_ch1 = [load_channel_data(test_probe, adc_num, _ch) for _ch in (1, 2)]

X_train = np.vstack([train_ch0, train_ch1])
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
X_test = np.vstack([test_ch0, test_ch1])
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ------------------------------------------------------------------
# Standardise using statistics from the training probe only
# ------------------------------------------------------------------
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ------------------------------------------------------------------
# First forest: used only to rank the features
# ------------------------------------------------------------------
# Both forests share the same hyperparameters.
_rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**_rf_params)
rf.fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the median importance.
feature_importances = rf.feature_importances_
_importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > _importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

print("\nTop Important Features:")
for feature_idx in important_features:
    _importance = feature_importances[feature_idx]
    print(f"{feature_names[feature_idx]}: Importance {_importance:.4f}")

# ------------------------------------------------------------------
# Second forest: trained on the selected columns only
# ------------------------------------------------------------------
rf_selected = RandomForestClassifier(**_rf_params)
rf_selected.fit(X_train_selected, y_train)

# Persist the model, scaler and selected column indices in a per-run folder.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)
for _artifact, _filename in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(_artifact, os.path.join(model_dir, _filename))

# Hard predictions plus the predicted probability of label 1.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ------------------------------------------------------------------
# Evaluation on the test probe
# ------------------------------------------------------------------
print("\nModel Performance After Feature Selection:")
for _label, _metric, _scores in (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),  # scored on probabilities
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
):
    print(f"{_label}: {_metric(y_test, _scores):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix rows are true labels, columns are predicted labels.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
_tn, _fp = cm[0, 0], cm[0, 1]
_fn, _tp = cm[1, 0], cm[1, 1]
fp_rate = _fp / (_fp + _tn)
fn_rate = _fn / (_fn + _tp)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC3 CH1: (498, 38)
✅ Loaded Probe3 ADC3 CH2: (498, 38)
✅ Loaded Probe1 ADC3 CH1: (498, 38)
✅ Loaded Probe1 ADC3 CH2: (498, 38)

Top Important Features:
Max_H: Importance 0.0600
Mean_H: Importance 0.0800
Mean Deviation_H: Importance 0.0402
RMS_H: Importance 0.1245
Entropy_H: Importance 0.0182
Mean_Freq_H: Importance 0.0180
Kurtosis_Freq_H: Importance 0.0103
Variance_H: Importance 0.0200
Min_L: Importance 0.0876
Mean_L: Importance 0.1600
Std_L: Importance 0.0398
Mean Deviation_L: Importance 0.0800
RMS_L: Importance 0.1020
Entropy_L: Importance 0.0600
Mean_Freq_L: Importance 0.0154
Variance_L: Importance 0.0840

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0008
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       498
         CH2       1.00      1.00      1.00       498

    accuracy                    

#### **Sampling Rate 250MSPS**
This section contains data collected using a sampling rate of **250MSPS**. The model is trained on **Probe 3** and tested on **Probe 1**.



In [201]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ------------------------------------------------------------------
# Experiment settings for this run
# ------------------------------------------------------------------
# Root folder that the loader below searches for feature files.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Probe datasets: the model is fitted on the first and evaluated on the second.
train_probe_dataset = 3
test_probe_dataset = 1
train_probe = f"Probe{train_probe_dataset}"
test_probe = f"Probe{test_probe_dataset}"

# ADC number and sampling rate; both are substituted into the file names.
adc_num = 3
sample_rate = 250

# Column labels for the 38 features: the same 19 statistics listed twice,
# first suffixed "_H" and then "_L" (original note: Time High, Freq High,
# Time Low, Freq Low).
_base_feature_names = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness", "Kurtosis",
    "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq", "Kurtosis_Freq",
    "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [
    f"{_name}_{_suffix}" for _suffix in ("H", "L") for _name in _base_feature_names
]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel combination.

    The file is looked up at
    ``<base_directory>/P<id>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``
    where ``base_directory`` and ``sample_rate`` are the module-level settings
    and ``<id>`` is the numeric suffix of ``probe_name``. The array is returned
    as loaded; no cross-check of the ADC number against the data is performed.

    Args:
        probe_name: Probe label ending in its number, e.g. ``"Probe3"``.
        adc_num: ADC number substituted into the file name.
        channel: Channel number substituted into the file name.

    Returns:
        The array read by ``np.load``.

    Raises:
        FileNotFoundError: If the expected ``.npy`` file does not exist.
    """
    # Take the whole trailing digit run so a multi-digit probe ("Probe10")
    # maps to "P10_fd" instead of "P0_fd". Names without trailing digits fall
    # back to their last character, which is what the code did before.
    probe_id = probe_name[len(probe_name.rstrip("0123456789")):] or probe_name[-1]
    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path so a misconfigured run is easy to diagnose.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ------------------------------------------------------------------
# Load features: channel 1 -> label 0, channel 2 -> label 1
# ------------------------------------------------------------------
train_ch0, train_ch1 = [load_channel_data(train_probe, adc_num, _ch) for _ch in (1, 2)]
test_ch0, test_ch1 = [load_channel_data(test_probe, adc_num, _ch) for _ch in (1, 2)]

X_train = np.vstack([train_ch0, train_ch1])
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
X_test = np.vstack([test_ch0, test_ch1])
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ------------------------------------------------------------------
# Standardise using statistics from the training probe only
# ------------------------------------------------------------------
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ------------------------------------------------------------------
# First forest: used only to rank the features
# ------------------------------------------------------------------
# Both forests share the same hyperparameters.
_rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**_rf_params)
rf.fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the median importance.
feature_importances = rf.feature_importances_
_importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > _importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

print("\nTop Important Features:")
for feature_idx in important_features:
    _importance = feature_importances[feature_idx]
    print(f"{feature_names[feature_idx]}: Importance {_importance:.4f}")

# ------------------------------------------------------------------
# Second forest: trained on the selected columns only
# ------------------------------------------------------------------
rf_selected = RandomForestClassifier(**_rf_params)
rf_selected.fit(X_train_selected, y_train)

# Persist the model, scaler and selected column indices in a per-run folder.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)
for _artifact, _filename in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(_artifact, os.path.join(model_dir, _filename))

# Hard predictions plus the predicted probability of label 1.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ------------------------------------------------------------------
# Evaluation on the test probe
# ------------------------------------------------------------------
print("\nModel Performance After Feature Selection:")
for _label, _metric, _scores in (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),  # scored on probabilities
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
):
    print(f"{_label}: {_metric(y_test, _scores):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix rows are true labels, columns are predicted labels.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
_tn, _fp = cm[0, 0], cm[0, 1]
_fn, _tp = cm[1, 0], cm[1, 1]
fp_rate = _fp / (_fp + _tn)
fn_rate = _fn / (_fn + _tp)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC3 CH1: (398, 38)
✅ Loaded Probe3 ADC3 CH2: (398, 38)
✅ Loaded Probe1 ADC3 CH1: (398, 38)
✅ Loaded Probe1 ADC3 CH2: (398, 38)

Top Important Features:
Max_H: Importance 0.0600
Mean_H: Importance 0.0829
Mean Deviation_H: Importance 0.0400
RMS_H: Importance 0.1024
Peak-to-Peak_H: Importance 0.0007
Entropy_H: Importance 0.1000
Mean_Freq_H: Importance 0.0198
Kurtosis_Freq_H: Importance 0.0142
Irregularity_H: Importance 0.0160
Variance_H: Importance 0.0200
Min_L: Importance 0.1220
Mean_L: Importance 0.1000
Std_L: Importance 0.0799
Mean Deviation_L: Importance 0.0600
RMS_L: Importance 0.0831
Entropy_L: Importance 0.0193
Mean_Freq_L: Importance 0.0197
Variance_L: Importance 0.0600

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0194
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       398
         CH2       

#### **Sampling Rate 300MSPS**
This section contains data collected using a sampling rate of **300MSPS**. The model is trained on **Probe 3** and tested on **Probe 1**.



In [202]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ------------------------------------------------------------------
# Experiment settings for this run
# ------------------------------------------------------------------
# Root folder that the loader below searches for feature files.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Probe datasets: the model is fitted on the first and evaluated on the second.
train_probe_dataset = 3
test_probe_dataset = 1
train_probe = f"Probe{train_probe_dataset}"
test_probe = f"Probe{test_probe_dataset}"

# ADC number and sampling rate; both are substituted into the file names.
adc_num = 3
sample_rate = 300

# Column labels for the 38 features: the same 19 statistics listed twice,
# first suffixed "_H" and then "_L" (original note: Time High, Freq High,
# Time Low, Freq Low).
_base_feature_names = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness", "Kurtosis",
    "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq", "Kurtosis_Freq",
    "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [
    f"{_name}_{_suffix}" for _suffix in ("H", "L") for _name in _base_feature_names
]

def load_channel_data(probe_name, adc_num, channel):
    """Return the array stored for one probe / ADC / channel combination.

    The file is looked up under ``base_directory`` in the folder
    ``P<last character of probe_name>_fd`` and is named
    ``fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``.
    ``base_directory`` and ``sample_rate`` are read from module scope.

    Raises:
        FileNotFoundError: if nothing exists at the computed path.
    """
    npy_path = os.path.join(
        base_directory,
        f"P{probe_name[-1]}_fd",
        f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy',
    )
    if os.path.exists(npy_path):
        features = np.load(npy_path)
        print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {features.shape}")
        return features
    raise FileNotFoundError(f"File not found: {npy_path}")

# ==================================================================
# Training & Test Data
# ==================================================================
# Channel files 1 and 2 become class labels 0 and 1 respectively.
train_ch0, train_ch1 = (
    load_channel_data(train_probe, adc_num, ch) for ch in (1, 2)
)
test_ch0, test_ch1 = (
    load_channel_data(test_probe, adc_num, ch) for ch in (1, 2)
)

X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ==================================================================
# Data Preprocessing
# ==================================================================
# Scaling statistics come from the training probe only and are reused
# unchanged for the test probe.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests (initial and retrained) share these hyperparameters.
_rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**_rf_params).fit(X_train_scaled, y_train)

# Identify Important Features: keep columns whose importance is strictly
# above the 50th percentile of all importances.
feature_importances = rf.feature_importances_
_importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > _importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**_rf_params).fit(X_train_selected, y_train)

# Create a unique folder name dynamically
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the Model, Scaler and selected feature indices
for _artifact, _file_name in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(_artifact, os.path.join(model_dir, _file_name))

# Predictions (hard labels, and the probability of class 1)
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_selected):.4f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred_selected):.4f}")
print(f"MCC: {matthews_corrcoef(y_test, y_pred_selected):.4f}")
print(f"Log Loss: {log_loss(y_test, y_probs_selected):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred_selected):.4f}")
print(f"Recall: {recall_score(y_test, y_pred_selected):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_selected):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
# Rows are true labels and columns are predictions, so with class 1 as
# positive: FPR = FP / (FP + TN) and FNR = FN / (FN + TP).
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC3 CH1: (331, 38)
✅ Loaded Probe3 ADC3 CH2: (332, 38)
✅ Loaded Probe1 ADC3 CH1: (332, 38)
✅ Loaded Probe1 ADC3 CH2: (332, 38)

Top Important Features:
Max_H: Importance 0.0600
Mean_H: Importance 0.0800
Mean Deviation_H: Importance 0.0200
RMS_H: Importance 0.1011
Entropy_H: Importance 0.0800
Mean_Freq_H: Importance 0.0401
Kurtosis_Freq_H: Importance 0.0182
Min_L: Importance 0.0600
Mean_L: Importance 0.1200
Std_L: Importance 0.1000
Mean Deviation_L: Importance 0.0600
RMS_L: Importance 0.0800
Peak-to-Peak_L: Importance 0.0007
Entropy_L: Importance 0.0600
Mean_Freq_L: Importance 0.0399
Variance_L: Importance 0.0800

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0318
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       332
         CH2       1.00      1.00      1.00       332

    accuracy                

### Training **Probe 3** data and Testing on **Probe 2**
The model trained on Probe 3 is evaluated on Probe 2 to test how it performs when the probe is changed.

#### **Sampling Rate 10MSPS**
This section contains data collected using a sampling rate of **10MSPS**. The model is trained on **Probe 3** and tested on **Probe 2**.



In [203]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder that contains the per-probe feature folders.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selection: ADC number, train/test probe numbers, sampling rate (MSPS).
adc_num = 3
train_probe_dataset = 3
test_probe_dataset = 2
sample_rate = 10

# Probe labels are derived from the dataset numbers above.
train_probe = f"Probe{train_probe_dataset}"
test_probe = f"Probe{test_probe_dataset}"

# Feature names in column order (Time High, Freq High, Time Low, Freq Low):
# the same time/frequency base names are emitted once per suffix.
_time_feature_bases = [
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
]
_freq_feature_bases = [
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
]
feature_names = [
    f"{base}_{suffix}"
    for suffix in ("H", "L")
    for base in _time_feature_bases + _freq_feature_bases
]

def load_channel_data(probe_name, adc_num, channel):
    """Return the array stored for one probe / ADC / channel combination.

    The file is looked up under ``base_directory`` in the folder
    ``P<last character of probe_name>_fd`` and is named
    ``fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``.
    ``base_directory`` and ``sample_rate`` are read from module scope.

    Raises:
        FileNotFoundError: if nothing exists at the computed path.
    """
    npy_path = os.path.join(
        base_directory,
        f"P{probe_name[-1]}_fd",
        f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy',
    )
    if os.path.exists(npy_path):
        features = np.load(npy_path)
        print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {features.shape}")
        return features
    raise FileNotFoundError(f"File not found: {npy_path}")

# ==================================================================
# Training & Test Data
# ==================================================================
# Channel files 1 and 2 become class labels 0 and 1 respectively.
train_ch0, train_ch1 = (
    load_channel_data(train_probe, adc_num, ch) for ch in (1, 2)
)
test_ch0, test_ch1 = (
    load_channel_data(test_probe, adc_num, ch) for ch in (1, 2)
)

X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ==================================================================
# Data Preprocessing
# ==================================================================
# Scaling statistics come from the training probe only and are reused
# unchanged for the test probe.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests (initial and retrained) share these hyperparameters.
_rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**_rf_params).fit(X_train_scaled, y_train)

# Identify Important Features: keep columns whose importance is strictly
# above the 50th percentile of all importances.
feature_importances = rf.feature_importances_
_importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > _importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**_rf_params).fit(X_train_selected, y_train)

# Create a unique folder name dynamically
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the Model, Scaler and selected feature indices
for _artifact, _file_name in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(_artifact, os.path.join(model_dir, _file_name))

# Predictions (hard labels, and the probability of class 1)
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_selected):.4f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred_selected):.4f}")
print(f"MCC: {matthews_corrcoef(y_test, y_pred_selected):.4f}")
print(f"Log Loss: {log_loss(y_test, y_probs_selected):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred_selected):.4f}")
print(f"Recall: {recall_score(y_test, y_pred_selected):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_selected):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
# Rows are true labels and columns are predictions, so with class 1 as
# positive: FPR = FP / (FP + TN) and FNR = FN / (FN + TP).
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC3 CH1: (9998, 38)
✅ Loaded Probe3 ADC3 CH2: (9998, 38)
✅ Loaded Probe2 ADC3 CH1: (9998, 38)
✅ Loaded Probe2 ADC3 CH2: (9998, 38)

Top Important Features:
Max_H: Importance 0.0117
Min_H: Importance 0.0313
Mean_H: Importance 0.0859
Mean Deviation_H: Importance 0.0079
RMS_H: Importance 0.1399
Skewness_H: Importance 0.0035
Peak-to-Peak_H: Importance 0.0054
Centroid_H: Importance 0.0052
Entropy_H: Importance 0.2163
Spread_H: Importance 0.0080
Mean_Freq_H: Importance 0.0864
Variance_H: Importance 0.0453
Max_L: Importance 0.0042
Min_L: Importance 0.0804
Mean_L: Importance 0.0361
RMS_L: Importance 0.0507
Entropy_L: Importance 0.0519
Mean_Freq_L: Importance 0.0253
Variance_L: Importance 0.0882

Model Performance After Feature Selection:
Test Accuracy: 0.9997
Balanced Accuracy: 0.9997
MCC: 0.9994
Log Loss: 0.0019
F1 Score: 0.9997
Recall: 0.9996
Precision: 0.9998
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      9998
       

#### **Sampling Rate 20MSPS**
This section contains data collected using a sampling rate of **20MSPS**. The model is trained on **Probe 3** and tested on **Probe 2**.



In [204]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder that contains the per-probe feature folders.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selection: ADC number, train/test probe numbers, sampling rate (MSPS).
adc_num = 3
train_probe_dataset = 3
test_probe_dataset = 2
sample_rate = 20

# Probe labels are derived from the dataset numbers above.
train_probe = f"Probe{train_probe_dataset}"
test_probe = f"Probe{test_probe_dataset}"

# Feature names in column order (Time High, Freq High, Time Low, Freq Low):
# the same time/frequency base names are emitted once per suffix.
_time_feature_bases = [
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
]
_freq_feature_bases = [
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
]
feature_names = [
    f"{base}_{suffix}"
    for suffix in ("H", "L")
    for base in _time_feature_bases + _freq_feature_bases
]

def load_channel_data(probe_name, adc_num, channel):
    """Return the array stored for one probe / ADC / channel combination.

    The file is looked up under ``base_directory`` in the folder
    ``P<last character of probe_name>_fd`` and is named
    ``fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``.
    ``base_directory`` and ``sample_rate`` are read from module scope.

    Raises:
        FileNotFoundError: if nothing exists at the computed path.
    """
    npy_path = os.path.join(
        base_directory,
        f"P{probe_name[-1]}_fd",
        f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy',
    )
    if os.path.exists(npy_path):
        features = np.load(npy_path)
        print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {features.shape}")
        return features
    raise FileNotFoundError(f"File not found: {npy_path}")

# ==================================================================
# Training & Test Data
# ==================================================================
# Channel files 1 and 2 become class labels 0 and 1 respectively.
train_ch0, train_ch1 = (
    load_channel_data(train_probe, adc_num, ch) for ch in (1, 2)
)
test_ch0, test_ch1 = (
    load_channel_data(test_probe, adc_num, ch) for ch in (1, 2)
)

X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ==================================================================
# Data Preprocessing
# ==================================================================
# Scaling statistics come from the training probe only and are reused
# unchanged for the test probe.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests (initial and retrained) share these hyperparameters.
_rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**_rf_params).fit(X_train_scaled, y_train)

# Identify Important Features: keep columns whose importance is strictly
# above the 50th percentile of all importances.
feature_importances = rf.feature_importances_
_importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > _importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**_rf_params).fit(X_train_selected, y_train)

# Create a unique folder name dynamically
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the Model, Scaler and selected feature indices
for _artifact, _file_name in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(_artifact, os.path.join(model_dir, _file_name))

# Predictions (hard labels, and the probability of class 1)
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_selected):.4f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred_selected):.4f}")
print(f"MCC: {matthews_corrcoef(y_test, y_pred_selected):.4f}")
print(f"Log Loss: {log_loss(y_test, y_probs_selected):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred_selected):.4f}")
print(f"Recall: {recall_score(y_test, y_pred_selected):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_selected):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
# Rows are true labels and columns are predictions, so with class 1 as
# positive: FPR = FP / (FP + TN) and FNR = FN / (FN + TP).
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC3 CH1: (4998, 38)
✅ Loaded Probe3 ADC3 CH2: (4998, 38)
✅ Loaded Probe2 ADC3 CH1: (4998, 38)
✅ Loaded Probe2 ADC3 CH2: (4998, 38)

Top Important Features:
Max_H: Importance 0.1189
Min_H: Importance 0.0311
Mean_H: Importance 0.1143
RMS_H: Importance 0.2099
Centroid_H: Importance 0.0002
Entropy_H: Importance 0.0368
Mean_Freq_H: Importance 0.0777
Variance_H: Importance 0.0597
Max_L: Importance 0.0157
Min_L: Importance 0.0274
Mean_L: Importance 0.1254
Std_L: Importance 0.0009
RMS_L: Importance 0.1253
Kurtosis_L: Importance 0.0016
Peak-to-Peak_L: Importance 0.0004
Entropy_L: Importance 0.0339
Spread_L: Importance 0.0004
Mean_Freq_L: Importance 0.0127
Variance_L: Importance 0.0070

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0009
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      4998
         CH2      

#### **Sampling Rate 30MSPS**
This section contains data collected using a sampling rate of **30MSPS**. The model is trained on **Probe 3** and tested on **Probe 2**.



In [205]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder that contains the per-probe feature folders.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selection: ADC number, train/test probe numbers, sampling rate (MSPS).
adc_num = 3
train_probe_dataset = 3
test_probe_dataset = 2
sample_rate = 30

# Probe labels are derived from the dataset numbers above.
train_probe = f"Probe{train_probe_dataset}"
test_probe = f"Probe{test_probe_dataset}"

# Feature names in column order (Time High, Freq High, Time Low, Freq Low):
# the same time/frequency base names are emitted once per suffix.
_time_feature_bases = [
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
]
_freq_feature_bases = [
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
]
feature_names = [
    f"{base}_{suffix}"
    for suffix in ("H", "L")
    for base in _time_feature_bases + _freq_feature_bases
]

def load_channel_data(probe_name, adc_num, channel):
    """Return the array stored for one probe / ADC / channel combination.

    The file is looked up under ``base_directory`` in the folder
    ``P<last character of probe_name>_fd`` and is named
    ``fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``.
    ``base_directory`` and ``sample_rate`` are read from module scope.

    Raises:
        FileNotFoundError: if nothing exists at the computed path.
    """
    npy_path = os.path.join(
        base_directory,
        f"P{probe_name[-1]}_fd",
        f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy',
    )
    if os.path.exists(npy_path):
        features = np.load(npy_path)
        print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {features.shape}")
        return features
    raise FileNotFoundError(f"File not found: {npy_path}")

# ==================================================================
# Training & Test Data
# ==================================================================
# Channel files 1 and 2 become class labels 0 and 1 respectively.
train_ch0, train_ch1 = (
    load_channel_data(train_probe, adc_num, ch) for ch in (1, 2)
)
test_ch0, test_ch1 = (
    load_channel_data(test_probe, adc_num, ch) for ch in (1, 2)
)

X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ==================================================================
# Data Preprocessing
# ==================================================================
# Scaling statistics come from the training probe only and are reused
# unchanged for the test probe.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests (initial and retrained) share these hyperparameters.
_rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**_rf_params).fit(X_train_scaled, y_train)

# Identify Important Features: keep columns whose importance is strictly
# above the 50th percentile of all importances.
feature_importances = rf.feature_importances_
_importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > _importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**_rf_params).fit(X_train_selected, y_train)

# Create a unique folder name dynamically
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the Model, Scaler and selected feature indices
for _artifact, _file_name in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(_artifact, os.path.join(model_dir, _file_name))

# Predictions (hard labels, and the probability of class 1)
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_selected):.4f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred_selected):.4f}")
print(f"MCC: {matthews_corrcoef(y_test, y_pred_selected):.4f}")
print(f"Log Loss: {log_loss(y_test, y_probs_selected):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred_selected):.4f}")
print(f"Recall: {recall_score(y_test, y_pred_selected):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_selected):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
# Rows are true labels and columns are predictions, so with class 1 as
# positive: FPR = FP / (FP + TN) and FNR = FN / (FN + TP).
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC3 CH1: (3332, 38)
✅ Loaded Probe3 ADC3 CH2: (3331, 38)
✅ Loaded Probe2 ADC3 CH1: (3332, 38)
✅ Loaded Probe2 ADC3 CH2: (3332, 38)

Top Important Features:
Max_H: Importance 0.1478
Min_H: Importance 0.0273
Mean_H: Importance 0.1356
Mean Deviation_H: Importance 0.0106
RMS_H: Importance 0.1502
Entropy_H: Importance 0.0193
Spread_H: Importance 0.0010
Mean_Freq_H: Importance 0.0682
Variance_H: Importance 0.0617
Max_L: Importance 0.0018
Min_L: Importance 0.0353
Mean_L: Importance 0.1086
Std_L: Importance 0.0005
RMS_L: Importance 0.1615
Kurtosis_L: Importance 0.0016
Peak-to-Peak_L: Importance 0.0004
Entropy_L: Importance 0.0305
Mean_Freq_L: Importance 0.0107
Variance_L: Importance 0.0273

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0003
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      3332
         CH2

#### **Sampling Rate 40MSPS**
This section contains data collected using a sampling rate of **40MSPS**. The model is trained on **Probe 3** and tested on **Probe 2**.



In [206]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder that contains the per-probe feature folders.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selection: ADC number, train/test probe numbers, sampling rate (MSPS).
adc_num = 3
train_probe_dataset = 3
test_probe_dataset = 2
sample_rate = 40

# Probe labels are derived from the dataset numbers above.
train_probe = f"Probe{train_probe_dataset}"
test_probe = f"Probe{test_probe_dataset}"

# Feature names in column order (Time High, Freq High, Time Low, Freq Low):
# the same time/frequency base names are emitted once per suffix.
_time_feature_bases = [
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
]
_freq_feature_bases = [
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
]
feature_names = [
    f"{base}_{suffix}"
    for suffix in ("H", "L")
    for base in _time_feature_bases + _freq_feature_bases
]

def load_channel_data(probe_name, adc_num, channel):
    """Return the array stored for one probe / ADC / channel combination.

    The file is looked up under ``base_directory`` in the folder
    ``P<last character of probe_name>_fd`` and is named
    ``fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``.
    ``base_directory`` and ``sample_rate`` are read from module scope.

    Raises:
        FileNotFoundError: if nothing exists at the computed path.
    """
    npy_path = os.path.join(
        base_directory,
        f"P{probe_name[-1]}_fd",
        f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy',
    )
    if os.path.exists(npy_path):
        features = np.load(npy_path)
        print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {features.shape}")
        return features
    raise FileNotFoundError(f"File not found: {npy_path}")

# ==================================================================
# Training & Test Data
# ==================================================================
# Channel files 1 and 2 become class labels 0 and 1 respectively.
train_ch0, train_ch1 = (
    load_channel_data(train_probe, adc_num, ch) for ch in (1, 2)
)
test_ch0, test_ch1 = (
    load_channel_data(test_probe, adc_num, ch) for ch in (1, 2)
)

X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ==================================================================
# Data Preprocessing
# ==================================================================
# Scaling statistics come from the training probe only and are reused
# unchanged for the test probe.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests (initial and retrained) share these hyperparameters.
_rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**_rf_params).fit(X_train_scaled, y_train)

# Identify Important Features: keep columns whose importance is strictly
# above the 50th percentile of all importances.
feature_importances = rf.feature_importances_
_importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > _importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**_rf_params).fit(X_train_selected, y_train)

# Create a unique folder name dynamically
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the Model, Scaler and selected feature indices
for _artifact, _file_name in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(_artifact, os.path.join(model_dir, _file_name))

# Predictions (hard labels, and the probability of class 1)
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_selected):.4f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred_selected):.4f}")
print(f"MCC: {matthews_corrcoef(y_test, y_pred_selected):.4f}")
print(f"Log Loss: {log_loss(y_test, y_probs_selected):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred_selected):.4f}")
print(f"Recall: {recall_score(y_test, y_pred_selected):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_selected):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
# Rows are true labels and columns are predictions, so with class 1 as
# positive: FPR = FP / (FP + TN) and FNR = FN / (FN + TP).
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0, 1] / (cm[0, 1] + cm[0, 0])
fn_rate = cm[1, 0] / (cm[1, 0] + cm[1, 1])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC3 CH1: (2481, 38)
✅ Loaded Probe3 ADC3 CH2: (2481, 38)
✅ Loaded Probe2 ADC3 CH1: (2481, 38)
✅ Loaded Probe2 ADC3 CH2: (2481, 38)

Top Important Features:
Max_H: Importance 0.1009
Min_H: Importance 0.0300
Mean_H: Importance 0.0895
Std_H: Importance 0.0259
Mean Deviation_H: Importance 0.0172
RMS_H: Importance 0.1371
Centroid_H: Importance 0.0030
Entropy_H: Importance 0.0139
Mean_Freq_H: Importance 0.0570
Variance_H: Importance 0.0665
Max_L: Importance 0.0005
Min_L: Importance 0.0183
Mean_L: Importance 0.1919
Std_L: Importance 0.0000
RMS_L: Importance 0.1319
Entropy_L: Importance 0.0301
Kurtosis_Freq_L: Importance 0.0000
Variance_L: Importance 0.0862

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0005
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      2481
         CH2       1.00      1.00      1.00  

#### **Sampling Rate 50MSPS**
This section contains data collected using a sampling rate of **50MSPS**. The model is trained on **Probe 3** and tested on **Probe 2**.



In [207]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder that contains one "P<n>_fd" sub-folder of feature files per probe.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selectors: ADC number, probe used for training, probe used for
# testing, and the sampling rate (the value that appears before "_MSPS" in the
# feature file names).
adc_num = 3
train_probe_dataset = 3
test_probe_dataset = 2
sample_rate = 50

# Probe labels are derived from the dataset numbers above.
train_probe, test_probe = (
    f"Probe{number}" for number in (train_probe_dataset, test_probe_dataset)
)

# Column names of the feature matrix: the same 19 statistics are listed once
# with the "_H" suffix and then once with the "_L" suffix (38 names total),
# ordered Time High, Freq High, Time Low, Freq Low.
_FEATURE_STEMS = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness", "Kurtosis",
    "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq", "Kurtosis_Freq",
    "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [f"{stem}_{band}" for band in ("H", "L") for stem in _FEATURE_STEMS]

def load_channel_data(probe_name, adc_num, channel, sample_rate_msps=None, data_root=None):
    """Load the saved feature array for one probe / ADC / channel.

    The file is looked up at
    ``<data_root>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``
    where ``<n>`` is the trailing number of ``probe_name``. The array is
    returned exactly as stored; no validation of its contents is performed.

    Args:
        probe_name: Probe label ending in its number, e.g. ``"Probe2"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.
        sample_rate_msps: Sampling rate used in the file name. Defaults to
            the notebook-level ``sample_rate``.
        data_root: Directory holding the ``P<n>_fd`` folders. Defaults to
            the notebook-level ``base_directory``.

    Returns:
        The array read by ``np.load``.

    Raises:
        FileNotFoundError: If the expected ``.npy`` file does not exist.
    """
    # Fall back to the notebook globals only when the caller gave no override,
    # so existing three-argument calls behave exactly as before.
    rate = sample_rate if sample_rate_msps is None else sample_rate_msps
    root = base_directory if data_root is None else data_root

    # Take every trailing digit so "Probe10" resolves to "P10_fd" rather than
    # "P0_fd"; a name without trailing digits keeps the old last-character rule.
    # NOTE(review): assumes multi-digit probes follow the same folder naming.
    probe_id = probe_name[len(probe_name.rstrip("0123456789")):] or probe_name[-1]

    folder = os.path.join(root, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Load the CH1 and CH2 feature arrays for the training probe, then for the
# test probe. NOTE: the *_ch0 / *_ch1 suffixes are class indices (0 and 1),
# not channel numbers -- they hold the CH1 and CH2 recordings respectively.
train_ch0, train_ch1 = [load_channel_data(train_probe, adc_num, ch) for ch in (1, 2)]
test_ch0, test_ch1 = [load_channel_data(test_probe, adc_num, ch) for ch in (1, 2)]

# Stack both channels row-wise; CH1 rows are labelled 0.0 and CH2 rows 1.0.
X_train = np.vstack([train_ch0, train_ch1])
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
X_test = np.vstack([test_ch0, test_ch1])
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only; the test probe is
# transformed with those same statistics.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# One set of hyper-parameters is shared by the ranking forest and the
# final forest.
forest_params = {
    "n_estimators": 50,
    "max_depth": 10,
    "min_samples_split": 10,
    "min_samples_leaf": 5,
    "max_features": "sqrt",
    "random_state": 42,
}
rf = RandomForestClassifier(**forest_params)
rf.fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the median importance.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the kept features in column order together with their importance.
print("\nTop Important Features:")
for feature_idx, importance in zip(important_features, feature_importances[important_features]):
    print(f"{feature_names[feature_idx]}: Importance {importance:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**forest_params)
rf_selected.fit(X_train_selected, y_train)

# Output folder is named after the train/test probes, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the fitted scaler and the selected column indices.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and the predicted probability of class 1 on the test probe.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# Each entry: printed label, metric function, and the prediction array it
# scores against y_test (log loss uses probabilities, the rest use labels).
for metric_label, metric_fn, metric_input in (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
):
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix (rows = true class, columns = predicted class) and the
# error rates derived from it, with class 1 treated as the positive class.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
tn, fp, fn, tp = cm.ravel()
fp_rate = fp / (fp + tn)
fn_rate = fn / (fn + tp)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC3 CH1: (1998, 38)
✅ Loaded Probe3 ADC3 CH2: (1998, 38)
✅ Loaded Probe2 ADC3 CH1: (1998, 38)
✅ Loaded Probe2 ADC3 CH2: (1998, 38)

Top Important Features:
Max_H: Importance 0.0918
Min_H: Importance 0.0211
Mean_H: Importance 0.0998
Std_H: Importance 0.0101
Mean Deviation_H: Importance 0.0110
RMS_H: Importance 0.1347
Centroid_H: Importance 0.0005
Entropy_H: Importance 0.0176
Mean_Freq_H: Importance 0.0520
Variance_H: Importance 0.0640
Max_L: Importance 0.0028
Min_L: Importance 0.0187
Mean_L: Importance 0.1890
Mean Deviation_L: Importance 0.0100
RMS_L: Importance 0.1280
Kurtosis_L: Importance 0.0015
Entropy_L: Importance 0.0286
Mean_Freq_L: Importance 0.0113
Variance_L: Importance 0.1073

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0005
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      1998
        

#### **Sampling Rate 100MSPS**
This section contains data collected using a sampling rate of **100MSPS**. The model is trained on **Probe 3** and tested on **Probe 2**.



In [208]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder that contains one "P<n>_fd" sub-folder of feature files per probe.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selectors: ADC number, probe used for training, probe used for
# testing, and the sampling rate (the value that appears before "_MSPS" in the
# feature file names).
adc_num = 3
train_probe_dataset = 3
test_probe_dataset = 2
sample_rate = 100

# Probe labels are derived from the dataset numbers above.
train_probe, test_probe = (
    f"Probe{number}" for number in (train_probe_dataset, test_probe_dataset)
)

# Column names of the feature matrix: the same 19 statistics are listed once
# with the "_H" suffix and then once with the "_L" suffix (38 names total),
# ordered Time High, Freq High, Time Low, Freq Low.
_FEATURE_STEMS = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness", "Kurtosis",
    "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq", "Kurtosis_Freq",
    "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [f"{stem}_{band}" for band in ("H", "L") for stem in _FEATURE_STEMS]

def load_channel_data(probe_name, adc_num, channel, sample_rate_msps=None, data_root=None):
    """Load the saved feature array for one probe / ADC / channel.

    The file is looked up at
    ``<data_root>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``
    where ``<n>`` is the trailing number of ``probe_name``. The array is
    returned exactly as stored; no validation of its contents is performed.

    Args:
        probe_name: Probe label ending in its number, e.g. ``"Probe2"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.
        sample_rate_msps: Sampling rate used in the file name. Defaults to
            the notebook-level ``sample_rate``.
        data_root: Directory holding the ``P<n>_fd`` folders. Defaults to
            the notebook-level ``base_directory``.

    Returns:
        The array read by ``np.load``.

    Raises:
        FileNotFoundError: If the expected ``.npy`` file does not exist.
    """
    # Fall back to the notebook globals only when the caller gave no override,
    # so existing three-argument calls behave exactly as before.
    rate = sample_rate if sample_rate_msps is None else sample_rate_msps
    root = base_directory if data_root is None else data_root

    # Take every trailing digit so "Probe10" resolves to "P10_fd" rather than
    # "P0_fd"; a name without trailing digits keeps the old last-character rule.
    # NOTE(review): assumes multi-digit probes follow the same folder naming.
    probe_id = probe_name[len(probe_name.rstrip("0123456789")):] or probe_name[-1]

    folder = os.path.join(root, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Load the CH1 and CH2 feature arrays for the training probe, then for the
# test probe. NOTE: the *_ch0 / *_ch1 suffixes are class indices (0 and 1),
# not channel numbers -- they hold the CH1 and CH2 recordings respectively.
train_ch0, train_ch1 = [load_channel_data(train_probe, adc_num, ch) for ch in (1, 2)]
test_ch0, test_ch1 = [load_channel_data(test_probe, adc_num, ch) for ch in (1, 2)]

# Stack both channels row-wise; CH1 rows are labelled 0.0 and CH2 rows 1.0.
X_train = np.vstack([train_ch0, train_ch1])
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
X_test = np.vstack([test_ch0, test_ch1])
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only; the test probe is
# transformed with those same statistics.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# One set of hyper-parameters is shared by the ranking forest and the
# final forest.
forest_params = {
    "n_estimators": 50,
    "max_depth": 10,
    "min_samples_split": 10,
    "min_samples_leaf": 5,
    "max_features": "sqrt",
    "random_state": 42,
}
rf = RandomForestClassifier(**forest_params)
rf.fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the median importance.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the kept features in column order together with their importance.
print("\nTop Important Features:")
for feature_idx, importance in zip(important_features, feature_importances[important_features]):
    print(f"{feature_names[feature_idx]}: Importance {importance:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**forest_params)
rf_selected.fit(X_train_selected, y_train)

# Output folder is named after the train/test probes, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the fitted scaler and the selected column indices.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and the predicted probability of class 1 on the test probe.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# Each entry: printed label, metric function, and the prediction array it
# scores against y_test (log loss uses probabilities, the rest use labels).
for metric_label, metric_fn, metric_input in (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
):
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix (rows = true class, columns = predicted class) and the
# error rates derived from it, with class 1 treated as the positive class.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
tn, fp, fn, tp = cm.ravel()
fp_rate = fp / (fp + tn)
fn_rate = fn / (fn + tp)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC3 CH1: (998, 38)
✅ Loaded Probe3 ADC3 CH2: (998, 38)
✅ Loaded Probe2 ADC3 CH1: (998, 38)
✅ Loaded Probe2 ADC3 CH2: (998, 38)

Top Important Features:
Max_H: Importance 0.1196
Min_H: Importance 0.0044
Mean_H: Importance 0.0928
Std_H: Importance 0.0110
Mean Deviation_H: Importance 0.0270
RMS_H: Importance 0.1247
Centroid_H: Importance 0.0000
Entropy_H: Importance 0.0333
Spread_H: Importance 0.0015
Mean_Freq_H: Importance 0.0496
Variance_H: Importance 0.0980
Max_L: Importance 0.0087
Min_L: Importance 0.0298
Mean_L: Importance 0.1661
Mean Deviation_L: Importance 0.0101
RMS_L: Importance 0.1245
Entropy_L: Importance 0.0075
Variance_L: Importance 0.0911

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0000
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       998
         CH2       1.00      1.00      1.00  

#### **Sampling Rate 150MSPS**
This section contains data collected using a sampling rate of **150MSPS**. The model is trained on **Probe 3** and tested on **Probe 2**.



In [209]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder that contains one "P<n>_fd" sub-folder of feature files per probe.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selectors: ADC number, probe used for training, probe used for
# testing, and the sampling rate (the value that appears before "_MSPS" in the
# feature file names).
adc_num = 3
train_probe_dataset = 3
test_probe_dataset = 2
sample_rate = 150

# Probe labels are derived from the dataset numbers above.
train_probe, test_probe = (
    f"Probe{number}" for number in (train_probe_dataset, test_probe_dataset)
)

# Column names of the feature matrix: the same 19 statistics are listed once
# with the "_H" suffix and then once with the "_L" suffix (38 names total),
# ordered Time High, Freq High, Time Low, Freq Low.
_FEATURE_STEMS = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness", "Kurtosis",
    "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq", "Kurtosis_Freq",
    "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [f"{stem}_{band}" for band in ("H", "L") for stem in _FEATURE_STEMS]

def load_channel_data(probe_name, adc_num, channel, sample_rate_msps=None, data_root=None):
    """Load the saved feature array for one probe / ADC / channel.

    The file is looked up at
    ``<data_root>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``
    where ``<n>`` is the trailing number of ``probe_name``. The array is
    returned exactly as stored; no validation of its contents is performed.

    Args:
        probe_name: Probe label ending in its number, e.g. ``"Probe2"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.
        sample_rate_msps: Sampling rate used in the file name. Defaults to
            the notebook-level ``sample_rate``.
        data_root: Directory holding the ``P<n>_fd`` folders. Defaults to
            the notebook-level ``base_directory``.

    Returns:
        The array read by ``np.load``.

    Raises:
        FileNotFoundError: If the expected ``.npy`` file does not exist.
    """
    # Fall back to the notebook globals only when the caller gave no override,
    # so existing three-argument calls behave exactly as before.
    rate = sample_rate if sample_rate_msps is None else sample_rate_msps
    root = base_directory if data_root is None else data_root

    # Take every trailing digit so "Probe10" resolves to "P10_fd" rather than
    # "P0_fd"; a name without trailing digits keeps the old last-character rule.
    # NOTE(review): assumes multi-digit probes follow the same folder naming.
    probe_id = probe_name[len(probe_name.rstrip("0123456789")):] or probe_name[-1]

    folder = os.path.join(root, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Load the CH1 and CH2 feature arrays for the training probe, then for the
# test probe. NOTE: the *_ch0 / *_ch1 suffixes are class indices (0 and 1),
# not channel numbers -- they hold the CH1 and CH2 recordings respectively.
train_ch0, train_ch1 = [load_channel_data(train_probe, adc_num, ch) for ch in (1, 2)]
test_ch0, test_ch1 = [load_channel_data(test_probe, adc_num, ch) for ch in (1, 2)]

# Stack both channels row-wise; CH1 rows are labelled 0.0 and CH2 rows 1.0.
X_train = np.vstack([train_ch0, train_ch1])
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
X_test = np.vstack([test_ch0, test_ch1])
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only; the test probe is
# transformed with those same statistics.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# One set of hyper-parameters is shared by the ranking forest and the
# final forest.
forest_params = {
    "n_estimators": 50,
    "max_depth": 10,
    "min_samples_split": 10,
    "min_samples_leaf": 5,
    "max_features": "sqrt",
    "random_state": 42,
}
rf = RandomForestClassifier(**forest_params)
rf.fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the median importance.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the kept features in column order together with their importance.
print("\nTop Important Features:")
for feature_idx, importance in zip(important_features, feature_importances[important_features]):
    print(f"{feature_names[feature_idx]}: Importance {importance:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**forest_params)
rf_selected.fit(X_train_selected, y_train)

# Output folder is named after the train/test probes, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the fitted scaler and the selected column indices.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and the predicted probability of class 1 on the test probe.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# Each entry: printed label, metric function, and the prediction array it
# scores against y_test (log loss uses probabilities, the rest use labels).
for metric_label, metric_fn, metric_input in (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
):
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix (rows = true class, columns = predicted class) and the
# error rates derived from it, with class 1 treated as the positive class.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
tn, fp, fn, tp = cm.ravel()
fp_rate = fp / (fp + tn)
fn_rate = fn / (fn + tp)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC3 CH1: (665, 38)
✅ Loaded Probe3 ADC3 CH2: (665, 38)
✅ Loaded Probe2 ADC3 CH1: (665, 38)
✅ Loaded Probe2 ADC3 CH2: (665, 38)

Top Important Features:
Max_H: Importance 0.1017
Mean_H: Importance 0.0875
Std_H: Importance 0.0299
Mean Deviation_H: Importance 0.0443
RMS_H: Importance 0.1300
Entropy_H: Importance 0.0151
Mean_Freq_H: Importance 0.0147
Irregularity_H: Importance 0.0056
Variance_H: Importance 0.0216
Max_L: Importance 0.0000
Min_L: Importance 0.0409
Mean_L: Importance 0.1808
Std_L: Importance 0.0041
Mean Deviation_L: Importance 0.1105
RMS_L: Importance 0.1201
Entropy_L: Importance 0.0124
Variance_L: Importance 0.0810

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0089
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       665
         CH2       1.00      1.00      1.00       665

    accuracy  

#### **Sampling Rate 200MSPS**
This section contains data collected using a sampling rate of **200MSPS**. The model is trained on **Probe 3** and tested on **Probe 2**.



In [210]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder that contains one "P<n>_fd" sub-folder of feature files per probe.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selectors: ADC number, probe used for training, probe used for
# testing, and the sampling rate (the value that appears before "_MSPS" in the
# feature file names).
adc_num = 3
train_probe_dataset = 3
test_probe_dataset = 2
sample_rate = 200

# Probe labels are derived from the dataset numbers above.
train_probe, test_probe = (
    f"Probe{number}" for number in (train_probe_dataset, test_probe_dataset)
)

# Column names of the feature matrix: the same 19 statistics are listed once
# with the "_H" suffix and then once with the "_L" suffix (38 names total),
# ordered Time High, Freq High, Time Low, Freq Low.
_FEATURE_STEMS = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness", "Kurtosis",
    "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq", "Kurtosis_Freq",
    "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [f"{stem}_{band}" for band in ("H", "L") for stem in _FEATURE_STEMS]

def load_channel_data(probe_name, adc_num, channel, sample_rate_msps=None, data_root=None):
    """Load the saved feature array for one probe / ADC / channel.

    The file is looked up at
    ``<data_root>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``
    where ``<n>`` is the trailing number of ``probe_name``. The array is
    returned exactly as stored; no validation of its contents is performed.

    Args:
        probe_name: Probe label ending in its number, e.g. ``"Probe2"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.
        sample_rate_msps: Sampling rate used in the file name. Defaults to
            the notebook-level ``sample_rate``.
        data_root: Directory holding the ``P<n>_fd`` folders. Defaults to
            the notebook-level ``base_directory``.

    Returns:
        The array read by ``np.load``.

    Raises:
        FileNotFoundError: If the expected ``.npy`` file does not exist.
    """
    # Fall back to the notebook globals only when the caller gave no override,
    # so existing three-argument calls behave exactly as before.
    rate = sample_rate if sample_rate_msps is None else sample_rate_msps
    root = base_directory if data_root is None else data_root

    # Take every trailing digit so "Probe10" resolves to "P10_fd" rather than
    # "P0_fd"; a name without trailing digits keeps the old last-character rule.
    # NOTE(review): assumes multi-digit probes follow the same folder naming.
    probe_id = probe_name[len(probe_name.rstrip("0123456789")):] or probe_name[-1]

    folder = os.path.join(root, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Load the CH1 and CH2 feature arrays for the training probe, then for the
# test probe. NOTE: the *_ch0 / *_ch1 suffixes are class indices (0 and 1),
# not channel numbers -- they hold the CH1 and CH2 recordings respectively.
train_ch0, train_ch1 = [load_channel_data(train_probe, adc_num, ch) for ch in (1, 2)]
test_ch0, test_ch1 = [load_channel_data(test_probe, adc_num, ch) for ch in (1, 2)]

# Stack both channels row-wise; CH1 rows are labelled 0.0 and CH2 rows 1.0.
X_train = np.vstack([train_ch0, train_ch1])
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
X_test = np.vstack([test_ch0, test_ch1])
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only; the test probe is
# transformed with those same statistics.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# One set of hyper-parameters is shared by the ranking forest and the
# final forest.
forest_params = {
    "n_estimators": 50,
    "max_depth": 10,
    "min_samples_split": 10,
    "min_samples_leaf": 5,
    "max_features": "sqrt",
    "random_state": 42,
}
rf = RandomForestClassifier(**forest_params)
rf.fit(X_train_scaled, y_train)

# Keep the columns whose importance is strictly above the median importance.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Report the kept features in column order together with their importance.
print("\nTop Important Features:")
for feature_idx, importance in zip(important_features, feature_importances[important_features]):
    print(f"{feature_names[feature_idx]}: Importance {importance:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**forest_params)
rf_selected.fit(X_train_selected, y_train)

# Output folder is named after the train/test probes, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the fitted scaler and the selected column indices.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Hard predictions and the predicted probability of class 1 on the test probe.
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# Each entry: printed label, metric function, and the prediction array it
# scores against y_test (log loss uses probabilities, the rest use labels).
for metric_label, metric_fn, metric_input in (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
):
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix (rows = true class, columns = predicted class) and the
# error rates derived from it, with class 1 treated as the positive class.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
tn, fp, fn, tp = cm.ravel()
fp_rate = fp / (fp + tn)
fn_rate = fn / (fn + tp)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC3 CH1: (498, 38)
✅ Loaded Probe3 ADC3 CH2: (498, 38)
✅ Loaded Probe2 ADC3 CH1: (498, 38)
✅ Loaded Probe2 ADC3 CH2: (498, 38)

Top Important Features:
Max_H: Importance 0.0600
Mean_H: Importance 0.0800
Mean Deviation_H: Importance 0.0402
RMS_H: Importance 0.1245
Entropy_H: Importance 0.0182
Mean_Freq_H: Importance 0.0180
Kurtosis_Freq_H: Importance 0.0103
Variance_H: Importance 0.0200
Min_L: Importance 0.0876
Mean_L: Importance 0.1600
Std_L: Importance 0.0398
Mean Deviation_L: Importance 0.0800
RMS_L: Importance 0.1020
Entropy_L: Importance 0.0600
Mean_Freq_L: Importance 0.0154
Variance_L: Importance 0.0840

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0004
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       498
         CH2       1.00      1.00      1.00       498

    accuracy                    

#### **Sampling Rate 250MSPS**
This section contains data collected using a sampling rate of **250MSPS**. The model is trained on **Probe 3** and tested on **Probe 2**.



In [211]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder that contains one "P<n>_fd" sub-folder of feature files per probe.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selectors: ADC number, probe used for training, probe used for
# testing, and the sampling rate (the value that appears before "_MSPS" in the
# feature file names).
adc_num = 3
train_probe_dataset = 3
test_probe_dataset = 2
sample_rate = 250

# Probe labels are derived from the dataset numbers above.
train_probe, test_probe = (
    f"Probe{number}" for number in (train_probe_dataset, test_probe_dataset)
)

# Column names of the feature matrix: the same 19 statistics are listed once
# with the "_H" suffix and then once with the "_L" suffix (38 names total),
# ordered Time High, Freq High, Time Low, Freq Low.
_FEATURE_STEMS = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness", "Kurtosis",
    "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq", "Kurtosis_Freq",
    "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [f"{stem}_{band}" for band in ("H", "L") for stem in _FEATURE_STEMS]

def load_channel_data(probe_name, adc_num, channel):
    """Load the feature array of one ADC channel recorded with one probe.

    The file is looked up at
    ``<base_directory>/P<id>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``
    where ``base_directory`` and ``sample_rate`` are read from module scope
    and ``<id>`` is the number at the end of ``probe_name``.

    Args:
        probe_name: Probe label such as ``"Probe3"``.
        adc_num: ADC index inserted into the file name.
        channel: Channel index inserted into the file name.

    Returns:
        The object returned by ``np.load`` for that file.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Use the whole trailing number so a multi-digit probe gets its own
    # folder ("Probe12" -> "P12_fd"); taking only the last character would
    # send it to "P2_fd". Labels without trailing digits keep the
    # last-character rule.
    label_stem = probe_name.rstrip("0123456789")
    probe_id = probe_name[len(label_stem):] or probe_name[-1]

    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path so a wrong probe/ADC/rate combination is
    # easy to spot.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Channel 1 is loaded before channel 2, training probe before test probe.
train_ch0, train_ch1 = [load_channel_data(train_probe, adc_num, ch) for ch in (1, 2)]
test_ch0, test_ch1 = [load_channel_data(test_probe, adc_num, ch) for ch in (1, 2)]

# Stack both channels row-wise; channel-1 rows are labelled 0 and
# channel-2 rows are labelled 1.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.concatenate((np.zeros(train_ch0.shape[0]), np.ones(train_ch1.shape[0])))
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.concatenate((np.zeros(test_ch0.shape[0]), np.ones(test_ch1.shape[0])))

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and then reused as-is
# for the test probe.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# One set of hyper-parameters shared by the ranking forest and the final one.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params)
rf.fit(X_train_scaled, y_train)

# Identify Important Features: keep the columns whose importance is
# strictly above the 50th percentile of all importances.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.where(feature_importances > importance_cutoff)[0]
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features (in column order, not sorted by importance)
print("\nTop Important Features:")
for feature_idx in important_features:
    selected_name = feature_names[feature_idx]
    selected_weight = feature_importances[feature_idx]
    print(f"{selected_name}: Importance {selected_weight:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params)
rf_selected.fit(X_train_selected, y_train)

# Create a unique folder name dynamically
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the Model, Scaler and selected column indices
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Predictions: hard labels plus the probability of class 1 (column 1)
y_pred_selected = rf_selected.predict(X_test_selected)
class_probabilities = rf_selected.predict_proba(X_test_selected)
y_probs_selected = class_probabilities[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# Each row: printed label, metric function, and the prediction array it
# scores against y_test (log loss uses probabilities, the rest use labels).
for metric_label, metric_fn, metric_input in (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
):
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Row 0 holds the samples labelled 0, row 1 the samples labelled 1.
false_pos, true_neg = cm[0, 1], cm[0, 0]
false_neg, true_pos = cm[1, 0], cm[1, 1]
fp_rate = false_pos / (false_pos + true_neg)
fn_rate = false_neg / (false_neg + true_pos)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC3 CH1: (398, 38)
✅ Loaded Probe3 ADC3 CH2: (398, 38)
✅ Loaded Probe2 ADC3 CH1: (398, 38)
✅ Loaded Probe2 ADC3 CH2: (398, 38)

Top Important Features:
Max_H: Importance 0.0600
Mean_H: Importance 0.0829
Mean Deviation_H: Importance 0.0400
RMS_H: Importance 0.1024
Peak-to-Peak_H: Importance 0.0007
Entropy_H: Importance 0.1000
Mean_Freq_H: Importance 0.0198
Kurtosis_Freq_H: Importance 0.0142
Irregularity_H: Importance 0.0160
Variance_H: Importance 0.0200
Min_L: Importance 0.1220
Mean_L: Importance 0.1000
Std_L: Importance 0.0799
Mean Deviation_L: Importance 0.0600
RMS_L: Importance 0.0831
Entropy_L: Importance 0.0193
Mean_Freq_L: Importance 0.0197
Variance_L: Importance 0.0600

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0092
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       398
         CH2       

#### **Sampling Rate 300MSPS**
This section contains data collected using a sampling rate of **300MSPS**. The model is trained on **Probe 3** and tested on **Probe 2**.



In [212]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder that holds one "P<n>_fd" sub-folder of feature files per probe.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selectors used to build file names and the model folder name.
adc_num = 3
train_probe_dataset = 3
test_probe_dataset = 2
sample_rate = 300

# Probe labels derived from the dataset numbers above.
train_probe = "Probe" + str(train_probe_dataset)
test_probe = "Probe" + str(test_probe_dataset)

# Feature Names (Time High, Freq High, Time Low, Freq Low):
# the same 19 descriptors are listed once with the "_H" suffix and then
# once with the "_L" suffix, giving 38 names in total.
feature_names = [
    f"{descriptor}_{suffix}"
    for suffix in ("H", "L")
    for descriptor in (
        "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS",
        "Skewness", "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
        "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
        "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
    )
]

def load_channel_data(probe_name, adc_num, channel):
    """Load the feature array of one ADC channel recorded with one probe.

    The file is looked up at
    ``<base_directory>/P<id>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``
    where ``base_directory`` and ``sample_rate`` are read from module scope
    and ``<id>`` is the number at the end of ``probe_name``.

    Args:
        probe_name: Probe label such as ``"Probe3"``.
        adc_num: ADC index inserted into the file name.
        channel: Channel index inserted into the file name.

    Returns:
        The object returned by ``np.load`` for that file.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Use the whole trailing number so a multi-digit probe gets its own
    # folder ("Probe12" -> "P12_fd"); taking only the last character would
    # send it to "P2_fd". Labels without trailing digits keep the
    # last-character rule.
    label_stem = probe_name.rstrip("0123456789")
    probe_id = probe_name[len(label_stem):] or probe_name[-1]

    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path so a wrong probe/ADC/rate combination is
    # easy to spot.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Channel 1 is loaded before channel 2, training probe before test probe.
train_ch0, train_ch1 = [load_channel_data(train_probe, adc_num, ch) for ch in (1, 2)]
test_ch0, test_ch1 = [load_channel_data(test_probe, adc_num, ch) for ch in (1, 2)]

# Stack both channels row-wise; channel-1 rows are labelled 0 and
# channel-2 rows are labelled 1.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.concatenate((np.zeros(train_ch0.shape[0]), np.ones(train_ch1.shape[0])))
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.concatenate((np.zeros(test_ch0.shape[0]), np.ones(test_ch1.shape[0])))

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and then reused as-is
# for the test probe.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# One set of hyper-parameters shared by the ranking forest and the final one.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params)
rf.fit(X_train_scaled, y_train)

# Identify Important Features: keep the columns whose importance is
# strictly above the 50th percentile of all importances.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.where(feature_importances > importance_cutoff)[0]
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features (in column order, not sorted by importance)
print("\nTop Important Features:")
for feature_idx in important_features:
    selected_name = feature_names[feature_idx]
    selected_weight = feature_importances[feature_idx]
    print(f"{selected_name}: Importance {selected_weight:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params)
rf_selected.fit(X_train_selected, y_train)

# Create a unique folder name dynamically
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the Model, Scaler and selected column indices
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Predictions: hard labels plus the probability of class 1 (column 1)
y_pred_selected = rf_selected.predict(X_test_selected)
class_probabilities = rf_selected.predict_proba(X_test_selected)
y_probs_selected = class_probabilities[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# Each row: printed label, metric function, and the prediction array it
# scores against y_test (log loss uses probabilities, the rest use labels).
for metric_label, metric_fn, metric_input in (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
):
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Row 0 holds the samples labelled 0, row 1 the samples labelled 1.
false_pos, true_neg = cm[0, 1], cm[0, 0]
false_neg, true_pos = cm[1, 0], cm[1, 1]
fp_rate = false_pos / (false_pos + true_neg)
fn_rate = false_neg / (false_neg + true_pos)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe3 ADC3 CH1: (331, 38)
✅ Loaded Probe3 ADC3 CH2: (332, 38)
✅ Loaded Probe2 ADC3 CH1: (332, 38)
✅ Loaded Probe2 ADC3 CH2: (332, 38)

Top Important Features:
Max_H: Importance 0.0600
Mean_H: Importance 0.0800
Mean Deviation_H: Importance 0.0200
RMS_H: Importance 0.1011
Entropy_H: Importance 0.0800
Mean_Freq_H: Importance 0.0401
Kurtosis_Freq_H: Importance 0.0182
Min_L: Importance 0.0600
Mean_L: Importance 0.1200
Std_L: Importance 0.1000
Mean Deviation_L: Importance 0.0600
RMS_L: Importance 0.0800
Peak-to-Peak_L: Importance 0.0007
Entropy_L: Importance 0.0600
Mean_Freq_L: Importance 0.0399
Variance_L: Importance 0.0800

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0050
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       332
         CH2       1.00      1.00      1.00       332

    accuracy                