# ADC1

## **Testing of Probe 1**
To conduct experiments, **ADC1** (data acquisition device) is used consistently throughout. **Probe 1** is kept constant, and the machine learning model is **always trained on Probe 1**. The model is then tested on **Probe 2** and **Probe 3** to evaluate its generalization.

The **sampling rate** is varied across different tests, ranging from **10 MSPS to 300 MSPS**.


## Training on Probe 1 Data and Testing on Probe 2 Data
The model trained on Probe 1 data is evaluated on data acquired with Probe 2, to test how well it copes with a change of probe.

## **Sampling Rate 10MSPS**
This section contains data collected using a sampling rate of **10MSPS**. The model is trained on **Probe 1** and tested on **Probe 2**.

In [8]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder of the feature dumps; load_channel_data() looks for the
# per-probe "P<n>_fd" sub-folders underneath it.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selectors used to build file names and the model folder name.
adc_num = 1
train_probe_dataset = 1
test_probe_dataset = 2
sample_rate = 10

# Automatically determine probe names based on dataset numbers
train_probe = f"Probe{train_probe_dataset}"
test_probe = f"Probe{test_probe_dataset}"

# Feature Names (Time High, Freq High, Time Low, Freq Low)
# Each band ("H", then "L") carries the same 10 time-domain and 9
# frequency-domain statistics, so the 38 names are generated per band.
_TIME_STATS = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
)
_FREQ_STATS = (
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [
    f"{stat}_{band}"
    for band in ("H", "L")
    for stat in _TIME_STATS + _FREQ_STATS
]

def load_channel_data(probe_name, adc_num, channel, rate_msps=None):
    """Load the feature array stored for one probe / ADC / channel.

    The file is resolved as
    ``<base_directory>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``
    where ``<n>`` is the trailing number of ``probe_name``.

    Args:
        probe_name: Probe label ending in its number, e.g. ``"Probe2"``.
        adc_num: ADC index used in the file name.
        channel: Channel index used in the file name.
        rate_msps: Sampling rate used in the file name. When omitted, the
            module-level ``sample_rate`` is used, so existing three-argument
            calls behave as before.

    Returns:
        The array returned by ``np.load`` for the resolved path.

    Raises:
        FileNotFoundError: If the resolved path does not exist.

    Note:
        No ADC consistency check is performed; ``adc_num`` only selects
        which file is read.
    """
    if rate_msps is None:
        rate_msps = sample_rate

    # Take the whole trailing number rather than only the last character, so
    # "Probe10" maps to "P10_fd" instead of "P0_fd". Names without trailing
    # digits fall back to the last character, as before.
    probe_id = probe_name[len(probe_name.rstrip("0123456789")):] or probe_name[-1]

    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate_msps}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path in the message so a wrong folder or
    # sampling rate is easy to spot.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Channels 1 and 2 are read in that order for each probe. The *_ch0 / *_ch1
# suffixes follow the class labels (0 / 1), not the channel numbers.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Stack the two channels row-wise; rows of the first channel get label 0.0
# and rows of the second channel get label 1.0.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and then applied
# unchanged to the test probe.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests in this cell share the same hyper-parameters and seed.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Identify Important Features: keep the columns whose importance is
# strictly above the 50th percentile of all importances.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features (in column order, not ranked by importance)
print("\nTop Important Features:")
for feature_idx, importance in zip(important_features, feature_importances[important_features]):
    print(f"{feature_names[feature_idx]}: Importance {importance:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# Create a unique folder name dynamically
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the model, the scaler and the indices of the selected features
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Predictions: hard labels plus the probability of class 1
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# Each metric is computed and printed in turn; log loss is the only one
# fed with probabilities instead of hard labels.
for metric_label, metric_fn, metric_input in (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
):
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Row 0 holds the true class-0 samples, row 1 the true class-1 samples.
fp, tn = cm[0, 1], cm[0, 0]
fn, tp = cm[1, 0], cm[1, 1]
fp_rate = fp / (fp + tn)
fn_rate = fn / (fn + tp)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC1 CH1: (9998, 38)
✅ Loaded Probe1 ADC1 CH2: (9998, 38)
✅ Loaded Probe2 ADC1 CH1: (9998, 38)
✅ Loaded Probe2 ADC1 CH2: (9998, 38)

Top Important Features:
Max_H: Importance 0.0527
Min_H: Importance 0.0141
Mean_H: Importance 0.0989
Std_H: Importance 0.0076
Mean Deviation_H: Importance 0.0216
RMS_H: Importance 0.1667
Entropy_H: Importance 0.0460
Mean_Freq_H: Importance 0.0641
Variance_H: Importance 0.0496
Max_L: Importance 0.0324
Min_L: Importance 0.0696
Mean_L: Importance 0.0698
Std_L: Importance 0.0075
RMS_L: Importance 0.0890
Skewness_L: Importance 0.0072
Centroid_L: Importance 0.0114
Entropy_L: Importance 0.0299
Mean_Freq_L: Importance 0.0159
Variance_L: Importance 0.0973

Model Performance After Feature Selection:
Test Accuracy: 0.9518
Balanced Accuracy: 0.9518
MCC: 0.9064
Log Loss: 0.1153
F1 Score: 0.9536
Recall: 0.9912
Precision: 0.9188
              precision    recall  f1-score   support

         CH1       0.99      0.91      0.95      9998
         CH2       

## **Sampling Rate 20MSPS**
This section contains data collected using a sampling rate of **20MSPS**. The model is trained on **Probe 1** and tested on **Probe 2**.

In [9]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder of the feature dumps; load_channel_data() looks for the
# per-probe "P<n>_fd" sub-folders underneath it.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selectors used to build file names and the model folder name.
adc_num = 1
train_probe_dataset = 1
test_probe_dataset = 2
sample_rate = 20

# Automatically determine probe names based on dataset numbers
train_probe = f"Probe{train_probe_dataset}"
test_probe = f"Probe{test_probe_dataset}"

# Feature Names (Time High, Freq High, Time Low, Freq Low)
# Each band ("H", then "L") carries the same 10 time-domain and 9
# frequency-domain statistics, so the 38 names are generated per band.
_TIME_STATS = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
)
_FREQ_STATS = (
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [
    f"{stat}_{band}"
    for band in ("H", "L")
    for stat in _TIME_STATS + _FREQ_STATS
]

def load_channel_data(probe_name, adc_num, channel, rate_msps=None):
    """Load the feature array stored for one probe / ADC / channel.

    The file is resolved as
    ``<base_directory>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``
    where ``<n>`` is the trailing number of ``probe_name``.

    Args:
        probe_name: Probe label ending in its number, e.g. ``"Probe2"``.
        adc_num: ADC index used in the file name.
        channel: Channel index used in the file name.
        rate_msps: Sampling rate used in the file name. When omitted, the
            module-level ``sample_rate`` is used, so existing three-argument
            calls behave as before.

    Returns:
        The array returned by ``np.load`` for the resolved path.

    Raises:
        FileNotFoundError: If the resolved path does not exist.

    Note:
        No ADC consistency check is performed; ``adc_num`` only selects
        which file is read.
    """
    if rate_msps is None:
        rate_msps = sample_rate

    # Take the whole trailing number rather than only the last character, so
    # "Probe10" maps to "P10_fd" instead of "P0_fd". Names without trailing
    # digits fall back to the last character, as before.
    probe_id = probe_name[len(probe_name.rstrip("0123456789")):] or probe_name[-1]

    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate_msps}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path in the message so a wrong folder or
    # sampling rate is easy to spot.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Channels 1 and 2 are read in that order for each probe. The *_ch0 / *_ch1
# suffixes follow the class labels (0 / 1), not the channel numbers.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Stack the two channels row-wise; rows of the first channel get label 0.0
# and rows of the second channel get label 1.0.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and then applied
# unchanged to the test probe.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests in this cell share the same hyper-parameters and seed.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Identify Important Features: keep the columns whose importance is
# strictly above the 50th percentile of all importances.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features (in column order, not ranked by importance)
print("\nTop Important Features:")
for feature_idx, importance in zip(important_features, feature_importances[important_features]):
    print(f"{feature_names[feature_idx]}: Importance {importance:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# Create a unique folder name dynamically
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the model, the scaler and the indices of the selected features
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Predictions: hard labels plus the probability of class 1
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# Each metric is computed and printed in turn; log loss is the only one
# fed with probabilities instead of hard labels.
for metric_label, metric_fn, metric_input in (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
):
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Row 0 holds the true class-0 samples, row 1 the true class-1 samples.
fp, tn = cm[0, 1], cm[0, 0]
fn, tp = cm[1, 0], cm[1, 1]
fp_rate = fp / (fp + tn)
fn_rate = fn / (fn + tp)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC1 CH1: (4998, 38)
✅ Loaded Probe1 ADC1 CH2: (4998, 38)
✅ Loaded Probe2 ADC1 CH1: (4998, 38)
✅ Loaded Probe2 ADC1 CH2: (4998, 38)

Top Important Features:
Max_H: Importance 0.1224
Min_H: Importance 0.0284
Mean_H: Importance 0.1486
Mean Deviation_H: Importance 0.0095
RMS_H: Importance 0.2172
Centroid_H: Importance 0.0009
Entropy_H: Importance 0.0173
Mean_Freq_H: Importance 0.0727
Variance_H: Importance 0.0400
Max_L: Importance 0.0024
Min_L: Importance 0.0255
Mean_L: Importance 0.1253
Std_L: Importance 0.0012
RMS_L: Importance 0.1210
Peak-to-Peak_L: Importance 0.0018
Entropy_L: Importance 0.0283
Spread_L: Importance 0.0004
Mean_Freq_L: Importance 0.0108
Variance_L: Importance 0.0256

Model Performance After Feature Selection:
Test Accuracy: 0.9999
Balanced Accuracy: 0.9999
MCC: 0.9998
Log Loss: 0.0020
F1 Score: 0.9999
Recall: 0.9998
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      4998
         CH2

## **Sampling Rate 30MSPS**
This section contains data collected using a sampling rate of **30MSPS**. The model is trained on **Probe 1** and tested on **Probe 2**.

In [11]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder of the feature dumps; load_channel_data() looks for the
# per-probe "P<n>_fd" sub-folders underneath it.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selectors used to build file names and the model folder name.
adc_num = 1
train_probe_dataset = 1
test_probe_dataset = 2
sample_rate = 30

# Automatically determine probe names based on dataset numbers
train_probe = f"Probe{train_probe_dataset}"
test_probe = f"Probe{test_probe_dataset}"

# Feature Names (Time High, Freq High, Time Low, Freq Low)
# Each band ("H", then "L") carries the same 10 time-domain and 9
# frequency-domain statistics, so the 38 names are generated per band.
_TIME_STATS = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
)
_FREQ_STATS = (
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [
    f"{stat}_{band}"
    for band in ("H", "L")
    for stat in _TIME_STATS + _FREQ_STATS
]

def load_channel_data(probe_name, adc_num, channel, rate_msps=None):
    """Load the feature array stored for one probe / ADC / channel.

    The file is resolved as
    ``<base_directory>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``
    where ``<n>`` is the trailing number of ``probe_name``.

    Args:
        probe_name: Probe label ending in its number, e.g. ``"Probe2"``.
        adc_num: ADC index used in the file name.
        channel: Channel index used in the file name.
        rate_msps: Sampling rate used in the file name. When omitted, the
            module-level ``sample_rate`` is used, so existing three-argument
            calls behave as before.

    Returns:
        The array returned by ``np.load`` for the resolved path.

    Raises:
        FileNotFoundError: If the resolved path does not exist.

    Note:
        No ADC consistency check is performed; ``adc_num`` only selects
        which file is read.
    """
    if rate_msps is None:
        rate_msps = sample_rate

    # Take the whole trailing number rather than only the last character, so
    # "Probe10" maps to "P10_fd" instead of "P0_fd". Names without trailing
    # digits fall back to the last character, as before.
    probe_id = probe_name[len(probe_name.rstrip("0123456789")):] or probe_name[-1]

    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate_msps}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path in the message so a wrong folder or
    # sampling rate is easy to spot.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Channels 1 and 2 are read in that order for each probe. The *_ch0 / *_ch1
# suffixes follow the class labels (0 / 1), not the channel numbers.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Stack the two channels row-wise; rows of the first channel get label 0.0
# and rows of the second channel get label 1.0.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and then applied
# unchanged to the test probe.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests in this cell share the same hyper-parameters and seed.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Identify Important Features: keep the columns whose importance is
# strictly above the 50th percentile of all importances.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features (in column order, not ranked by importance)
print("\nTop Important Features:")
for feature_idx, importance in zip(important_features, feature_importances[important_features]):
    print(f"{feature_names[feature_idx]}: Importance {importance:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# Create a unique folder name dynamically
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the model, the scaler and the indices of the selected features
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Predictions: hard labels plus the probability of class 1
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# Each metric is computed and printed in turn; log loss is the only one
# fed with probabilities instead of hard labels.
for metric_label, metric_fn, metric_input in (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
):
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Row 0 holds the true class-0 samples, row 1 the true class-1 samples.
fp, tn = cm[0, 1], cm[0, 0]
fn, tp = cm[1, 0], cm[1, 1]
fp_rate = fp / (fp + tn)
fn_rate = fn / (fn + tp)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC1 CH1: (3332, 38)
✅ Loaded Probe1 ADC1 CH2: (3332, 38)
✅ Loaded Probe2 ADC1 CH1: (3332, 38)
✅ Loaded Probe2 ADC1 CH2: (3331, 38)

Top Important Features:
Max_H: Importance 0.0683
Min_H: Importance 0.0315
Mean_H: Importance 0.0935
Std_H: Importance 0.0002
Mean Deviation_H: Importance 0.0067
RMS_H: Importance 0.1295
Centroid_H: Importance 0.0015
Entropy_H: Importance 0.0179
Mean_Freq_H: Importance 0.0551
Kurtosis_Freq_H: Importance 0.0001
Variance_H: Importance 0.0675
Max_L: Importance 0.0063
Min_L: Importance 0.0191
Mean_L: Importance 0.2050
RMS_L: Importance 0.1282
Peak-to-Peak_L: Importance 0.0001
Entropy_L: Importance 0.0234
Mean_Freq_L: Importance 0.0096
Variance_L: Importance 0.1366

Model Performance After Feature Selection:
Test Accuracy: 0.9973
Balanced Accuracy: 0.9973
MCC: 0.9946
Log Loss: 0.0052
F1 Score: 0.9973
Recall: 0.9949
Precision: 0.9997
              precision    recall  f1-score   support

         CH1       0.99      1.00      1.00      3332
     

## **Sampling Rate 40MSPS**
This section contains data collected using a sampling rate of **40MSPS**. The model is trained on **Probe 1** and tested on **Probe 2**.

In [12]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder of the feature dumps; load_channel_data() looks for the
# per-probe "P<n>_fd" sub-folders underneath it.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selectors used to build file names and the model folder name.
adc_num = 1
train_probe_dataset = 1
test_probe_dataset = 2
sample_rate = 40

# Automatically determine probe names based on dataset numbers
train_probe = f"Probe{train_probe_dataset}"
test_probe = f"Probe{test_probe_dataset}"

# Feature Names (Time High, Freq High, Time Low, Freq Low)
# Each band ("H", then "L") carries the same 10 time-domain and 9
# frequency-domain statistics, so the 38 names are generated per band.
_TIME_STATS = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
)
_FREQ_STATS = (
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [
    f"{stat}_{band}"
    for band in ("H", "L")
    for stat in _TIME_STATS + _FREQ_STATS
]

def load_channel_data(probe_name, adc_num, channel, rate_msps=None):
    """Load the feature array stored for one probe / ADC / channel.

    The file is resolved as
    ``<base_directory>/P<n>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<rate>_MSPS.npy``
    where ``<n>`` is the trailing number of ``probe_name``.

    Args:
        probe_name: Probe label ending in its number, e.g. ``"Probe2"``.
        adc_num: ADC index used in the file name.
        channel: Channel index used in the file name.
        rate_msps: Sampling rate used in the file name. When omitted, the
            module-level ``sample_rate`` is used, so existing three-argument
            calls behave as before.

    Returns:
        The array returned by ``np.load`` for the resolved path.

    Raises:
        FileNotFoundError: If the resolved path does not exist.

    Note:
        No ADC consistency check is performed; ``adc_num`` only selects
        which file is read.
    """
    if rate_msps is None:
        rate_msps = sample_rate

    # Take the whole trailing number rather than only the last character, so
    # "Probe10" maps to "P10_fd" instead of "P0_fd". Names without trailing
    # digits fall back to the last character, as before.
    probe_id = probe_name[len(probe_name.rstrip("0123456789")):] or probe_name[-1]

    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{rate_msps}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path in the message so a wrong folder or
    # sampling rate is easy to spot.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Channels 1 and 2 are read in that order for each probe. The *_ch0 / *_ch1
# suffixes follow the class labels (0 / 1), not the channel numbers.
train_ch0, train_ch1 = (load_channel_data(train_probe, adc_num, ch) for ch in (1, 2))
test_ch0, test_ch1 = (load_channel_data(test_probe, adc_num, ch) for ch in (1, 2))

# Stack the two channels row-wise; rows of the first channel get label 0.0
# and rows of the second channel get label 1.0.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training probe only and then applied
# unchanged to the test probe.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Both forests in this cell share the same hyper-parameters and seed.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Identify Important Features: keep the columns whose importance is
# strictly above the 50th percentile of all importances.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features (in column order, not ranked by importance)
print("\nTop Important Features:")
for feature_idx, importance in zip(important_features, feature_importances[important_features]):
    print(f"{feature_names[feature_idx]}: Importance {importance:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# Create a unique folder name dynamically
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the model, the scaler and the indices of the selected features
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Predictions: hard labels plus the probability of class 1
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# Each metric is computed and printed in turn; log loss is the only one
# fed with probabilities instead of hard labels.
for metric_label, metric_fn, metric_input in (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
):
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Row 0 holds the true class-0 samples, row 1 the true class-1 samples.
fp, tn = cm[0, 1], cm[0, 0]
fn, tp = cm[1, 0], cm[1, 1]
fp_rate = fp / (fp + tn)
fn_rate = fn / (fn + tp)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC1 CH1: (2481, 38)
✅ Loaded Probe1 ADC1 CH2: (2481, 38)
✅ Loaded Probe2 ADC1 CH1: (2481, 38)
✅ Loaded Probe2 ADC1 CH2: (2481, 38)

Top Important Features:
Max_H: Importance 0.0684
Min_H: Importance 0.0280
Mean_H: Importance 0.0961
Std_H: Importance 0.0123
Mean Deviation_H: Importance 0.0169
RMS_H: Importance 0.1322
Centroid_H: Importance 0.0000
Entropy_H: Importance 0.0144
Skewness_Freq_H: Importance 0.0000
Mean_Freq_H: Importance 0.0579
Variance_H: Importance 0.0606
Max_L: Importance 0.0001
Min_L: Importance 0.0165
Mean_L: Importance 0.1996
RMS_L: Importance 0.1272
Peak-to-Peak_L: Importance 0.0002
Entropy_L: Importance 0.0301
Mean_Freq_L: Importance 0.0124
Variance_L: Importance 0.1273

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0000
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      2481
     

## **Sampling Rate 50MSPS**
This section contains data collected using a sampling rate of **50MSPS**. The model is trained on **Probe 1** and tested on **Probe 2**.

In [13]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder of the feature dumps; load_channel_data() looks for the
# per-probe "P<n>_fd" sub-folders underneath it.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# Experiment selectors used to build file names and the model folder name.
adc_num = 1
train_probe_dataset = 1
test_probe_dataset = 2
sample_rate = 50

# Automatically determine probe names based on dataset numbers
train_probe = f"Probe{train_probe_dataset}"
test_probe = f"Probe{test_probe_dataset}"

# Feature Names (Time High, Freq High, Time Low, Freq Low)
# Each band ("H", then "L") carries the same 10 time-domain and 9
# frequency-domain statistics, so the 38 names are generated per band.
_TIME_STATS = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
)
_FREQ_STATS = (
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [
    f"{stat}_{band}"
    for band in ("H", "L")
    for stat in _TIME_STATS + _FREQ_STATS
]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel.

    The file is looked up under the module-level ``base_directory``, in the
    sub-folder ``P<last character of probe_name>_fd``. Its name also embeds
    the module-level ``sample_rate``. The only check performed is that the
    file exists.

    Args:
        probe_name: Probe label, e.g. ``"Probe1"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.

    Returns:
        Whatever ``np.load`` returns for the file.

    Raises:
        FileNotFoundError: If the expected file is not on disk.
    """
    file_path = os.path.join(
        base_directory,
        f"P{probe_name[-1]}_fd",
        f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy',
    )

    if os.path.exists(file_path):
        data = np.load(file_path)
        # Echo the shape so the notebook output shows what was loaded.
        print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
        return data

    raise FileNotFoundError(f"File not found: {file_path}")

# ==================================================================
# Assemble Training & Test Sets
# ==================================================================
# Channel 1 files are labelled 0 and channel 2 files are labelled 1; the
# classification report below names these classes 'CH1' and 'CH2'.
train_ch0 = load_channel_data(train_probe, adc_num, channel=1)
train_ch1 = load_channel_data(train_probe, adc_num, channel=2)
test_ch0 = load_channel_data(test_probe, adc_num, channel=1)
test_ch1 = load_channel_data(test_probe, adc_num, channel=2)

X_train = np.vstack((train_ch0, train_ch1))
X_test = np.vstack((test_ch0, test_ch1))
y_train = np.concatenate((np.zeros(len(train_ch0)), np.ones(len(train_ch1))))
y_test = np.concatenate((np.zeros(len(test_ch0)), np.ones(len(test_ch1))))

# ==================================================================
# Standardize Features
# ==================================================================
# The scaler is fitted on the training set only, then applied to both sets.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# First Forest on All Features (used to rank features)
# ==================================================================
# Both forests in this cell are built with the same hyperparameters.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Keep the features whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# List the selected features in column order (not sorted by importance).
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Second Forest on the Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# Output folder is named after the probe pair, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected feature indices.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Class predictions and the second predict_proba column (label 1.0 here).
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Test-Set Metrics for the Selected-Feature Model
# ==================================================================
print("\nModel Performance After Feature Selection:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_selected):.4f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred_selected):.4f}")
print(f"MCC: {matthews_corrcoef(y_test, y_pred_selected):.4f}")
print(f"Log Loss: {log_loss(y_test, y_probs_selected):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred_selected):.4f}")
print(f"Recall: {recall_score(y_test, y_pred_selected):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_selected):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix, then FPR from its first row and FNR from its second row.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0, 1] / (cm[0, 0] + cm[0, 1])
fn_rate = cm[1, 0] / (cm[1, 1] + cm[1, 0])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC1 CH1: (1998, 38)
✅ Loaded Probe1 ADC1 CH2: (1998, 38)
✅ Loaded Probe2 ADC1 CH1: (1998, 38)
✅ Loaded Probe2 ADC1 CH2: (1998, 38)

Top Important Features:
Max_H: Importance 0.0601
Min_H: Importance 0.0371
Mean_H: Importance 0.0818
Std_H: Importance 0.0381
Mean Deviation_H: Importance 0.0205
RMS_H: Importance 0.0808
Entropy_H: Importance 0.1200
Spread_H: Importance 0.0000
Mean_Freq_H: Importance 0.1024
Variance_H: Importance 0.0601
Min_L: Importance 0.0006
Mean_L: Importance 0.1433
Mean Deviation_L: Importance 0.0151
RMS_L: Importance 0.0803
Entropy_L: Importance 0.0390
Variance_L: Importance 0.1207

Model Performance After Feature Selection:
Test Accuracy: 0.9997
Balanced Accuracy: 0.9997
MCC: 0.9995
Log Loss: 0.0367
F1 Score: 0.9997
Recall: 0.9995
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      1998
         CH2       1.00      1.00      1.00      1998

    accuracy                           1.

## **Sampling Rate 100MSPS**
This section trains and evaluates the model on data collected at a sampling rate of **100 MSPS**. The model is trained on **Probe 1** and tested on **Probe 2**.

In [14]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Experiment Settings (fixed for this cell)
# ==================================================================
# Root folder under which load_channel_data looks for the per-probe folders.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC number and sampling rate; both are embedded in the feature file names
# and in the model output folder name.
adc_num, sample_rate = 1, 100

# Dataset numbers of the probe used for training and the probe used for testing.
train_probe_dataset, test_probe_dataset = 1, 2

# Probe names are built from the dataset numbers above.
train_probe = f"Probe{train_probe_dataset}"
test_probe = f"Probe{test_probe_dataset}"

# Feature names, kept in the original four groups
# (Time High, Freq High, Time Low, Freq Low) and in the original order.
_time_high_names = (
    "Max_H", "Min_H", "Mean_H", "Std_H", "Mean Deviation_H",
    "RMS_H", "Skewness_H", "Kurtosis_H", "Peak-to-Peak_H", "Zero Crossing Rate_H",
)
_freq_high_names = (
    "Centroid_H", "Entropy_H", "Spread_H", "Skewness_Freq_H", "Mean_Freq_H",
    "Kurtosis_Freq_H", "Irregularity_H", "Variance_H", "Dominant_Freq_H",
)
_time_low_names = (
    "Max_L", "Min_L", "Mean_L", "Std_L", "Mean Deviation_L",
    "RMS_L", "Skewness_L", "Kurtosis_L", "Peak-to-Peak_L", "Zero Crossing Rate_L",
)
_freq_low_names = (
    "Centroid_L", "Entropy_L", "Spread_L", "Skewness_Freq_L", "Mean_Freq_L",
    "Kurtosis_Freq_L", "Irregularity_L", "Variance_L", "Dominant_Freq_L",
)
feature_names = [
    *_time_high_names,
    *_freq_high_names,
    *_time_low_names,
    *_freq_low_names,
]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel.

    The file is looked up under the module-level ``base_directory``, in the
    sub-folder ``P<last character of probe_name>_fd``. Its name also embeds
    the module-level ``sample_rate``. The only check performed is that the
    file exists.

    Args:
        probe_name: Probe label, e.g. ``"Probe1"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.

    Returns:
        Whatever ``np.load`` returns for the file.

    Raises:
        FileNotFoundError: If the expected file is not on disk.
    """
    file_path = os.path.join(
        base_directory,
        f"P{probe_name[-1]}_fd",
        f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy',
    )

    if os.path.exists(file_path):
        data = np.load(file_path)
        # Echo the shape so the notebook output shows what was loaded.
        print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
        return data

    raise FileNotFoundError(f"File not found: {file_path}")

# ==================================================================
# Assemble Training & Test Sets
# ==================================================================
# Channel 1 files are labelled 0 and channel 2 files are labelled 1; the
# classification report below names these classes 'CH1' and 'CH2'.
train_ch0 = load_channel_data(train_probe, adc_num, channel=1)
train_ch1 = load_channel_data(train_probe, adc_num, channel=2)
test_ch0 = load_channel_data(test_probe, adc_num, channel=1)
test_ch1 = load_channel_data(test_probe, adc_num, channel=2)

X_train = np.vstack((train_ch0, train_ch1))
X_test = np.vstack((test_ch0, test_ch1))
y_train = np.concatenate((np.zeros(len(train_ch0)), np.ones(len(train_ch1))))
y_test = np.concatenate((np.zeros(len(test_ch0)), np.ones(len(test_ch1))))

# ==================================================================
# Standardize Features
# ==================================================================
# The scaler is fitted on the training set only, then applied to both sets.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# First Forest on All Features (used to rank features)
# ==================================================================
# Both forests in this cell are built with the same hyperparameters.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Keep the features whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# List the selected features in column order (not sorted by importance).
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Second Forest on the Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# Output folder is named after the probe pair, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected feature indices.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Class predictions and the second predict_proba column (label 1.0 here).
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Test-Set Metrics for the Selected-Feature Model
# ==================================================================
print("\nModel Performance After Feature Selection:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_selected):.4f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred_selected):.4f}")
print(f"MCC: {matthews_corrcoef(y_test, y_pred_selected):.4f}")
print(f"Log Loss: {log_loss(y_test, y_probs_selected):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred_selected):.4f}")
print(f"Recall: {recall_score(y_test, y_pred_selected):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_selected):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix, then FPR from its first row and FNR from its second row.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0, 1] / (cm[0, 0] + cm[0, 1])
fn_rate = cm[1, 0] / (cm[1, 1] + cm[1, 0])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC1 CH1: (998, 38)
✅ Loaded Probe1 ADC1 CH2: (998, 38)
✅ Loaded Probe2 ADC1 CH1: (998, 38)
✅ Loaded Probe2 ADC1 CH2: (998, 38)

Top Important Features:
Max_H: Importance 0.0861
Min_H: Importance 0.0066
Mean_H: Importance 0.0985
Std_H: Importance 0.0055
Mean Deviation_H: Importance 0.0406
RMS_H: Importance 0.1472
Skewness_H: Importance 0.0007
Centroid_H: Importance 0.0055
Entropy_H: Importance 0.0095
Mean_Freq_H: Importance 0.0104
Variance_H: Importance 0.0821
Max_L: Importance 0.0105
Min_L: Importance 0.0172
Mean_L: Importance 0.1887
Std_L: Importance 0.0048
Mean Deviation_L: Importance 0.0267
RMS_L: Importance 0.1411
Entropy_L: Importance 0.0052
Variance_L: Importance 0.1124

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0012
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       998
         CH2      

## **Sampling Rate 150MSPS**
This section trains and evaluates the model on data collected at a sampling rate of **150 MSPS**. The model is trained on **Probe 1** and tested on **Probe 2**.



In [15]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Experiment Settings (fixed for this cell)
# ==================================================================
# Root folder under which load_channel_data looks for the per-probe folders.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC number and sampling rate; both are embedded in the feature file names
# and in the model output folder name.
adc_num, sample_rate = 1, 150

# Dataset numbers of the probe used for training and the probe used for testing.
train_probe_dataset, test_probe_dataset = 1, 2

# Probe names are built from the dataset numbers above.
train_probe = f"Probe{train_probe_dataset}"
test_probe = f"Probe{test_probe_dataset}"

# Feature names, kept in the original four groups
# (Time High, Freq High, Time Low, Freq Low) and in the original order.
_time_high_names = (
    "Max_H", "Min_H", "Mean_H", "Std_H", "Mean Deviation_H",
    "RMS_H", "Skewness_H", "Kurtosis_H", "Peak-to-Peak_H", "Zero Crossing Rate_H",
)
_freq_high_names = (
    "Centroid_H", "Entropy_H", "Spread_H", "Skewness_Freq_H", "Mean_Freq_H",
    "Kurtosis_Freq_H", "Irregularity_H", "Variance_H", "Dominant_Freq_H",
)
_time_low_names = (
    "Max_L", "Min_L", "Mean_L", "Std_L", "Mean Deviation_L",
    "RMS_L", "Skewness_L", "Kurtosis_L", "Peak-to-Peak_L", "Zero Crossing Rate_L",
)
_freq_low_names = (
    "Centroid_L", "Entropy_L", "Spread_L", "Skewness_Freq_L", "Mean_Freq_L",
    "Kurtosis_Freq_L", "Irregularity_L", "Variance_L", "Dominant_Freq_L",
)
feature_names = [
    *_time_high_names,
    *_freq_high_names,
    *_time_low_names,
    *_freq_low_names,
]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel.

    The file is looked up under the module-level ``base_directory``, in the
    sub-folder ``P<last character of probe_name>_fd``. Its name also embeds
    the module-level ``sample_rate``. The only check performed is that the
    file exists.

    Args:
        probe_name: Probe label, e.g. ``"Probe1"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.

    Returns:
        Whatever ``np.load`` returns for the file.

    Raises:
        FileNotFoundError: If the expected file is not on disk.
    """
    file_path = os.path.join(
        base_directory,
        f"P{probe_name[-1]}_fd",
        f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy',
    )

    if os.path.exists(file_path):
        data = np.load(file_path)
        # Echo the shape so the notebook output shows what was loaded.
        print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
        return data

    raise FileNotFoundError(f"File not found: {file_path}")

# ==================================================================
# Assemble Training & Test Sets
# ==================================================================
# Channel 1 files are labelled 0 and channel 2 files are labelled 1; the
# classification report below names these classes 'CH1' and 'CH2'.
train_ch0 = load_channel_data(train_probe, adc_num, channel=1)
train_ch1 = load_channel_data(train_probe, adc_num, channel=2)
test_ch0 = load_channel_data(test_probe, adc_num, channel=1)
test_ch1 = load_channel_data(test_probe, adc_num, channel=2)

X_train = np.vstack((train_ch0, train_ch1))
X_test = np.vstack((test_ch0, test_ch1))
y_train = np.concatenate((np.zeros(len(train_ch0)), np.ones(len(train_ch1))))
y_test = np.concatenate((np.zeros(len(test_ch0)), np.ones(len(test_ch1))))

# ==================================================================
# Standardize Features
# ==================================================================
# The scaler is fitted on the training set only, then applied to both sets.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# First Forest on All Features (used to rank features)
# ==================================================================
# Both forests in this cell are built with the same hyperparameters.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Keep the features whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# List the selected features in column order (not sorted by importance).
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Second Forest on the Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# Output folder is named after the probe pair, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected feature indices.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Class predictions and the second predict_proba column (label 1.0 here).
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Test-Set Metrics for the Selected-Feature Model
# ==================================================================
print("\nModel Performance After Feature Selection:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_selected):.4f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred_selected):.4f}")
print(f"MCC: {matthews_corrcoef(y_test, y_pred_selected):.4f}")
print(f"Log Loss: {log_loss(y_test, y_probs_selected):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred_selected):.4f}")
print(f"Recall: {recall_score(y_test, y_pred_selected):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_selected):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix, then FPR from its first row and FNR from its second row.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0, 1] / (cm[0, 0] + cm[0, 1])
fn_rate = cm[1, 0] / (cm[1, 1] + cm[1, 0])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC1 CH1: (665, 38)
✅ Loaded Probe1 ADC1 CH2: (665, 38)
✅ Loaded Probe2 ADC1 CH1: (665, 38)
✅ Loaded Probe2 ADC1 CH2: (665, 38)

Top Important Features:
Max_H: Importance 0.1000
Mean_H: Importance 0.0849
Std_H: Importance 0.0575
Mean Deviation_H: Importance 0.0650
RMS_H: Importance 0.1222
Centroid_H: Importance 0.0015
Entropy_H: Importance 0.0191
Mean_Freq_H: Importance 0.0381
Kurtosis_Freq_H: Importance 0.0137
Variance_H: Importance 0.0215
Mean_L: Importance 0.1811
Mean Deviation_L: Importance 0.0800
RMS_L: Importance 0.1208
Entropy_L: Importance 0.0147
Variance_L: Importance 0.0800

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0004
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       665
         CH2       1.00      1.00      1.00       665

    accuracy                           1.00      1330
   m

## **Sampling Rate 200MSPS**
This section trains and evaluates the model on data collected at a sampling rate of **200 MSPS**. The model is trained on **Probe 1** and tested on **Probe 2**.



In [16]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Experiment Settings (fixed for this cell)
# ==================================================================
# Root folder under which load_channel_data looks for the per-probe folders.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC number and sampling rate; both are embedded in the feature file names
# and in the model output folder name.
adc_num, sample_rate = 1, 200

# Dataset numbers of the probe used for training and the probe used for testing.
train_probe_dataset, test_probe_dataset = 1, 2

# Probe names are built from the dataset numbers above.
train_probe = f"Probe{train_probe_dataset}"
test_probe = f"Probe{test_probe_dataset}"

# Feature names, kept in the original four groups
# (Time High, Freq High, Time Low, Freq Low) and in the original order.
_time_high_names = (
    "Max_H", "Min_H", "Mean_H", "Std_H", "Mean Deviation_H",
    "RMS_H", "Skewness_H", "Kurtosis_H", "Peak-to-Peak_H", "Zero Crossing Rate_H",
)
_freq_high_names = (
    "Centroid_H", "Entropy_H", "Spread_H", "Skewness_Freq_H", "Mean_Freq_H",
    "Kurtosis_Freq_H", "Irregularity_H", "Variance_H", "Dominant_Freq_H",
)
_time_low_names = (
    "Max_L", "Min_L", "Mean_L", "Std_L", "Mean Deviation_L",
    "RMS_L", "Skewness_L", "Kurtosis_L", "Peak-to-Peak_L", "Zero Crossing Rate_L",
)
_freq_low_names = (
    "Centroid_L", "Entropy_L", "Spread_L", "Skewness_Freq_L", "Mean_Freq_L",
    "Kurtosis_Freq_L", "Irregularity_L", "Variance_L", "Dominant_Freq_L",
)
feature_names = [
    *_time_high_names,
    *_freq_high_names,
    *_time_low_names,
    *_freq_low_names,
]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel.

    The file is looked up under the module-level ``base_directory``, in the
    sub-folder ``P<last character of probe_name>_fd``. Its name also embeds
    the module-level ``sample_rate``. The only check performed is that the
    file exists.

    Args:
        probe_name: Probe label, e.g. ``"Probe1"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.

    Returns:
        Whatever ``np.load`` returns for the file.

    Raises:
        FileNotFoundError: If the expected file is not on disk.
    """
    file_path = os.path.join(
        base_directory,
        f"P{probe_name[-1]}_fd",
        f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy',
    )

    if os.path.exists(file_path):
        data = np.load(file_path)
        # Echo the shape so the notebook output shows what was loaded.
        print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
        return data

    raise FileNotFoundError(f"File not found: {file_path}")

# ==================================================================
# Assemble Training & Test Sets
# ==================================================================
# Channel 1 files are labelled 0 and channel 2 files are labelled 1; the
# classification report below names these classes 'CH1' and 'CH2'.
train_ch0 = load_channel_data(train_probe, adc_num, channel=1)
train_ch1 = load_channel_data(train_probe, adc_num, channel=2)
test_ch0 = load_channel_data(test_probe, adc_num, channel=1)
test_ch1 = load_channel_data(test_probe, adc_num, channel=2)

X_train = np.vstack((train_ch0, train_ch1))
X_test = np.vstack((test_ch0, test_ch1))
y_train = np.concatenate((np.zeros(len(train_ch0)), np.ones(len(train_ch1))))
y_test = np.concatenate((np.zeros(len(test_ch0)), np.ones(len(test_ch1))))

# ==================================================================
# Standardize Features
# ==================================================================
# The scaler is fitted on the training set only, then applied to both sets.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# First Forest on All Features (used to rank features)
# ==================================================================
# Both forests in this cell are built with the same hyperparameters.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Keep the features whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# List the selected features in column order (not sorted by importance).
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Second Forest on the Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# Output folder is named after the probe pair, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected feature indices.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Class predictions and the second predict_proba column (label 1.0 here).
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Test-Set Metrics for the Selected-Feature Model
# ==================================================================
print("\nModel Performance After Feature Selection:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_selected):.4f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred_selected):.4f}")
print(f"MCC: {matthews_corrcoef(y_test, y_pred_selected):.4f}")
print(f"Log Loss: {log_loss(y_test, y_probs_selected):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred_selected):.4f}")
print(f"Recall: {recall_score(y_test, y_pred_selected):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_selected):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix, then FPR from its first row and FNR from its second row.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0, 1] / (cm[0, 0] + cm[0, 1])
fn_rate = cm[1, 0] / (cm[1, 1] + cm[1, 0])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC1 CH1: (498, 38)
✅ Loaded Probe1 ADC1 CH2: (498, 38)
✅ Loaded Probe2 ADC1 CH1: (498, 38)
✅ Loaded Probe2 ADC1 CH2: (498, 38)

Top Important Features:
Max_H: Importance 0.1000
Min_H: Importance 0.0000
Mean_H: Importance 0.0852
Mean Deviation_H: Importance 0.0406
RMS_H: Importance 0.1257
Entropy_H: Importance 0.0182
Mean_Freq_H: Importance 0.0171
Kurtosis_Freq_H: Importance 0.0293
Variance_H: Importance 0.0401
Min_L: Importance 0.0865
Mean_L: Importance 0.1400
Std_L: Importance 0.0195
Mean Deviation_L: Importance 0.0800
RMS_L: Importance 0.1240
Entropy_L: Importance 0.0194
Mean_Freq_L: Importance 0.0136
Variance_L: Importance 0.0609

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0064
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       498
         CH2       1.00      1.00      1.00       498

    acc

## **Sampling Rate 250MSPS**
This section trains and evaluates the model on data collected at a sampling rate of **250 MSPS**. The model is trained on **Probe 1** and tested on **Probe 2**.



In [17]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Experiment Settings (fixed for this cell)
# ==================================================================
# Root folder under which load_channel_data looks for the per-probe folders.
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

# ADC number and sampling rate; both are embedded in the feature file names
# and in the model output folder name.
adc_num, sample_rate = 1, 250

# Dataset numbers of the probe used for training and the probe used for testing.
train_probe_dataset, test_probe_dataset = 1, 2

# Probe names are built from the dataset numbers above.
train_probe = f"Probe{train_probe_dataset}"
test_probe = f"Probe{test_probe_dataset}"

# Feature names, kept in the original four groups
# (Time High, Freq High, Time Low, Freq Low) and in the original order.
_time_high_names = (
    "Max_H", "Min_H", "Mean_H", "Std_H", "Mean Deviation_H",
    "RMS_H", "Skewness_H", "Kurtosis_H", "Peak-to-Peak_H", "Zero Crossing Rate_H",
)
_freq_high_names = (
    "Centroid_H", "Entropy_H", "Spread_H", "Skewness_Freq_H", "Mean_Freq_H",
    "Kurtosis_Freq_H", "Irregularity_H", "Variance_H", "Dominant_Freq_H",
)
_time_low_names = (
    "Max_L", "Min_L", "Mean_L", "Std_L", "Mean Deviation_L",
    "RMS_L", "Skewness_L", "Kurtosis_L", "Peak-to-Peak_L", "Zero Crossing Rate_L",
)
_freq_low_names = (
    "Centroid_L", "Entropy_L", "Spread_L", "Skewness_Freq_L", "Mean_Freq_L",
    "Kurtosis_Freq_L", "Irregularity_L", "Variance_L", "Dominant_Freq_L",
)
feature_names = [
    *_time_high_names,
    *_freq_high_names,
    *_time_low_names,
    *_freq_low_names,
]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel.

    The file is looked up under the module-level ``base_directory``, in the
    sub-folder ``P<last character of probe_name>_fd``. Its name also embeds
    the module-level ``sample_rate``. The only check performed is that the
    file exists.

    Args:
        probe_name: Probe label, e.g. ``"Probe1"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.

    Returns:
        Whatever ``np.load`` returns for the file.

    Raises:
        FileNotFoundError: If the expected file is not on disk.
    """
    file_path = os.path.join(
        base_directory,
        f"P{probe_name[-1]}_fd",
        f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy',
    )

    if os.path.exists(file_path):
        data = np.load(file_path)
        # Echo the shape so the notebook output shows what was loaded.
        print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
        return data

    raise FileNotFoundError(f"File not found: {file_path}")

# ==================================================================
# Assemble Training & Test Sets
# ==================================================================
# Channel 1 files are labelled 0 and channel 2 files are labelled 1; the
# classification report below names these classes 'CH1' and 'CH2'.
train_ch0 = load_channel_data(train_probe, adc_num, channel=1)
train_ch1 = load_channel_data(train_probe, adc_num, channel=2)
test_ch0 = load_channel_data(test_probe, adc_num, channel=1)
test_ch1 = load_channel_data(test_probe, adc_num, channel=2)

X_train = np.vstack((train_ch0, train_ch1))
X_test = np.vstack((test_ch0, test_ch1))
y_train = np.concatenate((np.zeros(len(train_ch0)), np.ones(len(train_ch1))))
y_test = np.concatenate((np.zeros(len(test_ch0)), np.ones(len(test_ch1))))

# ==================================================================
# Standardize Features
# ==================================================================
# The scaler is fitted on the training set only, then applied to both sets.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# First Forest on All Features (used to rank features)
# ==================================================================
# Both forests in this cell are built with the same hyperparameters.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Keep the features whose importance is strictly above the 50th percentile.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# List the selected features in column order (not sorted by importance).
print("\nTop Important Features:")
for feature_idx in important_features:
    print(f"{feature_names[feature_idx]}: Importance {feature_importances[feature_idx]:.4f}")

# ==================================================================
# Second Forest on the Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# Output folder is named after the probe pair, ADC and sampling rate.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Persist the model, the scaler and the selected feature indices.
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Class predictions and the second predict_proba column (label 1.0 here).
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Test-Set Metrics for the Selected-Feature Model
# ==================================================================
print("\nModel Performance After Feature Selection:")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred_selected):.4f}")
print(f"Balanced Accuracy: {balanced_accuracy_score(y_test, y_pred_selected):.4f}")
print(f"MCC: {matthews_corrcoef(y_test, y_pred_selected):.4f}")
print(f"Log Loss: {log_loss(y_test, y_probs_selected):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred_selected):.4f}")
print(f"Recall: {recall_score(y_test, y_pred_selected):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_selected):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Confusion matrix, then FPR from its first row and FNR from its second row.
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
fp_rate = cm[0, 1] / (cm[0, 0] + cm[0, 1])
fn_rate = cm[1, 0] / (cm[1, 1] + cm[1, 0])
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC1 CH1: (398, 38)
✅ Loaded Probe1 ADC1 CH2: (398, 38)
✅ Loaded Probe2 ADC1 CH1: (398, 38)
✅ Loaded Probe2 ADC1 CH2: (398, 38)

Top Important Features:
Max_H: Importance 0.0800
Mean_H: Importance 0.0803
Mean Deviation_H: Importance 0.0400
RMS_H: Importance 0.1239
Peak-to-Peak_H: Importance 0.0000
Centroid_H: Importance 0.0001
Mean_Freq_H: Importance 0.0189
Kurtosis_Freq_H: Importance 0.0161
Variance_H: Importance 0.0200
Min_L: Importance 0.1001
Mean_L: Importance 0.1400
Std_L: Importance 0.0200
Mean Deviation_L: Importance 0.1000
RMS_L: Importance 0.1211
Entropy_L: Importance 0.0197
Mean_Freq_L: Importance 0.0398
Variance_L: Importance 0.0800

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0011
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       398
         CH2       1.00      1.00      1.00       39

## **Sampling Rate 300MSPS**
This section trains and evaluates the model on data collected at a sampling rate of **300 MSPS**. The model is trained on **Probe 1** and tested on **Probe 2**.



In [19]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder of the saved feature files
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

adc_num = 1               # ADC number used in the feature file names
train_probe_dataset = 1   # probe number the model is trained on
test_probe_dataset = 2    # probe number the model is tested on
sample_rate = 300         # sampling rate in MSPS, as used in the file names

# Probe labels ("Probe<n>") derived from the dataset numbers above
train_probe, test_probe = (
    f"Probe{number}" for number in (train_probe_dataset, test_probe_dataset)
)

# Feature Names (Time High, Freq High, Time Low, Freq Low)
# The same 19 descriptors are listed once with the "_H" suffix and then once
# with the "_L" suffix, giving 38 names in total.
_descriptor_names = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [
    f"{descriptor}_{band}"
    for band in ("H", "L")
    for descriptor in _descriptor_names
]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel.

    The file is looked up under the module-level ``base_directory`` as
    ``P<id>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``,
    where ``sample_rate`` is the module-level setting and ``<id>`` is the
    numeric suffix of ``probe_name``. No cross-check of the array contents
    against the requested ADC is performed.

    Args:
        probe_name: Probe label ending in its number, e.g. ``"Probe1"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.

    Returns:
        The array read by ``np.load``.

    Raises:
        FileNotFoundError: If the expected ``.npy`` file does not exist.
    """
    # Use the whole trailing number so multi-digit probes ("Probe12") map to
    # "P12_fd" instead of "P2_fd"; names without trailing digits fall back to
    # their last character.
    stem = probe_name.rstrip("0123456789")
    probe_id = probe_name[len(stem):] or probe_name[-1]

    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path so a missing file is easy to locate.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Channel 1 and channel 2 are loaded separately for each probe.
train_ch0 = load_channel_data(train_probe, adc_num, 1)
train_ch1 = load_channel_data(train_probe, adc_num, 2)
test_ch0 = load_channel_data(test_probe, adc_num, 1)
test_ch1 = load_channel_data(test_probe, adc_num, 2)

# Rows from channel 1 are labelled 0.0, rows from channel 2 are labelled 1.0.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ==================================================================
# Data Preprocessing
# ==================================================================
# Scaling statistics are learned from the training rows only.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# One hyper-parameter set shared by the initial and the retrained forest.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Identify Important Features: keep those strictly above the median importance
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features
print("\nTop Important Features:")
for feature_idx in important_features:
    selected_name = feature_names[feature_idx]
    selected_score = feature_importances[feature_idx]
    print(f"{selected_name}: Importance {selected_score:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# Create a unique folder name dynamically
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the model, the scaler and the selected feature indices
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Predictions: hard labels plus the probability of class 1
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# (label, metric function, scores it is computed from); only log loss uses
# the probabilities, every other metric uses the hard predictions.
metric_table = (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
)
for metric_label, metric_fn, metric_scores in metric_table:
    print(f"{metric_label}: {metric_fn(y_test, metric_scores):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Rows are true labels, columns are predictions: [[tn, fp], [fn, tp]]
(tn, fp), (fn, tp) = cm
fp_rate = fp / (fp + tn)
fn_rate = fn / (fn + tp)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC1 CH1: (332, 38)
✅ Loaded Probe1 ADC1 CH2: (332, 38)
✅ Loaded Probe2 ADC1 CH1: (332, 38)
✅ Loaded Probe2 ADC1 CH2: (332, 38)

Top Important Features:
Max_H: Importance 0.0600
Mean_H: Importance 0.0600
Std_H: Importance 0.0800
Mean Deviation_H: Importance 0.0203
RMS_H: Importance 0.0802
Entropy_H: Importance 0.0400
Mean_Freq_H: Importance 0.0600
Kurtosis_Freq_H: Importance 0.0195
Min_L: Importance 0.0400
Mean_L: Importance 0.1000
Std_L: Importance 0.0800
Mean Deviation_L: Importance 0.0600
RMS_L: Importance 0.0800
Entropy_L: Importance 0.0600
Mean_Freq_L: Importance 0.0800
Variance_L: Importance 0.0800

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0015
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00       332
         CH2       1.00      1.00      1.00       332

    accuracy                         

## Training Probe1 data and Testing on Probe 3 data
The model trained on Probe 1 is evaluated on Probe 3 data to check how well it generalizes to a different probe.

## **Sampling Rate 10MSPS**
This section contains data collected using a sampling rate of **10MSPS**. The model is trained on **Probe 1** and tested on **Probe 3**.

In [20]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder of the saved feature files
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

adc_num = 1               # ADC number used in the feature file names
train_probe_dataset = 1   # probe number the model is trained on
test_probe_dataset = 3    # probe number the model is tested on
sample_rate = 10          # sampling rate in MSPS, as used in the file names

# Probe labels ("Probe<n>") derived from the dataset numbers above
train_probe, test_probe = (
    f"Probe{number}" for number in (train_probe_dataset, test_probe_dataset)
)

# Feature Names (Time High, Freq High, Time Low, Freq Low)
# The same 19 descriptors are listed once with the "_H" suffix and then once
# with the "_L" suffix, giving 38 names in total.
_descriptor_names = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [
    f"{descriptor}_{band}"
    for band in ("H", "L")
    for descriptor in _descriptor_names
]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel.

    The file is looked up under the module-level ``base_directory`` as
    ``P<id>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``,
    where ``sample_rate`` is the module-level setting and ``<id>`` is the
    numeric suffix of ``probe_name``. No cross-check of the array contents
    against the requested ADC is performed.

    Args:
        probe_name: Probe label ending in its number, e.g. ``"Probe1"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.

    Returns:
        The array read by ``np.load``.

    Raises:
        FileNotFoundError: If the expected ``.npy`` file does not exist.
    """
    # Use the whole trailing number so multi-digit probes ("Probe12") map to
    # "P12_fd" instead of "P2_fd"; names without trailing digits fall back to
    # their last character.
    stem = probe_name.rstrip("0123456789")
    probe_id = probe_name[len(stem):] or probe_name[-1]

    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path so a missing file is easy to locate.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Channel 1 and channel 2 are loaded separately for each probe.
train_ch0 = load_channel_data(train_probe, adc_num, 1)
train_ch1 = load_channel_data(train_probe, adc_num, 2)
test_ch0 = load_channel_data(test_probe, adc_num, 1)
test_ch1 = load_channel_data(test_probe, adc_num, 2)

# Rows from channel 1 are labelled 0.0, rows from channel 2 are labelled 1.0.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ==================================================================
# Data Preprocessing
# ==================================================================
# Scaling statistics are learned from the training rows only.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# One hyper-parameter set shared by the initial and the retrained forest.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Identify Important Features: keep those strictly above the median importance
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features
print("\nTop Important Features:")
for feature_idx in important_features:
    selected_name = feature_names[feature_idx]
    selected_score = feature_importances[feature_idx]
    print(f"{selected_name}: Importance {selected_score:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# Create a unique folder name dynamically
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the model, the scaler and the selected feature indices
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Predictions: hard labels plus the probability of class 1
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# (label, metric function, scores it is computed from); only log loss uses
# the probabilities, every other metric uses the hard predictions.
metric_table = (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
)
for metric_label, metric_fn, metric_scores in metric_table:
    print(f"{metric_label}: {metric_fn(y_test, metric_scores):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Rows are true labels, columns are predictions: [[tn, fp], [fn, tp]]
(tn, fp), (fn, tp) = cm
fp_rate = fp / (fp + tn)
fn_rate = fn / (fn + tp)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC1 CH1: (9998, 38)
✅ Loaded Probe1 ADC1 CH2: (9998, 38)
✅ Loaded Probe3 ADC1 CH1: (9998, 38)
✅ Loaded Probe3 ADC1 CH2: (9998, 38)

Top Important Features:
Max_H: Importance 0.0527
Min_H: Importance 0.0141
Mean_H: Importance 0.0989
Std_H: Importance 0.0076
Mean Deviation_H: Importance 0.0216
RMS_H: Importance 0.1667
Entropy_H: Importance 0.0460
Mean_Freq_H: Importance 0.0641
Variance_H: Importance 0.0496
Max_L: Importance 0.0324
Min_L: Importance 0.0696
Mean_L: Importance 0.0698
Std_L: Importance 0.0075
RMS_L: Importance 0.0890
Skewness_L: Importance 0.0072
Centroid_L: Importance 0.0114
Entropy_L: Importance 0.0299
Mean_Freq_L: Importance 0.0159
Variance_L: Importance 0.0973

Model Performance After Feature Selection:
Test Accuracy: 0.9597
Balanced Accuracy: 0.9597
MCC: 0.9218
Log Loss: 0.1084
F1 Score: 0.9611
Recall: 0.9958
Precision: 0.9287
              precision    recall  f1-score   support

         CH1       1.00      0.92      0.96      9998
         CH2       

## **Sampling Rate 20MSPS**
This section contains data collected using a sampling rate of **20MSPS**. The model is trained on **Probe 1** and tested on **Probe 3**.



In [21]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder of the saved feature files
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

adc_num = 1               # ADC number used in the feature file names
train_probe_dataset = 1   # probe number the model is trained on
test_probe_dataset = 3    # probe number the model is tested on
sample_rate = 20          # sampling rate in MSPS, as used in the file names

# Probe labels ("Probe<n>") derived from the dataset numbers above
train_probe, test_probe = (
    f"Probe{number}" for number in (train_probe_dataset, test_probe_dataset)
)

# Feature Names (Time High, Freq High, Time Low, Freq Low)
# The same 19 descriptors are listed once with the "_H" suffix and then once
# with the "_L" suffix, giving 38 names in total.
_descriptor_names = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [
    f"{descriptor}_{band}"
    for band in ("H", "L")
    for descriptor in _descriptor_names
]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel.

    The file is looked up under the module-level ``base_directory`` as
    ``P<id>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``,
    where ``sample_rate`` is the module-level setting and ``<id>`` is the
    numeric suffix of ``probe_name``. No cross-check of the array contents
    against the requested ADC is performed.

    Args:
        probe_name: Probe label ending in its number, e.g. ``"Probe1"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.

    Returns:
        The array read by ``np.load``.

    Raises:
        FileNotFoundError: If the expected ``.npy`` file does not exist.
    """
    # Use the whole trailing number so multi-digit probes ("Probe12") map to
    # "P12_fd" instead of "P2_fd"; names without trailing digits fall back to
    # their last character.
    stem = probe_name.rstrip("0123456789")
    probe_id = probe_name[len(stem):] or probe_name[-1]

    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path so a missing file is easy to locate.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Channel 1 and channel 2 are loaded separately for each probe.
train_ch0 = load_channel_data(train_probe, adc_num, 1)
train_ch1 = load_channel_data(train_probe, adc_num, 2)
test_ch0 = load_channel_data(test_probe, adc_num, 1)
test_ch1 = load_channel_data(test_probe, adc_num, 2)

# Rows from channel 1 are labelled 0.0, rows from channel 2 are labelled 1.0.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ==================================================================
# Data Preprocessing
# ==================================================================
# Scaling statistics are learned from the training rows only.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# One hyper-parameter set shared by the initial and the retrained forest.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Identify Important Features: keep those strictly above the median importance
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features
print("\nTop Important Features:")
for feature_idx in important_features:
    selected_name = feature_names[feature_idx]
    selected_score = feature_importances[feature_idx]
    print(f"{selected_name}: Importance {selected_score:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# Create a unique folder name dynamically
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the model, the scaler and the selected feature indices
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Predictions: hard labels plus the probability of class 1
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# (label, metric function, scores it is computed from); only log loss uses
# the probabilities, every other metric uses the hard predictions.
metric_table = (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
)
for metric_label, metric_fn, metric_scores in metric_table:
    print(f"{metric_label}: {metric_fn(y_test, metric_scores):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Rows are true labels, columns are predictions: [[tn, fp], [fn, tp]]
(tn, fp), (fn, tp) = cm
fp_rate = fp / (fp + tn)
fn_rate = fn / (fn + tp)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC1 CH1: (4998, 38)
✅ Loaded Probe1 ADC1 CH2: (4998, 38)
✅ Loaded Probe3 ADC1 CH1: (4998, 38)
✅ Loaded Probe3 ADC1 CH2: (4998, 38)

Top Important Features:
Max_H: Importance 0.1224
Min_H: Importance 0.0284
Mean_H: Importance 0.1486
Mean Deviation_H: Importance 0.0095
RMS_H: Importance 0.2172
Centroid_H: Importance 0.0009
Entropy_H: Importance 0.0173
Mean_Freq_H: Importance 0.0727
Variance_H: Importance 0.0400
Max_L: Importance 0.0024
Min_L: Importance 0.0255
Mean_L: Importance 0.1253
Std_L: Importance 0.0012
RMS_L: Importance 0.1210
Peak-to-Peak_L: Importance 0.0018
Entropy_L: Importance 0.0283
Spread_L: Importance 0.0004
Mean_Freq_L: Importance 0.0108
Variance_L: Importance 0.0256

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0016
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      4998
         CH2

## **Sampling Rate 30MSPS**
This section contains data collected using a sampling rate of **30MSPS**. The model is trained on **Probe 1** and tested on **Probe 3**.



In [22]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder of the saved feature files
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

adc_num = 1               # ADC number used in the feature file names
train_probe_dataset = 1   # probe number the model is trained on
test_probe_dataset = 3    # probe number the model is tested on
sample_rate = 30          # sampling rate in MSPS, as used in the file names

# Probe labels ("Probe<n>") derived from the dataset numbers above
train_probe, test_probe = (
    f"Probe{number}" for number in (train_probe_dataset, test_probe_dataset)
)

# Feature Names (Time High, Freq High, Time Low, Freq Low)
# The same 19 descriptors are listed once with the "_H" suffix and then once
# with the "_L" suffix, giving 38 names in total.
_descriptor_names = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [
    f"{descriptor}_{band}"
    for band in ("H", "L")
    for descriptor in _descriptor_names
]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel.

    The file is looked up under the module-level ``base_directory`` as
    ``P<id>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``,
    where ``sample_rate`` is the module-level setting and ``<id>`` is the
    numeric suffix of ``probe_name``. No cross-check of the array contents
    against the requested ADC is performed.

    Args:
        probe_name: Probe label ending in its number, e.g. ``"Probe1"``.
        adc_num: ADC number used in the file name.
        channel: Channel number used in the file name.

    Returns:
        The array read by ``np.load``.

    Raises:
        FileNotFoundError: If the expected ``.npy`` file does not exist.
    """
    # Use the whole trailing number so multi-digit probes ("Probe12") map to
    # "P12_fd" instead of "P2_fd"; names without trailing digits fall back to
    # their last character.
    stem = probe_name.rstrip("0123456789")
    probe_id = probe_name[len(stem):] or probe_name[-1]

    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path so a missing file is easy to locate.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Channel 1 and channel 2 are loaded separately for each probe.
train_ch0 = load_channel_data(train_probe, adc_num, 1)
train_ch1 = load_channel_data(train_probe, adc_num, 2)
test_ch0 = load_channel_data(test_probe, adc_num, 1)
test_ch1 = load_channel_data(test_probe, adc_num, 2)

# Rows from channel 1 are labelled 0.0, rows from channel 2 are labelled 1.0.
X_train = np.vstack((train_ch0, train_ch1))
y_train = np.repeat([0.0, 1.0], [len(train_ch0), len(train_ch1)])
X_test = np.vstack((test_ch0, test_ch1))
y_test = np.repeat([0.0, 1.0], [len(test_ch0), len(test_ch1)])

# ==================================================================
# Data Preprocessing
# ==================================================================
# Scaling statistics are learned from the training rows only.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# One hyper-parameter set shared by the initial and the retrained forest.
rf_params = dict(
    n_estimators=50,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=5,
    max_features='sqrt',
    random_state=42,
)
rf = RandomForestClassifier(**rf_params).fit(X_train_scaled, y_train)

# Identify Important Features: keep those strictly above the median importance
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected = X_train_scaled[:, important_features]
X_test_selected = X_test_scaled[:, important_features]

# Print Important Features
print("\nTop Important Features:")
for feature_idx in important_features:
    selected_name = feature_names[feature_idx]
    selected_score = feature_importances[feature_idx]
    print(f"{selected_name}: Importance {selected_score:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
rf_selected = RandomForestClassifier(**rf_params).fit(X_train_selected, y_train)

# Create a unique folder name dynamically
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the model, the scaler and the selected feature indices
for artifact, artifact_file in (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
):
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Predictions: hard labels plus the probability of class 1
y_pred_selected = rf_selected.predict(X_test_selected)
y_probs_selected = rf_selected.predict_proba(X_test_selected)[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")
# (label, metric function, scores it is computed from); only log loss uses
# the probabilities, every other metric uses the hard predictions.
metric_table = (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
)
for metric_label, metric_fn, metric_scores in metric_table:
    print(f"{metric_label}: {metric_fn(y_test, metric_scores):.4f}")
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)
# Rows are true labels, columns are predictions: [[tn, fp], [fn, tp]]
(tn, fp), (fn, tp) = cm
fp_rate = fp / (fp + tn)
fn_rate = fn / (fn + tp)
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC1 CH1: (3332, 38)
✅ Loaded Probe1 ADC1 CH2: (3332, 38)
✅ Loaded Probe3 ADC1 CH1: (3331, 38)
✅ Loaded Probe3 ADC1 CH2: (3332, 38)

Top Important Features:
Max_H: Importance 0.0683
Min_H: Importance 0.0315
Mean_H: Importance 0.0935
Std_H: Importance 0.0002
Mean Deviation_H: Importance 0.0067
RMS_H: Importance 0.1295
Centroid_H: Importance 0.0015
Entropy_H: Importance 0.0179
Mean_Freq_H: Importance 0.0551
Kurtosis_Freq_H: Importance 0.0001
Variance_H: Importance 0.0675
Max_L: Importance 0.0063
Min_L: Importance 0.0191
Mean_L: Importance 0.2050
RMS_L: Importance 0.1282
Peak-to-Peak_L: Importance 0.0001
Entropy_L: Importance 0.0234
Mean_Freq_L: Importance 0.0096
Variance_L: Importance 0.1366

Model Performance After Feature Selection:
Test Accuracy: 0.9676
Balanced Accuracy: 0.9676
MCC: 0.9370
Log Loss: 0.0684
F1 Score: 0.9665
Recall: 0.9364
Precision: 0.9987
              precision    recall  f1-score   support

         CH1       0.94      1.00      0.97      3331
     

## **Sampling Rate 40MSPS**
This section contains data collected using a sampling rate of **40MSPS**. The model is trained on **Probe 1** and tested on **Probe 3**.



In [23]:
import numpy as np
import os
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, matthews_corrcoef, 
    balanced_accuracy_score, log_loss, f1_score, recall_score, precision_score
)

# ==================================================================
# Fixed Configuration
# ==================================================================
# Root folder of the saved feature files
base_directory = "C:\\Users\\awm21\\Documents\\Probe_Vari\\Neural_Networks\\Features_Vitis"

adc_num = 1               # ADC number used in the feature file names
train_probe_dataset = 1   # probe number the model is trained on
test_probe_dataset = 3    # probe number the model is tested on
sample_rate = 40          # sampling rate in MSPS, as used in the file names

# Probe labels ("Probe<n>") derived from the dataset numbers above
train_probe, test_probe = (
    f"Probe{number}" for number in (train_probe_dataset, test_probe_dataset)
)

# Feature Names (Time High, Freq High, Time Low, Freq Low)
# The same 19 descriptors are listed once with the "_H" suffix and then once
# with the "_L" suffix, giving 38 names in total.
_descriptor_names = (
    "Max", "Min", "Mean", "Std", "Mean Deviation", "RMS", "Skewness",
    "Kurtosis", "Peak-to-Peak", "Zero Crossing Rate",
    "Centroid", "Entropy", "Spread", "Skewness_Freq", "Mean_Freq",
    "Kurtosis_Freq", "Irregularity", "Variance", "Dominant_Freq",
)
feature_names = [
    f"{descriptor}_{band}"
    for band in ("H", "L")
    for descriptor in _descriptor_names
]

def load_channel_data(probe_name, adc_num, channel):
    """Load the saved feature array for one probe / ADC / channel.

    The file is looked up at
    ``<base_directory>/P<id>_fd/fd_<probe_name>_ADC<adc_num>_CH<channel>_<sample_rate>_MSPS.npy``
    where ``base_directory`` and ``sample_rate`` are module-level settings
    and ``<id>`` is the numeric suffix of ``probe_name``.

    Args:
        probe_name: Probe label ending in its number, e.g. ``"Probe1"``.
        adc_num: ADC index used in the file name.
        channel: Channel index used in the file name.

    Returns:
        The array stored in the ``.npy`` file, as returned by ``np.load``.

    Raises:
        FileNotFoundError: If the expected file does not exist.
    """
    # Use the whole trailing number so "Probe12" maps to "P12_fd"; taking only
    # the last character would silently point at "P2_fd". Names without a
    # trailing digit keep the previous last-character behaviour.
    name_stem = probe_name.rstrip("0123456789")
    probe_id = probe_name[len(name_stem):] or probe_name[-1]

    folder = os.path.join(base_directory, f"P{probe_id}_fd")
    filename = f'fd_{probe_name}_ADC{adc_num}_CH{channel}_{sample_rate}_MSPS.npy'
    file_path = os.path.join(folder, filename)

    # Fail early with the full path so a wrong probe/ADC/rate setting is
    # easy to spot.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    data = np.load(file_path)
    print(f"✅ Loaded {probe_name} ADC{adc_num} CH{channel}: {data.shape}")
    return data

# ==================================================================
# Training & Test Data
# ==================================================================
# Channel 1 files fill the *_ch0 arrays and channel 2 files the *_ch1 arrays;
# load order is train CH1, train CH2, test CH1, test CH2.
train_ch0, train_ch1 = (
    load_channel_data(train_probe, adc_num, channel) for channel in (1, 2)
)
test_ch0, test_ch1 = (
    load_channel_data(test_probe, adc_num, channel) for channel in (1, 2)
)

# Stack the two channels row-wise into one feature matrix per probe.
X_train = np.vstack((train_ch0, train_ch1))
X_test = np.vstack((test_ch0, test_ch1))

# Labels follow the stacking order: one 0.0 per *_ch0 row, then one 1.0 per
# *_ch1 row.
y_train = np.repeat([0.0, 1.0], [train_ch0.shape[0], train_ch1.shape[0]])
y_test = np.repeat([0.0, 1.0], [test_ch0.shape[0], test_ch1.shape[0]])

# ==================================================================
# Data Preprocessing
# ==================================================================
# The scaler is fitted on the training matrix only; the test matrix is
# transformed with those same fitted statistics.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ==================================================================
# Train Initial Model for Feature Selection
# ==================================================================
# Hyperparameters of the forest whose importances drive feature selection.
initial_rf_params = {
    "n_estimators": 50,
    "max_depth": 10,
    "min_samples_split": 10,
    "min_samples_leaf": 5,
    "max_features": 'sqrt',
    "random_state": 42,
}
rf = RandomForestClassifier(**initial_rf_params)
rf.fit(X_train_scaled, y_train)

# Identify Important Features: keep the column indices whose importance is
# strictly above the 50th percentile of all importances.
feature_importances = rf.feature_importances_
importance_cutoff = np.percentile(feature_importances, 50)
important_features = np.flatnonzero(feature_importances > importance_cutoff)
X_train_selected, X_test_selected = (
    scaled_matrix[:, important_features]
    for scaled_matrix in (X_train_scaled, X_test_scaled)
)

# Print Important Features (in column order, not sorted by importance)
print("\nTop Important Features:")
for feature_idx in important_features:
    importance_value = feature_importances[feature_idx]
    print(f"{feature_names[feature_idx]}: Importance {importance_value:.4f}")

# ==================================================================
# Retrain Model with Selected Features
# ==================================================================
# Same hyperparameters as the initial forest, fitted on the reduced
# feature matrix.
selected_rf_params = {
    "n_estimators": 50,
    "max_depth": 10,
    "min_samples_split": 10,
    "min_samples_leaf": 5,
    "max_features": 'sqrt',
    "random_state": 42,
}
rf_selected = RandomForestClassifier(**selected_rf_params)
rf_selected.fit(X_train_selected, y_train)

# Output folder is named after the train/test probes, ADC and sampling rate,
# so each experiment writes to its own directory.
model_dir = f"Models/Tuning_{train_probe}_to_{test_probe}_ADC{adc_num}_{sample_rate}_MSPS"
os.makedirs(model_dir, exist_ok=True)

# Save the model, the fitted scaler and the selected column indices; all
# three are needed to reproduce the preprocessing at inference time.
saved_artifacts = (
    (rf_selected, "RandomForest_Model.pkl"),
    (scaler, "Scaler.pkl"),
    (important_features, "Selected_Features.pkl"),
)
for artifact, artifact_file in saved_artifacts:
    joblib.dump(artifact, os.path.join(model_dir, artifact_file))

# Predictions: hard labels plus the second probability column, which is
# later used as the score for label 1.
y_pred_selected = rf_selected.predict(X_test_selected)
class_probabilities = rf_selected.predict_proba(X_test_selected)
y_probs_selected = class_probabilities[:, 1]

# ==================================================================
# Performance Metrics After Feature Selection
# ==================================================================
print("\nModel Performance After Feature Selection:")

# (printed label, metric function, prediction array). Log loss is scored on
# the class-1 probabilities; every other metric uses the hard labels. Each
# metric is computed right before it is printed.
metric_table = (
    ("Test Accuracy", accuracy_score, y_pred_selected),
    ("Balanced Accuracy", balanced_accuracy_score, y_pred_selected),
    ("MCC", matthews_corrcoef, y_pred_selected),
    ("Log Loss", log_loss, y_probs_selected),
    ("F1 Score", f1_score, y_pred_selected),
    ("Recall", recall_score, y_pred_selected),
    ("Precision", precision_score, y_pred_selected),
)
for metric_label, metric_fn, metric_input in metric_table:
    print(f"{metric_label}: {metric_fn(y_test, metric_input):.4f}")

# Label 0 is reported as 'CH1' and label 1 as 'CH2'.
print(classification_report(y_test, y_pred_selected, target_names=['CH1', 'CH2']))

# Compute Confusion Matrix, FPR, and FNR
cm = confusion_matrix(y_test, y_pred_selected)
print("\nConfusion Matrix:")
print(cm)

# Rows are true labels and columns are predicted labels, so each rate is the
# off-diagonal count divided by its row total.
fp_rate = cm[0, 1] / cm[0].sum()
fn_rate = cm[1, 0] / cm[1].sum()
print(f"False Positive Rate (FPR): {fp_rate:.4f}")
print(f"False Negative Rate (FNR): {fn_rate:.4f}")

✅ Loaded Probe1 ADC1 CH1: (2481, 38)
✅ Loaded Probe1 ADC1 CH2: (2481, 38)
✅ Loaded Probe3 ADC1 CH1: (2481, 38)
✅ Loaded Probe3 ADC1 CH2: (2481, 38)

Top Important Features:
Max_H: Importance 0.0684
Min_H: Importance 0.0280
Mean_H: Importance 0.0961
Std_H: Importance 0.0123
Mean Deviation_H: Importance 0.0169
RMS_H: Importance 0.1322
Centroid_H: Importance 0.0000
Entropy_H: Importance 0.0144
Skewness_Freq_H: Importance 0.0000
Mean_Freq_H: Importance 0.0579
Variance_H: Importance 0.0606
Max_L: Importance 0.0001
Min_L: Importance 0.0165
Mean_L: Importance 0.1996
RMS_L: Importance 0.1272
Peak-to-Peak_L: Importance 0.0002
Entropy_L: Importance 0.0301
Mean_Freq_L: Importance 0.0124
Variance_L: Importance 0.1273

Model Performance After Feature Selection:
Test Accuracy: 1.0000
Balanced Accuracy: 1.0000
MCC: 1.0000
Log Loss: 0.0000
F1 Score: 1.0000
Recall: 1.0000
Precision: 1.0000
              precision    recall  f1-score   support

         CH1       1.00      1.00      1.00      2481
     

## **Sampling Rate 50MSPS**
This section contains data collected using a sampling rate of **50MSPS**. The model is trained on **Probe 1** and tested on **Probe 3**.

