In [None]:
import pandas as pd
from scipy import signal as sig
import numpy as np
from math import pi


# Recordings to process; each CSV must provide a 'uA' column with the signal samples.
file_paths = [
        'pbs_25.csv',
        'pbs_22.csv',
        'pbs_23.csv',
        'pbs_24.csv',
]
# Define the feature names (this is also the column order of the exported table)
feature_names = ['peak_to_peak_amplitude', 'mean_amplitude_peaks', 'mean_amplitude_valleys', 'std_deviation_peak_amplitudes', 'std_deviation_valley_amplitudes', 'signal_energy', 'signal_power', 'zero_crossing_rate']

# Filter and peak-detection settings. The cutoffs are expressed in the same unit
# as SAMPLING_RATE and are normalised to the Nyquist frequency before use.
FILTER_ORDER = 4
SAMPLING_RATE = 20
HIGHPASS_CUTOFF = 5
LOWPASS_CUTOFF = 7
DERIVATIVE_PEAK_HEIGHT = 0.2


def _butter_filtfilt(samples, cutoff, kind):
    """Run a Butterworth filter of type `kind` ('high' or 'low') forward and backward over `samples`."""
    b, a = sig.butter(FILTER_ORDER, 2 * cutoff / SAMPLING_RATE, kind)
    return sig.filtfilt(b, a, samples)


def extract_features(values):
    """Compute the feature vector of one recording.

    The samples are high-pass filtered, then low-pass filtered. Peaks and
    valleys are located on the first derivative of the filtered signal, and
    the amplitude statistics are read from the filtered signal at those
    positions.

    Parameters
    ----------
    values : array-like
        1-D sequence of signal samples.

    Returns
    -------
    dict
        Maps every entry of `feature_names` to its value.

    Raises
    ------
    ValueError
        If the derivative has no peak or no valley above
        DERIVATIVE_PEAK_HEIGHT (the amplitude statistics would be undefined),
        or if scipy rejects the input (for example a signal that is too
        short for `filtfilt`).
    """
    highpassed = _butter_filtfilt(values, HIGHPASS_CUTOFF, 'high')
    signal = _butter_filtfilt(highpassed, LOWPASS_CUTOFF, 'low')

    # Take first derivative of the cleaned signal
    derivative = np.gradient(signal)

    # Find peaks and valleys of the derivative (valleys are peaks of its negation)
    peaks, _ = sig.find_peaks(derivative, height=DERIVATIVE_PEAK_HEIGHT)
    valleys, _ = sig.find_peaks(-derivative, height=DERIVATIVE_PEAK_HEIGHT)
    if len(peaks) == 0 or len(valleys) == 0:
        # Fail with a readable message instead of numpy's "zero-size array" error.
        raise ValueError(
            'no peaks or no valleys above height {} found in the signal derivative'.format(
                DERIVATIVE_PEAK_HEIGHT
            )
        )

    # Indices come from the derivative; amplitudes are taken from the filtered signal.
    peak_amplitudes = signal[peaks]
    valley_amplitudes = signal[valleys]

    signal_energy = np.sum(signal**2)
    # Number of positions where the sign of consecutive samples differs.
    zero_crossing_count = np.count_nonzero(np.diff(np.sign(signal)))

    return {
        'peak_to_peak_amplitude': np.max(peak_amplitudes) - np.min(valley_amplitudes),
        'mean_amplitude_peaks': np.mean(peak_amplitudes),
        'mean_amplitude_valleys': np.mean(valley_amplitudes),
        'std_deviation_peak_amplitudes': np.std(peak_amplitudes),
        'std_deviation_valley_amplitudes': np.std(valley_amplitudes),
        'signal_energy': signal_energy,
        'signal_power': signal_energy / len(signal),
        'zero_crossing_rate': zero_crossing_count / len(signal),
    }


# Collect one feature row per CSV file. Rows are gathered in a list and the
# DataFrame is built once, because DataFrame.append no longer exists in pandas 2.x.
feature_rows = []
for file_path in file_paths:
    # Read the CSV file
    data = pd.read_csv(file_path)
    values = data['uA']
    try:
        feature_rows.append(extract_features(values))
    except ValueError as exc:
        # Name the offending recording so a failing batch is easy to diagnose.
        raise ValueError(f'{file_path}: {exc}') from exc

feature_vectors = pd.DataFrame(feature_rows, columns=feature_names)

# Export the feature vectors to a CSV file (once, after every file was processed)
feature_vectors.to_csv('feature_vectors.csv', index=False)


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score

# Read the feature-vector table and discard rows with missing values
data = pd.read_csv('features_set_100x7.csv').dropna()  # Replace with your vectors file

# 'Type' is the class label; every remaining column is a model input
y = data['Type']
X = data.drop(columns='Type')

# Hold out 30 % of the rows, then halve that hold-out into a validation set
# and a test set (15 % each); the other 70 % train the model.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.3, random_state=1880
)
X_validation, X_test, y_validation, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=1880
)

# Build the random forest and fit it on the training split
forest_settings = dict(
    n_estimators=100,
    max_depth=20,
    min_samples_split=2,
    min_samples_leaf=4,
    random_state=1880,
)
rf_classifier = RandomForestClassifier(**forest_settings)
rf_classifier.fit(X_train, y_train)

# Predict the classes of the validation and test splits
y_validation_pred = rf_classifier.predict(X_validation)
y_test_pred = rf_classifier.predict(X_test)

# Accuracy and support-weighted precision for both splits
validation_accuracy = accuracy_score(y_validation, y_validation_pred)
test_accuracy = accuracy_score(y_test, y_test_pred)
validation_precision = precision_score(y_validation, y_validation_pred, average='weighted')
test_precision = precision_score(y_test, y_test_pred, average='weighted')

# Side-by-side tables of actual vs. predicted class, test split first
test_results = pd.DataFrame({'Actual': y_test, 'Predicted': y_test_pred})
validation_results = pd.DataFrame({'Actual': y_validation, 'Predicted': y_validation_pred})
for heading, table in (
    ("\nTest Results:", test_results),
    ("\nValidation Results:", validation_results),
):
    print(heading)
    print(table)

# Summary scores, rounded to three decimals
score_lines = (
    ("\nValidation Accuracy: {:.3f}", validation_accuracy),
    ("\nTest Accuracy: {:.3f}", test_accuracy),
    ("\nValidation Precision: {:.3f}", validation_precision),
    ("\nTest Precision: {:.3f}", test_precision),
)
for template, score in score_lines:
    print(template.format(score))





Test Results:
       Actual Predicted
166        AA        AA
374  AA_UA_DA  AA_UA_DA
242     AA_DA        AA
331  AA_UA_DA  AA_UA_DA
562        DA        DA
..        ...       ...
254     UA_DA     UA_DA
419     UA_DA     UA_DA
13         AA        AA
214        AA        AA
622    PBS_DA    PBS_DA

[108 rows x 2 columns]

Validation Results:
     Actual Predicted
522      AA        AA
444   UA_DA     UA_DA
295   UA_DA     UA_DA
238      DA        DA
146   AA_DA     AA_DA
..      ...       ...
652  PBS_DA    PBS_DA
681  PBS_DA    PBS_DA
118      DA        UA
665  PBS_DA    PBS_DA
418   UA_DA     AA_DA

[107 rows x 2 columns]

Validation Accuracy: 0.766

Test Accuracy: 0.815

Validation Precision: 0.787

Test Precision: 0.818


In [None]:
import pandas as pd
from sklearn.model_selection import cross_val_score, KFold
from sklearn.ensemble import RandomForestClassifier

# Load dataset and separate features (X) and target variable (y).
# Rows with missing values are dropped, as in the hold-out evaluation that
# reads this same file, so both evaluations score the same samples and the
# forest is never fitted on NaNs.
data = pd.read_csv('features_set_100x7.csv').dropna()  # Replace with your vectors file
X = data.drop('Type', axis=1)  # Features
y = data['Type']  # Target variable

# NOTE(review): this forest uses scikit-learn's default hyperparameters, not the
# max_depth / min_samples_leaf values of the hold-out model — confirm that is intended.
model = RandomForestClassifier(random_state=1880)
# NOTE(review): KFold does not keep class proportions equal across folds;
# StratifiedKFold may be preferable for this multi-class target. Left unchanged
# so the reported numbers stay comparable.
kfold = KFold(n_splits=5, shuffle=True, random_state=1880)

# Perform 5-fold cross-validation and evaluate using accuracy
scores = cross_val_score(model, X, y, cv=kfold, scoring='accuracy')

# Calculate mean and standard deviation of the per-fold accuracy scores
mean_accuracy = scores.mean()
std_accuracy = scores.std()

# Print results
print(f'Mean Accuracy: {mean_accuracy}')
print(f'Standard Deviation: {std_accuracy}')


Mean Accuracy: 0.803049728049728
Standard Deviation: 0.024944659701907616
