In [1]:
import numpy as np
import pandas as pd
import os
from scipy.io import loadmat
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, f1_score
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Define the directory containing your dataset
dataset_dir = r'F:\RV System\RV_Systems_ML_Training_Sets'

# Initialize lists to store data and labels
data_list = []
labels_list = []

# Load data from .dat files
for i in range(1, 13):  # Assuming there are 12 files (S1 to S12)
    file_path = os.path.join(dataset_dir, f'S{i}.dat')
    data = loadmat(file_path)
    
    # Extract and store vibration data and corresponding labels
    if i in [1, 8, 12]:
        vibration_data = np.array(data['data'], dtype=np.complex128)
    else:
        vibration_data = np.array(data['data'], dtype=np.float64)
    
    data_list.append(vibration_data)
    labels_list.append(np.full(vibration_data.shape[0], i))  # Label each dataset as S1, S2, ...

# Convert lists to numpy arrays
X = np.vstack(data_list)
y = np.concatenate(labels_list)

FileNotFoundError: [Errno 2] No such file or directory: 'F:\\RV System\\RV_Systems_ML_Training_Sets\\S1.dat.mat'

In [None]:
# Handle missing values (if any)
X = np.nan_to_num(X)

# Standardize the data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Feature extraction: Statistical moments
def extract_features(data):
    features = []
    for i in range(data.shape[0]):
        row = data[i]
        mean = np.mean(row)
        var = np.var(row)
        skew = pd.Series(row).skew()
        kurt = pd.Series(row).kurtosis()
        features.append([mean, var, skew, kurt])
    return np.array(features)

X_features = extract_features(X_scaled)


In [None]:
# Apply PCA for dimensionality reduction
pca = PCA(n_components=10)  # Adjust number of components as needed
X_pca = pca.fit_transform(X_features)

# Visualize explained variance
plt.figure(figsize=(10, 6))
plt.plot(np.cumsum(pca.explained_variance_ratio_))
plt.xlabel('Number of Components')
plt.ylabel('Explained Variance')
plt.title('PCA - Explained Variance')
plt.grid()
plt.show()


In [None]:
# Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X_pca, y, test_size=0.3, random_state=42)

# SVM Model
svm = SVC(kernel='rbf', C=1.0, gamma='auto')
svm.fit(X_train, y_train)

# Predictions
y_pred_svm = svm.predict(X_test)

# Evaluate
print("SVM Accuracy:", accuracy_score(y_test, y_pred_svm))
print(confusion_matrix(y_test, y_pred_svm))
print(classification_report(y_test, y_pred_svm))

In [None]:
# k-NN Model
knn = KNeighborsClassifier(n_neighbors=5, metric='euclidean')
knn.fit(X_train, y_train)

# Predictions
y_pred_knn = knn.predict(X_test)

# Evaluate
print("k-NN Accuracy:", accuracy_score(y_test, y_pred_knn))
print(confusion_matrix(y_test, y_pred_knn))
print(classification_report(y_test, y_pred_knn))


In [None]:
# Decision Tree Model
tree = DecisionTreeClassifier(max_depth=10, min_samples_split=5)
tree.fit(X_train, y_train)

# Predictions
y_pred_tree = tree.predict(X_test)

# Evaluate
print("Decision Tree Accuracy:", accuracy_score(y_test, y_pred_tree))
print(confusion_matrix(y_test, y_pred_tree))
print(classification_report(y_test, y_pred_tree))


In [None]:
# Compare models using cross-validation
models = {
    'SVM': svm,
    'k-NN': knn,
    'Decision Tree': tree
}

cv_scores = {}
for name, model in models.items():
    scores = cross_val_score(model, X_pca, y, cv=5)
    cv_scores[name] = np.mean(scores)
    print(f"{name} CV Score: {np.mean(scores)}")

# Visualize cross-validation scores
plt.figure(figsize=(8, 6))
sns.barplot(x=list(cv_scores.keys()), y=list(cv_scores.values()))
plt.title('Cross-Validation Scores')
plt.ylabel('Accuracy')
plt.xlabel('Model')
plt.show()


In [None]:
# Grid Search for SVM
param_grid = {'C': [0.1, 1, 10, 100], 'gamma': ['scale', 'auto'], 'kernel': ['rbf', 'poly']}
grid = GridSearchCV(SVC(), param_grid, refit=True, verbose=2, cv=5)
grid.fit(X_train, y_train)

# Best parameters and model evaluation
print("Best Parameters for SVM:", grid.best_params_)
best_svm = grid.best_estimator_

# Predictions and Evaluation
y_pred_best_svm = best_svm.predict(X_test)
print("Tuned SVM Accuracy:", accuracy_score(y_test, y_pred_best_svm))
print(confusion_matrix(y_test, y_pred_best_svm))
print(classification_report(y_test, y_pred_best_svm))


In [None]:
# Select the best model based on accuracy
best_model = max(cv_scores, key=cv_scores.get)
print(f"The best model is: {best_model} with a cross-validation score of {cv_scores[best_model]}")

# Further reporting steps can be documented and visualized as needed.
