In [35]:
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV, TimeSeriesSplit
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from sklearn.decomposition import PCA
import pandas as pd
from sklearn.pipeline import Pipeline

In [36]:
df = pd.read_csv(r'C:\Users\Jubin\Desktop\RP18\Datasets\intel\interpolated\injected_malfunction\mote=1_sensortype=temperature_faulttype=malfunction.csv')

In [37]:
df.columns

Index(['timestamp', 'mote_id', 'has_fault_type', 'temperature', 'light'], dtype='object')

In [38]:
X = df.drop(columns=['has_fault_type', 'light', 'mote_id', 'timestamp'])
Y = df['has_fault_type']

In [39]:
window_size = 1008
stride = 100

In [40]:
all_y_true = []
all_y_pred = []

In [41]:
param_grid = {
    'pca__n_components': [None, 1, 2, 3, 4],  # Adjust the number of PCA components
    'svm__C': [0.1, 1, 10],           # Regularization parameter
    'svm__kernel': ['linear', 'rbf']  # Kernel types (you can add more)
}

In [42]:
tscv = TimeSeriesSplit(n_splits=5)

pipeline = Pipeline([
    ('scaler', StandardScaler()),  # Data preprocessing: Standardize the features (mean=0, std=1)
    ('pca', PCA(svd_solver='full')),               # PCA for dimensionality reduction
    ('svm', SVC())                # SVM classifier
])

In [43]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.33, random_state=42)

In [44]:
for i in range(0, len(X_train) - window_size + 1, stride):
    # Extract the current time window of data
    X_window = X_train.iloc[i:i + window_size, :]
    y_window = Y_train.iloc[i:i + window_size]

    # Split the window into training and testing sets using time-based cross-validation
    for train_index, test_index in tscv.split(X_window):
        train_X, test_X = X_window.iloc[train_index], X_window.iloc[test_index]
        train_Y, test_Y = y_window.iloc[train_index], y_window.iloc[test_index]

        # Determine the maximum number of PCA components based on data size
        max_components = min(train_X.shape[0], train_X.shape[1])

        # Fit the Pipeline on the training data and perform GridSearchCV
        grid_search = GridSearchCV(pipeline, param_grid, cv=tscv, n_jobs=-1)
        grid_search.fit(train_X, train_Y)

        # Make predictions on the test set
        pred_Y = grid_search.predict(test_X)

        # Store true labels and predictions
        all_y_true.extend(test_Y)
        all_y_pred.extend(pred_Y)

90 fits failed out of a total of 150.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
30 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\Jubin\AppData\Local\Programs\Python\Python37\lib\site-packages\sklearn\model_selection\_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\Jubin\AppData\Local\Programs\Python\Python37\lib\site-packages\sklearn\pipeline.py", line 390, in fit
    Xt = self._fit(X, y, **fit_params_steps)
  File "c:\Users\Jubin\AppData\Local\Programs\Python\Python37\lib\site-packages\sklearn\pipeline.py", line 355, in _fit
    **fit_params_steps[name],
  File "c:\Users\Jubin\AppData\Local\Programs\Python\Python37\lib\site-p

In [45]:
# Evaluate the model's performance on the entire dataset
accuracy = accuracy_score(all_y_true, all_y_pred)
report = classification_report(all_y_true, all_y_pred)

In [46]:
print(f"Accuracy: {accuracy:.2f}")
print("Classification Report:")
print(report)

Accuracy: 0.82
Classification Report:
              precision    recall  f1-score   support

           0       0.82      1.00      0.90     85865
           2       0.99      0.03      0.06     19135

    accuracy                           0.82    105000
   macro avg       0.91      0.52      0.48    105000
weighted avg       0.85      0.82      0.75    105000

