## Import Packages

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Mount Google Drive so that files under /content/drive can be read and written
from google.colab import drive
drive.mount('/content/drive')

## Load the SELECTED (Top 30) Feature Dataset
* Results of ML3-1 and ML3-2

In [None]:
# Read the selected-feature CSV (no header row) and transpose it in one step;
# the later cells split and scale this table along its rows.
FeatureSelected = pd.read_csv(
    '/content/drive/MyDrive/Colab Notebooks/SavedFiles/FeatureSelected.csv',
    header=None,
).T
FeatureSelected.shape

In [None]:
# Standardize feature values with StandardScaler (fit and transform in one call).
# NOTE(review): the scaler is fit on every row here, before the train/test split
# performed later in the notebook, so test rows contribute to the scaling
# statistics. Consider fitting the scaler on the training rows only.
# NOTE(review): PCA and MinMaxScaler are not used in the visible cells - confirm
# whether these imports are still needed.
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler, MinMaxScaler

FeatureSelected_std = StandardScaler().fit_transform(FeatureSelected)
FeatureSelected_std.shape

## Split Training & Test Data
- Use the `train_test_split` function.
- It randomly splits the data into training and test sets according to the designated test ratio.

In [None]:
# Rows per condition (180 each): the first half of the rows is taken as the
# normal set and the remaining rows as the abnormal set.
NoOfData = FeatureSelected_std.shape[0] // 2

NormalSet, AbnormalSet = FeatureSelected_std[:NoOfData], FeatureSelected_std[NoOfData:]

NormalSet.shape, AbnormalSet.shape

In [None]:
from sklearn.model_selection import train_test_split

# Fraction of each condition held out as test data
TestData_Ratio = 0.2

# Each condition is split separately, using the same ratio and random seed
_split_options = {"test_size": TestData_Ratio, "random_state": 777}
TrainData_Nor, TestData_Nor = train_test_split(NormalSet, **_split_options)
TrainData_Abn, TestData_Abn = train_test_split(AbnormalSet, **_split_options)

print(TrainData_Nor.shape, TestData_Nor.shape)
print(TrainData_Abn.shape, TestData_Abn.shape)

## Data Labeling
- Use 'np.zeros' and 'np.ones'
- '0' refers to 'Normal' and '1' refers to 'Abnormal' in this tutorial

In [None]:
# One label per row of each split: 0 = Normal, 1 = Abnormal
TrainLabel_Nor, TestLabel_Nor = np.zeros(len(TrainData_Nor)), np.zeros(len(TestData_Nor))
TrainLabel_Abn, TestLabel_Abn = np.ones(len(TrainData_Abn)), np.ones(len(TestData_Abn))

print(TrainLabel_Nor.shape, TestLabel_Nor.shape)
print(TrainLabel_Abn.shape, TestLabel_Abn.shape)

## Data and Label Preparation

In [None]:
# Stack the normal rows first and the abnormal rows after them;
# the label vectors are joined in the same order so rows and labels stay aligned.
TrainData  = np.vstack((TrainData_Nor, TrainData_Abn))
TestData   = np.vstack((TestData_Nor, TestData_Abn))
TrainLabel = np.hstack((TrainLabel_Nor, TrainLabel_Abn))
TestLabel  = np.hstack((TestLabel_Nor, TestLabel_Abn))

print(TrainData.shape,  TestData.shape)
print(TrainLabel.shape, TestLabel.shape)

## Support Vector Machine (SVM) Model Training

In [None]:
from sklearn import metrics, svm

# Build an RBF-kernel SVM classifier and fit it on the training rows.
# Available kernel types: 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'
svmModel = svm.SVC(kernel='rbf').fit(TrainData, TrainLabel)

# Classify the held-out test rows (data the model was not trained on)
Predicted = np.array(svmModel.predict(TestData))
print(len(Predicted))
Predicted

Performance (Accuracy) Test Result

In [None]:
# Accuracy = fraction of test rows whose predicted label equals the true label.
# accuracy_score is documented as (y_true, y_pred), so the ground-truth labels go
# first. The value is the same either way for accuracy, but keeping the documented
# order avoids a silent error if this metric is later replaced by an asymmetric
# one such as precision or recall.
svmscore = metrics.accuracy_score(TestLabel, Predicted)

print("Accuracy of SVM model trained by top30 features:\n{}%".format(svmscore*100))

Save ML model (SVM) as a file

In [None]:
import joblib

# Persist the trained SVM model to Google Drive with joblib.
# NOTE(review): the file extension is '.plk' (possibly meant to be '.pkl'); the
# loading cell uses the same path, so change both together if it is renamed.
# NOTE(review): presumably the 'ML_Models' folder must already exist - verify.
joblib.dump(svmModel, '/content/drive/MyDrive/Colab Notebooks/SavedFiles/ML_Models/SVM_model.plk')

Load the saved ML model (SVM) and predict

In [None]:
# Reload the saved SVM model from the same path it was dumped to.
# NOTE: joblib files are pickle-based; only load model files from a trusted source.
SVM_model_Load = joblib.load('/content/drive/MyDrive/Colab Notebooks/SavedFiles/ML_Models/SVM_model.plk')

# Predict on the test rows with the reloaded model and display the labels
np.array(SVM_model_Load.predict(TestData))

.

.

.

.

.

### What if we trained with all 270 features (rather than the top 30 selected features)?

Load FeatureData (NOT FeatureSelected)

In [None]:
# Read the full feature CSV (no header row) and transpose it in one step;
# the following cell scales and splits this table along its rows.
FeatureTotal = pd.read_csv(
    '/content/drive/MyDrive/Colab Notebooks/SavedFiles/FeatureData.csv',
    header=None,
).T
FeatureTotal.shape

Check the test result:

In [None]:
# Full pipeline for the complete feature set: standardize, split, label,
# train an RBF-kernel SVM, and report test accuracy.

# Standardize features
# NOTE(review): the scaler is fit on all rows before the train/test split below,
# so test rows contribute to the scaling statistics.
FeatureTotal_std = StandardScaler().fit_transform(FeatureTotal)

# Divide into normal and abnormal dataset (first half of the rows / remaining rows)
NoOfData   = FeatureTotal_std.shape[0] // 2
NormalSet_total   = FeatureTotal_std[:NoOfData , :]
AbnormalSet_total = FeatureTotal_std[NoOfData: , :]

# Split training & test data
TestData_Ratio = 0.2
TrainData_Nor_total, TestData_Nor_total = train_test_split(NormalSet_total  , test_size=TestData_Ratio, random_state=777)
TrainData_Abn_total, TestData_Abn_total = train_test_split(AbnormalSet_total, test_size=TestData_Ratio, random_state=777)

# Labeling (0: Normal, 1: Abnormal)
# Label counts come from the *_total splits themselves, not from the top-30
# splits of the earlier cells, so labels always match the rows they describe.
TrainLabel_Nor_total, TrainLabel_Abn_total = np.zeros(TrainData_Nor_total.shape[0]), np.ones(TrainData_Abn_total.shape[0])
TestLabel_Nor_total , TestLabel_Abn_total  = np.zeros(TestData_Nor_total.shape[0]) , np.ones(TestData_Abn_total.shape[0])

# Data & Label for ML model (normal rows first, then abnormal rows)
TrainData_total  = np.concatenate([TrainData_Nor_total , TrainData_Abn_total ], axis=0)
TestData_total   = np.concatenate([TestData_Nor_total  , TestData_Abn_total  ], axis=0)
TrainLabel_total = np.concatenate([TrainLabel_Nor_total, TrainLabel_Abn_total], axis=0)
TestLabel_total  = np.concatenate([TestLabel_Nor_total , TestLabel_Abn_total ], axis=0)

# Train the SVM model
svmModel_total = svm.SVC(kernel = 'rbf')
svmModel_total.fit(TrainData_total , TrainLabel_total)

# Check test result; accuracy_score is documented as (y_true, y_pred)
Predicted_total = np.array(svmModel_total.predict(TestData_total))
svmscore_total = metrics.accuracy_score(TestLabel_total, Predicted_total)
print("Accuracy of SVM model trained by all(270) features:\n%.2f%%"%(svmscore_total*100))