# 1. Load collected dataset
The dataset is stored in Google Drive in a folder called p25-dataset and is loaded into this Colab notebook.

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
import os
from google.colab import drive

from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_curve, auc
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, losses
from tensorflow.keras.models import Model

In [None]:

# Mount Google Drive into the Colab VM at /gdrive.
drive.mount('/gdrive/')
# List the mount point to confirm that the mount is visible.
!ls /gdrive
# Folder that holds the per-activity CSV recordings.
file_path = '/gdrive/My Drive/Colab Notebooks/p25-dataset'


Mounted at /gdrive/
MyDrive


In [None]:
# NOTE(review): file_path already ends in "p25-dataset", so this resolves to
# ".../p25-dataset/p25-dataset"; the variable is not used in the cells shown here.
my_data_path = os.path.join(file_path, "p25-dataset")

# Work inside the dataset folder so that relative file names resolve there.
os.chdir(file_path)


The folder p25-dataset contains the sensor data for each activity. The activities include:
1. Dancing
2. Walking
3. Ascending stairs
4. Descending stairs
5. Sitting
6. Jumping
7. Standing

We tried to keep the activities as diverse as possible so that the model can be trained with different types of activities (for example sitting and dancing). 

In [None]:

# List the per-activity CSV files in the dataset folder.
# file_path is reused instead of repeating the literal path, and the combined
# export (written into this same folder further down) is skipped so that a
# re-run does not treat it as an additional activity.
my_data_fpaths = [
    a for a in os.listdir(file_path)
    if a.endswith('.csv') and a != 'combined_csv2.csv'
]
my_data_fpaths

['p25-dancing.csv',
 'p25-walking.csv',
 'p25-ascending.csv',
 'p25-sitting.csv',
 'p25-jumping.csv',
 'p25-descending.csv',
 'p25-standing.csv']

In [None]:
import glob
import pandas as pd

In [None]:
# Lookup tables filled in by the next cell:
# class2fname maps an integer class id to a file stem, fname2class is the inverse.
class2fname = {}
fname2class = {}

In [None]:
# Give every CSV file an integer class id (its position in my_data_fpaths) and
# record the mapping in both directions, keyed by the file name up to its first dot.
for i, each in enumerate(my_data_fpaths):
    classname = each.partition(".")[0]
    class2fname[i] = classname
    fname2class[classname] = i

In [None]:
# Collect every file in the current working directory that has the given extension.
extension = 'csv'
all_filenames = glob.glob('*.' + extension)

Combine all CSV files into a single CSV file called `combined_csv2.csv`.

In [None]:
# Combine the per-activity CSV files into one frame and export it.
# The export lands in the same folder that the glob above scans, so it has to be
# excluded from the inputs; otherwise a re-run would concatenate the previous
# combined file into the new one and inflate the dataset.
COMBINED_CSV_NAME = "combined_csv2.csv"
source_csvs = [f for f in all_filenames if os.path.basename(f) != COMBINED_CSV_NAME]
combined_csv = pd.concat([pd.read_csv(f) for f in source_csvs])
# Export without the index so only the sensor columns are written.
combined_csv.to_csv(COMBINED_CSV_NAME, index=False, encoding='utf-8-sig')

# 2. Transpose the data

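Transpose the columns to rows and the rows to columns. The result has 6 rows (one per sensor axis) and one column per recorded sample plus one column for the header, i.e. [6 rows x 99961 columns] for the run shown below.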



In [None]:
# Re-read the combined file without a header row: the column names (aX ... gZ)
# then become row 0, which is why the values are handled as strings further down.
dataframe = pd.read_csv("combined_csv2.csv", header=None)
raw_data = dataframe.values
dataframe.head()

Unnamed: 0,0,1,2,3,4,5
0,aX,aY,aZ,gX,gY,gZ
1,-0.82,-0.54,0.03,-1.34,-2.69,-2.69
2,-0.82,-0.54,0.03,-1.59,-2.69,-2.69
3,-0.82,-0.54,0.02,-1.53,-2.5,-2.75
4,-0.82,-0.53,0.02,-1.46,-2.32,-2.93


In [None]:
# Swap rows and columns so that every sensor axis becomes one row.
df_tr = dataframe.T
print(df_tr)

  0      1      2      3      4      5      6      7      8      9      ...  \
0    aX  -0.82  -0.82  -0.82  -0.82  -0.82  -0.82  -0.82  -0.83  -0.83  ...   
1    aY  -0.54  -0.54  -0.54  -0.53  -0.53  -0.53  -0.53  -0.53  -0.53  ...   
2    aZ   0.03   0.03   0.02   0.02   0.02   0.02   0.02   0.03   0.03  ...   
3    gX  -1.34  -1.59  -1.53  -1.46  -1.34  -1.04  -0.85  -0.92  -1.16  ...   
4    gY  -2.69  -2.69   -2.5  -2.32  -2.32  -2.26   -2.2  -2.32   -2.5  ...   
5    gZ  -2.69  -2.69  -2.75  -2.93  -2.99  -3.11  -2.99  -3.17  -3.23  ...   

    99951   99952   99953   99954   99955   99956   99957   99958   99959  \
0   -0.67   -0.67   -0.67   -0.68   -0.68   -0.67   -0.68   -0.68   -0.68   
1   -0.69   -0.68   -0.68   -0.68   -0.68   -0.68   -0.68   -0.68   -0.68   
2    0.21    0.21    0.22    0.22    0.22    0.22    0.22    0.24    0.25   
3  -11.72  -12.27  -12.51  -12.63   -13.0  -12.33   -11.6   -10.8  -11.23   
4    8.67     9.4   10.19   10.93   11.96   12.15   11.96   1

# 3. Convert Dataframe to Array

Convert the dataframe into an array for segmenting the data before feature extraction.

In [None]:
# Convert the transposed frame to a NumPy array (one row per sensor axis) and
# show its contents and shape.
arr = df_tr.to_numpy()
print(arr)
print(arr.shape)

[['aX' '-0.82' '-0.82' ... '-0.68' '-0.68' '-0.68']
 ['aY' '-0.54' '-0.54' ... '-0.68' '-0.68' '-0.69']
 ['aZ' '0.03' '0.03' ... '0.24' '0.25' '0.27']
 ['gX' '-1.34' '-1.59' ... '-10.8' '-11.23' '-12.27']
 ['gY' '-2.69' '-2.69' ... '11.35' '11.11' '11.11']
 ['gZ' '-2.69' '-2.69' ... '-11.23' '-11.23' '-11.05']]
(6, 99961)


# 4. Array segmentation 

Each row of the array is divided into segments of 238 samples.

Here we remove the first column of `arr`, since it holds the axis/sensor header labels (aX ... gZ), which are not numerical sensor data.

In [None]:
# Drop column 0, which holds the axis labels, keeping only the sample values.
b = np.delete(arr, 0, axis=1)
print("b = ", "\n", b)
print("\n", "----------------------------", "\n", "dimensions of b: ")

# Displayed as the cell output, directly under the banner printed above.
b.shape

b =  
 [['-0.82' '-0.82' '-0.82' ... '-0.68' '-0.68' '-0.68']
 ['-0.54' '-0.54' '-0.54' ... '-0.68' '-0.68' '-0.69']
 ['0.03' '0.03' '0.02' ... '0.24' '0.25' '0.27']
 ['-1.34' '-1.59' '-1.53' ... '-10.8' '-11.23' '-12.27']
 ['-2.69' '-2.69' '-2.5' ... '11.35' '11.11' '11.11']
 ['-2.69' '-2.69' '-2.75' ... '-11.23' '-11.23' '-11.05']]

 ---------------------------- 
 dimensions of b: 


(6, 99960)

Next, each row of array `b` is saved into a separate variable.

In [None]:
# One 1-D array per sensor axis, taken from the first six rows of b in the
# order accelerometer x/y/z, then gyroscope x/y/z.
acc_x, acc_y, acc_z, gyr_x, gyr_y, gyr_z = b[:6]

In [None]:
# Sanity check: number of samples available for one axis.
acc_x.shape

(99960,)

Use `reshape` to split the data into segments of 238 samples, and use `np.dstack` to stack the data of all 6 axes.

In [None]:
# Number of consecutive samples that form one segment.
SEGMENT_LENGTH = 238

# Split every axis into rows of SEGMENT_LENGTH samples. Passing -1 lets NumPy
# derive the number of segments from the data, so the cell keeps working when the
# amount of recorded data changes; reshape still raises if the sample count is
# not a multiple of SEGMENT_LENGTH.
Acc_X = acc_x.reshape(-1, SEGMENT_LENGTH)
Acc_Y = acc_y.reshape(-1, SEGMENT_LENGTH)
Acc_Z = acc_z.reshape(-1, SEGMENT_LENGTH)
Gyr_X = gyr_x.reshape(-1, SEGMENT_LENGTH)
Gyr_Y = gyr_y.reshape(-1, SEGMENT_LENGTH)
Gyr_Z = gyr_z.reshape(-1, SEGMENT_LENGTH)

# Stack the six 2-D arrays along a new last axis:
# result shape is (n_segments, SEGMENT_LENGTH, 6).
segmented = np.dstack((Acc_X, Acc_Y, Acc_Z, Gyr_X, Gyr_Y, Gyr_Z))
print(type(segmented[0][0][0]))

<class 'str'>


Since `segmented` holds string data, it is converted to float so that it can be used for feature extraction.

In [None]:
# Convert the string values to floats; the shape is unchanged.
floatArray =segmented.astype(float)

In [None]:
# Spot check: first sample of the first segment on the first axis.
floatArray[0][0][0]

-0.82

In [None]:
# Confirm that the elements are now floats rather than strings.
print(type(floatArray[0][0][0]))

<class 'numpy.float64'>


# 5. Label data 

1. Create a vector. 
2. First 60 elements would be the labels for activity 1, second 60 activity 2 ... last 60 would be for the last one.
3. rotate the vector and dump it alongside the 3d array

In [None]:
import pickle

In [None]:
# NOTE(review): `transpose` is not used in the cells shown here; the import is
# kept only in case another cell relies on the name.
from numpy.ma.core import transpose

# Build the label vector: one integer class id per segment.
# The segments come from the concatenated recordings, so the first
# SEGMENTS_PER_ACTIVITY entries get label 0, the next block label 1, and so on.
# Assumes every activity contributes exactly SEGMENTS_PER_ACTIVITY segments.
N_ACTIVITIES = 7
SEGMENTS_PER_ACTIVITY = 60

ans = np.repeat(np.arange(N_ACTIVITIES), SEGMENTS_PER_ACTIVITY)

ans3 = np.array(ans, dtype=int)
ans3.shape


(420,)

In [None]:
# Store the 3-D segment array together with its label vector as one pickled tuple.

file_name = "/gdrive/My Drive/Colab Notebooks/"+ "segmentdata.pkl"
# The context manager closes the file even if pickling raises.
with open(file_name, "wb") as file:
    pickle.dump((floatArray, ans3), file)

# 6. Feature extraction

Data stored in the pkl file

In [None]:
# Folder scanned for pickled segment files by the feature-extraction loop below.
# NOTE(review): the pickle written earlier goes to
# "/gdrive/My Drive/Colab Notebooks/segmentdata.pkl", not into this folder;
# confirm how the file listed here is produced.
data_folder="/gdrive/My Drive/Colab Notebooks/final/"
os.listdir(data_folder)

['activity_acc_gyr.pkl']

In [None]:
def get_features(sensor_segment):
    """
    Compute nine summary statistics for one axis of one sensor segment.

    Parameters
    ----------
    sensor_segment : 1-D array-like of numbers
        The samples of a single sensor axis within one segment.

    Returns
    -------
    numpy.ndarray of shape (9,)
        In order: amplitude (max - mean), median, mean, max, min,
        peak-to-peak (max - min), standard deviation, RMS and
        start-to-end difference (first sample - last sample).

    Raises
    ------
    ValueError
        If the segment contains no samples.
    """
    # Accept lists as well as arrays and make sure the arithmetic is done in floats.
    sensor_segment = np.asarray(sensor_segment, dtype=float)
    if sensor_segment.size == 0:
        raise ValueError("sensor_segment must contain at least one sample.")

    features = np.zeros(9)  # we extract nine features

    mean_value = np.mean(sensor_segment)
    max_value = np.max(sensor_segment)
    min_value = np.min(sensor_segment)

    # the first feature is the amplitude
    features[0] = max_value - mean_value

    # the second feature is the median
    features[1] = np.median(sensor_segment)

    # the third feature is the mean
    features[2] = mean_value

    # the fourth feature is the max value
    features[3] = max_value

    # the fifth feature is the min value
    features[4] = min_value

    # the sixth feature is the peak to peak value
    features[5] = max_value - min_value

    # the seventh feature is the standard deviation
    features[6] = np.std(sensor_segment)

    # the eighth feature is the RMS value
    features[7] = np.sqrt(np.mean(sensor_segment**2))

    # the ninth feature is the start to end value: first sample minus LAST
    # sample (the previous code subtracted the second sample instead).
    features[8] = sensor_segment[0] - sensor_segment[-1]

    return features

In [None]:
def get_sensor_segments(file_path):
    """
    Load a pickled ``(X, Y_)`` pair from ``file_path``.

    Parameters
    ----------
    file_path : str
        Path of a pickle file that contains a two-element tuple.

    Returns
    -------
    tuple
        The two unpickled objects ``(X, Y_)``, in the order they were stored.
    """
    # NOTE: unpickling can execute arbitrary code, so only load trusted files.
    # The context manager closes the file even if unpickling fails.
    with open(file_path, "rb") as file:
        X, Y_ = pickle.load(file)
    return X, Y_

In [None]:
def get_feature_names(sensor_type):
    """
    Return the 54 feature labels in the form "<axis>-<feature>".

    Labels are ordered axis by axis (Acc_X, Acc_Y, Acc_Z, Gyr_X, Gyr_Y, Gyr_Z)
    with the nine feature abbreviations inside each axis.

    Parameters
    ----------
    sensor_type : str
        Must be "acc", "gyro" or "mag". The value is only validated; the
        returned labels are the same for every accepted value.

    Returns
    -------
    list of str

    Raises
    ------
    ValueError
        If ``sensor_type`` is not one of the accepted values.
    """
    if sensor_type not in ["acc", "gyro", "mag"]:
        raise ValueError("Sensor type not supported.")

    # NOTE(review): "RMF" labels the RMS feature and looks like a typo; it is
    # kept unchanged so the returned labels stay the same.
    features = ["AMP", "MED", "MEAN", "MAX", "MIN", "P2P", "STD", "RMF", "S2E"]
    axes = ["Acc_X", "Acc_Y", "Acc_Z", "Gyr_X", "Gyr_Y", "Gyr_Z"]

    return [axis + "-" + f for axis in axes for f in features]

In [None]:
def extract_features(file_path):
    """
    Load the segments stored in ``file_path`` and compute features per segment.

    For every segment, the features of axes 0-5 (as returned by
    ``get_features``) are concatenated into one flat row.

    Returns
    -------
    tuple
        ``(rows, Y)`` where ``rows`` is a list with one flat feature list per
        segment and ``Y`` is the second object loaded from the pickle file.
    """
    X, Y = get_sensor_segments(file_path)
    print(X.shape)

    final_features = []
    for segment in X:
        # Flatten the per-axis feature vectors into a single row.
        row = [
            value
            for axis in range(6)
            for value in get_features(segment[:, axis])
        ]
        final_features.append(row)

    return final_features, Y

In [None]:
def extract_features_and_get_df(path, sensor_type):
    """
    Extract the features for the pickle at ``path`` and return them as a frame.

    The frame has one row per segment, one column per feature and a final
    ``Label`` column. ``sensor_type`` is accepted but not used.
    """
    feature_rows, segment_labels = extract_features(path)
    return pd.DataFrame(feature_rows).assign(Label=segment_labels)

In [None]:
# Run the feature extraction for every file found in data_folder.
# data_df is overwritten on each pass, so it ends up holding the last file's result.
for item in os.listdir(data_folder):
    # File name up to its last dot.
    sensor_node_name = item[:item.rfind(".")]
    # Text after the last underscore of that name.
    sensor_name = sensor_node_name.rsplit("_", 1)[-1]
    sensor_segment_path = data_folder + item
    # Target for the (currently disabled) export of the feature frame.
    save_path = "../feature_extracted_data/" + sensor_node_name +"_features.pkl"

    print("Processing {} ..".format(sensor_node_name))

    # Build the feature frame for this file and report its shape.
    data_df = extract_features_and_get_df(sensor_segment_path, sensor_name)
    print(data_df.shape)

    # Saving data_df to save_path with pickle is intentionally left disabled.

Processing activity_acc_gyr ..
(420, 238, 6)
(420, 55)


In [None]:
from sklearn.model_selection import train_test_split
# Split the feature frame into the label vector and the feature matrix.
Yt = data_df['Label'].values
Xt = data_df.drop(['Label'], axis = 1)
n_total_featues = Xt.shape[1]
# column_names and k_columns are read again by later cells
# (feature selection and get_x_y_for_level).
column_names = Xt.columns
k_columns = pd.DataFrame(Xt.columns)
n_total_featues, column_names

(54,
 Index([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
        18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
        36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53],
       dtype='object'))

Extracted features for all the accelerometer and gyroscope data for each activity

In [None]:
# Summary statistics of every extracted feature column.
Xt.describe()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,44,45,46,47,48,49,50,51,52,53
count,420.0,420.0,420.0,420.0,420.0,420.0,420.0,420.0,420.0,420.0,...,420.0,420.0,420.0,420.0,420.0,420.0,420.0,420.0,420.0,420.0
mean,0.417438,-0.37956,-0.393295,0.024143,-1.126786,1.150929,0.197997,0.522527,0.002262,0.4450525,...,-0.520762,53.428504,-4.560595,-3.306194,50.12231,-47.304833,97.427143,19.177841,20.902654,-0.023857
std,0.363119,0.347367,0.346268,0.370542,0.976433,1.101082,0.176829,0.268012,0.074612,0.3690567,...,7.802796,48.612564,7.553088,7.227032,48.878425,38.814339,81.721932,14.574486,14.363366,4.233732
min,0.0,-0.935,-0.90958,-0.7,-4.0,0.0,0.0,0.09816,-0.51,-5.5511150000000004e-17,...,-54.14,0.239664,-37.02,-35.360462,-3.05,-257.14,0.48,0.100972,2.047442,-18.43
25%,0.02375,-0.67125,-0.679622,-0.19,-1.7675,0.04,0.009373,0.189216,-0.01,0.02426471,...,-1.65,1.910105,-5.5925,-4.54729,-0.8975,-69.52,4.1875,0.605253,3.153563,-1.175
50%,0.366891,-0.5,-0.513739,0.16,-0.83,0.91,0.16894,0.615325,0.0,0.4389496,...,-0.06,52.435903,-3.095,-3.033676,49.045,-47.36,102.17,21.712564,22.5998,0.0
75%,0.676639,-0.01,-0.025861,0.25,-0.46,1.745,0.298981,0.719773,0.02,0.636292,...,1.04,75.776534,-2.5525,-1.689359,72.905,-4.745,141.12,29.420805,30.92049,1.1
max,1.74605,0.32,0.318109,1.33,0.3,5.02,0.807961,1.119914,0.75,1.612521,...,82.7,363.268992,24.23,29.290756,366.64,-3.05,468.93,71.328211,71.870764,26.36


# 7. Feature Selection


In [None]:

# Number of segments per class label.
np.unique(Yt, return_counts=True)

(array([0, 1, 2, 3, 4, 5, 6]), array([60, 60, 60, 60, 60, 60, 60]))

Selecting four of the 54 calculated features using Recursive Feature Elimination (RFE).

In [None]:
# Number of features that the recursive feature elimination should keep.
n_features_to_select = 4

In [None]:
# With Recursive Method
from sklearn.feature_selection import RFE
# DecisionTreeClassifier is otherwise only imported in a later cell, which makes
# this cell fail with a NameError on a fresh kernel (Restart & Run All).
from sklearn.tree import DecisionTreeClassifier

# RFE repeatedly fits the estimator and removes the weakest features until
# n_features_to_select remain.
estimator = DecisionTreeClassifier()
selector = RFE(estimator, n_features_to_select=n_features_to_select)

In [None]:
# x_train / y_train are otherwise only created in a later cell, so on a fresh
# kernel this cell would raise a NameError. The split uses the same arguments as
# that later cell, so both produce the identical partition.
x_train, x_test, y_train, y_test = train_test_split(Xt, Yt, test_size=0.5, random_state=42)
selector.fit(x_train, y_train)

RFE(estimator=DecisionTreeClassifier(), n_features_to_select=4)

In [None]:
print("The selected features with Recursive Method are")
# selector.support_ is a boolean mask over the input columns; it picks the
# matching column labels out of k_columns, flattened to a 1-D array.
rfe_selected = k_columns.values[selector.support_].reshape(-1)
rfe_selected

The selected features with Recursive Method are


array([2, 6, 7, 19], dtype=object)

#### The selected Features are: 

*  Mean for Acc_X
*  Standard Deviation for Acc_X
*  RMS for Acc_X
*  Median for Acc_Z

  

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import cross_val_predict
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import LinearSVC, SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier

# Hold out half of the segments for testing; the fixed seed keeps the split stable.
x_train, x_test, y_train, y_test = train_test_split(Xt, Yt,test_size=0.5, random_state=42)

clf0 = DecisionTreeClassifier(max_depth=4)
clf1 = SVC()
clf2 = RandomForestClassifier()
clf4 = KNeighborsClassifier()
clf5 =  LogisticRegression(solver='liblinear', random_state=0)

# The decision tree only sees the RFE-selected columns; all other models are
# trained on the full feature set.
clf0.fit(x_train[rfe_selected], y_train)
clf1.fit(x_train, y_train)
clf2.fit(x_train, y_train)
clf4.fit(x_train, y_train)
clf5.fit(x_train, y_train)
print('DT  {}'.format(clf0.score(x_test[rfe_selected], y_test)))
print('SVM {}'.format(clf1.score(x_test, y_test)))
print('RF  {}'.format(clf2.score(x_test, y_test)))
print('KNN {}'.format(clf4.score(x_test, y_test)))
print('LR  {}'.format(clf5.score(x_test, y_test)))
# Reuse the already fitted forest so that the confusion matrix describes the
# same model whose accuracy was printed above; refitting an unseeded
# RandomForestClassifier would produce a different model.
y_pred = clf2.predict(x_test)

y_pred = pd.Series(y_pred)
print(confusion_matrix(y_test, y_pred))

DT  0.7714285714285715
SVM 0.5380952380952381
RF  0.9380952380952381
KNN 0.7428571428571429
LR  0.7904761904761904
[[30  0  0  0  1  1  0]
 [ 2 33  0  1  0  0  1]
 [ 0  2 27  0  1  1  0]
 [ 0  0  0 29  0  0  0]
 [ 1  0  0  0 22  0  0]
 [ 0  0  0  0  1 25  0]
 [ 0  0  0  0  1  0 31]]


The Decision Tree was taken and coded into the C file which was loaded into the Arduino. The 4 features were also calculated in the C file. 

In [None]:
from sklearn import tree
# Print the fitted decision tree as nested if/else rules.
# No feature names are passed, so the rules refer to feature_0 ... feature_3,
# i.e. positions within the columns clf0 was trained on (x_train[rfe_selected]).
text_representation = tree.export_text(clf0)
print(text_representation)

|--- feature_3 <= 0.79
|   |--- feature_1 <= 0.05
|   |   |--- feature_0 <= -0.46
|   |   |   |--- class: 6
|   |   |--- feature_0 >  -0.46
|   |   |   |--- class: 1
|   |--- feature_1 >  0.05
|   |   |--- feature_2 <= 0.61
|   |   |   |--- feature_2 <= 0.39
|   |   |   |   |--- class: 1
|   |   |   |--- feature_2 >  0.39
|   |   |   |   |--- class: 0
|   |   |--- feature_2 >  0.61
|   |   |   |--- feature_0 <= -0.63
|   |   |   |   |--- class: 5
|   |   |   |--- feature_0 >  -0.63
|   |   |   |   |--- class: 4
|--- feature_3 >  0.79
|   |--- class: 3



# 8. One Class Classification

In [None]:
# dft is a second name for the same DataFrame object (no copy is made).
dft = data_df
# Keep only the rows of class 2, the class the one-class model is trained on.
p1 = dft[data_df.Label == 2]
# Positional row slice: skips the first class-2 row and keeps up to 200 rows.
# NOTE(review): the bounds 1 and 201 look arbitrary - confirm the intent.
p1 = p1[1:201]
p1.shape

(59, 55)

In [None]:
# Labels and feature matrix of the class-2 subset.
y1 = p1['Label'].values
x1 = p1.drop(['Label'], axis = 1)
x1.shape

(59, 54)

In [None]:
# Labels and feature matrix of the complete feature frame.
Y = dft['Label'].values
X = dft.drop(['Label'], axis = 1)
X.shape

(420, 54)

In [None]:
# Keep only the segments whose label is 3 or higher.
# dftt is defined here because the cell that creates it comes later in the
# notebook, which raises a NameError on a fresh kernel (Restart & Run All).
dftt = data_df[data_df.Label >= 3]
Yt = dftt['Label'].values
Xt = dftt.drop(['Label'], axis = 1)
Xt.shape

(240, 54)

In [None]:
# Rows of the feature frame whose label is 3 or higher.
dftt = data_df[data_df.Label >= 3]

In [None]:
# Plain NumPy view of the full feature matrix, used for label-based row lookup.
X_values = X.values
n_total_featues = X.shape[1]
# column_names is read as a global by get_x_y_for_level.
column_names = X.columns
k_columns = pd.DataFrame(X.columns)
n_total_featues, column_names

(54,
 Index([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
        18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
        36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53],
       dtype='object'))

In [None]:
# Same bookkeeping for the class-2 subset.
# This overwrites n_total_featues, column_names and k_columns from the cell above.
x1_values = x1.values
n_total_featues = x1.shape[1]
column_names = x1.columns
k_columns = pd.DataFrame(x1.columns)
n_total_featues, column_names

(54,
 Index([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
        18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
        36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53],
       dtype='object'))

In [None]:
# Only the value of the last expression is displayed, so the counts for Y are
# computed but not shown; the cell output is the count for y1.
np.unique(Y, return_counts=True)
np.unique(y1, return_counts=True)

(array([2]), array([59]))

In [None]:
def get_x_y_for_level(X_total, Y_total, level_labels, columns=None):
    """
    Collect the rows of ``X_total`` whose label is one of ``level_labels``.

    Parameters
    ----------
    X_total : numpy.ndarray
        Feature matrix with one row per sample.
    Y_total : numpy.ndarray
        Label of every row of ``X_total``.
    level_labels : iterable
        Labels to select. Rows are returned grouped in this order.
    columns : sequence, optional
        Column labels of the returned frame. Defaults to the notebook-level
        ``column_names`` variable, which this function read implicitly before
        the parameter was added.

    Returns
    -------
    X : pandas.DataFrame
        The selected rows with a fresh 0..n-1 index.
    Y : list
        The labels that belong to the rows of ``X``.
    """
    if columns is None:
        columns = column_names

    # Gather one frame per label and concatenate once at the end. This avoids
    # re-copying the growing frame in every iteration and avoids seeding the
    # concat with an empty object-dtype frame.
    frames = []
    Y = []
    for p in level_labels:
        index = np.where(Y_total == p)
        frames.append(pd.DataFrame(X_total[index], columns=columns))
        Y.extend(Y_total[index])

    if frames:
        X = pd.concat(frames, axis=0)
    else:
        # No labels requested: return an empty frame with the expected columns.
        X = pd.DataFrame(columns=columns)

    print(X.shape, len(Y))
    X = X.reset_index(drop = True)
    return X, Y

In [None]:
# Label lists passed to get_x_y_for_level in the next cell:
# test_labels selects the "seen" class, train_labels the "unseen" data.
# NOTE(review): label 2 appears in both lists, so class-2 rows end up in the
# unseen set as well, and labels 5 and 6 are in neither list - confirm the intent.
test_labels = [2]
train_labels = [0,1,2,3,4]

## Get Seen and Unseen Data

In [None]:
# Get the unseen data: all rows whose label is in train_labels.
x_tst, y_tst= get_x_y_for_level (X_values, Y, train_labels)
print(len(x_tst))
# Get the seen data: all rows whose label is in test_labels.
x_n, y_n= get_x_y_for_level (X_values, Y, test_labels)
# Label "0" for the seen training rows.
# NOTE(review): this overwrites the y_train of the supervised section and is
# not read again in the cells shown here.
y_train = np.zeros_like(y1)
# Evaluation features: the unseen rows followed by the seen rows.
# NOTE(review): x_n holds every label-2 row, including the rows in x1 that the
# one-class model is trained on, so training and evaluation data overlap.
x_test = pd.concat([x_tst, x_n], axis = 0).reset_index(drop=True)
print(len(x_test))
print(len(x1))
# Label "1" for the unseen data.
y_test = np.ones_like(y_tst)
# Append label "0" for the seen rows, in the same order as x_test.
y_test = np.concatenate([y_test, np.zeros_like(y_n)])

(300, 54) 300
300
(60, 54) 60
360
59


In [None]:
from sklearn.feature_selection import RFE
from sklearn.svm import LinearSVC, SVC
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import OneClassSVM
from sklearn import linear_model
from sklearn.model_selection import LeaveOneOut
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.neural_network import MLPClassifier
from scipy import signal
from sklearn.utils import resample
from sklearn.metrics import confusion_matrix
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.tree import export_graphviz
from IPython.display import Image  
import pydotplus
from sklearn import tree
from pydot import graph_from_dot_data
from sklearn.svm import OneClassSVM
from sklearn.decomposition import PCA
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.linear_model import Perceptron
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import KFold
from sklearn.metrics import f1_score
from sklearn.metrics import classification_report
from sklearn.metrics import matthews_corrcoef
from sklearn.metrics import precision_recall_fscore_support
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import cross_val_score
import random
from sklearn.metrics import f1_score
from sklearn.svm import OneClassSVM
from sklearn.model_selection import train_test_split
from sklearn.ensemble import IsolationForest
from sklearn.covariance import EllipticEnvelope
from sklearn.neighbors import LocalOutlierFactor
from numpy import vstack
from sklearn.model_selection import LeaveOneOut
from sklearn.model_selection import LeavePOut
from sklearn.model_selection import LeavePGroupsOut
from itertools import combinations
from sklearn.metrics import accuracy_score
from google.colab import drive

The unseen data is denoted by -1 in the confusion matrix. As seen in the confusion matrix, the recall for unseen data is 85% for this dataset and the precision is 94%. 

In [None]:
#https://scikit-learn.org/stable/modules/generated/sklearn.svm.OneClassSVM.html
# One-class SVM with an RBF kernel; gamma and nu are fixed by hand.
model = OneClassSVM(kernel= 'rbf',gamma=0.5, nu=0.2)

#https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.IsolationForest.html#sklearn.ensemble.IsolationForest
# model1 = IsolationForest(n_estimators=100 ,contamination='auto',max_features=81)

# train the model on the seen-class rows only (x1)
model.fit(x1)
# model1.fit(x1)

# detect outliers in the test set
yhat = model.predict(x_test)
# yhat1 = model1.predict(x_test)


# Translate the 0/1 ground truth to the model's convention: inliers 1, outliers -1.
# The order matters: the original 1s (unseen) must become -1 before the 0s
# (seen) are turned into 1.
# NOTE(review): this rewrites y_test in place, so running the cell a second time
# without rebuilding y_test turns every label into -1.
y_test[y_test == 1] = -1
y_test[y_test == 0] = 1
# calculate score
print(classification_report(y_test, yhat))
# print(classification_report(y_test, yhat1))
#score = f1_score(y_test, yhat, pos_label=1)
#print('F1 Score: %.3f' % score)
yhat = pd.Series(yhat)
y_test = pd.Series(y_test)
#yhat.to_csv("test123.csv")
#print(accuracy_score(y_test, yhat, normalize=True))
#y_test.to_csv("test124.csv")
# Rows are the true labels, columns the predicted labels.
cmat = confusion_matrix(y_test, yhat) 
# cmat1 = confusion_matrix(y_test, yhat1) 

print(cmat) 

              precision    recall  f1-score   support

          -1       0.94      0.85      0.90       300
           1       0.50      0.73      0.59        60

    accuracy                           0.83       360
   macro avg       0.72      0.79      0.74       360
weighted avg       0.87      0.83      0.85       360

[[256  44]
 [ 16  44]]
