In [16]:
import pandas as pd
import numpy as np
import scipy.sparse as sp
import scipy as sc
from scipy.sparse.linalg import gmres 
import timeit
from sklearn import metrics  as m
from sklearn.decomposition import TruncatedSVD
from sklearn.neighbors import NearestNeighbors
import plotly.graph_objects as go
import plotly 
from plotly.subplots import make_subplots 
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import normalize
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression 
from sklearn.semi_supervised import LabelSpreading, LabelPropagation
from sklearn.neighbors import KNeighborsClassifier 

# 0. Processing DC motor dataset

In [17]:
'''
one_w_center - 0
one_w_border - 1
two_w_center - 2
two_w_border - 3
two_w_diag - 4
blank  - 5
'''

'\none_w_center - 0\none_w_border - 1\ntwo_w_center - 2\ntwo_w_border - 3\ntwo_w_diag - 4\nblank  - 5\n'

In [18]:
# Sub-folder / file-name prefix of each recorded condition; the loader
# iterates over this list to locate the CSV files.
data_class = [
    'twb',
    'twd',
    'twc',
    'owb',
    'owc',
    'blank',
]

In [19]:
# Load the accelerometer, magnetometer and gyroscope logs of every condition
# and join them into one frame per condition, keyed by the condition prefix.
list_sensors = dict()
data_folder = 'data/620rpm'
for t in data_class:
    # Files live in <data_folder>/<prefix>/<prefix><Sensor>.csv
    accelerometer = pd.read_csv(f'{data_folder}/{t}/{t}Accelerometer.csv')
    gyroscope = pd.read_csv(f'{data_folder}/{t}/{t}Gyroscope.csv')
    magnetometer = pd.read_csv(f'{data_folder}/{t}/{t}Magnetometer.csv')

    # The accelerometer rows are the reference: the other two sensors are
    # left-joined on the shared 'NodeTimestamp' column, so accelerometer
    # samples without a matching timestamp keep NaN in the joined columns.
    joined_results = (
        accelerometer
        .merge(magnetometer[['X (mGa)', 'Y (mGa)', 'Z (mGa)', 'NodeTimestamp']],
               on='NodeTimestamp', how='left')
        .merge(gyroscope[['X (dps)', 'Y (dps)', 'Z (dps)', 'NodeTimestamp']],
               on='NodeTimestamp', how='left')
    )[['X (mGa)', 'Y (mGa)', 'Z (mGa)',
       'X (dps)', 'Y (dps)', 'Z (dps)',
       'X (mg)', 'Y (mg)', 'Z (mg)']]

    # Replace the "<axis> (<unit>)" headers by identifier-friendly names.
    list_sensors[t] = joined_results.rename(columns={
        'X (mGa)': 'X_mGa', 'Y (mGa)': 'Y_mGa', 'Z (mGa)': 'Z_mGa',
        'X (dps)': 'X_dps', 'Y (dps)': 'Y_dps', 'Z (dps)': 'Z_dps',
        'X (mg)': 'X_mg', 'Y (mg)': 'Y_mg', 'Z (mg)': 'Z_mg',
    })

In [20]:
# Attach the numeric class label to every row of each condition's frame
# (stored as float, one constant value per condition).
for class_key, class_id in (('owc', 0), ('owb', 1), ('twc', 2),
                            ('twb', 3), ('twd', 4), ('blank', 5)):
    class_frame = list_sensors[class_key]
    class_frame['target'] = np.full(class_frame.shape[0], float(class_id))

In [21]:
# Stack the first 1024 rows of every condition into one frame; the original
# per-condition row index is kept, so index values repeat across conditions.
df_general = pd.concat(
    [list_sensors[class_key][:1024]
     for class_key in ('owc', 'owb', 'twc', 'twb', 'twd', 'blank')]
)

In [22]:
# Column order: magnetometer (mGa), gyroscope (dps), accelerometer (mg) axes,
# followed by the class label.
export_columns = [f'{axis}_{unit}'
                  for unit in ('mGa', 'dps', 'mg')
                  for axis in ('X', 'Y', 'Z')]
to_export = pd.DataFrame(df_general[export_columns + ['target']])

In [23]:
# One (n_samples, 3) array per sensor plus the label vector.
Xmga, Xmg, Xdps = (
    to_export[[f'X_{unit}', f'Y_{unit}', f'Z_{unit}']].to_numpy()
    for unit in ('mGa', 'mg', 'dps')
)
Y_ = to_export['target'].to_numpy()

# 1. Transforming data into graph structure

In [24]:
def generate_adj(X_array, nnodes, metric='minkowski', n_neighbours=25):
    """Build a symmetric 0/1 adjacency matrix from k-nearest-neighbour searches.

    A separate neighbour search is run in every feature space of ``X_array``
    and the resulting edges are merged (logical OR) into one matrix.

    Parameters
    ----------
    X_array : iterable of array-like
        One feature matrix per feature space. Each is fitted and queried
        with ``NearestNeighbors``; only the first ``nnodes`` query rows are
        used, and every returned neighbour index must be below ``nnodes``.
    nnodes : int
        Number of nodes; the result has shape ``(nnodes, nnodes)``.
    metric : str, default 'minkowski'
        Distance metric forwarded to ``NearestNeighbors``.
    n_neighbours : int, default 25
        Number of neighbours requested per node (the query set equals the
        fitted set, so a node is usually among its own neighbours).

    Returns
    -------
    numpy.ndarray
        Dense float matrix with ``Af[i, j] == Af[j, i] == 1`` whenever ``j``
        is among the neighbours of ``i`` in at least one feature space.
    """
    Af = np.zeros((nnodes, nnodes))
    for feature in X_array:
        nbrs = NearestNeighbors(n_neighbors=n_neighbours, metric=metric).fit(feature)
        # Distances are not needed, only the neighbour indices.
        indices = nbrs.kneighbors(feature, return_distance=False)[:nnodes]
        # Row i is repeated once per neighbour so (rows, cols) lists every edge.
        rows = np.repeat(np.arange(nnodes), indices.shape[1])
        cols = indices.ravel()
        # Write both directions to keep the matrix symmetric.
        Af[rows, cols] = 1
        Af[cols, rows] = 1
    return Af

In [25]:
def calc_A_hat(adj_matrix, delta, sigma, MMx) -> tuple:
    """Return the degree-scaled covariance term and the propagation matrix.

    With ``A = adj_matrix + I`` and ``D`` the vector of row sums of ``A``:

    * ``S_    = MMx @ diag(D ** (1 - 2 * sigma))``
    * ``A_hat = diag(D ** (sigma - 1)) @ A @ diag(D ** (-sigma)) - delta * S_``

    Parameters
    ----------
    adj_matrix : numpy.ndarray or scipy sparse matrix, shape (n, n)
        Adjacency matrix without the added self-loops.
    delta : float
        Weight of the ``S_`` term subtracted from the normalised adjacency.
    sigma : float
        Exponent controlling how the degree normalisation is split between
        the left and right factors.
    MMx : array-like, shape (n, n)
        Matrix that is right-multiplied by the degree scaling to give ``S_``.

    Returns
    -------
    tuple
        ``(S_, A_hat)`` as described above.
    """
    nnodes = adj_matrix.shape[0]
    A = adj_matrix + sp.eye(nnodes)  # add self-loops
    # Row sums as a flat 1-D array, regardless of whether the sum comes back
    # as an np.matrix column or a plain ndarray.
    D_vec = np.asarray(A.sum(axis=1)).ravel()
    lsigma = sigma - 1
    rsigma = -sigma
    wsigma = -2 * sigma + 1

    D_l = sp.diags(np.power(D_vec, lsigma))
    D_r = sp.diags(np.power(D_vec, rsigma))
    Dw = sp.diags(np.power(D_vec, wsigma))
    S_ = MMx @ Dw

    return S_, D_l @ A @ D_r - delta * S_

# 2. Optimization parts for the PRPCA algorithm

In [26]:
def IP(A, Z, Y, iter_, alpha):
    """Run ``iter_`` steps of the iteration ``Z <- alpha * A @ Z + (1 - alpha) * Y``.

    After every step the rows of ``Z`` are rescaled with
    ``normalize(..., norm='l1')``. The elapsed wall-clock time is printed.

    Parameters
    ----------
    A : array-like, shape (n, n)
        Propagation matrix. It is this argument that is used in the update;
        no module-level matrix is read.
    Z : array-like, shape (n, k)
        Initial scores.
    Y : array-like, shape (n, k)
        Label matrix mixed back in at every step.
    iter_ : int
        Number of iterations.
    alpha : float
        Weight of the propagated term; ``1 - alpha`` weights ``Y``.

    Returns
    -------
    numpy.ndarray
        Scores after the last iteration (the inputs are not modified).
    """
    # np.copy also turns np.matrix inputs into plain ndarrays.
    A = np.copy(A)
    Z = np.copy(Z)
    Y = np.copy(Y)
    start = timeit.default_timer()
    # Both products are loop-invariant, so compute them once.
    scaled_A = alpha * A
    restart = (1 - alpha) * Y
    for _ in range(iter_):
        Z = normalize(scaled_A @ Z + restart, norm='l1')
    print('time(s):', timeit.default_timer() - start)
    return Z

In [27]:
def GMRES(A, Y, alpha, k, tol):
    """Solve ``A @ z_j = (1 - alpha) * Y[:, j]`` for the first ``k`` columns of ``Y``.

    Parameters
    ----------
    A : array-like or scipy sparse matrix, shape (n, n)
        System matrix. Dense inputs (including ``np.matrix``) are viewed as
        ndarrays; sparse inputs are passed to the solver unchanged.
    Y : array-like, shape (n, >= k)
        Right-hand sides, one per column.
    alpha : float
        Each right-hand side is scaled by ``1 - alpha``.
    k : int
        Number of columns of ``Y`` to solve for.
    tol : float
        Convergence tolerance handed to ``scipy.sparse.linalg.gmres``.

    Returns
    -------
    numpy.ndarray, shape (n, k)
        Column ``j`` holds the solution for ``Y[:, j]``.

    Warns
    -----
    RuntimeWarning
        If the solver reports a non-zero status for a column.
    """
    import inspect
    import warnings

    A = A if sp.issparse(A) else np.asarray(A)
    Y = np.asarray(Y)
    # SciPy renamed the tolerance keyword from `tol` to `rtol` (the old name
    # was deprecated in 1.12 and later removed); use whichever is available.
    tol_kwarg = 'rtol' if 'rtol' in inspect.signature(gmres).parameters else 'tol'

    columns = []
    for j in range(k):
        solution, info = gmres(A, (1 - alpha) * Y[:, j], **{tol_kwarg: tol})
        if info != 0:
            warnings.warn(
                'gmres returned status %d for column %d' % (info, j),
                RuntimeWarning,
            )
        columns.append(solution)
    return np.column_stack(columns)

# 3. Computation of PRPCA, PaSVM and PaLR

In [29]:
# Sensor combinations to evaluate. `names` (output tag) and `sets_` (columns
# exported next to the scores) are index-aligned with `X_list`.
X_list = [[Xmga, Xdps], [Xmga, Xdps, Xmg], [Xmga], [Xmg], [Xdps]]
names = ['mgadps', 'all', 'mga', 'mg', 'dps']
sets_ = [['X_mGa', 'Y_mGa', 'Z_mGa', 'X_dps', 'Y_dps', 'Z_dps'],
         ['X_mg', 'Y_mg', 'Z_mg', 'X_mGa', 'Y_mGa', 'Z_mGa', 'X_dps', 'Y_dps', 'Z_dps'],
         ['X_mGa', 'Y_mGa', 'Z_mGa'],
         ['X_mg', 'Y_mg', 'Z_mg'],
         ['X_dps', 'Y_dps', 'Z_dps']]

seed = 0
sigma = 1
nl = 20          # labelled nodes drawn per class
alpha = 0.9
iter_ = 10
tol = 1e-03
delta = 1e-03
beta = 0.9
Z_collect = []
n_classes = 6    # targets are the values 0..5
nnodes = Xdps.shape[0]
for ind_, X in enumerate(X_list):
    mean_acc_prpca = []
    mean_acc_svm = []
    mean_acc_lr = []
    for seed in np.arange(1):
        Xall = np.concatenate(X, axis=1)
        nnodes = Xall.shape[0]
        MMx = np.zeros((nnodes, nnodes))
        w1 = 1 / len(X)
        # Per-sensor sample-by-sample Gram matrix of the median-centred
        # readings, averaged over the sensors of this combination.
        for x in X:
            Xn = x - np.median(x, axis=0)
            S = np.dot(Xn, Xn.T) / (Xn.shape[0] - 1)
            MMx += w1 * S
        # kNN adjacency merged over the sensor spaces, then the linear system
        # (I - alpha * AHAT) solved below.
        Af = generate_adj(X, nnodes, n_neighbours=25, metric='euclidean')
        mmc, AHAT = calc_A_hat(Af, delta, sigma, MMx)
        rex = (np.identity(nnodes) - alpha * AHAT)
        # Draw `nl` labelled nodes per class. Classes are visited in order
        # 0..5 so the random stream matches a class-by-class draw.
        rs = np.random.RandomState(seed=seed)
        all_lab = np.concatenate([
            rs.choice(np.where(Y_ == label)[0], nl, replace=False)
            for label in range(n_classes)
        ])
        # One-hot rows for the labelled nodes, all-zero rows for the rest.
        y_onehot = np.zeros((Xmga.shape[0], n_classes))
        y_onehot[all_lab, Y_[all_lab].astype(int)] = 1
        # PRPCA scores: one linear solve per class column.
        Z = GMRES(A=rex, Y=y_onehot, alpha=alpha, k=y_onehot.shape[1], tol=tol)
        prpca_pred = np.argmax(np.array(Z), axis=1)

        # Export features + scores. `.copy()` makes this an independent frame,
        # so adding columns does not write into a slice of `to_export`.
        df_X_mgadps = to_export[sets_[ind_]].copy()
        for label in range(n_classes):
            df_X_mgadps[f'c{label}'] = Z[:, label]
        df_X_mgadps['target'] = Y_
        df_X_mgadps['prpca'] = prpca_pred
        # keep the data for ZGP
        df_X_mgadps.to_csv(f'data/620_prpca_{names[ind_]}.csv', index=False)

        # Accuracy of the PRPCA arg-max over all nodes.
        mean_acc_prpca.append(m.accuracy_score(Y_, prpca_pred))

        # PaSVM / PaLR: classifiers trained on PRPCA pseudo-labels (z_*) and
        # scored against the true labels (y_*). A single call splits all three
        # arrays with the same row partition.
        X_glob = np.concatenate([Xall, Z], axis=-1)
        Z_glob = prpca_pred
        X_train, X_test, z_train, z_test, y_train, y_test = train_test_split(
            X_glob, Z_glob, Y_, test_size=0.3, random_state=1)
        # PaSVM
        svc = SVC(gamma='auto')
        svc.fit(X_train, z_train)
        scv_predict = svc.predict(X_test)
        mean_acc_svm.append(m.accuracy_score(y_test, scv_predict))
        # PaLR
        # NOTE(review): the recorded output shows lbfgs hitting its iteration
        # limit here; the features are unscaled. Hyper-parameters are left
        # as they were so the reported numbers stay comparable.
        clf = LogisticRegression(random_state=0)
        clf.fit(X_train, z_train)
        scv_predict = clf.predict(X_test)
        mean_acc_lr.append(m.accuracy_score(y_test, scv_predict))
    print(names[ind_], 'prlg', np.mean(mean_acc_lr))
    print(names[ind_], 'prsvc', np.mean(mean_acc_svm))
    print(names[ind_], 'prpca', np.mean(mean_acc_prpca))



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



mgadps prlg 0.19796954314720813
mgadps prsvc 0.6441060349689791
mgadps prpca 0.6516587677725119



lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression



all prlg 0.16412859560067683
all prsvc 0.4703891708967851
all prpca 0.6093432633716994




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/

mga prlg 0.15284827975183304
mga prsvc 0.15284827975183304
mga prpca 0.17603249830737983




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/

mg prlg 0.19458544839255498
mg prsvc 0.3998871968415116
mg prpca 0.4226472579553148




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/

dps prlg 0.3530738860688099
dps prsvc 0.6734348561759729
dps prpca 0.6946513202437373


# 4. Computation of LP, KNN, SVM and LR 

In [14]:
# Baselines on the gyroscope features only: every model sees the same
# 6 x nl labelled nodes and is scored on all nodes.
nl = 20
nnodes = Xdps.shape[0]
rs = np.random.RandomState(seed=0)
# Classes are drawn in the order 0..5, one rs.choice call per class.
ind0lab, ind1lab, ind2lab, ind3lab, ind4lab, ind5lab = (
    rs.choice(np.where(Y_ == label)[0], nl, replace=False) for label in range(6)
)
all_lab = np.concatenate([ind0lab, ind1lab, ind2lab, ind3lab, ind4lab, ind5lab])
# One-hot rows for the labelled nodes (not consumed by the models below).
y_train = np.zeros((Xdps.shape[0], 6))
y_train[all_lab, Y_[all_lab].astype(int)] = 1

X_lab = Xdps[all_lab]
y_lab = Y_[all_lab]

# k-nearest neighbours
neigh = KNeighborsClassifier(n_neighbors=25, metric='euclidean')
neigh.fit(X_lab, y_lab)
yknn_predict = neigh.predict(Xdps)
print('knn', m.accuracy_score(Y_, yknn_predict))

# Label propagation: -1 marks the unlabelled nodes.
y_lp = np.full(nnodes, -1.0)
y_lp[all_lab] = y_lab
lp = LabelPropagation(kernel='rbf', gamma=2, n_neighbors=25)
lp.fit(Xdps, y_lp)
yLp_predict = lp.predict(Xdps)
print('lp', m.accuracy_score(Y_, yLp_predict))

# Logistic regression
clf = LogisticRegression(random_state=0)
clf.fit(X_lab, y_lab)
scv_predict = clf.predict(Xdps)
print('lg', m.accuracy_score(Y_, scv_predict))

# Support vector machine
svc = SVC(gamma='auto')
svc.fit(X_lab, y_lab)
scv_predict = svc.predict(Xdps)
print('svm', m.accuracy_score(Y_, scv_predict))

knn 0.2860528097494922
lp 0.31635071090047395
lg 0.2975626269465132
svm 0.6411645226811103
