# IMPORT LIBRARIES AND FUNCTIONS

In [None]:
def sammon(x, n, display = 0, inputdist = 'raw', maxhalves = 20, maxiter = 1, tolfun = 1e-9, init = 'default'):
    """Perform Sammon mapping on dataset x.

    y, E = sammon(x, n) applies the Sammon nonlinear mapping procedure on
    multivariate data x, where each row represents a pattern and each column
    represents a feature.  On completion, y contains the corresponding
    co-ordinates of each point on the n-dimensional map and E the value of
    the cost function (i.e. the stress of the mapping).  Note if x contains
    any duplicated rows, a ValueError is raised because their pairwise
    distance is zero.

    Parameters:
       x              - data matrix (rows = patterns), or a square matrix of
                        pairwise distances when inputdist == 'distance'
       n              - number of output dimensions
       display        - 0 to 2. 0 least verbose, 2 max verbose.
       inputdist      - {'raw','distance'} if set to 'distance', x is
                        interpreted as a matrix of pairwise distances.
       maxhalves      - maximum number of step halvings
       maxiter        - maximum number of iterations
       tolfun         - relative tolerance on objective function
       init           - {'pca', 'cmdscale', 'random', 'default'}
                        default is 'pca' if inputdist is 'raw',
                        'cmdscale' if inputdist is 'distance'

    Returns:
       [y, E]         - list holding the (N, n) map and the scaled stress.

    Raises:
       ValueError     - if init == 'pca' is combined with a distance input,
                        if the distance matrix has a non-zero diagonal, or if
                        any off-diagonal distance is not strictly positive.

    File        : sammon.py
    Date        : 18 April 2014
    Authors     : Tom J. Pollard (tom.pollard.11@ucl.ac.uk)
                : Ported from MATLAB implementation by
                  Gavin C. Cawley and Nicola L. C. Talbot
    Description : Simple python implementation of Sammon's non-linear
                  mapping algorithm [1].
    References  : [1] Sammon, John W. Jr., "A Nonlinear Mapping for Data
                  Structure Analysis", IEEE Transactions on Computers,
                  vol. C-18, no. 5, pp 401-409, May 1969.
    Copyright   : (c) Dr Gavin C. Cawley, November 2007.
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.
    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.
    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
    """
    # Function-scope imports keep the cell self-contained; they must come
    # after the docstring, otherwise the string above is not a docstring.
    import numpy as np
    from scipy.spatial.distance import cdist

    # Create distance matrix unless given by parameters
    if inputdist == 'distance':
        D = x
        if init == 'default':
            init = 'cmdscale'
    else:
        D = cdist(x, x)
        if init == 'default':
            init = 'pca'

    if inputdist == 'distance' and init == 'pca':
        raise ValueError("Cannot use init == 'pca' when inputdist == 'distance'")

    if np.count_nonzero(np.diagonal(D)) > 0:
        raise ValueError("The diagonal of the dissimilarity matrix must be zero")

    # Remaining initialisation
    N = x.shape[0]
    scale = 0.5 / D.sum()
    # Put ones on the diagonal so that 1 / D is finite everywhere; the
    # diagonal terms of the stress cancel because d gets the same treatment.
    D = D + np.eye(N)

    if np.count_nonzero(D <= 0) > 0:
        raise ValueError("Off-diagonal dissimilarities must be strictly positive")

    Dinv = 1 / D
    if init == 'pca':
        # Only the leading n left singular vectors are used, so the reduced
        # SVD avoids building an N x N matrix of left singular vectors.
        UU, DD, _ = np.linalg.svd(x, full_matrices=False)
        y = UU[:, :n] * DD[:n]
    elif init == 'cmdscale':
        # NOTE(review): cmdscale receives D with ones on its diagonal (the
        # eye was added above) -- confirm this is intended.
        from cmdscale import cmdscale
        y, _ = cmdscale(D)
        y = y[:, :n]
    else:
        y = np.random.normal(0.0, 1.0, [N, n])
    one = np.ones([N, n])
    d = cdist(y, y) + np.eye(N)
    dinv = 1. / d
    delta = D - d
    E = ((delta ** 2) * Dinv).sum()

    # Get on with it
    for i in range(maxiter):
        # Compute gradient, Hessian and search direction (note it is actually
        # 1/4 of the gradient and Hessian, but the step size is just the ratio
        # of the gradient and the diagonal of the Hessian so it doesn't
        # matter).
        delta = dinv - Dinv
        deltaone = np.dot(delta, one)
        g = np.dot(delta, y) - (y * deltaone)
        dinv3 = dinv ** 3
        y2 = y ** 2
        H = np.dot(dinv3, y2) - deltaone - 2 * y * np.dot(dinv3, y) + y2 * np.dot(dinv3, one)
        s = -g.flatten(order='F') / np.abs(H.flatten(order='F'))
        y_old = y

        # Use step-halving procedure to ensure progress is made.
        # E_new starts at E so the termination test below stays defined even
        # when maxhalves is 0 and no trial step is taken.
        E_new = E
        for j in range(maxhalves):
            s_reshape = np.reshape(s, (-1, n), order='F')
            y = y_old + s_reshape
            d = cdist(y, y) + np.eye(N)
            dinv = 1 / d
            delta = D - d
            E_new = ((delta ** 2) * Dinv).sum()
            if E_new < E:
                break
            s = 0.5 * s
        else:
            # Only reached when no trial step lowered the stress, so a
            # success on the very last halving no longer triggers the warning.
            print('Warning: maxhalves exceeded. Sammon mapping may not converge...')

        # Evaluate termination criterion
        if abs((E - E_new) / E) < tolfun:
            if display:
                print('TolFun exceeded: Optimisation terminated')
            break

        # Report progress
        E = E_new
        if display > 1:
            print('epoch = %d : E = %12.10f' % (i + 1, E * scale))
    else:
        # Loop ran out of iterations without meeting the tolerance.
        if maxiter > 0:
            print('Warning: maxiter exceeded. Sammon mapping may not have converged...')

    # Fiddle stress to match the original Sammon paper
    E = E * scale

    return [y, E]

In [None]:
%matplotlib inline
import math
import numpy as np
import pandas as pd 
import scipy.stats as stats
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import random 
import time
from collections import defaultdict
from scipy.stats.stats import pearsonr
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier
from sklearn import preprocessing
from sklearn.model_selection import train_test_split, RepeatedKFold
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score, classification_report, roc_curve, auc, plot_confusion_matrix, roc_auc_score

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error

from sklearn.cluster import DBSCAN
from PyNomaly import loop

from sklearn.covariance import EllipticEnvelope

from sklearn.neighbors import LocalOutlierFactor

# evaluate model performance with outliers removed using isolation forest
from pandas import read_csv
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import IsolationForest
from sklearn.metrics import mean_absolute_error

from pyod.models.knn import KNN   # kNN detector
import pyod
from pyod.utils.data import generate_data
from pyod.utils.utility import score_to_label

In [None]:
import warnings

# Keep the notebook output quiet: first mute UserWarning explicitly, then
# install a catch-all "ignore" filter for every other warning category.
warnings.simplefilter(action="ignore", category=UserWarning)
warnings.filterwarnings(action="ignore")

# IMPORT DATA

In [None]:
dataname = 'cardio.csv'

# Load the dataset and discard the index column that was saved with it.
data = pd.read_csv(dataname, delimiter=',')
data.drop(columns='Unnamed: 0', inplace=True)
#data.drop('outlier', inplace=True, axis=1)
data.head()

In [None]:
# Detach the label column: `pop` returns it and removes it from `data` in place.
target = data.pop('class')

#supervised?
#data.drop('outlier', inplace=True, axis=1)

#FROID-LIGHT

In [None]:
# Module-level result tables filled in by froid_light():
#   scores_df / binary_scores_df      - continuous and binary detector outputs
#   df_dimredu / df_dataredu / df_binredu - embeddings of the scores, the
#                                           original data and the binary flags
scores_df, binary_scores_df, df_dimredu, df_dataredu, df_binredu = (
    pd.DataFrame() for _ in range(5)
)

from pyod.models.mcd import MCD
from pyod.models.suod import SUOD
from pyod.models.loda import LODA
from pyod.models.feature_bagging import FeatureBagging
from pyod.models.knn import KNN
from pyod.models.hbos import HBOS
from pyod.models.cblof import CBLOF
from pyod.models.ocsvm import OCSVM
from pyod.models.cof import COF
from pyod.models.copod import COPOD
from pyod.models.ecod import ECOD
from pyod.models.sos import SOS
from pyod.models.pca import PCA
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn import random_projection
from sklearn.manifold import Isomap
from keras.models import Model
from keras.layers import Input, Dense
from keras import regularizers
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
from sklearn.decomposition import KernelPCA
import pandas as pd
from sklearn.manifold import TSNE
import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn import manifold
from functools import partial
from collections import OrderedDict
from time import time
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.ticker import NullFormatter

def froid_light(data):
  """Run the FROID-light feature extraction on ``data`` (best effort).

  The function returns nothing; it works through side effects on
  module-level objects:

  * ``scores_df`` gets one column of continuous outlierness scores per
    detector and ``binary_scores_df`` one column of outlier flags.  For the
    scikit-learn detectors the -1 / +1 predictions are recoded to 1 / 0;
    pyod predictions are stored as returned by ``predict``.
  * ``df_dataredu``, ``df_dimredu`` and ``df_binredu`` get the PCA, Gaussian
    random projection, t-SNE and LDA embeddings of ``data``, ``scores_df``
    and ``binary_scores_df`` respectively.
  * ``target`` is read as the label vector for the LDA fits.

  Every step is guarded on its own: when it fails, the original failure
  message plus the exception are printed and the remaining steps still run.

  NOTE(review): the column names mention ``cont0.001`` / ``nei10`` but every
  detector is built with its library defaults -- confirm the names are only
  labels before relying on them.

  Parameters
  ----------
  data : table of features handed unchanged to every estimator; its
      ``.index`` is used for the PCA embedding, so a DataFrame is expected.
  """

  def _report(message, exc):
    # Stay best-effort, but do not hide why a step was skipped.
    print(message)
    print('  reason: %r' % (exc,))

  def _flag_outliers(pred):
    # scikit-learn marks outliers with -1; store them as 1 and the rest as 0.
    return np.where(pred == -1, 1, 0)

  def _make_pyod_pca():
    # The module-level name ``PCA`` is bound to pyod's detector and then
    # rebound to scikit-learn's decomposition, so it cannot be trusted here.
    from pyod.models.pca import PCA as PyodPCA
    return PyodPCA(n_components=2)

  # ---- scikit-learn detectors -------------------------------------------
  try:
    print('EllipticEnvelope')
    ee = EllipticEnvelope()
    pred = ee.fit_predict(data)
    dec_func = ee.decision_function(data)
    maha = ee.mahalanobis(data)
    scores_df['elliptic_decfunc_original_cont0.001'] = dec_func
    scores_df['elliptic_mahalanobis_original_cont0.001'] = maha
    binary_scores_df['elliptic_env_original_binary_cont0.001'] = _flag_outliers(pred)
  except Exception as exc:
    _report('Exception Raised --> Elliptic Envelope Config 1', exc)

  try:
    print('IsolationForest')
    iso = IsolationForest(n_jobs=-1)
    # fit_predict returns -1 for outliers and +1 for inliers
    pred = iso.fit_predict(data)
    # Flip the sign and shift by 0.5 so that larger values mean "more
    # anomalous" (the score convention of the original paper).
    original_paper_score = 0.5 - iso.decision_function(data)
    scores_df['iso_forest_paper_score_original_cont0.001'] = original_paper_score
    binary_scores_df['iso_forest_paper_score_original_binary_cont0.001'] = _flag_outliers(pred)
  except Exception as exc:
    _report('Exception Raised --> Iso Forest Config 1', exc)

  try:
    print('LOF')
    # novelty=True is required to call decision_function / predict after fit
    lof = LocalOutlierFactor(novelty=True, metric='minkowski', n_jobs=-1)
    lof.fit(data)
    lof_score = lof.decision_function(data)
    pred = lof.predict(data)
    scores_df['lof_score_original_mink_nei10'] = lof_score
    binary_scores_df['lof_score_original_binary_mink_nei10'] = _flag_outliers(pred)
  except Exception as exc:
    _report('Exception Raised --> LOF Config 1', exc)

  # ---- pyod detectors ---------------------------------------------------
  # (label, factory, column key, column suffix, failure message).  Factories
  # are lambdas so that construction errors are caught per detector as well.
  pyod_detectors = (
    ('COPOD', lambda: COPOD(n_jobs=-1), 'copod', '',
     'Exception Raised -> COPOD Config 1'),
    ('ECOD', lambda: ECOD(n_jobs=-1), 'ecod', '',
     'Exception Raised -> ECOD Config 1'),
    ('PCA', _make_pyod_pca, 'pca', '_whitFalse',
     'Exception Raised -> PCA Config 1 data maybe not converged'),
    ('MCD', lambda: MCD(), 'mcd', '',
     'Exception Raised -> MCD Config 1'),
    ('COF', lambda: COF(), 'cof', '_nei10',
     'Exception Raised -> COF Config 1'),
    ('CBLOF', lambda: CBLOF(n_jobs=-1), 'cblof', '',
     'Exception Raised -> CBLOF Config 1'),
    ('HBOS', lambda: HBOS(), 'hbos', '',
     'Exception Raised -> HBOS Config 1'),
    ('KNN', lambda: KNN(n_jobs=-1), 'knn', '',
     'Exception Raised -> KNN Config 1'),
    ('FeatureBagging', lambda: FeatureBagging(n_jobs=-1), 'featbagg', '',
     'Exception Raised -> Feature Bagging Config 1'),
    ('LODA', lambda: LODA(), 'loda', '',
     'Exception Raised -> LODA Config 1'),
    ('SUOD', lambda: SUOD(), 'suod', '',
     'Exception Raised -> SUOD Config 1'),
  )
  for label, make_detector, key, suffix, failure in pyod_detectors:
    try:
      print(label)
      clf = make_detector()
      clf.fit(data)
      pred = clf.predict(data)
      raw_scores = clf.decision_scores_  # raw outlier scores on the train data
      scores_df['pyod_%s_original_cont0.001%s' % (key, suffix)] = raw_scores
      binary_scores_df['pyod_%s_original_binary_cont0.001%s' % (key, suffix)] = pred
    except Exception as exc:
      _report(failure, exc)

  # ---- dimensionality reduction -----------------------------------------
  # (input table, output table, column tag) for the three embedded views.
  data_view = (data, df_dataredu, 'data')
  score_view = (scores_df, df_dimredu, 'scores')
  binary_view = (binary_scores_df, df_binredu, 'binary')

  print('')
  print('Dimensionality Reduction Algorithms: ')
  print('PCA')
  from sklearn.decomposition import PCA as SklearnPCA
  pca = SklearnPCA(n_components=2)
  # Each view is fitted and stored independently so that one failure does not
  # prevent the other embeddings from being saved.
  for config, (frame, sink, tag) in enumerate((data_view, score_view, binary_view), start=1):
    try:
      pca.fit(frame)
      projected = pd.DataFrame(pca.transform(frame), index=frame.index)
      sink['pca1_' + tag] = projected[0]
      sink['pca2_' + tag] = projected[1]
    except Exception as exc:
      _report('Exception Raised --> PCA Config %d' % config, exc)

  print('RandomGaussianProjection')
  rsp = random_projection.GaussianRandomProjection(n_components=2)
  for frame, sink, tag in (score_view, data_view, binary_view):
    try:
      projected = rsp.fit_transform(frame)
      sink['sub_proj1_' + tag] = projected[:, 0]
      sink['sub_proj2_' + tag] = projected[:, 1]
    except Exception as exc:
      _report('Exception Raised --> Random Projection Config ' + tag, exc)

  print('TSNE')
  tsne = TSNE(n_components=2)
  for frame, sink, tag in (score_view, data_view, binary_view):
    try:
      projected = tsne.fit_transform(frame)
      sink['sne_1_' + tag] = projected[:, 0]
      sink['sne_2_' + tag] = projected[:, 1]
    except Exception as exc:
      _report('Exception Raised --> TSNE Config ' + tag, exc)

  print('LinearDiscriminantAnalysis')
  clf = LinearDiscriminantAnalysis()
  lda_failures = {
    'scores': 'Exception Raised --> Linear Discriminant Analysis (classes exceeded)',
    'data': 'Exception Raised --> Linear Discriminant Analysis Config (classes exceeded)',
    'binary': 'Exception Raised --> Linear Discriminant Analysis Config (classes exceeded)',
  }
  for frame, sink, tag in (score_view, data_view, binary_view):
    try:
      clf.fit(frame, target)
      projected = clf.transform(frame)
      # Store the components that exist, capped at the 11 columns the
      # pipeline has always written, instead of indexing until IndexError.
      for k in range(min(11, projected.shape[1])):
        sink['lda_1_%s_%d' % (tag, k)] = projected[:, k]
    except Exception as exc:
      _report(lda_failures[tag], exc)

#APPLY FEATURE EXTRACTION PIPELINE ON ORIGINAL DATA

In [None]:
import time
from sklearn.preprocessing import RobustScaler

start_time = time.time()

print('Feature Extraction: \n')
froid_light(data)

end = time.time()
# Report the time elapsed by the first feature extraction phase
print("Support Features Extraction Time: %.8s seconds" % (end - start_time))

# --- --- --- --- --- #

# Columns holding NaN cannot be scaled: list them, then drop them.
# `thresh` is deliberately not passed -- recent pandas versions reject
# `how` and `thresh` being given together.
print('\nNull Values in Outlierness Scores DataSets:')
for label, frame in (('Scores_df: ', scores_df), ('Binary_Scores_df: ', binary_scores_df)):
  print(label)
  print(frame.columns[frame.isna().any()].tolist())
  frame.dropna(axis=1, how="any", inplace=True)
  print('--- --- ---')

print('\nNull Values in Dimensionality Reduction DataSets:')
for frame in (df_dimredu, df_dataredu, df_binredu):
  # Count the NaNs before dropping; counting afterwards always gives zero.
  print(frame.isnull().sum())
  frame.dropna(axis=1, how="any", inplace=True)
  print('--- --- ---')

# --- --- --- --- --- #

# Normalize the outlierness scores; the binary flags are saved unscaled.
scaler = RobustScaler()
scores_df = pd.DataFrame(scaler.fit_transform(scores_df), columns=scores_df.columns)
scores_df.to_csv( "outlierness_scores_light.csv")
binary_scores_df.to_csv("binary_scores_light.csv")

# The scaler is refitted for every table, so each one is scaled on its own.
df_dimredu = pd.DataFrame(scaler.fit_transform(df_dimredu), columns=df_dimredu.columns)
df_dataredu = pd.DataFrame(scaler.fit_transform(df_dataredu), columns=df_dataredu.columns)
df_binredu = pd.DataFrame(scaler.fit_transform(df_binredu), columns=df_binredu.columns)
df_dimredu.to_csv( "dim_redu_scores_light.csv")
df_dataredu.to_csv("dim_redu_original_data_light.csv")
df_binredu.to_csv("dim_redu_binary_scores_light.csv")

#OUTLIERNESS SCORES EXTRACTION ON PRINCIPAL COMPONENTS

In [None]:
# Embeddings of the original data written by the previous stage; the saved
# index column is discarded right after loading.
data = pd.read_csv('dim_redu_original_data_light.csv', delimiter=',')
data.drop(columns='Unnamed: 0', inplace=True)

# Re-read the raw dataset only to recover the class labels.
original = pd.read_csv(dataname, delimiter=',')
original.drop(columns='Unnamed: 0', inplace=True)
target = original['class']

##pynomaly

In [None]:
# Start this stage from empty result tables (continuous scores, binary flags).
scores_df, binary_scores_df = pd.DataFrame(), pd.DataFrame()

##sklearn algorithms

In [None]:
from pyod.models.copod import COPOD
from pyod.models.suod import SUOD
from pyod.models.loda import LODA
from pyod.models.feature_bagging import FeatureBagging
from pyod.models.knn import KNN
from pyod.models.hbos import HBOS
from pyod.models.cblof import CBLOF
from pyod.models.cof import COF
from pyod.models.ocsvm import OCSVM
from pyod.models.mcd import MCD
from pyod.models.pca import PCA
from pyod.models.ecod import ECOD
from pyod.models.sos import SOS
from sklearn.svm import OneClassSVM

def froid_light(data):
  """Score ``data`` with every outlier detector (best effort).

  The function returns nothing; it fills the module-level ``scores_df``
  (continuous outlierness scores) and ``binary_scores_df`` (outlier flags)
  with one ``*_dim_redu_*`` column per detector.  For the scikit-learn
  detectors the -1 / +1 predictions are recoded to 1 / 0; pyod predictions
  are stored as returned by ``predict``.

  Every detector is guarded on its own: when it fails, the original failure
  message plus the exception are printed and the remaining detectors still
  run.

  NOTE(review): the column names mention ``cont0.001`` / ``nei10`` but the
  detectors are built with their library defaults -- confirm the names are
  only labels before relying on them.

  Parameters
  ----------
  data : table of features handed unchanged to every detector.
  """

  def _report(message, exc):
    # Stay best-effort, but do not hide why a detector was skipped.
    print(message)
    print('  reason: %r' % (exc,))

  def _flag_outliers(pred):
    # scikit-learn marks outliers with -1; store them as 1 and the rest as 0.
    return np.where(pred == -1, 1, 0)

  def _make_pyod_pca():
    # Import under an alias: the module-level name ``PCA`` is also used for
    # scikit-learn's decomposition elsewhere in this notebook.
    from pyod.models.pca import PCA as PyodPCA
    return PyodPCA(n_components=2, whiten=False)

  # ---- scikit-learn detectors -------------------------------------------
  try:
    # Minimum Covariance Determinant
    ee = EllipticEnvelope()
    pred = ee.fit_predict(data)
    dec_func = ee.decision_function(data)
    maha = ee.mahalanobis(data)
    scores_df['elliptic_decfunc_dim_redu_cont0.001'] = dec_func
    scores_df['elliptic_mahalanobis_dim_redu_cont0.001'] = maha
    binary_scores_df['elliptic_env_dim_redu_binary_cont0.001'] = _flag_outliers(pred)
  except Exception as exc:
    _report('Exception Raised --> Elliptic Envelope Config 1', exc)

  try:
    iso = IsolationForest(n_jobs=-1)
    # fit_predict returns -1 for outliers and +1 for inliers
    pred = iso.fit_predict(data)
    # Flip the sign and shift by 0.5 so that larger values mean "more
    # anomalous" (the score convention of the original paper).
    original_paper_score = 0.5 - iso.decision_function(data)
    scores_df['iso_forest_paper_score_dim_redu_cont0.001'] = original_paper_score
    binary_scores_df['iso_forest_paper_score_dim_redu_binary_cont0.001'] = _flag_outliers(pred)
  except Exception as exc:
    _report('Exception Raised --> Isolation Forest Config 1', exc)

  try:
    # novelty=True is required to call decision_function / predict after fit
    lof = LocalOutlierFactor(novelty=True, metric='minkowski', n_jobs=-1)
    lof.fit(data)
    lof_score = lof.decision_function(data)
    pred = lof.predict(data)
    scores_df['lof_score_dim_redu_mink_nei10'] = lof_score
    binary_scores_df['lof_score_dim_redu_binary_mink_nei10'] = _flag_outliers(pred)
  except Exception as exc:
    _report('Exception Raised --> LOF Config 1', exc)

  # ---- pyod detectors ---------------------------------------------------
  # (factory, column key, column suffix, name used in the failure message).
  # Factories are lambdas so that construction errors are caught as well.
  pyod_detectors = (
    (lambda: COPOD(n_jobs=-1), 'copod', '', 'COPOD'),
    (lambda: ECOD(n_jobs=-1), 'ecod', '', 'ECOD'),
    (_make_pyod_pca, 'pca', '_whitFalse', 'PCA'),
    (lambda: MCD(), 'mcd', '', 'MCD'),
    (lambda: COF(), 'cof', '_nei10', 'COF'),
    (lambda: CBLOF(n_jobs=-1), 'cblof', '', 'CBLOF'),
    (lambda: HBOS(), 'hbos', '', 'HBOS'),
    (lambda: KNN(n_jobs=-1), 'knn', '', 'KNN'),
    (lambda: FeatureBagging(n_jobs=-1), 'featbagg', '', 'Feature Bagging'),
    (lambda: LODA(), 'loda', '', 'LODA'),
    (lambda: SUOD(n_jobs=-1), 'suod', '', 'SUOD'),
  )
  for make_detector, key, suffix, shown_name in pyod_detectors:
    try:
      clf = make_detector()
      clf.fit(data)
      pred = clf.predict(data)
      raw_scores = clf.decision_scores_  # raw outlier scores on the train data
      scores_df['pyod_%s_dim_redu_cont0.001%s' % (key, suffix)] = raw_scores
      binary_scores_df['pyod_%s_dim_redu_binary_cont0.001%s' % (key, suffix)] = pred
    except Exception as exc:
      _report('Exception Raised --> %s Config 1' % shown_name, exc)
#APPLY FEATURE EXTRACTION PIPELINE ON PRINCIPAL COMPONENTS

In [None]:
from sklearn.preprocessing import RobustScaler
import time

print('Outlierness Scores from Principal Components Datasets: ')
start_time = time.time()
froid_light(data)
end = time.time()

# Report the time elapsed by this feature extraction phase
print("Support Features on Principal Components Extraction Time: %.8s seconds" % (end - start_time))

# Columns holding NaN cannot be scaled: list them, then drop them.
# `thresh` is deliberately not passed -- recent pandas versions reject
# `how` and `thresh` being given together.
print('\nNull Values in Outlierness Scores DataSets:')
for label, frame in (('Scores_df: ', scores_df), ('Binary_Scores_df: ', binary_scores_df)):
  print(label)
  print(frame.columns[frame.isna().any()].tolist())
  frame.dropna(axis=1, how="any", inplace=True)
  print('--- --- ---')

# Normalize the outlierness scores; the binary flags are saved unscaled.
scaler = RobustScaler()
scores_df = pd.DataFrame(scaler.fit_transform(scores_df), columns=scores_df.columns)

scores_df.to_csv( "scores_principal_comp_light.csv")
binary_scores_df.to_csv( "binary_scores_principal_comp_light.csv")

# INCEPTION FROID

In [None]:
dataname = 'outlierness_scores_light.csv'
dataname2 = 'dim_redu_original_data_light.csv'

# Outlierness scores computed on the original data -> anomaly detection is
# applied on top of them.
data = pd.read_csv(dataname, delimiter=',')
data.drop(columns='Unnamed: 0', inplace=True)

# Principal components of the original data -> dimensionality reduction is
# applied on top of them.
data2 = pd.read_csv(dataname2, delimiter=',')
data2.drop(columns='Unnamed: 0', inplace=True)

In [None]:
# Empty result tables for the inception stage (continuous scores, binary flags).
scores_df, binary_scores_df = pd.DataFrame(), pd.DataFrame()

In [None]:
from pyod.models.mcd import MCD
from pyod.models.suod import SUOD
from pyod.models.loda import LODA
from pyod.models.feature_bagging import FeatureBagging
from pyod.models.knn import KNN
from pyod.models.hbos import HBOS
from pyod.models.cblof import CBLOF
from pyod.models.ocsvm import OCSVM
from pyod.models.cof import COF
from pyod.models.copod import COPOD
from pyod.models.ecod import ECOD
from pyod.models.sos import SOS
from pyod.models.pca import PCA

def _report_detector_failure(message, exc):
  """Print a detector's failure banner followed by the exception that caused it."""
  print('%s: %r' % (message, exc))


def _new_pyod_pca():
  """Build pyod's PCA detector.

  The class is imported locally under an alias because the module-level name
  `PCA` is rebound to sklearn's PCA by a later cell of this notebook; relying on
  the global would silently pick the wrong class after that cell has run.
  """
  from pyod.models.pca import PCA as PyodPCA
  return PyodPCA(n_components=2)


def _run_pyod_detector(data, build, score_col, binary_col, failure_msg):
  """Fit one pyod detector on `data` and store its scores and labels.

  `build` is a zero-argument callable returning the detector; it is invoked
  inside the try so that construction errors are reported like fit errors.
  The training scores (`decision_scores_`) go to `scores_df[score_col]` and the
  output of `predict(data)` goes to `binary_scores_df[binary_col]`.
  """
  try:
    clf = build()
    clf.fit(data)
    pred = clf.predict(data)
    detector_scores = clf.decision_scores_  # raw outlier scores on the train data
    scores_df[score_col] = detector_scores
    binary_scores_df[binary_col] = pred
  except Exception as exc:
    _report_detector_failure(failure_msg, exc)


def froid_light(data):
  """Score `data` with a battery of outlier detectors (best effort).

  Every detector is fitted on `data` and evaluated on the same `data`. Results
  are written as new columns of the module-level frames `scores_df`
  (continuous scores) and `binary_scores_df` (labels); nothing is returned.
  For the three scikit-learn detectors the -1/+1 predictions are recoded to
  1 (outlier) / 0 (inlier); pyod predictions are stored as returned.

  A detector that fails is skipped: its failure message and the underlying
  exception are printed and the remaining detectors still run. Only
  `Exception` subclasses are handled, so Ctrl-C still interrupts the run.

  NOTE(review): the column names mention `cont0.001` / `nei10`, but no
  contamination or neighbour count is passed to any detector here, so the
  library defaults are in effect - confirm this is intended.
  """
  # Elliptic Envelope (robust covariance estimate)
  try:
    ee = EllipticEnvelope()
    # fit_predict both fits the model and returns the -1/+1 labels
    pred = ee.fit_predict(data)
    dec_func = ee.decision_function(data)
    maha = ee.mahalanobis(data)
    scores_df['inception_elliptic_decfunc_original_cont0.001'] = dec_func
    scores_df['inception_elliptic_mahalanobis_original_cont0.001'] = maha
    binary_scores_df['inception_elliptic_env_original_binary_cont0.001'] = np.where(pred == -1, 1, 0)
  except Exception as exc:
    _report_detector_failure('Exception Raised --> Elliptic Envelope Config 1', exc)

  # Isolation Forest
  try:
    iso = IsolationForest(n_jobs=-1)
    # returns -1 for outliers and +1 for inliers
    pred = iso.fit_predict(data)
    # the decision function gives an outlierness value for each observation;
    # it is flipped and shifted by 0.5 before being stored
    sklearn_score_anomalies = iso.decision_function(data)
    original_paper_score = 0.5 - sklearn_score_anomalies
    scores_df['inception_iso_forest_paper_score_original_cont0.001'] = original_paper_score
    binary_scores_df['inception_iso_forest_paper_score_original_binary_cont0.001'] = np.where(pred == -1, 1, 0)
  except Exception as exc:
    _report_detector_failure('Exception Raised --> Iso Forest Config 1', exc)

  # Local Outlier Factor (novelty mode so that decision_function/predict exist)
  try:
    lof = LocalOutlierFactor(novelty=True, metric='minkowski', n_jobs=-1)
    lof.fit(data)
    lof_score = lof.decision_function(data)
    pred = lof.predict(data)
    scores_df['inception_lof_score_original_mink_nei10'] = lof_score
    binary_scores_df['inception_lof_score_original_binary_mink_nei10'] = np.where(pred == -1, 1, 0)
  except Exception as exc:
    _report_detector_failure('Exception Raised --> LOF Config 1', exc)

  # pyod detectors: (factory, score column, binary column, failure message).
  # Factories are lambdas so each detector is only constructed inside the
  # helper's try block.
  pyod_detectors = (
    (lambda: COPOD(n_jobs=-1),
     'inception_pyod_copod_original_cont0.001',
     'inception_pyod_copod_original_binary_cont0.001',
     'Exception Raised -> COPOD Config 1'),
    (lambda: ECOD(n_jobs=-1),
     'inception_pyod_ecod_original_cont0.001',
     'inception_pyod_ecod_original_binary_cont0.001',
     'Exception Raised -> ECOD Config 1'),
    (_new_pyod_pca,
     'inception_pyod_pca_original_cont0.001_whitFalse',
     'inception_pyod_pca_original_binary_cont0.001_whitFalse',
     'Exception Raised -> PCA Config 1 data maybe not converged'),
    (lambda: MCD(),
     'inception_pyod_mcd_original_cont0.001',
     'inception_pyod_mcd_original_binary_cont0.001',
     'Exception Raised -> MCD Config 1'),
    (lambda: COF(),
     'inception_pyod_cof_original_cont0.001_nei10',
     'inception_pyod_cof_original_binary_cont0.001_nei10',
     'Exception Raised -> COF Config 1'),
    (lambda: CBLOF(n_jobs=-1),
     'inception_pyod_cblof_original_cont0.001',
     'inception_pyod_cblof_original_binary_cont0.001',
     'Exception Raised -> CBLOF Config 1'),
    (lambda: HBOS(),
     'inception_pyod_hbos_original_cont0.001',
     'inception_pyod_hbos_original_binary_cont0.001',
     'Exception Raised -> HBOS Config 1'),
    (lambda: KNN(n_jobs=-1),
     'inception_pyod_knn_original_cont0.001',
     'inception_pyod_knn_original_binary_cont0.001',
     'Exception Raised -> KNN Config 1'),
    (lambda: FeatureBagging(n_jobs=-1),
     'inception_pyod_featbagg_original_cont0.001',
     'inception_pyod_featbagg_original_binary_cont0.001',
     'Exception Raised -> Feature Bagging Config 1'),
    (lambda: LODA(),
     'inception_pyod_loda_original_cont0.001',
     'inception_pyod_loda_original_binary_cont0.001',
     'Exception Raised -> LODA Config 1'),
    (lambda: SUOD(n_jobs=-1),
     'inception_pyod_suod_original_cont0.001',
     'inception_pyod_suod_original_binary_cont0.001',
     'Exception Raised -> SUOD Config 1'),
  )
  for build, score_col, binary_col, failure_msg in pyod_detectors:
    _run_pyod_detector(data, build, score_col, binary_col, failure_msg)


In [None]:
# Notebook cell output: the bare expression displays the current contents of scores_df.
scores_df

In [None]:
import time
from sklearn.preprocessing import RobustScaler

# Run the outlier detectors on the previously computed outlierness scores
# ("inception" stage), scale the continuous scores, drop columns containing
# nulls and export both score tables to CSV.
start_time = time.time()

print('Outlierness Scores Extraction: \n')
froid_light(data) #anomaly detection methods applied on outlierness scores
end = time.time()

# start_time was recorded but never reported; print the elapsed time like the
# other extraction cells do
print("Outlierness Scores Extraction Time: %.4f seconds" % (end - start_time))

#normalize outlierness scores
scaler = RobustScaler()
scores_df = pd.DataFrame(scaler.fit_transform(scores_df), columns=scores_df.columns)

# list the columns that contain nulls BEFORE dropping them; the former bare
# `.loc[...]` expression ran after the drop and printed nothing
print('\nNull Values in Outlierness Scores DataSets:')
print('Scores_df: ')
print(scores_df.columns[scores_df.isna().any()].tolist())
# `how` and `thresh` are mutually exclusive: pandas >= 1.5 raises TypeError when
# both are passed (even as thresh=None), so only `how` is given
scores_df.dropna(axis=1, how="any", inplace=True)
print('--- --- ---')
print('Binary_Scores_df: ')
print(binary_scores_df.columns[binary_scores_df.isna().any()].tolist())
binary_scores_df.dropna(axis=1, how="any", inplace=True)
print('--- --- ---')

#save outlierness numeric and binary scores to .csv
scores_df.to_csv("inception_outlierness_scores_light.csv")
binary_scores_df.to_csv("inception_binary_scores_light.csv")

In [None]:
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn import random_projection
from sklearn.manifold import Isomap
from keras.models import Model
from keras.layers import Input, Dense
from keras import regularizers
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
from sklearn.decomposition import KernelPCA
import pandas as pd
from sklearn.manifold import TSNE
import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn import manifold
from functools import partial
from collections import OrderedDict
from time import time
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.ticker import NullFormatter

# Three empty, independent accumulators for the projections computed by
# froid_light: of the score table, of the input data and of the binary labels.
df_dimredu, df_dataredu, df_binredu = (pd.DataFrame() for _ in range(3))

# Maximum number of LDA components stored per frame (columns ..._0 to ..._10).
_MAX_LDA_COMPONENTS = 11


def _report_projection_failure(message, exc):
  """Print a projection step's failure banner followed by the exception that caused it."""
  print('%s: %r' % (message, exc))


def _store_lda_components(frame, column_prefix, projection):
  """Copy the available LDA components of `projection` into `frame`.

  Column i of the 2-D array `projection` is stored as
  `frame[column_prefix + str(i)]`, for at most _MAX_LDA_COMPONENTS columns.
  """
  n_stored = min(projection.shape[1], _MAX_LDA_COMPONENTS)
  for i in range(n_stored):
    frame[column_prefix + str(i)] = projection[:, i]


def froid_light(data):
  """Project `data` and the two score tables with several reducers (best effort).

  PCA, Gaussian random projection, t-SNE (2 components each) and LDA are
  applied to three inputs: `data`, the module-level `scores_df` and the
  module-level `binary_scores_df`. The resulting columns are written to the
  module-level frames `df_dataredu`, `df_dimredu` and `df_binredu`
  respectively; nothing is returned.

  Each (reducer, input) step is independent: on failure its message and the
  underlying exception are printed and the remaining steps still run. LDA
  stores however many components the fitted model produces, capped at
  _MAX_LDA_COMPONENTS, instead of indexing 11 fixed columns and relying on an
  IndexError to stop.

  This definition shares its name with the outlier-detector `froid_light`
  defined earlier in the notebook and replaces it once this cell has run.

  NOTE(review): LDA is fitted against a module-level `target` that is not
  defined in the visible part of the notebook - confirm it exists and has one
  label per row of each input.
  """
  # Import sklearn's PCA under an alias: the bare global name `PCA` is also used
  # for pyod's PCA detector in another cell of this notebook.
  from sklearn.decomposition import PCA as SklearnPCA

  pca = SklearnPCA(n_components=2)

  # Each PCA result is stored right after it is computed, so that one failing
  # input no longer prevents the other inputs' components from being saved.
  try:
    pca.fit(data)
    out_pca = pd.DataFrame(pca.transform(data), index=data.index)
    df_dataredu['inception_pca1_data'] = out_pca[0]
    df_dataredu['inception_pca2_data'] = out_pca[1]
  except Exception as exc:
    _report_projection_failure('Exception Raised --> PCA Config 1', exc)

  try:
    pca.fit(scores_df)
    out_pca_scores = pd.DataFrame(pca.transform(scores_df), index=scores_df.index)
    df_dimredu['inception_pca1_scores'] = out_pca_scores[0]
    df_dimredu['inception_pca2_scores'] = out_pca_scores[1]
  except Exception as exc:
    _report_projection_failure('Exception Raised --> PCA Config 2', exc)

  try:
    pca.fit(binary_scores_df)
    out_pca_bin = pd.DataFrame(pca.transform(binary_scores_df), index=binary_scores_df.index)
    df_binredu['inception_pca1_binary'] = out_pca_bin[0]
    df_binredu['inception_pca2_binary'] = out_pca_bin[1]
  except Exception as exc:
    _report_projection_failure('Exception Raised --> PCA Config 3', exc)

  rsp = random_projection.GaussianRandomProjection(n_components=2)

  try:
    ran_proj = rsp.fit_transform(scores_df)
    df_dimredu['inception_sub_proj1_scores'] = ran_proj[:, 0]
    df_dimredu['inception_sub_proj2_scores'] = ran_proj[:, 1]
  except Exception as exc:
    _report_projection_failure('Exception Raised --> Random Projection Config scores', exc)

  try:
    ran_proj_data = rsp.fit_transform(data)
    df_dataredu['inception_sub_proj1_data'] = ran_proj_data[:, 0]
    df_dataredu['inception_sub_proj2_data'] = ran_proj_data[:, 1]
  except Exception as exc:
    _report_projection_failure('Exception Raised --> Random Projection Config data', exc)

  try:
    ran_proj_bin = rsp.fit_transform(binary_scores_df)
    df_binredu['inception_sub_proj1_binary'] = ran_proj_bin[:, 0]
    df_binredu['inception_sub_proj2_binary'] = ran_proj_bin[:, 1]
  except Exception as exc:
    _report_projection_failure('Exception Raised --> Random Projection Config binary', exc)

  tsne = TSNE(n_components=2)

  try:
    sne_proj = tsne.fit_transform(scores_df)
    df_dimredu['inception_sne_1_scores'] = sne_proj[:, 0]
    df_dimredu['inception_sne_2_scores'] = sne_proj[:, 1]
  except Exception as exc:
    _report_projection_failure('Exception Raised --> TSNE Config scores', exc)

  try:
    sne_proj_data = tsne.fit_transform(data)
    df_dataredu['inception_sne_1_data'] = sne_proj_data[:, 0]
    df_dataredu['inception_sne_2_data'] = sne_proj_data[:, 1]
  except Exception as exc:
    _report_projection_failure('Exception Raised --> TSNE Config data', exc)

  try:
    sne_proj_bin = tsne.fit_transform(binary_scores_df)
    df_binredu['inception_sne_1_binary'] = sne_proj_bin[:, 0]
    df_binredu['inception_sne_2_binary'] = sne_proj_bin[:, 1]
  except Exception as exc:
    _report_projection_failure('Exception Raised --> TSNE Config binary', exc)

  clf = LinearDiscriminantAnalysis()

  try:
    clf.fit(scores_df, target)
    lda_proj = clf.transform(scores_df)
    _store_lda_components(df_dimredu, 'inception_lda_1_scores_', lda_proj)
  except Exception as exc:
    _report_projection_failure('Exception Raised --> Linear Discriminant Analysis (classes exceeded)', exc)

  try:
    clf.fit(data, target)
    lda_proj_data = clf.transform(data)
    _store_lda_components(df_dataredu, 'inception_lda_1_data_', lda_proj_data)
  except Exception as exc:
    _report_projection_failure('Exception Raised --> Linear Discriminant Analysis Config (classes exceeded)', exc)

  try:
    clf.fit(binary_scores_df, target)
    lda_proj_bin = clf.transform(binary_scores_df)
    _store_lda_components(df_binredu, 'inception_lda_1_binary_', lda_proj_bin)
  except Exception as exc:
    _report_projection_failure('Exception Raised --> Linear Discriminant Analysis Config (classes exceeded)', exc)

#APPLY FEATURE EXTRACTION PIPELINE ON PROJECTED FEATURES

In [None]:
import time

# Apply the dimensionality-reduction pipeline to the principal components in
# `data2`, then clean, scale and export the projections of that data.
print('\n--- --- --- --- ---\nDimensionality Reduction Features Extraction: ')

start_time = time.time()
froid_light(data2)
end = time.time()

# report the elapsed time of this feature-extraction phase
# ('%.8s' truncated the *string* form of the float, which mangles values that
# Python prints in scientific notation, so format it as a number instead)
print("Support Features Extraction Time: %.4f seconds" % (end - start_time))

# count nulls per column BEFORE dropping them; counting after the drop
# always printed zeros
print('\nNull Values in Dimensionality Reduction DataSets:')
print(df_dataredu.isnull().sum())
# `how` and `thresh` are mutually exclusive: pandas >= 1.5 raises TypeError when
# both are passed (even as thresh=None), so only `how` is given
df_dataredu.dropna(axis=1, how="any", inplace=True)
print('--- --- ---')

#normalize calculated principal components
scaler = RobustScaler()
df_dataredu = pd.DataFrame(scaler.fit_transform(df_dataredu), columns=df_dataredu.columns)

#save dimensionality reduction (applied on principal components) to .csv
df_dataredu.to_csv("inception_dim_redu_original_data_light.csv")

