# Importing Packages

In [None]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import sys, os, logging, timeit
from pathlib import Path as Pathlb


from sklearn import preprocessing
from sklearn.metrics import accuracy_score
import itertools


sys.path.insert(0, os.path.abspath(os.path.join('..')))
from MLPackage import Features as feat
from MLPackage.FS import hho
from MLPackage import config as cfg

# Making Folders and Logger

In [None]:
# The project root is the parent of the current working directory -- the same
# ".." that is placed on sys.path so that MLPackage can be imported. Deriving
# it with dirname() instead of slicing a fixed number of characters off the
# path keeps this correct whatever the notebook folder is called.
project_dir = os.path.dirname(os.getcwd())
fig_dir = os.path.join(project_dir, "Manuscripts", "src", "figures")
tbl_dir = os.path.join(project_dir, "Manuscripts", "src", "tables")
results_dir = os.path.join(project_dir, "results")
dataset_dir = os.path.join(project_dir, "Datasets")
temp_dir = os.path.join(project_dir, "temp")
log_path = os.path.join(project_dir, 'logs')

# Create every working folder up front; existing folders are left untouched.
for _folder in (log_path, dataset_dir, temp_dir, results_dir, fig_dir, tbl_dir):
    Pathlb(_folder).mkdir(parents=True, exist_ok=True)


def create_logger(level):
    """Configure and return the notebook logger (file + coloured console output).

    The logger is registered under the name "main pynb". Records are written
    to ``<log_path>/main pynb_loger.log`` (the file is truncated each time the
    logger is created) and echoed to the console with ANSI colours.

    Calling this function again -- e.g. when the notebook cell is re-run --
    replaces the handlers installed by the previous call instead of stacking
    new ones, so each record is emitted once per destination.

    Args:
        level: Logging level applied to the logger itself and to the file
            handler. The console handler is always set to ``logging.INFO``.

    Returns:
        logging.Logger: The configured logger instance.
    """
    loggerName = "main pynb"
    Pathlb(log_path).mkdir(parents=True, exist_ok=True)

    # ANSI escape sequences used to colour the console output.
    blue = '\x1b[38;5;39m'
    yellow = '\x1b[38;5;226m'
    bold_red = '\x1b[31;1m'
    reset = '\x1b[0m'

    logger = logging.getLogger(loggerName)
    logger.setLevel(level)

    # logging.getLogger() hands back the same object for the same name, so
    # handlers from an earlier call are still attached. Remove and close them
    # to avoid duplicated log lines and a leaked open log file.
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)
        stale_handler.close()

    formatter_colored = logging.Formatter(blue + '[%(asctime)s]-' + yellow + '[%(name)s @%(lineno)d]' + reset + blue + '-[%(levelname)s]' + reset + bold_red + '\t\t%(message)s' + reset, datefmt='%m/%d/%Y %I:%M:%S %p ')
    formatter = logging.Formatter('[%(asctime)s]-[%(name)s @%(lineno)d]-[%(levelname)s]\t\t%(message)s', datefmt='%m/%d/%Y %I:%M:%S %p ')

    # Plain-text log file, rewritten from scratch on every (re)creation.
    file_handler = logging.FileHandler(os.path.join(log_path, loggerName + '_loger.log'), mode='w')
    file_handler.setLevel(level)
    file_handler.setFormatter(formatter)

    # Console output is capped at INFO regardless of the requested level.
    stream_handler = logging.StreamHandler()
    stream_handler.setLevel(logging.INFO)
    stream_handler.setFormatter(formatter_colored)

    logger.addHandler(file_handler)
    logger.addHandler(stream_handler)
    return logger
# Notebook-wide logger: DEBUG is the threshold for the logger and its log file.
logger = create_logger(level=logging.DEBUG)





# Reading CASIA-D Dataset

In [None]:
# NOTE(review): eps is not referenced anywhere in the visible cells -- confirm
# whether a later cell still needs it.
eps = 5

# Paths of the two NumPy files that hold the samples and their metadata.
data_path, meta_path = (
    os.path.join(project_dir, 'Datasets', file_name)
    for file_name in ('datalist.npy', 'metadatalist.npy')
)

# Load both arrays, then log their shapes as a quick sanity check.
data, metadata = np.load(data_path), np.load(meta_path)
logger.info("Data shape: {}".format(data.shape))
logger.info("Metadata shape: {}".format(metadata.shape))

# Extracting Features

In [None]:


# One feature vector (features) and one pre-feature entry (prefeatures) is
# collected per sample.
features = []
prefeatures = []

# Wavelet settings shared by every feat.wt_feature call below.
wavelet_opts = {"waveletname": "coif1", "pywt_mode": "constant", "wavelet_level": 4}

for sample, label in zip(data, metadata):
    # COA time series from which all COA-based features are derived.
    COA = feat.computeCOATimeSeries(sample, Binarize="simple", Threshold=0)

    # Hand-crafted COA descriptors.
    aMDIST = feat.computeMDIST(COA)
    aRDIST = feat.computeRDIST(COA)
    aTOTEX = feat.computeTOTEX(COA)
    aMVELO = feat.computeMVELO(COA)
    aRANGE = feat.computeRANGE(COA)
    aAREACC = feat.computeAREACC(COA)
    aAREACE = feat.computeAREACE(COA)
    aAREASW = feat.computeAREASW(COA)
    aMFREQ = feat.computeMFREQ(COA)
    aFDPD = feat.computeFDPD(COA)
    aFDCC = feat.computeFDCC(COA)
    aFDCE = feat.computeFDCE(COA)

    # Some descriptors are wrapped in a one-element list before concatenation
    # (presumably because they are scalars -- confirm in MLPackage.Features).
    handcraft_COAfeatures = np.concatenate(
        (
            aMDIST, aRDIST, aTOTEX, aMVELO, aRANGE,
            [aAREACC], [aAREACE], [aAREASW],
            aMFREQ, aFDPD,
            [aFDCC], [aFDCE],
        ),
        axis=0,
    )
    COAs = COA.flatten()

    # GRF signal and its hand-crafted descriptors.
    GRF = feat.computeGRF(sample)
    handcraft_GRFfeatures = feat.computeGRFfeatures(GRF)

    # Wavelet features of the GRF signal and of COA rows 0, 1 and 2.
    wt_GRF = feat.wt_feature(GRF, **wavelet_opts)
    wt_COA_RD, wt_COA_AP, wt_COA_ML = (
        feat.wt_feature(COA[row, :], **wavelet_opts) for row in range(3)
    )

    # The concatenation order must match the column order of columnsName
    # below; the first two metadata entries go last.
    sample_vector = np.concatenate(
        (
            COAs,
            handcraft_COAfeatures,
            GRF,
            handcraft_GRFfeatures,
            wt_COA_RD,
            wt_COA_AP,
            wt_COA_ML,
            wt_GRF,
            label[0:2],
        ),
        axis=0,
    )
    features.append(sample_vector)

    prefeatures.append(feat.prefeatures(sample))

# Column names come from the project config, in the same order as the
# concatenation above.
columnsName = (
    cfg.COA_RD
    + cfg.COA_AP
    + cfg.COA_ML
    + cfg.COA_HC
    + cfg.GRF
    + cfg.GRF_HC
    + cfg.wt_COA_RD
    + cfg.wt_COA_AP
    + cfg.wt_COA_ML
    + cfg.wt_GRF
    + cfg.label
)

# Persist the feature table as a spreadsheet and the pre-features as .npy.
saving_path = os.path.join(temp_dir, 'features_all.xlsx')
pd.DataFrame(features, columns=columnsName).to_excel(saving_path)
np.save(os.path.join(temp_dir, 'prefeatures.npy'), prefeatures)

# Feature Selection


In [None]:
# Reload the feature spreadsheet; its first column is the saved DataFrame index.
feature_path = os.path.join(project_dir, "temp", "features_all.xlsx")
DF_features_all = pd.read_excel(feature_path, index_col=0)

# NOTE: this rebinds data, features and label, which earlier cells also use.
# Everything except the last two columns is the feature matrix; the
# second-to-last column is taken as the target, the last column is unused here.
data = DF_features_all.values
features, label = data[:, :-2], data[:, -2]


In [None]:



# Optimiser settings.
# NOTE(review): k is defined but never placed in opts -- confirm whether
# hho.jfs expects it.
k = 5      # k-value in KNN
N = 10     # number of chromosomes
T = 100    # maximum number of generations
# NOTE(review): CR and MR carry no description in the original cell --
# confirm their meaning and whether hho.jfs reads them.
CR = 0.8
MR = 0.01
opts = {
    'N': N,
    'T': T,
    'CR': CR,
    'MR': MR,
}

# Run the feature selection on the full feature matrix and label vector.
fmdl = hho.jfs(features, label, opts)