In [1]:
from MLPackage.Deep_network import * 

class Pipeline(Classifier, Seamese):
    """Experiment runner built on top of ``Classifier`` and ``Seamese``.

    The class stores the experiment configuration as attributes (see
    ``__init__``), evaluates every subject in ``self._known_imposter_list``
    with stratified K-fold cross-validation (``run``), flattens the per-fold
    results into one row per subject (``compacting_results``) and can append
    those rows to an Excel workbook (``collect_results``).
    """

    # Column names used for the per-subject result table built by ``run`` and
    # for the empty workbook created by ``collect_results``.
    _col = [
        "test_id",
        "subject",
        "combination",
        "classifier_name",
        "normilizing",
        "persentage",
        "EER",
        "TH",
        "ACC_bd",
        "BACC_bd",
        "FAR_bd",
        "FRR_bd",
        "ACC_ud",
        "BACC_ud",
        "FAR_ud",
        "FRR_ud",
        "AUS",
        "FAU",
        "unknown_imposter_samples",
        "AUS_All",
        "FAU_All",
        "CM_bd_TN",
        "CM_bd_FP",
        "CM_bd_FN",
        "CM_bd_TP",
        "CM_ud_TN",
        "CM_ud_FP",
        "CM_ud_FN",
        "CM_ud_TP",
        "num_pc",
        "KFold",
        "p_training_samples",
        "train_ratio",
        "ratio",
        # pos_te_samples,
        # neg_te_samples,
        "known_imposter",
        "unknown_imposter",
        "min_number_of_sample",
        "number_of_unknown_imposter_samples",
        "y_train.shape[0]",
        "y_train.sum()",
        "y_val.shape[0]",
        "y_val.sum()",
        "y_test.shape[0]",
        "y_test.sum()",
    ]

    def __init__(self, kwargs):
        """Set the default configuration, then apply the overrides in ``kwargs``.

        Args:
            kwargs: Mapping of attribute name to value. Every key must be the
                name of an attribute initialised below (for example
                ``"_KFold"`` or ``"dataset_name"``).

        Raises:
            KeyError: If ``kwargs`` contains a key that is not one of the
                attributes initialised below.
        """

        self.dataset_name = ""
        self._combination = 0

        self._labels = 0

        # Raw signal tables and pre-computed images.
        self._GRFs = pd.DataFrame()
        self._COAs = pd.DataFrame()
        self._COPs = pd.DataFrame()
        self._pre_images = pd.DataFrame()

        # Hand-crafted feature tables.
        self._COA_handcrafted = pd.DataFrame()
        self._COP_handcrafted = pd.DataFrame()
        self._GRF_handcrafted = pd.DataFrame()

        # Wavelet (WPT) feature tables.
        self._GRF_WPT = pd.DataFrame()
        self._COP_WPT = pd.DataFrame()
        self._COA_WPT = pd.DataFrame()

        self._deep_features = pd.DataFrame()

        # CNN settings.
        self._CNN_base_model = ""
        self._CNN_weights = "imagenet"
        self._CNN_include_top = False
        self._verbose = False
        self._CNN_batch_size = 32
        self._CNN_epochs = 10  # the original assigned this twice with the same value
        self._CNN_optimizer = "adam"
        self._val_size = 0.2
        self._CNN_class_numbers = 97
        self._CNN_image_size = (60, 40, 3)

        # Subject / sample selection.
        self._min_number_of_sample = 30
        self._known_imposter = 5
        self._unknown_imposter = 30
        # NOTE(review): the original comment said "Must be less than 1" while
        # the default is exactly 1.0 -- confirm the intended valid range.
        self._number_of_unknown_imposter_samples = 1.0

        # self._known_imposter_list   = []
        # self._unknown_imposter_list = []

        # Wavelet settings.
        self._waveletname = "coif1"
        self._pywt_mode = "constant"
        self._wavelet_level = 4

        self._KFold = 10
        self._random_state = 42

        self._p_training_samples = 11
        self._train_ratio = 4
        self._ratio = True

        # Classifier settings.
        self._classifier_name = ""
        self._KNN_n_neighbors = 5
        self._KNN_metric = "euclidean"
        self._KNN_weights = "uniform"
        self._SVM_kernel = "linear"
        self._random_runs = 10
        self._THRESHOLDs = np.linspace(0, 1, 100)
        self._persentage = 0.95
        self._normilizing = "z-score"

        self._num_pc = 0

        for key, value in kwargs.items():
            if key not in self.__dict__:
                # The original passed the key list as an extra logging argument
                # without a placeholder, which makes the log call itself fail
                # to format. Use lazy %-formatting instead.
                logger.error("key must be one of these: %s", list(self.__dict__))
                raise KeyError(key)
            setattr(self, key, value)

        super().__init__(self.dataset_name, self._classifier_name)

    def run(self, DF_features_all: pd.DataFrame, feature_set_names: list):
        """Evaluate every subject in ``self._known_imposter_list``.

        For each subject the labels are binarised, a stratified test split is
        held out, and the remaining rows are cross-validated with
        ``StratifiedKFold``; the per-fold results are merged by
        ``compacting_results``.

        Args:
            DF_features_all: Feature table handed to
                ``filtering_subjects_and_samples``.
            feature_set_names: Feature-set names forwarded to ``projector``
                through ``fold_calculating``.

        Returns:
            ``pd.DataFrame`` with one row per subject and the columns ``_col``.
        """

        DF_known_imposter, DF_unknown_imposter = self.filtering_subjects_and_samples(
            DF_features_all
        )
        DF_unknown_imposter = DF_unknown_imposter.dropna()
        DF_known_imposter = DF_known_imposter.dropna()

        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Disabled experiment: use the shod dataset as unknown-imposter samples
        # (it overwrites the DF_unknown_imposter DataFrame).
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # DF_features_all_shod, feature_set_names_shod = self.extracting_feature_set1('casia-shod')
        # DF_unknown_imposter = DF_features_all_shod[DF_features_all_shod['side']>=2.0].dropna()
        # subjects, samples = np.unique(DF_unknown_imposter["ID"].values, return_counts=True)

        # self._unknown_imposter_list = subjects[-self._unknown_imposter:]
        # DF_unknown_imposter =  DF_unknown_imposter[DF_unknown_imposter["ID"].isin(self._unknown_imposter_list)]

        # self.set_dataset_path('casia')
        # ----------------------------------------------------------------

        results = list()
        for idx, subject in enumerate(self._known_imposter_list):

            if self._verbose:
                logger.info(
                    f"   Subject number: {idx} out of {len(self._known_imposter_list)} (subject ID is {subject})"
                )

            # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            # Disabled experiment: drop shod samples of the current subject
            # from the known-imposter training data.
            # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            # index_of_shod_samples = DF_known_imposter[ (DF_known_imposter['side'] >= 2) & (DF_known_imposter['ID'] == subject)].index
            # DF_known_imposter1 = DF_known_imposter.drop(index_of_shod_samples)
            # ----------------------------------------------------------------

            # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            # Disabled experiment: drop barefoot samples from the unknown
            # imposters (it overwrites the DF_unknown_imposter DataFrame).
            # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            # index_of_barefoot_samples = DF_unknown_imposter[ DF_unknown_imposter['side'] <= 1 ].index
            # DF_unknown_imposter = DF_unknown_imposter.drop(index_of_barefoot_samples)
            # ----------------------------------------------------------------

            (
                DF_known_imposter_binariezed,
                DF_unknown_imposter_binariezed,
            ) = self.binarize_labels(DF_known_imposter, DF_unknown_imposter, subject)

            # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            # Disabled experiment: template selection on the known imposters
            # (keeps only 200 samples of all known imposters).
            # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            # A1 = DF_known_imposter_binariezed[DF_known_imposter_binariezed['ID'] == 1.0]
            # A2 = DF_known_imposter_binariezed[DF_known_imposter_binariezed['ID'] == 0.0]
            # A2 = self.template_selection(A2, 'DEND', 200, verbose=True)
            # DF_known_imposter_binariezed = pd.concat([A1, A2], axis=0)
            # ----------------------------------------------------------------

            # ``fold_calculating`` needs a test frame that is separate from the
            # rows being cross-validated; the original call did not supply one
            # and therefore raised TypeError (7 arguments for 8 parameters).
            # Hold out a stratified 20% split, the same way
            # ``optimizer_accross_subjects`` in this notebook does.
            # NOTE(review): confirm the intended hold-out strategy and size.
            X, x_test = model_selection.train_test_split(
                DF_known_imposter_binariezed,
                test_size=0.20,
                random_state=self._random_state,
                stratify=DF_known_imposter_binariezed.iloc[:, -1].values,
            )
            U = DF_unknown_imposter_binariezed

            CV = model_selection.StratifiedKFold(
                n_splits=self._KFold, shuffle=False
            )  # random_state=self._random_state,

            # Folds run sequentially in this process. The original also created
            # a multiprocessing.Pool for every subject without ever submitting
            # work to it; that unused pool has been removed.
            cv_results = list()
            for fold, (train_index, val_index) in enumerate(
                CV.split(X.iloc[:, :-1], X.iloc[:, -1])
            ):
                cv_results.append(
                    self.fold_calculating(
                        feature_set_names,
                        subject,
                        X,
                        x_test,
                        U,
                        train_index,
                        val_index,
                        fold,
                    )
                )

            result = self.compacting_results(cv_results, subject)
            results.append(result)

        return pd.DataFrame(results, columns=self._col)

    def compacting_results(self, results, subject):
        """Flatten the per-fold results of one subject into a single row.

        The row is built from three parts, in this order: run metadata, the
        mean of ``results`` along axis 0, and the experiment settings.

        NOTE(review): ``np.array(results).mean(axis=0)`` needs every fold
        result to be a numeric sequence, whereas ``fold_calculating`` treats
        the value returned by ``ML_classifier`` as a mapping
        (``results["num_pc"] = ...``, ``results.update(...)``). Confirm the
        fold-result type and that the row length matches ``_col``.

        Args:
            results: List with one entry per fold.
            subject: Identifier of the evaluated subject.

        Returns:
            A flat list of values.
        """
        header = [
            self._test_id,
            subject,
            self._combination,
            self._classifier_name,
            self._normilizing,
            self._persentage,
        ]

        fold_mean = np.array(results).mean(axis=0)

        settings = [
            self._KFold,
            self._p_training_samples,
            self._train_ratio,
            self._ratio,
            # pos_te_samples,
            # neg_te_samples,
            self._known_imposter,
            self._unknown_imposter,
            self._min_number_of_sample,
            self._number_of_unknown_imposter_samples,
        ]

        return [*header, *fold_mean, *settings]

    def fold_calculating(self, feature_set_names: list, subject: int, x_train, x_test, U, train_index, val_index, fold):
        """Scale, project and classify one cross-validation fold.

        Args:
            feature_set_names: Feature-set names forwarded to ``projector``.
            subject: Identifier of the subject, forwarded to ``ML_classifier``.
            x_train: Frame that is indexed positionally by ``train_index`` and
                ``val_index`` to obtain the training and validation rows.
            x_test: Held-out test frame, passed to ``scaler`` unchanged.
            U: Unknown-imposter frame, passed to ``scaler`` unchanged.
            train_index: Positional row indices of the training rows.
            val_index: Positional row indices of the validation rows.
            fold: Fold number (used for logging only).

        Returns:
            The object returned by ``ML_classifier``, extended with ``num_pc``
            and the sample counts of the training, validation and test frames.
        """

        logger.info(f"\tFold number: {fold} out of {self._KFold} ({os.getpid()})")
        df_train = x_train.iloc[train_index, :]
        df_val = x_train.iloc[val_index, :]

        # df_train = self.down_sampling_new(df_train, 2)

        df_train, df_val, df_test, df_test_U = self.scaler(df_train, df_val, x_test, U)

        df_train, df_val, df_test, df_test_U, num_pc = self.projector(feature_set_names, df_train, df_val, df_test, df_test_U, )
        results = self.ML_classifier(subject, x_train=df_train, x_val=df_val, x_test=df_test, x_test_U=df_test_U)

        results["num_pc"] = num_pc
        # The original reported the validation fold under the "testing" keys
        # and hard-coded the validation counts to 0, although a separate test
        # frame exists. Report each split under its own key.
        results.update({
            "training_samples": df_train.shape[0],
            "pos_training_samples": df_train['ID'].sum(),
            "validation_samples": df_val.shape[0],
            "pos_validation_samples": df_val['ID'].sum(),
            "testing_samples": df_test.shape[0],
            "pos_testing_samples": df_test['ID'].sum(),
        })

        return results

    def collect_results(self, result: pd.DataFrame, pipeline_name: str) -> None:
        """Append ``result`` to ``results/Result__<name>.xlsx`` in the working directory.

        ``<name>`` is the ``SLURM_JOB_NAME`` environment variable when it is
        set, otherwise ``pipeline_name``. If the workbook cannot be written,
        the error is logged and the table is written to a second file whose
        name additionally contains ``self._test_id``.

        Args:
            result: Rows to append.
            pipeline_name: Fallback name used when ``SLURM_JOB_NAME`` is unset.
        """
        test = os.environ.get("SLURM_JOB_NAME", default=pipeline_name)
        results_dir = os.path.join(os.getcwd(), "results")
        # to_excel does not create missing directories.
        os.makedirs(results_dir, exist_ok=True)
        excel_path = os.path.join(results_dir, f"Result__{test}.xlsx")

        if os.path.isfile(excel_path):
            Results_DF = pd.read_excel(excel_path, index_col=0)
        else:
            Results_DF = pd.DataFrame(columns=self._col)

        # DataFrame.append was removed in pandas 2.0; pd.concat is the
        # equivalent that works on old and new versions.
        Results_DF = pd.concat([Results_DF, result])
        try:
            Results_DF.to_excel(excel_path)
        except Exception as e:
            logger.error(e)
            root, ext = os.path.splitext(excel_path)
            Results_DF.to_excel(f"{root}{self._test_id}{ext}")

    def extracting_feature_set1(self, dataset_name: str) -> tuple:
        """Load the pre-features of ``dataset_name`` and assemble feature set 1.

        Feature set 1 consists of the COP and GRF tables (hand-crafted, raw
        and WPT). The COA tables are still loaded, as in the original code,
        but are not part of the returned frame.

        Args:
            dataset_name: Dataset name forwarded to ``loading_pre_features``.

        Returns:
            ``(features, feature_set_names)`` where ``features`` is the
            column-wise concatenation of the selected tables followed by the
            labels, and ``feature_set_names`` lists the selected table names.
        """
        GRFs, COPs, COAs, pre_images, labels = self.loading_pre_features(dataset_name)
        COA_handcrafted = self.loading_COA_handcrafted(COAs)
        COP_handcrafted = self.loading_COP_handcrafted(COPs)
        GRF_handcrafted = self.loading_GRF_handcrafted(GRFs)
        COA_WPT = self.loading_COA_WPT(COAs)
        COP_WPT = self.loading_COP_WPT(COPs)
        GRF_WPT = self.loading_GRF_WPT(GRFs)

        # Explicit name -> table lookup; replaces eval() on local variable names.
        available = {
            "COA_handcrafted": COA_handcrafted,
            "COAs": COAs,
            "COA_WPT": COA_WPT,
            "COP_handcrafted": COP_handcrafted,
            "COPs": COPs,
            "COP_WPT": COP_WPT,
            "GRF_handcrafted": GRF_handcrafted,
            "GRFs": GRFs,
            "GRF_WPT": GRF_WPT,
        }

        feature_set_names = [
            "COP_handcrafted",
            "COPs",
            "COP_WPT",
            "GRF_handcrafted",
            "GRFs",
            "GRF_WPT",
        ]
        feature_set = [available[name] for name in feature_set_names]

        return pd.concat(feature_set + [labels], axis=1), feature_set_names

# Completion marker: printed once the class definition above has executed.
print('Done')

Done


In [2]:
def optimizer_accross_subjects(Users, no_samples, classifier):
    """Tune one hyperparameter set shared by all subjects, then evaluate each subject.

    The function loads the "casia" pre-features, builds the P80/P100 image
    feature table and selects known and unknown imposters. For the classifiers
    'svdd', 'ocsvm', 'knn', 'svm-rbf' and 'svm-linear' it runs an optunity
    particle-swarm search that maximises the mean balanced accuracy over all
    subjects. Finally it evaluates every subject with ``A.ML_classifier``
    using the selected parameters (``None`` when no search was run).

    Args:
        Users: Number of known imposters (assigned to ``A._known_imposter``).
        no_samples: Number of samples drawn (without replacement) from every
            non-target subject inside the search objective.
        classifier: Classifier name (assigned to ``A._classifier_name``).

    Returns:
        ``pd.DataFrame`` with one row per subject; an empty frame when there
        are no subjects.

    Raises:
        ValueError: From the search objective, for an unknown classifier name.
        NotImplementedError: From the search objective, for 'nb', 'lda' and
            'tm', which have no search implementation.
    """
    setting = {
        "dataset_name": "casia",
        "_classifier_name": "knn",
        "_combination": True,
        "_CNN_weights": "imagenet",
        "_verbose": True,
        "_CNN_batch_size": 32,
        "_CNN_base_model": "",
        "_CNN_epochs": 500,
        "_CNN_optimizer": "adam",
        "_val_size": 0.2,
        "_min_number_of_sample": 30,
        "_known_imposter": 32,
        "_unknown_imposter": 32,
        "_number_of_unknown_imposter_samples": 1.0,  # Must be less than 1
        "_waveletname": "coif1",
        "_pywt_mode": "constant",
        "_wavelet_level": 4,
        "_p_training_samples": 11,
        "_train_ratio": 34,
        "_ratio": False,
        "_KNN_n_neighbors": 5,
        "_KNN_metric": "euclidean",
        "_KNN_weights": "uniform",
        "_SVM_kernel": "linear",
        "_KFold": 10,
        "_random_runs": 20,
        "_persentage": 0.95,
        "_normilizing": "z-score",
    }

    A = Pipeline(setting)

    # Per-call overrides of the defaults above.
    A._known_imposter = Users
    A._unknown_imposter = 10
    A._classifier_name = classifier

    image_feature_name = ["P80", "P100"]
    dataset_name = "casia"

    _, _, _, pre_images, labels = A.loading_pre_features(dataset_name)

    ####################################################################################################################
    # pipeline 1: P100 and P80
    image_from_list = A.loading_pre_image_from_list(pre_images, image_feature_name)
    feature_set_names = ["P80", "P100"]
    DF_feature_all = pd.concat(list(image_from_list) + [labels], axis=1)

    subjects, samples = np.unique(DF_feature_all["ID"].values, return_counts=True)

    # Keep only subjects that have at least the minimum number of samples.
    ss = [
        subject_id
        for subject_id, count in zip(subjects, samples)
        if count >= A._min_number_of_sample
    ]

    # Known imposters come from the head of the list, unknown from the tail.
    known_imposter_list = ss[:A._known_imposter]
    unknown_imposter_list = ss[-A._unknown_imposter:]

    DF_unknown_imposter = DF_feature_all[DF_feature_all["ID"].isin(unknown_imposter_list)]
    DF_known_imposter = DF_feature_all[DF_feature_all["ID"].isin(known_imposter_list)]

    # Search-space bounds per classifier, passed to the optunity solver.
    search = {
        'knn': {'n_neighbors': [1, 20]},
        'svm-linear': {'logC': [-4, 3]},
        'svm-rbf': {'logGamma': [-6, 0], 'logC': [-4, 3]},
        'svm-poly': {'logGamma': [2, 5], 'logC': [-4, 3], 'coef0': [0, 1]},
        'rf': {'n_estimators': [20, 120], 'max_features': [5, 25]},
        'if': {'n_estimators': [20, 120], 'max_features': [5, 25]},
        'ocsvm': {'nu': [0, 1]},
        'svdd': {'nu': [0, 1], 'logGamma': [-6, 0]},
        'tm': None,
        'lda': None,
    }

    def _thresholded_predictions(model, df_train, df_val):
        """Fit ``model`` and binarise its validation scores.

        The threshold is ``A._THRESHOLDs[t_idx]`` where ``t_idx`` comes from
        ``A.compute_eer`` applied to the training-set scores.
        """
        fitted = model.fit(df_train.iloc[:, :-1].values, df_train.iloc[:, -1].values)
        train_scores = fitted.predict_proba(df_train.iloc[:, :-1].values)[:, 1]

        FRR_t, FAR_t = A.FXR_calculater(df_train.iloc[:, -1].values, train_scores)
        _, t_idx = A.compute_eer(FRR_t, FAR_t)
        TH = A._THRESHOLDs[t_idx]

        y_pred = fitted.predict_proba(df_val.iloc[:, :-1].values)[:, 1]
        y_pred[y_pred >= TH] = 1
        y_pred[y_pred < TH] = 0
        return y_pred

    def _one_class_predictions(model, df_train, df_val):
        """Fit a one-class ``model`` on the features only and predict the validation rows."""
        fitted = model.fit(df_train.iloc[:, :-1].values)
        return fitted.predict(df_val.iloc[:, :-1].values)

    def performance(DF_known_imposter, DF_unknown_imposter,):
        def objective_func(
                    n_neighbors=None, metric=None, weights=None,
                    logC=None, logGamma=None, degree=None, coef0=None,
                    n_estimators=None, max_features=None,
                    nu=None):
            """Mean balanced accuracy over all subjects for one parameter set.

            Returns 0 for parameter values outside the valid range so that the
            solver treats them as the worst possible score.
            """
            name = A._classifier_name
            lst = []

            for subject in DF_known_imposter["ID"].unique():

                non_targets = DF_known_imposter[DF_known_imposter["ID"] != subject]
                non_targets = non_targets.groupby("ID", group_keys=False).apply(lambda x: x.sample(n=no_samples, replace=False, random_state=A._random_state))
                target = DF_known_imposter[DF_known_imposter["ID"] == subject]
                DF = pd.concat([target, non_targets], axis=0)

                X, U = A.binarize_labels(DF, DF_unknown_imposter, subject)

                x_train, x_test = model_selection.train_test_split(X, test_size=0.20, random_state=A._random_state, stratify=X.iloc[:, -1].values,)
                x_train, x_val = model_selection.train_test_split(x_train, test_size=0.20, random_state=A._random_state, stratify=x_train.iloc[:, -1].values)

                df_train, df_val, df_test, df_test_U = A.scaler(x_train, x_val, x_test, U)
                df_train, df_val, df_test, df_test_U, num_pc = A.projector(feature_set_names, df_train, df_val, df_test, df_test_U, )

                # param, info = A.subject_optimizer(df_train, 2, 30, search[A._classifier_name])

                if name == 'knn':
                    if int(n_neighbors) < 1:
                        return 0
                    model = knn(n_neighbors=int(n_neighbors))  # , metric=metric, weights=weights, )
                    y_pred = _thresholded_predictions(model, df_train, df_val)
                elif name == 'svm-linear':
                    model = svm.SVC(kernel='linear', probability=True, random_state=A._random_state, C=10 ** logC)
                    y_pred = _thresholded_predictions(model, df_train, df_val)
                elif name == 'svm-poly':
                    # NOTE(review): the 'svm-poly' search space has no 'degree'
                    # entry, so ``degree`` stays None here, and 'logGamma' is
                    # searched but never passed as ``gamma`` -- confirm.
                    model = svm.SVC(kernel='poly', probability=True, random_state=A._random_state, C=10 ** logC, degree=degree, coef0=coef0)
                    y_pred = _thresholded_predictions(model, df_train, df_val)
                elif name == 'svm-rbf':
                    model = svm.SVC(kernel='rbf', probability=True, random_state=A._random_state, C=10 ** logC, gamma=10 ** logGamma)
                    y_pred = _thresholded_predictions(model, df_train, df_val)
                elif name == "rf":
                    model = RandomForestClassifier(n_estimators=int(n_estimators), max_features=int(max_features))
                    y_pred = _thresholded_predictions(model, df_train, df_val)
                elif name == "if":
                    model = IsolationForest(n_estimators=int(n_estimators), max_features=int(max_features), random_state=A._random_state)
                    # NOTE(review): unlike 'ocsvm'/'svdd' the raw predictions
                    # are not remapped before scoring -- confirm.
                    y_pred = _one_class_predictions(model, df_train, df_val)
                elif name == "ocsvm":
                    if (nu <= 0) or (nu > 1):
                        return 0
                    model = OneClassSVM(kernel='linear', nu=nu)
                    y_pred = _one_class_predictions(model, df_train, df_val)
                    y_pred = 0.5 - (y_pred / 2)  # maps +1 -> 0 and -1 -> 1
                elif name == "svdd":
                    if (nu <= 0) or (nu > 1):
                        return 0
                    model = OneClassSVM(kernel='rbf', nu=nu, gamma=10 ** logGamma)
                    y_pred = _one_class_predictions(model, df_train, df_val)
                    y_pred = 0.5 - (y_pred / 2)  # maps +1 -> 0 and -1 -> 1
                elif name in ("nb", "lda", "tm"):
                    # The original branches were ``pass`` and then failed on an
                    # unbound ``y_pred``; fail with an explicit message instead.
                    raise NotImplementedError(f"Hyperparameter search is not implemented for: {name}")
                else:
                    # The original raised a plain string, which is itself a TypeError.
                    raise ValueError(f'Unknown algorithm: {A._classifier_name}')

                lst.append(optunity.metrics.bacc(df_val.iloc[:, -1].values, y_pred, 1))

            logger.info(f"mean bacc: {np.mean(lst)}")
            return np.mean(lst)

        return objective_func

    objective_func = performance(DF_known_imposter, DF_unknown_imposter,)

    # ``param`` stays None for classifiers without a search; it is forwarded
    # to ML_classifier as is.
    param = None
    if A._classifier_name in ['svdd', 'ocsvm', "knn", "svm-rbf", "svm-linear"]:
        pmap8 = optunity.parallel.create_pmap(16)
        solver = optunity.solvers.ParticleSwarm(num_particles=32, num_generations=10, **search[A._classifier_name])
        param, info = optunity.optimize(solver, objective_func, pmap=pmap8, maximize=True)  # , pmap=pmap8
        print(optunity.call_log2dataframe(info.call_log))

    # Column name -> list of per-subject values. Initialised up front so that
    # an empty subject list yields an empty frame instead of an unbound name.
    res_dict = {}
    for idx, subject in enumerate(DF_known_imposter["ID"].unique()):

        logger.info(f"   Subject number: {idx} out of {len(known_imposter_list)} (subject ID is {subject})")

        X, U = A.binarize_labels(DF_known_imposter, DF_unknown_imposter, subject)
        x_train, x_test = model_selection.train_test_split(X, test_size=0.20, random_state=A._random_state, stratify=X.iloc[:, -1].values,)
        # The validation split is only used for the reported sample counts;
        # the full training split is used for fitting below.
        _, x_val = model_selection.train_test_split(x_train, test_size=0.20, random_state=A._random_state, stratify=x_train.iloc[:, -1].values)

        df_train, df_test, df_test_U = A.scaler(x_train, x_test, U)
        df_train, df_test, df_test_U, num_pc = A.projector(feature_set_names, df_train, df_test, df_test_U, )

        # df_train = self.down_sampling_new(df_train, 2)
        results = A.ML_classifier(subject, x_train=df_train, x_test=df_test, x_test_U=df_test_U, params=param)

        results["num_pc"] = num_pc
        results.update({
            "training_samples": df_train.shape[0],
            "pos_training_samples": df_train['ID'].sum(),
            "validation_samples": x_val.shape[0],
            "pos_validation_samples": x_val['ID'].sum(),
            "testing_samples": df_test.shape[0],
            "pos_testing_samples": df_test['ID'].sum(),
        })

        results.update({
            "test_id": A._test_id,
            "subject": subject,
            "combination": A._combination,
            "classifier_name": A._classifier_name,
            "normilizing": A._normilizing,
            "persentage": A._persentage,
            "KFold": "-",
            "known_imposter": A._known_imposter,
            "unknown_imposter": A._unknown_imposter,
            "min_number_of_sample": A._min_number_of_sample,
            "param": param,
        })

        for key in results:
            res_dict.setdefault(key, []).append(results[key])

    return pd.DataFrame.from_dict(res_dict)
   
# Completion marker: printed once the function definition above has executed.
print("Done")
# Disabled example call (2 known imposters, 30 samples, "lda"):
# results1 = optimizer_accross_subjects(2, 30, "lda")

Done


In [3]:
# Sweep grid: p0 = number of known imposters, p1 = samples per non-target
# subject, p2 = classifier name (arguments of optimizer_accross_subjects).
p0 = [6, 41]
p1 = [5, 20]
# Full list of classifier names:
# p2 = ['svdd', 'ocsvm', "knn", "svm-rbf", "svm-linear", "lda", "if", "rf", "tm", "nb"]
p2 = ["knn", "svm-rbf", "lda"]

# The first combination, (6, 5, "knn"), is run explicitly below, so it is
# dropped from the sweep.
space = list(product(p0, p1, p2))[1:]

results1 = optimizer_accross_subjects(6, 5, "knn")
path = os.path.join(os.getcwd(), "results", "accross_subj.xlsx")
results1.to_excel(path)

for idx, parameters in enumerate(space):

    logger.info(f"Starting [step {idx+1} out of {len(space)}], parameters: {parameters}")
    results = optimizer_accross_subjects(*parameters)
    results1 = pd.concat([results1, results], axis=0)

    # Rewrite the workbook after every step so finished runs survive a crash.
    path = os.path.join(os.getcwd(), "results", "accross_subj.xlsx")
    results1.to_excel(path)
print('Done')

[38;5;39m[06/28/2022 08:39:42 AM ]-[38;5;226m[PreFeatures @731][0m[38;5;39m-[INFO][0m[31;1m all pre features were loaded!!![0m
[38;5;39m[06/28/2022 08:39:43 AM ]-[38;5;226m[PreFeatures @854][0m[38;5;39m-[INFO][0m[31;1mloading P80 features!!![0m
[38;5;39m[06/28/2022 08:39:44 AM ]-[38;5;226m[PreFeatures @854][0m[38;5;39m-[INFO][0m[31;1mloading P100 features!!![0m
[38;5;39m[06/28/2022 08:40:23 AM ]-[38;5;226m[PreFeatures @212][0m[38;5;39m-[INFO][0m[31;1mmean bacc: 67.4074074074074[0m
[38;5;39m[06/28/2022 08:40:23 AM ]-[38;5;226m[PreFeatures @212][0m[38;5;39m-[INFO][0m[31;1mmean bacc: 66.99074074074075[0m
[38;5;39m[06/28/2022 08:40:23 AM ]-[38;5;226m[PreFeatures @212][0m[38;5;39m-[INFO][0m[31;1mmean bacc: 67.4074074074074[0m
[38;5;39m[06/28/2022 08:40:24 AM ]-[38;5;226m[PreFeatures @212][0m[38;5;39m-[INFO][0m[31;1mmean bacc: 87.08333333333333[0m
[38;5;39m[06/28/2022 08:40:24 AM ]-[38;5;226m[PreFeatures @212][0m[38;5;39m-[INFO][0m[31;1m

[38;5;39m[06/28/2022 08:42:34 AM ]-[38;5;226m[PreFeatures @212][0m[38;5;39m-[INFO][0m[31;1mmean bacc: 84.58333333333333[0m
[38;5;39m[06/28/2022 08:42:34 AM ]-[38;5;226m[PreFeatures @212][0m[38;5;39m-[INFO][0m[31;1mmean bacc: 70.0[0m
[38;5;39m[06/28/2022 08:43:12 AM ]-[38;5;226m[PreFeatures @212][0m[38;5;39m-[INFO][0m[31;1mmean bacc: 67.4074074074074[0m
[38;5;39m[06/28/2022 08:43:12 AM ]-[38;5;226m[PreFeatures @212][0m[38;5;39m-[INFO][0m[31;1mmean bacc: 70.0[0m
[38;5;39m[06/28/2022 08:43:12 AM ]-[38;5;226m[PreFeatures @212][0m[38;5;39m-[INFO][0m[31;1mmean bacc: 88.75[0m
[38;5;39m[06/28/2022 08:43:13 AM ]-[38;5;226m[PreFeatures @212][0m[38;5;39m-[INFO][0m[31;1mmean bacc: 84.58333333333333[0m
[38;5;39m[06/28/2022 08:43:13 AM ]-[38;5;226m[PreFeatures @212][0m[38;5;39m-[INFO][0m[31;1mmean bacc: 66.99074074074075[0m
[38;5;39m[06/28/2022 08:43:17 AM ]-[38;5;226m[PreFeatures @212][0m[38;5;39m-[INFO][0m[31;1mmean bacc: 87.08333333333333[0m

[38;5;39m[06/28/2022 08:45:23 AM ]-[38;5;226m[PreFeatures @212][0m[38;5;39m-[INFO][0m[31;1mmean bacc: 70.41666666666667[0m
[38;5;39m[06/28/2022 08:46:02 AM ]-[38;5;226m[PreFeatures @212][0m[38;5;39m-[INFO][0m[31;1mmean bacc: 88.75[0m
[38;5;39m[06/28/2022 08:46:03 AM ]-[38;5;226m[PreFeatures @212][0m[38;5;39m-[INFO][0m[31;1mmean bacc: 84.58333333333333[0m
[38;5;39m[06/28/2022 08:46:04 AM ]-[38;5;226m[PreFeatures @212][0m[38;5;39m-[INFO][0m[31;1mmean bacc: 67.4074074074074[0m
[38;5;39m[06/28/2022 08:46:04 AM ]-[38;5;226m[PreFeatures @212][0m[38;5;39m-[INFO][0m[31;1mmean bacc: 67.4074074074074[0m
[38;5;39m[06/28/2022 08:46:06 AM ]-[38;5;226m[PreFeatures @212][0m[38;5;39m-[INFO][0m[31;1mmean bacc: 65.83333333333333[0m
[38;5;39m[06/28/2022 08:46:06 AM ]-[38;5;226m[PreFeatures @212][0m[38;5;39m-[INFO][0m[31;1mmean bacc: 84.16666666666667[0m
[38;5;39m[06/28/2022 08:46:06 AM ]-[38;5;226m[PreFeatures @212][0m[38;5;39m-[INFO][0m[31;1mmean ba

[38;5;39m[06/28/2022 08:48:48 AM ]-[38;5;226m[PreFeatures @212][0m[38;5;39m-[INFO][0m[31;1mmean bacc: 70.0[0m
[38;5;39m[06/28/2022 08:48:48 AM ]-[38;5;226m[PreFeatures @212][0m[38;5;39m-[INFO][0m[31;1mmean bacc: 67.91666666666667[0m
[38;5;39m[06/28/2022 08:48:51 AM ]-[38;5;226m[PreFeatures @212][0m[38;5;39m-[INFO][0m[31;1mmean bacc: 88.75[0m
[38;5;39m[06/28/2022 08:48:52 AM ]-[38;5;226m[PreFeatures @212][0m[38;5;39m-[INFO][0m[31;1mmean bacc: 87.08333333333333[0m
[38;5;39m[06/28/2022 08:48:52 AM ]-[38;5;226m[PreFeatures @212][0m[38;5;39m-[INFO][0m[31;1mmean bacc: 84.16666666666667[0m
[38;5;39m[06/28/2022 08:48:52 AM ]-[38;5;226m[PreFeatures @212][0m[38;5;39m-[INFO][0m[31;1mmean bacc: 87.08333333333333[0m
[38;5;39m[06/28/2022 08:48:53 AM ]-[38;5;226m[PreFeatures @212][0m[38;5;39m-[INFO][0m[31;1mmean bacc: 84.58333333333333[0m
[38;5;39m[06/28/2022 08:48:53 AM ]-[38;5;226m[PreFeatures @212][0m[38;5;39m-[INFO][0m[31;1mmean bacc: 88.75[

[38;5;39m[06/28/2022 08:51:39 AM ]-[38;5;226m[PreFeatures @212][0m[38;5;39m-[INFO][0m[31;1mmean bacc: 70.0[0m
[38;5;39m[06/28/2022 08:51:40 AM ]-[38;5;226m[PreFeatures @212][0m[38;5;39m-[INFO][0m[31;1mmean bacc: 84.58333333333333[0m
[38;5;39m[06/28/2022 08:51:40 AM ]-[38;5;226m[PreFeatures @212][0m[38;5;39m-[INFO][0m[31;1mmean bacc: 84.16666666666667[0m
[38;5;39m[06/28/2022 08:51:40 AM ]-[38;5;226m[PreFeatures @212][0m[38;5;39m-[INFO][0m[31;1mmean bacc: 70.41666666666667[0m
[38;5;39m[06/28/2022 08:51:41 AM ]-[38;5;226m[PreFeatures @212][0m[38;5;39m-[INFO][0m[31;1mmean bacc: 84.16666666666667[0m
[38;5;39m[06/28/2022 08:51:41 AM ]-[38;5;226m[PreFeatures @212][0m[38;5;39m-[INFO][0m[31;1mmean bacc: 88.75[0m
[38;5;39m[06/28/2022 08:51:42 AM ]-[38;5;226m[PreFeatures @212][0m[38;5;39m-[INFO][0m[31;1mmean bacc: 87.08333333333333[0m
[38;5;39m[06/28/2022 08:51:42 AM ]-[38;5;226m[PreFeatures @212][0m[38;5;39m-[INFO][0m[31;1mmean bacc: 87.0833

     n_neighbors      value
0       6.603516  70.416667
1       8.978516  67.916667
2      18.478516  67.407407
3      13.728516  67.407407
4       4.228516  87.083333
..           ...        ...
315     3.419956  88.750000
316     2.992140  84.583333
317     1.698288  84.166667
318     1.719275  84.166667
319     3.465547  88.750000

[320 rows x 2 columns]


[38;5;39m[06/28/2022 08:53:49 AM ]-[38;5;226m[PreFeatures @229][0m[38;5;39m-[INFO][0m[31;1m   Subject number: 1 out of 6 (subject ID is 5.0)[0m
[38;5;39m[06/28/2022 08:53:51 AM ]-[38;5;226m[PreFeatures @229][0m[38;5;39m-[INFO][0m[31;1m   Subject number: 2 out of 6 (subject ID is 6.0)[0m
[38;5;39m[06/28/2022 08:53:52 AM ]-[38;5;226m[PreFeatures @229][0m[38;5;39m-[INFO][0m[31;1m   Subject number: 3 out of 6 (subject ID is 7.0)[0m
[38;5;39m[06/28/2022 08:53:54 AM ]-[38;5;226m[PreFeatures @229][0m[38;5;39m-[INFO][0m[31;1m   Subject number: 4 out of 6 (subject ID is 8.0)[0m
[38;5;39m[06/28/2022 08:53:56 AM ]-[38;5;226m[PreFeatures @229][0m[38;5;39m-[INFO][0m[31;1m   Subject number: 5 out of 6 (subject ID is 9.0)[0m
[38;5;39m[06/28/2022 08:54:07 AM ]-[38;5;226m[PreFeatures @17][0m[38;5;39m-[INFO][0m[31;1mStarting [step 1 out of 11], parameters: (6, 5, 'svm-rbf')[0m
[38;5;39m[06/28/2022 08:54:09 AM ]-[38;5;226m[PreFeatures @731][0m[38;5;39m-[INFO]

Traceback (most recent call last):
  File "/project/6010852/saeed67/master-project/env/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3444, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/tmp/ipykernel_24913/575709898.py", line 18, in <module>
    results = optimizer_accross_subjects(parameters[0], parameters[1], parameters[2])
  File "/tmp/ipykernel_24913/4160882563.py", line 224, in optimizer_accross_subjects
    param, info = optunity.optimize(solver, objective_func, pmap=pmap8, maximize=True) # , pmap=pmap8
  File "/project/6010852/saeed67/master-project/env/lib/python3.8/site-packages/optunity/api.py", line 245, in optimize
    solution, report = solver.optimize(f, maximize, pmap=pmap)
  File "/project/6010852/saeed67/master-project/env/lib/python3.8/site-packages/optunity/solvers/ParticleSwarm.py", line 270, in optimize
    fitnesses = pmap(evaluate, list(map(self.particle2dict, pop)))
  File "/project/6010852/saeed67/master-project

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/project/6010852/saeed67/master-project/env/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3444, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/tmp/ipykernel_24913/575709898.py", line 18, in <module>
    results = optimizer_accross_subjects(parameters[0], parameters[1], parameters[2])
  File "/tmp/ipykernel_24913/4160882563.py", line 224, in optimizer_accross_subjects
    param, info = optunity.optimize(solver, objective_func, pmap=pmap8, maximize=True) # , pmap=pmap8
  File "/project/6010852/saeed67/master-project/env/lib/python3.8/site-packages/optunity/api.py", line 245, in optimize
    solution, report = solver.optimize(f, maximize, pmap=pmap)
  File "/project/6010852/saeed67/master-project/env/lib/python3.8/site-packages/optunity/solvers/ParticleSwarm.py", line 270, in optimize
    fitnesses = pmap(evaluate, list(map(self.particle2dict, pop)))
  File "/project/6010852/saeed67/master-project

TypeError: object of type 'NoneType' has no len()

In [None]:
# Single run: 6 known imposters, 20 samples per non-target subject, KNN.
results1 = optimizer_accross_subjects(Users=6, no_samples=20, classifier="knn")
path = os.path.join(os.getcwd(), "results", "accross_subj_6_20.xlsx")
results1.to_excel(path)


[38;5;39m[06/28/2022 09:46:42 AM ]-[38;5;226m[PreFeatures @731][0m[38;5;39m-[INFO][0m[31;1m all pre features were loaded!!![0m
[38;5;39m[06/28/2022 09:46:43 AM ]-[38;5;226m[PreFeatures @854][0m[38;5;39m-[INFO][0m[31;1mloading P80 features!!![0m
[38;5;39m[06/28/2022 09:46:44 AM ]-[38;5;226m[PreFeatures @854][0m[38;5;39m-[INFO][0m[31;1mloading P100 features!!![0m


In [None]:
# Completion marker for the end of the notebook.
print('Done')