# 0 - Information

# 1 - Packages

## 1.1 - Import of classical packages

In [1]:
# Math packages
import numpy as np

# Progress bar
from tqdm import tqdm

## 1.2 - Import of personal packages

In [2]:
# Import different tools functions
from Modules.Utils.Dropout import *
from Modules.Utils.ImportData import *
from Modules.Utils.Normalisation import *
from Modules.Utils.Preprocessing import *
from Modules.Utils.Predictions import *
from Modules.Utils.Transform import *

# Import functions for the data augmentation
from Modules.DataAugmentations.NoAugmentation import *
from Modules.DataAugmentations.ComplementarySequences import *
from Modules.DataAugmentations.PertubatedSequences import *

# Import functions for the embedding
from Modules.Embeddings.NoEmbedding import *
from Modules.Embeddings.SpectrumEmbedding import *
from Modules.Embeddings.DimismatchEmbedding import *
from Modules.Embeddings.MotifEmbedding import *
from Modules.Embeddings.WeightedDegreeEmbedding import *
from Modules.Embeddings.HotEncodingEmbedding import *
from Modules.Embeddings.FiguresEmbedding import *
from Modules.Embeddings.TraidEmbedding import *
# from Modules.Embeddings.HMMEmbedding import *

# Import functions for the selection of the model
from Modules.ModelSelection.CrossValidation import *
from Modules.ModelSelection.GridSearch import *

# Import functions for the kernels
from Modules.Kernels.LinearKernel import *
from Modules.Kernels.PolyKernel import *
from Modules.Kernels.DimismatchPolyKernel import *
from Modules.Kernels.CKN import *
from Modules.Kernels.GaussianKernel import *
from Modules.Kernels.SpectrumKernel import *
from Modules.Kernels.HMM import *

# Import function of model
from Modules.Models.KernelLogisticRegression import *
from Modules.Models.KernelSVM import *

# 2 - Data Import

In [3]:
# Extraction of the dataset
df_mat_dict = ImportData("./Data/Optionnal/", "./Data/", suffix="_mat100")
df_dict = ImportData("./Data/", "./Data/", header=0, sep=",")

# Display one of the datasets extracted (Xtr0_mat100)
display(df_dict[0][1].head())

Unnamed: 0,Id,seq
0,0,TTGACCGGAGGGATGAAAACAGTCTAAGACAGATTTATGGCGATAG...
1,1,TGTTGCTGGTGTGTAGAAACACTACTGATTTTTGTATCCAGCCACA...
2,2,TGTCTTAAACTTTACAGCTTTTTCATACAGTCTGTGTAGTACTAGA...
3,3,CAGGGAAAGGTTTGAAACGGAACGTAACAGCCCACGTTTTGCCAAA...
4,4,CCACCACCACTCCTGGCTAATTTTTGTATTTTTGGTAGAGACAGGG...


# 3 - Determining Best Model

In [65]:
a = np.arange(9, dtype=np.uint8).reshape((3, 3))
b = np.random.randint(0, 100, size=(3, 3), dtype=np.uint8)
result = np.zeros((3, 3))
np.dot(a, b, result)

ValueError: output array is not acceptable (must have the right datatype, number of dimensions, and be a C-Array)

In [63]:
import numpy as np
from numba import njit


def PolyKernel(X, Y, k=2, add_ones=False):
    """Compute the K matrix in the case of the linear kernel."""

    # Shape of X
    n, _ = np.shape(X)
    d, _ = np.shape(Y)
    

    # Convert X and Y
    X = np.array(X, dtype=np.float)
    Y = np.array(Y, dtype=np.float)

    @njit
    def subPolyKernel(X, Y):
        """Apply the dot product to X and Y."""

        return np.dot(X, Y.T)

    # Count the dot product
    result = subPolyKernel(X, Y)

    # Test if add ones
    if add_ones:
        # Compute results
        result = (np.array(result) + np.ones((n, d))) ** k

    else:
        # Compute results
        result = np.array(result) ** k

    return result


In [64]:
# Hyperparameters for DataAugmentation
hyperparameters_data_augmentation = {
                                        NoAugmentation: {},
#                                         PertubatedSequences: {"n": [2], "add_compl": [True, False]}
                                    }

# Hyperparameters for the embedding
hyperparameters_embedding = {
#                               NoEmbedding: {}
                              SpectrumEmbedding: {"d_l": [[5, 7, 12]]}
#                               FiguresEmbedding: {},
#                               DimismatchEmbedding: {"d": [[5, 6, 7]]}
                            }

# Hyperparameters of the kernels
hyperparameters_kernels = {
#     SpectrumKernel: {"d_l": [[5, 7, 12]]}
    PolyKernel: {
        "k": [2],
        "add_ones": [True]
    }
#     DimismatchPolyKernel: {
#         "m" : [3],
#         "k": [2],
#         "add_ones": [True],
#         "d_l": [[5, 6, 7]]
#     }
#    GaussianKernelBIS: {"sigma": [None, 10000, 10]}
}

# Hyper-parameters of the models
hyperparamters_models = {
                         KernelLogisticRegression: 
                             {"lamda": [10e-12], 
                              "informations": [False],
                              "preprocessing": [None],
                              "normalisation": [None],
                              "max_iter": [15],
                             },
#                          KernelSVM:
#                              {"lamda": [1, 0.01],
#                               "max_iter": [10e4],
#                               "tol": [10e-6],
#                               "informations": [False],
#                               "preprocessing": [Preprocessing, None]}
                        }



# GridSearch
[best_score, best_parameters_names,
best_parameters_values] = GridSearch(df_dict,
                                     hyperparameters_data_augmentation,
                                     hyperparameters_embedding,
                                     hyperparamters_models,
                                     hyperparameters_kernels,
                                     cv=5)

# Display result
print("Best Score: ", best_score)
print("Best Parameters: ", best_parameters_names)

  0%|          | 0/1 [00:00<?, ?it/s]
  0%|          | 0/1 [00:00<?, ?it/s][A

  0%|          | 0/172281 [00:00<?, ?it/s][A[A

  0%|          | 37/172281 [00:00<08:03, 356.56it/s][A[A

  0%|          | 72/172281 [00:00<08:12, 349.78it/s][A[A

  0%|          | 105/172281 [00:00<08:22, 342.93it/s][A[A

  0%|          | 139/172281 [00:00<08:26, 340.07it/s][A[A

  0%|          | 173/172281 [00:00<08:29, 337.82it/s][A[A

  0%|          | 205/172281 [00:00<08:42, 329.28it/s][A[A

  0%|          | 239/172281 [00:00<08:40, 330.65it/s][A[A

  0%|          | 275/172281 [00:00<08:29, 337.35it/s][A[A

  0%|          | 309/172281 [00:00<08:28, 338.03it/s][A[A

  0%|          | 348/172281 [00:01<08:10, 350.20it/s][A[A

  0%|          | 385/172281 [00:01<08:05, 354.26it/s][A[A

  0%|          | 420/172281 [00:01<08:10, 350.73it/s][A[A

  0%|          | 455/172281 [00:01<09:04, 315.36it/s][A[A

  0%|          | 487/172281 [00:01<09:56, 288.08it/s][A[A

  0%|          | 5

  6%|▌         | 10623/172281 [00:27<06:03, 444.85it/s][A[A

  6%|▌         | 10668/172281 [00:27<06:07, 439.96it/s][A[A

  6%|▌         | 10713/172281 [00:27<06:12, 434.21it/s][A[A

  6%|▌         | 10757/172281 [00:27<06:11, 434.34it/s][A[A

  6%|▋         | 10801/172281 [00:27<06:13, 431.94it/s][A[A

  6%|▋         | 10845/172281 [00:27<06:12, 433.77it/s][A[A

  6%|▋         | 10889/172281 [00:28<06:11, 434.56it/s][A[A

  6%|▋         | 10934/172281 [00:28<06:08, 438.23it/s][A[A

  6%|▋         | 10978/172281 [00:28<06:08, 437.82it/s][A[A

  6%|▋         | 11022/172281 [00:28<06:08, 437.53it/s][A[A

  6%|▋         | 11066/172281 [00:28<06:10, 435.56it/s][A[A

  6%|▋         | 11110/172281 [00:28<06:09, 436.61it/s][A[A

  6%|▋         | 11154/172281 [00:28<06:11, 434.24it/s][A[A

  6%|▋         | 11198/172281 [00:28<06:10, 434.85it/s][A[A

  7%|▋         | 11242/172281 [00:28<06:12, 432.88it/s][A[A

  7%|▋         | 11286/172281 [00:29<06:12, 432.64it/s]

 13%|█▎        | 22108/172281 [00:54<05:43, 437.51it/s][A[A

 13%|█▎        | 22152/172281 [00:54<05:43, 436.99it/s][A[A

 13%|█▎        | 22197/172281 [00:54<05:42, 438.56it/s][A[A

 13%|█▎        | 22242/172281 [00:54<05:40, 441.00it/s][A[A

 13%|█▎        | 22287/172281 [00:54<05:39, 442.29it/s][A[A

 13%|█▎        | 22332/172281 [00:54<05:39, 441.21it/s][A[A

 13%|█▎        | 22377/172281 [00:54<05:41, 439.02it/s][A[A

 13%|█▎        | 22421/172281 [00:54<05:42, 436.92it/s][A[A

 13%|█▎        | 22465/172281 [00:54<05:42, 437.38it/s][A[A

 13%|█▎        | 22510/172281 [00:54<05:40, 439.80it/s][A[A

 13%|█▎        | 22554/172281 [00:55<05:41, 438.98it/s][A[A

 13%|█▎        | 22599/172281 [00:55<05:39, 440.56it/s][A[A

 13%|█▎        | 22644/172281 [00:55<05:38, 441.58it/s][A[A

 13%|█▎        | 22689/172281 [00:55<05:41, 438.42it/s][A[A

 13%|█▎        | 22733/172281 [00:55<05:44, 434.54it/s][A[A

 13%|█▎        | 22777/172281 [00:55<05:44, 433.36it/s]

 20%|█▉        | 33663/172281 [01:20<05:16, 437.88it/s][A[A

 20%|█▉        | 33707/172281 [01:20<05:18, 435.66it/s][A[A

 20%|█▉        | 33751/172281 [01:20<05:17, 435.87it/s][A[A

 20%|█▉        | 33795/172281 [01:20<05:18, 435.13it/s][A[A

 20%|█▉        | 33840/172281 [01:20<05:15, 439.04it/s][A[A

 20%|█▉        | 33885/172281 [01:21<05:13, 441.59it/s][A[A

 20%|█▉        | 33930/172281 [01:21<05:14, 439.72it/s][A[A

 20%|█▉        | 33974/172281 [01:21<05:15, 438.31it/s][A[A

 20%|█▉        | 34018/172281 [01:21<05:17, 435.63it/s][A[A

 20%|█▉        | 34063/172281 [01:21<05:15, 438.24it/s][A[A

 20%|█▉        | 34108/172281 [01:21<05:13, 440.70it/s][A[A

 20%|█▉        | 34153/172281 [01:21<05:15, 437.80it/s][A[A

 20%|█▉        | 34197/172281 [01:21<05:19, 431.96it/s][A[A

 20%|█▉        | 34241/172281 [01:21<05:19, 431.84it/s][A[A

 20%|█▉        | 34286/172281 [01:21<05:16, 436.29it/s][A[A

 20%|█▉        | 34331/172281 [01:22<05:13, 439.90it/s]

 26%|██▌       | 44937/172281 [01:47<05:15, 403.46it/s][A[A

 26%|██▌       | 44978/172281 [01:47<05:34, 380.89it/s][A[A

 26%|██▌       | 45017/172281 [01:47<05:45, 368.31it/s][A[A

 26%|██▌       | 45058/172281 [01:47<05:35, 378.99it/s][A[A

 26%|██▌       | 45103/172281 [01:47<05:20, 397.05it/s][A[A

 26%|██▌       | 45148/172281 [01:48<05:09, 410.11it/s][A[A

 26%|██▌       | 45191/172281 [01:48<05:05, 415.37it/s][A[A

 26%|██▋       | 45233/172281 [01:48<05:06, 414.90it/s][A[A

 26%|██▋       | 45275/172281 [01:48<05:07, 413.44it/s][A[A

 26%|██▋       | 45318/172281 [01:48<05:04, 417.22it/s][A[A

 26%|██▋       | 45363/172281 [01:48<04:58, 424.96it/s][A[A

 26%|██▋       | 45406/172281 [01:48<05:16, 400.54it/s][A[A

 26%|██▋       | 45450/172281 [01:48<05:08, 411.26it/s][A[A

 26%|██▋       | 45492/172281 [01:48<05:06, 413.49it/s][A[A

 26%|██▋       | 45537/172281 [01:49<05:00, 422.31it/s][A[A

 26%|██▋       | 45580/172281 [01:49<05:09, 408.73it/s]

 32%|███▏      | 54978/172281 [02:14<04:36, 424.21it/s][A[A

 32%|███▏      | 55021/172281 [02:14<04:37, 422.27it/s][A[A

 32%|███▏      | 55064/172281 [02:15<04:39, 418.98it/s][A[A

 32%|███▏      | 55106/172281 [02:15<04:43, 413.40it/s][A[A

 32%|███▏      | 55149/172281 [02:15<04:40, 416.86it/s][A[A

 32%|███▏      | 55191/172281 [02:15<04:40, 417.65it/s][A[A

 32%|███▏      | 55233/172281 [02:15<04:46, 408.29it/s][A[A

 32%|███▏      | 55274/172281 [02:15<04:51, 400.85it/s][A[A

 32%|███▏      | 55316/172281 [02:15<04:48, 405.42it/s][A[A

 32%|███▏      | 55359/172281 [02:15<04:44, 411.14it/s][A[A

 32%|███▏      | 55402/172281 [02:15<04:42, 414.32it/s][A[A

 32%|███▏      | 55446/172281 [02:16<04:37, 421.63it/s][A[A

 32%|███▏      | 55490/172281 [02:16<04:35, 424.16it/s][A[A

 32%|███▏      | 55533/172281 [02:16<04:39, 418.31it/s][A[A

 32%|███▏      | 55575/172281 [02:16<04:47, 406.20it/s][A[A

 32%|███▏      | 55616/172281 [02:16<04:55, 394.82it/s]

 38%|███▊      | 66321/172281 [02:41<04:09, 425.50it/s][A[A

 39%|███▊      | 66364/172281 [02:41<04:19, 407.72it/s][A[A

 39%|███▊      | 66407/172281 [02:41<04:15, 413.58it/s][A[A

 39%|███▊      | 66451/172281 [02:41<04:12, 419.13it/s][A[A

 39%|███▊      | 66494/172281 [02:42<04:11, 420.97it/s][A[A

 39%|███▊      | 66538/172281 [02:42<04:08, 426.07it/s][A[A

 39%|███▊      | 66582/172281 [02:42<04:07, 427.52it/s][A[A

 39%|███▊      | 66626/172281 [02:42<04:06, 428.37it/s][A[A

 39%|███▊      | 66671/172281 [02:42<04:04, 431.78it/s][A[A

 39%|███▊      | 66715/172281 [02:42<04:03, 432.73it/s][A[A

 39%|███▉      | 66759/172281 [02:42<04:03, 433.89it/s][A[A

 39%|███▉      | 66803/172281 [02:42<04:04, 430.56it/s][A[A

 39%|███▉      | 66847/172281 [02:42<04:04, 431.23it/s][A[A

 39%|███▉      | 66891/172281 [02:42<04:04, 430.22it/s][A[A

 39%|███▉      | 66935/172281 [02:43<04:05, 429.88it/s][A[A

 39%|███▉      | 66978/172281 [02:43<04:05, 429.38it/s]

 45%|████▌     | 77677/172281 [03:08<03:39, 431.22it/s][A[A

 45%|████▌     | 77721/172281 [03:08<03:41, 427.12it/s][A[A

 45%|████▌     | 77765/172281 [03:08<03:39, 429.76it/s][A[A

 45%|████▌     | 77808/172281 [03:08<03:40, 428.39it/s][A[A

 45%|████▌     | 77851/172281 [03:08<03:40, 427.94it/s][A[A

 45%|████▌     | 77894/172281 [03:08<03:41, 426.34it/s][A[A

 45%|████▌     | 77938/172281 [03:08<03:40, 427.72it/s][A[A

 45%|████▌     | 77981/172281 [03:09<03:40, 427.18it/s][A[A

 45%|████▌     | 78024/172281 [03:09<03:40, 426.75it/s][A[A

 45%|████▌     | 78067/172281 [03:09<03:40, 426.70it/s][A[A

 45%|████▌     | 78110/172281 [03:09<03:59, 393.32it/s][A[A

 45%|████▌     | 78150/172281 [03:09<04:21, 359.88it/s][A[A

 45%|████▌     | 78187/172281 [03:09<04:28, 350.00it/s][A[A

 45%|████▌     | 78231/172281 [03:09<04:12, 371.75it/s][A[A

 45%|████▌     | 78274/172281 [03:09<04:03, 385.33it/s][A[A

 45%|████▌     | 78317/172281 [03:09<03:57, 395.87it/s]

 52%|█████▏    | 89120/172281 [03:34<03:14, 428.33it/s][A[A

 52%|█████▏    | 89163/172281 [03:35<03:25, 404.52it/s][A[A

 52%|█████▏    | 89204/172281 [03:35<03:31, 393.48it/s][A[A

 52%|█████▏    | 89248/172281 [03:35<03:24, 405.53it/s][A[A

 52%|█████▏    | 89292/172281 [03:35<03:20, 413.42it/s][A[A

 52%|█████▏    | 89336/172281 [03:35<03:18, 418.38it/s][A[A

 52%|█████▏    | 89379/172281 [03:35<03:16, 420.86it/s][A[A

 52%|█████▏    | 89423/172281 [03:35<03:14, 426.32it/s][A[A

 52%|█████▏    | 89468/172281 [03:35<03:11, 431.96it/s][A[A

 52%|█████▏    | 89513/172281 [03:35<03:09, 435.74it/s][A[A

 52%|█████▏    | 89558/172281 [03:36<03:09, 437.57it/s][A[A

 52%|█████▏    | 89603/172281 [03:36<03:07, 440.29it/s][A[A

 52%|█████▏    | 89648/172281 [03:36<03:07, 440.31it/s][A[A

 52%|█████▏    | 89693/172281 [03:36<03:07, 441.29it/s][A[A

 52%|█████▏    | 89738/172281 [03:36<03:09, 434.73it/s][A[A

 52%|█████▏    | 89782/172281 [03:36<03:09, 434.48it/s]

 58%|█████▊    | 99977/172281 [04:02<04:06, 292.89it/s][A[A

 58%|█████▊    | 100009/172281 [04:02<04:01, 299.30it/s][A[A

 58%|█████▊    | 100043/172281 [04:02<03:53, 309.49it/s][A[A

 58%|█████▊    | 100076/172281 [04:02<03:49, 315.28it/s][A[A

 58%|█████▊    | 100116/172281 [04:02<03:34, 336.16it/s][A[A

 58%|█████▊    | 100156/172281 [04:03<03:24, 351.99it/s][A[A

 58%|█████▊    | 100197/172281 [04:03<03:16, 366.73it/s][A[A

 58%|█████▊    | 100235/172281 [04:03<03:33, 337.86it/s][A[A

 58%|█████▊    | 100270/172281 [04:03<03:39, 327.36it/s][A[A

 58%|█████▊    | 100304/172281 [04:03<03:51, 310.68it/s][A[A

 58%|█████▊    | 100336/172281 [04:03<04:11, 285.63it/s][A[A

 58%|█████▊    | 100366/172281 [04:03<04:08, 289.17it/s][A[A

 58%|█████▊    | 100396/172281 [04:03<04:06, 291.62it/s][A[A

 58%|█████▊    | 100431/172281 [04:03<03:54, 306.38it/s][A[A

 58%|█████▊    | 100471/172281 [04:04<03:39, 327.46it/s][A[A

 58%|█████▊    | 100505/172281 [04:04<04:

 64%|██████▍   | 110863/172281 [04:29<02:22, 429.58it/s][A[A

 64%|██████▍   | 110907/172281 [04:29<02:22, 431.01it/s][A[A

 64%|██████▍   | 110951/172281 [04:29<02:21, 432.57it/s][A[A

 64%|██████▍   | 110995/172281 [04:29<02:21, 431.67it/s][A[A

 64%|██████▍   | 111040/172281 [04:29<02:20, 434.76it/s][A[A

 64%|██████▍   | 111084/172281 [04:29<02:21, 432.32it/s][A[A

 65%|██████▍   | 111128/172281 [04:29<02:21, 433.43it/s][A[A

 65%|██████▍   | 111173/172281 [04:29<02:20, 435.62it/s][A[A

 65%|██████▍   | 111217/172281 [04:29<02:20, 435.26it/s][A[A

 65%|██████▍   | 111261/172281 [04:30<02:20, 433.65it/s][A[A

 65%|██████▍   | 111305/172281 [04:30<02:20, 433.75it/s][A[A

 65%|██████▍   | 111349/172281 [04:30<02:20, 433.89it/s][A[A

 65%|██████▍   | 111393/172281 [04:30<02:20, 431.98it/s][A[A

 65%|██████▍   | 111437/172281 [04:30<02:20, 432.35it/s][A[A

 65%|██████▍   | 111481/172281 [04:30<02:21, 430.61it/s][A[A

 65%|██████▍   | 111525/172281 [04:30<02

 71%|███████   | 121746/172281 [04:55<02:03, 410.81it/s][A[A

 71%|███████   | 121789/172281 [04:55<02:01, 414.50it/s][A[A

 71%|███████   | 121831/172281 [04:56<02:01, 414.07it/s][A[A

 71%|███████   | 121873/172281 [04:56<02:01, 415.41it/s][A[A

 71%|███████   | 121915/172281 [04:56<02:01, 416.10it/s][A[A

 71%|███████   | 121957/172281 [04:56<02:10, 384.56it/s][A[A

 71%|███████   | 121996/172281 [04:56<02:21, 356.24it/s][A[A

 71%|███████   | 122038/172281 [04:56<02:15, 371.78it/s][A[A

 71%|███████   | 122081/172281 [04:56<02:09, 387.16it/s][A[A

 71%|███████   | 122123/172281 [04:56<02:06, 395.71it/s][A[A

 71%|███████   | 122165/172281 [04:56<02:04, 402.09it/s][A[A

 71%|███████   | 122208/172281 [04:57<02:02, 407.93it/s][A[A

 71%|███████   | 122251/172281 [04:57<02:01, 411.82it/s][A[A

 71%|███████   | 122294/172281 [04:57<02:00, 414.82it/s][A[A

 71%|███████   | 122336/172281 [04:57<02:00, 415.44it/s][A[A

 71%|███████   | 122378/172281 [04:57<02

 77%|███████▋  | 132688/172281 [05:22<01:52, 352.83it/s][A[A

 77%|███████▋  | 132730/172281 [05:22<01:47, 369.11it/s][A[A

 77%|███████▋  | 132772/172281 [05:22<01:43, 382.69it/s][A[A

 77%|███████▋  | 132815/172281 [05:22<01:39, 394.76it/s][A[A

 77%|███████▋  | 132858/172281 [05:22<01:37, 404.15it/s][A[A

 77%|███████▋  | 132901/172281 [05:22<01:35, 411.33it/s][A[A

 77%|███████▋  | 132944/172281 [05:22<01:34, 416.08it/s][A[A

 77%|███████▋  | 132988/172281 [05:23<01:33, 420.85it/s][A[A

 77%|███████▋  | 133032/172281 [05:23<01:32, 424.30it/s][A[A

 77%|███████▋  | 133076/172281 [05:23<01:31, 428.54it/s][A[A

 77%|███████▋  | 133120/172281 [05:23<01:30, 430.97it/s][A[A

 77%|███████▋  | 133164/172281 [05:23<01:31, 428.49it/s][A[A

 77%|███████▋  | 133208/172281 [05:23<01:30, 431.26it/s][A[A

 77%|███████▋  | 133252/172281 [05:23<01:30, 430.26it/s][A[A

 77%|███████▋  | 133296/172281 [05:23<01:30, 430.00it/s][A[A

 77%|███████▋  | 133340/172281 [05:23<01

 84%|████████▎ | 143963/172281 [05:48<01:05, 433.08it/s][A[A

 84%|████████▎ | 144007/172281 [05:48<01:05, 433.66it/s][A[A

 84%|████████▎ | 144051/172281 [05:48<01:06, 426.31it/s][A[A

 84%|████████▎ | 144096/172281 [05:48<01:05, 431.80it/s][A[A

 84%|████████▎ | 144140/172281 [05:48<01:05, 431.59it/s][A[A

 84%|████████▎ | 144184/172281 [05:48<01:05, 430.65it/s][A[A

 84%|████████▎ | 144228/172281 [05:49<01:05, 430.59it/s][A[A

 84%|████████▎ | 144272/172281 [05:49<01:04, 431.14it/s][A[A

 84%|████████▍ | 144316/172281 [05:49<01:04, 430.60it/s][A[A

 84%|████████▍ | 144360/172281 [05:49<01:04, 429.86it/s][A[A

 84%|████████▍ | 144403/172281 [05:49<01:05, 427.70it/s][A[A

 84%|████████▍ | 144446/172281 [05:49<01:05, 426.95it/s][A[A

 84%|████████▍ | 144490/172281 [05:49<01:04, 428.15it/s][A[A

 84%|████████▍ | 144535/172281 [05:49<01:04, 431.90it/s][A[A

 84%|████████▍ | 144579/172281 [05:49<01:03, 433.73it/s][A[A

 84%|████████▍ | 144623/172281 [05:50<01

 90%|█████████ | 155155/172281 [06:14<00:40, 426.04it/s][A[A

 90%|█████████ | 155199/172281 [06:14<00:39, 428.89it/s][A[A

 90%|█████████ | 155242/172281 [06:14<00:40, 422.38it/s][A[A

 90%|█████████ | 155285/172281 [06:15<00:41, 408.65it/s][A[A

 90%|█████████ | 155327/172281 [06:15<00:41, 410.03it/s][A[A

 90%|█████████ | 155371/172281 [06:15<00:40, 418.54it/s][A[A

 90%|█████████ | 155414/172281 [06:15<00:40, 420.12it/s][A[A

 90%|█████████ | 155457/172281 [06:15<00:41, 407.26it/s][A[A

 90%|█████████ | 155500/172281 [06:15<00:40, 412.52it/s][A[A

 90%|█████████ | 155542/172281 [06:15<00:42, 394.40it/s][A[A

 90%|█████████ | 155582/172281 [06:15<00:42, 395.52it/s][A[A

 90%|█████████ | 155624/172281 [06:15<00:41, 401.44it/s][A[A

 90%|█████████ | 155666/172281 [06:15<00:41, 401.25it/s][A[A

 90%|█████████ | 155707/172281 [06:16<00:41, 397.30it/s][A[A

 90%|█████████ | 155751/172281 [06:16<00:40, 407.87it/s][A[A

 90%|█████████ | 155796/172281 [06:16<00

 96%|█████████▋| 165947/172281 [06:41<00:14, 435.12it/s][A[A

 96%|█████████▋| 165991/172281 [06:41<00:14, 433.87it/s][A[A

 96%|█████████▋| 166035/172281 [06:41<00:14, 431.38it/s][A[A

 96%|█████████▋| 166079/172281 [06:41<00:14, 429.97it/s][A[A

 96%|█████████▋| 166123/172281 [06:41<00:14, 432.47it/s][A[A

 96%|█████████▋| 166167/172281 [06:42<00:14, 434.39it/s][A[A

 96%|█████████▋| 166211/172281 [06:42<00:13, 435.78it/s][A[A

 97%|█████████▋| 166255/172281 [06:42<00:13, 436.29it/s][A[A

 97%|█████████▋| 166299/172281 [06:42<00:13, 436.14it/s][A[A

 97%|█████████▋| 166343/172281 [06:42<00:13, 435.53it/s][A[A

 97%|█████████▋| 166387/172281 [06:42<00:13, 428.37it/s][A[A

 97%|█████████▋| 166430/172281 [06:42<00:13, 427.03it/s][A[A

 97%|█████████▋| 166474/172281 [06:42<00:13, 428.35it/s][A[A

 97%|█████████▋| 166518/172281 [06:42<00:13, 430.63it/s][A[A

 97%|█████████▋| 166562/172281 [06:43<00:13, 427.16it/s][A[A

 97%|█████████▋| 166606/172281 [06:43<00

  3%|▎         | 4775/169849 [00:12<06:46, 405.93it/s][A[A

  3%|▎         | 4817/169849 [00:12<06:43, 409.45it/s][A[A

  3%|▎         | 4860/169849 [00:12<06:37, 414.69it/s][A[A

  3%|▎         | 4903/169849 [00:12<06:34, 417.88it/s][A[A

  3%|▎         | 4948/169849 [00:12<06:28, 424.35it/s][A[A

  3%|▎         | 4992/169849 [00:12<06:26, 426.80it/s][A[A

  3%|▎         | 5035/169849 [00:12<06:26, 426.79it/s][A[A

  3%|▎         | 5079/169849 [00:12<06:25, 427.93it/s][A[A

  3%|▎         | 5123/169849 [00:12<06:23, 429.92it/s][A[A

  3%|▎         | 5167/169849 [00:12<06:23, 429.75it/s][A[A

  3%|▎         | 5210/169849 [00:13<06:23, 429.27it/s][A[A

  3%|▎         | 5253/169849 [00:13<06:29, 423.02it/s][A[A

  3%|▎         | 5296/169849 [00:13<06:31, 420.78it/s][A[A

  3%|▎         | 5339/169849 [00:13<06:31, 419.90it/s][A[A

  3%|▎         | 5382/169849 [00:13<06:39, 411.79it/s][A[A

  3%|▎         | 5424/169849 [00:13<07:10, 382.37it/s][A[A

  3%|▎  

  9%|▉         | 16043/169849 [00:39<05:59, 427.72it/s][A[A

  9%|▉         | 16087/169849 [00:39<05:57, 430.45it/s][A[A

  9%|▉         | 16131/169849 [00:39<05:58, 428.51it/s][A[A

 10%|▉         | 16175/169849 [00:39<05:57, 430.42it/s][A[A

 10%|▉         | 16219/169849 [00:39<05:57, 430.16it/s][A[A

 10%|▉         | 16263/169849 [00:39<05:56, 430.29it/s][A[A

 10%|▉         | 16307/169849 [00:39<05:57, 429.60it/s][A[A

 10%|▉         | 16350/169849 [00:39<05:59, 427.57it/s][A[A

 10%|▉         | 16393/169849 [00:39<05:59, 426.78it/s][A[A

 10%|▉         | 16436/169849 [00:39<06:03, 422.49it/s][A[A

 10%|▉         | 16479/169849 [00:40<06:04, 420.69it/s][A[A

 10%|▉         | 16522/169849 [00:40<06:04, 420.54it/s][A[A

 10%|▉         | 16565/169849 [00:40<06:07, 417.42it/s][A[A

 10%|▉         | 16609/169849 [00:40<06:02, 422.53it/s][A[A

 10%|▉         | 16653/169849 [00:40<05:59, 426.43it/s][A[A

 10%|▉         | 16696/169849 [00:40<05:58, 426.86it/s]

 16%|█▌        | 27316/169849 [01:05<05:24, 438.63it/s][A[A

 16%|█▌        | 27361/169849 [01:06<05:24, 439.62it/s][A[A

 16%|█▌        | 27405/169849 [01:06<05:24, 439.49it/s][A[A

 16%|█▌        | 27449/169849 [01:06<05:24, 438.56it/s][A[A

 16%|█▌        | 27493/169849 [01:06<05:28, 433.85it/s][A[A

 16%|█▌        | 27537/169849 [01:06<05:28, 433.21it/s][A[A

 16%|█▌        | 27581/169849 [01:06<05:27, 434.59it/s][A[A

 16%|█▋        | 27625/169849 [01:06<05:26, 435.52it/s][A[A

 16%|█▋        | 27669/169849 [01:06<05:27, 434.27it/s][A[A

 16%|█▋        | 27714/169849 [01:06<05:25, 436.24it/s][A[A

 16%|█▋        | 27760/169849 [01:06<05:22, 441.06it/s][A[A

 16%|█▋        | 27805/169849 [01:07<05:24, 438.23it/s][A[A

 16%|█▋        | 27849/169849 [01:07<05:25, 436.72it/s][A[A

 16%|█▋        | 27893/169849 [01:07<05:26, 434.72it/s][A[A

 16%|█▋        | 27937/169849 [01:07<05:27, 433.32it/s][A[A

 16%|█▋        | 27981/169849 [01:07<05:26, 434.66it/s]

KeyboardInterrupt: 

# 4 - Computation of the Predictions for the Best Parameters

## 4.1 - Computation of the best parameters

In [23]:
# Definition of the data augmentation function
data_aug = DataAugmentationDefault(NoAugmentation, {})


# Defintion of the embedding
embedding = EmbeddingDefault(SpectrumEmbedding , {"d_l": [5, 7, 12]})
# 

# Definition of the kernel
kernel = KernelDefault(PolyKernel, {"k": 2})

# Definition of the model
model = KernelLogisticRegression(kernel, informations=True,  lamda=10, max_iter=15,
                                 preprocessing=None)

# Defintion of best parameters values
best_parameters_values = {"Data Augmentation": {"Function": data_aug},
                          "Embedding": {"Function": embedding},
                          "Kernel": {"Function": kernel},
                          "Model": {"Function": model}}

In [24]:
# Computation of the predicition
predictions = Prediction(best_parameters_values, df_dict)

# Display predicitons
predictions

100%|██████████| 3/3 [00:00<00:00, 22.46it/s]


KeyboardInterrupt: 

In [None]:
np.mean(predictions, axis=0)

## 4.2 - Save predicitons into a csv

In [None]:
np.savetxt("./Resultats/Predictions_Test_Spectrum.csv", predictions, 
           fmt='%i', delimiter=",", header="Id,Bound", comments='')

# Testing

## 1 - Study the result

In [4]:
df = pd.read_csv("./Resultats/grid_search_res.csv",  sep='\t')

# Display df
df.sort_values("score", inplace=True, ascending=False)
df

Unnamed: 0.1,Unnamed: 0,data_aug_hp,data_aug_type,embedding_hp,embedding_type,kernel_hp,kernel_type,model_hp,model_type,score,scores_1,scores_1_mean,scores_2,scores_2_mean,scores_3,scores_3_mean
0,0,{},ComplementarySequences,"{'d_l': [5, 10, 15]}",SpectrumEmbedding,"{'k': 2, 'add_ones': False}",PolyKernel,"{'lamda': 1, 'preprocessing': None, 'informati...",KernelLogisticRegression,0.205,"[0.59, 0.6225, 0.6225, 0.625, 0.615]",0.615,"[0, 0, 0, 0, 0]",0.0,"[0, 0, 0, 0, 0]",0.0
12,12,{},ComplementarySequences,"{'d_l': [5, 10, 15]}",SpectrumEmbedding,"{'k': 2, 'add_ones': False}",PolyKernel,"{'lamda': 0.0001, 'preprocessing': None, 'info...",KernelLogisticRegression,0.204333,"[0.5925, 0.6175, 0.62, 0.62, 0.615]",0.613,"[0, 0, 0, 0, 0]",0.0,"[0, 0, 0, 0, 0]",0.0
7,7,{},ComplementarySequences,"{'d_l': [5, 10, 15]}",SpectrumEmbedding,"{'k': 2, 'add_ones': False}",PolyKernel,"{'lamda': 0.01, 'preprocessing': None, 'inform...",KernelLogisticRegression,0.204167,"[0.59, 0.615, 0.62, 0.62, 0.6175]",0.6125,"[0, 0, 0, 0, 0]",0.0,"[0, 0, 0, 0, 0]",0.0
13,13,{},ComplementarySequences,"{'d_l': [5, 7, 12]}",SpectrumEmbedding,"{'k': 2, 'add_ones': False}",PolyKernel,"{'lamda': 1, 'preprocessing': None, 'informati...",KernelLogisticRegression,0.2035,"[0.5975, 0.615, 0.625, 0.6125, 0.6025]",0.6105,"[0, 0, 0, 0, 0]",0.0,"[0, 0, 0, 0, 0]",0.0
6,6,{},ComplementarySequences,"{'d_l': [5, 7, 12]}",SpectrumEmbedding,"{'k': 2, 'add_ones': False}",PolyKernel,"{'lamda': 0.0001, 'preprocessing': None, 'info...",KernelLogisticRegression,0.203,"[0.595, 0.615, 0.6175, 0.615, 0.6025]",0.609,"[0, 0, 0, 0, 0]",0.0,"[0, 0, 0, 0, 0]",0.0
3,3,{},ComplementarySequences,"{'d_l': [5, 7, 12]}",SpectrumEmbedding,"{'k': 2, 'add_ones': False}",PolyKernel,"{'lamda': 0.01, 'preprocessing': None, 'inform...",KernelLogisticRegression,0.2025,"[0.5975, 0.6125, 0.615, 0.61, 0.6025]",0.6075,"[0, 0, 0, 0, 0]",0.0,"[0, 0, 0, 0, 0]",0.0
8,8,{},ComplementarySequences,"{'d_l': [5, 7, 12]}",SpectrumEmbedding,"{'k': 2, 'add_ones': False}",PolyKernel,"{'lamda': 100, 'preprocessing': None, 'informa...",KernelLogisticRegression,0.1975,"[0.5375, 0.6025, 0.6575, 0.5925, 0.5725]",0.5925,"[0, 0, 0, 0, 0]",0.0,"[0, 0, 0, 0, 0]",0.0
9,9,{},ComplementarySequences,"{'d_l': [5, 10, 15]}",SpectrumEmbedding,"{'k': 2, 'add_ones': False}",PolyKernel,"{'lamda': 100, 'preprocessing': None, 'informa...",KernelLogisticRegression,0.195333,"[0.5275, 0.61, 0.65, 0.59, 0.5525]",0.586,"[0, 0, 0, 0, 0]",0.0,"[0, 0, 0, 0, 0]",0.0
4,4,{},ComplementarySequences,"{'d_l': [5, 7, 12]}",SpectrumEmbedding,"{'k': 2, 'add_ones': False}",PolyKernel,"{'lamda': 1000000000.0, 'preprocessing': None,...",KernelLogisticRegression,0.188167,"[0.51, 0.5925, 0.59, 0.5625, 0.5675]",0.5645,"[0, 0, 0, 0, 0]",0.0,"[0, 0, 0, 0, 0]",0.0
5,5,{},ComplementarySequences,"{'d_l': [5, 7, 12]}",SpectrumEmbedding,"{'k': 2, 'add_ones': False}",PolyKernel,"{'lamda': 1000000.0, 'preprocessing': None, 'i...",KernelLogisticRegression,0.188167,"[0.51, 0.5925, 0.59, 0.5625, 0.5675]",0.5645,"[0, 0, 0, 0, 0]",0.0,"[0, 0, 0, 0, 0]",0.0


In [None]:
# from sklearn.feature_extraction.text import TfidfTransformer

# counts = spectrumEmbedding(df_dict[0][0], len_sq=4)
# new_X_train = transformer.fit_transform(counts).toarray()

# # transformer = TfidfTransformer(smooth_idf=False) # True

# for i in [5, 10]:
    
#     counts = spectrumEmbedding(df_dict[0][0], len_sq=i)
#     new_X_train = transformer.fit_transform(counts).toarray()

#     model = SVC(gamma="scale", C=100)
    
#     # Execute a cross validation on the model
#     gdm = GridSearchCV(model, hp, scoring="accuracy", cv=5)

#     # Fit the model and find the best parameters and score
#     gdm.fit(new_X_train, y_train)
    
#     print("Iteration {}, score {}".format(i, gdm.best_score_))

# 888 - CKN