In [None]:
%load_ext autoreload
%autoreload

import sys
sys.path.append('src')

from utils import *
import time
from tqdm import tqdm_notebook
import os

## Baseline using Spectrum kernel

In [None]:
from spectrum import *
# Call precomputed_kernels for cum_spectrum with k=31 and keep the result in
# cum_spectrum_31_kernels; %time prints how long the statement takes.
# NOTE(review): 'cum_spectrum_31' looks like a label/cache name for the result —
# confirm against the definition of precomputed_kernels.
%time cum_spectrum_31_kernels = precomputed_kernels(cum_spectrum, 'cum_spectrum_31', k=31)

In [None]:
# Run svm_kernels on cum_spectrum_31_kernels under the name 'cum_spectrum_31';
# %time prints how long the statement takes.
# NOTE(review): presumably fits and scores an SVM on each precomputed kernel — confirm.
%time svm_kernels(cum_spectrum_31_kernels, 'cum_spectrum_31')

Computation becomes noticeably slower for $k > 13$.

In [None]:
# Largest k for which the k-spectrum kernel is computed in this notebook.
MAX_SPECTRUM_K = 13

# One precomputed_kernels result per k in 1..MAX_SPECTRUM_K, ordered by k.
# A comprehension keeps the loop variable out of the notebook's global
# namespace, so no stale `k` is left behind for later cells to pick up.
spectrum_kernels = [
    precomputed_kernels(k_spectrum, 'spectrum_{}'.format(k), k=k)
    for k in range(1, MAX_SPECTRUM_K + 1)
]

In [None]:
# Take element [0][0] of every per-k result and stack them along a new leading
# axis, converted to float; spectrum_K[j] therefore comes from the j-th entry
# of spectrum_kernels.
# NOTE(review): [0][0] is presumably the training Gram matrix of the first
# dataset — confirm against precomputed_kernels.
spectrum_K = np.stack(
    [per_k[0][0] for per_k in spectrum_kernels]
).astype(float)
# Only the stacked array is used from here on; remove the name so the per-k
# results can be garbage-collected.
del spectrum_kernels

In [None]:
from newton import fit
import autograd as ag

def spectrum_sum(θ, I, J):
    """Weighted sum of the stacked spectrum kernels restricted to the block (I, J).

    Args:
        θ: one log-weight per kernel stacked in ``spectrum_K``; kernel ``k`` is
            scaled by ``exp(θ[k])``.
        I: row indices into each kernel matrix.
        J: column indices into each kernel matrix.

    Returns:
        The sum over ``k`` of ``exp(θ[k]) * spectrum_K[k][I][:, J]``.
    """
    # Rows come from I and columns from J. Indexing both axes with I would
    # silently return the wrong block whenever I and J differ but have the
    # same length (e.g. two equally sized folds).
    # NOTE(review): assumes I and J are integer index arrays — confirm in `fit`.
    K = ag.tensor(spectrum_K[:, I][:, :, J])
    return (K * ag.exp(θ)[:, None, None]).sum(axis=0)

# Only the first n entries of train_Ys[0] are used for this fit.
n = 100
fit_targets = train_Ys[0].astype(float)[:n]
# NOTE(review): presumably a 2-fold split of the indices 0..n-1 — confirm.
fit_folds = k_folds_indices(n, 2)
# Starting point: one entry per kernel stacked in spectrum_K.
θ_init = ag.ones(spectrum_K.shape[0])
θ, λ, stats = fit(spectrum_sum, fit_targets, fit_folds, θ_init, iters=5)

# Levenshtein Kernel

### Kernel Computing

In [None]:
from levenshtein import *
# Call precomputed_kernels for levenshtein_distance with numeric=False and keep
# the result in levenshtein_distances; %time prints how long the statement takes.
# NOTE(review): the result holds distances rather than kernel values — confirm
# what numeric=False selects in precomputed_kernels.
%time levenshtein_distances = precomputed_kernels(levenshtein_distance, 'levenshtein_distance', numeric=False)

In [None]:
def distance_to_kernel(i, d, α):
    """Map a distance ``d`` to the similarity ``1 / (1 + d ** α)``.

    Args:
        i: accepted for the caller's benefit but not used here.
        d: the distance value(s) to convert.
        α: exponent applied to the distance before inverting.

    Returns:
        ``1 / (1 + d ** α)``, which is 1 for ``d == 0`` and shrinks as ``d`` grows
        (for positive ``α``).
    """
    denominator = 1 + d ** α
    return 1 / denominator

# Convert the Levenshtein distances into kernels with distance_to_kernel.
# NOTE(review): α=.25 is presumably forwarded by transform_kernels to
# distance_to_kernel as its exponent — confirm.
levenshtein_kernels = transform_kernels(
    [levenshtein_distances],
    distance_to_kernel,
    α=.25,
)

In [None]:
# Run svm_kernels on the Levenshtein kernels under the name 'levenshtein'.
svm_kernels(
    levenshtein_kernels,
    'levenshtein',
)

In [None]:
# Mixing weights, looked up by the index that transform_kernels passes as the
# first argument of the combining function.
αs = [.7, .7, .7]


def _mixed_sum(i, x, y):
    """Blend ``x`` and ``y`` as ``(1 - αs[i]) * x + αs[i] * y``."""
    weight = αs[i]
    return (1 - weight) * x + weight * y


# x is drawn from the Levenshtein kernels and y from the cumulative spectrum
# kernels, following the order of the list handed to transform_kernels
# (presumably — confirm against transform_kernels).
svm_kernels(
    transform_kernels([levenshtein_kernels, cum_spectrum_31_kernels], _mixed_sum),
    'mixed_sum',
)

In [None]:
def _mixed_product(_, x, y):
    """Combine two kernel values as ``(1 + .5 * x) * y``; the first argument is ignored."""
    return (1 + .5 * x) * y


# Evaluate the product-style combination of the Levenshtein and cumulative
# spectrum kernels under the name 'mixed_product'.
svm_kernels(
    transform_kernels([levenshtein_kernels, cum_spectrum_31_kernels], _mixed_product),
    'mixed_product',
)