In [1]:
from joblib import Parallel, delayed
from tqdm import tqdm
import numpy as np
from scipy.sparse import coo_matrix
from itertools import product
from collections import defaultdict
import pandas as pd
import cvxopt
cvxopt.solvers.options["show_progress"] = False
import optuna

In [2]:
def load_data(seq_file: str, label_file: str) -> tuple:
    """
    加载多组CSV文件并合并数据
    
    参数:
        seq_files: 多个x.csv文件路径列表（如["x.csv", "x1.csv"]）
        label_files: 多个y.csv文件路径列表（如["y.csv", "y1.csv"]）
    
    返回:
        (sequences, labels): 合并后的序列列表和标签数组
    """
    # 合并所有数据
    df_seq = pd.read_csv(seq_file)
    df_label = pd.read_csv(label_file)
    
    merged = pd.merge(df_seq, df_label, on='Id', how='inner')
    if merged.empty:
        print(f"警告: {seq_file}和{label_file}中没有匹配的ID")

    # 提取数据
    sequences = merged['seq'].values
    labels = np.where(merged['Bound'] == 1, 1, -1)
    return sequences, labels

def save_to_csv(df_0, df_1, df_2, suffixe = ""):
    res = pd.concat([df_0, df_1, df_2], ignore_index = True)
    res["Bound"] = res["Bound"].astype("int")
    res[["Id", "Bound"]].to_csv(f"./results/predictions_{suffixe}.csv", index = False)
    return

In [3]:
def generate_mismatch_neighbors(kmer, m, alphabet = "ACGT"):
    """
    Generate all possible k-mers that are within 'm' mismatches of the given k-mer.
    
    Args:
        kmer (str): The original k-mer.
        m (int): Maximum allowed mismatches.
        alphabet (str): Possible characters in the k-mers (e.g., "ACGT").
    
    Returns:
        set: A set of k-mers within m mismatches.
    """
    if m == 0:
        return {kmer}  # No mismatches allowed, return the k-mer itself

    n = len(kmer)
    mismatch_neighbors = set()

    # Generate all possible positions and substitutions up to m mismatches
    def generate(pos, mismatches, current_kmer):
        if mismatches > m:  # Stop if we exceed the allowed mismatches
            return
        if pos == n:  # If we processed all positions, add the modified k-mer
            mismatch_neighbors.add("".join(current_kmer))
            return

        # Keep the original character (no mismatch at this position)
        generate(pos + 1, mismatches, current_kmer)

        # Try all possible mismatches at the current position
        original_char = current_kmer[pos]
        for char in alphabet:
            if char != original_char:  # Only substitute if it's different
                current_kmer[pos] = char
                generate(pos + 1, mismatches + 1, current_kmer)
                current_kmer[pos] = original_char  # Restore original

    generate(0, 0, list(kmer))
    return mismatch_neighbors

def compute_feature_vector(seq, k, m, alphabet, neighbor_cache):
    """
    Compute the feature vector for a given sequence using the mismatch kernel.

    Args:
        seq (str): Input DNA sequence.
        k (int): Length of k-mers.
        m (int): Maximum number of mismatches allowed.
        alphabet (str): Alphabet set (e.g., "ACGT").
        neighbor_cache (dict): Dictionary to cache computed mismatch neighborhoods.

    Returns:
        dict: Feature vector where keys are k-mers and values are their frequencies.
    """
    feature_vector = defaultdict(int)

    # Iterate over all k-mers in the sequence
    for i in range(len(seq) - k + 1):
        kmer = seq[i : i + k]  # Extract k-mer from the sequence

        # Check if neighbors are cached
        if kmer in neighbor_cache:
            neighbors = neighbor_cache[kmer]
        else:
            neighbors = generate_mismatch_neighbors(kmer, m, alphabet)
            neighbor_cache[kmer] = neighbors  # Cache the result

        # Update feature vector for all mismatch neighbors
        for neighbor in neighbors:
            feature_vector[neighbor] += 1  # Count occurrences

    return feature_vector


In [4]:

def compute_mismatch_kernel(sequences, k, m=1, alphabet="ACGT"):
    """
    Compute the mismatch kernel matrix for a set of sequences.
    
    Args:
        sequences (list of str): List of input DNA sequences.
        k (int): Length of k-mers.
        m (int): Maximum number of mismatches allowed.
        alphabet (str): Alphabet set (default: "ACGT").
    
    Returns:
        np.ndarray: The normalized mismatch kernel matrix.
    """
    n = len(sequences)

    # Create a shared neighbor cache (read-only after computed)
    neighbor_cache = {}

    # Compute feature vectors in parallel
    feature_vectors = list(
        Parallel(n_jobs=-1)(
            delayed(compute_feature_vector)(seq, k, m, alphabet, neighbor_cache)
            for seq in tqdm(sequences, total=n, desc="Computing feature vectors")
        )
    )

    # Build the global vocabulary from all feature vectors
    all_kmers = set()
    
    # Collect all k-mers from feature vectors in parallel
    all_kmers = set().union(*Parallel(n_jobs=-1)(
        delayed(lambda fv: set(fv.keys()))(fv) for fv in tqdm(feature_vectors, desc="Collecting k-mers")
    ))

    all_kmers = sorted(all_kmers)
    kmer_index = {kmer: idx for idx, kmer in enumerate(all_kmers)}

    # Collect entries for the sparse matrix in parallel
    rows, cols, data = [], [], []
    results = Parallel(n_jobs=-1)(
        delayed(lambda i, fv: (
            [i] * len(fv),
            [kmer_index[kmer] for kmer in fv.keys()],
            list(fv.values())
        ))(i, fv) for i, fv in tqdm(enumerate(feature_vectors), total=len(feature_vectors), desc="Building sparse matrix entries")
    )

    for r, c, d in results:
        rows.extend(r)
        cols.extend(c)
        data.extend(d)

    # Build the sparse matrix (COO format) and convert to CSR
    X = coo_matrix((data, (rows, cols)), shape=(n, len(all_kmers)), dtype=np.float32).tocsr()

    # Compute kernel matrix (dot product)
    K = X @ X.T
    return K.toarray()


In [5]:
def manual_kfold_split(X, y, n_splits=3, seed=42):
    """Manually splits X and y into K folds for cross-validation."""
    np.random.seed(seed)
    indices = np.arange(len(X))
    np.random.shuffle(indices)
    
    folds = np.array_split(indices, n_splits)
    return folds

def train_svm(K, y, C=1.0):
    """Train an SVM using the precomputed kernel matrix K."""
    n = len(y)
    y = y.astype(float).reshape(-1, 1)  # Ensure y is a column vector

    # Construct the quadratic programming matrices
    P = cvxopt.matrix(np.outer(y, y) * K)  # P_ij = y_i * y_j * K_ij
    q = cvxopt.matrix(-np.ones(n))        # q_i = -1
    G = cvxopt.matrix(np.vstack((-np.eye(n), np.eye(n))))  # Constraints 0 <= α <= C
    h = cvxopt.matrix(np.hstack((np.zeros(n), C * np.ones(n))))
    A = cvxopt.matrix(y.T)  # Equality constraint sum(α_i * y_i) = 0
    b = cvxopt.matrix(0.0)

    # Solve the quadratic program
    solution = cvxopt.solvers.qp(P, q, G, h, A, b)
    alphas = np.ravel(solution['x'])

    # Support vectors: α > 0
    sv_indices = alphas > 1e-5
    support_vectors = np.where(sv_indices)[0]
    alphas = alphas[sv_indices]
    support_y = y[sv_indices].flatten()

    # Compute the bias term (intercept)
    bias = np.mean(support_y - np.sum(alphas * support_y * K[support_vectors][:, support_vectors], axis=1))

    return alphas, support_vectors, bias

def predict_svm(K_test, alphas, support_vectors, support_y, bias):
    """Make predictions using the trained SVM."""
    return np.sign(np.sum(alphas * support_y * K_test[:, support_vectors], axis=1) + bias)

def cross_val_score_manual(X, y, C, kernel_matrix, n_splits=3):
    """Performs cross-validation without using sklearn."""
    folds = manual_kfold_split(X, y, n_splits)
    accuracies = []

    for i in range(n_splits):
        # print("i = ", i)
        val_indices = folds[i]  # Current fold is validation set
        train_indices = np.hstack([folds[j] for j in range(n_splits) if j != i])  # Rest are training

        y_train = y[train_indices]
        y_val = y[val_indices]

        # Compute kernel matrices
        indices_train_submatrix = np.ix_(train_indices, train_indices)
        K_train = kernel_matrix[indices_train_submatrix]

        indices_test_submatrix = np.ix_(val_indices, train_indices)
        K_val = kernel_matrix[indices_test_submatrix]

        # Train and predict
        alphas, support_vectors, bias = train_svm(K_train, y_train, C)
        predictions = predict_svm(K_val, alphas, support_vectors, y_train[support_vectors], bias)

        # Compute accuracy
        accuracy = np.mean(predictions == y_val)
        accuracies.append(accuracy)

        # print("accuracies: ", accuracies)
    return np.mean(accuracies)

def objective(trial):
    """Objective function for Optuna to optimize k and C."""
    k = trial.suggest_int("k", 4, 9)  # k-mer length between 2 and 6
    m = trial.suggest_int("m", 1, 3)
    C = trial.suggest_loguniform("C", 1e-3, 1e2)  # C in [0.01, 100]
    
    kernel_matrix = compute_mismatch_kernel(X_train, k, m = m)

    cross_val = cross_val_score_manual(X_train, Y_train, C, kernel_matrix)

    print(f"C = {C}, k = {k}: {cross_val}")
    return cross_val

def train_and_predict_mismatch_svm(X_train_path, Y_train_path, X_test_path, n_trials=20):
    """Hyperparameter optimization with Optuna, then train and predict."""
    global X_train, Y_train  # Needed for Optuna's objective function
    X_train, Y_train = load_data(X_train_path, Y_train_path)

    df_pred = pd.read_csv(X_test_path)
    X_test = df_pred["seq"].values
    # X_test = load_data(X_test_path)

    # Run Optuna optimization
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=n_trials)

    # Best hyperparameters
    best_k = study.best_params["k"]
    best_m = study.best_params["m"]
    best_C = study.best_params["C"]
    print(f"Best k: {best_k}, Best C: {best_C}, Best m: {best_m}")

    # Train final model with best hyperparameters
    sequences = np.concatenate([X_train, X_test])
    kernel_matrix = compute_mismatch_kernel(sequences, best_k, m = best_m)
    K_train = kernel_matrix[0:len(X_train), 0:len(X_train)]
    K_test = kernel_matrix[0:len(X_test), 0:len(X_train)]
    # K_test = spectrum_kernel_matrix(X_test, X_train, best_k)
    alphas, support_vectors, bias = train_svm(K_train, Y_train, best_C)

    # Predict on test set
    predictions = predict_svm(K_test, alphas, support_vectors, Y_train[support_vectors], bias)

    # Convert {-1,1} predictions to {0,1}
    predictions = (predictions + 1) // 2

    df_pred["Bound"] = predictions

    return df_pred

In [6]:
prediction_0 = train_and_predict_mismatch_svm("./data/Xtr0.csv", "./data/Ytr0.csv", "./data/Xte0.csv", n_trials = 2)
prediction_1 = train_and_predict_mismatch_svm("./data/Xtr1.csv", "./data/Ytr1.csv", "./data/Xte1.csv", n_trials = 2)
prediction_2 = train_and_predict_mismatch_svm("./data/Xtr2.csv", "./data/Ytr2.csv", "./data/Xte2.csv", n_trials = 2)

save_to_csv(prediction_0, prediction_1, prediction_2, suffixe = "mismatch_svm")

[I 2025-03-10 11:27:22,273] A new study created in memory with name: no-name-a85d6538-a40f-4cec-b34a-569c8aaf4053
  C = trial.suggest_loguniform("C", 1e-3, 1e2)  # C in [0.01, 100]
Computing feature vectors: 100%|██████████| 2000/2000 [00:25<00:00, 79.57it/s]
Collecting k-mers: 100%|██████████| 2000/2000 [00:03<00:00, 635.03it/s]
Building sparse matrix entries: 100%|██████████| 2000/2000 [00:02<00:00, 772.77it/s]
[I 2025-03-10 11:28:27,092] Trial 0 finished with value: 0.5629955292623957 and parameters: {'k': 6, 'm': 2, 'C': 0.05319556701294642}. Best is trial 0 with value: 0.5629955292623957.


C = 0.05319556701294642, k = 6: 0.5629955292623957


Computing feature vectors: 100%|██████████| 2000/2000 [04:47<00:00,  6.95it/s]
Collecting k-mers: 100%|██████████| 2000/2000 [03:31<00:00,  9.48it/s]
Building sparse matrix entries: 100%|██████████| 2000/2000 [02:01<00:00, 16.42it/s]
[I 2025-03-10 11:48:24,237] Trial 1 finished with value: 0.48398248323285803 and parameters: {'k': 8, 'm': 3, 'C': 13.313718541748583}. Best is trial 0 with value: 0.5629955292623957.


C = 13.313718541748583, k = 8: 0.48398248323285803
Best k: 6, Best C: 0.05319556701294642, Best m: 2


Computing feature vectors: 100%|██████████| 3000/3000 [00:33<00:00, 89.90it/s]
Collecting k-mers: 100%|██████████| 3000/3000 [00:04<00:00, 703.71it/s]
Building sparse matrix entries: 100%|██████████| 3000/3000 [00:02<00:00, 1010.51it/s]


KeyboardInterrupt: 

# With logistic regression

In [None]:
def sigmoid(z):
    """Sigmoid activation function."""
    return 1 / (1 + np.exp(-z))

def train_logistic_regression(K_train, y_train, C, lr=0.01, epochs=1000):
    """
    Train logistic regression using batch gradient descent.
    
    Parameters:
    - K_train: (n_samples, n_samples) kernel matrix
    - y_train: (n_samples,) labels {-1,1}
    - C: regularization parameter
    - lr: learning rate
    - epochs: number of gradient updates
    
    Returns:
    - w: optimized weight vector
    - b: bias term
    """
    n_samples = K_train.shape[0]
    
    # Initialize weights
    w = np.zeros(n_samples)
    b = 0
    
    # Gradient Descent
    for _ in range(epochs):
        linear_model = K_train @ w + b
        y_pred = sigmoid(linear_model)
        
        # Compute gradients
        error = y_pred - (y_train + 1) / 2  # Convert {-1,1} -> {0,1} for logistic loss
        dw = (K_train.T @ error) / n_samples + C * w
        db = np.mean(error)
        
        # Update weights
        w -= lr * dw
        b -= lr * db

    return w, b


def predict_logistic_regression(K_test, w, b, threshold = 0.5):
    """
    Predict using logistic regression with kernel.
    
    Parameters:
    - K_test: (n_samples, n_train_samples) kernel matrix
    - w: trained weight vector
    - b: bias term
    - threshold: threshold for prediction 
    
    Returns:
    - predictions: {-1,1}
    """
    y_pred_prob = sigmoid(K_test @ w + b)
    return np.where(y_pred_prob >= threshold, 1, -1)


def cross_val_score_manual_logistic_regression(X, y, C, kernel_matrix, threshold, n_splits=3):
    """Performs cross-validation without using sklearn."""
    folds = manual_kfold_split(X, y, n_splits)
    accuracies = []

    for i in range(n_splits):
        val_indices = folds[i]  
        train_indices = np.hstack([folds[j] for j in range(n_splits) if j != i])

        y_train = X[train_indices], y[train_indices]
        y_val = X[val_indices], y[val_indices]

         # Compute kernel matrices
        indices_train_submatrix = np.ix_(train_indices, train_indices)
        K_train = kernel_matrix[indices_train_submatrix]

        indices_test_submatrix = np.ix_(val_indices, train_indices)
        K_val = kernel_matrix[indices_test_submatrix]

        # Train and predict
        w, b = train_logistic_regression(K_train, y_train, C)
        predictions = predict_logistic_regression(K_val, w, b, threshold = threshold)

        # Compute accuracy
        accuracy = np.mean(predictions == y_val)
        accuracies.append(accuracy)

    return np.mean(accuracies)


def objective(trial):
    """Objective function for Optuna to optimize k and C."""
    k = trial.suggest_int("k", 4, 9)  # k-mer length between 2 and 6
    C = trial.suggest_float("C", 1e-2, 1e1, log = True)  # Regularization term
    m = trial.suggest_int("m", 1, 3)
    threshold = trial.suggest_float("threshold", 0.48, 0.51)

    kernel_matrix = compute_mismatch_kernel(X_train, k, m = m)
    cross_val = cross_val_score_manual_logistic_regression(X_train, Y_train, C, kernel_matrix, threshold)

    # print(f"C = {C}, k = {k}: {cross_val}, m: {}")
    return cross_val


def train_and_predict_mismatch_LR(X_train_path, Y_train_path, X_test_path, n_trials=30):
    """Hyperparameter optimization with Optuna, then train and predict."""
    global X_train, Y_train  
    X_train, Y_train = load_data(X_train_path, Y_train_path)

    df_pred = pd.read_csv(X_test_path)
    X_test = df_pred["seq"].values

    # Run Optuna optimization
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=n_trials)

    # Best hyperparameters
    best_k = study.best_params["k"]
    best_C = study.best_params["C"]
    best_m = study.best_params["m"]


    #A MODIFIER !!!
    best_threshold = study.best_params["threshold"]
    print(f"Best k: {best_k}, Best C: {best_C}, Best threshold: {best_threshold}, Best m: {best_m}")

    # Train final model
    sequences = np.concatenate([X_train, X_test])
    kernel_matrix = compute_mismatch_kernel(sequences, best_k, m = best_m)
    K_train = kernel_matrix[0:len(X_train), 0:len(X_train)]
    K_test = kernel_matrix[0:len(X_test), 0:len(X_train)]

    w, b = train_logistic_regression(K_train, Y_train, best_C)

    # Predict on test set
    predictions = predict_logistic_regression(K_test, w, b, threshold = best_threshold)

    # Convert {-1,1} predictions to {0,1}
    predictions = (predictions + 1) // 2

    df_pred["Bound"] = predictions

    return df_pred

In [None]:
prediction_0 = train_and_predict_mismatch_LR("./data/Xtr0.csv", "./data/Ytr0.csv", "./data/Xte0.csv", n_trials = 2)
prediction_1 = train_and_predict_mismatch_LR("./data/Xtr1.csv", "./data/Ytr1.csv", "./data/Xte1.csv", n_trials = 2)
prediction_2 = train_and_predict_mismatch_LR("./data/Xtr2.csv", "./data/Ytr2.csv", "./data/Xte2.csv", n_trials = 2)

save_to_csv(prediction_0, prediction_1, prediction_2, suffixe= "mismatch_LR")

# With Kernel Ridge Regression

In [None]:
import numpy as np
import pandas as pd
import optuna
from scipy.linalg import solve


# ===== Kernel Ridge Regression (KRR) =====
def train_kernel_ridge_regression(K_train, y_train, lambda_reg=1.0):
    """Train Kernel Ridge Regression: Solves (K + λI)α = y."""
    n = K_train.shape[0]
    alpha = solve(K_train + lambda_reg * np.eye(n), y_train, assume_a='pos')
    return alpha

def predict_kernel_ridge_regression(K_test, alpha):
    """Predict using Kernel Ridge Regression."""
    return np.sign(K_test @ alpha)  # Predict {-1,1}


def cross_val_score_krr(X, y, kernel_matrix, lambda_reg, n_splits=3):
    """Performs cross-validation for KRR with Spectrum Kernel."""
    folds = manual_kfold_split(X, y, n_splits)
    accuracies = []

    for i in range(n_splits):
        val_indices = folds[i]  
        train_indices = np.hstack([folds[j] for j in range(n_splits) if j != i])

        y_train = X[train_indices], y[train_indices]
        y_val = X[val_indices], y[val_indices]

        # Compute kernel matrices
        indices_train_submatrix = np.ix_(train_indices, train_indices)
        K_train = kernel_matrix[indices_train_submatrix]

        indices_test_submatrix = np.ix_(val_indices, train_indices)
        K_val = kernel_matrix[indices_test_submatrix]

        alpha = train_kernel_ridge_regression(K_train, y_train, lambda_reg)
        predictions = predict_kernel_ridge_regression(K_val, alpha)

        accuracy = np.mean(predictions == y_val)
        accuracies.append(accuracy)

    return np.mean(accuracies)

# ===== Optuna Optimization =====
def objective(trial):
    """Objective function for Optuna to optimize k and lambda_reg."""
    k = trial.suggest_int("k", 7, 9)  # k-mer length between 2 and 6
    lambda_reg = trial.suggest_float("lambda_reg", 1e0, 1e3, log = True)  # λ in [0.01, 100]
    m = trial.suggest_int("m", 1, 3)

    kernel_matrix = compute_mismatch_kernel(X_train, k, m = m)
    cross_val = cross_val_score_krr(X_train, Y_train, kernel_matrix, lambda_reg)
    return cross_val  # Maximize accuracy

def train_and_predict_mismatch_krr(X_train_path, Y_train_path, X_test_path, n_trials=20):
    """Hyperparameter tuning + final prediction with Kernel Ridge Regression."""
    global X_train, Y_train  # Needed for Optuna

    # Load data
    X_train, Y_train = load_data(X_train_path, Y_train_path)

    df_test = pd.read_csv(X_test_path)
    X_test = df_test["seq"].values

    # Run Optuna optimization
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=n_trials)

    # Best hyperparameters
    best_k = study.best_params["k"]
    best_m = study.best_params["m"]
    best_lambda = study.best_params["lambda_reg"]
    print(f"Best k: {best_k}, Best lambda_reg: {best_lambda}, Best m: {best_m}")

    # Train final model with best hyperparameters
    sequences = np.concatenate([X_train, X_test])
    kernel_matrix = compute_mismatch_kernel(sequences, best_k, m = best_m)
    K_train = kernel_matrix[0:len(X_train), 0:len(X_train)]
    K_test = kernel_matrix[0:len(X_test), 0:len(X_train)]

    # K_train = spectrum_kernel_matrix(X_train, X_train, best_k)
    # K_test = spectrum_kernel_matrix(X_test, X_train, best_k)
    alpha = train_kernel_ridge_regression(K_train, Y_train, best_lambda)

    # Predict on test set
    predictions = predict_kernel_ridge_regression(K_test, alpha)

    # Convert {-1,1} predictions to {0,1}
    df_test["Bound"] = (predictions + 1) // 2

    return df_test

In [None]:
prediction_0 = train_and_predict_mismatch_svm("./data/Xtr0.csv", "./data/Ytr0.csv", "./data/Xte0.csv", n_trials = 2)
prediction_1 = train_and_predict_mismatch_svm("./data/Xtr1.csv", "./data/Ytr1.csv", "./data/Xte1.csv", n_trials = 2)
prediction_2 = train_and_predict_mismatch_svm("./data/Xtr2.csv", "./data/Ytr2.csv", "./data/Xte2.csv", n_trials = 2)

save_to_csv(prediction_0, prediction_1, prediction_2, suffixe = "mismatch_svm")

In [14]:
X_train_path = "./data/Xtr0.csv"
Y_train_path = "./data/Ytr0.csv"
X_test_path = "./data/Xte0.csv"

X_train, Y_train = load_data(X_train_path, Y_train_path)

df_pred = pd.read_csv(X_test_path)
X_test = df_pred["seq"].values

X = np.concatenate([X_train, X_test])

kernel = compute_mismatch_kernel(X, 5, m=1, alphabet="ACGT")


Computing feature vectors:   0%|          | 0/3000 [00:00<?, ?it/s]

Computing feature vectors: 100%|██████████| 3000/3000 [00:02<00:00, 1243.69it/s]
Collecting k-mers: 100%|██████████| 3000/3000 [00:00<00:00, 8119.96it/s]
Building sparse matrix entries: 100%|██████████| 3000/3000 [00:00<00:00, 9648.78it/s]


In [15]:
kernel.shape

(3000, 3000)

In [25]:
kernel[0:len(X_train), 0:len(X_train)]

array([[3860., 2710., 2710., ..., 2408., 2412., 2220.],
       [2710., 4728., 1938., ..., 3880., 2000., 1816.],
       [2710., 1938., 4116., ..., 1690., 2746., 2744.],
       ...,
       [2408., 3880., 1690., ..., 6628., 1866., 1434.],
       [2412., 2000., 2746., ..., 1866., 4040., 2522.],
       [2220., 1816., 2744., ..., 1434., 2522., 4544.]],
      shape=(2000, 2000), dtype=float32)

In [18]:
kernel[0:len(X_train), 0:len(X_test)].shape

(2000, 1000)

In [22]:
kernel[np.ix_([0,2], [0,2])]

array([[3860., 2710.],
       [2710., 4116.]], dtype=float32)

In [24]:
kernel[0,0]

np.float32(3860.0)