In [1]:
import numpy as np
import pickle
import sys

# Work around the numpy.core / numpy._core pickle incompatibility between NumPy 1.x and 2.x.
# NumPy 2.0 renamed numpy.core to numpy._core, so pickle files written under NumPy 2.x
# reference numpy._core.* modules. The custom unpickler below lets those numpy._core
# references resolve through numpy.core so the files can be loaded.

class NumpyCompatibilityUnpickler(pickle.Unpickler):
    """Unpickler that tolerates the ``numpy.core`` / ``numpy._core`` rename.

    NumPy 2.0 renamed its private ``numpy.core`` package to ``numpy._core``,
    so a pickle written under one spelling can name a module that the
    installed NumPy cannot import. This unpickler first resolves every global
    exactly as the pickle names it and only retries under the other spelling
    when that lookup fails, so the preferred (non-deprecated) module is used
    whenever it is available.

    WARNING: like ``pickle.load``, unpickling can execute arbitrary code named
    by the stream. Only use this class on files from a trusted source.
    """

    # The two interchangeable spellings of NumPy's private core package,
    # expressed as (prefix found in the pickle, prefix to retry with).
    _CORE_ALIASES = (
        ("numpy._core", "numpy.core"),
        ("numpy.core", "numpy._core"),
    )

    @classmethod
    def _alternate_module(cls, module):
        """Return ``module`` under the other core-package spelling, or ``None``."""
        for prefix, replacement in cls._CORE_ALIASES:
            # Match whole dotted components only, so an unrelated name such as
            # ``numpy._core_extra`` is never rewritten.
            if module == prefix or module.startswith(prefix + "."):
                return replacement + module[len(prefix):]
        return None

    def find_class(self, module, name):
        """Resolve the global ``module.name`` referenced by the pickle stream.

        Parameters
        ----------
        module : str
            Dotted module path recorded in the pickle.
        name : str
            Attribute to fetch from that module.

        Returns
        -------
        object
            The resolved class or function.

        Raises
        ------
        ImportError, AttributeError
            If the global cannot be resolved under the pickled module name
            nor (for NumPy core modules) under the alternate spelling.
        """
        try:
            return super().find_class(module, name)
        except (ImportError, AttributeError):
            fallback = self._alternate_module(module)
            if fallback is None:
                # Not a NumPy core module: nothing to retry, surface the error.
                raise
            return super().find_class(fallback, name)

from sequenzo.define_sequence_data import SequenceData
from sequenzo.multidomain.cat import compute_cat_distance_matrix
import pandas as pd 

# Read the pickled dataset through the NumPy-version-tolerant unpickler.
# NOTE(review): unpickling executes code named by the file; only load trusted data.
with open("dataset.pkl", "rb") as dataset_file:
    data = NumpyCompatibilityUnpickler(dataset_file).load()

# The multivariate sequences are stored under the "X" key.
sequences_mv = data["X"]

In [2]:
# Inspect the loaded sequences; the bare expression makes the notebook display the array.
sequences_mv

array([[[0, 1, 1, ..., 1, 0, 0],
        [0, 0, 0, ..., 1, 1, 1],
        [0, 1, 1, ..., 0, 0, 1],
        ...,
        [1, 0, 1, ..., 1, 0, 0],
        [0, 1, 1, ..., 1, 1, 0],
        [1, 0, 1, ..., 1, 0, 0]],

       [[0, 1, 0, ..., 1, 0, 1],
        [0, 1, 1, ..., 1, 0, 1],
        [0, 1, 1, ..., 0, 0, 1],
        ...,
        [0, 0, 1, ..., 1, 0, 0],
        [1, 0, 0, ..., 1, 0, 0],
        [1, 0, 1, ..., 0, 0, 1]],

       [[1, 1, 1, ..., 1, 1, 1],
        [1, 1, 1, ..., 1, 0, 1],
        [0, 1, 1, ..., 0, 1, 0],
        ...,
        [1, 1, 0, ..., 0, 0, 1],
        [1, 1, 1, ..., 1, 0, 1],
        [0, 1, 0, ..., 1, 1, 0]],

       ...,

       [[0, 1, 1, ..., 0, 1, 0],
        [0, 1, 1, ..., 1, 0, 0],
        [0, 1, 0, ..., 1, 0, 1],
        ...,
        [0, 0, 0, ..., 1, 0, 0],
        [0, 0, 0, ..., 1, 0, 1],
        [0, 0, 1, ..., 0, 1, 1]],

       [[0, 0, 0, ..., 1, 1, 1],
        [0, 1, 0, ..., 1, 1, 0],
        [1, 1, 1, ..., 1, 1, 1],
        ...,
        [1, 1, 0, ..., 

In [3]:
# --- Experiment dimensions ---
N = 10              # upper bound on the number of sequences compared
V = 50              # number of channels read from the second axis of each sequence
C_list = [2] * V    # presumably the category count per channel; not referenced in this cell
T = sequences_mv[0].shape[0]                     # time points per sequence
time_cols = list(map(str, range(1, T + 1)))      # column labels "1".."T"
N_use = min(N, len(sequences_mv))                # never request more sequences than exist

ids = list(range(N_use))

# --- One SequenceData object per channel ---
channels_seq = []
for channel_idx in range(V):
    # Collect this channel's time series from each selected sequence.
    per_sequence = [sequences_mv[seq_idx][:, channel_idx] for seq_idx in range(N_use)]
    # Shift the integer codes up by one so they match the "1"/"2" state labels.
    shifted_codes = np.stack(per_sequence, axis=0).astype(int) + 1

    channel_df = pd.DataFrame(shifted_codes, columns=time_cols)
    channel_df.insert(0, "ID", ids)
    # States are declared as strings below, so the time columns must be strings too.
    channel_df[time_cols] = channel_df[time_cols].astype(str)

    channels_seq.append(
        SequenceData(
            data=channel_df,
            time=time_cols,
            states=["1", "2"],
            id_col="ID",
        )
    )

print(channels_seq)

# --- Multichannel (CAT) distance matrix, same settings for every channel ---
result_cat = compute_cat_distance_matrix(
    channels=channels_seq,
    method="OM",
    sm=["TRATE" for _ in range(V)],
    indel="auto",
    link="sum",
    norm="none",
    full_matrix=True,
    what="diss",
    cweight=[1.0 for _ in range(V)],
    with_missing=[False for _ in range(V)],
    ch_sep="+",
)

# Normalise the result to a plain NumPy array whatever container was returned.
if hasattr(result_cat, "to_numpy"):
    D_sequenzo = result_cat.to_numpy()
else:
    D_sequenzo = np.asarray(result_cat)


[>] SequenceData initialized successfully! Here's a summary:
[>] Number of sequences: 10
[>] Number of time points: 20
[>] Min/Max sequence length: 20 / 20
[>] States: ['1', '2']
[>] Labels: ['1', '2']
[>] Weights: Not provided

[>] SequenceData initialized successfully! Here's a summary:
[>] Number of sequences: 10
[>] Number of time points: 20
[>] Min/Max sequence length: 20 / 20
[>] States: ['1', '2']
[>] Labels: ['1', '2']
[>] Weights: Not provided

[>] SequenceData initialized successfully! Here's a summary:
[>] Number of sequences: 10
[>] Number of time points: 20
[>] Min/Max sequence length: 20 / 20
[>] States: ['1', '2']
[>] Labels: ['1', '2']
[>] Weights: Not provided

[>] SequenceData initialized successfully! Here's a summary:
[>] Number of sequences: 10
[>] Number of time points: 20
[>] Min/Max sequence length: 20 / 20
[>] States: ['1', '2']
[>] Labels: ['1', '2']
[>] Weights: Not provided

[>] SequenceData initialized successfully! Here's a summary:
[>] Number of sequence

In [4]:
# Show the Sequenzo distance matrix, followed by blank lines as a separator.
print(D_sequenzo)
print("\n")

# Read the matrix stored in dist_om_traminer.pkl with the NumPy-version-tolerant
# unpickler so it can be printed for comparison.
with open("dist_om_traminer.pkl", "rb") as traminer_file:
    D_traminer = NumpyCompatibilityUnpickler(traminer_file).load()

print(D_traminer)

[[  0.         555.77818054 586.72673655 553.74846099 568.52964939
  526.54228876 596.48859341 543.23468468 539.0900539  589.1803452 ]
 [555.77818054   0.         557.76463399 483.59999173 563.70423285
  572.2479531  568.71981687 553.80778232 518.3099587  589.57718507]
 [586.72673655 557.76463399   0.         534.92522232 592.28786691
  572.90058341 571.37720228 549.55577545 535.37334475 571.66094807]
 [553.74846099 483.59999173 534.92522232   0.         571.4236996
  551.97031163 575.54900946 559.15333652 491.14717575 543.11487488]
 [568.52964939 563.70423285 592.28786691 571.4236996    0.
  471.78133794 561.05819983 598.52648706 581.1359417  546.28426539]
 [526.54228876 572.2479531  572.90058341 551.97031163 471.78133794
    0.         560.49836026 571.45692914 555.26504848 568.81431164]
 [596.48859341 568.71981687 571.37720228 575.54900946 561.05819983
  560.49836026   0.         611.80871363 547.98268066 540.72987218]
 [543.23468468 553.80778232 549.55577545 559.15333652 598.526487

In [5]:
# Load the matrix from dist_om_traminer.pkl so this cell has its own copy of D_traminer.
with open("dist_om_traminer.pkl", "rb") as traminer_file:
    D_traminer = NumpyCompatibilityUnpickler(traminer_file).load()
    
def upper_triangle_flat(M):
    """Return the strictly upper-triangular entries of a 2-D matrix as a 1-D array.

    Parameters
    ----------
    M : array-like
        Two-dimensional matrix. It is coerced with ``np.asarray`` so that
        nested lists and other array-likes are indexed positionally, the
        same way an ndarray is.

    Returns
    -------
    numpy.ndarray
        1-D array holding the entries above the main diagonal (``k=1``),
        ordered row by row. Empty when there is no entry above the diagonal.

    Raises
    ------
    ValueError
        If ``M`` is not two-dimensional (raised by ``np.triu_indices_from``).
    """
    matrix = np.asarray(M)
    return matrix[np.triu_indices_from(matrix, k=1)]

# Compare only the pairwise distances above the diagonal: the diagonal is
# excluded (k=1 in the helper) and the lower triangle is not used.
v1 = upper_triangle_flat(D_sequenzo)
v2 = upper_triangle_flat(D_traminer)

# np.corrcoef returns a 2x2 matrix; the off-diagonal entry is the v1/v2 correlation.
corr = np.corrcoef(v1, v2)[0][1]

print(f"[corr] Pearson correlation between Sequenzo and Traminer = {corr:.6f}")

[corr] Pearson correlation between Sequenzo and Traminer = 0.986345
