# Libraries

In [None]:
import os
import math
import numpy as np
from scipy import signal
from tqdm.auto import tqdm
import matplotlib.pyplot as plt

# Global matplotlib text styling applied to every figure drawn below.
plt.rcParams.update({
    'font.family': 'Times New Roman',
    'font.size': 12,
})

# Font sizes passed explicitly to axis labels and subplot titles.
axisFontSize, titleFontSize = 14, 20

# Parameters

## Data Path

In [None]:
# Directory that holds the input .npy files; the result folder is created inside it.
tracePath = 'C:/Users/01sun/source/repos/raspberrypi/trace_20250124/alignment_and_drop_GPU_20250203/TraceMerge'
# File name (relative to tracePath) of the cipher array loaded by the GPU alignment cell.
cipherName = 'cipher.npy'
# NOTE(review): traceName is not referenced by any cell visible here; the loaders
# spell the file name out literally instead - confirm whether it is still needed.
traceName = 'filtered_Trace.npy'

In [None]:
# Row index of the trace from which the reference pattern is cut.
referenceTraceIndex = 18316
# [start, stop) sample range of the reference pattern inside the reference trace.
# Alternative ranges kept for reference: [9500, 44500], [39000, 44900], [42900, 44900]
referenceTraceXrange = [10000, 20000]
# [start, stop) sample window of every trace that is searched for the reference pattern.
alignRange = [8000, 22000]
# bound[0]: first window offset tested by getCorrs; bound[1]: number of offsets
# removed from the end of the search range. The alignment cells also keep
# bound[0] samples before and bound[1] samples after the matched segment.
bound                = [0, 800]

# One entry per processing pass. An empty entry ('') makes the test cells load
# 'filtered_trace.npy'; any other entry is formatted into 'trace_{}.npy'.
bandPassFreqs = [
                    '',
                    #[int( 50e6), int(150e6)],
                    #[int( 10e6), int(300e6)],
                    #[int(  1e7), int(1e8)]
               ]

## Result

In [None]:
# All outputs of this notebook are written to <tracePath>/realignment.
resultFolderName = '{}/realignment'.format(tracePath)
os.makedirs(resultFolderName, exist_ok=True)
print(resultFolderName)

# Store the run configuration next to the results, one "name: value" line per
# parameter, in a fixed order.
with open('{}/parameters.txt'.format(resultFolderName), 'w') as fp:
    fp.writelines(
        '{}: {}\n'.format(name, value)
        for name, value in (
            ('tracePath', tracePath),
            ('cipherName', cipherName),
            ('alignRange', alignRange),
            ('bandPassFreqs', bandPassFreqs),
            ('referenceTraceIndex', referenceTraceIndex),
            ('referenceTraceXrange', referenceTraceXrange),
            ('bound', bound),
        )
    )

# Test Reference Trace

In [None]:
def getCorrs(trace, refTrace, refTraceSum, refTraceSquSum, bound):
    """Slide refTrace over trace and compute the Pearson correlation at each offset.

    Parameters
    ----------
    trace : 1-D array
        Samples that are searched for the reference pattern.
    refTrace : 1-D array
        Reference pattern; its length is the window length.
    refTraceSum, refTraceSquSum : scalar
        Sum and sum of squares of refTrace, precomputed by the caller.
    bound : sequence of two ints
        bound[0] is the first offset tested; offsets run up to (but excluding)
        ``len(trace) - len(refTrace) - bound[1]``.

    Returns
    -------
    maxCorr
        Largest correlation found, or 0 if no offset has a positive correlation.
    maxCorrIndex
        Offset at which maxCorr occurs (0 if maxCorr was never updated).
    corrs : list
        Correlation at every tested offset, in order. Offsets whose window (or
        the reference itself) has no variance are reported as 0.0.
    """
    # Accumulate in float64 so that n*sum(x^2) - sum(x)^2 does not lose its
    # significant digits when the input is stored as float32.
    trace = np.asarray(trace, dtype=np.float64)
    refTrace = np.asarray(refTrace, dtype=np.float64)
    refSum = float(refTraceSum)
    squTrace = np.square(trace)
    traceLen = refTrace.shape[0]

    # Reference part of the denominator does not depend on the offset.
    refVar = traceLen * float(refTraceSquSum) - refSum ** 2
    refNorm = math.sqrt(refVar) if refVar > 0 else 0.0

    corrs = []
    maxCorr      = 0
    maxCorrIndex = 0
    for pointIndex in range(bound[0], trace.shape[0] - traceLen - bound[1]):
        window      = trace[pointIndex:pointIndex+traceLen]
        traceSum    = np.sum(window)
        traceSquSum = np.sum(squTrace[pointIndex:pointIndex+traceLen])
        traceVar    = traceLen * traceSquSum - traceSum ** 2
        if traceVar > 0 and refNorm > 0:
            corr = (traceLen * np.sum(window * refTrace) - traceSum * refSum) / (math.sqrt(traceVar) * refNorm)
        else:
            # Correlation is undefined for a constant window/reference; treat
            # it as "no match" instead of dividing by zero or taking the
            # square root of a negative rounding residue.
            corr = 0.0
        corrs.append(corr)
        if maxCorr < corr:
            maxCorr = corr
            maxCorrIndex = pointIndex

    return maxCorr, maxCorrIndex, corrs

In [None]:
# Correlation threshold per pass band; later cells compare each trace's best
# correlation against it.
criterion = np.zeros(len(bandPassFreqs))
for bandPassIndex, bandPassFreq in tqdm(enumerate(bandPassFreqs)):
    # An empty entry selects the already filtered traces; any other entry
    # selects the file stored for that pass band.
    if len(bandPassFreq) == 0:
        trace0 = np.load('{}/filtered_trace.npy'.format(tracePath))
    else:
        trace0 = np.load('{}/trace_{}.npy'.format(tracePath, bandPassFreq))
    # Both panels show the same data set, so reuse the loaded array. This also
    # defines trace1 for non-empty pass bands, where it used to be missing.
    trace1 = trace0
    print(bandPassFreq)

    plt.figure(figsize=(15, 8))
    # Top panel: first ten traces over the full length, with the end of the
    # alignment window marked.
    plt.subplot(2, 1, 1)
    plt.title('Original Trace')
    plt.plot(trace0[:10].T, alpha=0.7, linewidth=0.3)
    #plt.vlines(alignRange[0], np.min(trace0), np.max(trace0), color='r', alpha=0.5)
    plt.vlines(alignRange[1], np.min(trace0), np.max(trace0), color='r', alpha=0.5)
    plt.xlim(0, trace0.shape[1]-1)
    # Bottom panel: the same traces zoomed to the alignment window.
    plt.subplot(2, 1, 2)
    plt.plot(trace1[:10].T, alpha=0.7, linewidth=0.3)
    plt.xlim(alignRange[0], alignRange[1])
    plt.savefig('{}/AlignResult_{}.png'.format(resultFolderName, bandPassFreq), dpi=300, bbox_inches='tight')
    plt.show()

In [None]:
# Correlation threshold per pass band; a trace counts as "over criterion" when
# its best correlation reaches this value.
criterion = np.zeros(len(bandPassFreqs))
for bandPassIndex, bandPassFreq in tqdm(enumerate(bandPassFreqs)):
    # An empty entry selects the already filtered traces; any other entry
    # selects the file stored for that pass band.
    if len(bandPassFreq) == 0:
        trace = np.load('{}/filtered_trace.npy'.format(tracePath))
    else:
        trace = np.load('{}/trace_{}.npy'.format(tracePath, bandPassFreq))
    print(bandPassFreq)

    # Overview: first ten traces, full length (top) and zoomed to the
    # alignment window (bottom), with the window edges marked in red.
    plt.figure(figsize=(15, 8))
    plt.subplot(2, 1, 1)
    plt.title('Original Trace')
    plt.plot(trace[:10].T, alpha=0.7, linewidth=0.3)
    plt.vlines(alignRange[0], np.min(trace), np.max(trace), color='r', alpha=0.5)
    plt.vlines(alignRange[1], np.min(trace), np.max(trace), color='r', alpha=0.5)
    plt.xlim(0, trace.shape[1]-1)
    plt.subplot(2, 1, 2)
    plt.plot(trace[:10].T, alpha=0.7, linewidth=0.3)
    plt.xlim(alignRange[0], alignRange[1])
    # The file name must not depend on traceIndex/maxCorr: they are only
    # assigned by the per-trace loop further down, so using them here raised a
    # NameError on a fresh kernel (or silently reused stale values).
    plt.savefig('{}/AlignRange_{}.png'.format(resultFolderName, bandPassFreq), dpi=300, bbox_inches='tight')
    plt.show()

    # Reference pattern and the sums needed by the correlation formula.
    refTrace = trace[referenceTraceIndex, referenceTraceXrange[0]:referenceTraceXrange[1]]
    refTraceSum     = np.sum(refTrace)
    refTraceSquSum  = np.sum(np.square(refTrace))

    plt.figure(figsize=(15, 8))
    plt.subplot(2, 1, 1)
    plt.title('Reference Trace')
    plt.plot(trace[referenceTraceIndex], linewidth=0.3)
    plt.vlines(referenceTraceXrange[0], np.min(refTrace), np.max(refTrace), color='r', alpha=0.5)
    plt.vlines(referenceTraceXrange[1], np.min(refTrace), np.max(refTrace), color='r', alpha=0.5)
    plt.xlim(0, trace.shape[1]-1)
    plt.subplot(2, 1, 2)
    plt.plot(refTrace, linewidth=0.3)
    plt.xlim(0, refTrace.shape[0]-1)
    plt.show()

    # Correlate the first traces (at most 20) against the reference and plot
    # the match. Requires the getCorrs variant that also returns the
    # per-offset correlation list.
    maxCorrs = []
    for traceIndex in tqdm(range(min(20, trace.shape[0]))):
        maxCorr, maxCorrIndex, corrs = getCorrs(trace[traceIndex, alignRange[0]:alignRange[1]], refTrace, refTraceSum, refTraceSquSum, bound)
        maxCorrs.append(maxCorr)
        print('Trace {:2d}\tCorrelation: {:.3f}\t(point: {:7d})\tOver criterion: {}'.format(traceIndex, maxCorr, maxCorrIndex, maxCorr >= criterion[bandPassIndex]))

        # maxCorrIndex is relative to alignRange[0]; convert to absolute
        # sample positions for plotting on the full trace.
        matchStart = alignRange[0] + maxCorrIndex
        matchStop  = matchStart + refTrace.shape[0]

        plt.figure(figsize=(15, 10))
        plt.subplot(4, 1, 1)
        plt.title('Original Signal', fontsize=titleFontSize)
        plt.plot(trace[traceIndex], linewidth=0.5)
        plt.xlim(0, trace.shape[1]-1)
        plt.ylabel('Voltage (V)', fontsize=axisFontSize)
        # Blue: search window, red: best-matching segment.
        plt.vlines(alignRange[0], np.min(refTrace), np.max(refTrace), color='b', alpha=0.5)
        plt.vlines(alignRange[1], np.min(refTrace), np.max(refTrace), color='b', alpha=0.5)

        plt.vlines(matchStart, np.min(refTrace), np.max(refTrace), color='r', alpha=0.5)
        plt.vlines(matchStop,  np.min(refTrace), np.max(refTrace), color='r', alpha=0.5)

        plt.subplot(4, 1, 2)
        plt.title('Overlap', fontsize=titleFontSize)
        plt.plot(trace[traceIndex], linewidth=0.5)
        plt.plot(np.arange(matchStart, matchStop), refTrace, linewidth=0.5, alpha=0.7)
        plt.xlim(alignRange[0], alignRange[1])
        plt.ylabel('Voltage (V)', fontsize=axisFontSize)

        plt.subplot(4, 1, 3)
        plt.title('Overlap zoom', fontsize=titleFontSize)
        plt.plot(trace[traceIndex], linewidth=0.5)
        plt.plot(np.arange(matchStart, matchStop), refTrace, linewidth=0.5, alpha=0.7)
        plt.xlim(matchStart, matchStop)

        plt.subplot(4, 1, 4)
        plt.title('Correlation', fontsize=titleFontSize)
        plt.plot(corrs)
        plt.xlim(0, len(corrs)-1)

        plt.xlabel('Time (point)', fontsize=axisFontSize)
        plt.tight_layout()
        plt.savefig('{}/criterionTest_{}_{}_{}.png'.format(resultFolderName, bandPassFreq, traceIndex, maxCorr >= criterion[bandPassIndex]), dpi=300, bbox_inches='tight')
        plt.show()
    # Summary statistics that help to choose a criterion value.
    maxCorrs = np.array(maxCorrs)
    print('Maximum of correlation\'s mean: {} ({:.2f}%), median: {}, lower quantile: {}'.format(np.mean(maxCorrs), np.sum(maxCorrs >= np.mean(maxCorrs)) / len(maxCorrs) * 100, np.median(maxCorrs),  np.quantile(maxCorrs, 0.25))) 

# Alignment

In [None]:
def getCorrs(trace, refTrace, refTraceSum, refTraceSquSum, bound):
    """Find the offset at which refTrace correlates best with trace.

    Parameters
    ----------
    trace : 1-D array
        Samples that are searched for the reference pattern.
    refTrace : 1-D array
        Reference pattern; its length is the window length.
    refTraceSum, refTraceSquSum : scalar
        Sum and sum of squares of refTrace, precomputed by the caller.
    bound : sequence of two ints
        bound[0] is the first offset tested; offsets run up to (but excluding)
        ``len(trace) - len(refTrace) - bound[1]``.

    Returns
    -------
    maxCorr
        Largest Pearson correlation found, or 0 if no offset has a positive
        correlation.
    maxCorrIndex
        Offset at which maxCorr occurs (0 if maxCorr was never updated).
    """
    # Accumulate in float64 so that n*sum(x^2) - sum(x)^2 does not lose its
    # significant digits when the input is stored as float32.
    trace = np.asarray(trace, dtype=np.float64)
    refTrace = np.asarray(refTrace, dtype=np.float64)
    refSum = float(refTraceSum)
    squTrace = np.square(trace)
    traceLen = refTrace.shape[0]

    # Reference part of the denominator does not depend on the offset.
    refVar = traceLen * float(refTraceSquSum) - refSum ** 2
    refNorm = math.sqrt(refVar) if refVar > 0 else 0.0

    maxCorr      = 0
    maxCorrIndex = 0
    if refNorm <= 0:
        # A constant reference has no defined correlation with anything.
        return maxCorr, maxCorrIndex

    for pointIndex in range(bound[0], trace.shape[0] - traceLen - bound[1]):
        window      = trace[pointIndex:pointIndex+traceLen]
        traceSum    = np.sum(window)
        traceSquSum = np.sum(squTrace[pointIndex:pointIndex+traceLen])
        traceVar    = traceLen * traceSquSum - traceSum ** 2
        if traceVar <= 0:
            # Constant window: correlation undefined, skip instead of dividing
            # by zero or taking the square root of a negative rounding residue.
            continue
        corr = (traceLen * np.sum(window * refTrace) - traceSum * refSum) / (math.sqrt(traceVar) * refNorm)
        if maxCorr < corr:
            maxCorr = corr
            maxCorrIndex = pointIndex

    return maxCorr, maxCorrIndex

In [None]:
# CPU alignment: for every trace, locate the segment that correlates best with
# the reference pattern and keep that segment (plus the bound margins) from
# both the filtered and the raw trace, together with the matching cipher row.
for bandPassIndex, bandPassFreq in tqdm(enumerate(bandPassFreqs)):
    filtered_trace = np.load('{}/filtered_Trace.npy'.format(tracePath))
    trace = np.load('{}/trace.npy'.format(tracePath))
    cipher = np.load('{}/cipher.npy'.format(tracePath))

    # The reference is cut from the full-length filtered trace, before the
    # traces are cropped to the alignment window.
    refTrace = filtered_trace[referenceTraceIndex, referenceTraceXrange[0]:referenceTraceXrange[1]]
    refTraceSum     = np.sum(refTrace)
    refTraceSquSum  = np.sum(np.square(refTrace))

    filtered_trace = filtered_trace[:, alignRange[0]:alignRange[1]]
    trace = trace[:, alignRange[0]:alignRange[1]]

    filtered_alignTrace  = []
    alignTrace = []
    alignPlain  = []  # never filled in this cell; kept so the name stays defined
    alignCipher = []
    for traceIndex in tqdm(range(trace.shape[0])):
        # Requires the getCorrs variant that returns (maxCorr, maxCorrIndex).
        maxCorr, maxCorrIndex = getCorrs(filtered_trace[traceIndex], refTrace, refTraceSum, refTraceSquSum, bound)
        if maxCorr >= criterion[bandPassIndex]:
            segmentStart = maxCorrIndex - bound[0]
            segmentStop  = maxCorrIndex + len(refTrace) + bound[1]
            # The segment must lie inside the cropped trace. The limit is the
            # number of samples per trace (axis 1); len(trace) would be the
            # number of traces and made this check depend on the data set size.
            if 0 <= segmentStart and segmentStop < trace.shape[1]:
                filtered_alignTrace.append(filtered_trace[traceIndex, segmentStart:segmentStop])
                alignTrace.append(trace[traceIndex, segmentStart:segmentStop])
                alignCipher.append(cipher[traceIndex])

    filtered_alignTrace = np.array(filtered_alignTrace)
    alignTrace  = np.array(alignTrace)
    alignCipher = np.array(alignCipher)
    print(alignTrace.shape)

    np.save('{}/filtered_trace.npy'.format(resultFolderName), filtered_alignTrace)
    np.save('{}/trace.npy'.format(resultFolderName), alignTrace)
    np.save('{}/cipher.npy'.format(resultFolderName), alignCipher)

    # Overlay of the first 100 aligned traces: raw (top) and filtered (bottom).
    plt.figure(figsize=(20, 7))
    plt.subplot(2, 1, 1)
    plt.title('alignTrace 100 trace Overlap')
    plt.plot(alignTrace[:100].T, alpha=0.2)
    plt.xlim(0, alignTrace.shape[1])
    plt.ylabel('Voltage (V)', fontsize=axisFontSize)
    plt.xlabel('Time', fontsize=axisFontSize)
    plt.tight_layout()

    plt.subplot(2, 1, 2)
    plt.title('align_filtered_Trace 100 trace Overlap')
    plt.plot(filtered_alignTrace[:100].T, alpha=0.2)
    plt.xlim(0, filtered_alignTrace.shape[1])
    plt.ylabel('Voltage (V)', fontsize=axisFontSize)
    plt.xlabel('Time', fontsize=axisFontSize)
    plt.tight_layout()
    plt.savefig('{}/{}_trace_overlap.png'.format(resultFolderName, bandPassFreq), dpi=300, bbox_inches='tight')
    plt.show()

# GPU Alignment

In [None]:
import gc
import cupy as cp
import cusignal
from concurrent.futures import ThreadPoolExecutor

In [None]:
def check_gpu_memory():
    """Print and return the (free, total) GPU memory in MB.

    The byte counts come from ``cp.cuda.runtime.memGetInfo()`` and are divided
    by 1024**2.
    """
    bytes_per_mb = 1024 ** 2
    free_bytes, total_bytes = cp.cuda.runtime.memGetInfo()
    free_mem = free_bytes / bytes_per_mb
    total_mem = total_bytes / bytes_per_mb
    print(f"GPU Memory - Free: {free_mem:.2f} MB, Total: {total_mem:.2f} MB")
    return free_mem, total_mem

def calculate_chunk_size(free_mem_mb, trace_length, ref_length, float32_size=4, mem_usage_ratio=0.5):
    """Estimate how many traces can be processed on the GPU in one batch.

    Parameters
    ----------
    free_mem_mb : float
        Free GPU memory in MB.
    trace_length : int
        Number of samples per trace.
    ref_length : int
        Number of samples in the reference trace.
    float32_size : int, optional
        Bytes per sample.
    mem_usage_ratio : float, optional
        Fraction of the free memory that may be used.

    Returns
    -------
    int
        Number of traces per batch, never less than 1.

    Raises
    ------
    ValueError
        If the usable memory is not positive, or if nothing is left after
        reserving room for the reference trace.
    """
    usable_mem_mb = free_mem_mb * mem_usage_ratio
    if usable_mem_mb <= 0:
        raise ValueError("Insufficient GPU memory available for processing.")

    # Fixed cost: three reference-sized buffers (reference trace and its sums).
    fixed_cost = (ref_length * float32_size) * 3
    # Per-trace cost: the trace itself plus an equally large working buffer
    # for the additional memory needed while it is being processed.
    bytes_per_trace = trace_length * float32_size
    per_trace_cost = bytes_per_trace + bytes_per_trace

    budget = usable_mem_mb * (1024 ** 2) - fixed_cost
    if budget <= 0:
        raise ValueError("Not enough memory for even a single trace.")

    chunk_size = int(budget / per_trace_cost)
    print(f"Adjusted chunk size: {chunk_size} traces (usable GPU memory: {usable_mem_mb:.2f} MB)")
    # The printed value may be 0; the returned value is clamped to one trace.
    return chunk_size if chunk_size > 1 else 1


def getCorrs_gpu_batch(gpu_traces, gpu_refTrace, gpu_refTraceSum, gpu_refTraceSquSum, bound):
    """Find, for every trace of a batch, the offset that correlates best with the reference.

    Parameters
    ----------
    gpu_traces : cupy.ndarray, shape (num_traces, trace_length)
        Batch of traces to search.
    gpu_refTrace : cupy.ndarray
        Reference pattern; it is reshaped to one row and its length is the
        window length.
    gpu_refTraceSum, gpu_refTraceSquSum
        Sum and sum of squares of the reference pattern.
    bound : sequence of two ints
        bound[0] is the first offset tested; offsets run up to (but excluding)
        ``trace_length - window_length - bound[1]``.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Per-trace maximum correlation and the offset where it occurs. Traces
        without any positive correlation keep the initial values 0 and 0.
    """
    num_traces, trace_length = gpu_traces.shape
    maxCorrs = cp.zeros(num_traces, dtype=cp.float32)
    maxCorrIndices = cp.zeros(num_traces, dtype=cp.int32)

    gpu_refTrace = gpu_refTrace.reshape(1, -1)  # Broadcastable shape
    # The window length is the reference length. Taking it from gpu_traces
    # made it equal to trace_length, which left the offset range below empty
    # so that the function always returned zeros.
    traceLen = gpu_refTrace.shape[1]

    # Reference part of the denominator does not depend on the offset.
    refNorm = cp.sqrt(traceLen * gpu_refTraceSquSum - gpu_refTraceSum ** 2)

    for pointIndex in tqdm(range(bound[0], trace_length - traceLen - bound[1])):
        # Same window of every trace in the batch (a view, no copy).
        window = gpu_traces[:, pointIndex:pointIndex + traceLen]
        traceSums = cp.sum(window, axis=1)
        traceSquSums = cp.sum(cp.square(window), axis=1)
        corrs = ((traceLen * cp.sum(window * gpu_refTrace, axis=1) - traceSums * gpu_refTraceSum) /
                 (cp.sqrt(traceLen * traceSquSums - traceSums ** 2) * refNorm))

        # A nan correlation compares False here and therefore never replaces
        # the running maximum.
        update_mask = corrs > maxCorrs
        maxCorrs = cp.where(update_mask, corrs, maxCorrs)
        maxCorrIndices = cp.where(update_mask, pointIndex, maxCorrIndices)

    return cp.asnumpy(maxCorrs), cp.asnumpy(maxCorrIndices)

In [None]:
# GPU alignment: same procedure as the CPU alignment cell, but the best offset
# of many traces is computed per batch by getCorrs_gpu_batch.
# Correlation threshold per pass band; traces whose best correlation is below
# it are dropped.
criterion = np.zeros(len(bandPassFreqs))
for bandPassIndex, bandPassFreq in tqdm(enumerate(bandPassFreqs), desc="Processing BandPass Frequencies"):
    # Load the data
    filtered_trace = np.load(f"{tracePath}/filtered_trace.npy")
    trace = np.load(f"{tracePath}/trace.npy")
    cipher = np.load(f"{tracePath}/{cipherName}")

    # Define the reference trace (cut from the full-length filtered trace)
    refTrace = filtered_trace[referenceTraceIndex, referenceTraceXrange[0]:referenceTraceXrange[1]]
    refTraceSum = np.sum(refTrace)
    refTraceSquSum = np.sum(np.square(refTrace))

    # Move the reference data to the GPU
    gpu_refTrace = cp.asarray(refTrace)
    gpu_refTraceSum = cp.asarray(refTraceSum)
    gpu_refTraceSquSum = cp.asarray(refTraceSquSum)

    # Check the GPU memory
    free_mem, total_mem = check_gpu_memory()

    # Determine the batch size
    # NOTE(review): calculate_chunk_size is called with its default of 4 bytes
    # per sample - confirm that the loaded arrays are float32.
    trace_length = alignRange[1] - alignRange[0]
    ref_length = refTrace.shape[0]
    chunk_size = calculate_chunk_size(free_mem, trace_length, ref_length)

    # Crop the traces to the alignment window
    filtered_trace = filtered_trace[:, alignRange[0]:alignRange[1]]
    trace = trace[:, alignRange[0]:alignRange[1]]

    # Lists collecting the results
    filtered_alignTrace = []
    alignTrace = []
    alignCipher = []

    # Process the traces chunk by chunk
    chunk_indices = list(range(0, trace.shape[0], chunk_size))

    for start_index in tqdm(chunk_indices, desc="Processing Chunks"):
        end_index = min(start_index + chunk_size, trace.shape[0])
        gpu_traces = cp.asarray(filtered_trace[start_index:end_index])

        # Compute the correlations on the GPU
        maxCorrs, maxCorrIndices = getCorrs_gpu_batch(
            gpu_traces, gpu_refTrace, gpu_refTraceSum, gpu_refTraceSquSum, bound
        )

        # Cut out the aligned segments and filter by the criterion.
        # i is the absolute trace index (enumerate starts at start_index).
        for i, (maxCorr, maxCorrIndex) in enumerate(zip(maxCorrs, maxCorrIndices), start=start_index):
            if maxCorr >= criterion[bandPassIndex]:
                # Keep the trace only if the segment plus both bound margins
                # lies inside the cropped trace.
                if 0 <= maxCorrIndex - bound[0] and maxCorrIndex + len(refTrace) + bound[1] < trace.shape[1]:
                    filtered_alignTrace.append(filtered_trace[i, maxCorrIndex - bound[0]: maxCorrIndex + len(refTrace) + bound[1]])
                    alignTrace.append(trace[i, maxCorrIndex - bound[0]: maxCorrIndex + len(refTrace) + bound[1]])
                    alignCipher.append(cipher[i])

        # Release the GPU memory of this chunk
        del gpu_traces
        cp.get_default_memory_pool().free_all_blocks()
        gc.collect()

    # Convert the results to NumPy arrays
    filtered_alignTrace = np.array(filtered_alignTrace)
    alignTrace = np.array(alignTrace)
    alignCipher = np.array(alignCipher)

    print("Aligned Trace Shape:", alignTrace.shape)

    # Save the results
    # NOTE(review): these are the same file names the CPU alignment cell writes
    # to, so running both cells overwrites the earlier output.
    np.save(f"{resultFolderName}/filtered_trace.npy", filtered_alignTrace)
    np.save(f"{resultFolderName}/trace.npy", alignTrace)
    np.save(f"{resultFolderName}/cipher.npy", alignCipher)

    # Visualization: overlay of the first 100 aligned traces
    # NOTE(review): alignTrace.shape[1] does not exist when no trace was kept.
    plt.figure(figsize=(20, 7))

    plt.subplot(2, 1, 1)
    plt.title("Aligned Trace - 100 Trace Overlap")
    plt.plot(alignTrace[:100].T, alpha=0.2)
    plt.xlim(0, alignTrace.shape[1])
    plt.ylabel("Voltage (V)", fontsize=14)
    plt.xlabel("Time", fontsize=14)
    plt.tight_layout()

    plt.subplot(2, 1, 2)
    plt.title("Aligned Filtered Trace - 100 Trace Overlap")
    plt.plot(filtered_alignTrace[:100].T, alpha=0.2)
    plt.xlim(0, filtered_alignTrace.shape[1])
    plt.ylabel("Voltage (V)", fontsize=14)
    plt.xlabel("Time", fontsize=14)
    plt.tight_layout()

    plt.savefig(f"{resultFolderName}/{bandPassFreq}_trace_overlap.png", dpi=300, bbox_inches="tight")
    plt.show()