In [19]:
import os
import numpy as np
from tqdm.auto import tqdm

# Folders holding the CPU and GPU pipeline outputs that are compared below.
cpu_result_folder = "C:/Users/01sun/source/repos/raspberrypi/trace2/alignment_and_drop_cpu"
gpu_result_folder = "C:/Users/01sun/source/repos/raspberrypi/trace2/alignment_and_drop_GPU"

# Collect the "alignTrace*" files of each folder in sorted order.
cpu_files = sorted(f for f in os.listdir(cpu_result_folder) if f.startswith("alignTrace"))
gpu_files = sorted(f for f in os.listdir(gpu_result_folder) if f.startswith("alignTrace"))

# Show the first 10 file names of each folder, as the label promises.
# Slicing (instead of indexing [0]) also keeps this safe for an empty folder.
print("CPU 파일 리스트 (처음 10개):", cpu_files[:10])
print("GPU 파일 리스트 (처음 10개):", gpu_files[:10])
# Number of result files found in each folder.
print(f"CPU 결과 파일 개수: {len(cpu_files)}")
print(f"GPU 결과 파일 개수: {len(gpu_files)}")

# Warn when the two folders do not hold the same number of files.
if len(cpu_files) != len(gpu_files):
    print("경고: CPU와 GPU 결과 폴더의 파일 개수가 다릅니다!")

# Pair files by NAME rather than by sorted position: with positional pairing a
# single missing file would shift every later pair and compare unrelated traces.
cpu_file_set = set(cpu_files)
gpu_file_set = set(gpu_files)
common_files = [f for f in cpu_files if f in gpu_file_set]
cpu_only_files = [f for f in cpu_files if f not in gpu_file_set]
gpu_only_files = [f for f in gpu_files if f not in cpu_file_set]

if cpu_only_files:
    print(f"경고: CPU 폴더에만 있는 파일 {len(cpu_only_files)}개는 비교에서 제외됩니다.")
if gpu_only_files:
    print(f"경고: GPU 폴더에만 있는 파일 {len(gpu_only_files)}개는 비교에서 제외됩니다.")

# Compare the results file by file.
same_count = 0
different_count = 0
different_files = []

for file_name in tqdm(common_files, total=len(common_files)):
    cpu_file = gpu_file = file_name
    cpu_path = os.path.join(cpu_result_folder, cpu_file)
    gpu_path = os.path.join(gpu_result_folder, gpu_file)

    cpu_trace = np.load(cpu_path)
    gpu_trace = np.load(gpu_path)

    # Exact comparison on purpose (no tolerance): np.array_equal is False for
    # any differing element and also for differing shapes.
    if not np.array_equal(cpu_trace, gpu_trace):
        print(f"Different file name: {cpu_file}, {gpu_file}")
        different_files.append(gpu_file)
        different_count += 1
    else:
        same_count += 1

# Summary of the comparison.
print(f"결과가 같은 파일 개수: {same_count}")
print(f"결과가 다른 파일 개수: {different_count}")

CPU 파일 리스트 (처음 10개): alignTrace100000.npy
GPU 파일 리스트 (처음 10개): alignTrace100000.npy
CPU 결과 파일 개수: 151318
GPU 결과 파일 개수: 151318


  0%|          | 0/151318 [00:00<?, ?it/s]

Different file name: alignTrace433530.npy, alignTrace433530.npy
Different file name: alignTrace609147.npy, alignTrace609147.npy
Different file name: alignTrace869600.npy, alignTrace869600.npy
결과가 같은 파일 개수: 151315
결과가 다른 파일 개수: 3


In [None]:
import os
import math
import cusignal
import cupy as cp
import numpy as np
from scipy import signal
from tqdm.auto import tqdm

# ---------------------------------------------------------------------------
# Acquisition settings
# ---------------------------------------------------------------------------
samplingFrequency = 5_000_000_000  # Hz; take this value from the oscilloscope setting
windowLength      = 1000           # number of samples in the window
noverlap          = None

# ---------------------------------------------------------------------------
# FIR band-pass filter design
# ---------------------------------------------------------------------------
# filterOrder is handed to firwin as `numtaps`, i.e. the number of filter
# coefficients (filter order + 1). numtaps must be odd if a passband includes
# the Nyquist frequency.
filterOrder  = 100 - 1
# When not None, filterWidth is the approximate width of the transition region
# (same units as fs) used for a Kaiser FIR design; the `window` argument is
# then ignored.
filterWidth  = None
# Band edges in the same units as fs: positive, monotonically increasing,
# strictly between 0 and fs/2.
# Other bands noted previously: 895e6-905e6, 995e6-1005e6, 15e6-25e6.
bandPassFreq = [10_000_000, 100_000_000]

# ---------------------------------------------------------------------------
# Alignment settings
# ---------------------------------------------------------------------------
referenceTraceIndex  = 301
referenceTraceXrange = [13000, 26500]  # earlier choices: [9500, 44500], [39000, 44900], [42900, 44900]
# Traces whose maximum correlation with the reference trace is below this
# value are dropped.
correlationCriterion = 0.65
traceCutRange        = [7000, 30000]

MAwindowSize = 500
jump         = 1
bound        = 500

# ---------------------------------------------------------------------------
# Filter coefficients: the same design arguments feed the CPU (SciPy) and the
# GPU (cuSignal) implementations.
# ---------------------------------------------------------------------------
_firwinOptions = dict(
    numtaps=filterOrder,
    cutoff=bandPassFreq,
    width=filterWidth,
    window='hamming',
    fs=samplingFrequency,
    pass_zero=False,
)

firCoeff = signal.firwin(**_firwinOptions)

gpu_firCoeff = cp.asarray(cusignal.firwin(**_firwinOptions))

In [None]:
def getCorrs(trace, refTrace, refTraceSum, refTraceSquSum, bound):
    """Slide ``refTrace`` over ``trace`` and return the best correlation found.

    For every offset in ``range(bound, len(trace) - len(refTrace) - bound)``
    the Pearson correlation coefficient between ``refTrace`` and the
    equally long segment of ``trace`` starting at that offset is computed.

    Parameters
    ----------
    trace : 1-D numpy array that is searched.
    refTrace : 1-D numpy array used as the reference pattern.
    refTraceSum : precomputed sum of ``refTrace``.
    refTraceSquSum : precomputed sum of the squared ``refTrace`` samples.
    bound : number of offsets skipped at both ends of the search range.

    Returns
    -------
    (maxCorr, maxCorrIndex)
        The largest correlation and the offset where it occurred. ``maxCorr``
        starts at 0, so only strictly positive correlations are ever selected;
        ``(0, 0)`` is returned when none is found (including an empty search
        range or a reference with non-positive variance term).
    """
    traceLen = refTrace.shape[0]
    squTrace = np.square(trace)

    maxCorr      = 0
    maxCorrIndex = 0

    # The reference part of the denominator does not depend on the offset, so
    # it is computed once instead of on every iteration.
    refVar = traceLen * refTraceSquSum - refTraceSum ** 2
    if refVar <= 0:
        # Correlation is undefined for a flat reference; math.sqrt would also
        # raise ValueError if rounding made this term slightly negative.
        return maxCorr, maxCorrIndex
    refNorm = math.sqrt(refVar)

    for pointIndex in range(bound, trace.shape[0] - traceLen - bound):
        segment     = trace[pointIndex:pointIndex + traceLen]
        traceSum    = np.sum(segment)
        traceSquSum = np.sum(squTrace[pointIndex:pointIndex + traceLen])

        traceVar = traceLen * traceSquSum - traceSum ** 2
        if traceVar <= 0:
            # Flat segment (or rounding below zero): correlation is undefined,
            # so skip it instead of dividing by zero / failing in math.sqrt.
            continue

        corr = ((traceLen * np.sum(segment * refTrace) - traceSum * refTraceSum)
                / (math.sqrt(traceVar) * refNorm))
        if maxCorr < corr:
            maxCorr = corr
            maxCorrIndex = pointIndex
    return maxCorr, maxCorrIndex

def getCorrs_gpu_batch(gpu_traces, gpu_refTrace, gpu_refTraceSum, gpu_refTraceSquSum, traceLen, bound):
    """Batched counterpart of ``getCorrs`` operating on CuPy arrays.

    For every offset in ``range(bound, trace_length - traceLen - bound)`` the
    correlation between the reference and the ``traceLen``-long segment of
    each row of ``gpu_traces`` is computed, keeping the per-row maximum.

    Parameters
    ----------
    gpu_traces : 2-D array, one trace per row (unpacked as
        ``num_traces, trace_length``).
    gpu_refTrace : reference trace; reshaped to ``(1, -1)`` so it broadcasts
        against the row segments.
    gpu_refTraceSum : precomputed sum of the reference trace.
    gpu_refTraceSquSum : precomputed sum of the squared reference samples.
    traceLen : segment length compared at each offset
        (presumably the reference length -- confirm against the caller).
    bound : number of offsets skipped at both ends of the search range.

    Returns
    -------
    (maxCorrs, maxCorrIndices)
        NumPy arrays (converted with ``cp.asnumpy``) holding, per trace, the
        largest correlation and the offset where it occurred. Both start at
        zero, so only strictly positive correlations are ever recorded.
    """
    num_traces, trace_length = gpu_traces.shape
    maxCorrs = cp.zeros(num_traces)
    maxCorrIndices = cp.zeros(num_traces, dtype=cp.int32)

    gpu_refTrace = gpu_refTrace.reshape(1, -1)  # Broadcastable shape

    # The reference part of the denominator is the same for every offset;
    # compute it once instead of on each loop iteration.
    refNorm = cp.sqrt(traceLen * gpu_refTraceSquSum - gpu_refTraceSum ** 2)

    for pointIndex in tqdm(range(bound, trace_length - traceLen - bound)):
        gpu_segments = gpu_traces[:, pointIndex:pointIndex + traceLen]
        traceSums = cp.sum(gpu_segments, axis=1)
        traceSquSums = cp.sum(cp.square(gpu_segments), axis=1)
        corrs = ((traceLen * cp.sum(gpu_segments * gpu_refTrace, axis=1) - traceSums * gpu_refTraceSum) /
                 (cp.sqrt(traceLen * traceSquSums - traceSums ** 2) * refNorm))

        # Rows whose correlation at this offset beats their running maximum.
        update_mask = corrs > maxCorrs
        maxCorrs = cp.where(update_mask, corrs, maxCorrs)
        maxCorrIndices = cp.where(update_mask, pointIndex, maxCorrIndices)

    return cp.asnumpy(maxCorrs), cp.asnumpy(maxCorrIndices)

# Filter check

## parameter

In [None]:
# File names of the three traces reported as different between the CPU and
# GPU result folders, built from their numeric ids.
check_trace = ["alignTrace{}.npy".format(trace_id) for trace_id in (433530, 609147, 869600)]

In [None]:
tracePath = 'C:/Users/01sun/source/repos/raspberrypi/trace2'

for trace_name in tqdm(check_trace, total=len(check_trace)):
    trace = np.load('{}/{}'.format(tracePath, trace_name))
    gpu_trace = cp.asarray(trace)
    filteredTrace = np.array(signal.filtfilt(b=firCoeff, a=1.0, x=trace), dtype=np.float32)
    filtered_gpu_trace = cusignal.filtfilt(gpu_firCoeff, 1.0, gpu_trace)