1. 원본 코드는 MuxConv 패키지와 FHE_simulator 패키지를 참조해주세요
2. 여기서는 연산과 관련없는 부분 (디버그 메시지 등)은 최대한 지운 상태입니다.
3. 코멘트에 `FHE 연산` 이라고 표시된 부분 외에는 FHE 연산이 없고, 전/후에 필요한 일반 변수는 모두 미리 계산 가능합니다.
4. 조금 아래에 '본격적인 FHE 연산' 부분부터 자세히 보시면 됩니다.
5. 각 FHE 연산마다 대응되는 HEAAN 기준 함수를 적어두었습니다. 
6. 최적화에 적당한 몇가지 패턴이 발견됩니다. 

<최적화에 적합할 것 같은 부분>
1. function_poly  
이 함수는 원래 HEAAN에서 작성되어있는 함수인데, 여러번 곱하기가 반복됩니다.
2. summation  
하나의 CTXT의 모든 숫자를 더하는 함수입니다. log2(CTXT 길이)만큼의 rotation과 add가 반복되는 패턴입니다.
3. rotate -> add  
summation의 내부 함수이기도 하며, 마지막 AVGPool과 Linear 레이어에서 특히 많이 사용됩니다.
4. MultByVec -> rescale -> add  
Convolution 계산에서 많이 나타납니다. 전/후에 다른 input/output이 없으며, CTXT 두 개에 대한 계산입니다.

In [None]:
import matplotlib.pyplot as plt 
import numpy as np
import hemul
hemul.USE_FPGA=False
from hemul import heaan
from muxcnn.resnet_HEAAN import ResNetHEAAN
from muxcnn.utils import get_channel_last
from muxcnn.utils import load_img, decrypt_result
import struct

# 1. 원본 Pytorch 모델 

Pytorch 부분. FHE와 무관

In [6]:
import torch
import numpy as np
from torchvision import datasets
import torchvision.transforms as transforms
from torch.utils.data.sampler import SubsetRandomSampler
from torch import nn
import torch.nn.functional as F
from muxcnn.models.ResNet20 import ResNet, BasicBlock
from muxcnn.utils import load_params

# Data-loading settings: loaders run in the main process (no worker
# subprocesses), 32 images per batch, 20% of the training set held out
# for validation.
num_workers = 0
batch_size = 32
valid_size = 0.2

# Training-time augmentation (random flip + padded random crop) followed by
# conversion to a tensor and per-channel normalization with mean 0.5 / std 0.5.
train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

# Same tensor conversion and normalization, without augmentation.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

# CIFAR-10 is downloaded into ./data if it is not already there.
train_data = datasets.CIFAR10('data', train=True,
                              download=True, transform=train_transform)
# NOTE(review): test_data is created here but not used anywhere in the visible code.
test_data = datasets.CIFAR10('data', train=False,
                             download=True, transform=test_transform)

# Shuffle the training indices (unseeded, so the split differs per run) and
# cut off the first `valid_size` fraction as the validation subset.
num_train = len(train_data)
indices = list(range(num_train))
np.random.shuffle(indices)
split = int(np.floor(valid_size * num_train))
train_idx, valid_idx = indices[split:], indices[:split]

train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)

# prepare data loaders (combine dataset and sampler)
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size,
    sampler=train_sampler, num_workers=num_workers)
# The validation loader also draws from train_data, so its images go through
# train_transform (including the random augmentation).
valid_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, 
    sampler=valid_sampler, num_workers=num_workers)

# CIFAR-10 classes
classes = ['airplane', 'automobile', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck']

# One BasicBlock per stage; presumably this matches the "ResNet8" checkpoint
# loaded below -- confirm against muxcnn.models.ResNet20.
model = ResNet(BasicBlock,[1,1,1])
model.eval() 

# Load trained parameters
load_params(model, fn_param="./ResNet8.pt",device='cpu')

Files already downloaded and verified
Files already downloaded and verified


## FHE Setup

In [5]:
# Parameters handed to the HEAAN context below (positional order: logn, logp, logq).
logp = 30
logq = 800
logn = 15

# Rotation amounts prepared in advance and passed to the context as rot_l:
# the powers of two 2**0 .. 2**14, a set of amounts just below 2**15
# (2**15 - k), and a few small odd amounts.
# NOTE(review): presumably these select which rotation keys are loaded or
# generated -- confirm against hemul.heaan.HEAANContext.
rot_l = [2**i for i in range(15)]
rot_l = rot_l + [2**15-1, 
                 2**15-33, 2**15-32, 2**15-31,
                 2**15-17, 2**15-16, 2**15-15, 
                 2**15-9,2**15-8, 2**15-7] + [3,5,7,9,15,17, 31, 33]

hec = heaan.HEAANContext(logn, logp, logq, load_keys=True, rot_l=rot_l)

Initializing the scheme as the data owner
Loading a secret key from:  ./SecretKey.txt
loading secret key done.
HEAAN CKKS setup is ready 


## FHE ResNet 모델 셋업

In [7]:
# Wrap the trained PyTorch model and the HEAAN context in the FHE ResNet.
# alpha and min_depth are forwarded to ResNetHEAAN; presumably they configure
# the approximate-ReLU polynomials -- confirm in muxcnn.resnet_HEAAN.
fhemodel = ResNetHEAAN(model, hec, alpha=14, min_depth=True, debug=False)

## Input image

32 * 32 * 3 * 8bit  
int8 -> double로 변환 후 사용  (CKKS는 double만 사용하므로)

In [24]:
# Pull one mini-batch from the validation loader.
img_tensor, label = next(iter(valid_loader))

print(img_tensor.data.size())
# np.prod replaces np.product, which was deprecated in NumPy 1.25 and removed
# in NumPy 2.0; the printed value is unchanged.
# NOTE(review): the value is the product of the tensor dimensions (an element
# count for the whole batch), although the message labels it "bit".
print(f"Total size: {np.prod(img_tensor.data.size())} bit")

torch.Size([32, 3, 32, 32])
Total size: 98304 bit


## 1개의 암호문으로 packing

logn = 15 사용.

size(ctxt) = `2 * 2**15 * size(zz)`

In [None]:
# Encrypt once: pack the first image of the batch into a single ciphertext.
ctxt = fhemodel.pack_img_ctxt(img_tensor[:1,:,:,:])

# 본격적인 FHE 연산 

In [42]:
# fhemodel(ctxt) == fhemodel.forward(ctxt)
def forward(self, ctxt, ki=1, hi=32, wi=32, debug=None, verbose=True):
    """Run the whole encrypted network on ``ctxt`` and return the result.

    The pipeline is: early conv/bn/activation stage, the first block of each
    of ``layer1``/``layer2``/``layer3`` of ``self.torch_model``, average
    pooling, and the final linear layer.

    Args:
        ctxt: input ciphertext.
        ki, hi, wi: forwarded unchanged to ``forward_early``.
        debug: forwarded to ``forward_early`` and ``forward_bb``.
        verbose: accepted but not used here.

    Returns:
        Whatever ``self.forward_linear`` returns.
    """
    net = self.torch_model

    # Step 1: early stage
    ctxt, dims = self.forward_early(ctxt, ki, hi, wi, debug=debug)

    # Step 2: one basic block per stage, each fed the previous block's outputs
    for stage in (net.layer1, net.layer2, net.layer3):
        ctxt, dims = self.forward_bb(stage[0], ctxt, dims, debug=debug)

    # Step 3: global pooling
    pooled = self.AVGPool(ctxt, dims, self.nslots)

    # Step 4: linear layer
    return self.forward_linear(pooled, net.linear)

# Step 1
def forward_early(self, ct_a, ki, hi, wi, debug=False, verbose=True):
    """Apply the first conv+bn layer and the activation to ``ct_a``.

    Args:
        ct_a: input ciphertext.
        ki, hi, wi: packed into the ``{'k', 'h', 'w'}`` dict handed to
            ``get_conv_params`` together with ``conv1``.
        debug, verbose: accepted but not used here.

    Returns:
        ``(ciphertext, outs)`` where ``outs`` is the third value returned by
        ``get_conv_params`` for ``conv1``.
    """
    net = self.torch_model
    in_dims = {'k':ki, 'h':hi, 'w':wi}
    _, conv_ins, conv_outs = get_conv_params(net.conv1, in_dims)

    # Step 1-1: convolution + batch norm
    convolved = self.forward_convbn_par_fhe(net.conv1, net.bn1, ct_a, conv_ins)

    # Step 1-2: activation
    activated = self.activation(convolved)
    return activated, conv_outs

# Step 1-1
def forward_convbn_par_fhe(self, cnn_layer, bn_layer, ctx, ins, kernels=[3,3]):
    """Derive the conv parameters for ``cnn_layer`` and run ``MultParConvBN_fhe``.

    ``get_conv_params(cnn_layer, ins)`` supplies the weight object and the
    input/output parameter dicts; ``kernels`` is passed through unchanged.
    Returns whatever ``MultParConvBN_fhe`` returns.
    """
    weights, in_dims, out_dims = get_conv_params(cnn_layer, ins)
    return self.MultParConvBN_fhe(
        ctx, weights, bn_layer, in_dims, out_dims, kernels
    )

### Step 1-1-1. 중요 함수 1. (Conv + BN)
Multiplication depth 2.  

In [None]:
def MultParConvBN_fhe(self, ct_a, U, bn_layer, ins:Dict, outs:Dict,
                    kernels=[3,3],
                    nslots=2**15, 
                    scale_factor=1, debug=False):
    """Convolution followed by batch norm on a ciphertext (consumes two mults).

    The input is bootstrapped first when its ``logq`` is 80 or less. Two
    plaintext multiplications (each followed by a rescale) lie on the path
    from ``ct_a`` to the result: the kernel weights, then the selection mask
    combined with ``MuxBN_C``.

    Args:
        ct_a: input ciphertext.
        U: weight object handed to ``ParMultWgt``.
        bn_layer: batch-norm layer handed to ``parMuxBN``.
        ins, outs: parameter dicts; their values are unpacked positionally in
            insertion order into (h, w, c, k, t, p).
            NOTE(review): this relies on the dicts' key order -- confirm
            against ``get_conv_params``.
        kernels: kernel height and width, read as ``kernels[0], kernels[1]``.
        nslots: slot count used when building the plaintext vectors.
        scale_factor: divisor applied to the final additive constant.
        debug: accepted but not used here.

    Returns:
        The accumulated output ciphertext ``ct_d``. The rotation counter
        ``nrots`` is maintained throughout but is not returned.
    """
    if ct_a.logq <= 80:
        ct_a = self.hec.bootstrap2(ct_a)
    ev = self.hec

    hi,wi,ci,ki,ti,pi = [ins[k] for k in ins.keys()]
    ho,wo,co,ko,to,po = [outs[k] for k in outs.keys()]
    q = get_q(co,pi)
    fh,fw= kernels[0],kernels[1]

    # Can be precomputed (no FHE operation involved)
    MuxBN_C, MuxBN_M, MuxBN_I = parMuxBN(bn_layer, outs, nslots)

    # FHE op 1 - create a fresh ciphertext (the output accumulator)
    # HEAAN.Ciphertext.Ciphertext(long, long, long) -> CTXT
    ct_d = self.gen_new_ctxt() 

    # FHE op 2 - mod switch: bring the accumulator to the level the input
    # will have after two rescales
    # HEAAN.Scheme::modDownByAndEqual(CTXT) -> Void
    ev.modDownTo(ct_d, ct_a.logq - 2*ct_d.logp)
    
    ct = []
    nrots=0
    # fh, fw are the convolution kernel size (3 x 3 by default),
    # so 9 rotations are performed
    
    # Temporarily hold the fh*fw rotated copies of the input
    for i1 in range(fh):
        temp = []
        for i2 in range(fw):
            lrots = int((-(ki**2)*wi*(i1-(fh-1)/2) - ki*(i2-(fw-1)/2)))
            # The rotation amount lrots can be precomputed.
            # Store a rotated copy of ct_a.
            # FHE op 3 - rotation
            # HEAAN.Scheme::leftRotateFast(CTXT, long) -> CTXT
            temp.append(ev.lrot(ct_a, -lrots, inplace=False))
            if lrots!=0:
                nrots = nrots+ 1  # rotation counter
        ct.append(temp)

    # Author's note: q is roughly a single-digit integer (varies per layer)
    for i3 in range(q):
        # FHE op 4 - create a fresh ciphertext (per-iteration accumulator)
        ct_b = self.gen_new_ctxt()
        
        # FHE op 5 - mod switch: one level below the rotated inputs
        # HEAAN.Scheme::modDownByAndEqual(CTXT) -> Void
        ev.modDownTo(ct_b, ct[0][0].logq - ct_b.logp)

        # fh, fw are the convolution kernel size (3 x 3 by default),
        # so this runs 9 times
        for i1 in range(fh):
            for i2 in range(fw):
                # Can be precomputed
                w = ParMultWgt(U,i1,i2,i3,ins,co,kernels,nslots)
                
                # FHE op 6 - ciphertext * plaintext
                # Author's note: both have the same size (2 * 2**15 * size(ZZ))
                # HEAAN.Scheme::multByConstVec(CTXT, PTXT) -> CTXT
                tmp = ev.multByVec(ct[i1][i2], w, inplace=False)
                
                # FHE op 7 - rescale
                # HEAAN.Scheme::reScaleByAndEqual(CTXT) -> Void
                ev.rescale(tmp)
                
                # FHE op 8 - ciphertext + ciphertext
                # Author's note: both have the same size (2 * 2**15 * size(ZZ))
                # HEAAN.Scheme::addAndEqual(CTXT1, CTXT2) -> Void
                ev.add(ct_b, tmp, inplace=True)

        # FHE op 9 (rotation + add)
        # ct_b and ct_c are ciphertexts; the other arguments are ints that
        # can be precomputed. See SumSlots.
        ct_c,nrots0 = self.SumSlots(ct_b, ki,              1)
        ct_c,nrots1 = self.SumSlots(ct_c, ki,          ki*wi)
        ct_c,nrots2 = self.SumSlots(ct_c, ti,  (ki**2)*hi*wi)
        nrots += nrots0 + nrots1 + nrots2  # rotation counter

        # Author's note: differs per layer, roughly 10 iterations
        for i4 in range(0,min(pi,co-pi*i3)):
            i = pi*i3 +i4
            r0 = int(np.floor(nslots/pi))*(i%pi)
            r1 = int(np.floor(i/(ko**2)))*ko**2*ho*wo
            r2 = int(np.floor((i%(ko**2))/ko))*ko*wo
            r3 = i%ko
            rrots = (-r1-r2-r3)+r0
            
            # FHE op 10 - rotation
            # HEAAN.Scheme::leftRotateFast(CTXT, long) -> CTXT
            rolled = ev.lrot(ct_c, rrots, inplace=False)
            
            S_mp = tensor_multiplexed_selecting(ho,wo,co,ko,to,i)
            vec_S = Vec(S_mp,nslots)
            
            # FHE op 11 - ciphertext * plaintext (selection mask times MuxBN_C)
            # Author's note: both have the same size (2 * 2**15 * size(ZZ))
            # HEAAN.Scheme::multByConstVec(CTXT, PTXT) -> CTXT
            tmp = ev.multByVec(rolled, vec_S * MuxBN_C, 
                                                #rolled.logp), 
                                inplace=False)
            # FHE op 12 - rescale
            # HEAAN.Scheme::reScaleByAndEqual(CTXT) -> Void
            ev.rescale(tmp)
            
            # FHE op 13 - ciphertext + ciphertext
            # Author's note: both have the same size (2 * 2**15 * size(ZZ))
            # HEAAN.Scheme::addAndEqual(CTXT1, CTXT2) -> Void
            ev.add(ct_d, tmp, inplace=True)
            
            if rrots!=0:
                nrots=nrots+1  # rotation counter

    
    # Rotate-and-add log2(po) times, doubling the rotation amount each step
    for j in range(int(np.round(np.log2(po)))):
        r = -int(np.round(2**j*(nslots/po)))
        
        # FHE op 14 - add
        # HEAAN.Scheme::addAndEqual(CTXT1, CTXT2) -> Void
        ev.add(ct_d, ev.lrot(ct_d, r, inplace=False), inplace=True)
        if r !=0:
            nrots+=1

    # Additive constant built from the three parMuxBN vectors (plaintext only)
    plain_vec = -1/scale_factor*(MuxBN_C*MuxBN_M-MuxBN_I)
                                    #,ct_d.logp)
    # FHE op 15 - addConst
    # HEAAN.Scheme::addConstAndEqual(CTXT1, PTXT) -> Void
    ev.addConst(ct_d, plain_vec, inplace=True)

    return ct_d

In [None]:
# FHE 연산 9 (rotation + add)
def SumSlots(self, ct_a,m,p):
    """Sum ``m`` copies of ``ct_a`` left-rotated by 0, p, 2p, ..., (m-1)*p.

    Only rotations and additions are used. Partial sums covering 1, 2, 4, ...
    copies are built by doubling; the blocks for the remaining set bits of
    ``m`` are then added at their offsets.

    The offset of the block for bit ``j`` is ``floor(m / 2**(j+1)) * 2**(j+1)``.
    The previous code reduced that quotient modulo 2, which for m = 5, 7,
    9, ... added an already-covered copy and left one uncovered. Results for
    powers of two and for m = 3 are unchanged.

    Args:
        ct_a: input ciphertext. It is not modified; when ``m`` is 1 it is
            returned as-is.
        m: number of copies to sum (positive integer).
        p: rotation stride between consecutive copies.

    Returns:
        ``(ciphertext, nrots)`` where ``nrots`` counts the non-zero rotations
        performed.
    """
    ev = self.hec
    m = int(m)
    nrots = 0
    n = int(np.floor(np.log2(m)))

    # partial[j] holds the sum of the first 2**j copies; built in n steps.
    partial = [ct_a]
    for j in range(1, n + 1):
        lrots = int(p*2**(j-1))
        prev = partial[j-1]
        # FHE op 9-1, 9-2: rotate, then add
        # HEAAN.Scheme::leftRotateFast(CTXT)
        # HEAAN.Scheme::addAndEqual(CTXT1, CTXT2)
        partial.append(ev.add(prev,
                              ev.lrot(prev, lrots, inplace=False),
                              inplace=False))
        if lrots != 0:
            nrots += 1

    # Start from the largest power-of-two block (copies 0 .. 2**n - 1).
    ctx_c = partial[n]
    for j in range(n):
        if (m >> j) & 1:
            # Bits of m above position j give the index of the first copy
            # that this 2**j-sized block has to cover.
            offset = (m >> (j + 1)) << (j + 1)
            lrots = int(p*offset)

            # FHE op 9-3, 9-4: rotate, then add
            # HEAAN.Scheme::leftRotateFast(CTXT)
            # HEAAN.Scheme::addAndEqual(CTXT1, CTXT2)
            ev.add(ctx_c,
                   ev.lrot(partial[j], lrots, inplace=False),
                   inplace=True)
            if lrots != 0:
                nrots += 1
    return ctx_c, nrots

## Step 1-2

## Activation (ReLU)
정확도에 따라 다항식의 composition 횟수 다름.  
alpha = 12 기준으로 degree 15짜리 다항식 4번 사용 

In [None]:
class ApprSign_FHE():
    """Composition of polynomials evaluated on a ciphertext.

    ``funs`` is a sequence of ``(function, degree)`` pairs built by
    ``_appr_sign_funs``; calling the object applies them one after another,
    bootstrapping in between when the remaining modulus is too small.
    Defined before ``ApprRelu_HEAAN`` because a base class must exist when
    the subclass statement runs.
    """

    def __init__(self, 
                 hec,
                alpha=12, 
                margin = 0.03, 
                eps=0.01, 
                xmin=-1,
                xmax=1,
                min_depth=True, 
                min_mult=False,
                debug=False):
        """Store the settings and, when possible, build the polynomials.

        Args:
            hec: FHE context providing ``bootstrap2``, ``function_poly`` and
                ``parms.logp``.
            alpha: key into the degree tables; ``None`` skips degree lookup.
            margin, eps, xmin, xmax: forwarded to ``_appr_sign_funs``.
            min_depth: select degrees from ``MINIMUM_DEPTH`` when true,
                otherwise from ``MINIMUM_MULT``.
            min_mult: accepted for interface compatibility; the stored flag
                is always derived from ``min_depth``.
            debug: stored on the instance.
        """
        self.hec = hec
        self.alpha = alpha
        self.margin = margin
        self.eps = eps
        self.xmin = xmin
        self.xmax = xmax
        self.min_depth = min_depth
        # Logical negation. The bitwise `~` used previously turns True/False
        # into -2/-1, both truthy, and is deprecated on bool since Python 3.12.
        self.min_mult = not min_depth
        self.funs = None
        self.degrees = None
        self.debug=debug
        if self.alpha is not None:
            self._set_degree()
        if self._params_set():
            self._set_funs()

    def _params_set(self):
        """Return True when degrees, margin and eps are all available."""
        return self.degrees is not None and self.margin is not None and self.eps is not None

    def _set_degree(self):
        """Look up the polynomial degrees for ``alpha`` in the chosen table."""
        if self.min_depth:
            self.degrees = MINIMUM_DEPTH[self.alpha]
        elif self.min_mult:
            self.degrees = MINIMUM_MULT[self.alpha]
    
    def _set_funs(self, degrees=None, xmin=None, xmax=None):
        """Build ``self.funs``.

        Each argument falls back to the value stored on the instance when it
        is ``None``; previously the arguments were overwritten unconditionally.
        """
        degrees = self.degrees if degrees is None else degrees
        xmin = self.xmin if xmin is None else xmin
        xmax = self.xmax if xmax is None else xmax
        
        self.funs = _appr_sign_funs(degrees, xmin, xmax, 
                margin=self.margin, eps=self.eps)

    def __call__(self, xin):
        """Apply every polynomial in ``funs`` to ``xin`` and return the result."""
        if self.funs is None:
            # Build lazily, then fall through. Re-entering via self.__call__
            # would dispatch to a subclass override (e.g. ApprRelu_HEAAN).
            self._set_funs()

        logp = self.hec.parms.logp
        # One iteration per polynomial in the composition
        for fun, deg in self.funs:
            if xin.logq <= ((1+np.ceil(np.log2(deg))) * logp):
                # Bootstrap when needed
                # HEAAN.Scheme::bootstrapAndEqual(CTXT) -> Void
                xin = self.hec.bootstrap2(xin)

            # FHE op 16 - polynomial evaluation; fun.coef holds the coefficients
            # HEAAN.SchemeAlgo::function_poly(CTXT, double*, long, long) -> CTXT
            xin = self.hec.function_poly(fun.coef, xin)
        
        if xin.logq <= (3*logp):
            xin = self.hec.bootstrap2(xin)
        return xin


class ApprRelu_HEAAN(ApprSign_FHE):
    """Approximate ReLU: ``x * (sign(x) + 1) / 2`` with ``sign`` from the base class."""

    def __call__(self, xin):
        """Return the approximate ReLU of ``xin``.

        The product is written into ``xin`` in place (or into the
        bootstrapped ciphertext when bootstrapping was needed) and that
        object is returned.
        """
        hec = self.hec
        
        if xin.logq <= (3 * self.hec.parms.logp):
            # Bootstrap when needed
            # HEAAN.Scheme::bootstrapAndEqual(CTXT) -> Void
            xin = self.hec.bootstrap2(xin)
            
        # FHE op 16 - evaluate the sign approximation on a copy of the input
        out = ApprSign_FHE.__call__(self, he.Ciphertext(xin))
        
        # FHE op 17 - addConst
        # HEAAN.Scheme::addConstAndEqual(CTXT1, PTXT) -> Void
        tmp = hec.addConst(out, np.repeat(1, hec.parms.n), inplace=False)

        # FHE op 18 - ciphertext * plaintext
        # HEAAN.Scheme::multByConstVec(CTXT, PTXT) -> CTXT
        tmp = hec.multByVec(tmp, np.repeat(1/2, hec.parms.n), inplace=False)
        
        # FHE op 19 - rescale
        # HEAAN.Scheme::reScaleByAndEqual(CTXT) -> Void
        hec.rescale(tmp)
        
        # FHE op 20 - mod switch: bring the operand with the larger logq down
        # HEAAN.Scheme::modDownByAndEqual(CTXT) -> Void
        if xin.logq > tmp.logq:
            hec.match_mod(xin, tmp)
        elif xin.logq < tmp.logq:
            hec.match_mod(tmp, xin)
            
        # FHE op 21 - ciphertext * ciphertext
        # HEAAN.Scheme::multAndEqual(CTXT1, CTXT2) -> Void
        hec.mult(xin, tmp, inplace=True)

        # FHE op 22 - rescale
        # HEAAN.Scheme::reScaleByAndEqual(CTXT) -> Void
        hec.rescale(xin)
        return xin

# Step 2
# Basic Block
ResNet에서 3 회 반복되어 가장 주요한 함수 

In [None]:
def forward_bb(self, bb:ResNet20.BasicBlock, ctxt_in, outs_in, debug=False, verbose=True):
    """Evaluate one residual basic block on a ciphertext.

    Main path: conv1+bn1 -> activation -> conv2+bn2. The shortcut is a copy
    of the input, passed through the block's shortcut conv+bn when
    ``bb.shortcut`` is non-empty. Both paths are aligned in ``logp`` and
    ``logq``, added, and the activation is applied once more.

    Args:
        bb: basic block providing ``conv1``, ``bn1``, ``conv2``, ``bn2`` and
            ``shortcut``.
        ctxt_in: input ciphertext; bootstrapped first when ``logq <= 80``.
        outs_in: output parameters of the previous layer, handed to
            ``get_conv_params``.
        debug, verbose: accepted but not used here.

    Returns:
        ``(ciphertext, outs)`` where ``outs`` are the output parameters of
        ``conv2``.
    """
    # Bootstrap before shortcut
    if ctxt_in.logq <= 80:
        ctxt_in = self.hec.bootstrap2(ctxt_in)

    # FHE op 23 - copy the input ciphertext for the shortcut path
    # HEAAN.Ciphertext.Ciphertext(CTXT) -> CTXT
    shortcut = he.Ciphertext(ctxt_in)

    _, ins, outs = get_conv_params(bb.conv1, outs_in)

    # FHE ops - same as Step 1-1
    ctxt = self.forward_convbn_par_fhe(bb.conv1,
                                    bb.bn1, ctxt_in, ins)
    # FHE ops - same as Step 1-2
    ctxt = self.activation(ctxt)    
    
    _, ins, outs = get_conv_params(bb.conv2, outs)
    
    # FHE ops - same as Step 1-1
    ctxt = self.forward_convbn_par_fhe(bb.conv2,
                                    bb.bn2, ctxt, ins)

    # Shortcut
    if len(bb.shortcut) > 0:
        convl, bnl = bb.shortcut
        _, ins_, _ = get_conv_params(convl, outs_in)
        
        # FHE ops - same as Step 1-1, using the shortcut conv's own kernel size
        shortcut = self.forward_convbn_par_fhe(convl, bnl, shortcut, ins_, 
                                            convl.kernel_size)

    # Add shortcut
    # FHE op 24 - rescale whichever path has the larger logp down to the other
    # HEAAN.Scheme::reScaleByAndEqual(CTXT) -> Void
    if ctxt.logp > shortcut.logp:
        self.hec.rescale(ctxt, shortcut.logp)
    elif ctxt.logp < shortcut.logp:
        self.hec.rescale(shortcut, ctxt.logp)

    # FHE op 25 - mod switch: bring the path with the larger logq down
    # HEAAN.Scheme::modDownByAndEqual(CTXT) -> Void
    if ctxt.logq > shortcut.logq:
        self.hec.match_mod(ctxt, shortcut)
    elif ctxt.logq < shortcut.logq:
        self.hec.match_mod(shortcut, ctxt)

    # FHE op 26 - ciphertext + ciphertext
    # HEAAN.Scheme::addAndEqual(CTXT1, CTXT2) -> Void
    self.hec.add(ctxt, shortcut, inplace=True)

    # Activation
    # FHE ops - same as Step 1-2
    ctxt = self.activation(ctxt)

    return ctxt, outs

# Step 3 AVGPool

In [None]:
def AVGPool(self, ct_in, ins, nslots, verbose=True):
    """Average-pool a ciphertext using rotations, additions and masks.

    A copy of ``ct_in`` is summed by rotate-and-add, first with strides
    ``2**j * ki`` and then with strides ``2**j * ki*ki*wi``. For every
    ``(i1, i2)`` pair the summed ciphertext is rotated, multiplied by the
    ``select_AVG`` mask divided by ``hi*wi``, rescaled and accumulated.

    Args:
        ct_in: input ciphertext; it is copied and not modified.
        ins: parameter dict whose values are unpacked in insertion order
            into (h, w, c, k, t, p).
        nslots: slot count handed to ``select_AVG``.
        verbose: accepted but not used here.

    Returns:
        The accumulated ciphertext.
    """
    ev = self.hec

    # FHE op 27 - copy the input ciphertext
    # HEAAN.Ciphertext.Ciphertext(CTXT) -> CTXT
    summed = he.Ciphertext(ct_in)

    # FHE op 28 - create a fresh ciphertext (the accumulator)
    # HEAAN.Ciphertext.Ciphertext(long, long, long) -> CTXT
    acc = self.gen_new_ctxt()

    hi, wi, ci, ki, ti, pi = ins.values()

    # FHE op 29 - rotate then add, int(log2(wi)) times
    # HEAAN.Scheme::leftRotateFast(CTXT) -> CTXT
    # HEAAN.Scheme::addAndEqual(CTXT1, CTXT2)
    for step in range(int(np.log2(wi))):
        shifted = ev.lrot(summed, 2**step*ki, inplace=False)
        ev.add(summed, shifted, inplace=True)

    # FHE op 30 - rotate then add, int(log2(hi)) times
    # HEAAN.Scheme::leftRotateFast(CTXT) -> CTXT
    # HEAAN.Scheme::addAndEqual(CTXT1, CTXT2)
    for step in range(int(np.log2(hi))):
        shifted = ev.lrot(summed, 2**step*ki*ki*wi, inplace=False)
        ev.add(summed, shifted, inplace=True)

    # FHE op 31 - mod switch: one level below the summed ciphertext
    # HEAAN.Scheme::modDownByAndEqual(CTXT) -> Void
    ev.modDownTo(acc, summed.logq - summed.logp)

    # ki * ti iterations in total
    for i1 in range(ki):
        for i2 in range(ti):
            idx = ki*i2+i1
            mask = select_AVG(nslots, idx, ki) / (hi*wi)
            shift = ki**2*hi*wi*i2 + ki*wi*i1 - ki*idx

            # FHE op 32 - rotation
            # HEAAN.Scheme::leftRotateFast(CTXT) -> CTXT
            piece = ev.lrot(summed, shift, inplace=False)

            # FHE op 33 - ciphertext * plaintext
            # HEAAN.Scheme::multByConstVec(CTXT, PTXT) -> CTXT
            ev.multByVec(piece, mask, inplace=True)

            # FHE op 34 - rescale
            # HEAAN.Scheme::reScaleByAndEqual(CTXT) -> Void
            ev.rescale(piece)

            # FHE op 35 - ciphertext + ciphertext
            # HEAAN.Scheme::addAndEqual(CTXT1, CTXT2) -> Void
            ev.add(acc, piece, inplace=True)

    return acc


# Step4 Linear

In [None]:
def forward_linear(self, ctxt, linearl:nn.modules.Linear, verbose=True):
    """Apply the weights of a linear layer to a ciphertext.

    The ciphertext is replicated by rotate-and-add with strides
    ``-2**i * ni``, multiplied by the flattened weight matrix (zero-padded
    to ``self.nslots``), and every run of consecutive slots is summed by
    rotate-and-add with strides ``2**j``.

    Args:
        ctxt: input ciphertext; modified in place.
        linearl: linear layer whose ``weight`` has shape ``(no, ni)``.
        verbose: accepted but not used here.

    Returns:
        The same ``ctxt`` object after the in-place operations.

    NOTE(review): ``linearl.bias`` is not read here -- confirm that the bias
    is applied elsewhere or is intentionally absent.
    """
    hec = self.hec
    no, ni = linearl.weight.shape

    # Plaintext weights: row-major flattening, zero-padded to the slot count
    weight_vec = np.zeros(self.nslots)
    weight_vec[:no*ni] = np.ravel(linearl.weight.detach().numpy())

    # np.ceil keeps this block on names the file already imports; the bare
    # `ceil` used before is not imported anywhere in this file.
    for i in range(int(np.ceil(np.log2(no)))):
        # FHE op 36 - rotate then add
        # HEAAN.Scheme::leftRotateFast(CTXT)
        # HEAAN.Scheme::addAndEqual(CTXT1, CTXT2)
        hec.add(ctxt, 
            hec.lrot(ctxt, -2**i*ni, inplace=False),
                    inplace=True)

    # FHE op 37 - ciphertext * plaintext
    # HEAAN.Scheme::multByConstVec(CTXT, PTXT) -> Void
    hec.multByVec(ctxt, weight_vec, inplace=True)
    
    # FHE op 38 - rescale
    # HEAAN.Scheme::reScaleByAndEqual(CTXT) -> Void
    hec.rescale(ctxt)

    # Sum runs of 2**int(log2(ni)) consecutive slots (equal to ni when ni is
    # a power of two)
    for j in range(int(np.log2(ni))):
        # FHE op 39 - rotate then add
        # HEAAN.Scheme::leftRotateFast(CTXT)
        # HEAAN.Scheme::addAndEqual(CTXT1, CTXT2)
        hec.add(ctxt, 
            hec.lrot(ctxt, 2**j, inplace=False),
                    inplace=True)

    return ctxt