# 분산 계산 예제 

In [1]:
import ast, inspect 
import numpy as np
import pype
from hemul.ciphertext import Ciphertext
from modifier import NumpyReplacer, RescaleAdder, BinOpReplacer, ModMatcher

binding to HEAAN


## 분산 계산

Pandas나 numpy에서 분산 계산은 간단하다.

```python
result = df.var()
# 혹은
result = np.var(arr)
```



분산(variance)을 계산하는 수식도 비교적 단순하다. 

$$ S^2 = \frac{\sum_{i=1}^{n}(x_i - \bar{x})^2}{n} $$

numpy를 사용하여 vectorized 알고리즘을 작성하면 아래와 같을 것이다.

In [2]:
def var(data:Ciphertext): # Type hint is required: Ciphertext arguments are traced from it
    """Calculate variance
    """
    # Population variance: the sum of squared deviations from the mean,
    # divided by the number of elements (no Bessel correction).
    m = np.mean(data)
    # Deviation of the data from its mean.
    diff = (data - m)
    # Square the deviations, sum them, and normalise by the element count.
    result = np.sum(diff*diff)/len(data)
    return result

In [3]:
# Sample input: 2**12 random floats drawn uniformly from [0, 1).
arr = np.random.random(2**12)
# Plaintext reference result; printed again later next to the decrypted FHE result.
ans = np.var(arr)
print("분산", ans)  # the label "분산" means "variance"

분산 0.08386572571080012


#### 그러나 입력 데이터를 동형암호로 암호화하면 간단한 알고리즘도 쉽게 작동하지 않는다. 

In [4]:
# Set up the homomorphic-encryption (FHE) development environment.
# NOTE(review): the meaning of the arguments (30, 600, 12) is not visible in this
# notebook; 12 presumably corresponds to the 2**12-element input -- confirm against
# pype.set_all.
(ev, algo, encoder, encryptor, decryptor) = pype.set_all(30, 600, 12)

FHE context is set


In [5]:
### FHE
# Encrypt the plaintext array into a ciphertext.
ctxt = encryptor.encrypt(arr)
# Printing the ciphertext shows a placeholder message, not the underlying values.
print(ctxt)

You can't read the content


In [6]:
# Apply the existing (plaintext) code to the ciphertext as-is.
# This raises the TypeError recorded in the cell output: the division is unsupported.
res = var(ctxt)

TypeError: unsupported operand type(s) for /: 'CiphertextStat' and 'int'

#### 본 컴파일러를 사용하면 기존 코드에 단 한 줄의 decorator를 추가함으로써 동형암호 연산을 적용할 수 있다.

In [7]:
@pype.jit  # <------ invoke the JIT compiler
def var(data:Ciphertext):
    """Calculate variance
    """
    # The body is the plain numpy variance algorithm; the decorator is the only
    # addition needed to run it on a ciphertext.
    # Mean of the data.
    m = np.mean(data)
    # Deviation of the data from its mean.
    diff = (data - m)
    # Sum of squared deviations divided by the number of elements.
    result = np.sum(diff*diff)/len(data)
    return result

[static_analyzer] Inferencing data types... 


In [8]:
# Run the JIT-compiled var on the ciphertext.
res = var(ctxt)

In [9]:
# Decrypt the FHE result and print its first element ...
print(decryptor.decrypt(res)[0])
# ... next to the plaintext numpy reference for comparison.
print(ans)

0.08386579750546581
0.08386572571080012


FHE 연산 결과가 numpy의 결과와 근사 오차 범위 내에서 일치한다.

## F-E 작동 과정 

In [10]:
import ast, inspect 
import numpy as np
import pype
from hemul.ciphertext import Ciphertext
from modifier import NumpyReplacer, RescaleAdder, BinOpReplacer, ModMatcher

#### 0. type hint로부터 Ciphertext 추적
```python
def var(data:Ciphertext):
```


#### 1. 기존 코드에서 +, -, *, /을 대체
-> FHE library binding

In [11]:
def var(data:Ciphertext):
    """Calculate variance
    """
    # NOTE: this source text is the input of the AST transformations below; the
    # recorded outputs mirror its docstring, names and statements exactly.
    # Mean of the data.
    m = np.mean(data)
    # Deviation of the data from its mean.
    diff = (data - m)
    # Sum of squared deviations divided by the number of elements.
    result = np.sum(diff*diff)/len(data)
    return result


# Parse the source text of var into an AST.
tree = ast.parse(inspect.getsource(var))

# Rewrite the tree in place: the binary operators become ev.* calls,
# as the printed source shows.
visitor = BinOpReplacer()
visitor.visit(tree)

# Fill in location info on newly created nodes, then print the regenerated source.
tree = ast.fix_missing_locations(tree)
print("------")
print(ast.unparse(tree))

------
def var(data: Ciphertext):
    """Calculate variance
    """
    m = np.mean(data)
    diff = ev.sub(data, m)
    result = ev.div_by_plain(np.sum(ev.mult(diff, diff)), len(data))
    return result


#### 2. 기존 코드에서 numpy 함수를 대체
-> FHE library binding

In [12]:
# Rewrite the tree in place: the np.* calls become algo.* calls,
# as the printed source shows.
NumpyReplacer().visit(tree)
# Fill in location info on newly created nodes, then print the regenerated source.
tree = ast.fix_missing_locations(tree)
print(ast.unparse(tree))

def var(data: Ciphertext):
    """Calculate variance
    """
    m = algo.mean(data)
    diff = ev.sub(data, m)
    result = ev.div_by_plain(algo.sum_reduce(ev.mult(diff, diff)), len(data))
    return result


#### 3. 동형암호 연산 뒤에 rescaling 자동 추가

In [13]:
# Rescale: wrap FHE operations in ev.rescale_next(...), as the printed source shows.
RescaleAdder().visit(tree)
# Fill in location info on newly created nodes.
tree = ast.fix_missing_locations(tree)

print(ast.unparse(tree))

def var(data: Ciphertext):
    """Calculate variance
    """
    m = ev.rescale_next(algo.mean(data), inplace=False)
    diff = ev.sub(data, m)
    result = ev.div_by_plain(algo.sum_reduce(ev.rescale_next(ev.mult(diff, diff), inplace=False)), len(data))
    return result


#### 4. Mod switching 자동 추가 

In [14]:
# Insert ev.match_mod(...) around the first operand of the two-operand FHE calls,
# as the printed source shows.
ModMatcher().visit(tree)
# Fill in location info on newly created nodes.
tree = ast.fix_missing_locations(tree)

print(ast.unparse(tree))

def var(data: Ciphertext):
    """Calculate variance
    """
    m = ev.rescale_next(algo.mean(data), inplace=False)
    diff = ev.sub(ev.match_mod(data, m, inplace=False), m, inplace=False)
    result = ev.div_by_plain(algo.sum_reduce(ev.rescale_next(ev.mult(ev.match_mod(diff, diff, inplace=False), diff, inplace=False), inplace=False)), len(data))
    return result


#### AST 비교

In [15]:
def var(data:Ciphertext):
    """Calculate variance
    """
    # NOTE: this source text is parsed and dumped below; the recorded AST dump
    # mirrors its docstring, names and statements exactly.
    # Mean of the data.
    m = np.mean(data)
    # Deviation of the data from its mean.
    diff = (data - m)
    # Sum of squared deviations divided by the number of elements.
    result = np.sum(diff*diff)/len(data)
    return result

# Parse the untransformed source of var and print its AST as an indented dump.
tree = ast.parse(inspect.getsource(var))
print(ast.dump(tree, indent=2))

Module(
  body=[
    FunctionDef(
      name='var',
      args=arguments(
        posonlyargs=[],
        args=[
          arg(
            arg='data',
            annotation=Name(id='Ciphertext', ctx=Load()))],
        kwonlyargs=[],
        kw_defaults=[],
        defaults=[]),
      body=[
        Expr(
          value=Constant(value='Calculate variance\n    ')),
        Assign(
          targets=[
            Name(id='m', ctx=Store())],
          value=Call(
            func=Attribute(
              value=Name(id='np', ctx=Load()),
              attr='mean',
              ctx=Load()),
            args=[
              Name(id='data', ctx=Load())],
            keywords=[])),
        Assign(
          targets=[
            Name(id='diff', ctx=Store())],
          value=BinOp(
            left=Name(id='data', ctx=Load()),
            op=Sub(),
            right=Name(id='m', ctx=Load()))),
        Assign(
          targets=[
            Name(id='result', ctx=Store())],
          value=B

In [16]:
@pype.jit_verbose  # <------ invoke the JIT compiler
def var(data:Ciphertext):
    """Calculate variance
    """
    # NOTE: the recorded output below is the AST after compilation; it mirrors
    # this function's docstring and local names.
    # Mean of the data.
    m = np.mean(data)
    # Deviation of the data from its mean.
    diff = (data - m)
    # Sum of squared deviations divided by the number of elements.
    result = np.sum(diff*diff)/len(data)
    return result


[static_analyzer] Inferencing data types... 
Module(
  body=[
    FunctionDef(
      name='var',
      args=arguments(
        posonlyargs=[],
        args=[
          arg(
            arg='data',
            annotation=Name(id='Ciphertext', ctx=Load()))],
        kwonlyargs=[],
        kw_defaults=[],
        defaults=[]),
      body=[
        Expr(
          value=Constant(value='Calculate variance\n    ')),
        Assign(
          targets=[
            Name(id='m', ctx=Store())],
          value=Call(
            func=Attribute(
              value=Name(id='ev', ctx=Load()),
              attr='rescale_next',
              ctx=Load()),
            args=[
              Call(
                func=Attribute(
                  value=Name(id='algo', ctx=Load()),
                  attr='mean',
                  ctx=Load()),
                args=[
                  Name(id='data', ctx=Load())],
                keywords=[])],
            keywords=[
              keyword(
                a