In [1]:
import pandas as pd
import numpy as np

from memory_profiler import profile

from Pyfhel import Pyfhel, PyPtxt, PyCtxt

import torch
import torch.nn as nn

import time
import os
import sys

# Root of the project checkout. Set the PINPOINT_WORKDIR environment variable to
# run the notebook from another location; the default keeps the original path.
working_directory = os.environ.get("PINPOINT_WORKDIR", "/home/falcetta/PINPOINT_Secret")

# Device that input tensors are moved to before inference.
device = "cpu"
module_path = os.path.abspath(working_directory)
# Make the project-local packages importable. The membership test keeps
# re-running this cell from piling up duplicate sys.path entries.
if module_path not in sys.path:
    sys.path.append(module_path)

from pycrcnn.net_builder.encoded_net_builder_ts import build_from_pytorch
from pycrcnn.crypto.crypto import encrypt_matrix, decrypt_matrix
from train_utils import *

from sklearn.preprocessing import MinMaxScaler

from sklearn.metrics import mean_squared_error, mean_absolute_error

# Models

In [2]:
class Square(torch.nn.Module):
    """Parameter-free layer that squares every element of its input."""

    def forward(self, t):
        """Return a tensor shaped like ``t`` whose entries are ``t`` squared."""
        return t.pow(2)

class Cube(torch.nn.Module):
    """Parameter-free layer that raises every element of its input to the third power."""

    def forward(self, t):
        """Return a tensor shaped like ``t`` whose entries are ``t`` cubed."""
        return t.pow(3)
    
class Printer(torch.nn.Module):
    """Pass-through debugging layer: prints the shape of its input and returns it."""

    def forward(self, t):
        """Print ``t.shape`` and hand ``t`` back unchanged."""
        shape = t.shape
        print(shape)
        return t


class PINPOINT_1CONV(nn.Module):
    """Single-convolution forecaster: Conv1d -> Square -> Flatten -> three Linear layers.

    Args:
        input_size: length of the input window (number of time steps).
        output_horizon: number of values produced by the last Linear layer.
    """

    def __init__(self, input_size, output_horizon):
        super().__init__()

        conv_channels = 32
        conv_width = 3
        # Flattened width after the (unpadded, stride-1) convolution:
        # channels x remaining time steps. Always a multiple of 32, so the
        # halving/quartering below is exact.
        flat_features = conv_channels * (input_size - conv_width + 1)
        half_features = flat_features // 2
        quarter_features = flat_features // 4

        # Layers are created in network order and stored under `main`.
        layers = [
            nn.Conv1d(in_channels=1, out_channels=conv_channels, kernel_size=conv_width),
            Square(),
            nn.Flatten(),
            nn.Linear(flat_features, half_features),
            nn.Linear(half_features, quarter_features),
            nn.Linear(quarter_features, output_horizon),
        ]
        self.main = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the sequential stack and return its output."""
        return self.main(x)

    def __str__(self):
        """Short tag used when the model is formatted into messages."""
        return "PINPOINT_1CONV"

    
class PINPOINT_2CONV(nn.Module):
    """Two-convolution forecaster.

    Conv1d -> Square -> Conv1d -> Square -> Flatten -> three Linear layers.

    Args:
        input_size: length of the input window (number of time steps).
        output_horizon: number of values produced by the last Linear layer.
    """

    def __init__(self, input_size, output_horizon):
        super().__init__()

        first_channels, first_width = 16, 5
        second_channels, second_width = 32, 3

        # Time steps left after each unpadded, stride-1 convolution.
        steps_after_first = input_size - first_width + 1
        steps_after_second = steps_after_first - second_width + 1
        # Flattened width fed to the dense head; always a multiple of 32, so
        # the halving/quartering below is exact.
        flat_features = second_channels * steps_after_second
        half_features = flat_features // 2
        quarter_features = flat_features // 4

        # Layers are created in network order and stored under `main`.
        layers = [
            nn.Conv1d(in_channels=1, out_channels=first_channels, kernel_size=first_width),
            Square(),
            nn.Conv1d(in_channels=first_channels, out_channels=second_channels, kernel_size=second_width),
            Square(),
            nn.Flatten(),
            nn.Linear(flat_features, half_features),
            nn.Linear(half_features, quarter_features),
            nn.Linear(quarter_features, output_horizon),
        ]
        self.main = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the sequential stack and return its output."""
        return self.main(x)

    def __str__(self):
        """Short tag used when the model is formatted into messages."""
        return "PINPOINT_2CONV"

In [3]:
# Experiment tag; first component of the saved-model file name.
experiment_name = "AirlinePassengers"
# Number of most recent observations fed to the model for each forecast.
seq_length = 12
# Number of future points forecast per model call; also part of the saved-model file name.
forecast_horizon = 6
# Architecture tag; last component of the saved-model file name.
model_class = "PINPOINT_2CONV"

In [4]:
# NOTE: torch.load unpickles the stored object, which can execute arbitrary code;
# only load checkpoints that come from a trusted source.
# map_location pins every stored tensor to `device`, so a checkpoint that was
# saved on a GPU machine still loads on a CPU-only one.
model = torch.load(
    f"{working_directory}/Experiments/models/{experiment_name}_{forecast_horizon}_{model_class}.pt",
    map_location=device,
)

In [5]:
# Show the loaded module so its layer layout can be checked by eye.
model

PINPOINT_2CONV(
  (main): Sequential(
    (0): Conv1d(1, 16, kernel_size=(5,), stride=(1,))
    (1): Square()
    (2): Conv1d(16, 32, kernel_size=(3,), stride=(1,))
    (3): Square()
    (4): Flatten(start_dim=1, end_dim=-1)
    (5): Linear(in_features=192, out_features=96, bias=True)
    (6): Linear(in_features=96, out_features=48, bias=True)
    (7): Linear(in_features=48, out_features=6, bias=True)
  )
)

# Dataset

In [6]:
# Load the monthly passenger counts as a Series indexed by month.
monthly_passengers = pd.read_csv(f"{working_directory}/data/airline-passengers.csv", parse_dates=['Month'], index_col='Month')
monthly_passengers = monthly_passengers.loc[:, 'Passengers']
# Declare the month-start frequency explicitly; the date arithmetic below uses it.
monthly_passengers.index.freq = 'MS'
entire_ts = monthly_passengers
train = entire_ts.loc[:pd.Timestamp("1958-01-01")]

# The validation slice is the block of points right after the training range,
# sized at 5% of the training length; everything after it is the test set.
validation_length = int(0.05 * len(train))
validation = entire_ts.loc[train.index[-1] + entire_ts.index.freq:train.index[-1] + validation_length * entire_ts.index.freq]
test = entire_ts.loc[validation.index[-1] + entire_ts.index.freq:]

plot_name = "Monthly passengers airline"
yaxis_name = "Passengers"

# Fold the validation slice back into the training history.
# pd.concat replaces Series.append, which was deprecated in pandas 1.4 and
# removed in pandas 2.0.
train = pd.concat([train, validation])

print(train)
print(test)

Month
1949-01-01    112
1949-02-01    118
1949-03-01    132
1949-04-01    129
1949-05-01    121
             ... 
1958-02-01    318
1958-03-01    362
1958-04-01    348
1958-05-01    363
1958-06-01    435
Freq: MS, Name: Passengers, Length: 114, dtype: int64
Month
1958-07-01    491
1958-08-01    505
1958-09-01    404
1958-10-01    359
1958-11-01    310
1958-12-01    337
1959-01-01    360
1959-02-01    342
1959-03-01    406
1959-04-01    396
1959-05-01    420
1959-06-01    472
1959-07-01    548
1959-08-01    559
1959-09-01    463
1959-10-01    407
1959-11-01    362
1959-12-01    405
1960-01-01    417
1960-02-01    391
1960-03-01    419
1960-04-01    461
1960-05-01    472
1960-06-01    535
1960-07-01    622
1960-08-01    606
1960-09-01    508
1960-10-01    461
1960-11-01    390
1960-12-01    432
Freq: MS, Name: Passengers, dtype: int64


# Expected outputs

In [7]:
# Rolling multi-step forecast with the plaintext model.
# The scaler is fitted on the training history only.
scaler = MinMaxScaler(feature_range=(-1, 1))
scaler.fit(train.values.reshape(-1, 1))

_train = train.copy()
_test = test.copy()

forecast = np.array([])

# Inference only: switch to eval mode once, outside the loop.
model.eval()

# One forward pass per block of `forecast_horizon` test points. Ceiling division
# avoids the extra, discarded pass that `int(len / horizon) + 1` performs when the
# test length is an exact multiple of the horizon.
n_windows = -(-len(test) // forecast_horizon)

# No gradients are needed for forecasting.
with torch.no_grad():
    for i in range(n_windows):
        inputs = _train.values.reshape(len(_train), 1)

        # Normalise the history and keep the last `seq_length` points as model input.
        inputs_normalized = scaler.transform(inputs)
        inputs_normalized = torch.FloatTensor(inputs_normalized[-seq_length:]).to(device)

        predict = model(inputs_normalized.reshape(1, 1, seq_length))
        # Map the prediction back to the original scale.
        predict = scaler.inverse_transform(predict.cpu().numpy())
        forecast = np.append(forecast, predict)

        # Reveal the next block of actual observations so the following window
        # is built from true values rather than from predictions.
        n_revealed = min(forecast_horizon, len(_test))
        _train = pd.concat([_train, _test.iloc[:n_revealed]])
        _test = _test.iloc[n_revealed:]

# The last block may overshoot the test period; trim to its length.
expected_output = pd.Series(data=forecast[:len(test)], index=test.index)

In [8]:
# Forecasts produced by the plaintext model over the test period.
expected_output

Month
1958-07-01    520.900879
1958-08-01    527.536743
1958-09-01    454.099304
1958-10-01    375.765045
1958-11-01    325.655273
1958-12-01    362.828339
1959-01-01    244.982925
1959-02-01    316.782227
1959-03-01    354.687958
1959-04-01    313.899536
1959-05-01    280.673553
1959-06-01    373.962891
1959-07-01    562.605774
1959-08-01    556.341431
1959-09-01    449.869019
1959-10-01    368.655334
1959-11-01    308.667419
1959-12-01    356.108887
1960-01-01     55.480961
1960-02-01    200.349808
1960-03-01    259.226257
1960-04-01    169.851547
1960-05-01     70.018166
1960-06-01    179.222717
1960-07-01    476.959808
1960-08-01    497.164520
1960-09-01    418.113251
1960-10-01    345.942352
1960-11-01    265.923157
1960-12-01    358.050293
Freq: MS, dtype: float64

In [9]:
# Report the mean absolute error of the plaintext forecasts against the test series,
# rounded to two decimals. `{model}` is rendered through the model's __str__.
print(experiment_name)
print(f"MAE of model {model}, forecast horizon: {forecast_horizon}: {round(mean_absolute_error(test, expected_output), 2)}")

AirlinePassengers
MAE of model PINPOINT_2CONV, forecast horizon: 6: 108.68


# Encode the model

In [10]:
# Create the homomorphic-encryption object and its key material.
# NOTE(review): the calls below presumably have to run in this order
# (context, then keys, then relinearisation keys) — confirm before reordering.
HE = Pyfhel()    
# NOTE(review): p / m / intDigits / fracDigits are presumably the plaintext modulus,
# the polynomial degree and the fixed-point encoding widths — confirm against the
# Pyfhel version in use before changing any of them.
HE.contextGen(p=96155351715128, m=8192, intDigits=16, fracDigits=64) 
HE.keyGen()
# NOTE(review): (30, 3) look like bit-count and size arguments for the
# relinearisation keys — confirm against the Pyfhel documentation.
HE.relinKeyGen(30, 3)

# Build the encoded counterpart of the model's `main` Sequential (moved to CPU first).
# The result is iterated layer by layer during encrypted processing.
encoded_model = build_from_pytorch(HE, model.cpu().main)

# Encrypted processing

In [11]:
# Rolling multi-step forecast on encrypted inputs.
# The scaler is fitted on the training history only.
scaler = MinMaxScaler(feature_range=(-1, 1))
scaler.fit(train.values.reshape(-1, 1))

_train = train.copy()
_test = test.copy()

forecast = np.array([])

# MinMaxScaler maps x -> x * scale_ + min_, so the inverse is (y - min_) * (1 / scale_).
# Both constants are loop-invariant, so they are encoded once up front.
_min = HE.encodeFrac(scaler.min_[0])
_scale = HE.encodeFrac(1.0 / scaler.scale_[0])

# One encrypted inference per block of `forecast_horizon` test points. Ceiling
# division avoids the extra pass that `int(len / horizon) + 1` performs when the
# test length is an exact multiple of the horizon; that pass is a full encrypted
# inference whose result was thrown away.
n_windows = -(-len(test) // forecast_horizon)

for i in range(n_windows):
    inputs = _train.values.reshape(len(_train), 1)

    # Normalise the history and keep the last `seq_length` points as model input.
    inputs_normalized = scaler.transform(inputs)
    inputs_normalized = inputs_normalized[-seq_length:].reshape(1, 1, seq_length)

    # Encrypt the window and push it through the encoded layers in order.
    encrypted_output = encrypt_matrix(HE, inputs_normalized)
    for layer in encoded_model:
        encrypted_output = layer(encrypted_output)

    # Undo the min-max scaling while the values are still encrypted, then decrypt.
    encrypted_forecast = [[(x - _min) * _scale for x in encrypted_output[0]]]
    predict = decrypt_matrix(HE, encrypted_forecast)

    forecast = np.append(forecast, predict)

    # Reveal the next block of actual observations so the following window
    # is built from true values rather than from predictions.
    n_revealed = min(forecast_horizon, len(_test))
    _train = pd.concat([_train, _test.iloc[:n_revealed]])
    _test = _test.iloc[n_revealed:]

# The last block may overshoot the test period; trim to its length.
decrypted_output = pd.Series(data=forecast[:len(test)], index=test.index)

In [12]:
# Plaintext-model forecasts, shown again for comparison with the decrypted ones.
expected_output

Month
1958-07-01    520.900879
1958-08-01    527.536743
1958-09-01    454.099304
1958-10-01    375.765045
1958-11-01    325.655273
1958-12-01    362.828339
1959-01-01    244.982925
1959-02-01    316.782227
1959-03-01    354.687958
1959-04-01    313.899536
1959-05-01    280.673553
1959-06-01    373.962891
1959-07-01    562.605774
1959-08-01    556.341431
1959-09-01    449.869019
1959-10-01    368.655334
1959-11-01    308.667419
1959-12-01    356.108887
1960-01-01     55.480961
1960-02-01    200.349808
1960-03-01    259.226257
1960-04-01    169.851547
1960-05-01     70.018166
1960-06-01    179.222717
1960-07-01    476.959808
1960-08-01    497.164520
1960-09-01    418.113251
1960-10-01    345.942352
1960-11-01    265.923157
1960-12-01    358.050293
Freq: MS, dtype: float64

In [13]:
# Forecasts obtained from the encrypted pipeline after decryption.
decrypted_output

Month
1958-07-01    521.027980
1958-08-01    527.565404
1958-09-01    454.053700
1958-10-01    375.776905
1958-11-01    325.713254
1958-12-01    362.837078
1959-01-01    245.168526
1959-02-01    316.849046
1959-03-01    354.743142
1959-04-01    314.094637
1959-05-01    281.088428
1959-06-01    374.355345
1959-07-01    562.802133
1959-08-01    556.443424
1959-09-01    449.866680
1959-10-01    368.710134
1959-11-01    308.754827
1959-12-01    356.150144
1960-01-01     55.902891
1960-02-01    200.731491
1960-03-01    259.617279
1960-04-01    170.707728
1960-05-01     71.623759
1960-06-01    180.796982
1960-07-01    477.936130
1960-08-01    498.060579
1960-09-01    418.491198
1960-10-01    346.357609
1960-11-01    266.377915
1960-12-01    358.474546
Freq: MS, dtype: float64

In [14]:
# Compare the test-set MAE of the plaintext forecasts with that of the forecasts
# computed on encrypted inputs; both are rounded to two decimals.
print(f"MAE of model {model}, forecast horizon: {forecast_horizon}: {round(mean_absolute_error(test, expected_output), 2)}")
print(f"MAE of model {model} used on encrypted inputs, forecast horizon: {forecast_horizon}: {round(mean_absolute_error(test, decrypted_output), 2)}")

MAE of model PINPOINT_2CONV, forecast horizon: 6: 108.68
MAE of model PINPOINT_2CONV used on encrypted inputs, forecast horizon: 6: 108.35


Difference between expected and obtained on encrypted data:

In [15]:
# Per-month gap between the plaintext forecast and the decrypted forecast
# (a negative value means the decrypted forecast is the larger one).
print(expected_output - decrypted_output)

Month
1958-07-01   -0.127101
1958-08-01   -0.028661
1958-09-01    0.045604
1958-10-01   -0.011860
1958-11-01   -0.057981
1958-12-01   -0.008740
1959-01-01   -0.185600
1959-02-01   -0.066819
1959-03-01   -0.055184
1959-04-01   -0.195101
1959-05-01   -0.414875
1959-06-01   -0.392455
1959-07-01   -0.196359
1959-08-01   -0.101994
1959-09-01    0.002339
1959-10-01   -0.054799
1959-11-01   -0.087408
1959-12-01   -0.041257
1960-01-01   -0.421930
1960-02-01   -0.381683
1960-03-01   -0.391022
1960-04-01   -0.856181
1960-05-01   -1.605593
1960-06-01   -1.574264
1960-07-01   -0.976322
1960-08-01   -0.896058
1960-09-01   -0.377947
1960-10-01   -0.415257
1960-11-01   -0.454758
1960-12-01   -0.424253
Freq: MS, dtype: float64
