# Import library and split data

In [4]:
import pandas as pd

In [5]:
df = pd.read_csv('../../Data/Blockchain Performance Data/Our/BPD.csv')
df

Unnamed: 0,Send Rates,Block Size,Avg Latency,Throughput
0,10.0,10,0.32,10.0
1,10.0,20,0.82,10.0
2,10.0,30,1.18,9.8
3,10.0,40,1.18,9.8
4,10.0,50,1.18,9.8
...,...,...,...,...
3115,194.1,760,2.57,153.0
3116,200.7,770,2.33,154.8
3117,195.1,780,2.17,158.4
3118,192.4,790,2.26,156.3


In [6]:
xy = df.values
xy

array([[1.000e+01, 1.000e+01, 3.200e-01, 1.000e+01],
       [1.000e+01, 2.000e+01, 8.200e-01, 1.000e+01],
       [1.000e+01, 3.000e+01, 1.180e+00, 9.800e+00],
       ...,
       [1.951e+02, 7.800e+02, 2.170e+00, 1.584e+02],
       [1.924e+02, 7.900e+02, 2.260e+00, 1.563e+02],
       [1.925e+02, 8.000e+02, 2.020e+00, 1.571e+02]])

In [7]:
import torch
import numpy as np
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from sklearn import preprocessing
from sklearn.model_selection import train_test_split

In [8]:
# set random seed
seed = 42
torch.manual_seed(seed)

<torch._C.Generator at 0x187fd9b1210>

In [9]:
# Read raw data and divide training set and test set
raw_data = xy
X = raw_data[:, :2]
Y1 = raw_data[:, -2:-1]
Y2 = raw_data[:, -1:]

In [10]:
X

array([[ 10. ,  10. ],
       [ 10. ,  20. ],
       [ 10. ,  30. ],
       ...,
       [195.1, 780. ],
       [192.4, 790. ],
       [192.5, 800. ]])

In [11]:
Y1

array([[0.32],
       [0.82],
       [1.18],
       ...,
       [2.17],
       [2.26],
       [2.02]])

In [12]:
Y2

array([[ 10. ],
       [ 10. ],
       [  9.8],
       ...,
       [158.4],
       [156.3],
       [157.1]])

# MLP1(send rates, block size) = latency

In [13]:
# Split training set and test set
Xtrain1, Xtest1, Ytrain1, Ytest1 = train_test_split(X, Y1, test_size=0.2, random_state=1)

In [14]:
# test set
Xtest1

array([[190.2, 720. ],
       [ 15.1, 310. ],
       [ 75.3,  40. ],
       ...,
       [105.4, 440. ],
       [100.1,  80. ],
       [110.5,  20. ]])

In [15]:
Ytest1

array([[2.99],
       [1.18],
       [0.33],
       [1.16],
       [1.15],
       [1.55],
       [0.9 ],
       [0.81],
       [1.2 ],
       [1.22],
       [1.23],
       [1.28],
       [0.92],
       [1.18],
       [0.91],
       [0.26],
       [1.18],
       [1.26],
       [1.17],
       [0.87],
       [1.18],
       [0.92],
       [0.94],
       [1.24],
       [0.88],
       [1.19],
       [2.32],
       [1.01],
       [1.66],
       [0.76],
       [0.77],
       [1.34],
       [0.89],
       [1.19],
       [0.65],
       [0.83],
       [1.13],
       [2.57],
       [0.92],
       [1.2 ],
       [0.97],
       [0.89],
       [0.88],
       [1.16],
       [2.29],
       [0.47],
       [1.15],
       [0.93],
       [0.85],
       [0.8 ],
       [1.19],
       [2.56],
       [1.18],
       [1.73],
       [1.23],
       [1.45],
       [2.1 ],
       [1.27],
       [1.52],
       [0.85],
       [1.17],
       [0.95],
       [0.86],
       [0.25],
       [0.84],
       [0.9 ],
       [1.

In [16]:
# property scaling
min_max_scaler1 = preprocessing.MinMaxScaler()

In [17]:
# Scaling training set data
Xtrain1_minmax = min_max_scaler1.fit_transform(Xtrain1)
Xtrain1_minmax

array([[0.8427673 , 0.51898734],
       [0.39412998, 0.92405063],
       [0.52568134, 0.46835443],
       ...,
       [0.34224319, 0.70886076],
       [0.05293501, 0.94936709],
       [0.34224319, 0.26582278]])

In [18]:
# Apply the same scaling to the test set data
Xtest1_minmax = min_max_scaler1.transform(Xtest1)
Xtest1_minmax

array([[0.94444444, 0.89873418],
       [0.02672956, 0.37974684],
       [0.34224319, 0.03797468],
       ...,
       [0.5       , 0.5443038 ],
       [0.47222222, 0.08860759],
       [0.52672956, 0.01265823]])

In [19]:
Xtest1_tensor = torch.from_numpy(Xtest1_minmax).type(torch.float32)
Ytest1_tensor = torch.from_numpy(Ytest1).type(torch.float32)

In [20]:
Xtest1_tensor

tensor([[0.9444, 0.8987],
        [0.0267, 0.3797],
        [0.3422, 0.0380],
        ...,
        [0.5000, 0.5443],
        [0.4722, 0.0886],
        [0.5267, 0.0127]])

In [21]:
# batch the training dataset
# prepare dataset
class BlockChainDataset(Dataset):
    def __init__(self, data, label):
        self.len = data.shape[0]
        self.x_data = torch.from_numpy(data).type(torch.float32)
        self.y_data = torch.from_numpy(label).type(torch.float32)
        
        
    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]
    
    def __len__(self):
        return self.len

In [22]:
train_dataset = BlockChainDataset(Xtrain1_minmax, Ytrain1)
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=False)

In [23]:
import torch.nn as nn
import torch.optim as optim

In [24]:
# design model using class
class Lat(torch.nn.Module):
    def __init__(self):
        super(Lat, self).__init__()
        self.fc1 = nn.Linear(2, 64)
        self.fc2 = nn.Linear(64, 8)
        self.relu = nn.ReLU()
        self.fc3 = nn.Linear(8, 1)
        self.dropout = nn.Dropout(p=0.5)
        
    def forward(self, x):
        out = self.fc1(x)
        out = self.dropout(out)
        out = self.relu(out)
        out = self.fc2(out)
        out = self.dropout(out)
        out = self.relu(out)
        out = self.fc3(out)
        return out
    
    
model = Lat()

In [25]:
# net = nn.Sequential(
#     nn.Linear(2, 64), nn.BatchNorm1d(64), nn.Dropout(p=0.5), nn.ReLU(),
#     nn.Linear(64, 64), nn.BatchNorm1d(64), nn.Dropout(p=0.5), nn.ReLU(),
#     nn.Linear(64, 1))


# model = net

In [26]:
# construct loss and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [27]:
# traning cycle forward, backward, update
def train(epoch):
    train_loss = 0.0
    for batch_idx, (inputs, labels) in enumerate(train_loader, 0):
        y_pred = model(inputs)
        loss = criterion(y_pred, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * labels.shape[0]
    if (epoch + 1) % 10 == 0:
        print('epoch:', epoch + 1, 'train_loss:', train_loss / len(Xtrain1))
        

def test():
    with torch.no_grad():
        y_pred = model(Xtest1_tensor)
        loss = criterion(y_pred, Ytest1_tensor)
        print('test_loss:', loss)

In [None]:
if __name__ == '__main__':
    for epoch in range(10000):
        train(epoch)
        if (epoch + 1) % 10 == 0:
            model.eval()
            test()

In [None]:
Xtest1

In [None]:
Xtest1[:, 1].reshape(-1)

In [None]:
with torch.no_grad():
    y_pred = model(Xtest1_tensor)
y = pd.concat([pd.Series(Xtest1[:, 0].reshape(-1), name='send rates'), pd.Series(Xtest1[:, 1].reshape(-1), name='block size'), 
               pd.Series(Ytest1_tensor.numpy().reshape(-1), name='latency_true'), pd.Series(y_pred.numpy().reshape(-1), name='latency_pred')], 
              axis=1)
y.to_csv('../../Data/Result/Our/latency_true_pred.csv')

In [2]:
# Compute test set MAE RMSE MAPE
import pandas as pd

data = pd.read_csv('../../Data/Result/Our/latency_true_pred.csv')
data.drop('Unnamed: 0', axis=1, inplace=True)
data.head()

Unnamed: 0,send rates,block size,latency_true,latency_pred
0,190.2,720.0,2.99,2.32081
1,15.1,310.0,1.18,1.152351
2,75.3,40.0,0.33,0.454525
3,40.2,370.0,1.16,1.189866
4,30.1,210.0,1.15,1.186116


In [3]:
y = data['latency_true'].values
y

array([2.99, 1.18, 0.33, 1.16, 1.15, 1.55, 0.9 , 0.81, 1.2 , 1.22, 1.23,
       1.28, 0.92, 1.18, 0.91, 0.26, 1.18, 1.26, 1.17, 0.87, 1.18, 0.92,
       0.94, 1.24, 0.88, 1.19, 2.32, 1.01, 1.66, 0.76, 0.77, 1.34, 0.89,
       1.19, 0.65, 0.83, 1.13, 2.57, 0.92, 1.2 , 0.97, 0.89, 0.88, 1.16,
       2.29, 0.47, 1.15, 0.93, 0.85, 0.8 , 1.19, 2.56, 1.18, 1.73, 1.23,
       1.45, 2.1 , 1.27, 1.52, 0.85, 1.17, 0.95, 0.86, 0.25, 0.84, 0.9 ,
       1.09, 0.84, 1.24, 0.91, 1.2 , 1.25, 1.58, 2.9 , 1.59, 1.1 , 2.33,
       1.21, 1.25, 0.87, 2.18, 1.25, 0.86, 1.38, 2.14, 1.48, 1.26, 1.18,
       0.93, 1.  , 1.11, 1.21, 1.03, 0.81, 1.17, 1.24, 1.17, 1.24, 0.45,
       1.31, 1.15, 1.3 , 2.17, 0.9 , 1.07, 2.36, 1.17, 1.77, 1.25, 1.68,
       1.89, 0.89, 0.43, 0.92, 0.59, 1.16, 1.6 , 0.67, 0.94, 0.96, 1.04,
       0.95, 0.97, 0.87, 0.88, 1.16, 0.8 , 0.92, 1.17, 0.84, 1.16, 0.86,
       1.66, 0.95, 1.88, 0.87, 1.12, 1.17, 1.18, 1.87, 1.75, 3.07, 0.94,
       0.93, 0.87, 1.27, 0.8 , 1.24, 1.18, 1.02, 0.

In [4]:
y_hat = data['latency_pred'].values
y_hat

array([2.3208103 , 1.152351  , 0.45452467, 1.1898661 , 1.1861162 ,
       1.7515868 , 0.9486697 , 0.934467  , 1.1973859 , 1.1977936 ,
       1.2304422 , 1.2882113 , 0.9354242 , 1.1599122 , 0.91887003,
       0.30472273, 1.1739374 , 1.243377  , 1.1532828 , 0.74696815,
       1.145673  , 0.9025149 , 0.902382  , 1.3724489 , 0.87479097,
       1.6160862 , 2.0766547 , 1.1960893 , 1.9390342 , 0.86204463,
       0.9417342 , 1.4168537 , 0.89399344, 1.390674  , 0.7005626 ,
       0.9351986 , 1.1299369 , 2.2226622 , 0.92101866, 1.1236999 ,
       0.9650107 , 0.9070456 , 0.88209254, 1.1825789 , 2.4912877 ,
       0.5988471 , 1.1808361 , 0.9260364 , 0.9299646 , 0.86709535,
       1.1554462 , 2.5880766 , 1.07282   , 1.9384309 , 1.2471476 ,
       1.4291681 , 2.016405  , 1.2401351 , 1.6276275 , 0.93957084,
       1.2254604 , 0.9154847 , 0.90830106, 0.23859358, 0.8875993 ,
       1.001138  , 1.1144003 , 0.8999664 , 1.2292085 , 0.9316595 ,
       1.2094281 , 1.2047896 , 1.4490453 , 2.8490198 , 1.96965

In [7]:
from sklearn import metrics

ModuleNotFoundError: No module named 'sklearn'

In [6]:
MAE = metrics.mean_absolute_error(y, y_hat)
RMSE = metrics.mean_squared_error(y, y_hat) ** 0.5
MAPE = metrics.mean_absolute_percentage_error(y, y_hat)
print('MAE: ', MAE)
print('RMSE: ', RMSE)
print('MAPE: ', MAPE)

NameError: name 'metrics' is not defined

# MLP2(send rates, block size) = throughput

In [None]:
# Split training set and test set
Xtrain2, Xtest2, Ytrain2, Ytest2 = train_test_split(X, Y2, test_size=0.2, random_state=1)

In [None]:
Xtest2

In [None]:
Ytest2

In [None]:
# property scaling
min_max_scaler2 = preprocessing.MinMaxScaler()

In [None]:
# Scaling training set data
Xtrain2_minmax = min_max_scaler2.fit_transform(Xtrain2)
Xtrain2_minmax

In [None]:
# Apply the same scaling to the test set data
Xtest2_minmax = min_max_scaler2.transform(Xtest2)
Xtest2_minmax

In [None]:
Xtest2_tensor = torch.from_numpy(Xtest2_minmax).type(torch.float32)
Ytest2_tensor = torch.from_numpy(Ytest2).type(torch.float32)

In [None]:
train_dataset = BlockChainDataset(Xtrain2_minmax, Ytrain2)
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=False)

In [None]:
# design model using class
class Thr(torch.nn.Module):
    def __init__(self):
        super(Thr, self).__init__()
        self.fc1 = nn.Linear(2, 64)
        self.fc2 = nn.Linear(64, 8)
        self.relu = nn.ReLU()
        self.fc3 = nn.Linear(8, 1)
        self.dropout = nn.Dropout(p=0.5)
        
    def forward(self, x):
        out = self.fc1(x)
        out = self.dropout(out)
        out = self.relu(out)
        out = self.fc2(out)
        out = self.dropout(out)
        out = self.relu(out)
        out = self.fc3(out)
        return out
    
    
model1 = Thr()

In [None]:
# construct loss and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model1.parameters(), lr=0.001)

In [None]:
# traning cycle forward, backward, update
def train1(epoch):
    train_loss = 0.0
    for batch_idx, (inputs, labels) in enumerate(train_loader, 0):
        y_pred = model1(inputs)
        loss = criterion(y_pred, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * labels.shape[0]
    if (epoch + 1) % 10 == 0:
        print('epoch:', epoch + 1, 'train_loss:', train_loss / len(Xtrain2))
        

def test1():
    with torch.no_grad():
        y_pred = model1(Xtest2_tensor)
        loss = criterion(y_pred, Ytest2_tensor)
        print('test_loss:', loss)

In [None]:
if __name__ == '__main__':
    for epoch in range(10000):
        train1(epoch)
        if (epoch + 1) % 10 == 0:
            model1.eval()
            test1()

In [None]:
with torch.no_grad():
    y_pred = model1(Xtest2_tensor)
y = pd.concat([pd.Series(Xtest2[:, 0].reshape(-1), name='send rates'), pd.Series(Xtest2[:, 1].reshape(-1), name='block size'), 
               pd.Series(Ytest2_tensor.numpy().reshape(-1), name='throughput_true'), 
               pd.Series(y_pred.numpy().reshape(-1), name='throughput_pred')], axis=1)
y.to_csv('../../Data/Result/Our/throughput_true_pred.csv')

In [None]:
# Compute the test set MAE RMSE MAPE
import pandas as pd

data = pd.read_csv('../../Data/Result/Our/throughput_true_pred.csv')
data.drop('Unnamed: 0', axis=1, inplace=True)
data.head()

In [None]:
y = data['throughput_true'].values
y

In [None]:
y_hat = data['throughput_pred'].values
y_hat

In [None]:
from sklearn import metrics

In [None]:
MAE = metrics.mean_absolute_error(y, y_hat)
RMSE = metrics.mean_squared_error(y, y_hat) ** 0.5
MAPE = metrics.mean_absolute_percentage_error(y, y_hat)
print('MAE: ', MAE)
print('RMSE: ', RMSE)
print('MAPE: ', MAPE)

# Choose the optimal block size

## MLP1 to predict latency

In [None]:
TPS = []
for i in range(10,210,10):
    for j in range(20):
        TPS.append(i)
TPS

In [None]:
BS = list(range(10, 210, 10)) * 20
BS

In [None]:
len(BS)

In [None]:
XpBS = {"Xp": TPS, "BS": BS}
XpBS

In [None]:
XpBS = pd.DataFrame(XpBS)
XpBS

In [None]:
XpBS_values = XpBS.values
XpBS_values

In [None]:
# Normalization
XpBS_minmax = min_max_scaler1.transform(XpBS_values)
XpBS_minmax

In [None]:
XpBS_tensor = torch.from_numpy(XpBS_minmax).type(torch.float32)

In [None]:
with torch.no_grad():
    latency_pred = model(XpBS_tensor)
y = pd.concat([pd.Series(XpBS_values[:, 0].reshape(-1), name='send rates'), pd.Series(XpBS_values[:, 1].reshape(-1), name='block size'), 
               pd.Series(latency_pred.numpy().reshape(-1), name='latency_pred')], axis=1)
y.index = XpBS.index
y.to_csv('../../Data/Result/Our/latency_pred.csv')

In [None]:
y

## MLP2 to predict throughput

In [None]:
# Normalization
XpBS_minmax2 = min_max_scaler2.transform(XpBS_values)
XpBS_minmax2

In [None]:
XpBS_tensor2 = torch.from_numpy(XpBS_minmax2).type(torch.float32)

In [None]:
with torch.no_grad():
    throughput_pred = model1(XpBS_tensor2)
y = pd.concat([pd.Series(XpBS_values[:, 0].reshape(-1), name='send rates'), pd.Series(XpBS_values[:, 1].reshape(-1), name='block size'), 
               pd.Series(throughput_pred.numpy().reshape(-1), name='throughput_pred')], axis=1)
y.index = XpBS.index
y.to_csv('../../Data/Result/Our/throughput_pred.csv')

In [None]:
y

In [None]:
with torch.no_grad():
    latency_pred = model(XpBS_tensor)
    throughput_pred = model1(XpBS_tensor2)
y = pd.concat([pd.Series(XpBS_values[:, 0].reshape(-1), name='send rates'), pd.Series(XpBS_values[:, 1].reshape(-1), name='block size'), 
               pd.Series(latency_pred.numpy().reshape(-1), name='latency_pred'),
               pd.Series(throughput_pred.numpy().reshape(-1), name='throughput_pred')], axis=1)
y.index = XpBS.index
y.to_csv('../../Data/Result/Our/latency_throughput_pred.csv')

## Set the score function to chose the best block

In [None]:
y

In [None]:
for name, group in y.groupby('send rates', sort=False):
    print(name)
    print(group)

In [None]:
la_max_min = y.groupby('send rates', sort=False)['latency_pred'].agg(['max', 'min'])
la_max_min

In [None]:
la_col=['latency_max', 'latency_min']
la_max_min.columns = la_col
la_max_min

In [None]:
th_max_min = y.groupby('send rates', sort=False)['throughput_pred'].agg(['max', 'min'])
th_max_min

In [None]:
th_col=['throughput_max', 'throughput_min']
th_max_min.columns = th_col
th_max_min

In [None]:
# Restore the grouping index to the column
la_max_min.reset_index(inplace=True)
la_max_min

In [None]:
th_max_min.reset_index(inplace=True)
th_max_min

In [None]:
la_max = la_max_min['latency_max'].values
la_max = list(la_max)
la_max
len(la_max)

In [None]:
la_max_all = []
for i in range(len(la_max)):
    la_max_all.extend([la_max[i]]*20)
    
la_max_all

In [None]:
la_min = la_max_min['latency_min'].values
la_min = list(la_min)
la_min

In [None]:
la_min_all = []
for i in range(len(la_min)):
    la_min_all.extend([la_min[i]]*20)
    
la_min_all

In [None]:
y['latency_min'] = la_min_all
y

In [None]:
y['latency_max'] = la_max_all
y

In [None]:
th_max = th_max_min['throughput_max'].values
th_max = list(th_max)
th_max

In [None]:
th_max_all = []
for i in range(len(th_max)):
    th_max_all.extend([th_max[i]] * 20)
    
th_max_all

In [None]:
th_min = th_max_min['throughput_min'].values
th_min = list(th_min)
th_min

In [None]:
th_min_all = []
for i in range(len(th_min)):
    th_min_all.extend([th_min[i]] * 20)
    
th_min_all

In [None]:
y['throughput_min'] = th_min_all
y

In [None]:
y['throughput_max'] = th_max_all
y

In [None]:
y.to_csv('../../Data/Result/Our/latency_throughput_pred_max_min.csv')

### choose different weights

In [None]:
# Read the processed data directly
y = pd.read_csv('../../Data/Result/Our/latency_throughput_pred_max_min.csv')
y

In [None]:
alpha = 0.1
beta = 0.9

In [None]:
latency_score = alpha * ((y['latency_max'] - y['latency_pred']) / (y['latency_max'] - y['latency_min']))

In [None]:
throughput_score = beta * ((y['throughput_pred'] - y['throughput_min']) / (y['throughput_max'] - y['throughput_min']))

In [None]:
y_score = latency_score.values + throughput_score.values
y_score

In [None]:
y['score'] = y_score

In [None]:
y.to_csv('../../Data/Result/Our/latency_throughput_score_alpha' + str(alpha) + '_beta' + str(beta) + '.csv')

In [None]:
y