## Comparing NN to FL

In [1]:
# Checkpoint version: passed to CheckpointManager.load() for both samples and
# used as the directory component of the distance-matrix path.
CHECKPOINT_VER = 3
# Number of random test queries to generate; also used as the number of bar
# positions in the plots.
NUM_TEST_QUERIES = 30

In [2]:
from nn_sampler import NNSampler
from lazy_greedy_sampler import CHECKPOINT_NAME
from checkpoint_manager_v2 import CheckpointManager

# Load the two checkpointed samples compared in this notebook, both at
# CHECKPOINT_VER. The lazy-greedy checkpoint name carries a '_sample' suffix,
# the NN one does not.
# NOTE(review): the type of the loaded objects is not visible here; they are
# only passed on to SampleLossCalculator.test_query().
nn_sample = CheckpointManager.load(NNSampler.CHECKPOINT_NAME, CHECKPOINT_VER)
lg_sample = CheckpointManager.load(f'{CHECKPOINT_NAME}_sample', CHECKPOINT_VER)

In [3]:
from sample_loss_calculator import SampleLossCalculator

# Build the loss calculator from the 'triu_distance_matrix.npz' file stored
# under <CheckpointManager.basePath>/<CHECKPOINT_VER>/ (path is relative to the
# notebook's working directory).
sample_loss_calculator = SampleLossCalculator(
    f'./{CheckpointManager.basePath}/{CHECKPOINT_VER}/triu_distance_matrix.npz')

In [4]:
from config_manager_v2 import ConfigManager
from query_generator_v2 import QueryGenerator
from random import randint

# Read the query-generation settings from the 'queriesConfig' section of the
# project configuration.
table = ConfigManager.get_config('queriesConfig.table')
schema = ConfigManager.get_config('queriesConfig.schema')
pivot = ConfigManager.get_config('queriesConfig.pivot')
# Note the argument order: schema first, then table, then pivot.
query_generator = QueryGenerator(schema, table, pivot)

# Generate NUM_TEST_QUERIES test queries. Each call receives a random integer
# in [1, 3] (both ends inclusive); what that number controls is defined by
# QueryGenerator.get_query().
# NOTE(review): no random seed is set in this notebook, so the generated
# queries presumably differ between runs.
queries = [
    query_generator.get_query(randint(1, 3))
    for _ in range(NUM_TEST_QUERIES)
]

Connecting to the mysql database...
Connected to localhost:datasets.


In [5]:
from tqdm import tqdm

# Evaluate every test query against both samples in a single pass, NN sample
# first and lazy-greedy sample second for each query, with a progress bar.
loss_pairs = [
    (
        sample_loss_calculator.test_query(sql, nn_sample),
        sample_loss_calculator.test_query(sql, lg_sample),
    )
    for sql in tqdm(queries)
]

# Split the per-query pairs into one list per sample; index i in both lists
# refers to queries[i].
nn_losses = [nn_loss for nn_loss, _ in loss_pairs]
lg_losses = [lg_loss for _, lg_loss in loss_pairs]

100%|██████████| 30/30 [00:00<00:00, 124.71it/s]


In [6]:
# Inspect the raw lazy-greedy losses: one array per query, with a varying
# number of entries per array.
lg_losses

[array([0.01282, 0.01274, 0.00964, 0.01022, 0.     , 0.0094 , 0.     ,
        0.01   , 0.01025, 0.01289, 0.01264, 0.00649, 0.01006, 0.00928,
        0.0091 , 0.00984, 0.01335, 0.01025, 0.00592, 0.0127 , 0.01261,
        0.01322, 0.01399, 0.00911, 0.01056, 0.01026, 0.01063, 0.01294,
        0.01023, 0.01295, 0.01015, 0.00925, 0.     , 0.01296]),
 array([0.03013, 0.     , 0.03456, 0.03092, 0.0284 , 0.03151, 0.0308 ,
        0.03056, 0.02835, 0.03163, 0.03089]),
 array([0.00034, 0.00034, 0.00022, 0.00022, 0.00034, 0.00034, 0.     ,
        0.00036, 0.00022, 0.00034, 0.00034, 0.00036, 0.00035, 0.00037,
        0.00023, 0.00037, 0.00033, 0.00035, 0.00036, 0.00022, 0.00022,
        0.00035, 0.     , 0.00022, 0.00034, 0.00036, 0.00035, 0.00035,
        0.00022, 0.00035, 0.00036, 0.00035, 0.00034, 0.00036, 0.00034,
        0.00022, 0.00034, 0.00037, 0.00022, 0.00036, 0.00036, 0.00022,
        0.00036, 0.00034, 0.     , 0.00034, 0.00036, 0.     , 0.00034,
        0.00034, 0.00033, 0.00034, 0. 

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Total loss per query: collapse each query's loss array into its sum.
nn_sum_losses = [np.sum(losses) for losses in nn_losses]
lg_sum_losses = [np.sum(losses) for losses in lg_losses]

# Take the x positions from the data that was actually computed rather than
# from NUM_TEST_QUERIES, so the plot stays valid if that constant is edited
# after the losses were calculated.
query_idx = np.arange(len(nn_sum_losses))
bar_width = 0.4

# Draw the two series side by side; bars placed at identical x positions
# would be painted over each other.
plt.bar(query_idx - bar_width / 2, nn_sum_losses, width=bar_width, label='nn')
# Fixed: the keyword was misspelled 'lable', which plt.bar rejects.
plt.bar(query_idx + bar_width / 2, lg_sum_losses, width=bar_width, label='lazy')
plt.xlabel("Query num")
plt.ylabel("loss")
plt.title("Sum error - Neural Network vs Lazy Greedy")
plt.legend()  # the label= values are only shown when a legend is drawn
plt.show()

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Mean loss per query: collapse each query's loss array into its average.
nn_avg_losses = [np.average(losses) for losses in nn_losses]
lg_avg_losses = [np.average(losses) for losses in lg_losses]

# Take the x positions from the data that was actually computed rather than
# from NUM_TEST_QUERIES, so the plot stays valid if that constant is edited
# after the losses were calculated.
query_idx = np.arange(len(nn_avg_losses))
bar_width = 0.4

# Draw the two series side by side; bars placed at identical x positions
# would be painted over each other.
plt.bar(query_idx - bar_width / 2, nn_avg_losses, width=bar_width, label='nn')
plt.bar(query_idx + bar_width / 2, lg_avg_losses, width=bar_width, label='lazy')
plt.xlabel("Query num")
plt.ylabel("loss")
plt.title("Average error - Neural Network vs Lazy Greedy")
plt.legend()  # the label= values are only shown when a legend is drawn
plt.show()

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Worst-case loss per query: collapse each query's loss array into its maximum.
# Fixed: this previously used np.average, which made the "Max error" plot a
# duplicate of the average plot.
# np.max raises on an empty array, so an empty loss array maps to NaN (the
# value np.average would give) and is simply not drawn.
nn_max_losses = [np.max(losses) if np.size(losses) else np.nan for losses in nn_losses]
lg_max_losses = [np.max(losses) if np.size(losses) else np.nan for losses in lg_losses]

# Take the x positions from the data that was actually computed rather than
# from NUM_TEST_QUERIES, so the plot stays valid if that constant is edited
# after the losses were calculated.
query_idx = np.arange(len(nn_max_losses))
bar_width = 0.4

# Draw the two series side by side; bars placed at identical x positions
# would be painted over each other.
plt.bar(query_idx - bar_width / 2, nn_max_losses, width=bar_width, label='nn')
plt.bar(query_idx + bar_width / 2, lg_max_losses, width=bar_width, label='lazy')
plt.xlabel("Query num")
plt.ylabel("loss")
plt.title("Max error - Neural Network vs Lazy Greedy")
plt.legend()  # the label= values are only shown when a legend is drawn
plt.show()