In [20]:
import numpy as np
import time

### Load Profiled Data

In [21]:
# Entropy thresholds for which a profiling result file is loaded below.
entropies = [0.0, 0.001, 0.005, 0.01, 0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7]

# data_pl maps each entropy threshold to its loaded profile:
#   e -> [exit_layer_count, eval_time, actual_cost/full_cost, accuracy]
# NOTE(review): allow_pickle=True unpickles Python objects from disk; only
# point this at files you trust.
data_pl = {
    e: np.load('./../plotting2/saved_models/bert_base-SST-2-two_stage/entropy_{}.npy'.format(e), allow_pickle=True)
    for e in entropies
}

### Bucket Accuracy

In [22]:
# acc_buckets maps an accuracy bucket (a multiple of 5, at least 60) to the
# profiled points that fall into it; each point is the tuple
# (latency, exit_layer, entropy, exit_layer_count).
acc_buckets = {}
min_exit_samples = 0 # N/k

for e in entropies:
    profile = data_pl[e]
    for exit_layer in profile[3]:
        # Round the accuracy fraction down to a multiple of 5 percent.
        acc = int(profile[3][exit_layer] * 20) * 5
        # Skip points below 60% or with fewer exits than the minimum sample count.
        if acc < 60 or profile[0][exit_layer] < min_exit_samples:
            continue
        latency = int(profile[1][exit_layer] * 1000)  # scaled by 1000 and truncated
        exit_layer_count = profile[0][exit_layer]
        acc_buckets.setdefault(acc, []).append((latency, exit_layer, e, exit_layer_count))

print("TUPLE - (Latency, Exit Layer Index, Entropy, Number of samples at that exit layer)")
for acc, bucket in acc_buckets.items():
    print(f"accuracy - {acc}", f"Bucket - {sorted(bucket)}")

TUPLE - (Latency, Exit Layer Index, Entropy, Number of samples at that exit layer)
accuracy - 90 Bucket - [(21, 1, 0.5, 107), (32, 2, 0.3, 299), (35, 2, 0.2, 200), (48, 3, 0.05, 54), (92, 6, 0.15, 102), (96, 6, 0.2, 96), (99, 6, 0.05, 89), (102, 7, 0.001, 13), (111, 7, 0.15, 114), (111, 7, 0.3, 89), (120, 8, 0.15, 104), (123, 8, 0.1, 138), (128, 8, 0.2, 82), (177, 12, 0.0, 872)]
accuracy - 100 Bucket - [(30, 2, 0.01, 8), (32, 2, 0.001, 2), (35, 2, 0.005, 2), (37, 2, 0.05, 54), (47, 3, 0.005, 9), (47, 3, 0.01, 15), (62, 4, 0.005, 23), (65, 4, 0.05, 60), (72, 4, 0.001, 3), (74, 5, 0.005, 9), (78, 5, 0.1, 33), (79, 5, 0.01, 12), (80, 5, 0.001, 7), (80, 5, 0.05, 23), (99, 6, 0.001, 32), (105, 7, 0.005, 46), (119, 8, 0.6, 5), (128, 9, 0.6, 2), (154, 11, 0.15, 2), (156, 11, 0.3, 2), (158, 11, 0.4, 1), (159, 11, 0.001, 15), (175, 11, 0.1, 8), (219, 12, 0.5, 1)]
accuracy - 85 Bucket - [(16, 1, 0.6, 271), (32, 2, 0.4, 385), (34, 2, 0.5, 421), (47, 3, 0.3, 111), (50, 3, 0.4, 98), (61, 4, 0.15, 9

A greedy heuristic to get the entropy vector given accuracy and latency constraints

In [23]:
# Accuracy is prioritized. We achieve the highest accuracy possible within the latency budget.
def get_entropies(targt_accuracy, target_latency, buckets=None, num_layers=12):
    """Greedily pick one entropy threshold per exit layer.

    For every exit layer, the candidate with the highest accuracy bucket is
    kept among those whose bucket is at least ``targt_accuracy`` and whose
    latency does not exceed ``target_latency``; ties on accuracy are broken
    by the lower latency.

    Args:
        targt_accuracy: Minimum accuracy bucket a candidate must belong to.
        target_latency: Maximum latency a candidate may have (same unit as
            the latencies stored in ``buckets``).
        buckets: Mapping ``accuracy bucket -> iterable of
            (latency, exit_layer, entropy, sample_count)`` tuples. Defaults to
            the notebook-level ``acc_buckets``.
        num_layers: Length of the returned vector. ``exit_layer`` values are
            used as 1-based positions into it.

    Returns:
        A tuple of ``num_layers`` values. Layers without any admissible
        candidate hold 0. The deepest layer holding a non-zero entropy is
        replaced by 1 (presumably so that every remaining sample exits there;
        confirm against the inference code).
    """
    if buckets is None:
        buckets = acc_buckets

    # Per layer: (entropy, accuracy bucket, latency) of the best candidate so far.
    best_per_layer = [(0, 0, 10)] * num_layers
    for acc, candidates in buckets.items():
        if acc < targt_accuracy:
            continue
        for latency, exit_layer, e, _ in candidates:
            if latency > target_latency:
                continue
            _, best_acc, best_latency = best_per_layer[exit_layer - 1]
            if best_acc < acc or (best_acc == acc and best_latency > latency):
                best_per_layer[exit_layer - 1] = (e, acc, latency)

    res = [e for e, _, _ in best_per_layer]

    # Walk from the deepest layer down to and INCLUDING the first one. The
    # previous stop value of 0 skipped index 0, so a vector whose only
    # selected layer was the first one never got its forced-exit marker.
    for i in range(len(res) - 1, -1, -1):
        if res[i] != 0:
            res[i] = 1
            break
    return tuple(res)

### Simulation

In [25]:
# Parallel lists: the i-th entries together describe one simulated request
# (accuracy target in %, latency target in ms, pause in seconds before it).
target_accuracies = [90, 90, 90, 80, 80, 80, 70, 70, 70, 70]
target_latencies = [150, 100, 80, 150, 120, 100, 120, 100, 80, 50]
sleep_times = [1, 15, 5, 1, 1, 1, 1, 1, 1, 1]

for pause, acc_goal, latency_goal in zip(sleep_times, target_accuracies, target_latencies):
    time.sleep(pause)
    print(f'Entropy vector for {acc_goal}% acc, {latency_goal}ms latency:', 'Corresponding S vector-', get_entropies(acc_goal, latency_goal))

Entropy vector for 90% acc, 150ms latency: Corresponding S vector- (0.4, 0.01, 0.005, 0.005, 0.005, 0.001, 0.005, 0.6, 0.6, 1, 0, 0)
Entropy vector for 90% acc, 100ms latency: Corresponding S vector- (0.4, 0.01, 0.005, 0.005, 0.005, 1, 0, 0, 0, 0, 0, 0)
Entropy vector for 90% acc, 80ms latency: Corresponding S vector- (0.4, 0.01, 0.005, 0.005, 1, 0, 0, 0, 0, 0, 0, 0)
Entropy vector for 80% acc, 150ms latency: Corresponding S vector- (0.4, 0.01, 0.005, 0.005, 0.005, 0.001, 0.005, 0.6, 0.6, 1, 0, 0)
Entropy vector for 80% acc, 120ms latency: Corresponding S vector- (0.4, 0.01, 0.005, 0.005, 0.005, 0.001, 0.005, 1, 0, 0, 0, 0)
Entropy vector for 80% acc, 100ms latency: Corresponding S vector- (0.4, 0.01, 0.005, 0.005, 0.005, 1, 0, 0, 0, 0, 0, 0)
Entropy vector for 70% acc, 120ms latency: Corresponding S vector- (0.4, 0.01, 0.005, 0.005, 0.005, 0.001, 0.005, 1, 0, 0, 0, 0)
Entropy vector for 70% acc, 100ms latency: Corresponding S vector- (0.4, 0.01, 0.005, 0.005, 0.005, 1, 0, 0, 0, 0, 0, 

### Evaluation

In [12]:
def compute_overall_accuracy_latency(data_accLat):
    """Return the sample-weighted mean accuracy and latency over exit layers.

    ``data_accLat`` is indexed positionally: ``[0]`` maps exit layer to the
    number of samples that exited there, ``[1]`` maps exit layer to a latency
    and ``[3]`` maps exit layer to an accuracy. Layers present in ``[0]`` but
    absent from ``[3]`` are ignored.

    Returns:
        ``(accuracy, latency)``, each weighted by the per-layer sample count.

    Raises:
        ZeroDivisionError: if no layer contributes any samples.
    """
    exit_counts = data_accLat[0]
    n_samples = 0
    weighted_acc = 0
    weighted_latency = 0
    for layer in exit_counts:
        if layer not in data_accLat[3]:
            continue
        count = exit_counts[layer]
        n_samples += count
        weighted_acc += count * data_accLat[3][layer]
        weighted_latency += count * data_accLat[1][layer]
    return weighted_acc / n_samples, weighted_latency / n_samples

In [13]:
# Measured per-exit-layer statistics for the (90% accuracy, 150 ms) target.
# NOTE(review): allow_pickle=True unpickles Python objects; load trusted files only.
data_accLat_90_150 = np.load(
    './../plotting2/saved_models/bert_base-SST-2-two_stage/accLat_{}_{}.npy'.format(90, 150),
    allow_pickle=True,
)

# Collapse the per-layer numbers into one overall accuracy / latency pair.
acc, latency = compute_overall_accuracy_latency(data_accLat_90_150)
print("Target Accuracy: 90%, Target Latency: 150ms ")
print(f"Achieved Accuracy: {round(acc*100)}%, Average Latency: {round(latency * 1000)}ms")

Target Accuracy: 90%, Target Latency: 150ms 
Achieved Accuracy: 89%, Average Latency: 78ms


In [14]:
# Measured per-exit-layer statistics for the (80% accuracy, 120 ms) target.
# NOTE(review): allow_pickle=True unpickles Python objects; load trusted files only.
data_accLat_80_120 = np.load(
    './../plotting2/saved_models/bert_base-SST-2-two_stage/accLat_{}_{}.npy'.format(80, 120),
    allow_pickle=True,
)

# Collapse the per-layer numbers into one overall accuracy / latency pair.
acc, latency = compute_overall_accuracy_latency(data_accLat_80_120)
print("Target Accuracy: 80%, Target Latency: 120ms ")
print(f"Achieved Accuracy: {round(acc*100)}%, Average Latency: {round(latency * 1000)}ms")

Target Accuracy: 80%, Target Latency: 120ms 
Achieved Accuracy: 88%, Average Latency: 90ms


In [15]:
# Measured per-exit-layer statistics for the (75% accuracy, 100 ms) target.
# NOTE(review): allow_pickle=True unpickles Python objects; load trusted files only.
data_accLat_75_100 = np.load(
    './../plotting2/saved_models/bert_base-SST-2-two_stage/accLat_{}_{}.npy'.format(75, 100),
    allow_pickle=True,
)
# Collapse the per-layer numbers into one overall accuracy / latency pair.
acc, latency = compute_overall_accuracy_latency(data_accLat_75_100)
print("Target Accuracy: 75%, Target Latency: 100ms ")
print(f"Achieved Accuracy: {round(acc*100)}%, Average Latency: {round(latency * 1000)}ms")

Target Accuracy: 75%, Target Latency: 100ms 
Achieved Accuracy: 85%, Average Latency: 70ms


| Target Accuracy | Target Latency | Entropy Vector | Achieved Accuracy | Achieved Latency |
|-----------------|----------------|----------------|--------------------|---------------|
|90|11|(0.4, 0.01, 0.005, 0.005, 0.005, 0.001, 0.005, 0.6, 0.6, 1, 0, 0)|90|11|
|90|6|(0.4, 0.01, 0.005, 0.005, 0.005, 1, 0, 0, 0, 0, 0, 0)|84|6|
|90|4|(0.4, 0.01, 0.005, 0.005, 1, 0, 0, 0, 0, 0, 0, 0)|78|1|
|80|11|(0.4, 0.01, 0.005, 0.005, 0.005, 0.001, 0.005, 0.6, 0.6, 1, 0, 0)|90|11|
|80|8|(0.4, 0.01, 0.005, 0.005, 0.005, 0.001, 0.005, 1, 0, 0, 0, 0)|88|8.5|
|80|6|(0.4, 0.01, 0.005, 0.005, 0.005, 1, 0, 0, 0, 0, 0, 0)|84|6|
|70|8|(0.4, 0.01, 0.005, 0.005, 0.005, 0.001, 0.005, 1, 0, 0, 0, 0)|88|8.5|
|70|6|(0.4, 0.01, 0.005, 0.005, 0.005, 1, 0, 0, 0, 0, 0, 0)|84|6|
|70|4|(0.4, 0.01, 0.005, 0.005, 1, 0, 0, 0, 0, 0, 0, 0)|70|3.5|
|70|1|(0.4, 0.01, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0)|78|1|
