In [44]:
import torch
from torch import nn

import sys
sys.path.append("..")
import pandas as pd

from low_precision_utils import utils
from low_precision_utils import metrics
import numpy as np
import copy
import torch.optim
import qtorch

In [45]:
DESIRED_STEPS = 1000
DATASET_SIZE = 700
BATCH_SIZE = 8
STEPS_PER_EPOCH = DATASET_SIZE // BATCH_SIZE
EPOCHS = DESIRED_STEPS // STEPS_PER_EPOCH + 1 

In [46]:
def get_grad(model):
    return model.linear.network.weight.grad


def diff_of_grad(wrapper, model_weight, master_weight, data, target):
    # at the same traning step
    # that is have the same training data and target
    # we compute the gradient on full precison model
    # then compute the same thing on low precision model (with different seed)
    # we check if for each parameter the estimation is biased

    # this time, we check the difference between activation quantise only
    # and full precision model

    master_weight.zero_grad()
    reference_loss = master_weight.loss_acc(data, target)["loss"]
    reference_loss.backward()
    reference_grad = get_grad(master_weight)
    master_weight.zero_grad()

    grad_estimation_samples = []
    for i in range(100):
        model_weight.zero_grad()
        sample_loss = model_weight.loss_acc(data, target)["loss"]
        sample_loss.backward()
        sample_grad = get_grad(model_weight)
        grad_estimation_samples.append(np.array(sample_grad.detach().cpu()))
        model_weight.zero_grad()

    return np.array(reference_grad.detach().cpu()), grad_estimation_samples
    


In [47]:
class LogisticRegression(nn.Module):
    def __init__(self):
        super(LogisticRegression, self).__init__()
        self.linear = nn.Linear(5, 1)
        self.input_size = (5,)

    def forward(self, x):
        return torch.sigmoid(self.linear(x))

    def loss_acc(self, x, y):
        output = self(x)
        loss = nn.BCELoss()(output, y)
        pred = output.round()
        acc = pred.eq(y.view_as(pred)).sum().item() / len(y)
        return {"loss": loss, "acc": acc}

def loadSmallTableData(device):
    train_data = pd.read_csv('../train_data.csv')
    X_train = train_data.drop('purchased', axis=1).values
    y_train = train_data['purchased'].values

    # Load the test data
    test_data = pd.read_csv('../test_data.csv')
    X_test = test_data.drop('purchased', axis=1).values
    y_test = test_data['purchased'].values

    # Convert the data to PyTorch tensors
    X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
    y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
    X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
    y_test_tensor = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

    return X_train_tensor.to(device), y_train_tensor.to(device), X_test_tensor.to(device), y_test_tensor.to(device)

def getBatches(X, y):
    length = len(X)
    idx = torch.randperm(length)
    X = X[idx]
    y = y[idx]
    for i in range(0, length, BATCH_SIZE):
        yield X[i:i+BATCH_SIZE], y[i:i+BATCH_SIZE]


In [48]:
device = "cuda"
bit1 = qtorch.FloatingPoint(8, 1)
X_train, y_train, X_test, y_test = loadSmallTableData(device)

In [49]:
model = LogisticRegression().to(device)

In [50]:
model = utils.replace_linear_with_quantized(model)

In [51]:
master_weight = model
model_weight = copy.deepcopy(master_weight)


In [52]:
model_weight = utils.apply_number_format(model_weight, bit1, bit1, "stochastic", "stochastic")

In [53]:
opt = torch.optim.SGD(master_weight.parameters(), lr=0.03, momentum=0)
scheduler = torch.optim.lr_scheduler.ConstantLR(opt)
wrapper = utils.MasterWeightOptimizerWrapper(
    master_weight,
    model_weight,
    opt,
    scheduler,
)

In [54]:
@torch.no_grad()
def test(network, dataset):
    network.eval()
    correct = 0
    total_loss = 0
    i = 0
    n = 0
    with torch.no_grad():
        for data, target in dataset:
            i += 1
            n += len(data)
            loss_acc = network.loss_acc(data, target)
            total_loss += loss_acc["loss"].item()
            correct += loss_acc["acc"] * len(data)
    accuracy = correct / n
    avg_loss = total_loss / i
    network.train()
    return {"test_acc": accuracy, "test_loss": avg_loss}

training_time = 0
import tqdm
import time
test_result_m = {}
result_log = {}
stepi=0
bar = tqdm.tqdm()
grad_real_estimations = []
for epoch in range(EPOCHS):
    start_time = time.perf_counter()
    train_losses, train_accs = [], []
    master_weight.train()
    model_weight.train()

    for X,y in getBatches(X_train,y_train):
        stepi += 1
        result_log.update(wrapper.train_on_batch(X, y))
        bar.update(1)
        bar.set_postfix(result_log)
        result_log["lr"] = opt.param_groups[0]["lr"]
        if stepi >= DESIRED_STEPS:
            test_result_m = test(master_weight, ((X_test, y_test),))
            grad_entire = metrics.grad_on_dataset(master_weight, X_train, y_train)
            result_log.update(grad_entire)
            break
    grad_real_estimations.append(diff_of_grad(wrapper, model_weight, master_weight, X, y))

    # wrapper.master_params_to_model_params(quantize=False)
    result_log.update(metrics.grad_on_dataset(master_weight, X_train, y_train))
    training_time += time.perf_counter()-start_time
    test_result_m = test(master_weight, ((X_test, y_test),))
    print(f'epoch % 2d  test loss m %.3f   test acc m %.3f  training time %.2f'%(epoch+1, test_result_m["test_loss"], test_result_m["test_acc"], training_time))
  

1000it [2:30:19,  9.02s/it, loss=0.35, acc=0.875, lr=0.03, grad_norm_entire=0.0334, zero_percentage=0]
104it [00:01, 32.38it/s, loss=0.32, acc=1, lr=0.03, grad_norm_entire=0.256, zero_percentage=0]     

epoch  1  test loss m 0.437   test acc m 0.923  training time 1.63


194it [00:03, 41.44it/s, loss=0.232, acc=1, lr=0.03, grad_norm_entire=0.152, zero_percentage=0]    

epoch  2  test loss m 0.332   test acc m 0.937  training time 3.18


281it [00:05, 36.92it/s, loss=0.237, acc=1, lr=0.03, grad_norm_entire=0.109, zero_percentage=0]    

epoch  3  test loss m 0.285   test acc m 0.940  training time 4.84


369it [00:06, 32.77it/s, loss=0.273, acc=0.875, lr=0.03, grad_norm_entire=0.085, zero_percentage=0]

epoch  4  test loss m 0.259   test acc m 0.937  training time 6.41


458it [00:08, 40.08it/s, loss=0.134, acc=1, lr=0.03, grad_norm_entire=0.07, zero_percentage=0]     

epoch  5  test loss m 0.242   test acc m 0.937  training time 8.00


545it [00:09, 33.31it/s, loss=0.572, acc=0.625, lr=0.03, grad_norm_entire=0.0591, zero_percentage=0]

epoch  6  test loss m 0.230   test acc m 0.937  training time 9.61


634it [00:11, 44.25it/s, loss=0.122, acc=1, lr=0.03, grad_norm_entire=0.0511, zero_percentage=0]    

epoch  7  test loss m 0.221   test acc m 0.937  training time 11.16


721it [00:12, 42.33it/s, loss=0.416, acc=0.875, lr=0.03, grad_norm_entire=0.0452, zero_percentage=0]

epoch  8  test loss m 0.214   test acc m 0.940  training time 12.78


809it [00:14, 34.37it/s, loss=0.172, acc=1, lr=0.03, grad_norm_entire=0.0405, zero_percentage=0]    

epoch  9  test loss m 0.209   test acc m 0.940  training time 14.40


896it [00:16, 41.24it/s, loss=0.739, acc=0.625, lr=0.03, grad_norm_entire=0.0363, zero_percentage=0]

epoch  10  test loss m 0.204   test acc m 0.940  training time 16.01


985it [00:17, 41.70it/s, loss=0.232, acc=0.875, lr=0.03, grad_norm_entire=0.0331, zero_percentage=0] 

epoch  11  test loss m 0.201   test acc m 0.937  training time 17.66


1000it [00:18, 56.43it/s, loss=0.235, acc=0.875, lr=0.03, grad_norm_entire=0.0331, zero_percentage=0]

epoch  12  test loss m 0.200   test acc m 0.933  training time 18.82


1000it [00:28, 56.43it/s, loss=0.235, acc=0.875, lr=0.03, grad_norm_entire=0.0331, zero_percentage=0]

In [38]:
def mean_of_vectors(vs):
    sum = np.zeros_like(vs[0])
    for v in vs:
        sum += v
    return sum / len(vs)


def var_of_vectors(vs):
    mean = mean_of_vectors(vs)
    sum = np.zeros_like(vs[0])
    for v in vs:
        sum += (v - mean) ** 2
    return sum / len(vs)

In [39]:
def show_stats(stats, i):
    print(mean_of_vectors(stats[i][1]))
    print(var_of_vectors(stats[i][1]))
    print(stats[i][0])



In [40]:
show_stats(grad_real_estimations, 2)

[[-0.06915283  0.02345215 -0.16585937  0.09882812 -0.17070313]]
[[0.00011669 0.00095896 0.00019269 0.00174728 0.00058624]]
[[-0.07003069  0.01795811 -0.16687551  0.08843183 -0.1695634 ]]


In [41]:
import numpy as np
import scipy.stats as stats

# True vector
def t_test(true_vector, estimated_vectors):
    estimated_vectors = np.array(estimated_vectors)
    print(estimated_vectors.shape)
    # Calculate the average estimated vector
    average_estimated_vector = np.mean(estimated_vectors, axis=0)

    # Calculate the differences
    differences = estimated_vectors - true_vector

    # Perform Paired t-test
    t_statistics, p_values = [], []
    for i in range(true_vector.shape[0]):
        t_statistic, p_value = stats.ttest_1samp(differences[:, i], 0)
        t_statistics.append(t_statistic)
        p_values.append(p_value)

    # Print results
    print(f"True Vector: {true_vector}")
    print(f"Average Estimated Vector: {average_estimated_vector}")
    print(f"Paired t-test p-values: {p_values}")
    significant_differences = np.sum(np.array(p_values) < 0.05)
    print(f"Number of components with significant differences: {significant_differences} out of {true_vector.shape[0]}")

    # Additional Metrics
    # Calculate MSE, RMSE, and Correlation Coefficient for each estimation
    mses = np.mean(differences ** 2, axis=1)
    rmses = np.sqrt(mses)
    correlation_coefficients = [np.corrcoef(true_vector, estimated_vector)[0, 1] for estimated_vector in estimated_vectors]

    # Calculate average MSE, RMSE, and Correlation Coefficient
    average_mse = np.mean(mses)
    average_rmse = np.mean(rmses)
    average_correlation_coefficient = np.mean(correlation_coefficients)

    print(f"Average MSE: {average_mse}")
    print(f"Average RMSE: {average_rmse}")
    print(f"Average Correlation Coefficient: {average_correlation_coefficient}")

In [70]:
def perform_big_t_test(true_vectors, estimated_vectors_list):
    """
    Perform a combined t-test on multiple true vectors and their estimated vectors.
    
    Parameters:
    true_vectors (list of np.array): List of true vectors.
    estimated_vectors_list (list of list of np.array): List of lists of estimated vectors.
    
    Returns:
    dict: A dictionary containing the t-statistic and p-value of the combined t-test.
    """
    all_differences = []

    for true_vector, estimated_vectors in zip(true_vectors, estimated_vectors_list):
        estimated_vectors = np.array(estimated_vectors)
        differences = estimated_vectors - true_vector
        all_differences.extend(differences)

    all_differences = np.array(all_differences)
    combined_differences = np.mean(all_differences, axis=0)
    
    combined_differences = combined_differences[0,:]
    print(combined_differences)
    t_statistic, p_value = stats.ttest_1samp(combined_differences, 0)

    return {
        't_statistic': t_statistic,
        'p_value': p_value
    }

def t_test(true_vector, estimated_vectors):
    """
    Perform t-test and calculate metrics to evaluate the estimation quality.
    
    Parameters:
    true_vector (np.array): The true values of the vector.
    estimated_vectors (list): A list of estimated vectors.
    """
    estimated_vectors = np.array(estimated_vectors)
    
    metrics = calculate_metrics(true_vector, estimated_vectors)
    print_metrics(true_vector, metrics)


def calculate_metrics(true_vector, estimated_vectors):
    """
    Calculate various metrics to evaluate the estimation quality.
    
    Parameters:
    true_vector (np.array): The true values of the vector.
    estimated_vectors (np.array): The estimated vectors.

    Returns:
    dict: A dictionary containing average estimated vector, MSE, RMSE, and correlation coefficients.
    """
    # Calculate the average estimated vector
    average_estimated_vector = np.mean(estimated_vectors, axis=0)
    
    # Calculate the differences
    differences = estimated_vectors - true_vector
    
    # Perform Paired t-test
    t_statistics, p_values = [], []
    for i in range(true_vector.shape[0]):
        t_statistic, p_value = stats.ttest_1samp(differences[:, i], 0)
        t_statistics.append(t_statistic)
        p_values.append(p_value)
    
    # Calculate MSE, RMSE, and Correlation Coefficient for each estimation
    mses = np.mean(differences ** 2, axis=1)
    rmses = np.sqrt(mses)
    correlation_coefficients = [np.corrcoef(true_vector, estimated_vector)[0, 1] for estimated_vector in estimated_vectors]
    
    # Calculate average MSE, RMSE, and Correlation Coefficient
    average_mse = np.mean(mses)
    average_rmse = np.mean(rmses)
    average_correlation_coefficient = np.mean(correlation_coefficients)
    
    return {
        'average_estimated_vector': average_estimated_vector,
        'p_values': p_values,
        'significant_differences': np.sum(np.array(p_values) < 0.05),
        'average_mse': average_mse,
        'average_rmse': average_rmse,
        'average_correlation_coefficient': average_correlation_coefficient,
        'mean_difference': np.mean(differences),
        'direction': 'overestimate' if np.mean(differences) > 0 else 'underestimate' if np.mean(differences) < 0 else 'no bias'
    }

def print_metrics(true_vector, metrics):
    """
    Print the calculated metrics.
    
    Parameters:
    true_vector (np.array): The true values of the vector.
    metrics (dict): A dictionary containing the calculated metrics.
    """
    print(f"True Vector: {true_vector}")
    print(f"Average Estimated Vector: {metrics['average_estimated_vector']}")
    print(f"Paired t-test p-values: {metrics['p_values']}")
    print(f"Number of components with significant differences: {metrics['significant_differences']} out of {true_vector.shape[0]}")
    print(f"Average MSE: {metrics['average_mse']}")
    print(f"Average RMSE: {metrics['average_rmse']}")
    print(f"Average Correlation Coefficient: {metrics['average_correlation_coefficient']}")
    print(f"Mean Difference: {metrics['mean_difference']}")
    print(f"Overall Bias Direction: {metrics['direction']}")


for true_vector, estimated_vectors in grad_real_estimations:
    t_test(true_vector, estimated_vectors)
    print("*"*20)

# Perform a combined t-test across all vectors
true_vectors = [true_vector for true_vector, _ in grad_real_estimations]
estimated_vectors_list = [estimated_vectors for _, estimated_vectors in grad_real_estimations]

big_t_test_result = perform_big_t_test(true_vectors, estimated_vectors_list)
print(f"\nCombined t-test result: t-statistic = {big_t_test_result['t_statistic']}, p-value = {big_t_test_result['p_value']}")

True Vector: [[ 0.2585148   0.22784749 -0.203497   -0.1550882  -0.08595779]]
Average Estimated Vector: [[ 0.26472655  0.23930664 -0.20344727 -0.158125   -0.08921875]]
Paired t-test p-values: [array([0.00852391, 0.01760394, 0.98086672, 0.45991027, 0.08965652])]
Number of components with significant differences: 2 out of 1
Average MSE: 0.0010781824821606278
Average RMSE: 0.024582665413618088
Average Correlation Coefficient: 0.9936792916620897
Mean Difference: 0.0022845782805234194
Overall Bias Direction: overestimate
********************
True Vector: [[ 0.19589204  0.08953348 -0.14765157 -0.06848784  0.01696922]]
Average Estimated Vector: [[ 0.19839722  0.09141602 -0.14982422 -0.07458984  0.0171875 ]]
Paired t-test p-values: [array([0.18931731, 0.47785141, 0.09905809, 0.34159537, 0.9431865 ])]
Number of components with significant differences: 0 out of 1
Average MSE: 0.0012455843389034271
Average RMSE: 0.024625740945339203
Average Correlation Coefficient: 0.9754962307333724
Mean Differen

: 

In [69]:
stats.ttest_1samp?

[0;31mSignature:[0m
[0mstats[0m[0;34m.[0m[0mttest_1samp[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0ma[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mpopmean[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0maxis[0m[0;34m=[0m[0;36m0[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mnan_policy[0m[0;34m=[0m[0;34m'propagate'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0malternative[0m[0;34m=[0m[0;34m'two-sided'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0;34m*[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mkeepdims[0m[0;34m=[0m[0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
Calculate the T-test for the mean of ONE group of scores.

This is a test for the null hypothesis that the expected value
(mean) of a sample of independent observations `a` is equal to the given
population mean, `popmean`.

Parameters
----------
a : array_like
    Sample observations.
popmean : float or array_like
    Expected value in null h

In [43]:
for i in range(len(grad_real_estimations)):
    t_test(grad_real_estimations[i][0], grad_real_estimations[i][1])
    print("*"*10)

(100, 1, 5)
True Vector: [[ 0.32465073  0.1262475  -0.2092338  -0.19066718  0.06783204]]
Average Estimated Vector: [[ 0.32726562  0.12816407 -0.20972656 -0.1953125   0.07077149]]
Paired t-test p-values: [array([0.33836574, 0.20539645, 0.77088319, 0.16036817, 0.35392142])]
Number of components with significant differences: 0 out of 1
Average MSE: 0.0006663842359557748
Average RMSE: 0.019829940050840378
Average Correlation Coefficient: 0.9961007561448865
**********
(100, 1, 5)
True Vector: [[-0.00974826 -0.06938097 -0.11721987 -0.12058986 -0.00532948]]
Average Estimated Vector: [[-0.00894531 -0.07382812 -0.11748535 -0.1209082  -0.00269531]]
Paired t-test p-values: [array([0.76312473, 0.11252516, 0.85586587, 0.92282292, 0.07047263])]
Number of components with significant differences: 0 out of 1
Average MSE: 0.0005938939866609871
Average RMSE: 0.01872951351106167
Average Correlation Coefficient: 0.944159301396356
**********
(100, 1, 5)
True Vector: [[-0.07003069  0.01795811 -0.16687551  0.