<a href="https://colab.research.google.com/github/ETheBest3/perm_inv_in_the_LTH_public/blob/main/perm_inv_in_the_LTH.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Permutation invariances in the Lottery Ticket Hypothesis

Cloning the repository:

In [None]:
# Set the global git identity for this session. USER_EMAIL and USERNAME are
# passed to git literally (there is no `$`/`{}` interpolation on these lines),
# so replace them with real values before running.
!git config --global user.email USER_EMAIL
!git config --global user.name USERNAME
# Clone the project; the next cell changes into the resulting
# perm_inv_in_the_LTH_public directory.
!git clone https://github.com/ETheBest3/perm_inv_in_the_LTH_public.git

In [None]:
# Work from the open_lth directory of the cloned repository: the experiment cell
# invokes `open_lth.py` by relative path.
%cd perm_inv_in_the_LTH_public/open_lth

In [None]:
!pip install torch_geometric

Setting the hyperparameters for our experiments:

In [1]:
# Experiments and analysis iterate over distance = 1..MAX_DIST (passed as --dist).
MAX_DIST = 5

# Passed as --levels; the analysis reads levels 0..NUM_LEVELS.
NUM_LEVELS = 10
# Replicates 1..NUM_TRAININGS are run per distance (passed as --replicate).
NUM_TRAININGS = 36
# Passed as --model_name.
MODEL_NAME = "gcn_1_64_64_64_64_64"
# Passed as --lr.
LR = 0.007
# Passed as --gamma.
GAMMA = 0.4
# Passed as --milestone_steps.
MILESTONE_STEPS = "50ep"
# Passed as --training_steps.
TRAINING_STEPS = "200ep"
# Passed as --momentum.
MOMENTUM = 0.9
# Passed as --loss_criterion.
LOSS_CRITERION = "CrossEntropy"
# Used only by the analysis cell to compute the percent of weights remaining
# per level. NOTE(review): this value is NOT passed to the open_lth command, so
# the plotted x-axis is only correct if the pruning strategy's own fraction is
# also 0.3 -- confirm.
PRUNING_FRACTION = 0.3

Running the experiments:

In [None]:
# Run the lottery-ticket experiment for every distance 1..MAX_DIST and, for each
# distance, for every replicate 1..NUM_TRAININGS.
for distance in range(1, MAX_DIST+1):
  # Dataset root for this distance. The "5000_100_3_2_2" part of the directory
  # name is hard-coded; presumably it mirrors the --num_graphs/--num_nodes/--k/
  # --num_colors values on the command line below -- TODO confirm and keep in sync.
  PATH = "/content/perm_inv_in_the_LTH_public/open_lth/datasets/graph_dataset/graph_dataset_5000_100_3_2_2_"+str(distance)+"/."

  for train_t in range(1, NUM_TRAININGS+1):
    # IPython substitutes each `$name` with the Python variable of that name
    # before the command reaches the shell, so `$PATH` here is the dataset root
    # assigned above, not the shell's PATH environment variable.
    !python open_lth.py lottery --replicate $train_t --dataset_name graph --batch_size 64 --model_name $MODEL_NAME --model_init kaiming_normal --batchnorm_init uniform --optimizer_name sgd --lr $LR --training_steps $TRAINING_STEPS --root $PATH --num_graphs 5000 --num_nodes 100 --k 3 --num_colors 2  --dist $distance --pruning_strategy sparse_global --levels $NUM_LEVELS --milestone_steps $MILESTONE_STEPS --gamma $GAMMA --momentum $MOMENTUM --loss_criterion $LOSS_CRITERION


In [None]:
def get_last_accuracy(filename):
    """Return the most recent test accuracy recorded in a log file.

    The file is scanned from its last line towards its first. The first line
    containing the text 'test_accuracy' is split on commas into exactly three
    fields, and the third field is returned as a float.

    Args:
        filename: Path of the log file to read.

    Returns:
        The accuracy as a float, or None when no line mentions 'test_accuracy'.

    Raises:
        ValueError: If the matching line does not have exactly three
            comma-separated fields, or its third field is not a number.
    """
    with open(filename, 'r') as log_file:
        log_lines = list(log_file)

    # Walk backwards so the latest entry wins.
    for idx in range(len(log_lines) - 1, -1, -1):
        entry = log_lines[idx]
        if 'test_accuracy' not in entry:
            continue
        fields = entry.strip().split(',')
        _, _, accuracy_text = fields
        return float(accuracy_text)
    return None

def get_last_loss(filename):
    """Return the most recent test loss recorded in a log file.

    Looks for the last line of the file that contains the text 'test_loss',
    splits it on commas into exactly three fields, and converts the third
    field to a float.

    Args:
        filename: Path of the log file to read.

    Returns:
        The loss as a float, or None when no line mentions 'test_loss'.

    Raises:
        ValueError: If the matching line does not have exactly three
            comma-separated fields, or its third field is not a number.
    """
    with open(filename, 'r') as handle:
        entries = handle.readlines()

    # Lazily search from the end of the file; only the first hit is consumed.
    matches = (entry for entry in reversed(entries) if 'test_loss' in entry)
    latest = next(matches, None)
    if latest is None:
        return None

    _metric, _iteration, loss_text = latest.strip().split(',')
    return float(loss_text)

In [None]:
import scipy.stats as stats
import math

In [None]:
# Aggregate the final test accuracy of every replicate at every pruning level
# and derive, per distance:
#   means['mean<d>']   -> [(percent of weights remaining, mean accuracy), ...]
#                         for levels 0..NUM_LEVELS
#   models['model<d>'] -> [(percent of weights remaining, p_value), ...]
#                         for levels 1..NUM_LEVELS
# where p_value = P(X > level-0 mean accuracy) for X ~ Normal(level mean, level std).

# Replicates left out of the statistics.
# NOTE(review): the original analysis skipped replicate 32 without explanation;
# presumably that run is missing or invalid -- confirm before changing.
SKIPPED_REPLICATES = {32}

models = {}
means = {}
for distance in range(1, MAX_DIST+1):
  mean = []
  std = []
  means['mean'+str(distance)] = []
  train_path = "/content/open_lth_data/dist_"+str(distance)+"/"
  sparsity = 100  # percent of weights remaining at level 0
  for lev in range(0, NUM_LEVELS+1):
    curr_accuracy = []
    for train_t in range(1, NUM_TRAININGS+1):
      if train_t in SKIPPED_REPLICATES:
        continue
      logger_path = train_path+"replicate_"+str(train_t)+"/level_"+str(lev)+"/main/logger"
      accuracy = get_last_accuracy(logger_path)
      if accuracy is None:
        # Name the offending file instead of failing later with an opaque
        # TypeError inside sum().
        raise ValueError("No test_accuracy entry found in "+logger_path)
      curr_accuracy.append(accuracy)
    curr_mean = sum(curr_accuracy) / len(curr_accuracy)
    # Population variance (divides by n, not n - 1) across the replicates.
    curr_variance = sum((x - curr_mean) ** 2 for x in curr_accuracy) / len(curr_accuracy)
    curr_std = curr_variance ** 0.5
    mean.append(curr_mean)
    means['mean'+str(distance)].append((sparsity, curr_mean))
    std.append(curr_std)
    # The x-axis value assumes every level keeps (1 - PRUNING_FRACTION) of the
    # weights of the previous level.
    sparsity = sparsity * (1- PRUNING_FRACTION)

  # The mean accuracy at level 0 is the reference a ticket has to exceed.
  successful_ticket_mean_accuracy = mean[0]
  models['model'+str(distance)] = []

  for lev in range(1, NUM_LEVELS+1):
    # Reuse the sparsity already computed for this level in the loop above.
    sparsity, ticket_mean = means['mean'+str(distance)][lev]
    std_dev = std[lev]
    if std_dev > 0:
      # Survival function == 1 - cdf, evaluated at the reference accuracy.
      p_value = stats.norm.sf(successful_ticket_mean_accuracy, loc=ticket_mean, scale=std_dev)
    else:
      # All replicates scored identically, so the normal model is degenerate:
      # the probability of exceeding the reference is either 1 or 0.
      p_value = 1.0 if ticket_mean > successful_ticket_mean_accuracy else 0.0
    models['model'+str(distance)].append((sparsity, p_value))

  print("Mean:")
  print(mean)
  print("STD:")
  print(std)
  print("Data:")
  print(models['model'+str(distance)])

Plotting the results (probability of finding a winning ticket):

In [None]:
import matplotlib.pyplot as plt

# `models` (filled by the analysis cell) has the format:
# models = {'model1': [(sparsity1, pvalue1), (sparsity2, pvalue2), ...],
#           'model2': [(sparsity1, pvalue1), (sparsity2, pvalue2), ...],
#           ...}
# Build one legend entry per distance instead of hard-coding five of them, so
# the plot follows MAX_DIST.
models_data = {
    'dist '+str(distance): models['model'+str(distance)]
    for distance in range(1, MAX_DIST+1)
}

for model, values in models_data.items():
    # Each entry is already a list of (sparsity, p_value) tuples; indexing it
    # with a string key would raise TypeError. sorted() returns a new list, so
    # the lists stored in `models` are not reordered in place.
    values = sorted(values)  # Ensures the line connects points in x order
    sparsity = [x[0] for x in values]
    pvalue = [x[1] for x in values]
    plt.scatter(sparsity, pvalue, s=8, label=model)  # s=8 keeps the markers small
    plt.plot(sparsity, pvalue)  # Connects the points with a line

plt.xlabel('Percent of Weights Remaining')
plt.ylabel('Probability of finding a winning ticket')
plt.legend()
plt.show()

Plotting the results (mean accuracy of tickets):

In [None]:
import matplotlib.pyplot as plt

# `means` (filled by the analysis cell) has the format:
# means = {'mean1': [(sparsity1, mean_accuracy1), (sparsity2, mean_accuracy2), ...],
#          'mean2': [(sparsity1, mean_accuracy1), (sparsity2, mean_accuracy2), ...],
#          ...}
# Build one legend entry per distance instead of hard-coding five of them, so
# the plot follows MAX_DIST.
means_data = {
    'dist '+str(distance): means['mean'+str(distance)]
    for distance in range(1, MAX_DIST+1)
}

for model, values in means_data.items():
    # Each entry is already a list of (sparsity, mean accuracy) tuples; indexing
    # it with a string key would raise TypeError. sorted() returns a new list,
    # so the lists stored in `means` are not reordered in place.
    values = sorted(values)  # Ensures the line connects points in x order
    sparsity = [x[0] for x in values]
    mean_accuracy = [x[1] for x in values]
    plt.scatter(sparsity, mean_accuracy, s=8, label=model)  # s=8 keeps the markers small
    plt.plot(sparsity, mean_accuracy)  # Connects the points with a line

plt.xlabel('Percent of Weights Remaining')
plt.ylabel('Mean Accuracy')
plt.legend()
plt.show()

Saving the results in open_lth_data.zip:

In [None]:
dir_name="open_lth_data"
zip_file="open_lth_data.zip"
!zip -r "${zip_file}" "${dir_name}"