In [None]:
import itertools
from sentence_transformers import LoggingHandler, SentenceTransformer, InputExample, evaluation
from sentence_transformers import models, losses
import logging
import gzip
import csv
import torch
import numpy as np
import pandas as pd

In [None]:
# Send INFO-level log records through the LoggingHandler imported from
# sentence_transformers, using a timestamped "<time> - <message>" layout.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    handlers=[LoggingHandler()],
)

# Checkpoint name of the teacher; the pruned student models evaluated later
# in this notebook are loaded from this same checkpoint name.
teacher_model_name = "stsb-bert-base"
teacher_model = SentenceTransformer(teacher_model_name)

In [3]:
# We use the STS benchmark dataset to see how much performance we lose.
# Path to the gzip-compressed, tab-separated benchmark file read further below.
sts_dataset_path = "datasets/stsbenchmark.tsv.gz"

In [None]:
# Collect the "dev" split of the STS benchmark. These sentence pairs are used to
# measure the performance of the student model in comparison to the teacher model.
logging.info("Read STSbenchmark dev dataset")
dev_samples = []
with gzip.open(sts_dataset_path, mode="rt", encoding="utf8") as sts_file:
    for record in csv.DictReader(sts_file, delimiter="\t", quoting=csv.QUOTE_NONE):
        # Rows belonging to any other split are skipped.
        if record["split"] != "dev":
            continue
        # Dividing the gold score by 5.0 normalizes it to the range 0 ... 1.
        normalized_score = float(record["score"]) / 5.0
        sentence_pair = [record["sentence1"], record["sentence2"]]
        dev_samples.append(InputExample(texts=sentence_pair, label=normalized_score))

In [None]:
# Candidate indices 0 through 11 (used later as encoder-layer positions).
numbers = [*range(12)]

# Every 4-element subset of those indices, in the order itertools yields them.
# (Use a subset size of 6 instead of 4 to enumerate the 6-element subsets.)
combinations = [*itertools.combinations(numbers, 4)]

# Array form of the subsets: one row per subset, one column per chosen index.
combinations_np = np.array(combinations)

# Display only the first subset as a quick sanity check.
combinations_np[0]

In [None]:
# Number of layer subsets; the evaluation loop runs one model evaluation per entry.
len(combinations_np)

In [None]:
# Evaluate a pruned student for every layer subset and collect one result row
# per subset: [subset, spearman/pearson for cosine, manhattan, euclidean, dot].
data = []

# Neither the evaluator nor the pretrained weights depend on the subset, so they
# are created once here instead of once per combination. Reloading the
# checkpoint inside the loop repeated the same expensive work on every iteration.
dev_evaluator_sts = evaluation.EmbeddingSimilarityEvaluator.from_input_examples(dev_samples, name="sts-dev")
student_model = SentenceTransformer(teacher_model_name)
auto_model = student_model._first_module().auto_model

# Keep the complete layer stack in a plain list so every layer stays reachable
# after `auto_model.encoder.layer` is overwritten with a subset below.
all_layers = list(auto_model.encoder.layer)

for combo in combinations_np:
    layers_to_keep = combo
    logging.info("Remove layers from student. Only keep these layers: {}".format(layers_to_keep))

    # Build the pruned encoder from the untouched full stack. The layer modules
    # are shared between subsets, which is safe because evaluation only reads them.
    new_layers = torch.nn.ModuleList(
        [layer_module for i, layer_module in enumerate(all_layers) if i in layers_to_keep]
    )

    auto_model.encoder.layer = new_layers
    auto_model.config.num_hidden_layers = len(layers_to_keep)

    evaluator_results = dev_evaluator_sts(student_model)
    # The model evaluated here is the pruned student, not the teacher, so the
    # message says so and reports the score instead of an empty header.
    logging.info(
        "Student performance (sts-dev spearman cosine): {:.4f}".format(
            evaluator_results["sts-dev_spearman_cosine"]
        )
    )

    data.append([
        combo,
        evaluator_results["sts-dev_spearman_cosine"],
        evaluator_results["sts-dev_pearson_cosine"],
        evaluator_results["sts-dev_spearman_manhattan"],
        evaluator_results["sts-dev_pearson_manhattan"],
        evaluator_results["sts-dev_spearman_euclidean"],
        evaluator_results["sts-dev_pearson_euclidean"],
        evaluator_results["sts-dev_spearman_dot"],
        evaluator_results["sts-dev_pearson_dot"]
    ])

In [None]:
# Display the raw result rows (layer subset followed by its correlation scores).
data

In [None]:
# Score columns: one Spearman/Pearson pair per similarity function, listed in
# the same order as the score values stored in each row of `data`.
metric_cols = [
    "spearman_cosine", "pearson_cosine",
    "spearman_manhattan", "pearson_manhattan",
    "spearman_euclidean", "pearson_euclidean",
    "spearman_dot", "pearson_dot",
]

# The first column holds the layer subset the scores belong to.
cols = ["layer", *metric_cols]

# Tabulate the collected rows and display the resulting frame.
excel = pd.DataFrame(data=data, columns=cols)
excel

In [None]:
# Destination workbook for the results table.
file_name = "Combination_Layer_Results.xlsx"

# Write the table without the DataFrame index column, then confirm on stdout.
excel.to_excel(excel_writer=file_name, index=False)
print("DataFrame is written to Excel File successfully.")