In [23]:
!pip install sacremoses

Collecting sacremoses
  Downloading sacremoses-0.1.1-py3-none-any.whl.metadata (8.3 kB)
Downloading sacremoses-0.1.1-py3-none-any.whl (897 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 897.5/897.5 kB 10.4 MB/s eta 0:00:00
Installing collected packages: sacremoses
Successfully installed sacremoses-0.1.1


In [24]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hub identifier of the first checkpoint (BioGPT).
model_name = "microsoft/biogpt"

# Load BioGPT's tokenizer and causal-LM weights. Nothing is written to disk
# explicitly in this cell; the objects are only kept in kernel globals.
biogpt_tokenizer = AutoTokenizer.from_pretrained(model_name)
biogpt_model = AutoModelForCausalLM.from_pretrained(model_name)

# `model_name` is reused: from here on it refers to the BioBERT checkpoint.
model_name = "dmis-lab/biobert-base-cased-v1.2"

# Load BioBERT's tokenizer and weights through the same causal-LM auto class.
# NOTE(review): the recorded output of this cell warns that `BertLMHeadModel`
# needs `is_decoder=True` for standalone use — confirm a causal-LM head is
# really what is wanted for BioBERT here.
biobert_tokenizer = AutoTokenizer.from_pretrained(model_name)
biobert_model = AutoModelForCausalLM.from_pretrained(model_name)



pytorch_model.bin:   0%|          | 0.00/1.56G [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.11k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/436M [00:00<?, ?B/s]

If you want to use `BertLMHeadModel` as a standalone, add `is_decoder=True.`


In [45]:
class TeacherModel:
    """Registry of teacher models, grouped by task.

    Tasks live in ``self.tasks`` (task name -> ``TeacherTask``) and can be read
    back as attributes, e.g. ``teacher.qna``. Attribute access only falls
    through to the registry when normal lookup fails, so a task whose name
    collides with a real attribute (``tasks``, ``add_model``) must be read via
    ``teacher.tasks[name]`` instead.
    """

    def __init__(self):
        self.tasks = {}

    def add_model(self, task, model_name, model_type, value):
        """Register ``value`` under ``task`` / ``model_name``.

        Args:
            task: Task name; a ``TeacherTask`` is created on first use.
            model_name: Name of the model inside the task.
            model_type: Forwarded to ``TeacherTask.add_model`` (the notebook
                uses ``"model"`` and ``"tokenizer"``).
            value: The object to store.
        """
        if task not in self.tasks:
            self.tasks[task] = TeacherTask()  # Create the task lazily on first use
        self.tasks[task].add_model(model_name, model_type, value)

    def __getattr__(self, task):
        """Return the task registered as ``task``.

        Raises:
            AttributeError: If no such task exists.
        """
        # Go through __dict__ rather than `self.tasks`: when `tasks` has not been
        # set yet (copy.copy / deepcopy / unpickling probe attributes on a bare
        # instance), `self.tasks` would re-enter __getattr__ and recurse until
        # RecursionError instead of raising AttributeError.
        tasks = self.__dict__.get("tasks", {})
        if task in tasks:
            return tasks[task]
        raise AttributeError(f"'TeacherModel' object has no attribute '{task}'")


class TeacherTask:
    """Collection of named teacher models belonging to one task.

    Models live in ``self.models`` (model name -> ``TeacherTaskModel``) and can
    be read back as attributes, e.g. ``task.biogpt``. A model whose name
    collides with a real attribute (``models``, ``add_model``) must be read via
    ``task.models[name]`` instead.
    """

    def __init__(self):
        self.models = {}

    def add_model(self, model_name, model_type, value):
        """Store ``value`` in the ``model_type`` slot of ``model_name``.

        Args:
            model_name: Key of the model; a ``TeacherTaskModel`` is created on
                first use.
            model_type: Forwarded to ``TeacherTaskModel.add_values``.
            value: The object to store.
        """
        if model_name not in self.models:
            self.models[model_name] = TeacherTaskModel()
        self.models[model_name].add_values(model_type, value)

    def __getattr__(self, model_name):
        """Return the entry registered as ``model_name``.

        Raises:
            AttributeError: If no such model exists.
        """
        # Read via __dict__ so that a lookup on an instance without `models`
        # (as happens during copy / unpickling) raises AttributeError instead of
        # recursing through __getattr__ until RecursionError.
        models = self.__dict__.get("models", {})
        if model_name in models:
            return models[model_name]
        # The message names the actual class (it previously said 'Task').
        raise AttributeError(f"'TeacherTask' object has no attribute '{model_name}'")


class TeacherTaskModel:
    """Tokenizer/model pair for a single named teacher model."""

    def __init__(self):
        # Both slots start empty and are filled through add_values().
        self.tokenizer = None
        self.model = None

    def add_values(self, model_type, value):
        """Store ``value`` in the slot selected by ``model_type``.

        Args:
            model_type: ``"tokenizer"`` or ``"model"``. Any other value is
                ignored and leaves both slots untouched.
            value: The object to store.
        """
        # The slot name and the attribute name are the same string.
        if model_type in ("tokenizer", "model"):
            setattr(self, model_type, value)


# Demo: register BioGPT as a "qna" teacher and generate text from it.
teacher_model = TeacherModel()

# Each call fills one slot ("model" / "tokenizer") of the qna -> biogpt entry.
teacher_model.add_model("qna", "biogpt", "model", biogpt_model)
teacher_model.add_model("qna", "biogpt", "tokenizer", biogpt_tokenizer)

# Read the objects back through attribute-style access (task, then model name).
# Note: `tokenizer` and `model` are kernel globals that the student demo later rebinds.
tokenizer = teacher_model.qna.biogpt.tokenizer
model = teacher_model.qna.biogpt.model

# Encode the prompt, generate one sequence capped at 200 tokens, and decode it.
# Only input_ids are passed to generate(); the tokenizer's other outputs are unused.
inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
outputs = model.generate(inputs["input_ids"], max_length=200, num_return_sequences=1)
answer = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(answer)


Hello, my dog is cute, I am a dog, I am a man.


In [70]:
def ties_merge_models(model_a, model_b, threshold=1e-5):
    """
    Merge model B's weights into model A with a simplified, TIES-inspired rule.

    For every state-dict entry of ``model_a`` that ``model_b`` also has under
    the same key, with the same shape and a floating-point dtype:

    * if the mean absolute difference is below ``threshold``, A's tensor is kept;
    * elementwise, where both values have the same sign, their average is used;
    * elementwise, where the signs disagree, the value with the larger
      magnitude is kept *with its own sign* (ties go to A).

    Every other entry (missing in B, shape mismatch, integer/bool buffer) keeps
    A's tensor. This works directly on raw weights, not on task vectors
    relative to a shared base model, so it is only an approximation of
    published TIES merging.

    Args:
        model_a: Model whose architecture is kept (e.g., BioGPT). It is
            MODIFIED IN PLACE — every other reference to this object sees the
            merged weights too.
        model_b: Model whose matching parameters are merged in (e.g., BioBERT).
        threshold: Mean absolute difference below which a tensor is treated as
            unchanged and A's values are kept.
    Returns:
        ``model_a`` itself, with the merged state dict loaded.
    """
    state_dict_a = model_a.state_dict()
    state_dict_b = model_b.state_dict()
    merged_state_dict = {}

    for key, param_a in state_dict_a.items():
        param_b = state_dict_b.get(key)

        # Elementwise merging needs a same-shape floating-point counterpart.
        # Without the shape check, mismatched tensors either raise or silently
        # broadcast; without the dtype check, integer buffers fail on .mean().
        if (
            param_b is None
            or param_a.shape != param_b.shape
            or not torch.is_floating_point(param_a)
            or not torch.is_floating_point(param_b)
        ):
            merged_state_dict[key] = param_a  # Default to model A's parameters
            continue

        # Align device/dtype so mixed-precision or cross-device pairs can be combined.
        param_b = param_b.to(device=param_a.device, dtype=param_a.dtype)

        # Reset marginal changes
        if torch.abs(param_a - param_b).mean() < threshold:
            merged_state_dict[key] = param_a
            continue

        # Resolve sign conflicts: keep the dominant value including its sign.
        # Taking max(|a|, |b|) here would always produce a non-negative result.
        same_sign = torch.sign(param_a) == torch.sign(param_b)
        dominant = torch.where(param_a.abs() >= param_b.abs(), param_a, param_b)
        merged_state_dict[key] = torch.where(same_sign, (param_a + param_b) / 2, dominant)

    # Load the merged state dict into model A's architecture
    model_a.load_state_dict(merged_state_dict)

    return model_a

def merge_tokenizer_vocabularies(tokenizer_a, tokenizer_b):
    """
    Add every token that only tokenizer B knows to tokenizer A.

    New tokens are added in the order of their ids in tokenizer B, so the ids
    they get in tokenizer A are the same on every run. Iterating a ``set`` of
    strings instead would give an order that varies between Python processes.

    Args:
        tokenizer_a: Base tokenizer. It is MODIFIED IN PLACE via ``add_tokens``.
        tokenizer_b: Tokenizer whose vocabulary is merged in; it is only read.
    Returns:
        ``tokenizer_a`` itself, extended with the tokens it was missing.
    """
    # NOTE(review): tokens are copied verbatim. If the two tokenizers use
    # different sub-word conventions, the added strings may never match real
    # input text — confirm this is the intended behaviour.
    vocab_a = tokenizer_a.get_vocab()
    vocab_b = tokenizer_b.get_vocab()

    # Walk B's vocabulary by ascending id and keep what A does not have yet.
    added_tokens = [
        token for token in sorted(vocab_b, key=vocab_b.get) if token not in vocab_a
    ]
    if added_tokens:
        tokenizer_a.add_tokens(added_tokens)

    return tokenizer_a


In [76]:
class StudentModel:
    """Registry of student tasks, each holding one (possibly merged) model/tokenizer.

    Tasks live in ``self.tasks`` (task name -> ``StudentTask``) and can be read
    back as attributes, e.g. ``student.qna``. A task whose name collides with a
    real attribute (``tasks``, ``add_model``) must be read via
    ``student.tasks[name]`` instead.
    """

    def __init__(self):
        self.tasks = {}

    def add_model(self, task, model_type, value):
        """Add ``value`` to ``task``, letting the task merge it with what it already holds.

        Args:
            task: Task name; a ``StudentTask`` is created on first use.
            model_type: Forwarded to ``StudentTask.add_values`` (the notebook
                uses ``"model"`` and ``"tokenizer"``).
            value: The model or tokenizer to add.
        """
        if task not in self.tasks:
            self.tasks[task] = StudentTask()  # Create the task lazily on first use
        self.tasks[task].add_values(model_type, value)

    def __getattr__(self, task):
        """Return the task registered as ``task``.

        Raises:
            AttributeError: If no such task exists.
        """
        # Read via __dict__: on an instance that has no `tasks` yet (copy,
        # deepcopy, unpickling), `self.tasks` would re-enter __getattr__ and end
        # in RecursionError instead of the expected AttributeError.
        tasks = self.__dict__.get("tasks", {})
        if task in tasks:
            return tasks[task]
        raise AttributeError(f"'StudentModel' object has no attribute '{task}'")


class StudentTask:
    """One student task: a single tokenizer and model, merged as more are added."""

    def __init__(self):
        self.tokenizer = None
        self.model = None

    def add_values(self, model_type, value):
        """Store ``value``, or merge it with the object already held.

        Args:
            model_type: ``"tokenizer"`` or ``"model"``.
            value: The tokenizer or model to add. When something is already
                stored, ``value`` becomes the base of the merge (first argument
                to the merge helper) and is what ends up stored.

        Raises:
            ValueError: If ``model_type`` is neither ``"tokenizer"`` nor
                ``"model"``. Previously any unknown type silently replaced the
                stored model.
        """
        if model_type == "tokenizer":
            if self.tokenizer is None:
                # Set the tokenizer if not present
                self.tokenizer = value
                return

            # Merge tokenizers if already present
            self.tokenizer = merge_tokenizer_vocabularies(value, self.tokenizer)

            # Resize the model's embedding layer to the new vocabulary size.
            # Skipped when no model has been added yet, instead of failing on None.
            if self.model is not None:
                new_vocab_size = len(self.tokenizer.get_vocab())
                self.model.resize_token_embeddings(new_vocab_size)

        elif model_type == "model":
            if self.model is None:
                # Set the model if not present
                self.model = value
            else:
                # Merge models if already present
                self.model = ties_merge_models(value, self.model)

        else:
            raise ValueError(
                f"Unknown model_type {model_type!r}; expected 'tokenizer' or 'model'"
            )


# Demo: build a "qna" student by merging BioBERT and BioGPT, then generate text.
student_model = StudentModel()

# First model/tokenizer for the task: stored as-is (nothing to merge with yet).
student_model.add_model("qna", "model", biobert_model)
student_model.add_model("qna", "tokenizer", biobert_tokenizer)

# Second model/tokenizer: merged with what is stored. The newly added object is
# the merge base, so the task ends up holding `biogpt_model` / `biogpt_tokenizer`,
# both modified in place — the same objects registered on `teacher_model` above.
# The model is added before its tokenizer because the tokenizer merge resizes
# the stored model's embeddings.
student_model.add_model("qna", "model", biogpt_model)
student_model.add_model("qna", "tokenizer", biogpt_tokenizer)

# Rebinds the `tokenizer` / `model` globals used by the teacher demo.
tokenizer = student_model.qna.tokenizer
model = student_model.qna.model

# Same prompt and generation settings as the teacher demo, for comparison.
inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
outputs = model.generate(inputs["input_ids"], max_length=200, num_return_sequences=1)
answer = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(answer)

Hello , my dog is cute .
