# Demo: How to Modify Models Using GRADIEND

1. Select a model to modify

In [None]:
from transformers import AutoModel, AutoTokenizer, pipeline, logging

# Raise the logging threshold of the Hugging Face transformers library to ERROR
# so that its warnings and informational messages do not clutter the notebook output
logging.set_verbosity_error()

In [None]:
import yaml

# Hugging Face identifiers of the base models referenced in this demo.
# `model` is the name used by the later cells that load the modified models,
# run the gender prediction and generate example predictions.
# NOTE(review): the training cell passes `bert_de`, not `model`, to train();
# confirm that the later cells should really refer to 'bert-base-cased'.
model = 'bert-base-cased'
distilbert_de = 'distilbert-base-german-cased'
bert_de = 'bert-base-german-cased'

# Load the 'M_F_N_leipzig' section of config.yml. The context manager closes
# the file handle as soon as parsing is done instead of leaving it open until
# the garbage collector gets to it.
with open("config.yml") as config_file:
    config = yaml.safe_load(config_file)['M_F_N_leipzig']



2. Train the GRADIEND model

In [None]:
from gradiend.training.gradiend_training import train

# Settings passed to train() to override its defaults.
# NOTE(review): confirm how train() interprets eval_max_size=0.1 (fraction of
# the validation data vs. an absolute size).
model_config = dict(eval_max_size=0.1, epochs=1, lr=1e-4)

# Train on the German BERT base model. The return value is stored in
# gradiend_model_dir, which the later cells pass on as the model location.
gradiend_model_dir = train(
    bert_de,
    config,
    model_config,
    multi_task=False,
    n=1,
    dim=2,
)

3. [Optional]: Analyze the Encoder

In [1]:
import yaml
from gradiend.evaluation.analyze_encoder import analyze_models
from gradiend.export.encoder_stats import print_encoder_stats
from gradiend.export.encoder_plot import plot

# Load the 'M_F_N_leipzig' section of config.yml; the `with` block closes the
# file handle once parsing is done.
with open("config.yml") as config_file:
    config = yaml.safe_load(config_file)['M_F_N_leipzig']

# Location of the GRADIEND model that the following analysis cells work on.
# Setting it here lets this section run without executing the training cell.
gradiend_model_dir = "results/models/gradiend"



In [2]:
# Show the loaded configuration section so its contents can be checked before the analysis
print(config)

{'plot_name': 'MFN_leipzig', 'palette': {'M': 'blue', 'F': 'purple', 'N': 'yellow'}, 'categories': {'M': {'labels': ['AM', '_AM'], 'articles': ['den'], 'codes': [0, 1, 2, 3], 'encoding': -1}, 'F': {'labels': ['AF', '_AF'], 'articles': ['die'], 'codes': [4, 5, 6, 7], 'encoding': 1}, 'N': {'labels': ['AN', '_AN'], 'articles': ['das'], 'codes': [8, 9, 10, 11], 'encoding': 0}}, 'combinations': ['AM_mfn', 'AF_mfn', 'AN_mfn'], 'articles': ['die', 'das', 'den'], 'default_predictions': ['die', 'das', 'den', 'most_likely_token', 'label'], 'token_to_ignore': ['der', 'die', 'das', 'den', 'der', 'Den', 'DEN', 'Der', 'Die', 'Das', 'DER', 'DIE', 'DAS'], 'NM': {'mask': '[NM]', 'inverse': 'die', 'code': 0, 'encoding': -1}, '_NM': {'mask': '[NM]', 'inverse': 'das', 'code': 0, 'encoding': -1}, 'AM': {'mask': '[AM]', 'inverse': 'die', 'code': 2, 'encoding': -1}, '_AM': {'mask': '[AM]', 'inverse': 'das', 'code': 2, 'encoding': -1}, 'NF': {'mask': '[NF]', 'inverse': 'der', 'code': 4, 'encoding': 1}, '_NF':

In [None]:

# Run the encoder analysis for the GRADIEND model stored in gradiend_model_dir.
# NOTE(review): the exact effect of multi_task=False and shared=True is defined
# inside analyze_models — confirm there before changing them.
analyze_models(gradiend_model_dir, config=config, multi_task=False, shared=True)

In [None]:
# Print the encoder statistics for the model in gradiend_model_dir
# (presumably based on the results of the encoder analysis — verify in print_encoder_stats)
print_encoder_stats(gradiend_model_dir, config=config)

# Plot the distribution of the encoded values across the different datasets

In [None]:
# Plot the encoder results for the model in gradiend_model_dir using the current config.
# NOTE(review): multi_grad=True is interpreted by plot() — confirm its meaning there.
plot(config, gradiend_model_dir, multi_grad=True)

In [None]:
# Point to a different, already existing model directory and load the
# 'DAS_PRON' configuration section for it.
# NOTE(review): this rebinds gradiend_model_dir and config, so every cell run
# after this one (decoder analysis, model selection) works on these values —
# confirm that this is intended.
gradiend_model_dir = "gradiend/results/experiments/gradiend/multi_task/gradient/latent_2/1e-05/batch_16_nom_only/epoch_3/bert-base-german-cased/0_epoch_3"

# The context manager closes config.yml once parsing is done.
with open("config.yml") as config_file:
    config = yaml.safe_load(config_file)['DAS_PRON']

plot(config, gradiend_model_dir)

4. Analyze the Decoder

In [None]:
from gradiend.evaluation.analyze_decoder import default_evaluation
# Run the default decoder evaluation on the model in gradiend_model_dir.
# NOTE(review): gradiend_model_dir holds whatever the most recently executed
# cell assigned to it — verify it points to the intended model.
default_evaluation(gradiend_model_dir)

5. Create modified models based on the base models by selecting parameters based on the analysis and the BPI, FPI, and MPI metrics

In [None]:
from gradiend.evaluation.select_models import select
# Run the model selection for the model in gradiend_model_dir and keep its return value.
# NOTE(review): force=False and plot=False are interpreted by select() —
# presumably they skip recomputation and plotting; confirm in select_models.
result = select(gradiend_model_dir, force=False, plot=False)

6. Load the modified models and do something with them

In [None]:
# Prompt for the fill-mask example; it is the same for every model, so it is
# defined once outside the loop.
text = 'The man worked as a [MASK].'

for suffix in ['N', 'F', 'M']:
    # One modified model per suffix is loaded from results/changed_models/
    model_name = f'results/changed_models/{model}-{suffix}'
    print(f'Loading model {model_name}')
    modified_model = AutoModel.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # do something with the model
    # ...

    # Example: Use the pipeline to predict the masked word.
    # The pipeline loads the model from model_name itself; the tokenizer loaded
    # above is passed in so that it is not loaded from disk a second time.
    fill_mask = pipeline("fill-mask", model=model_name, tokenizer=tokenizer)

    # Stored as `predictions` so that the `result` returned by select() in the
    # model selection step is not overwritten with a value of a different kind.
    predictions = fill_mask(text)

    # The first entry is reported as the prediction for this model
    top_prediction = predictions[0]
    predicted = top_prediction['token_str']
    predicted_prob = top_prediction['score']
    print(f'Predicted for {suffix}: {predicted} ({predicted_prob})')

7. [Optional]: Evaluate the modified models on a simple masking task to check for overfitting

In [None]:
from gradiend.evaluation.analyze_decoder import evaluate_gender_prediction_for_models
from gradiend.export.gender_predictions import plot_all

# Run the gender prediction once per ordering of the two target words and plot
# each run; the plot call receives the target words joined by '_' as its suffix.
for targets in (('man', 'woman'), ('woman', 'man')):
    suffix = '_'.join(targets)
    evaluate_gender_prediction_for_models(model, target_words=targets)
    plot_all(f'results/gender_prediction/{model}.csv', suffix=suffix)

8. [Optional]: Generate some example predictions for the model

In [None]:
from gradiend.export.example_predictions import run_for_model
# Generate the example predictions for the base model named by `model`
run_for_model(model)