# Merge Tags Workflow

In [None]:
# Run this workflow to merge tags 
token = "" # @param {type: "string"}
!pip install -q -U RelevanceAI==3.2.14
from relevanceai.utils import decode_workflow_token

config = decode_workflow_token(token)

# config = {
#     "inputField": "_surveytag_.B2OE.example.label",
#     "outputField": "_surveytag_.B2OE.example_fixed",
#     "tagsToMerge": {
#         "Teaching": "Teach",
#         "teachers_2": "life"
#     },
#     "authorizationToken": "",
#     "datasetId": "teachers-data-csv"
# }
input_field = config['inputField']
output_field = config['outputField']
tags_to_merge = config['tagsToMerge']


filters = config.get("filters", [])
refresh = config.get("refresh", False)
from relevanceai import Client
client = Client(config['authorizationToken'])
ds = client.Dataset(config['dataset_id'])

from relevanceai.operations_new.transform_base import TransformBase
class MergeTagsTransform(TransformBase):
    """Relabel tags on documents according to a merge mapping.

    For every document, the list of tag documents stored under the parent of
    ``input_field`` is copied, tags whose ``label`` is a key of
    ``tags_to_merge`` are relabelled, and the resulting list is written to
    ``output_field`` on a new document that carries only the original ``_id``.

    The document helpers (``get_field``, ``get_field_across_documents``,
    ``set_field``) are called on the module-level ``client``, which must
    exist before ``transform`` runs.
    """

    def __init__(
        self,
        input_field,
        output_field,
        tags_to_merge,
        **kwargs,
    ):
        """Store the merge configuration.

        Parameters
        ----------
        input_field
            Dotted path to the label inside each tag document; the last
            component is the label key and the rest is the tag-list field.
        output_field
            Alias for the output. It is appended to ``input_field`` with its
            last two components removed to form ``self.output_field``.
        tags_to_merge
            Mapping of existing tag label -> label that replaces it.
        **kwargs
            Every extra keyword argument is set as an attribute on the
            instance.
        """
        self.input_field = input_field
        self.output_alias = output_field
        self.output_field = ".".join(input_field.split(".")[:-2]) + "." + self.output_alias
        self.tags_to_merge = tags_to_merge
        for k, v in kwargs.items():
            setattr(self, k, v)

    def transform(self, documents):
        """Return one update document per input document with merged tags.

        Parameters
        ----------
        documents
            Iterable of documents; each must have an ``_id`` and the tag-list
            field derived from ``self.input_field``.

        Returns
        -------
        list
            New documents holding only ``_id`` and ``self.output_field``.
        """
        # Use the instance configuration rather than module-level globals so
        # the transform works wherever it is instantiated.
        tags_to_merge = self.tags_to_merge
        # Dropping the trailing label key leaves the field that holds the
        # list of tag documents.
        tag_field = ".".join(self.input_field.split(".")[:-1])

        new_chunk = []
        for d in documents:
            tag_docs = client.get_field(tag_field, d)
            # Copy each tag (not just the list) so relabelling does not
            # mutate the input documents.
            new_tag_docs = [dict(tag) for tag in tag_docs]
            new_doc = {"_id": d["_id"]}

            # Labels already present on this document; tags without a label
            # are skipped here, so they are tolerated below as well.
            current_tags = client.get_field_across_documents(
                "label", tag_docs, missing_treatment="skip"
            )
            for new_d in new_tag_docs:
                label = new_d.get("label")
                if label in tags_to_merge:
                    new_tag = tags_to_merge[label]
                    # NOTE(review): when the replacement label is already on
                    # the document the tag keeps its old label instead of
                    # being dropped - confirm this is the intended merge.
                    if new_tag not in current_tags:
                        new_d["label"] = new_tag
                        current_tags.append(new_tag)

            client.set_field(self.output_field, new_doc, new_tag_docs)
            new_chunk.append(new_doc)
        return new_chunk

    @property
    def name(self):
        """Identifier of this operation."""
        return "mergetags"


from relevanceai.operations_new.ops_base import OperationAPIBase

class MergeTagOps(OperationAPIBase, MergeTagsTransform):
    """Merge-tags operation combining ``OperationAPIBase`` with ``MergeTagsTransform``.

    Unlike ``MergeTagsTransform.__init__``, this constructor stores
    ``output_field`` exactly as given instead of deriving it from
    ``input_field``.
    """

    def __init__(self, input_field, output_field, tags_to_merge, **kwargs):
        """Store the merge settings and set every extra keyword as an attribute."""
        self.input_field = input_field
        self.output_field = output_field
        self.tags_to_merge = tags_to_merge
        # Extra keywords (e.g. ``credentials``) become plain attributes.
        for attr_name, attr_value in kwargs.items():
            setattr(self, attr_name, attr_value)

    @property
    def name(self):
        """Identifier of this operation."""
        return "mergetags"

# Build the merge operation, passing along the dataset's credentials.
ops = MergeTagOps(
    input_field=input_field,
    output_field=output_field,
    tags_to_merge=tags_to_merge,
    credentials=ds.credentials,
)

# Extend the configured filters with the dataset's `exists` filter for the
# input field.
filters += ds[input_field].exists()

# Run the operation over the dataset: only the input field is selected and
# the result is declared under the output field.
ops.run(
    ds,
    select_fields=[input_field],
    output_fields=[output_field],
    filters=filters,
    refresh=refresh,
    batched=True,
    chunksize=100,
    warmup_chunksize=1,
)