In [None]:
# default_exp problem_types.regression
%load_ext autoreload
%autoreload 2
import os
# Setting CUDA_VISIBLE_DEVICES to "-1" is intended to hide every CUDA device
# from libraries imported after this point, so the notebook runs on CPU only.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

In [None]:
# test setup
import tensorflow as tf
import numpy as np
from m3tl.test_base import TestBase
from m3tl.input_fn import train_eval_input_fn
from m3tl.test_base import test_top_layer
# shared test fixture; its params object is reused by the test cell further down
test_base = TestBase()
params = test_base.params

# hidden size from the bert config, passed to the top-layer test as hidden_dim
hidden_dim = params.bert_config.hidden_size

# build the input dataset from params and take its first batch as numpy values
train_dataset = train_eval_input_fn(params=params)
one_batch = next(train_dataset.as_numpy_iterator())


  return torch._C._cuda_getDeviceCount() > 0


Adding new problem weibo_fake_ner, problem type: seq_tag
Adding new problem weibo_cws, problem type: seq_tag
Adding new problem weibo_fake_multi_cls, problem type: multi_cls
Adding new problem weibo_fake_cls, problem type: cls
Adding new problem weibo_masklm, problem type: masklm
Adding new problem weibo_pretrain, problem type: pretrain
Adding new problem weibo_fake_regression, problem type: regression
Adding new problem weibo_fake_vector_fit, problem type: vector_fit
Adding new problem weibo_premask_mlm, problem type: premask_mlm


  elif np.issubdtype(type(feature), np.float):


INFO:tensorflow:sampling weights: 
INFO:tensorflow:weibo_fake_cls_weibo_fake_ner_weibo_fake_regression_weibo_fake_vector_fit: 0.2631578947368421
INFO:tensorflow:weibo_fake_multi_cls: 0.2631578947368421
INFO:tensorflow:weibo_masklm: 0.2236842105263158
INFO:tensorflow:weibo_premask_mlm: 0.25


# Regression(regression)

This module includes the necessary parts to register the regression problem type.

## Imports and utils


In [None]:
# export
from typing import Dict, List, Tuple

import numpy as np
import tensorflow as tf
from m3tl.base_params import BaseParams
from m3tl.problem_types.utils import (empty_tensor_handling_loss,
                                      nan_loss_handling)
from m3tl.special_tokens import PREDICT, TRAIN
from m3tl.utils import get_phase, variable_summaries


## Top Layer

In [None]:
# export

def mse_wrapper(labels, logits, from_logits=True):
    """Return the Keras mean squared error between `labels` and `logits`.

    Args:
        labels: target values.
        logits: predicted values.
        from_logits: accepted but never read. Presumably kept so this wrapper
            can be called like the other loss functions handed to
            `empty_tensor_handling_loss` -- TODO confirm against that helper.

    Returns:
        Whatever `tf.keras.losses.mean_squared_error` returns for the inputs.
    """
    # NOTE(review): nothing here reshapes the inputs, so `labels` and `logits`
    # are combined with whatever broadcasting their shapes imply -- confirm
    # that callers pass tensors of matching shape.
    squared_error = tf.keras.losses.mean_squared_error
    return squared_error(labels, logits)


class Regression(tf.keras.Model):
    """Top layer for a regression problem: one dense unit on the pooled feature.

    Outside of the predict phase, `call` also registers a mean-squared-error
    loss and a `<problem_name>_neg_mse` metric on the model.
    """

    def __init__(self, params: BaseParams, problem_name: str) -> None:
        """Set up the regression head.

        Args:
            params: project parameters; `detail_log` is read in `call`.
            problem_name: problem name, used as the Keras model name and as
                the prefix of the label key and of the metric name.
        """
        super().__init__(name=problem_name)
        self.params = params
        self.problem_name = problem_name
        # a regression head predicts one value per input
        self.num_classes = 1
        self.dense = tf.keras.layers.Dense(units=self.num_classes)

    def call(self, inputs: Tuple[Dict]):
        """Compute predictions and, unless predicting, record loss and metric.

        Args:
            inputs: a pair `(feature, hidden_feature)`. `hidden_feature['pooled']`
                feeds the dense layer; `feature['<problem_name>_label_ids']`
                holds the regression target.

        Returns:
            The output of the dense layer.
        """
        phase = get_phase()
        features, hidden_features = inputs

        predictions = self.dense(hidden_features['pooled'])

        # optional per-weight summaries, enabled through params.detail_log
        if self.params.detail_log:
            for variable in self.weights:
                variable_summaries(variable, self.problem_name)

        if phase != PREDICT:
            # despite the `_label_ids` suffix, the target here is a float
            target = features[f'{self.problem_name}_label_ids']

            # loss goes through the empty-tensor helper first, then the NaN helper
            mse = nan_loss_handling(
                empty_tensor_handling_loss(target, predictions, mse_wrapper))
            self.add_loss(mse)

            # report the negated loss under '<problem_name>_neg_mse'
            self.add_metric(
                tf.math.negative(mse),
                name=f'{self.problem_name}_neg_mse',
                aggregation='mean')
        return predictions

In [None]:
# run the shared top-layer test helper on Regression, using the sample batch prepared in the test setup cell
test_top_layer(Regression, problem='weibo_fake_regression', params=params, sample_features=one_batch, hidden_dim=hidden_dim)

Testing Regression


## Get or make label encoder function


In [None]:
# export
def regression_get_or_make_label_encoder_fn(params: BaseParams, problem: str, mode: str, label_list: List[str], *args, **kwargs):
    """Register the class count for a regression problem; no encoder is built.

    Args:
        params: project parameters; receives the `num_classes` problem info.
        problem: name of the problem being registered.
        mode: current mode; the problem info is only written when it is TRAIN.
        label_list: accepted but not used.
        *args, **kwargs: accepted but not used.

    Returns:
        Always None.
    """
    if mode != TRAIN:
        return None

    # record num_classes = 1 for this problem
    params.set_problem_info(problem=problem, info_name='num_classes', info=1)
    return None


## Label handling function

In [None]:
# export
def regression_label_handling_fn(target, label_encoder=None, tokenizer=None, decoding_length=None, *args, **kwargs):
    """Convert a raw regression target into a float label.

    Args:
        target: any value accepted by `float()`.
        label_encoder: accepted but not used.
        tokenizer: accepted but not used.
        decoding_length: accepted but not used.
        *args, **kwargs: accepted but not used.

    Returns:
        A `(label_id, label_mask)` tuple; the mask is always None.

    Raises:
        Whatever `float(target)` raises when the target cannot be converted.
    """
    numeric_label = float(target)
    label_mask = None  # no mask is produced for regression targets
    return numeric_label, label_mask

