In [1]:
from numpy.random import seed
import random
seed(1)
import tensorflow as tf
tf.random.set_seed(2)

In [2]:
import nltk
import re
import pandas as pd
import numpy as np
import os
from pathlib import Path
from scipy import stats
from tensorflow.keras.layers import Input, Dense, Lambda

In [12]:
import keras.backend as K
import numpy as np


def gaussian_nll(ytrue, ypreds):
    """Keras implmementation of multivariate Gaussian negative loglikelihood loss function. 
    This implementation implies diagonal covariance matrix.
    
    Parameters
    ----------
    ytrue: tf.tensor of shape [n_samples, n_dims]
        ground truth values
    ypreds: tf.tensor of shape [n_samples, n_dims*2]
        predicted mu and logsigma values (e.g. by your neural network)
        
    Returns
    -------
    neg_log_likelihood: float
        negative loglikelihood averaged over samples
        
    This loss can then be used as a target loss for any keras model, e.g.:
        model.compile(loss=gaussian_nll, optimizer='Adam') 
    
    """
    
    n_dims = int(int(ypreds.shape[1])/2)
    mu = ypreds[:, 0:n_dims]
    logsigma = ypreds[:, n_dims:]
    
    mse = -0.5*K.sum(K.square((ytrue-mu)/K.exp(logsigma)),axis=1)
    sigma_trace = -K.sum(logsigma, axis=1)
    log2pi = -0.5*n_dims*np.log(2*np.pi)
    
    log_likelihood = mse+sigma_trace+log2pi

    return K.mean(-log_likelihood)

In [4]:
test_eval_data = "/Users/zxj/Desktop/study/semester3/MCS/humor/data/task-1/data/task-1/test_eval.csv"

test_eval=pd.read_csv(test_eval_data)
test_eval_label=test_eval.meanGrade

#get orignial news
test_eval_news=test_eval.original
test_eval_funny_word=test_eval.edit

In [5]:
from nltk.corpus import stopwords
stopwords=set(stopwords.words('english'))
tt=nltk.tokenize.regexp.WordPunctTokenizer()
lemmatizer=nltk.stem.wordnet.WordNetLemmatizer()

punctuation = '!,;:?"\'.\'/<>'
def removePunctuation(text):
    text = re.sub(r'[{}]+'.format(punctuation),'',text)
    return text.strip()

#catch the changed word
def find_tag(news):
    p = re.compile(r'[<](.*?)/[>]', re.S)
    return re.findall(p, news)

def preprocess_news(sentence,funny_word):
    final=[]
    num=0
    bad_sign=["’","‘","-"]
    
    for event in sentence:
        lines=[]       
        event=str(event)
        changed_word=find_tag(event)   
        event=removePunctuation(event)
        event=tt.tokenize(event)
        #tokenize and remove Punctuation
        
        for i in event:
            if i not in bad_sign:
                
                if i in changed_word:
                    #add the funny behind the changed word
                    #lines.append(i)
                    lines.append(funny_word[num])
                else:
                    lines.append(i)
                    
        lines=' '.join(lines)
        lines='[CLS]'+' '+lines+' '+'[SEP]'
        final.append(lines)
        num+=1
    return final

In [6]:
test_eval_news=preprocess_news(test_eval_news,test_eval_funny_word)

In [7]:
test_eval_news[0]

'[CLS] The Latest Election tally shows Cars turning right [SEP]'

In [8]:
import tensorflow as tf

def negative_binomial_layer(x):
    """
    Lambda function for generating negative binomial parameters
    n and p from a Dense(2) output.
    Assumes tensorflow 2 backend.
    
    Usage
    -----
    outputs = Dense(2)(final_layer)
    distribution_outputs = Lambda(negative_binomial_layer)(outputs)
    
    Parameters
    ----------
    x : tf.Tensor
        output tensor of Dense layer
        
    Returns
    -------
    out_tensor : tf.Tensor
        
    """
    
    # Get the number of dimensions of the input
    num_dims = len(x.get_shape())
    
    # Separate the parameters
    n, p = tf.unstack(x, num=2, axis=-1)
    
    # Add one dimension to make the right shape
    n = tf.expand_dims(n, -1)
    p = tf.expand_dims(p, -1)
        
    # Apply a softplus to make positive
    n = tf.keras.activations.softplus(n)
    
    # Apply a sigmoid activation to bound between 0 and 1
    p = tf.keras.activations.sigmoid(p)

    # Join back together again
    out_tensor = tf.concat((n, p), axis=num_dims-1)

    return out_tensor

def negative_binomial_loss(y_true, y_pred):
    """
    Negative binomial loss function.
    Assumes tensorflow backend.
    
    Parameters
    ----------
    y_true : tf.Tensor
        Ground truth values of predicted variable.
    y_pred : tf.Tensor
        n and p values of predicted distribution.
        
    Returns
    -------
    nll : tf.Tensor
        Negative log likelihood.
    """

    # Separate the parameters
    n, p = tf.unstack(y_pred, num=2, axis=-1)
    
    # Add one dimension to make the right shape
    n = tf.expand_dims(n, -1)
    p = tf.expand_dims(p, -1)
    
    # Calculate the negative log likelihood
    nll = (
        tf.math.lgamma(n) 
        + tf.math.lgamma(y_true + 1)
        - tf.math.lgamma(n + y_true)
        - n * tf.math.log(p)
        - y_true * tf.math.log(1 - p)
    )                  

    return nll

In [9]:
from transformers import AlbertTokenizer, TFAlbertModel
#import tensorflow as tf
import numpy as np
tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2',token_ids_1=None)
inputs_train = tokenizer(test_eval_news,padding=True,return_tensors="pt")
inputs_train=np.array(inputs_train['input_ids'])

In [20]:
input_layer=tf.keras.Input(shape=(37,),dtype='int64')
bert=TFAlbertModel.from_pretrained('albert-base-v2', return_dict=True)(input_layer)
layer1=bert[1]
regression=tf.keras.layers.Dense(2,activation='linear')(layer1)
#result=Lambda(negative_binomial_layer)(regression)
model=tf.keras.Model(inputs=input_layer,outputs=regression)
model.summary()

Some weights of the model checkpoint at albert-base-v2 were not used when initializing TFAlbertModel: ['predictions']
- This IS expected if you are initializing TFAlbertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing TFAlbertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFAlbertModel were initialized from the model checkpoint at albert-base-v2.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFAlbertModel for predictions without further training.


Model: "model_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         [(None, 37)]              0         
_________________________________________________________________
tf_albert_model_2 (TFAlbertM TFBaseModelOutputWithPool 11683584  
_________________________________________________________________
dense_2 (Dense)              (None, 2)                 1538      
Total params: 11,685,122
Trainable params: 11,685,122
Non-trainable params: 0
_________________________________________________________________


In [21]:
opt=tf.keras.optimizers.Adam(learning_rate=3e-4, epsilon=1e-08, clipnorm=1.0)
metric = tf.keras.metrics.MeanSquaredError()
model.compile(optimizer=opt, loss=gaussian_nll)
model_fit = model.fit(inputs_train, test_eval_label, 
                      batch_size=100, epochs=4
                   )

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


In [18]:
def predict_prob(model, x, batch_size=2048):
    """Make predictions given model and 2d data
    """

    ypred = model.predict(x, batch_size=batch_size, verbose=1)
    n_outs = int(ypred.shape[1] / 2)
    mean = ypred[:, 0:n_outs]
    sigma = np.exp(ypred[:, n_outs:])

    return mean, sigma

In [74]:
print(np.var(test_eval_label))

0.33027509093914764


In [23]:
predict_prob(model,inputs_train[:9])



(array([[1.1050736],
        [1.1050733],
        [1.1050735],
        [1.1050735],
        [1.1050736],
        [1.1050733],
        [1.1050733],
        [1.1050735],
        [1.1050733]], dtype=float32), array([[0.60179824],
        [0.60179836],
        [0.60179824],
        [0.6017982 ],
        [0.6017983 ],
        [0.60179806],
        [0.6017982 ],
        [0.60179824],
        [0.601798  ]], dtype=float32))

In [87]:
print(inputs_train[10])

[    2     2    21    78   949   898  2062  5077  7192  5310    26   469
    13 11881    16  4778 13458   140   132   436  1894   119  1158 10527
     3     3     0     0     0     0     0     0     0     0     0     0]


In [97]:
model.predict(inputs_train[:6])



[array([[-6.428763 ],
        [-6.4259925],
        [-6.4203596],
        [-6.4225764],
        [-6.4274507],
        [-6.4285426]], dtype=float32), array([[0.12186965],
        [0.12236995],
        [0.12289025],
        [0.12279273],
        [0.12233145],
        [0.12194616]], dtype=float32)]