# AX or Diagnostic Task

## Preparing the Environment

In [None]:
# Google Colab: mount Google Drive into the runtime's file system.
# force_remount=True is passed so the mount is redone even if one already exists.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/gdrive'
drive.mount(DRIVE_MOUNT_POINT, force_remount=True)

In [None]:
# Install the keras-nlp package (imported below as keras_nlp); -q keeps pip's output quiet.
!pip install -q keras-nlp

In [None]:
#Import the libraries
import tensorflow as tf
import keras_nlp
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers import TextVectorization
import numpy as np
import os
import re
import string
import random
import pandas as pd

## Load Test data and MNLI model

In [None]:
# Reload the stored MNLI model from disk.
# The model can predict on raw text, so the sentences are fed to it as plain strings.
# compile=False is passed to load_model, so the model is loaded uncompiled.
mnli_model_path = "path_to_stored_MNLI_model"
restored_model = keras.models.load_model(mnli_model_path, compile=False)

The dtype policy mixed_float16 may run slowly because this machine does not have a GPU. Only Nvidia GPUs with compute capability of at least 7.0 run quickly with mixed_float16.


In [None]:
# Locate the diagnostic (AX) data file inside the GLUE data directory.
current_path = "path_to_glue"  # placeholder: replace with the real GLUE directory
test_file_name = 'AX.tsv'
test_path = os.path.join(current_path, test_file_name)


def generate_test(path):
    '''
    Read the tab-separated file at `path` and build the raw-text model inputs.

    The first line of the file is skipped (skiprows=1), so the second line is
    used as the header; it must provide the 'sentence1' and 'sentence2' columns.
    Each row becomes one string of the form
    "<sentence1>[SEP]<sentence2>[SEP]".

    Rows in which either sentence is not a string (e.g. a missing value read
    as NaN) are dropped.
    NOTE(review): dropping rows makes the output shorter than the input file,
    so prediction positions may no longer match the file's row indices --
    confirm this is acceptable for the submission.

    Return a 1-D tf.string tensor with one entry per kept row.
    '''
    read_kwargs = dict(sep='\t', skiprows=1, encoding="ISO-8859-1")
    try:
        # pandas >= 1.3: `on_bad_lines='warn'` skips malformed lines with a
        # warning, which is what `error_bad_lines=False` used to do.
        df = pd.read_csv(path, on_bad_lines='warn', **read_kwargs)
    except TypeError:
        # Older pandas does not know `on_bad_lines`; fall back to the
        # original keyword (removed in pandas 2.0).
        df = pd.read_csv(path, error_bad_lines=False, **read_kwargs)

    tokens = [
        text_a + '[SEP]' + text_b + '[SEP]'
        for text_a, text_b in zip(df['sentence1'], df['sentence2'])
        if isinstance(text_a, str) and isinstance(text_b, str)
    ]
    # Explicit dtype keeps the result a string tensor even when no row is kept
    # (an empty list would otherwise be converted to a float tensor).
    return tf.constant(tokens, dtype=tf.string)

# Load the test data: read the file at test_path with generate_test and keep
# the returned tensor of sentence-pair strings for prediction.
ax_test = generate_test(test_path)

## Generate and format predictions

In [None]:
# Generate predictions
# `Model.predict` replaces the deprecated `Model.predict_generator` (no longer
# available in recent Keras releases) and accepts the same tensor input.
# argmax over the last axis turns each row of model outputs into a class index.
pred = restored_model.predict(ax_test, verbose=1).argmax(axis=-1)

In [None]:
# Mapping from predicted class index to its label name
REV_CLASSES = {
    0: 'neutral',
    1: 'entailment',
    2: 'contradiction',
}

# Translate every predicted class index into the corresponding label name
results = [REV_CLASSES[class_index] for class_index in pred]

In [None]:
# Format results into a dataframe, ready to be uploaded on gluebenchmark.com.
# The file is written with the header "index<TAB>prediction": the predicted
# labels go in a named "prediction" column and the dataframe's row numbers are
# written under the "index" label (previously the header was an empty string
# followed by "0").
df = pd.DataFrame({"prediction": results})

df.to_csv("AX.tsv", sep='\t', encoding='utf-8', index_label="index")

In [None]:
# Automatically stop the Google Colab runtime once the notebook has finished:
# wait for a fixed delay, then call runtime.unassign().
import time

SHUTDOWN_DELAY_SECONDS = 60
time.sleep(SHUTDOWN_DELAY_SECONDS)

from google.colab import runtime
runtime.unassign()