In [3]:
import matplotlib.pyplot as plt
from keras_nlp.models import DebertaV3Preprocessor
from keras_nlp.models import DebertaV3Classifier
import keras
import tensorflow as tf

import numpy as np
import pandas as pd

# Settings for the shared text preprocessor. It is created once at module
# level and reused by `input_process` for every dataset built in this notebook.
PREPROCESSOR_PRESET = "deberta_v3_extra_small_en"
PREPROCESSOR_SEQUENCE_LENGTH = 512

preprocessor = DebertaV3Preprocessor.from_preset(
    preset=PREPROCESSOR_PRESET,
    sequence_length=PREPROCESSOR_SEQUENCE_LENGTH,
)


def input_process(input, label):
    """Run the shared preprocessor on a text input and pass the label through.

    The two-argument shape lets this be handed directly to
    ``tf.data.Dataset.map`` on a dataset of ``(text, label)`` pairs, which is
    how the notebook uses it.

    Args:
        input: Raw text given to the module-level ``preprocessor``. The name
            shadows the ``input`` builtin inside this function; it is kept
            because it is part of the existing signature.
        label: Target paired with ``input``; returned unchanged.

    Returns:
        A ``(features, label)`` tuple, where ``features`` is whatever
        ``preprocessor(input)`` returns.
    """
    features = preprocessor(input)
    return features, label


def create_DebertaV3_model(model_preset, manual_preprocess=False):
    """Build a 6-output DeBERTaV3 classifier followed by a sigmoid activation.

    Args:
        model_preset: Preset identifier forwarded to
            ``DebertaV3Classifier.from_preset``.
        manual_preprocess: When true, the classifier is created with
            ``preprocessor=None``. When false, ``from_preset`` is called
            without a ``preprocessor`` argument, so the library default applies.

    Returns:
        A ``keras.Model`` mapping the classifier's own ``input`` to
        ``sigmoid(classifier(input))``.

    NOTE(review): the returned model is wired from ``classifier.input``, and
    the notebook feeds it data that already went through ``preprocessor``.
    It looks like any preprocessor attached to the classifier is bypassed
    by this wrapper -- confirm against the keras_nlp documentation.
    """
    # Override the preprocessor only when the caller will preprocess manually;
    # otherwise leave the keyword out entirely so `from_preset` uses its default.
    preset_kwargs = {"preprocessor": None} if manual_preprocess else {}
    classifier = DebertaV3Classifier.from_preset(
        model_preset, num_classes=6, **preset_kwargs
    )

    model_inputs = classifier.input
    raw_outputs = classifier(model_inputs)

    # One independent sigmoid per output unit.
    sigmoid_outputs = keras.layers.Activation("sigmoid")(raw_outputs)
    return keras.Model(model_inputs, sigmoid_outputs)


def label_process(y, num_classes=6):
    """Encode integer scores as cumulative ("thermometer") binary targets.

    A score ``s`` becomes a row whose first ``s`` entries are 1 and whose
    remaining entries are 0, e.g. with six classes ``3 -> [1, 1, 1, 0, 0, 0]``.
    Scores above ``num_classes`` saturate to an all-ones row; scores of zero
    or below give an all-zeros row. Negative scores previously went through
    Python negative slicing and silently set most of the row to 1.

    Args:
        y: One-dimensional sequence of scores (list, tuple, NumPy array or
            pandas Series). Values must be integers or integer-valued floats.
        num_classes: Width of each encoded row. Defaults to 6.

    Returns:
        ``float32`` array of shape ``(len(y), num_classes)``.

    Raises:
        ValueError: If ``y`` is not one-dimensional.
        TypeError: If any score is NaN, infinite, or not integer-valued.
    """
    scores = np.asarray(y)
    if scores.size == 0:
        # An empty list converts to a float array; return the empty result
        # directly instead of sending it through the validation below.
        return np.zeros((0, num_classes), "float32")
    if scores.ndim != 1:
        raise ValueError(
            f"label_process expects a 1-D sequence of scores, got shape {scores.shape}"
        )

    if not np.issubdtype(scores.dtype, np.integer):
        # A pandas column with missing values arrives as floats. Accept
        # integer-valued floats, but refuse NaN/inf/fractions so that bad
        # rows cannot silently turn into all-zero targets.
        as_float = scores.astype("float64")
        if not np.all(np.isfinite(as_float)) or np.any(as_float != np.round(as_float)):
            raise TypeError("scores must be finite integer values")
        scores = as_float.astype("int64")

    # Position k is set exactly when k < score. Broadcasting builds every row
    # at once and does not wrap around for negative scores.
    thresholds = np.arange(num_classes)
    return (thresholds[None, :] < scores[:, None]).astype("float32")

### Chấm điểm cho dữ liệu test

In [None]:
# Number of essays sent through the model per forward pass during inference.
TEST_BATCH_SIZE = 32

test_df = pd.read_csv("test.csv")
test_X = test_df["full_text"].tolist()
test_y = label_process(test_df["score"].tolist())

# `Model.predict` treats every element of a tf.data.Dataset as one batch, so
# the dataset has to be batched explicitly. Without `.batch(...)` each element
# is a single unbatched example and the model receives rank-1 tensors.
test_ds = (
    tf.data.Dataset.from_tensor_slices((test_X, test_y))
    .map(input_process)
    .batch(TEST_BATCH_SIZE)
)

model_test = create_DebertaV3_model("deberta_v3_extra_small_en")
model_test.load_weights("model.h5")

predicts = model_test.predict(test_ds, verbose=0)
# Guard against shape mix-ups: there must be exactly one output row per essay.
assert len(predicts) == len(test_df), "expected one prediction row per essay"

# `label_process` encodes a score s as s leading ones, so the predicted score
# is the number of outputs that clear the 0.5 threshold.
# NOTE(review): assumes the weights in model.h5 were trained on that encoding.
predicted_scores = (predicts >= 0.5).sum(axis=1)
for essay_id, predicted_score in zip(test_df["essay_id"], predicted_scores):
    print(essay_id, ": ", predicted_score)

### Một vài ví dụ cho dữ liệu train

In [None]:
# How many training essays to display, and the seed that makes the draw
# repeatable across "Restart & Run All". Set SAMPLE_SEED to None for a fresh
# random draw on every run.
SAMPLE_SIZE = 10
SAMPLE_SEED = 42

df_sample = pd.read_csv("train.csv").sample(n=SAMPLE_SIZE, random_state=SAMPLE_SEED)
sample_X = df_sample["full_text"].tolist()
sample_y = label_process(df_sample["score"].tolist())

# `Model.predict` treats every element of a tf.data.Dataset as one batch, so
# the dataset has to be batched explicitly; the whole sample fits in one batch.
sample_ds = (
    tf.data.Dataset.from_tensor_slices((sample_X, sample_y))
    .map(input_process)
    .batch(SAMPLE_SIZE)
)

# `model_test` is the model with loaded weights created in the test-scoring cell.
predicts = model_test.predict(sample_ds, verbose=0)
assert len(predicts) == len(df_sample), "expected one prediction row per essay"

# Predicted score = number of outputs that clear the 0.5 threshold.
predicted_scores = (predicts >= 0.5).sum(axis=1)
for essay_text, predicted_score, true_score in zip(
    sample_X, predicted_scores, df_sample["score"]
):
    print("Essay: ", essay_text)
    print("Prediction: ", predicted_score)
    print("Truth Score: ", true_score)