In [78]:
import matplotlib.pyplot as plt
from keras_nlp.models import DebertaV3Preprocessor
from keras_nlp.models import DebertaV3Classifier
import keras
import tensorflow as tf

import numpy as np
import pandas as pd

# Shared preprocessor, built from the "deberta_v3_extra_small_en" preset with
# sequence_length=512. It is applied to every essay inside the tf.data
# pipelines (via input_process) rather than inside the model.
preprocessor = DebertaV3Preprocessor.from_preset(
    "deberta_v3_extra_small_en", sequence_length=512
)


def input_process(input, label = None):
    """Run the module-level ``preprocessor`` on ``input``.

    Intended as a ``tf.data.Dataset.map`` callback for both unlabeled and
    labeled datasets.

    Args:
        input: Raw value handed to ``preprocessor``.
        label: Optional target that is passed through unchanged.

    Returns:
        The preprocessed input alone when ``label`` is None, otherwise the
        tuple ``(preprocessed_input, label)``.
    """
    features = preprocessor(input)
    return features if label is None else (features, label)


def create_DebertaV3_model(model_preset):
    """Build a 6-output DeBERTa-V3 classifier followed by a sigmoid activation.

    Args:
        model_preset: Preset identifier forwarded to
            ``DebertaV3Classifier.from_preset``.

    Returns:
        A ``keras.Model`` that takes the classifier's own inputs and returns
        the classifier outputs passed through an element-wise sigmoid.
    """
    # preprocessor=None: in this notebook the text is preprocessed in the
    # tf.data pipeline (see input_process), not by the classifier.
    classifier = DebertaV3Classifier.from_preset(
        model_preset,
        preprocessor=None,
        num_classes=6,
    )

    model_inputs = classifier.input
    raw_outputs = classifier(model_inputs)

    # Each of the six outputs gets its own independent sigmoid.
    probabilities = keras.layers.Activation("sigmoid")(raw_outputs)
    return keras.Model(model_inputs, probabilities)


def label_process(y, num_classes=6):
    """Encode integer scores as cumulative ("thermometer") ordinal targets.

    Row ``i`` of the result has its first ``score_i`` entries set to 1 and
    the remaining entries set to 0; with six classes a score of 3 becomes
    ``[1, 1, 1, 0, 0, 0]``. This is the inverse of the decoding used in this
    notebook, ``np.sum(predict >= 0.5)``.

    Args:
        y: Sized iterable of integer scores (list, NumPy array, pandas
            Series, ...).
        num_classes: Width of every encoded row. Defaults to 6, the value
            that used to be hard-coded here.

    Returns:
        A ``float32`` array of shape ``(len(y), num_classes)``.
    """
    encoded = np.zeros((len(y), num_classes), "float32")
    # Iterate over the values instead of indexing ``y[i]``: on a pandas
    # Series ``y[i]`` is a label lookup, which fails or picks the wrong row
    # once the index is not 0..n-1 (for example after ``DataFrame.sample``).
    for row, score in enumerate(y):
        # Plain slice semantics: a score above num_classes fills the whole row.
        encoded[row, :score] = 1
    return encoded

### Chấm điểm cho dữ liệu test

In [79]:
# Score the essays in test.csv with the fine-tuned DeBERTa-V3 model.
test_df = pd.read_csv("test.csv")
test_X = test_df["full_text"].tolist()

# Use a fixed, small batch size. Batching the whole test set at once
# (batch(len(test_X))) makes peak memory grow with the number of essays,
# which does not scale beyond a handful of rows at sequence length 512.
TEST_BATCH_SIZE = 8
test_ds = (
    tf.data.Dataset.from_tensor_slices(test_X)
    .map(input_process)
    .batch(TEST_BATCH_SIZE)
)

model_test = create_DebertaV3_model("deberta_v3_extra_small_en")
model_test.load_weights("model.weights.h5")

predicts = model_test.predict(test_ds, verbose=1)

# Decode the ordinal outputs: the predicted score is the number of the six
# sigmoid outputs that reach 0.5, computed for all essays in one step.
test_scores = (predicts >= 0.5).sum(axis=1)
for essay_id, score in zip(test_df["essay_id"], test_scores):
    print(essay_id, ": ", score)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step
000d118 :  3
000fe60 :  3
001ab80 :  4


### Một vài ví dụ cho dữ liệu train

In [80]:
# Show a few training essays next to their true and predicted scores.
N_SAMPLES = 5      # number of essays to display; also used as the batch size
SAMPLE_SEED = 42   # fixed seed so "Restart & Run All" shows the same essays

df_sample = pd.read_csv("train.csv").sample(n=N_SAMPLES, random_state=SAMPLE_SEED)
sample_X = df_sample["full_text"].tolist()
sample_y = label_process(df_sample["score"].tolist())
sample_ds = (
    tf.data.Dataset.from_tensor_slices((sample_X, sample_y))
    .map(input_process)
    .batch(N_SAMPLES)
)
predicts = model_test.predict(sample_ds, verbose=0)

# Predicted score = number of sigmoid outputs that reach 0.5 (ordinal decoding).
sample_scores = (predicts >= 0.5).sum(axis=1)
last_index = len(predicts) - 1

rows = zip(df_sample["score"], sample_scores, df_sample["full_text"])
for i, (truth, prediction, essay) in enumerate(rows):
    print("\033[1;32mTruth Score: \033[0m", truth)
    print("\033[1;36mPrediction: \033[0m", prediction)
    print("\033[1;33mEssay: \033[0m", essay)
    print('\n')
    # Separator between essays, skipped after the last one.
    if i != last_index:
        print(100 * "-","\n")

[1;32mTruth Score: [0m 3
[1;36mPrediction: [0m 2
[1;33mEssay: [0m No, i dont think we should have driveless cars that is a hazoired sitiouions.

Because in that type of thing the driver is not in controle of what so ever the driver has no purpose to be there at all and it can be a very dangerous on there part because like instead of the person controling the car that is his life in his own hands

But say the car is in controle he/she will be putting there life in a computers chip they have to be 100% sure with that in order for it to be ok but i bet 50% of the people won't even care about there own life why you say.

because they will be to busy enjoying there new drivless car what if perhapes the car has a milefuntion on the passenger and it hits a person say kills that person what would you blame that on the person or the car the peron had nothing to do with it why because he/she was to depended on their new drive less car.

You also have to think about the legal lieablitiy wit