In [26]:
import numpy as np
import pandas as pd
import keras
import keras_nlp
from sklearn.model_selection import train_test_split

In [2]:
data = pd.read_csv('DATASET.csv')
data.head()

Unnamed: 0,Review,label
0,"Great music service, the audio is high quality...",POSITIVE
1,Please ignore previous negative rating. This a...,POSITIVE
2,"This pop-up ""Get the best Spotify experience o...",NEGATIVE
3,Really buggy and terrible to use as of recently,NEGATIVE
4,Dear Spotify why do I get songs that I didn't ...,NEGATIVE


In [3]:
data.isna().sum(), len(data)

(Review    16
 label      0
 dtype: int64,
 52702)

In [4]:
data = data.dropna()
len(data)

52686

In [5]:
data['label'] = data['label'].map({'NEGATIVE': 0, 'POSITIVE': 1})
data.head()

Unnamed: 0,Review,label
0,"Great music service, the audio is high quality...",1
1,Please ignore previous negative rating. This a...,1
2,"This pop-up ""Get the best Spotify experience o...",0
3,Really buggy and terrible to use as of recently,0
4,Dear Spotify why do I get songs that I didn't ...,0


In [6]:
X = data['Review']
y = data['label']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25)

In [7]:
preprocessor = keras_nlp.models.DistilBertPreprocessor.from_preset(
    "distil_bert_base_en_uncased",
    sequence_length = 128
)

classifier = keras_nlp.models.DistilBertClassifier.from_preset(
    "distil_bert_base_en_uncased",
    preprocessor = preprocessor, 
    num_classes=1
)
classifier.summary()

In [8]:
classifier.compile(
    loss=keras.losses.BinaryCrossentropy(from_logits=True),
    optimizer=keras.optimizers.Adam(1e-5),
    metrics= ["accuracy"]  
)

# Fit
history = classifier.fit(x=X_train,
                         y=y_train,
                         batch_size=256,
                         epochs=2, 
                         validation_data=(X_val, y_val)
                        )

Epoch 1/2
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m421s[0m 3s/step - accuracy: 0.6777 - loss: 0.5230 - val_accuracy: 0.9267 - val_loss: 0.1933
Epoch 2/2
[1m124/124[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m351s[0m 3s/step - accuracy: 0.9259 - loss: 0.1971 - val_accuracy: 0.9390 - val_loss: 0.1607


In [25]:
y_pred = classifier.predict(X_test)
y_test = list(y_test)
pred = [(1 if p>=0 else 0) for p in y_pred]
matching = [(1 if pred[i]==y_test[i] else 0) for i in range(len(y_test))]
test_accuracy = np.sum(matching)/len(matching)
print('Accuracy: ' + f'{test_accuracy*100:.2f}%')

[1m330/330[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 127ms/step
Accuracy: 94.01%
