In [75]:
import numpy as np 
import pandas as pd 
import tensorflow as tf

In [81]:

# Read the review export from the Kaggle input directory.
df = pd.read_csv("/kaggle/input/spotify-app-reviews-2022/reviews.csv")

# Expose `Rating` as `label` and `Review` as `text` (appended in that order),
# then discard the original columns that are not used as model inputs.
df = df.assign(label=df["Rating"], text=df["Review"]).drop(
    columns=["Reply", "Rating", "Total_thumbsup", "Time_submitted", "Review"]
)

# Quick structural overview followed by summary statistics (cell output).
df.info()
df.describe()

In [57]:
# Input-pipeline settings: examples per batch and shuffle-buffer capacity.
BATCH_SIZE = 64
BUFFER_SIZE = 10_000

# Slice the frame column-wise so every dataset element is a dict keyed by
# column name (here: "label" and "text").
ds = tf.data.Dataset.from_tensor_slices({column: df[column] for column in df.columns})

def normalize(input):
    """Turn one dataset element into a ``(text, scaled_label)`` pair.

    Args:
        input: Mapping with a ``"text"`` entry (the review) and a ``"label"``
            entry (the rating).

    Returns:
        Tuple ``(input["text"], input["label"] / 5)``; dividing by 5 rescales
        the rating so that a rating of 5 maps to 1.0.
    """
    # The leftover debug print of the whole element was removed: it only added
    # noise to the cell output and contributed nothing to the result.
    return input["text"], input["label"] / 5

# Fixed seed so the shuffled order (and therefore any split derived from it)
# is repeatable.
SHUFFLE_SEED = 42

# Convert every row dict into a (text, scaled_label) pair.
ds = ds.map(normalize)

# Shuffle ONCE. By default `shuffle` draws a new order on every iteration, so
# any take()/skip()-based split of `ds` would contain different rows each
# epoch and evaluation rows would leak into training.
ds = ds.shuffle(
    BUFFER_SIZE,
    seed=SHUFFLE_SEED,
    reshuffle_each_iteration=False,
).prefetch(tf.data.AUTOTUNE)

# Peek at a single example to sanity-check the pipeline output.
for example, label in ds.take(1):
    print(example, "\n\n\n", label)

In [52]:
# Number of examples in the training and validation splits; everything after
# them becomes the test split.
TRAIN_SIZE = 50000
VAL_SIZE = 6000

# Only the training split is shuffled here, so it gets a fresh order every
# epoch on its own. Shuffling happens before batching so examples mix across
# batches, and prefetch is the last stage so it overlaps with training.
train_ds = (
    ds.take(TRAIN_SIZE)
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)
remaining = ds.skip(TRAIN_SIZE)
val_ds = remaining.take(VAL_SIZE).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
test_ds = remaining.skip(VAL_SIZE).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

In [53]:
# Upper bound on the number of vocabulary entries the vectorizer may learn.
max_features = 10_000

# Text-vectorization layer; its vocabulary is learned later via adapt().
encoder = tf.keras.layers.TextVectorization(max_tokens=max_features)

def text(input, out):
    """Project a ``(text, label)`` pair down to its first component.

    Args:
        input: First component of the pair; returned unchanged.
        out: Second component of the pair; ignored.

    Returns:
        ``input`` as received.
    """
    del out  # the second component plays no role in the result
    return input

# Text-only view of the dataset (labels dropped), used to learn the vocabulary.
text_ds = ds.map(text)

# Feed adapt() batches instead of single strings: the learned vocabulary is
# the same, but the layer processes BATCH_SIZE reviews per step rather than
# one, which is how the TextVectorization documentation adapts on a Dataset.
encoder.adapt(text_ds.batch(BATCH_SIZE))

# Inspect the learned vocabulary: its size and the first 20 entries.
vocab = np.array(encoder.get_vocabulary())
vocab_size = len(vocab)
print(vocab_size)
vocab[:20]

In [54]:
class SpotifyReviewModel(tf.keras.Model):
    """Text-to-score regression model.

    Pipeline: the module-level ``encoder`` (TextVectorization) turns raw
    strings into token ids, which pass through an embedding, a bidirectional
    LSTM, a ReLU dense layer and a final single-unit dense layer with no
    activation, i.e. one unbounded value per input string.
    """

    def __init__(self):
        # Bug fix: the base initializer was called as super().__init__(self),
        # which passes the instance as a stray positional argument. Keras
        # versions that validate constructor arguments reject that call.
        super().__init__()

        # Shared, already-adapted vectorizer defined at module level.
        self.encoder = encoder
        # One embedding row per vocabulary entry; mask_zero=True makes
        # downstream layers skip token id 0, which the vectorizer uses as
        # padding.
        self.embedding = tf.keras.layers.Embedding(
            input_dim=len(encoder.get_vocabulary()),
            output_dim=64,
            mask_zero=True,
        )
        self.bidirectional = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64))
        self.dense1 = tf.keras.layers.Dense(64, activation='relu')
        # Single linear output unit: the predicted (scaled) score.
        self.dense2 = tf.keras.layers.Dense(1)

    def call(self, inputs):
        """Run the forward pass.

        Args:
            inputs: Batch of raw strings accepted by the vectorizer.

        Returns:
            Output of the final single-unit dense layer for each input.
        """
        x = inputs
        x = self.encoder(x)
        x = self.embedding(x)
        x = self.bidirectional(x)
        x = self.dense1(x)
        x = self.dense2(x)
        return x
    
model = SpotifyReviewModel()

loss = tf.losses.MeanAbsoluteError()

# The targets are continuous (rating / 5) and the model has a linear output,
# so this is a regression problem. "accuracy" is a classification metric and
# yields meaningless numbers here; track RMSE alongside the MAE loss instead.
# Exactly one metric is kept so evaluate() still returns [loss, metric].
model.compile(
    loss=loss,
    optimizer="adam",
    metrics=[tf.keras.metrics.RootMeanSquaredError()],
)

# Smoke test: run the untrained model on the first review of one batch.
for x, y in train_ds.take(1):
    print(x[0])
    print(y.numpy()[0], end="\n\n")
    # Slice instead of wrapping in a Python list so the model receives a
    # proper batch tensor holding a single string.
    predictions = model(x[:1])
    print(predictions)

In [72]:
# Number of full passes over the training split.
EPOCHS = 5

# Fit on the training batches, scoring the validation batches after every
# epoch; the returned History object is kept for later inspection.
history = model.fit(
    x=train_ds,
    validation_data=val_ds,
    epochs=EPOCHS,
)

In [73]:
# Score the test batches; evaluate() returns the loss followed by the single
# metric the model was compiled with.
test_loss, test_acc = model.evaluate(x=test_ds)

print(test_loss, test_acc)

In [80]:
# Hand-written sample reviews for a quick qualitative check of the model.
bad = "Really buggy and terrible to use as of recently"
# Bug fix: this sample started with a stray b' (residue of a copied bytes
# repr), which fed a junk token to the model.
good = "Really good experience. Amazing app and I highly reccomend!"
okay = "It was okay there were definetly some problems but overall a decent app. However the not being able to download kind of sucks"

# Output rows follow the input order: bad, okay, good.
predictions = model.predict(np.array([bad, okay, good]))

# The model predicts rating / 5, so scale back up and round to whole ratings.
print(np.round(predictions * 5))