In [1]:
import pandas as pd
import keras
from keras import layers
import numpy as np
import random
import io

In [2]:
# Load the review dataset from a CSV file located in the current working directory.
data = pd.read_csv('trustpilot_en_50_page_corrected.csv')
# A bare expression on the last line of a cell makes Jupyter render the DataFrame.
data

Unnamed: 0.1,Unnamed: 0,Brand,Headline,Review_not_corrected,Grade,Review
0,0,Just Eat,owe me money and won’t pay,Owed money for a meal half empty and cold sinc...,1,Owed money for a meal half empty and cold sinc...
1,1,Just Eat,P1ss poor. Won’t attempt to use again,"Having ordered last night, I was given a deliv...",1,"Having ordered last night, I was given a deliv..."
2,2,Just Eat,Compensation thieves!,"Be careful of these money stealing, no deliver...",1,"He careful of these money stealing, no deliver..."
3,3,Just Eat,The WORST customer service,The WORST customer service. Never received my ...,1,The WORST customer service. Never received my ...
4,4,Just Eat,Awful customer service,Awful customer service. For past few months I'...,1,Awful customer service. For past few months I'...
...,...,...,...,...,...,...
6062,6062,Grubhub,Grubhub was great,Grubhub was great! They delivered early and t...,5,Grubhub was great! They delivered early and t...
6063,6063,Grubhub,Grubhub is the best in Manhattan.,Grubhub is the best delivery service in Manhat...,5,Grubhub is the best delivery service in Manhat...
6064,6064,Grubhub,What the fu*#!!!,What the fu*#!!!Crap site. Going to bed hungry...,1,That the ff*#!!!Trap site. Going to bed hungry...
6065,6065,Grubhub,Grub hub Driver ate my order.Haven't used Grub...,Haven't used Grub hub for sometime. They sent ...,1,Haven't used Rub hut for sometime. They sent m...


In [3]:
# Build the training corpus: draw a fixed, reproducible subset of 500 reviews
# (random_state pins the selection) and glue them together with single spaces.
sampled_reviews = data['Review'].sample(n=500, random_state=50)
text = ' '.join(sampled_reviews)

In [4]:
# Character vocabulary of the corpus, plus lookup tables in both directions
# (character -> column index, column index -> character).
chars = sorted(set(text))
print("Total chars:", len(chars))
char_indices = {ch: idx for idx, ch in enumerate(chars)}
indices_char = {idx: ch for idx, ch in enumerate(chars)}

# Slice the corpus into overlapping windows of `maxlen` characters, starting a
# new window every `step` characters. The character that immediately follows
# each window is the prediction target for that window.
maxlen = 40
step = 3
window_starts = range(0, len(text) - maxlen, step)
sentences = [text[start : start + maxlen] for start in window_starts]
next_chars = [text[start + maxlen] for start in window_starts]
print("Number of sequences:", len(sentences))

# One-hot encode the windows into x (sequence, position, character) and the
# targets into y (sequence, character).
x = np.zeros((len(sentences), maxlen, len(chars)), dtype="bool")
y = np.zeros((len(sentences), len(chars)), dtype="bool")
for row, (window, target) in enumerate(zip(sentences, next_chars)):
    for pos, ch in enumerate(window):
        x[row, pos, char_indices[ch]] = 1
    y[row, char_indices[target]] = 1

Total chars: 102
Number of sequences: 42363


In [5]:
# Seed Python's `random`, NumPy and the Keras backend in one call so that the
# weight initialisation below, the shuffling done by `fit`, and the random
# draws used during text generation start from the same state on every
# Restart & Run All. (Some GPU kernels may still be non-deterministic.)
SEED = 50
keras.utils.set_random_seed(SEED)

# Character-level language model: a single LSTM layer reads a window of
# `maxlen` one-hot encoded characters, and a softmax layer outputs a
# probability for each of the `len(chars)` possible next characters.
model = keras.Sequential(
    [
        keras.Input(shape=(maxlen, len(chars))),
        layers.LSTM(128),
        layers.Dense(len(chars), activation="softmax"),
    ]
)
# Targets are one-hot vectors, hence categorical cross-entropy.
optimizer = keras.optimizers.RMSprop(learning_rate=0.01)
model.compile(loss="categorical_crossentropy", optimizer=optimizer)


In [6]:
def sample(preds, temperature=1.0):
    """Sample an index from a probability array after temperature rescaling.

    Each probability ``p`` is re-weighted to ``p ** (1 / temperature)`` and the
    result is renormalised before one index is drawn from it. Temperatures
    below 1 sharpen the distribution (more conservative choices); temperatures
    above 1 flatten it (more surprising choices).

    Args:
        preds: 1-D array-like of non-negative probabilities.
        temperature: Rescaling factor. ``0`` selects the most probable index
            deterministically (the limit of ever-lower temperatures).

    Returns:
        The sampled index as a NumPy integer.
    """
    preds = np.asarray(preds).astype("float64")
    if temperature == 0:
        # Greedy decoding; dividing by zero below would only produce NaNs.
        return np.argmax(preds)

    # Zero probabilities legitimately map to -inf logits (weight 0), so the
    # log-of-zero warning is silenced on purpose.
    with np.errstate(divide="ignore"):
        logits = np.log(preds) / temperature
    # Shift so the largest logit is 0 before exponentiating. Without the shift,
    # small temperatures make every exp() underflow to 0, the normalisation
    # becomes 0/0 = NaN, and the multinomial draw raises ValueError.
    logits = logits - np.max(logits)
    weights = np.exp(logits)
    probas = weights / np.sum(weights)
    draw = np.random.multinomial(1, probas, 1)
    return np.argmax(draw)

In [7]:
epochs = 40
batch_size = 128
diversities = [0.2, 0.5, 1.0, 1.2]
n_generated = 400

for epoch in range(epochs):
    # Train for exactly one epoch per iteration so that sample text can be
    # generated between epochs to watch the model improve.
    model.fit(x, y, batch_size=batch_size, epochs=1)
    print()
    print("Generating text after epoch: %d" % epoch)

    # One random seed window per epoch, shared by every diversity value so the
    # effect of the sampling temperature can be compared side by side.
    start_index = random.randint(0, len(text) - maxlen - 1)
    seed_window = text[start_index : start_index + maxlen]
    for diversity in diversities:
        print("...Diversity:", diversity)

        sentence = seed_window
        print('...Generating with seed: "' + sentence + '"')

        produced = []
        for _ in range(n_generated):
            # One-hot encode the current window as a batch of size 1.
            x_pred = np.zeros((1, maxlen, len(chars)))
            columns = [char_indices[ch] for ch in sentence]
            x_pred[0, np.arange(len(sentence)), columns] = 1.0

            preds = model.predict(x_pred, verbose=0)[0]
            next_char = indices_char[sample(preds, diversity)]

            # Slide the window one character forward, appending the new one.
            sentence = sentence[1:] + next_char
            produced.append(next_char)

        generated = "".join(produced)
        print("...Generated: ", generated)
        print("-")



Generating text after epoch: 0
...Diversity: 0.2
...Generating with seed: " who can get there on time you should fi"
...Generated:  nd and and the realle and the profred mere and and compering of merichate and the procher and the provering the prochen and of the forder and and welling and and and of dod the profred and mererered and of wastererereding and the proverine and waster waster and the frechenting and for the ford and mere the recanting waster and and the recand and the sorvered and and the procher and were and and wa
-
...Diversity: 0.5
...Generating with seed: " who can get there on time you should fi"
...Generated:  ff of dichen welling menle of the foud and they the repllinery it ot to fold ther tos and mestome and washer is wash doutseriverastill ofricasteringastat and wasterereint werecion hasteredichor mericere of the mericher and wantely the fouster allicher willlice to gand therenowerithe to mereredes arling ushare,. To persanger and ware the lithint andithe a dronke

In [8]:
# Write the model to the file 'location.keras' (path relative to the current working directory).
model.save('location.keras')