In [4]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, Dropout, GRU, LSTM, SpatialDropout1D, Dropout
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras import utils
from tensorflow import config as tfconfig
import pandas as pd
import numpy as np
# Enumerate the physical GPUs and give each one a single logical device
# configured with memory_limit=4096. With no GPUs the loop simply does nothing.
gpus = tfconfig.list_physical_devices('GPU')

try:
    for device in gpus:
        device_limits = [tfconfig.LogicalDeviceConfiguration(memory_limit=4096)]
        tfconfig.set_logical_device_configuration(device, device_limits)
except RuntimeError as err:
    # Report the configuration failure and carry on instead of aborting the notebook.
    print(err)

In [5]:
# Vocabulary size: passed to the Tokenizer and used as the Embedding input dimension.
num_words = 10_000
# Fixed length that every token sequence is padded/truncated to.
max_text_len = 300

In [6]:
# Both splits are tab-separated despite the .csv extension.
train, test = (
    pd.read_csv(csv_path, sep='\t')
    for csv_path in ('train.csv', 'test.csv')
)

In [7]:
# Raw review texts used to fit the tokenizer and build the training sequences.
texts = train['Text']

# Encode the Score column as integer category codes (pandas orders the
# categories, so the codes follow the sorted label values).
score_as_category = train['Score'].astype('category')
y_train = score_as_category.cat.codes

In [8]:
# Build the word index from the training texts only; the same fitted
# tokenizer is reused later for the test texts.
tokenizer = Tokenizer(
    num_words=num_words,
)
tokenizer.fit_on_texts(texts)

In [9]:
# Convert each training text to a sequence of word indices, then pad/truncate
# every sequence to max_text_len.
x_train = pad_sequences(
    tokenizer.texts_to_sequences(texts),
    maxlen=max_text_len,
)

In [13]:
# Binary text classifier: embedding -> LSTM -> single sigmoid unit.
model = Sequential()
# Map each of the num_words token indices to a 256-dimensional vector.
model.add(Embedding(num_words, 256, input_length=max_text_len))
# Fixed: the original passed an undefined name (`return_`) as a second
# positional argument, which raised NameError. The Dense(1) head needs only
# the final LSTM state, so the default (no per-timestep output) is used.
model.add(LSTM(64))
# One sigmoid output, matching the binary_crossentropy loss used at compile time.
model.add(Dense(1, activation='sigmoid'))

In [14]:
# Configure training: Adam optimizer, binary cross-entropy loss, and accuracy
# tracked as the reported metric (the checkpoint callback monitors val_accuracy).
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [15]:
# Where the best model (by validation accuracy) is written during training.
model_save_path = 'best_model.h5'

# Save only when val_accuracy improves, and log each save.
checkpoint_callback = ModelCheckpoint(
    model_save_path,
    monitor='val_accuracy',
    save_best_only=True,
    verbose=1,
)

# Training settings gathered in one place; 20% of the training data is held
# out for validation.
fit_options = dict(
    epochs=15,
    batch_size=512,
    validation_split=0.2,
    callbacks=[checkpoint_callback],
)

history = model.fit(x_train, y_train, **fit_options)

Epoch 1/15
Epoch 1: val_accuracy improved from -inf to 0.58364, saving model to best_model.h5
Epoch 2/15
Epoch 2: val_accuracy improved from 0.58364 to 0.59322, saving model to best_model.h5
Epoch 3/15
Epoch 3: val_accuracy improved from 0.59322 to 0.59373, saving model to best_model.h5
Epoch 4/15
Epoch 4: val_accuracy improved from 0.59373 to 0.59392, saving model to best_model.h5
Epoch 5/15
Epoch 5: val_accuracy improved from 0.59392 to 0.59396, saving model to best_model.h5
Epoch 6/15
Epoch 6: val_accuracy improved from 0.59396 to 0.59397, saving model to best_model.h5
Epoch 7/15
Epoch 7: val_accuracy did not improve from 0.59397
Epoch 8/15
Epoch 8: val_accuracy improved from 0.59397 to 0.59425, saving model to best_model.h5
Epoch 9/15
Epoch 9: val_accuracy did not improve from 0.59425
Epoch 10/15
Epoch 10: val_accuracy did not improve from 0.59425
Epoch 11/15
Epoch 11: val_accuracy did not improve from 0.59425
Epoch 12/15
Epoch 12: val_accuracy did not improve from 0.59425
Epoch 13

In [13]:
# Apply the tokenizer fitted on the training texts to the test texts, then
# pad/truncate to the same max_text_len the model was trained on.
tests = test['Text']
x_test = pad_sequences(
    tokenizer.texts_to_sequences(tests),
    maxlen=max_text_len,
)

In [14]:
# The checkpoint callback (save_best_only=True, monitor='val_accuracy') wrote
# the best-scoring model to model_save_path, while the in-memory `model` holds
# the weights from the last epoch. Restore the best weights before predicting
# so the saved checkpoint is actually used.
model.load_weights(model_save_path)
y_test = model.predict(x_test)



In [15]:
# Decode the sigmoid outputs into label names.
# Code 0 -> 'Negative', code 1 -> 'Positive'; this assumes the training
# category codes were assigned in that (sorted) order -- confirm against
# train['Score'].
label_names = ['Negative', 'Positive']

# Round each probability to 0/1 and take the single output column.
pred_codes = pd.DataFrame(np.rint(y_test).astype(int))[0]

# Build the categorical directly from the codes. The original
# astype('category') + rename_categories(...) raised ValueError whenever the
# predictions contained only one class, because only one category existed to
# rename. The Series keeps the name 0 so the later rename to 'Score' still applies.
ans = pd.Series(
    pd.Categorical.from_codes(pred_codes, categories=label_names),
    name=0,
)

In [16]:
# Place the test ids and the predicted labels side by side (aligned on index).
submission_columns = [test['idx'], ans]
df = pd.concat(submission_columns, axis=1)

In [17]:
# The prediction column is still named 0 after the concat; give it its final name.
df = df.rename({0: 'Score'}, axis='columns')

In [18]:
# Display the assembled idx/Score frame as the cell output.
df

Unnamed: 0,idx,Score
0,13999,Negative
1,14000,Negative
2,14001,Negative
3,14002,Negative
4,14003,Negative
...,...,...
5995,19994,Positive
5996,19995,Positive
5997,19996,Negative
5998,19997,Positive


In [19]:
# Write the predictions to a tab-separated file; the DataFrame index is
# written as well, since `index` is left at its default.
df.to_csv(path_or_buf='ans.csv', sep='\t')