In [None]:
import os
# Walk the Kaggle input directory and print the full path of every file found.
for folder, _, names in os.walk('/kaggle/input'):
    full_paths = (os.path.join(folder, name) for name in names)
    for full_path in full_paths:
        print(full_path)


In [None]:
%%time
import pandas as pd, tqdm, re
from tensorflow import keras

In [None]:
# Read the zipped, tab-separated train and test files with identical options.
data_train, data_test = (
    pd.read_csv(archive, compression='zip', sep='\t')
    for archive in (
        '/kaggle/input/word2vec-nlp-tutorial/labeledTrainData.tsv.zip',
        '/kaggle/input/word2vec-nlp-tutorial/testData.tsv.zip',
    )
)
# Preview the first two training rows as the cell output.
data_train.head(2)

In [None]:
# Preview the first two rows of the test frame as the cell output.
data_test.head(n=2)

In [None]:
# Columns are selected by position: in the training frame column 1 is used as
# the target and column 2 as the model input; in the test frame column 1 is the
# model input. (Presumably label / review text -- confirm against the previews.)
y_train = data_train.iloc[:, 1]
X_train = data_train.iloc[:, 2]
X_test = data_test.iloc[:, 1]

In [None]:
# Learn the word index from the training texts only, then convert both splits
# from raw text to lists of integer token ids.
# NOTE: X_train / X_test are overwritten here, so re-running this cell on its
# own would fit the tokenizer on id sequences instead of text.
tokenizer = keras.preprocessing.text.Tokenizer()
tokenizer.fit_on_texts(X_train)
X_train, X_test = map(tokenizer.texts_to_sequences, (X_train, X_test))
print('done')

In [None]:
# Length (in tokens) of the longest sequence across both splits.
# The lengths are streamed through a generator so that neither a concatenated
# copy of the two corpora nor a temporary list of lengths is materialised, and
# `default=0` keeps the cell from raising when both splits are empty.
# Informational only: the cells visible here pad/truncate to a fixed 300.
max_len = max(
    (len(sequence) for split in (X_train, X_test) for sequence in split),
    default=0,
)
print(max_len)

In [None]:
# Pad / truncate every sequence in both splits to exactly 300 token ids.
X_train, X_test = [
    keras.preprocessing.sequence.pad_sequences(sequences, maxlen=300)
    for sequences in (X_train, X_test)
]
print('done')

In [None]:
# Word -> integer-id mapping learned by the tokenizer; report how many entries it has.
vocabulary = tokenizer.word_index
n_words = len(vocabulary)
print(n_words)

In [None]:
# Three stacked LSTM layers on top of a trainable embedding, ending in a single
# sigmoid unit for binary classification.
net = keras.Sequential([
    # Declare the input shape explicitly instead of passing `input_length` to
    # Embedding: that argument is deprecated (and ignored) in Keras 3, which
    # would leave the model unbuilt when `summary()` is called. 300 matches the
    # `maxlen` used when padding the sequences.
    keras.Input(shape=(300,)),
    # +1 leaves room for id 0, which the padding step uses as filler by default
    # and which is not assigned to any word in `vocabulary`.
    keras.layers.Embedding(input_dim=len(vocabulary) + 1, output_dim=128),
    # The first two LSTMs return the full sequence so the next LSTM can consume it.
    keras.layers.LSTM(128, dropout=0.1, return_sequences=True),
    keras.layers.LSTM(64, dropout=0.1, return_sequences=True),
    # The last LSTM returns only its final state, which feeds the classifier head.
    keras.layers.LSTM(32, dropout=0.1),
    keras.layers.Dense(1, activation='sigmoid'),
])
net.summary()

In [None]:
# Configure training: binary cross-entropy loss, Adam with its default
# settings, and binary accuracy reported as the only metric.
net.compile(
    loss=keras.losses.BinaryCrossentropy(),
    metrics=[keras.metrics.BinaryAccuracy()],
    optimizer=keras.optimizers.Adam(),
)
print('done')

In [None]:
# Train on the full training split for 20 epochs.
# NOTE(review): no validation data is passed, so over-fitting cannot be
# observed from this call's training log.
net.fit(x=X_train, y=y_train, epochs=20)

In [None]:
# Run the trained network on the padded test sequences (sigmoid outputs).
y_pred = net.predict(x=X_test)

In [None]:
# Pair each test id with its predicted score (flattened to one dimension) and
# write the result to submission.csv without the DataFrame index.
submission = pd.DataFrame(
    dict(
        id=data_test['id'].values,
        sentiment=y_pred.flatten(),
    )
)
submission.to_csv(path_or_buf='submission.csv', index=False)