In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf
import numpy as np

In [None]:
# Load the preprocessed corpus, keeping only the comment text and its
# sarcasm label; any other column in the CSV is ignored.
wanted_columns = ['comment', 'isSarcastic']
train = pd.read_csv('../data/preprocessed_data.csv', sep=',', usecols=wanted_columns)


In [None]:
# Split 67% / 33% with a fixed seed. The label column is cast to int once and
# reused both as the target and as the stratification key, so both partitions
# keep the same class balance.
comments = train['comment'].astype(str)
labels = train['isSarcastic'].astype(int)

x_train, x_BMA_val, y_train, y_BMA_val = train_test_split(
    comments,
    labels,
    test_size=0.33,
    random_state=42,
    shuffle=True,
    stratify=labels,
)

In [None]:
# Bag-of-words features: learn the vocabulary on the training comments and
# encode those same comments as a sparse document-term count matrix.
from sklearn.feature_extraction.text import CountVectorizer

# Tunable knobs: stop_words=None keeps every token, lowercase=True folds case,
# and min_df=10 drops terms that appear in fewer than 10 training documents.
count_vect = CountVectorizer(stop_words=None, lowercase=True, min_df=10)

# fit_transform learns the vocabulary and builds the count matrix in a single
# pass over the corpus (fit + transform would tokenize every comment twice).
# count_vect is fitted in place and can still be used to transform other data.
bow = count_vect.fit_transform(x_train)

In [None]:
def convert_sparse_matrix_to_sparse_tensor(X):
    """Convert a SciPy sparse matrix into a ``tf.SparseTensor``.

    Args:
        X: A SciPy sparse matrix (any format that provides ``tocoo()``).

    Returns:
        A ``tf.SparseTensor`` holding the same stored values and shape as
        ``X``, passed through ``tf.sparse.reorder`` so its indices are in the
        canonical order TensorFlow ops expect.
    """
    coo = X.tocoo()
    # Build the (nnz, 2) index array as a plain ndarray. np.mat was removed in
    # NumPy 2.0 and produced a legacy np.matrix; tf.SparseTensor stores its
    # indices as int64, so cast explicitly instead of relying on conversion.
    indices = np.column_stack((coo.row, coo.col)).astype(np.int64)
    return tf.sparse.reorder(tf.SparseTensor(indices, coo.data, coo.shape))

In [None]:
# Re-express the SciPy bag-of-words matrix as a TensorFlow sparse tensor so it
# can be sliced row by row into a tf.data pipeline.
bow_tensor = convert_sparse_matrix_to_sparse_tensor(X=bow)

In [None]:
# Build the training input pipeline; the dataset ops are pinned to the CPU.
with tf.device('/cpu:0'):
    AUTOTUNE = tf.data.AUTOTUNE
    batch_size = 64
    seed = 996

    # One element per training row: (sparse bag-of-words vector, label).
    train = tf.data.Dataset.from_tensor_slices(
        (bow_tensor, y_train.values)
    )

    # TODO: no validation pipeline is built yet. The held-out split
    # (x_BMA_val / y_BMA_val) would first have to be vectorized with the
    # fitted count_vect before it can be fed to the model.

    # Shuffle over the full training set, repeat endlessly (the epoch length
    # is given to fit() via steps_per_epoch), then batch and prefetch.
    # There is deliberately no .cache() here: the source tensors already live
    # in memory, and caching *after* .repeat() would try to materialize an
    # endless stream, so the cache would never complete and would keep growing.
    train = (
        train
        .shuffle(bow.shape[0], seed=seed)
        .repeat()
        .batch(batch_size)
        .prefetch(buffer_size=AUTOTUNE)
    )

In [None]:
import tensorflow.keras as keras

# Feed-forward binary classifier over the bag-of-words vector:
# 500 -> 100 -> 10 ReLU units with 50% dropout after each hidden layer, and a
# single sigmoid unit as output.
#
# `shape` is passed as a one-element tuple: Keras documents it as a shape
# tuple, and Keras 3 rejects a bare integer. The tensor is bound to `inputs`
# rather than `input` so the Python builtin is not shadowed for later cells.
# NOTE(review): the training dataset yields SparseTensor rows; depending on the
# installed Keras version this Input may need `sparse=True` — confirm.
inputs = keras.Input(shape=(bow.shape[1],), name='input')
x = keras.layers.Dense(500, activation='relu')(inputs)
x = keras.layers.Dropout(0.5)(x)
x = keras.layers.Dense(100, activation='relu')(x)
x = keras.layers.Dropout(0.5)(x)
x = keras.layers.Dense(10, activation='relu')(x)
x = keras.layers.Dropout(0.5)(x)
output = keras.layers.Dense(1, activation='sigmoid', name='output')(x)

model = keras.Model(inputs=inputs, outputs=output)

In [None]:
# Print the layer-by-layer architecture and parameter counts as a sanity check.
model.summary()

In [None]:
# Training configuration. The output layer already applies a sigmoid, so the
# loss is told it receives probabilities (from_logits=False). Accuracy,
# precision and recall are tracked alongside the loss.
loss_fn = keras.losses.BinaryCrossentropy(from_logits=False)
adam = keras.optimizers.Adam()
tracked_metrics = [
    keras.metrics.BinaryAccuracy(),
    keras.metrics.Precision(),
    keras.metrics.Recall(),
]

model.compile(loss=loss_fn, optimizer=adam, metrics=tracked_metrics)

In [None]:
# Number of full batches in one pass over the training rows. It has to be
# given explicitly because the training dataset repeats indefinitely.
n_train_rows = bow.shape[0]
steps_per_epoch = n_train_rows // batch_size

In [None]:
# Train for 10 epochs of `steps_per_epoch` batches each; the returned History
# object is kept in `history`.
history = model.fit(
    train,
    epochs=10,
    steps_per_epoch=steps_per_epoch,
)