In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.models import Model
from tensorflow.keras.layers import LSTM, GRU, SimpleRNN, Embedding
from tensorflow.keras.layers import Dense, Input, GlobalMaxPooling1D

In [None]:
# Load the dataset; later cells read the 'labels' and 'text' columns from it.
df = pd.read_csv('bbc.csv')

# Encode each distinct label as an integer class id (pandas category codes).
df['targets'] = df['labels'].astype('category').cat.codes

# Preview goes last: Jupyter only renders a cell's final expression, and this
# way the preview also shows the freshly added 'targets' column.
df.head()

In [None]:
# Number of classes: the integer class ids start at 0, so max + 1.
K = df['targets'].max() + 1

# Seed the split so a fresh kernel reproduces the same train/test rows
# (and therefore the same fitted vocabulary and sequence lengths).
# Note: this does not seed TensorFlow, so training itself is still stochastic.
SEED = 42
df_train, df_test = train_test_split(df, test_size=0.3, random_state=SEED)

# Upper bound on the word indices the tokenizer will emit.
MAX_VOCAB_SIZE = 2000
tokenizer = Tokenizer(num_words=MAX_VOCAB_SIZE)
# Fit on the training text only, so the test split does not shape the vocabulary.
tokenizer.fit_on_texts(df_train['text'])

# Convert each document into a list of integer word indices.
sequences_train = tokenizer.texts_to_sequences(df_train['text'])
sequences_test = tokenizer.texts_to_sequences(df_test['text'])

In [None]:
# Word -> integer-index mapping held by the fitted tokenizer.
word2idx = tokenizer.word_index

# Size of that mapping, reported for reference and reused when building the model.
V = len(word2idx)
print("Unique tokens: ", V)

In [None]:
# Pad the training sequences into a single 2-D array (documents x time steps).
# No maxlen is passed here, so the width is decided by pad_sequences itself.
data_train = pad_sequences(sequences_train)

# T is the padded sequence length; the test data and the model input reuse it.
T = data_train.shape[-1]
print("Shape of the data train tensor", data_train.shape)

In [None]:
# Pad/truncate the test sequences to the training length T so both arrays
# match the model's fixed input width.
data_test = pad_sequences(sequences_test, maxlen=T)
# Message corrected: this reports the test tensor, not the train tensor.
print("Shape of the data test tensor", data_test.shape)


In [None]:
# Embedding dimensionality.
D = 20

# The tokenizer was created with num_words=MAX_VOCAB_SIZE, so the sequences it
# produces only contain indices below MAX_VOCAB_SIZE (0 is the padding value).
# Sizing the embedding by the full vocabulary (V + 1) would allocate rows that
# are never looked up, so cap the table at the indices that can actually occur.
embedding_rows = min(V + 1, MAX_VOCAB_SIZE)

# Sequence classifier: embed -> LSTM over all time steps -> max over time -> logits.
i = Input(shape=(T,))
x = Embedding(embedding_rows, D)(i)
x = LSTM(32, return_sequences=True)(x)  # keep every step for the pooling below
x = GlobalMaxPooling1D()(x)
x = Dense(K)(x)  # no activation: outputs are raw logits, one per class
model = Model(i, x)


In [None]:
# The final Dense layer has no activation, so the loss is told to expect logits.
model.compile(
    optimizer="adam",
    loss=SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

print("Training Model")

# Train on the padded training sequences; the held-out split is passed as
# validation data so per-epoch val_loss / val_accuracy land in r.history.
r = model.fit(
    x=data_train,
    y=df_train['targets'],
    validation_data=(data_test, df_test['targets']),
    epochs=50,
)


In [None]:
# Training vs. validation loss per epoch, read from the fit history.
fig, ax = plt.subplots()
ax.plot(r.history['loss'], label='train loss')
ax.plot(r.history['val_loss'], label='val loss')

# Label the figure so it can be read on its own when skimming the notebook.
ax.set_xlabel('epoch')
ax.set_ylabel('loss')
ax.set_title('Training vs. validation loss')
ax.legend()

# Explicit show() keeps the Legend object's repr out of the cell output.
plt.show()
