In [21]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from tensorflow.keras import Sequential
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing import text,sequence
from tensorflow.keras.layers import Bidirectional, LSTM, Dropout, GlobalMaxPooling1D, GRU
from tensorflow.keras.layers import Embedding,Dense,SpatialDropout1D,concatenate,Input,GlobalAveragePooling1D


In [22]:
EMBEDDING_SIZE = 300
VOCAB_SIZE = 100000
MAX_LEN = 150
BATCH_SIZE = 32
EPOCHS = 5

In [27]:
train = pd.read_excel('Data_Train.xlsx')
tok = text.Tokenizer()
tok.fit_on_texts(train['STORY'])

In [26]:
seq = tok.texts_to_sequences(train['STORY'])

In [30]:
vec = sequence.pad_sequences(seq,maxlen=MAX_LEN)

In [57]:
X, y = vec, pd.get_dummies(train['SECTION'])

In [58]:
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=42)

In [59]:
X_train.shape

(6102, 150)

In [60]:
inp = Input(shape=(MAX_LEN, ))
x = Embedding(VOCAB_SIZE, EMBEDDING_SIZE)(inp)
x = SpatialDropout1D(0.3)(x)
x = Bidirectional(LSTM(80, return_sequences=True))(x)
avg_pool = GlobalAveragePooling1D()(x)
max_pool = GlobalMaxPooling1D()(x)
conc = concatenate([avg_pool, max_pool])
drop1 = Dropout(0.3)(conc)
# dense1 = Dense(64,activation='relu')(drop1)
# drop2 = Dropout(0.1)(dense1)
outp = Dense(4, activation="sigmoid")(drop1)

model = Model(inputs=inp, outputs=outp)
model.compile(loss='categorical_crossentropy',
            optimizer='adam',
            metrics=['accuracy'])


In [61]:
model.summary()

Model: "model_3"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_4 (InputLayer)        [(None, 150)]                0         []                            
                                                                                                  
 embedding_7 (Embedding)     (None, 150, 300)             3000000   ['input_4[0][0]']             
                                                          0                                       
                                                                                                  
 spatial_dropout1d_6 (Spati  (None, 150, 300)             0         ['embedding_7[0][0]']         
 alDropout1D)                                                                                     
                                                                                            

In [62]:
model.fit(X_train, y_train, batch_size=32)



<keras.src.callbacks.History at 0x2fa0c4a90>

In [67]:
pred = model.predict(X_test)

out = np.argmax(pred, axis=1)



In [68]:
truth = np.argmax(y_test, axis=1)

In [69]:
accuracy_score(out, truth)

0.9003931847968545