In [65]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding,SimpleRNN,Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.models import save_model

In [52]:
train=pd.read_csv(r"Dataset\train\train.csv")
test=pd.read_csv(r"Dataset\test\test.csv")

In [53]:
train.head()

Unnamed: 0,Class Index,Title,Description
0,3,Wall St. Bears Claw Back Into the Black (Reuters),"Reuters - Short-sellers, Wall Street's dwindli..."
1,3,Carlyle Looks Toward Commercial Aerospace (Reu...,Reuters - Private investment firm Carlyle Grou...
2,3,Oil and Economy Cloud Stocks' Outlook (Reuters),Reuters - Soaring crude prices plus worries\ab...
3,3,Iraq Halts Oil Exports from Main Southern Pipe...,Reuters - Authorities have halted oil export\f...
4,3,"Oil prices soar to all-time record, posing new...","AFP - Tearaway world oil prices, toppling reco..."


In [54]:
train['text']=train['Title']+" "+train['Description']
test['text']=test['Title']+" "+test['Description']
train['Class Index']=train['Class Index'].apply(lambda x: x-1)
test['Class Index']=test['Class Index'].apply(lambda x: x-1)
vocab_size=5000
max_length=200 

In [55]:
train['text'][0]

"Wall St. Bears Claw Back Into the Black (Reuters) Reuters - Short-sellers, Wall Street's dwindling\\band of ultra-cynics, are seeing green again."

In [56]:
tokenizer = Tokenizer(num_words=5000, oov_token="<OOV>")
tokenizer.fit_on_texts(train['text'])
train_sequences = tokenizer.texts_to_sequences(train['text'])
test_sequences = tokenizer.texts_to_sequences(test['text'])

In [57]:
X_train = pad_sequences(train_sequences, maxlen=max_length, padding='pre')
X_test = pad_sequences(test_sequences, maxlen=max_length, padding='pre')
y_train=train['Class Index']
y_test=test['Class Index']

In [58]:
print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)

X_train shape: (120000, 200)
X_test shape: (7600, 200)
y_train shape: (120000,)
y_test shape: (7600,)


In [59]:
train['text'][0]

"Wall St. Bears Claw Back Into the Black (Reuters) Reuters - Short-sellers, Wall Street's dwindling\\band of ultra-cynics, are seeing green again."

In [60]:
X_train[0]

array([   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,   

In [61]:
model = Sequential([
    Embedding(input_dim=5000, output_dim=64, input_length=200),
    SimpleRNN(64),
    Dense(32, activation='relu'),
    Dense(4, activation='softmax')
])



In [62]:
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [63]:
from tensorflow.keras.callbacks import EarlyStopping
early_stop=EarlyStopping(monitor="val_loss",patience=7,restore_best_weights=True)

In [64]:
history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test),callbacks=[early_stop])

Epoch 1/20
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m118s[0m 31ms/step - accuracy: 0.6657 - loss: 0.7565 - val_accuracy: 0.8854 - val_loss: 0.3730
Epoch 2/20
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m123s[0m 33ms/step - accuracy: 0.8860 - loss: 0.3627 - val_accuracy: 0.8934 - val_loss: 0.3491
Epoch 3/20
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m123s[0m 33ms/step - accuracy: 0.9028 - loss: 0.3108 - val_accuracy: 0.9000 - val_loss: 0.3388
Epoch 4/20
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m123s[0m 33ms/step - accuracy: 0.8308 - loss: 0.4612 - val_accuracy: 0.7745 - val_loss: 0.5922
Epoch 5/20
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m125s[0m 33ms/step - accuracy: 0.8102 - loss: 0.5377 - val_accuracy: 0.6087 - val_loss: 0.9648
Epoch 6/20
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m123s[0m 33ms/step - accuracy: 0.7786 - loss: 0.6016 - val_accuracy: 0.7513 - val_loss: 0.677

In [66]:
model.save("model1.h5")



In [69]:
import pickle
pickle.dump(tokenizer,open("tokenizer.pkl","wb"))

In [3]:
import numpy as np
import pickle
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import load_model
max_length=200
model = load_model("Models/rnn.h5")
tokenizer=pickle.load(open("Preprocessing/tokenizer.pkl","rb"))
custom_text = [r"Giddy Phelps Touches Gold for First Time,Michael Phelps won the gold medal in the 400 individual medley and set a world record in a time of 4 minutes 8.26 seconds."]
custom_sequence = tokenizer.texts_to_sequences(custom_text)
custom_padded = pad_sequences(custom_sequence, maxlen=max_length, padding='pre')
custom_padded=np.array(custom_padded)
predictions = model.predict(custom_padded)
predicted_class = np.argmax(predictions, axis=-1)
print(f"The article : {custom_text[0]}")
if predicted_class[0]==0:
    print(f"The article should be labelled as World News with prediction class of {predicted_class[0]}")
if predicted_class[0]==1:
    print(f"The article should be labelled as Sports News with prediction class of {predicted_class[0]}")
if predicted_class[0]==2:
    print(f"The article should be labelled as business News with prediction class of {predicted_class[0]}")
if predicted_class[0]==3:
    print(f"The article should be labelled as Science/Tech News with prediction class of {predicted_class[0]}")



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 149ms/step
The article : Giddy Phelps Touches Gold for First Time,Michael Phelps won the gold medal in the 400 individual medley and set a world record in a time of 4 minutes 8.26 seconds.
The article should be labelled as Sports News with prediction class of 1


In [4]:
predictions

array([[0.01184475, 0.9795155 , 0.00518114, 0.00345866]], dtype=float32)