In [5]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import pad_sequences
from tensorflow.keras.callbacks import EarlyStopping
import nltk
import re

# Ensure necessary NLTK resources are downloaded
nltk.download('stopwords')
nltk.download('wordnet')

# Preprocessing function
def preprocess_text(text):
    lemma = nltk.WordNetLemmatizer()
    text = re.sub('[^a-zA-Z]', ' ', text).lower()
    text = ' '.join([lemma.lemmatize(word) for word in text.split() if word not in nltk.corpus.stopwords.words('english')])
    return text

# Load dataset
df = pd.read_csv('trainDisaster.csv')

# Fill missing values
df['text'] = df['text'].apply(preprocess_text)
df['keyword'] = df['keyword'].fillna('unknown')
df['location'] = df['location'].fillna('unknown')

# Check target distribution
print(df['target'].value_counts())

# Convert target to integers
df['target'] = df['target'].astype(int)

# Prepare tokenizer and word sequences
tokenizer = Tokenizer()
tokenizer.fit_on_texts(df['text'])
word_index = tokenizer.word_index

# Prepare input data (X) and target data (y)
X = tokenizer.texts_to_sequences(df['text'])
y = df['target']

# Compute class weights
class_weights = compute_class_weight('balanced', classes=np.unique(y), y=y)
class_weights = dict(enumerate(class_weights))
print("Class weights:", class_weights)

# Pad sequences
max_len = 30
X = pad_sequences(X, maxlen=max_len)

# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build the RNN model
model = Sequential()
model.add(Embedding(input_dim=len(word_index)+1, output_dim=100, input_length=max_len))
model.add(SimpleRNN(145, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Early stopping callback
earlystopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train the model
model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.2,  callbacks=[earlystopping])

# Save the trained model
model.save('disaster_rnn_model.h5')

# Function to predict new news
def preprocess_input(news):
    words = news.lower().split()
    encoded_review = [word_index.get(word, 2) for word in words]  # Using '2' for out-of-vocabulary words
    padded_review = pad_sequences([encoded_review], maxlen=max_len)
    return padded_review

def predict_news(news):
    preprocessed_text = preprocess_input(news)
    prediction = model.predict(preprocessed_text)
    sentiment = 'DisasterRelated' if prediction[0][0] > 0.5 else 'Not Related'
    return sentiment, prediction[0][0]

# Example usage
news = "earthquake shakes the city, people are trapped"
result, score = predict_news(news)
print(f'Result: {result}')
print(f'Score: {score}')


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\negia\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\negia\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


0    4342
1    3271
Name: target, dtype: int64
Class weights: {0: 0.8766697374481806, 1: 1.1637114032405993}
Epoch 1/20




[1m153/153[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 22ms/step - accuracy: 0.6064 - loss: 0.7790 - val_accuracy: 0.7677 - val_loss: 0.5151
Epoch 2/20
[1m153/153[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 20ms/step - accuracy: 0.8625 - loss: 0.3558 - val_accuracy: 0.7841 - val_loss: 0.4913
Epoch 3/20
[1m153/153[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 19ms/step - accuracy: 0.9393 - loss: 0.1581 - val_accuracy: 0.7488 - val_loss: 0.6530
Epoch 4/20
[1m153/153[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 20ms/step - accuracy: 0.9758 - loss: 0.0742 - val_accuracy: 0.7586 - val_loss: 0.6411
Epoch 5/20
[1m153/153[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 19ms/step - accuracy: 0.9910 - loss: 0.0338 - val_accuracy: 0.7430 - val_loss: 0.7602
Epoch 6/20
[1m153/153[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 20ms/step - accuracy: 0.9929 - loss: 0.0252 - val_accuracy: 0.7258 - val_loss: 0.8056
Epoch 7/20
[1m153/153[0m [32m━



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 182ms/step
Result: DisasterRelated
Score: 0.7737054824829102


In [15]:
news = "In 2022, flooding and landslides in the northeastern state of Assam killed at least 192 people|"
result, score = predict_news(news)
print(f'Result: {result}')
print(f'Score: {score}')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
Result: DisasterRelated
Score: 0.9846987128257751


In [18]:
!pip install redis

Collecting redis
  Downloading redis-5.2.0-py3-none-any.whl.metadata (9.1 kB)
Downloading redis-5.2.0-py3-none-any.whl (261 kB)
   ---------------------------------------- 0.0/261.4 kB ? eta -:--:--
   - -------------------------------------- 10.2/261.4 kB ? eta -:--:--
   ---- ---------------------------------- 30.7/261.4 kB 445.2 kB/s eta 0:00:01
   ------------------ --------------------- 122.9/261.4 kB 1.2 MB/s eta 0:00:01
   ---------------------------------------  256.0/261.4 kB 2.0 MB/s eta 0:00:01
   ---------------------------------------  256.0/261.4 kB 2.0 MB/s eta 0:00:01
   ---------------------------------------- 261.4/261.4 kB 1.3 MB/s eta 0:00:00
Installing collected packages: redis
Successfully installed redis-5.2.0
