In [None]:
# Import necessary libraries
import pandas as pd
import pickle
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from google.colab import drive

In [None]:
# Attach Google Drive to the Colab filesystem; the model and CSV files read
# below are addressed through this mount point.
drive.mount(mountpoint='/content/drive')

In [None]:
# Restore the pre-trained LSTM model that was pickled to Google Drive.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file,
# so this must only ever be pointed at a pickle from a trusted source.
with open('/content/drive/MyDrive/ML_Project/lstm.pkl', mode='rb') as model_file:
    model = pickle.load(model_file)

In [None]:
# Read the tweets CSV and keep only the rows that have a value in 'text';
# the tokenizer below is fitted on that column.
path = "/content/drive/MyDrive/ML_Project/tweets.csv"
df = pd.read_csv(path).dropna(subset=['text'])

In [None]:
# Build the word -> integer index from the tweet texts.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(df['text'])

# Vocabulary size: one slot per known word, plus one because Keras reserves
# index 0 and never assigns it to a word.
total_words = 1 + len(tokenizer.word_index)

In [None]:
# Encode every tweet as a list of word indices.
sequences = tokenizer.texts_to_sequences(df['text'])

# maxlen=None pads every row to the length of the longest sequence, giving a
# rectangular array whose second dimension is reused later as the input width.
padded_sequences = pad_sequences(sequences, maxlen=None)

In [None]:
# Read the scraped posts that the model will classify further down.
path = "/content/drive/MyDrive/ML_Project/fb_scraped.csv"
scraped_df = pd.read_csv(filepath_or_buffer=path)

In [None]:
# Preview the scraped data (optional, for debugging).
# Show only the first rows, as the cell's last expression, so the notebook
# renders a compact rich table instead of a plain-text dump of the frame.
scraped_df.head()

In [None]:
# Test the model on a single custom input
new_tweet = "You are Amee Storm, a pink Simic Barbarian who's from the royal court and wields a blood stained tooth"

# Use dedicated names for the single-tweet arrays. Reusing `sequences` /
# `padded_sequences` here would overwrite the corpus-level arrays with 1-row
# ones, leaving any other cell that reads them with silently different data.
max_len = padded_sequences.shape[1]  # width of the existing padded input
new_sequences = tokenizer.texts_to_sequences([new_tweet])  # Tokenize the new tweet
new_padded = pad_sequences(new_sequences, maxlen=max_len)  # Pad to the same width
prediction = model.predict(new_padded)

# Scores at or above the threshold are reported as a disaster.
threshold = 0.5
if prediction[0, 0] >= threshold:
    print(f'Tweet: "{new_tweet}" is a disaster.')
else:
    print(f'Tweet: "{new_tweet}" is not a disaster.')

In [None]:
# Classify every scraped post with one batched prediction.
#
# Missing 'Text' cells are read by pandas as float NaN, which can be neither
# tokenized nor sliced for display, so drop them and coerce the remaining
# values to str before handing them to the tokenizer.
posts = scraped_df['Text'].dropna().astype(str).tolist()

if posts:  # skip tokenizing/predicting when there is nothing to classify
    post_sequences = tokenizer.texts_to_sequences(posts)
    # Pad to the width of the existing padded input. The result is stored under
    # its own name so `padded_sequences` is not overwritten and the cell can be
    # re-run safely.
    post_padded = pad_sequences(post_sequences, maxlen=padded_sequences.shape[1])
    # A single predict call for the whole batch instead of one call per post.
    post_scores = model.predict(post_padded)

    # Column 0 holds the score compared against the threshold.
    for new_tweet, score in zip(posts, post_scores[:, 0]):
        # Only the first 35 characters are echoed to keep the output readable.
        if score >= threshold:
            print(f'Post: "{new_tweet[:35]}" is a disaster.')
        else:
            print(f'Post: "{new_tweet[:35]}" is not a disaster.')