In [None]:
import pandas as pd
import numpy as np
import nltk
import string
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.model_selection import train_test_split

In [None]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense, Dropout
from tensorflow.keras.utils import to_categorical

In [None]:
# Download NLTK resources
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [None]:
# Load datasets
train_df = pd.read_csv('/content/twitter_training.csv', header=None, names=['id', 'entity', 'category', 'clean_text'])
val_df = pd.read_csv('/content/twitter_validation.csv', header=None, names=['id', 'entity', 'category', 'clean_text'])


In [None]:
# Combine and filter
df = pd.concat([train_df, val_df])
df = df[['clean_text', 'category']].dropna()
df = df[df['category'].isin(['Positive', 'Neutral', 'Negative'])]

In [None]:
# Label encoding
label_map = {'Negative': 0, 'Neutral': 1, 'Positive': 2}
df['label'] = df['category'].map(label_map)

In [None]:
# Text Preprocessing
import nltk
nltk.download('punkt')
stop_words = set(stopwords.words('english')) - {"not", "no", "nor"}
def preprocess(text):
    text = text.lower()
    text = text.translate(str.maketrans('', '', string.punctuation))
    words = word_tokenize(text)
    filtered_words = [word for word in words if word not in stop_words]
    return ' '.join(filtered_words)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [None]:
df['clean_text'] = df['clean_text'].astype(str).apply(preprocess)

In [None]:
# Tokenization and padding
tokenizer = Tokenizer(num_words=5000, oov_token='<OOV>')
tokenizer.fit_on_texts(df['clean_text'])
sequences = tokenizer.texts_to_sequences(df['clean_text'])
X = pad_sequences(sequences, maxlen=60, padding='post', truncating='post')
y = to_categorical(df['label'], num_classes=3)

In [None]:
# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [None]:
# Build RNN model
model = Sequential([
    Embedding(input_dim=5000, output_dim=64, input_length=60),
    SimpleRNN(64),
    Dropout(0.5),
    Dense(3, activation='softmax')
])



In [None]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])


In [None]:
# Train the model
model.fit(X_train, y_train, epochs=50, batch_size=10, validation_data=(X_test, y_test))


Epoch 1/50
[1m4956/4956[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m95s[0m 18ms/step - accuracy: 0.3606 - loss: 1.1007 - val_accuracy: 0.3644 - val_loss: 1.0955
Epoch 2/50
[1m4956/4956[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m96s[0m 19ms/step - accuracy: 0.3807 - loss: 1.1061 - val_accuracy: 0.3660 - val_loss: 1.0992
Epoch 3/50
[1m4956/4956[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m93s[0m 19ms/step - accuracy: 0.3519 - loss: 1.1043 - val_accuracy: 0.3646 - val_loss: 1.0973
Epoch 4/50
[1m4956/4956[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m138s[0m 18ms/step - accuracy: 0.3626 - loss: 1.0992 - val_accuracy: 0.3646 - val_loss: 1.1116
Epoch 5/50
[1m4956/4956[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m94s[0m 19ms/step - accuracy: 0.3464 - loss: 1.1028 - val_accuracy: 0.3102 - val_loss: 1.1116
Epoch 6/50
[1m4956/4956[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m92s[0m 19ms/step - accuracy: 0.3551 - loss: 1.1030 - val_accuracy: 0.3763 - val_loss: 1.0934
Epo

<keras.src.callbacks.history.History at 0x782b45fd9890>

In [32]:
# Prediction function
def predict_sentiment(text):
    text = preprocess(text)
    seq = tokenizer.texts_to_sequences([text])
    padded = pad_sequences(seq, maxlen=60, padding='post')
    pred = model.predict(padded)
    label = np.argmax(pred)
    return {0: 'Negative', 1: 'Neutral', 2: 'Positive'}[label]

In [33]:
# Interactive input
while True:
    user_input = input("Enter a tweet (or type 'exit'): ")
    if user_input.lower() == 'exit':
        break
    sentiment = predict_sentiment(user_input)
    print(f"Predicted Sentiment: {sentiment}")

Enter a tweet (or type 'exit'): bad movie
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 387ms/step
Predicted Sentiment: Negative
Enter a tweet (or type 'exit'): spider man movie was mind blowing as always
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
Predicted Sentiment: Neutral
Enter a tweet (or type 'exit'): conjuring movie was amazing
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
Predicted Sentiment: Negative
Enter a tweet (or type 'exit'): exit


In [34]:
!pip install streamlit pyngrok --quiet


In [35]:
import pickle

# Save model
model.save('sentiment_rnn_model.h5')

# Save tokenizer
with open('tokenizer.pickle', 'wb') as handle:
    pickle.dump(tokenizer, handle, protocol=pickle.HIGHEST_PROTOCOL)




In [36]:
with open('label_map.pickle', 'wb') as handle:
    pickle.dump(label_map, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [37]:
app_code = """
import streamlit as st
import tensorflow as tf
import pickle
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Load trained model and tokenizer
model = tf.keras.models.load_model('sentiment_rnn_model.h5')
with open('tokenizer.pickle', 'rb') as handle:
    tokenizer = pickle.load(handle)

# Try loading label encoder
try:
    with open('label_encoder.pickle', 'rb') as handle:
        label_encoder = pickle.load(handle)
    use_label_encoder = True
except:
    use_label_encoder = False

# Set max sequence length (same as training)
MAX_LEN = 200

st.title("Sentiment Analysis with RNN")
st.subheader("Enter text below to get sentiment prediction:")

user_input = st.text_area("Your Input:")

if st.button("Predict Sentiment"):
    if user_input.strip() == "":
        st.warning("Please enter some text.")
    else:
        sequence = tokenizer.texts_to_sequences([user_input])
        padded = pad_sequences(sequence, maxlen=MAX_LEN)
        prediction = model.predict(padded)[0]
        sentiment_class = prediction.argmax()

        if use_label_encoder:
            sentiment = label_encoder.inverse_transform([sentiment_class])[0]
        else:
            sentiment = ["Negative", "Neutral", "Positive"][sentiment_class]

        st.success(f"Predicted Sentiment: {sentiment}")
"""

with open("app.py", "w") as f:
    f.write(app_code)


In [40]:
!streamlit run app.py &>/dev/null &
from pyngrok import ngrok

ngrok.set_auth_token("30d7b5xfBENTlBOsOoBtNpORBGs_4siWMU9NYyjDTvXBBBiPf")


# Disconnect all existing tunnels
for tunnel in ngrok.get_tunnels():
    ngrok.disconnect(tunnel.public_url)
    print(f"Disconnected tunnel: {tunnel.public_url}")

# Connect to a new tunnel
public_url = ngrok.connect(8501)
print(f"New tunnel created: {public_url}")

New tunnel created: NgrokTunnel: "https://5cc4a97e7469.ngrok-free.app" -> "http://localhost:8501"
