In [5]:
pip install pandas numpy nltk tensorflow keras scikit-learn


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.2 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [4]:
import pandas as pd
import numpy as np
import re
import nltk
import tensorflow as tf
from nltk.corpus import stopwords
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, SpatialDropout1D
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Download stopwords
nltk.download('stopwords')
stop_words = set(stopwords.words('english'))

# Load Airline Sentiment dataset
def load_data(filepath):
    df = pd.read_csv(filepath)
    
    # Keep only sentiment and text columns
    df = df[['airline_sentiment', 'text']]
    
    return df

# Preprocess text (remove special characters, stopwords, etc.)
def preprocess_text(text):
    text = text.lower()  # Lowercase text
    text = re.sub(r'http\S+|www\S+', '', text)  # Remove URLs
    text = re.sub(r'@\w+', '', text)  # Remove mentions
    text = re.sub(r'[^a-zA-Z\s]', '', text)  # Remove non-alphabetic characters
    text = ' '.join([word for word in text.split() if word not in stop_words])  # Remove stopwords
    return text

# Load and preprocess data
data_path = "Tweets.csv"  # Change this to your dataset path
df = load_data(data_path)

# Apply text preprocessing
df['text'] = df['text'].apply(preprocess_text)

# Encode labels (convert 'positive', 'negative', 'neutral' to numbers)
label_encoder = LabelEncoder()
df['target'] = label_encoder.fit_transform(df['airline_sentiment'])

# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(df['text'], df['target'], test_size=0.2, random_state=42)

# Tokenization parameters
max_words = 5000  # Max number of words in tokenizer vocabulary
max_len = 50  # Max sequence length

# Tokenize and pad sequences
tokenizer = Tokenizer(num_words=max_words, oov_token="<OOV>")
tokenizer.fit_on_texts(X_train)

X_train_seq = pad_sequences(tokenizer.texts_to_sequences(X_train), maxlen=max_len, padding='post')
X_test_seq = pad_sequences(tokenizer.texts_to_sequences(X_test), maxlen=max_len, padding='post')

# Build the RNN (LSTM) model
model = Sequential([
    Embedding(max_words, 128, input_length=max_len),
    SpatialDropout1D(0.2),
    LSTM(100, dropout=0.2, recurrent_dropout=0.2),
    Dense(3, activation='softmax')  # 3 output classes (positive, neutral, negative)
])

# Compile model
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train model
model.fit(X_train_seq, y_train, epochs=5, batch_size=64, validation_data=(X_test_seq, y_test))

# Function to predict sentiment for new input text
def predict_sentiment(text):
    text = preprocess_text(text)
    text_seq = pad_sequences(tokenizer.texts_to_sequences([text]), maxlen=max_len, padding='post')
    prediction = model.predict(text_seq)
    sentiment_label = label_encoder.inverse_transform([np.argmax(prediction)])[0]
    return sentiment_label

# Test with user input
while True:
    user_text = input("\nEnter a tweet (or 'exit' to quit): ")
    if user_text.lower() == 'exit':
        break
    sentiment = predict_sentiment(user_text)
    print(f"Predicted Sentiment: {sentiment}")


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\LENOVO\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\stopwords.zip.


Epoch 1/5




[1m183/183[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 35ms/step - accuracy: 0.6169 - loss: 0.9396 - val_accuracy: 0.6452 - val_loss: 0.8953
Epoch 2/5
[1m183/183[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 29ms/step - accuracy: 0.6208 - loss: 0.9229 - val_accuracy: 0.6452 - val_loss: 0.8943
Epoch 3/5
[1m183/183[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 27ms/step - accuracy: 0.6290 - loss: 0.9150 - val_accuracy: 0.6452 - val_loss: 0.8960
Epoch 4/5
[1m183/183[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 28ms/step - accuracy: 0.6197 - loss: 0.9247 - val_accuracy: 0.6452 - val_loss: 0.8954
Epoch 5/5
[1m183/183[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 31ms/step - accuracy: 0.6230 - loss: 0.9221 - val_accuracy: 0.6452 - val_loss: 0.8944
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 208ms/step
Predicted Sentiment: negative
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
Predicted Sentiment: nega

In [6]:
pip install tensorflow


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.2 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip
