In [1]:
import pandas as pd

In [2]:
# import dataset from csv file
df = pd.read_csv('tweet_sentiment_450K_shuffled.csv')

In [3]:
review_df = df[['text', 'airline_sentiment']]
review_df = review_df[review_df['airline_sentiment'] != 'neutral']
print(review_df.shape)
print(review_df.head(5))
print(review_df.tail(5))

(450201, 2)
                                                text airline_sentiment
0  @creativeleagueD creativeleague  Hey, you foll...          positive
1  @DonnieWahlberg  No probs Donnie as long as yo...          positive
2               @LukeP182 good luck you dumb shit..           positive
3               k so i need a new cellphone charger           negative
4                          DONT WRITE ANYTHING MEAN           negative
                                                     text airline_sentiment
450196  Great day  going out again.. Marsascala anyone...          positive
450197  @mileycyrus so i have the same insomnia prob a...          negative
450198  @Angelkiss283 Working is no fun!  How about to...          negative
450199                           chicago bulls lost    ((          negative
450200  @lucykatecrafts Makes  the effort of a tutoria...          positive


In [4]:
print(review_df["airline_sentiment"].value_counts())


negative    226824
positive    223377
Name: airline_sentiment, dtype: int64


In [5]:
sentiment_label = review_df.airline_sentiment.factorize()
tweet = review_df.text.values

print(sentiment_label)


(array([0, 0, 0, ..., 1, 1, 0], dtype=int64), Index(['positive', 'negative'], dtype='object'))


In [6]:
from keras.preprocessing.sequence import data_utils
from keras.preprocessing.text import Tokenizer

tokenizer = Tokenizer(num_words=100000)
tokenizer.fit_on_texts(tweet)
tokenizer.word_index # dictionary of words and their index
encoded_docs = tokenizer.texts_to_sequences(tweet)



In [7]:
from keras.models import load_model, Sequential

loaded_model: Sequential = load_model("./sa_450K") # type: ignore

In [8]:
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import data_utils
from tensorflow import Tensor
from numpy import float32

new_tokenizer = Tokenizer(num_words=100000)
new_tokenizer.fit_on_texts(tweet)

def predict_sentiment_loaded(text: str):
    tw = new_tokenizer.texts_to_sequences([text])
    tw = data_utils.pad_sequences(tw, maxlen=200)
    tensor: Tensor = loaded_model.__call__(tw, training = True) # type: ignore
    raw_prediction: float32 = tensor.dtype.as_numpy_dtype(
        tensor.numpy()[0][0])  # type: ignore
    prediction = int(raw_prediction.round().item())
    # Calculate confidence using raw prediction
    confidence = ((raw_prediction.item() if prediction == 1 else 1 - raw_prediction.item())-0.5)*2
    return (sentiment_label[1][prediction], confidence)


In [11]:
# Get and predict sentiment of text

while True:
    user_input = input("Enter text to analyze: ")
    if user_input == "!exit":
        break
    prediction, confidence = predict_sentiment_loaded(user_input)
    print(f"Input: {user_input}")
    print(f"Sentiment: {prediction}")
    print(f"Confidence: {confidence*100:.2f}%")
    print("================================")
    

Input: kkjkjhjghfgsd
Sentiment: positive
Confidence: 4.76%
Input: mansi
Sentiment: negative
Confidence: 6.11%
Input: Pramit
Sentiment: positive
Confidence: 10.19%
Input: jenboi
Sentiment: negative
Confidence: 14.53%


In [10]:

# Random Assertions

assert predict_sentiment_loaded("I am so happy")[0] == "positive"
assert predict_sentiment_loaded("I am so sad")[0] == "negative"
try:  assert predict_sentiment_loaded("I am so angry!")[0] == "negative"
except AssertionError: print("errored as expected")
print(predict_sentiment_loaded("I am so angry!"))


errored as expected
('positive', 0.4834062457084656)
