In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from keras.preprocessing.text import Tokenizer
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense,Conv1D,MaxPooling1D
from keras.layers import LSTM, Dropout, SpatialDropout1D
from keras.layers import Bidirectional
from keras.layers import Flatten
from keras.models  import Model
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence
from keras.callbacks import ModelCheckpoint

In [2]:
# Load the raw tweets table from the CSV that sits next to the notebook.
tweets_csv_path = './Tweets.csv'
df = pd.read_csv(tweets_csv_path)

In [3]:
def process_data(_df):
    """Prepare the raw tweets frame for binary sentiment classification.

    Steps:
      1. Drop rows whose ``airline_sentiment`` is ``'neutral'``.
      2. Keep only the ``tweet_id``, ``text`` and ``airline_sentiment`` columns.
      3. Strip ``@mention`` tokens (``@`` followed by non-whitespace) from ``text``.
      4. Map the labels to integers: ``'negative'`` -> 0, ``'positive'`` -> 1.
         Any other label is not in the mapping and becomes NaN.

    The shape of the frame is printed before and after the neutral rows are
    removed.

    Args:
        _df: DataFrame containing at least the three columns listed above.
            It is not modified.

    Returns:
        A new DataFrame with the three retained columns, cleaned text and
        integer labels. The original row index is preserved.
    """
    print(_df.shape)
    df = _df[_df['airline_sentiment'] != 'neutral']
    print(df.shape)
    # Explicit copy: assigning columns on a slice of a slice is what raised
    # pandas' SettingWithCopyWarning in the original version.
    df = df[["tweet_id", "text", "airline_sentiment"]].copy()

    # Raw string avoids the invalid '\S' escape; regex=True is required because
    # pandas >= 2.0 treats the pattern as a literal string by default.
    df['text'] = df['text'].str.replace(r'@\S+', '', regex=True)
    df['airline_sentiment'] = df['airline_sentiment'].map({'negative': 0, 'positive': 1})

    return df

def split_data(df, train_fraction=0.8):
    """Split texts and labels into leading train and trailing test portions.

    The split is positional: the first ``int(len(df) * train_fraction)`` rows
    form the training set and the remaining rows form the test set. Rows are
    not shuffled.

    Args:
        df: DataFrame with a ``text`` column and an ``airline_sentiment`` column.
        train_fraction: Fraction of rows (between 0 and 1 inclusive) assigned
            to the training set. Defaults to 0.8.

    Returns:
        A tuple ``(X_train, y_train, X_test, y_test)`` of arrays taken from
        the ``text`` (X) and ``airline_sentiment`` (y) columns.

    Raises:
        ValueError: If ``train_fraction`` is outside ``[0, 1]``.
    """
    if not 0.0 <= train_fraction <= 1.0:
        raise ValueError(
            "train_fraction must be between 0 and 1, got %r" % (train_fraction,)
        )

    X = df['text'].values
    y = df['airline_sentiment'].values

    # One boundary shared by features and labels keeps them aligned.
    split_at = int(len(X) * train_fraction)

    return X[:split_at], y[:split_at], X[split_at:], y[split_at:]

In [4]:
# Clean the raw frame and split it into train / test texts and labels.
df_processed = process_data(df)

X_train, y_train, X_test, y_test = split_data(df_processed)

# Cap the vocabulary at 10000 so every emitted token index is < 10000.
# The Embedding layer of the model is created with input_dim=10000; an
# uncapped Tokenizer can emit larger indices, which are out of range for
# that embedding table.
tokenizer = Tokenizer(num_words=10000)
# Fit on the training texts only so the vocabulary is not built from test data.
tokenizer.fit_on_texts(X_train)

# Replace each text with its list of integer token indices.
X_train = tokenizer.texts_to_sequences(X_train)
X_test = tokenizer.texts_to_sequences(X_test)

print("X Train: ", len(X_train))
print("X Test: ", len(X_test))

# Every sequence is padded / truncated to this fixed length.
max_len = 150

X_train = sequence.pad_sequences(X_train, maxlen=max_len)
X_test = sequence.pad_sequences(X_test, maxlen=max_len)

print("Shape of X Train: ", X_train.shape)
print("Shape of X Test: ", X_test.shape)

(14640, 15)
(11541, 15)
X Train:  9232
X Test:  2309
Shape of X Train:  (9232, 150)
Shape of X Test:  (2309, 150)


  df['text'] = df['text'].str.replace('@\S+', '')


In [9]:
# Binary sentiment classifier: embedding -> 1D convolution -> max pooling ->
# bidirectional LSTM -> single sigmoid unit.
model = Sequential([
    Embedding(10000, 150, input_length=150),
    Conv1D(filters=32, kernel_size=3, padding='same', activation='relu'),
    MaxPooling1D(pool_size=2),
    Bidirectional(LSTM(128, dropout=0.2)),
    Dense(1, activation='sigmoid'),
])
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [10]:
# Train for 6 epochs in shuffled mini-batches of 32; the test split is passed
# as validation data so its metrics are reported after every epoch.
model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=6,
    shuffle=True,
    validation_data=(X_test, y_test),
)

Epoch 1/6


2022-05-02 15:18:19.472881: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-05-02 15:18:19.697351: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-05-02 15:18:19.707666: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-05-02 15:18:20.702782: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-05-02 15:18:20.718008: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.




2022-05-02 15:18:38.617589: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-05-02 15:18:38.702618: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-05-02 15:18:38.709942: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


<keras.callbacks.History at 0x2c1c3a2b0>

In [11]:
# evaluate() returns the loss followed by the compiled metrics; index 1 is
# the accuracy metric requested in model.compile.
test_metrics = model.evaluate(X_test, y_test, verbose=0)
accuracy_percent = test_metrics[1] * 100
print("Accuracy: %.2f%%" % accuracy_percent)

Accuracy: 92.20%


In [12]:
def predict_sentiment(text):
    """Return the model's prediction for a single raw text string.

    The text is encoded with the fitted ``tokenizer`` and padded to
    ``max_len`` (the same length used for the training sequences) before
    being passed to ``model.predict``.

    Args:
        text: The raw text to score.

    Returns:
        The array returned by ``model.predict`` for the one-element batch.
        The model ends in a sigmoid unit and labels were mapped as
        negative -> 0, positive -> 1, so values near 1 indicate positive.
    """
    encoded = tokenizer.texts_to_sequences([text])
    padded = sequence.pad_sequences(encoded, maxlen=max_len)
    return model.predict(padded)


# Spot-check the trained model on a few hand-written examples.
sample_tweets = (
    "I am so happy and joyful after that flight",
    "I hate this movie",
    "That flight was terrible",
    "I had a great time",
)
for sample_tweet in sample_tweets:
    print(predict_sentiment(sample_tweet))

2022-05-02 15:20:44.519134: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-05-02 15:20:44.598154: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-05-02 15:20:44.611023: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


[[0.99947685]]
[[0.80145144]]
[[8.222269e-05]]
[[0.9998634]]
