# Importing libraries

In [1]:
import numpy as np
import pandas as pd

In [2]:
data = pd.read_csv('judge-1377884607_tweet_product_company.csv', encoding='latin1')

In [3]:
data.head()

Unnamed: 0,tweet_text,emotion_in_tweet_is_directed_at,is_there_an_emotion_directed_at_a_brand_or_product
0,.@wesley83 I have a 3G iPhone. After 3 hrs twe...,iPhone,Negative emotion
1,@jessedee Know about @fludapp ? Awesome iPad/i...,iPad or iPhone App,Positive emotion
2,@swonderlin Can not wait for #iPad 2 also. The...,iPad,Positive emotion
3,@sxsw I hope this year's festival isn't as cra...,iPad or iPhone App,Negative emotion
4,@sxtxstate great stuff on Fri #SXSW: Marissa M...,Google,Positive emotion


# Removing unnecessary columns

In [4]:
pd.set_option('display.max_colwidth',None)
data = data[['tweet_text','is_there_an_emotion_directed_at_a_brand_or_product']]
data.columns = ["text","sentiment"]

In [5]:
data.head()

Unnamed: 0,text,sentiment
0,".@wesley83 I have a 3G iPhone. After 3 hrs tweeting at #RISE_Austin, it was dead! I need to upgrade. Plugin stations at #SXSW.",Negative emotion
1,"@jessedee Know about @fludapp ? Awesome iPad/iPhone app that you'll likely appreciate for its design. Also, they're giving free Ts at #SXSW",Positive emotion
2,@swonderlin Can not wait for #iPad 2 also. They should sale them down at #SXSW.,Positive emotion
3,@sxsw I hope this year's festival isn't as crashy as this year's iPhone app. #sxsw,Negative emotion
4,"@sxtxstate great stuff on Fri #SXSW: Marissa Mayer (Google), Tim O'Reilly (tech books/conferences) &amp; Matt Mullenweg (Wordpress)",Positive emotion


In [6]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9093 entries, 0 to 9092
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   text       9092 non-null   object
 1   sentiment  9093 non-null   object
dtypes: object(2)
memory usage: 142.2+ KB


In [7]:
data.isna().sum()

text         1
sentiment    0
dtype: int64

In [8]:
data = data.dropna(subset = ['text'])

In [9]:
data.isna().sum()

text         0
sentiment    0
dtype: int64

In [10]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 9092 entries, 0 to 9092
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   text       9092 non-null   object
 1   sentiment  9092 non-null   object
dtypes: object(2)
memory usage: 213.1+ KB


In [11]:
data.sentiment.nunique()

4

In [12]:
data.sentiment.unique()

array(['Negative emotion', 'Positive emotion',
       'No emotion toward brand or product', "I can't tell"], dtype=object)

# Tokenization

In [13]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import pad_sequences

In [14]:
tokenizer = Tokenizer()
tokenizer.fit_on_texts(data['text'])
X = tokenizer.texts_to_sequences(data['text'])
X = pad_sequences(X)

In [15]:
X[0]

array([   0,    0,    0,    0,    0,    0,    0,    0,    0, 5869,   23,
         51,   11,  607,   18,  257,  111, 2582,  634,    6, 1351,   25,
         32,   86,  893,   23,  104,    5, 1112, 2583, 3955,    6,    1])

# Defining labels

In [16]:
y = pd.get_dummies(data['sentiment'])

In [17]:
y.head()

Unnamed: 0,I can't tell,Negative emotion,No emotion toward brand or product,Positive emotion
0,False,True,False,False
1,False,False,False,True
2,False,False,False,True
3,False,True,False,False
4,False,False,False,True


In [18]:
y.replace({True:1, False:0}, inplace=True)

In [19]:
y.head()

Unnamed: 0,I can't tell,Negative emotion,No emotion toward brand or product,Positive emotion
0,0,1,0,0
1,0,0,0,1
2,0,0,0,1
3,0,1,0,0
4,0,0,0,1


In [20]:
from sklearn.model_selection import train_test_split

In [21]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

# Model Building

In [22]:
from keras.layers import Dense, LSTM, Embedding
from keras.models import Sequential

In [26]:
model = Sequential()
model.add(Embedding(input_dim=len(tokenizer.word_index)+1, output_dim=100))
model.add(LSTM(50,return_sequences=True))
model.add(LSTM(64))
model.add(Dense(50,activation='relu'))
model.add(Dense(units=y.shape[1], activation='softmax'))

In [27]:
model.compile(loss = 'categorical_crossentropy', optimizer = 'adam', metrics = ['accuracy'])

In [28]:
model.fit(X_train, y_train, epochs = 10, validation_split = 0.1)

Epoch 1/10
[1m205/205[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 37ms/step - accuracy: 0.5703 - loss: 1.0201 - val_accuracy: 0.6154 - val_loss: 0.8705
Epoch 2/10
[1m205/205[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 34ms/step - accuracy: 0.6994 - loss: 0.7322 - val_accuracy: 0.6717 - val_loss: 0.8035
Epoch 3/10
[1m205/205[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 34ms/step - accuracy: 0.8062 - loss: 0.5017 - val_accuracy: 0.6841 - val_loss: 0.8408
Epoch 4/10
[1m205/205[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 34ms/step - accuracy: 0.8551 - loss: 0.3807 - val_accuracy: 0.6772 - val_loss: 0.9106
Epoch 5/10
[1m205/205[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 34ms/step - accuracy: 0.8859 - loss: 0.2846 - val_accuracy: 0.6813 - val_loss: 1.0186
Epoch 6/10
[1m205/205[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 34ms/step - accuracy: 0.8997 - loss: 0.2397 - val_accuracy: 0.6786 - val_loss: 1.3233
Epoch 7/10
[1m205/2

<keras.src.callbacks.history.History at 0x1442a3d6610>

# Prediction

In [29]:
y_pred = model.predict(X_test)

[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step


In [30]:
from sklearn.metrics import accuracy_score
accuracy_score(y_pred, y_test)

ValueError: Classification metrics can't handle a mix of continuous-multioutput and multilabel-indicator targets

In [None]:
new_tweet = ["I love my Samsung A15, expecially its new features."]
new_tweet_seq = tokenizer.texts_to_sequences(new_tweet)
new_tweet_padded = pad_sequences(new_tweet_seq, maxlen=X.shape[1])
prediction = model.predict(new_tweet_padded)
print("Predictions:", prediction)