In [5]:
#import necessary libraries
import numpy as np # linear algebra
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.feature_extraction.text import CountVectorizer
from keras.preprocessing.text import Tokenizer
from keras.utils import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM, SpatialDropout1D
from sklearn.model_selection import train_test_split
# from keras.utils.np_utils import to_categorical
from sklearn.utils import resample
from sklearn.utils import shuffle
from sklearn.metrics import confusion_matrix,classification_report
import re

In [6]:
df = pd.read_csv(r'data_train.csv',encoding= 'latin1')

In [7]:
df.head()

Unnamed: 0,Emotion,Text
0,neutral,There are tons of other paintings that I thin...
1,sadness,"Yet the dog had grown old and less capable , a..."
2,fear,When I get into the tube or the train without ...
3,fear,This last may be a source of considerable disq...
4,anger,She disliked the intimacy he showed towards so...


In [8]:
df.tail()

Unnamed: 0,Emotion,Text
7930,sadness,When I felt alone and without love.
7931,anger,Irina hung up in exasperation .
7932,sadness,No wonder she was now inconsolable at the pros...
7933,fear,My friend had been telling me about a certain ...
7934,anger,I've created a monster ... you're a better ba...


In [9]:
df.shape

(7935, 2)

In [10]:
df.columns

Index(['Emotion', 'Text'], dtype='object')

In [11]:
#print the unique value of sentiment data
print('The unique numbers in the sentiment data are as follow :')
df.nunique()

The unique numbers in the sentiment data are as follow :


Emotion       5
Text       7265
dtype: int64

In [12]:
df.isnull().sum()

Emotion    1
Text       1
dtype: int64

In [13]:
df.isnull().sum().sum()

2

In [14]:
df.dtypes

Emotion    object
Text       object
dtype: object

In [15]:
df.dropna(inplace=True)

In [16]:
df.Emotion.value_counts()

Emotion
sadness    1641
joy        1619
neutral    1616
anger      1566
fear       1492
Name: count, dtype: int64

In [17]:
def text_clean(Text): 
    # changing to lower case
    lower = Text.str.lower()
    
    # Replacing the repeating pattern of &#039;
    pattern_remove = lower.str.replace("&#039;", "")
    
    # Removing all the special Characters
    special_remove = pattern_remove.str.replace(r'[^\w\d\s]',' ')
    
    # Removing all the non ASCII characters
    ascii_remove = special_remove.str.replace(r'[^\x00-\x7F]+',' ')
    
    # Removing the leading and trailing Whitespaces
    whitespace_remove = ascii_remove.str.replace(r'^\s+|\s+?$','')
    
    # Replacing multiple Spaces with Single Space
    multiw_remove = whitespace_remove.str.replace(r'\s+',' ')
    
    # Replacing Two or more dots with one
    dataframe = multiw_remove.str.replace(r'\.{2,}', ' ')
    
    return dataframe

In [18]:
df['text_clean'] = text_clean(df['Text'])

In [19]:
df.head()

Unnamed: 0,Emotion,Text,text_clean
0,neutral,There are tons of other paintings that I thin...,there are tons of other paintings that i thin...
1,sadness,"Yet the dog had grown old and less capable , a...","yet the dog had grown old and less capable , a..."
2,fear,When I get into the tube or the train without ...,when i get into the tube or the train without ...
3,fear,This last may be a source of considerable disq...,this last may be a source of considerable disq...
4,anger,She disliked the intimacy he showed towards so...,she disliked the intimacy he showed towards so...


In [20]:
df = df[['text_clean','Emotion']]

In [21]:
df.head()

Unnamed: 0,text_clean,Emotion
0,there are tons of other paintings that i thin...,neutral
1,"yet the dog had grown old and less capable , a...",sadness
2,when i get into the tube or the train without ...,fear
3,this last may be a source of considerable disq...,fear
4,she disliked the intimacy he showed towards so...,anger


In [22]:
df.tail()

Unnamed: 0,text_clean,Emotion
7930,when i felt alone and without love.,sadness
7931,irina hung up in exasperation .,anger
7932,no wonder she was now inconsolable at the pros...,sadness
7933,my friend had been telling me about a certain ...,fear
7934,i've created a monster ... you're a better ba...,anger


In [23]:
df['Emotion'].value_counts()

Emotion
sadness    1641
joy        1619
neutral    1616
anger      1566
fear       1492
Name: count, dtype: int64

In [24]:
df = df[:10000]

In [25]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
df['Emotion'] = le.fit_transform(df['Emotion'])

In [26]:
df['Emotion'].value_counts()

Emotion
4    1641
2    1619
3    1616
0    1566
1    1492
Name: count, dtype: int64

In [27]:
df.head()

Unnamed: 0,text_clean,Emotion
0,there are tons of other paintings that i thin...,3
1,"yet the dog had grown old and less capable , a...",4
2,when i get into the tube or the train without ...,1
3,this last may be a source of considerable disq...,1
4,she disliked the intimacy he showed towards so...,0


In [28]:
df.shape

(7934, 2)

In [29]:
x = df['text_clean']
y= df['Emotion']

In [30]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x,y, stratify=y, test_size=0.3, random_state=101)

In [31]:
from sklearn.feature_extraction.text import HashingVectorizer
hvectorizer = HashingVectorizer(n_features=10000,norm=None,alternate_sign=False,stop_words='english') 
x_train = hvectorizer.fit_transform(x_train).toarray()
x_test = hvectorizer.transform(x_test).toarray()

In [32]:
x_train

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [33]:
y_train

2998    4
5474    0
5449    4
7514    2
1340    4
       ..
4283    2
1098    0
5627    1
2389    2
6843    4
Name: Emotion, Length: 5553, dtype: int32

In [34]:
from sklearn.metrics import accuracy_score,precision_score,recall_score
from sklearn.tree import DecisionTreeClassifier
dt = DecisionTreeClassifier()
dt.fit(x_train,y_train)
y_pred = dt.predict(x_test)
acc_dt = accuracy_score(y_test,y_pred)
print(acc_dt)

0.5548089038219236


In [35]:
import pickle
filename = 'cnn.sav'
pickle.dump(dt, open(filename, 'wb'))

In [36]:
model = pickle.load(open(filename, 'rb'))

In [37]:
#importing the required libraries
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPool2D
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Dense

(x_train,y_train) , (x_test,y_test)=mnist.load_data()
#reshaping data
X_train = x_train.reshape((x_train.shape[0], x_train.shape[1], x_train.shape[2], 1))
X_test = x_test.reshape((x_test.shape[0],x_test.shape[1],x_test.shape[2],1)) 
#checking the shape after reshaping
print(X_train.shape)
print(X_test.shape)
#normalizing the pixel values
X_train=X_train/255
X_test=X_test/255
#defining model
model=Sequential()
#adding convolution layer
model.add(Conv2D(32,(3,3),activation='relu',input_shape=(28,28,1)))
#adding pooling layer
model.add(MaxPool2D(2,2))
#adding fully connected layer
model.add(Flatten())
model.add(Dense(100,activation='relu'))
#adding output layer
model.add(Dense(10,activation='softmax'))
#compiling the model
model.compile(loss='sparse_categorical_crossentropy',optimizer='adam',metrics=['accuracy'])
#fitting the model
model.fit(X_train,y_train,epochs=5)
acc_cnn=model.evaluate(X_test,y_test)
acc_cnn

(60000, 28, 28, 1)
(10000, 28, 28, 1)



Epoch 1/5


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[0.0411364771425724, 0.987500011920929]

In [38]:
acc_cnn = acc_cnn[1]

In [39]:
acc_cnn

0.987500011920929

In [40]:
from keras.models import load_model
model.save('cnn.h5')

  saving_api.save_model(


In [41]:
x_train.shape

(60000, 28, 28)

In [42]:
y_train.shape

(60000,)

In [43]:
# Now tokenizing the text column    
max_fatures = 2000
tokenizer = Tokenizer(num_words=max_fatures, split=' ')
tokenizer.fit_on_texts(df['text_clean'].values)
X = tokenizer.texts_to_sequences(df['text_clean'].values)
X = pad_sequences(X)
X[:2]

array([[   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0, 

In [44]:
#Hereby I declare the train and test dataset.
Y = df['Emotion'].values
X_train, X_test, Y_train, Y_test = train_test_split(X,Y, test_size = 0.20, random_state = 42)
print(X_train.shape,Y_train.shape)
print(X_test.shape,Y_test.shape)

(6347, 205) (6347,)
(1587, 205) (1587,)


In [45]:
from tensorflow.keras.utils import to_categorical

# convert target variable to categorical
Y_train = to_categorical(Y_train, num_classes=5)

# define model
embed_dim = 128
lstm_out = 196

model = Sequential()
model.add(Embedding(max_fatures, embed_dim,input_length = X.shape[1]))
model.add(SpatialDropout1D(0.4))
model.add(LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(Y_train.shape[1], activation='softmax'))
model.compile(loss = 'categorical_crossentropy', optimizer='adam',metrics = ['accuracy'])
print(model.summary())   

# train model
batch_size = 64
model.fit(X_train, Y_train, epochs=15, batch_size=batch_size, verbose=1)


Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 205, 128)          256000    
                                                                 
 spatial_dropout1d (Spatial  (None, 205, 128)          0         
 Dropout1D)                                                      
                                                                 
 lstm (LSTM)                 (None, 196)               254800    
                                                                 
 dense_2 (Dense)             (None, 5)                 985       
                                                                 
Total params: 511785 (1.95 MB)
Trainable params: 511785 (1.95 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Ep

<keras.src.callbacks.History at 0x2c2ab031630>

In [46]:
from tensorflow.keras.models import Model

In [47]:
final_model= Model.save(model,"lstm.h5")

In [48]:
result = dt.predict(hvectorizer.transform(['''there are tons of other paintings that i think...	''']))
if result==0:
    print("The Entered Text has Anger Sentiment")
elif result==1:
    print("The Entered Text Has Fear Sentiment")
elif result==2:
    print("The Entered Text Has Joy Sentiment")
elif result==3:
    print("The Entered Text Has Neutral Sentiment")
else:
    print("The Entered Text Has Sadness Sentiment")

The Entered Text Has Neutral Sentiment


In [49]:
result = dt.predict(hvectorizer.transform(['''yet the dog had grown old and less capable and...	''']))
if result==0:
    print("The Entered Text has Anger Sentiment")
elif result==1:
    print("The Entered Text Has Fear Sentiment")
elif result==2:
    print("The Entered Text Has Joy Sentiment")
elif result==3:
    print("The Entered Text Has Neutral Sentiment")
else:
    print("The Entered Text Has Sadness Sentiment")

The Entered Text Has Sadness Sentiment
