In [None]:
#importing library pandas, numpy and ktrain : A Lightweight Wrapper for Keras to Help Train Neural Networks

In [1]:
import pandas as pd
import numpy as np

import ktrain
from ktrain import text

In [None]:
#loading ISEAR dataset

In [2]:
df=pd.read_csv("isear.csv")

In [3]:
df.head()

Unnamed: 0,Emotion,Text
0,joy,"During the period of falling in love, each tim..."
1,fear,When I was involved in a traffic accident.
2,anger,When I was driving home after several days of...
3,sadness,When I lost the person who meant the most to me.
4,disgust,The time I knocked a deer down - the sight of ...


In [4]:
df.groupby("Emotion").describe()

Unnamed: 0_level_0,Text,Text,Text,Text
Unnamed: 0_level_1,count,unique,top,freq
Emotion,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
anger,1071,1071,When I was driving home after several days of...,1
disgust,1067,1066,I saw a program on T.V where they had to saw a...,2
fear,1081,1074,Before the examn.,3
guilt,1052,1046,NO RESPONSE.,3
joy,1081,1070,When I was admitted to the University.,3
sadness,1067,1040,When my grandfather died.,8
shame,1054,1046,Not doing well in the examn.,4


In [5]:
df["Emotion"].value_counts()

joy        1081
fear       1081
anger      1071
sadness    1067
disgust    1067
shame      1054
guilt      1052
Name: Emotion, dtype: int64

In [None]:
#create emotion class for later use in Ktrain

In [6]:
class_names = ['joy', 'sadness', 'fear', 'anger', 'disgust','shame','guilt']


In [None]:
#Encoding emotion values into Discreate values

In [7]:
encoding = {
    'joy': 0,
    'sadness': 1,
    'fear': 2,
    'anger': 3,
    'disgust': 4,
    'shame':5,
    'guilt':6
}

In [None]:
#dividing into inpout and output
#using .tolist() to convert them into list

In [8]:
reviews = df['Text'].values.tolist()
labels = df['Emotion'].tolist()

In [None]:
#dividing into train and test set.
#here 20% of the data will be used for test set

In [9]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(reviews, labels, test_size=.2)

In [None]:
#Encoding emotion values and putting them into test and train set

In [10]:
y_train = [encoding[x] for x in y_train]
y_test = [encoding[x] for x in y_test]

In [None]:
#Data preprocessing in BERT accomplished by setting preprocess_mode. The BERT model and vocabulary will be downloaded.

In [11]:
(x_train,  y_train), (x_test, y_test), preproc = text.texts_from_array(x_train=X_train, y_train=y_train,
                                                                       x_test=X_test, y_test=y_test,
                                                                       class_names=class_names,
                                                                       preprocess_mode='bert',
                                                                       maxlen=350, 
                                                                       max_features=35000)

preprocessing train...
language: en


Is Multi-Label? False
preprocessing test...
language: en


task: text classification


In [None]:
#Loading the pretrained BERT for text classification

In [12]:
model = text.text_classifier('bert', train_data=(x_train, y_train), preproc=preproc)

Is Multi-Label? False
maxlen is 350
done.


In [None]:
#Wrapping it in a Learner object

In [13]:
learner = ktrain.get_learner(model, train_data=(x_train, y_train), 
                             val_data=(x_test, y_test),
                             batch_size=6)

In [None]:
#Training the model

In [14]:
learner.fit_onecycle(2e-5, 5)



begin training using onecycle policy with max lr of 2e-05...
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x1a3bd7e0670>

In [None]:
#Validating the model accuracy

In [15]:
learner.validate(val_data=(x_test, y_test), class_names=class_names)

              precision    recall  f1-score   support

         joy       0.87      0.89      0.88       204
     sadness       0.70      0.77      0.73       209
        fear       0.76      0.78      0.77       221
       anger       0.69      0.64      0.66       219
     disgust       0.72      0.73      0.72       222
       shame       0.64      0.61      0.63       215
       guilt       0.66      0.65      0.66       205

    accuracy                           0.72      1495
   macro avg       0.72      0.72      0.72      1495
weighted avg       0.72      0.72      0.72      1495



array([[181,   7,   4,   2,   0,   7,   3],
       [  6, 160,   9,  11,   7,   7,   9],
       [  7,  17, 172,   5,   7,   6,   7],
       [  2,  17,   7, 140,  30,  13,  10],
       [  2,  10,  13,  23, 161,  12,   1],
       [  7,   6,  12,  10,  11, 131,  38],
       [  2,  13,   8,  12,   8,  28, 134]], dtype=int64)

In [None]:
#testing with input

In [16]:
predictor = ktrain.get_predictor(learner.model, preproc)
predictor.get_classes()

['joy', 'sadness', 'fear', 'anger', 'disgust', 'shame', 'guilt']

In [24]:
import time 

message = 'My teamates are helpful '

start_time = time.time() 
prediction = predictor.predict(message)

print('predicted: {} ({:.2f})'.format(prediction, (time.time() - start_time)))

predicted: joy (0.33)


In [None]:
#saving the model for later use

In [18]:
predictor.save("models/bert_model")