In [1]:
import pandas as pd
import numpy as np

import ktrain
from ktrain import text

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
data_train = pd.read_csv('data_train.csv', encoding='utf-8')
data_test = pd.read_csv('data_test.csv', encoding='utf-8')

X_train = data_train.Text.tolist()
X_test = data_test.Text.tolist()

y_train = data_train.Emotion.tolist()
y_test = data_test.Emotion.tolist()

data = pd.concat([data_train, data_test], ignore_index=True)  # corrected line

class_names = ['joy', 'sadness', 'fear', 'anger', 'neutral']

print('size of training set: %s' % (len(data_train['Text'])))
print('size of validation set: %s' % (len(data_test['Text'])))
print(data.Emotion.value_counts())

data.head(10)


size of training set: 7934
size of validation set: 3393
Emotion
joy        2326
sadness    2317
anger      2259
neutral    2254
fear       2171
Name: count, dtype: int64


Unnamed: 0,Emotion,Text
0,neutral,There are tons of other paintings that I thin...
1,sadness,"Yet the dog had grown old and less capable , a..."
2,fear,When I get into the tube or the train without ...
3,fear,This last may be a source of considerable disq...
4,anger,She disliked the intimacy he showed towards so...
5,sadness,When my family heard that my Mother's cousin w...
6,joy,Finding out I am chosen to collect norms for C...
7,anger,A spokesperson said : ` Glen is furious that t...
8,neutral,Yes .
9,sadness,"When I see people with burns I feel sad, actua..."


In [3]:
encoding = {
    'joy': 0,
    'sadness': 1,
    'fear': 2,
    'anger': 3,
    'neutral': 4
}

# Integer values for each class
y_train = [encoding[x] for x in y_train]
y_test = [encoding[x] for x in y_test]

In [4]:
(x_train,  y_train), (x_test, y_test), preproc = text.texts_from_array(x_train=X_train, y_train=y_train,
                                                                       x_test=X_test, y_test=y_test,
                                                                       class_names=class_names,
                                                                       preprocess_mode='bert',
                                                                       maxlen=350, 
                                                                       max_features=35000)

preprocessing train...
language: en


Is Multi-Label? False
preprocessing test...
language: en


task: text classification


In [5]:
model = text.text_classifier('bert', train_data=(x_train, y_train), preproc=preproc)

Is Multi-Label? False
maxlen is 350




done.


In [6]:

learner = ktrain.get_learner(model, train_data=(x_train, y_train), 
                             val_data=(x_test, y_test),  batch_size=6)

In [7]:
learner.fit_onecycle(2e-5, 3)



begin training using onecycle policy with max lr of 2e-05...
Epoch 1/3
  26/1323 [..............................] - ETA: 1:45:24 - loss: 1.8281 - accuracy: 0.1731

KeyboardInterrupt: 

In [None]:
learner.validate(val_data=(x_test, y_test), class_names=class_names)

              precision    recall  f1-score   support

         joy       0.87      0.85      0.86       707
     sadness       0.84      0.81      0.82       676
        fear       0.88      0.84      0.86       679
       anger       0.78      0.82      0.80       693
     neutral       0.80      0.84      0.82       638

    accuracy                           0.83      3393
   macro avg       0.83      0.83      0.83      3393
weighted avg       0.83      0.83      0.83      3393



array([[604,  10,  15,  17,  61],
       [ 17, 547,  28,  57,  27],
       [ 19,  22, 572,  54,  12],
       [ 17,  49,  25, 568,  34],
       [ 40,  25,   8,  28, 537]], dtype=int64)

In [9]:
predictor = ktrain.get_predictor(learner.model, preproc)
predictor.get_classes()

['joy', 'sadness', 'fear', 'anger', 'neutral']

In [None]:
import time 

message = '“Rich girls who don’t know a thing about anything,” he deadpanned— something he was obviously used to saying, if not thinking. I would have laughed if that comment hadn’t stung so much. “Hey! You don’t even know me!” I shouted after him. “Don’t need to,” he called back. He slipped into a crowd of people milling about on the sidewalk and disappeared from sight within a few seconds'

start_time = time.time() 
prediction = predictor.predict(message)

probabilities = predictor.predict_proba(message)

# Assuming class_names is a list of strings representing the class names in the same order as sthe probabilities
class_names = predictor.get_classes()

print('Predicted probabilities:')
for class_name, probability in zip(class_names, probabilities):
    print(f"{class_name}: {probability:.2f}")

print('\npredicted: {} ({:.2f})'.format(prediction, (time.time() - start_time)))


Predicted probabilities:
joy: 0.00
sadness: 0.97
fear: 0.02
anger: 0.00
neutral: 0.00

predicted: sadness (2.64)


In [10]:
# let's save the predictor for later use
predictor.save("models/bert_model")

predictor = ktrain.load_predictor("models/bert_model")

  saving_api.save_model(


In [16]:
import time

# Your message
message = '“Rich girls who don’t know a thing about anything,” he deadpanned— something he was obviously used to saying, if not thinking. I would have laughed if that comment hadn’t stung so much. “Hey! You don’t even know me!” I shouted after him. “Don’t need to,” he called back. He slipped into a crowd of people milling about on the sidewalk and disappeared from sight within a few seconds'

# Start the timer
start_time = time.time()

# Make the prediction
prediction = predictor.predict(message)

# Get the predicted probabilities
probabilities = predictor.predict_proba(message)

# Get the class names
class_names = predictor.get_classes()

# Print the predicted probabilities
print('Predicted probabilities:')
for class_name, probability in zip(class_names, probabilities):
    print(f"{class_name}: {probability:.2f}")

# Calculate the time taken and print the final prediction
time_taken = time.time() - start_time
print(f'\npredicted: {prediction} ({time_taken:.2f})')

Predicted probabilities:
joy: 0.33
sadness: 0.24
fear: 0.07
anger: 0.17
neutral: 0.18

predicted: joy (0.51)
