In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import pickle

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Embedding,Dense,LSTM,Bidirectional,Input
import tensorflow_addons as tfa
from tensorflow_addons.text.crf_wrapper import CRFModelWrapper

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,confusion_matrix
import regex as re



TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 



In [84]:
tf.config.list_physical_devices()

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]

GLOVE Embeddings And Tokenizor

In [85]:
word_to_vec = {}
with open('glove.6B/glove.6B.100d.txt','r', encoding='utf-8') as f:
  for line in f:
    values = line.split()
    word = values[0]
    vector = np.asarray(values[1:],'float32')
    word_to_vec[word]=vector

In [87]:
tokenizer = Tokenizer(oov_token='<OOV>')
tokenizer.fit_on_texts(word_to_vec.keys())
word_to_index = tokenizer.word_index
index_to_word = tokenizer.index_word

In [52]:
with open('tokenizer.pickle', 'wb') as f:
    pickle.dump(tokenizer, f)

In [88]:
T = 71
vocab_size = len(word_to_index)+1
embedding_dim = 100

embedding_matrix = np.zeros((vocab_size, embedding_dim))

for word, i in word_to_index.items():
    if word in word_to_vec:
        embedding_matrix[i] = word_to_vec[word]
with tf.device('/CPU:0'):
    embedding_layer = Embedding(
        input_dim=vocab_size,
        output_dim=embedding_dim,
        weights=[embedding_matrix],
        trainable = False
    )

Data Preprocessing

In [89]:
import ast
def read_data(path,sentences,triplets):
    with open(path, 'r') as file:
        lines = file.readlines()
        for line in lines:
            parts = line.strip().split('#### #### ####')
            if len(parts)==2:
                sentence = parts[0].strip()
                triplet = ast.literal_eval(parts[1].strip())

                sentences.append(sentence)
                triplets.append(triplet)
sentences,triplets =[],[]
paths = ['14res/test.txt','14res/train.txt','15res/test.txt','15res/train.txt','16res/test.txt','16res/train.txt']
for path in paths:
    read_data(path,sentences,triplets)
df = pd.DataFrame({'sentence':sentences,'triplets':triplets})
df.shape

(3868, 2)

In [90]:
df.drop_duplicates(subset='sentence',inplace=True)
df.shape

(2729, 2)

In [91]:
corrected_sentence = []
tokenized = []
corrected_triplets = []
s = 0
for idx,row in df.iterrows():
    tokenized_sentence = tokenizer.texts_to_sequences([row.sentence])[0]
    regex_sentence = re.findall(r'\b[\'\w]+\b',row.sentence)
    if len(tokenized_sentence)==len(regex_sentence):
        triplets = row.triplets.copy()
        new_triplets = []
        sentence = row.sentence.split(' ')
        for triplet in triplets:
            #nothing-0, aspect-1, opinion-2
            pairs = sentence.copy()
            for j in range(len(pairs)):
                if j in triplet[0]:
                    pairs[j] = [pairs[j],1]
                elif j in triplet[1]:
                    pairs[j] = [pairs[j],2]
                else:
                    pairs[j] = [pairs[j],0]
            new_pairs = []
            for j in pairs:
                tokenized_word = tokenizer.texts_to_sequences([j[0]])[0]
                regex_word = re.findall(r'\b[\'\w]+\b',j[0])
                for i in regex_word:
                    new_pairs.append([i,j[1]])
            aspect = []
            opinion = []
            for j in range(len(new_pairs)):
                if new_pairs[j][1] == 1:
                    aspect.append(j)
                elif new_pairs[j][1] == 2:
                    opinion.append(j)
            new_triplets.append((aspect,opinion,triplet[2]))
        new_sentence = ''
        for i in regex_sentence:
            new_sentence+=i+' '
        corrected_sentence.append(new_sentence)
        corrected_triplets.append(new_triplets)

In [92]:
df = pd.DataFrame({'sentence':corrected_sentence,'triplets':corrected_triplets})
df

Unnamed: 0,sentence,triplets
0,The bread is top notch as well,"[([1], [3, 4], POS)]"
1,I have to say they have one of the fastest del...,"[([10, 11], [9], POS)]"
2,Food is always fresh and hot ready to eat,"[([0], [3], POS), ([0], [5], POS)]"
3,Did I mention that the coffee is OUTSTANDING,"[([5], [7], POS)]"
4,Certainly not the best sushi in New York howev...,"[([15], [18], POS)]"
...,...,...
2707,I have been to Rao s probably 15 times the pas...,"[([4, 5], [17], POS)]"
2708,The decor is very simple but comfortable,"[([1], [4], POS), ([1], [6], POS)]"
2709,whoever the jazz duo was they were on POINT,"[([2, 3], [7, 8], POS)]"
2710,even the wine by the glass was good,"[([2, 3, 4, 5], [7], POS)]"


Aspect Opinion Extraction Using CRF

In [10]:
classes = {0:'padding',1:'O',2:'A-B',3:'A-I',4:'O-B',5:'O-I'}
X = []
y = []
for idx,row in df.iterrows():
    triplets = row.triplets
    tokenized = pad_sequences(tokenizer.texts_to_sequences([row.sentence]),maxlen=T,padding='post')[0]
    labelling = np.ones(shape=(T,))
    for i in range(T):
        if tokenized[i]==0:
            labelling[i]=0
    for i in triplets:
        for j in range(len(i[0])):
            if j == 0:
                labelling[i[0][j]]=2
            else:
                labelling[i[0][j]]=3
        for j in range(len(i[1])):
            if j == 0:
                labelling[i[1][j]]=4
            else:
                labelling[i[1][j]]=5
    X.append(tokenized)
    y.append(labelling)
X = np.array(X,dtype=int)
y = np.array(y,dtype=int)

In [11]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=1)

In [12]:
def CRF_model(T, n_a, classes):
    X = Input(shape=(T,))
    embeddings = embedding_layer(X)
    a = Bidirectional(LSTM(units=n_a,return_sequences=True, dropout=0.5, recurrent_dropout=0.4))(embeddings)
    base_model = Model(inputs = X, outputs = a)
    model = CRFModelWrapper(base_model, classes)
    return model

In [13]:
extractor = CRF_model(T,64,6)
extractor.compile(optimizer='adam', metrics=['accuracy'])

In [27]:
extractor.fit(X_train,y_train,epochs=20)
extractor.evaluate(X_test,y_test)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


[0.7053406834602356, 4.789958477020264, 4.789958477020264]

In [28]:
extractor.evaluate(X_test,y_test)



[0.7053406834602356, 4.789958477020264, 4.789958477020264]

In [50]:
with tf.device('/CPU:0'):
    EXAMPLES = ["Affordably Priced"]
    z = pad_sequences(tokenizer.texts_to_sequences(EXAMPLES),maxlen=71,padding='post')
    aa = extractor.predict(z)
    for i,j in zip(z[0],aa[0]):
        if j in {2,3,4,5}:
            print(index_to_word[i],j)
EXAMPLES[0]

affordably 4
priced 2


'Affordably Priced'

In [47]:
extractor.save('extractor_model.tf', save_format='tf')

INFO:tensorflow:Assets written to: extractor_model.tf\assets


INFO:tensorflow:Assets written to: extractor_model.tf\assets


Aspect Opinion Pair Classification

In [151]:
df_pair = pd.DataFrame(columns=['sentence','indices','tokenized_sentence','custom_mask','ispair'])
for idx,row in df.iterrows():
    sentence = row.sentence
    tokenized = pad_sequences(tokenizer.texts_to_sequences([row.sentence]),maxlen=T,padding='post')[0]
    aspect_i = [i[0] for i in row.triplets]
    opinion_i = [i[1] for i in row.triplets]
    true_pair = [tuple(i[0]+i[1]) for i in row.triplets]
    false_pair = []
    for i in range(len(aspect_i)):
        for j in range(len(aspect_i)):
            if i != j:
                joint = tuple(aspect_i[i]+opinion_i[j])
                if joint not in true_pair:
                    false_pair.append(joint)
    for i in true_pair:
        mask = np.zeros(shape=(T,),dtype=bool)
        for j in i:
            mask[j]=True
        d = pd.DataFrame({'sentence':[sentence],
             'indices':[i],
             'tokenized_sentence':[tokenized],
             'custom_mask':[mask],
             'ispair':[1]
             })
        df_pair = pd.concat([df_pair,d])
    for i in false_pair:
        mask = np.zeros(shape=(T,),dtype=bool)
        for j in i:
            mask[j]=True
        d = pd.DataFrame({'sentence':[sentence],
             'indices':[i],
             'tokenized_sentence':[tokenized],
             'custom_mask':[mask],
             'ispair':[0]
             })
        df_pair = pd.concat([df_pair,d])

In [152]:
df_pair.head(5)

Unnamed: 0,sentence,indices,tokenized_sentence,custom_mask,ispair
0,The bread is top notch as well,"(1, 3, 4)","[202, 15739, 1409, 254, 8301, 918, 188, 0, 0, ...","[False, True, False, True, True, False, False,...",1
0,I have to say they have one of the fastest del...,"(10, 11, 9)","[80, 4232, 138, 6115, 6102, 4232, 155, 126, 20...","[False, False, False, False, False, False, Fal...",1
0,Food is always fresh and hot ready to eat,"(0, 3)","[1586, 1409, 6156, 4392, 190, 1321, 1105, 138,...","[True, False, False, True, False, False, False...",1
0,Food is always fresh and hot ready to eat,"(0, 5)","[1586, 1409, 6156, 4392, 190, 1321, 1105, 138,...","[True, False, False, False, False, True, False...",1
0,Did I mention that the coffee is OUTSTANDING,"(5, 7)","[13550, 80, 14722, 6099, 202, 4567, 1409, 1456...","[False, False, False, False, False, True, Fals...",1


In [153]:
df_pair_train,df_pair_test = train_test_split(df_pair,test_size=0.25)

In [154]:
class_count_1,class_count_0 = df_pair_train.ispair.value_counts()
df_pair_train_class_0 = df_pair_train[df_pair_train.ispair == 0]
df_pair_train_class_1 = df_pair_train[df_pair_train.ispair == 1]


In [155]:
df_pair_train.ispair.value_counts()

ispair
1    3626
0    2777
Name: count, dtype: int64

In [156]:
df_pair_train_class_0_over = df_pair_train_class_0.sample(class_count_1,replace=True)
df_pair_train_over = pd.concat([df_pair_train_class_1,df_pair_train_class_0_over])

In [157]:
df_pair_train_over.ispair.value_counts()

ispair
1    3626
0    3626
Name: count, dtype: int64

In [158]:
X_train_tokenized = np.array([x for x in df_pair_train_over.tokenized_sentence],dtype=int)
X_test_tokenized = np.array([x for x in df_pair_test.tokenized_sentence],dtype=int)
X_train_mask = np.array([x for x in df_pair_train_over.custom_mask],dtype=bool)
X_test_mask = np.array([x for x in df_pair_test.custom_mask],dtype=bool)
y_train = np.array([x for x in df_pair_train_over.ispair],dtype=int)
y_test = np.array([x for x in df_pair_test.ispair],dtype=int)

In [159]:
def pair_classification_model(units):
    input1 = Input(shape=(71,))
    input2 = Input(shape=(71,),dtype=tf.bool)
    embeddings = embedding_layer(input1)
    x = Bidirectional(LSTM(units,return_sequences=True,dropout=0.5))(embeddings)
    x = LSTM(units)(x,mask=input2)
    outputs = Dense(1,activation='sigmoid')(x)
    model = Model([input1,input2],outputs)
    return model

In [173]:
model = pair_classification_model(32)
model.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy'])

In [174]:
model.fit([X_train_tokenized,X_train_mask],y_train,epochs=20)
model.evaluate([X_test_tokenized,X_test_mask],y_test)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


[0.24444632232189178, 0.9152224659919739]

In [175]:
y_pred = model.predict([X_test_tokenized,X_test_mask])
y_pred = np.squeeze((y_pred > 0.5).astype(int))
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.88      0.94      0.91       946
           1       0.95      0.90      0.92      1189

    accuracy                           0.92      2135
   macro avg       0.91      0.92      0.91      2135
weighted avg       0.92      0.92      0.92      2135



In [260]:
model.save('pair_classifier.h5')

  saving_api.save_model(


Polarity Classification

In [None]:
index_to_polarity = {0:'NEG',1:'NEU',2:'POS'}
polarity_to_index = {'NEG':0,'NEU':1,'POS':2}
df_polarity = pd.DataFrame(columns=['sentence','tokenized_sentence','polarity'])
df_polarity
for idx,row in df.iterrows():
    sentence = row.sentence.split(' ')
    for triplet in row.triplets:
        filtered_sent = ''
        for i in triplet[0]+triplet[1]:
            filtered_sent+=sentence[i]+' '
        tokenized = pad_sequences(tokenizer.texts_to_sequences([filtered_sent]),maxlen=25,padding='post')[0]
        d = pd.DataFrame({'sentence':[filtered_sent],'tokenized_sentence':[tokenized],'polarity':[polarity_to_index[triplet[2]]]})
        df_polarity = pd.concat([df_polarity,d])

In [99]:
df_polarity

Unnamed: 0,sentence,tokenized_sentence,mask,polarity
0,The bread is top notch as well,"[202, 15739, 1409, 254, 8301, 918, 188, 0, 0, ...","[False, True, False, True, True, False, False,...",2
0,I have to say they have one of the fastest del...,"[80, 4232, 138, 6115, 6102, 4232, 155, 126, 20...","[False, False, False, False, False, False, Fal...",2
0,Food is always fresh and hot ready to eat,"[1586, 1409, 6156, 4392, 190, 1321, 1105, 138,...","[True, False, False, True, False, False, False...",2
0,Food is always fresh and hot ready to eat,"[1586, 1409, 6156, 4392, 190, 1321, 1105, 138,...","[True, False, False, False, False, True, False...",2
0,Did I mention that the coffee is OUTSTANDING,"[13550, 80, 14722, 6099, 202, 4567, 1409, 1456...","[False, False, False, False, False, True, Fals...",2
...,...,...,...,...
0,The decor is very simple but comfortable,"[202, 27879, 1409, 13562, 6362, 13534, 14804, ...","[False, True, False, False, True, False, False...",2
0,The decor is very simple but comfortable,"[202, 27879, 1409, 13562, 6362, 13534, 14804, ...","[False, True, False, False, False, False, True...",2
0,whoever the jazz duo was they were on POINT,"[18072, 202, 1344, 4855, 4231, 6102, 3370, 152...","[False, False, True, True, False, False, False...",2
0,even the wine by the glass was good,"[1993, 202, 2957, 253, 202, 1033, 4231, 811, 0...","[False, False, True, True, True, True, False, ...",2


In [146]:
df_polarity_train,df_polarity_test = train_test_split(df_polarity,test_size=0.25)

In [148]:
class_count_2,class_count_0,class_count_1 = df_polarity_train.polarity.value_counts()
df_polarity_train_class_0 = df_polarity_train[df_polarity_train.polarity == 0]
df_polarity_train_class_1 = df_polarity_train[df_polarity_train.polarity == 1]
df_polarity_train_class_2 = df_polarity_train[df_polarity_train.polarity == 2]

In [149]:
class_count_0,class_count_1,class_count_2

(710, 222, 2679)

In [150]:
df_polarity_train_class_0_over = df_polarity_train_class_0.sample(class_count_2,replace=True)
df_polarity_train_class_1_over = df_polarity_train_class_1.sample(class_count_2,replace=True)

In [151]:
df_polarity_train_under = pd.concat([df_polarity_train_class_0_over,df_polarity_train_class_1_over,df_polarity_train_class_2],axis=0)
df_polarity_train_under.shape

(8037, 3)

In [153]:
X_train = np.array([x for x in df_polarity_train_under.tokenized_sentence],dtype=int)
X_test = np.array([x for x in df_polarity_test.tokenized_sentence],dtype=int)
y_train = np.array(df_polarity_train_under.polarity,dtype=int)
y_test = np.array(df_polarity_test.polarity,dtype=int)

In [154]:
X_train.shape,X_test.shape,y_train.shape,y_test.shape

((8037, 25), (1204, 25), (8037,), (1204,))

In [158]:
def polarity_classifier(units):
    input = Input(shape=(25,))
    x = embedding_layer(input)
    x = LSTM(units, dropout=0.4, recurrent_dropout=0.4)(x)
    output = Dense(3,activation='softmax')(x)
    return Model(input,output)

In [163]:
model = polarity_classifier(32)
model.compile(optimizer='adam',loss='sparse_categorical_crossentropy',metrics=['accuracy'])

In [164]:
model.fit(X_train,y_train,epochs=40)
model.evaluate(X_test,y_test)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


[0.5995977520942688, 0.8239202499389648]

In [169]:
y_pred = model.predict(X_test)
y_pred = [np.argmax(x) for x in y_pred]
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.74      0.71      0.72       241
           1       0.35      0.57      0.43        80
           2       0.92      0.88      0.90       883

    accuracy                           0.82      1204
   macro avg       0.67      0.72      0.68      1204
weighted avg       0.85      0.82      0.83      1204



In [171]:
model.save('polarity_classifier.keras')

In [2]:
T1 = 71
T2 = 25
with open('tokenizer.pickle', 'rb') as f:
    tokenizer = pickle.load(f)
extractor = tf.keras.models.load_model('extractor_model.tf')
pair_classfier = tf.keras.models.load_model('pair_classifier.h5')
polarity_classifier = tf.keras.models.load_model('polarity_classifier.h5')

In [5]:
pair_classfier.summary()

Model: "model_15"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_33 (InputLayer)       [(None, 71)]                 0         []                            
                                                                                                  
 embedding (Embedding)       (None, 71, 100)              3392530   ['input_33[0][0]']            
                                                          0                                       
                                                                                                  
 bidirectional_16 (Bidirect  (None, 71, 64)               34048     ['embedding[0][0]']           
 ional)                                                                                           
                                                                                           

In [186]:
df.head(10)

Unnamed: 0,sentence,triplets
0,The bread is top notch as well,"[([1], [3, 4], POS)]"
1,I have to say they have one of the fastest del...,"[([10, 11], [9], POS)]"
2,Food is always fresh and hot ready to eat,"[([0], [3], POS), ([0], [5], POS)]"
3,Did I mention that the coffee is OUTSTANDING,"[([5], [7], POS)]"
4,Certainly not the best sushi in New York howev...,"[([15], [18], POS)]"
5,I trust the people at Go Sushi it never disapp...,"[([3], [1], POS)]"
6,Straight forward no surprises very decent Japa...,"[([6, 7], [5], POS)]"
7,BEST spicy tuna roll great asian salad,"[([5, 6], [4], POS), ([1, 2, 3], [0], POS)]"
8,Try the rose roll not on menu,"[([2, 3], [0], POS)]"
9,I love the drinks esp lychee martini and the f...,"[([3], [1], POS), ([5, 6], [1], POS), ([9], [1..."


In [268]:
def predict_triplets(sentence):
    sentences = sentence.split('.')
    sentences = [i for i in sentences if i]
    regex_sentence = [re.findall(r'\b[\'\w]+\b',i) for i in sentences]
    tokenized = pad_sequences(tokenizer.texts_to_sequences(np.array(sentences)),maxlen=T1,padding='post')
    labels = extractor.predict(tokenized)
    pair_sent = []
    mask = []
    aspects = []
    opinions = []
    aspects_i = []
    opinions_i = []
    for i in range(len(labels)):
        aspect_indices = []
        opinion_indices = []
        current_aspect_index = []
        current_opinion_index = []
        aspect = []
        opinion = []
        current_aspect = ''
        current_opinion = ''
        for j in range(len(labels[0])):
            if labels[i][j] == 2:
                if current_aspect_index:
                    aspect_indices.append(current_aspect_index)
                    aspect.append(current_aspect)
                current_aspect_index = [j]
                current_aspect = regex_sentence[i][j]+' '
            elif labels[i][j] == 3:
                current_aspect_index.append(j)
                current_aspect+=regex_sentence[i][j]+' '
            elif labels[i][j] == 4:
                if current_opinion_index:
                    opinion_indices.append(current_opinion_index)
                    opinion.append(current_opinion)
                current_opinion_index = [j]
                current_opinion = regex_sentence[i][j]+' '
            elif labels[i][j] == 5:
                current_opinion_index.append(j)
                current_opinion+=regex_sentence[i][j]+' '
        if current_aspect_index:
            aspect_indices.append(current_aspect_index)
            aspect.append(current_aspect)
        if current_opinion_index:
            opinion_indices.append(current_opinion_index)
            opinion.append(current_opinion)
        for x in range(len(aspect_indices)):
            for y in range(len(opinion_indices)):
                pair_sent.append(tokenized[i])
                m = np.zeros(shape=(T1),dtype=bool)
                m[aspect_indices[x]+opinion_indices[y]] = True
                mask.append(m)
                aspects_i.append(aspect_indices[x])
                opinions_i.append(opinion_indices[y])
                aspects.append(aspect[x])
                opinions.append(opinion[y])
    pair_sent = np.array(pair_sent,dtype=int)
    mask = np.array(mask,dtype=bool)
    pair_pred = pair_classfier.predict([pair_sent,mask])
    aspects_i = [aspects_i[x] for x in range(len(aspects_i)) if pair_pred[x][0]>=0.5] 
    opinions_i = [opinions_i[x] for x in range(len(opinions_i)) if pair_pred[x][0]>=0.5]
    aspects =  [aspects[x] for x in range(len(aspects)) if pair_pred[x][0]>=0.5]
    opinions = [opinions[x] for x in range(len(opinions)) if pair_pred[x][0]>=0.5]
    sentences = [x+y for x,y in zip(aspects,opinions)]
    tokenized_sentence = pad_sequences(tokenizer.texts_to_sequences(np.array(sentences)),maxlen=T2,padding='post')
    y_pred = polarity_classifier.predict(tokenized_sentence)
    y_pred = [np.argmax(x) for x in y_pred]
    triplets = []
    for i in range(len(y_pred)):
        sentiment = 'NEG'
        if y_pred[i]==1:
            sentiment = 'NEU'
        elif y_pred[i]==2:
            sentiment = "POS"
        triplets.append((aspects[i],opinions[i],sentiment))
    return triplets

In [269]:
predict_triplets('We sat outside on the terrace which was very pretty and private. Our waitress was wonderful and the food was absolutely delicious!!')



[('terrace ', 'pretty ', 'NEU'),
 ('terrace ', 'private ', 'POS'),
 ('waitress ', 'wonderful ', 'POS'),
 ('food ', 'delicious ', 'POS')]