# Assignment Two:  Sentiment Classification

For this exercise you will be using the "SemEval 2017 task 4" corpus provided on the module website, available through the following link: https://warwick.ac.uk/fac/sci/dcs/teaching/material/cs918/semeval-tweets.tar.bz2 You will focus particularly on Subtask A, i.e. classifying the overall sentiment of a tweet as positive, negative or neutral.

You are requested to produce a standalone Python program or Jupyter notebook for coursework submission. The input to your program is the SemEval data downloaded. Note that TAs need to run your program on their own machine by using the original SemEval data. As such, don’t submit a Python program that takes as input some preprocessed files.

#### Import necessary packages
You may import more packages here.

In [1]:
# Import necessary packages
import re
from os.path import join
import numpy as np
from sklearn.model_selection import GridSearchCV 
from sklearn.naive_bayes import MultinomialNB
from sklearn import naive_bayes
import re
import nltk
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
import os
import pickle

from keras.layers import Dense, Dropout, Embedding, LSTM

from sklearn.preprocessing import LabelEncoder
from keras.utils.np_utils import to_categorical

from keras.preprocessing.sequence import pad_sequences

from keras.preprocessing.text import Tokenizer

import numpy as np

import gensim

from sklearn.linear_model import LogisticRegression

#seed keras model 
from numpy.random import seed
seed(1)
import tensorflow
tensorflow.random.set_seed(2)

from keras.models import Sequential
from keras.models import load_model

from nltk.corpus import stopwords


In [2]:
# Define test sets
testsets = ['twitter-test1.txt', 'twitter-test2.txt', 'twitter-test3.txt']

In [3]:
# Skeleton: Evaluation code for the test sets
def read_test(testset):
    '''
    readin the testset and return a dictionary
    :param testset: str, the file name of the testset to compare
    '''
    id_gts = {}
    with open(testset, 'r', encoding='utf8') as fh:
        for line in fh:
            fields = line.split('\t')
            tweetid = fields[0]
            gt = fields[1]

            id_gts[tweetid] = gt

    return id_gts


def confusion(id_preds, testset, classifier):
    '''
    print the confusion matrix of {'positive', 'netative'} between preds and testset
    :param id_preds: a dictionary of predictions formated as {<tweetid>:<sentiment>, ... }
    :param testset: str, the file name of the testset to compare
    :classifier: str, the name of the classifier
    '''
    id_gts = read_test(testset)

    gts = []
    for m, c1 in id_gts.items():
        if c1 not in gts:
            gts.append(c1)

    gts = ['positive', 'negative', 'neutral']

    conf = {}
    for c1 in gts:
        conf[c1] = {}
        for c2 in gts:
            conf[c1][c2] = 0

    for tweetid, gt in id_gts.items():
        if tweetid in id_preds:
            pred = id_preds[tweetid]
        else:
            pred = 'neutral'
        conf[pred][gt] += 1

    print(''.ljust(12) + '  '.join(gts))

    for c1 in gts:
        print(c1.ljust(12), end='')
        for c2 in gts:
            if sum(conf[c1].values()) > 0:
                print('%.3f     ' % (conf[c1][c2] / float(sum(conf[c1].values()))), end='')
            else:
                print('0.000     ', end='')
        print('')

    print('')


def evaluate(id_preds, testset, classifier):
    '''
    print the macro-F1 score of {'positive', 'netative'} between preds and testset
    :param id_preds: a dictionary of predictions formated as {<tweetid>:<sentiment>, ... }
    :param testset: str, the file name of the testset to compare
    :classifier: str, the name of the classifier
    '''
    id_gts = read_test(testset)

    acc_by_class = {}
    for gt in ['positive', 'negative', 'neutral']:
        acc_by_class[gt] = {'tp': 0, 'fp': 0, 'tn': 0, 'fn': 0}

    catf1s = {}

    ok = 0
    for tweetid, gt in id_gts.items():
        if tweetid in id_preds:
            pred = id_preds[tweetid]
        else:
            pred = 'neutral'

        if gt == pred:
            ok += 1
            acc_by_class[gt]['tp'] += 1
        else:
            acc_by_class[gt]['fn'] += 1
            acc_by_class[pred]['fp'] += 1

    catcount = 0
    itemcount = 0
    macro = {'p': 0, 'r': 0, 'f1': 0}
    micro = {'p': 0, 'r': 0, 'f1': 0}
    semevalmacro = {'p': 0, 'r': 0, 'f1': 0}

    microtp = 0
    microfp = 0
    microtn = 0
    microfn = 0
    for cat, acc in acc_by_class.items():
        catcount += 1

        microtp += acc['tp']
        microfp += acc['fp']
        microtn += acc['tn']
        microfn += acc['fn']

        p = 0
        if (acc['tp'] + acc['fp']) > 0:
            p = float(acc['tp']) / (acc['tp'] + acc['fp'])

        r = 0
        if (acc['tp'] + acc['fn']) > 0:
            r = float(acc['tp']) / (acc['tp'] + acc['fn'])

        f1 = 0
        if (p + r) > 0:
            f1 = 2 * p * r / (p + r)

        catf1s[cat] = f1

        n = acc['tp'] + acc['fn']

        macro['p'] += p
        macro['r'] += r
        macro['f1'] += f1

        if cat in ['positive', 'negative']:
            semevalmacro['p'] += p
            semevalmacro['r'] += r
            semevalmacro['f1'] += f1

        itemcount += n

    micro['p'] = float(microtp) / float(microtp + microfp)
    micro['r'] = float(microtp) / float(microtp + microfn)
    micro['f1'] = 2 * float(micro['p']) * micro['r'] / float(micro['p'] + micro['r'])

    semevalmacrof1 = semevalmacro['f1'] / 2

    print(testset + ' (' + classifier + '): %.3f' % semevalmacrof1)

In [4]:
stopwords = stopwords.words('english')

def preprocess(filename):
    clean_content_tokenized = []
    tweet_sentiment =[]
    tweet_id_list =[]
    documents = []
    
    #with open(filename,'r', encoding="utf8") as f:
    with open(filename,'r', encoding="utf8") as f:
        for idx,tweet in enumerate(f):
            tweet = tweet.split('\t') #split columns by tab

            # store 1st col of tweet as tweet_id
            tweet_id = tweet[0]
            tweet_id_list.append(tweet_id)
            # second col as sentiment (pos/neg/neutral)
            sentiment = tweet[1]
            tweet_sentiment.append(tweet[1])
            # third col as tweet_message
            tweet_message = tweet[2]

            # @user
            content = re.sub(r"@[A-Za-z0-9_]+", " USERNAME ", tweet_message.lower())
            # URL link
            content = re.sub(r"http\S+", " URLLINK ", content)
            #Happy Faces
            content = re.sub(r"(:\))|(:\-\))|(:d)|(:\-d)|(:D)|(XD)|(xD)|(;D)|(;\-D)|(;\))|(;\-\))|(lol)|(LOL)|(LOFL)|(lofl)|(lmfao)|(<3)", " HEMOTICON ", content, flags = re.IGNORECASE)
            #Sad Faces
            content = re.sub(r"(:\()|(:\-\()", " SEMOTICON ", content)
            #Remove repeater letter in a word e.g. heeeello to hello
            content = re.sub(r"([A-Za-z])\1{2,}", r"\1", content)
            #Replace all whitespace characters 
            content = re.sub(r"\s", " ", content)
            # Replace EOS with END
            content = re.sub(r"(\.|!|\?) ", " END ", content) 
            # Remove non-alphanumeric characters except spaces
            content = re.sub(r"[^A-Za-z0-9 ]", "", content) 
            #Remove Pure Digits
            content = re.sub(r"\bd+\b", "", content)
            #Remove Single Letter words eg "a"
            content = re.sub(r"\b[a-z0-9]\b", "", content)
            #tokenize content
            document =  nltk.word_tokenize(content)
            clean_content_tokenized.append(document)
            
            
    return clean_content_tokenized, tweet_sentiment, tweet_id_list

def treebank_pos(word_tag):
    #Return TREEBANK TAG Part-of-speech tag
    if word_tag.startswith('V'): #verb
        return 'v'
    elif word_tag.startswith('N'): #noun 
        return 'n'
    elif  word_tag.startswith('J'): #adjective
        return 'a'
    elif word_tag.startswith('R'): #adverb
        return 'r'
    else:
        #set to noun if none is satistfied 
        return 'n'
    
def lemmatize_content (content):
    content_lemmatized = []
    content_pos=[]
    content_clean=[]
    
    #lemmatize using POS tag
    lemmatizer = nltk.stem.WordNetLemmatizer()

    #POS tagging
    for tweet in content:
        #assign POS to each word 
        temp_pos = nltk.pos_tag(tweet)
        content_pos.append(temp_pos)  
        
    for tweet in content_pos:
        #Perform lemmatization on each POS word and return the lemmatized word as list
        temp_lem = [ lemmatizer.lemmatize(word[0], pos=treebank_pos(word[1])) for word in tweet]
        #Remove stop words
        temp_lem = [word for word in temp_lem if word not in stopwords] 
        content_lemmatized.append(temp_lem) 

    for tweet in content_lemmatized:
        #Concatenate content 
        temp_concat_content = " ".join(tweet)
        content_clean.append(temp_concat_content)
        
    return content_clean, content_lemmatized

#### Load training set, dev set and testing set
Here, you need to load the training set, the development set and the test set. For better classification results, you may need to preprocess tweets before sending them to the classifiers.

In [5]:
# Load training set, dev set and testing set
preprocessed_tweets = {}
preprocessed_tokenized_tweets = {}
tweetgts = {} 
tweetids = {}

for dataset in ['twitter-training-data.txt'] + testsets:
    
    preprocessed_tweets[dataset] = [] #preprocessed tweet
    preprocessed_tokenized_tweets[dataset] = [] #preprocessed and tokenized tweet 
    tweetids[dataset] = [] #tweet id
    tweetgts[dataset] = [] #tweet sentiment
    print(dataset)
    
    
    # write code to read in the datasets here
    
    #retrieve pickled preprocessed Training set
    if dataset == 'twitter-training-data.txt':
        
        if os.path.isfile('train_preprocess.pkl'):
            with open('train_preprocess.pkl', 'rb') as file:
                preprocessed_tweets[dataset], preprocessed_tokenized_tweets[dataset],tweetgts[dataset],tweetids[dataset] = pickle.load(file)

        else:
            clean_content_tokenized,tweetgts[dataset],tweetids[dataset]  =preprocess(dataset)
            preprocessed_tweets[dataset], preprocessed_tokenized_tweets[dataset] = lemmatize_content(clean_content_tokenized)
                        
            pickle_file = open('train_preprocess.pkl', 'wb')    
            dump_params = preprocessed_tweets[dataset], preprocessed_tokenized_tweets[dataset],tweetgts[dataset],tweetids[dataset]
            pickle.dump(dump_params, pickle_file)
            pickle_file.close()
            
    else:
        clean_content_tokenized,tweetgts[dataset],tweetids[dataset]  =preprocess(dataset)
        preprocessed_tweets[dataset], preprocessed_tokenized_tweets[dataset] = lemmatize_content(clean_content_tokenized)
    

twitter-training-data.txt
twitter-test1.txt
twitter-test2.txt
twitter-test3.txt


#### Build sentiment classifiers
You need to create your own classifiers (at least 3 classifiers). For each classifier, you can choose between the bag-of-word features and the word-embedding-based features. Each classifier has to be evaluated over 3 test sets. Make sure your classifier produce consistent performance across the test sets. Marking will be based on the performance over all 5 test sets (2 of them are not provided to you).

## Feature Function Declaration

In [6]:
#feature selection

# word level tf-idf
def tfidf_word_level(content_train):
    #Vectorize content by word with 2000 max features capping
    tfidf_vect = TfidfVectorizer(analyzer='word', token_pattern=r'\S+', max_features= 2000)
    tfidf_vect.fit(content_train)
    xtrain_tfidf =  tfidf_vect.transform(content_train)

    xtrain_tfidf_np = xtrain_tfidf.todense()
    xtrain_tfidf_np = np.array(xtrain_tfidf_np)

    #return train set and vectorizer
    return (xtrain_tfidf_np, tfidf_vect)

# ngram level tf-idf
def tfidf_ngram_level(content_train):
    #Perform Bigram and Trigram Vectorization with max features of 5000
    tfidf_vect_ngram = TfidfVectorizer(analyzer='word', token_pattern=r'\S+', ngram_range=(2,3), max_features=5000) #non whitespace chars
    tfidf_vect_ngram.fit(content_train)
    xtrain_tfidf_ngram =  tfidf_vect_ngram.transform(content_train)
    
    xtrain_tfidf_ngram_np = xtrain_tfidf_ngram.todense()
    xtrain_tfidf_ngram_np= np.array(xtrain_tfidf_ngram_np)
    
    #return train set and vectorizer
    return (xtrain_tfidf_ngram_np, tfidf_vect_ngram)


# Sentiment By Lexicon
def sentiment_by_lexicon(split_corpus):
    pos_num = 0
    neg_num = 0
    
    def open_file(lexicon_file):
        with open(lexicon_file,'r') as file:
            lexicon_content = set(file.read().splitlines())
            return lexicon_content
            
    positive_words= open_file('positive-words.txt')
    negative_words = open_file('negative-words.txt')
    
    positive_negative= []
    for tweet in split_corpus:
        matched_pos = len( [word for word in tweet if word in positive_words] )
        matched_neg = len( [word for word in tweet if word in negative_words] )
        positive_negative.append([matched_pos, matched_neg])
        
    return np.array(positive_negative)

# Sentiment By Emoticon 
def sentiment_by_emoticon(split_corpus):
    happy_emoticon = 0
    sad_emoticon = 0
    
    emoticon_feature= []
    for tweet in split_corpus:
        happy_emoticon = len( [word for word in tweet if word == 'HEMOTICON'] ) 
        sad_emoticon = len( [word for word in tweet if word == 'SEMOTICON'] )
        
        emoticon_feature.append([happy_emoticon, sad_emoticon])
        
    return np.array(emoticon_feature)

#Bag of Words
def bag_of_word(content_train):
    
    #BOW using countvectorizer with max of 500 features
    bow_vectorizer = CountVectorizer(max_df=0.90, min_df=2, max_features=500)
    x_bow_train = bow_vectorizer.fit_transform(content_train)
    x_bow = np.array(x_bow_train.todense())
    
     #return train set and vectorizer
    return (x_bow, bow_vectorizer)


if os.path.isfile('word_2_vec.pkl'):
    with open('word_2_vec.pkl', 'rb') as file:
        model_w2v = pickle.load(file)
else:
    pickle_file = open('word_2_vec.pkl', 'wb')    

    model_w2v = gensim.models.Word2Vec(preprocessed_tweets['twitter-training-data.txt'],
                size=1000, # number of features variables
                window=10, # window size
                min_count=10, # ignore words with frequency<2                     
                sg = 1, # skip-gram model
                seed = 34
    ) 
    
    pickle.dump(model_w2v, pickle_file)
    pickle_file.close()
            


def word_vector(tokens, size):
    
    vec = np.zeros(size).reshape((1, size))
    count = 0
    for word in tokens:
        try:
            vec += model_w2v[word].reshape((1, size))
            count += 1.
        #throw exception if token is not in the vocabaulary
        except KeyError: 
            continue
            
    if count != 0:
        vec /= count
    return vec

def glove_LSTM_model(content_train, Y_train, no_tokens , embedding_dim):

    # tokenize input content
    tk = Tokenizer(num_words=no_tokens)
    tk.fit_on_texts(content_train)
    #convert text to sequence
    content_seq = tk.texts_to_sequences(content_train)
    
    #Build Glove Dictionary
    glove_file = 'glove.6B.100d.txt'
    embedding_dict = {}
    with open(glove_file,'r', encoding="utf8") as glove:
        for line in glove:
            values = line.split()
            word = values[0]
            vector = np.asarray(values[1:], dtype='float32')
            embedding_dict[word] = vector

    #zero padding numpy array 
    embedding_matrix = np.zeros((no_tokens, embedding_dim))
    
    #retrieve 100 dimensional vector for each word
    for word, index in tk.word_index.items():
        if index < no_tokens:
            vect = embedding_dict.get(word)
            if vect is not None:
                embedding_matrix[index] = vect
        else:
            break
    
    #get max train length
    max_length = np.max([len(text.split()) for text in content_train])
    #pad sequence
    content_seq_trunc = pad_sequences(content_seq, maxlen=max_length)
    
    #encoding output
    encoder = LabelEncoder()
    y_train_encoded = encoder.fit_transform(Y_train)
    y_train_categorical = to_categorical(y_train_encoded)
    
    #train model
    model = Sequential()
    model.add(Embedding(no_tokens, embedding_dim, weights = [embedding_matrix], trainable=False, input_length=max_length ))
    model.add((LSTM(80, return_sequences=True)))
    model.add(Dropout(0.2))
    model.add((LSTM(32)))
    model.add(Dropout(0.2))
    model.add(Dense(units=3, activation='softmax'))
    model.compile(loss = 'categorical_crossentropy', optimizer='rmsprop',metrics = ['accuracy'])
    History = model.fit(content_seq_trunc,y_train_categorical,epochs = 10,batch_size=100,validation_split =0.2)
    
    return tk, max_length, model

In [8]:
# Buid traditional sentiment classifiers. An example classifier name 'svm' is given
# in the code below. You should replace the other two classifier names
# with your own choices. For features used for classifier training, 
# the 'bow' feature is given in the code. But you could also explore the 
# use of other features.
for classifier in ['NaiveBayes', 'MaxEnt', 'LSTM']: # You may rename the names of the classifiers to something more descriptive
        
    #Training Models
    trainset = 'twitter-training-data.txt'
    labels_to_numbers = {"positive": 0, "negative": 1, "neutral": 2}
    labels = [labels_to_numbers[sent] for sent in tweetgts[trainset]]
    Y_train = np.array(labels)

    if classifier == 'NaiveBayes':
        print('Training ' + classifier)

        
        if os.path.isfile('NB.pkl'):
            with open('NB.pkl', 'rb') as file:
                model_nb,tfidf_word_transformer,tfidf_ngram_transformer = pickle.load(file)
        else:
      
            #extract features: word level tf-idf, ngram level tf-idf, setiment by lexicon and sentiment by emoticon
            (xtrain_tfidf_np, tfidf_word_transformer) = tfidf_word_level(preprocessed_tweets[trainset])        
            (xtrain_tfidf_ngram_np, tfidf_ngram_transformer) = tfidf_ngram_level(preprocessed_tweets[trainset])
            xtrain_sentiment_lexicon = sentiment_by_lexicon(preprocessed_tokenized_tweets[trainset])
            xtrain_sentiment_emoticon = sentiment_by_emoticon(preprocessed_tokenized_tweets[trainset])
            #concatenate features
            xtrain_concat_features = np.concatenate((xtrain_tfidf_np, xtrain_tfidf_ngram_np,xtrain_sentiment_lexicon, xtrain_sentiment_emoticon), axis=1)

            # Train sentiment classifier1
            model_nb = naive_bayes.MultinomialNB(alpha = 1, fit_prior = True)
            model_nb.fit(xtrain_concat_features,Y_train)
            pickle_dump = model_nb,tfidf_word_transformer,tfidf_ngram_transformer
            
            pickle_file = open('NB.pkl', 'wb')        
            pickle.dump(pickle_dump, pickle_file)
            pickle_file.close()
                
    elif classifier == 'MaxEnt':
        print('Training ' + classifier)
        
        if os.path.isfile('ME.pkl'):
            with open('ME.pkl', 'rb') as file:
                 model_me,tfidf_word_transformer,tfidf_ngram_transformer = pickle.load(file)
        else:
           # TODO: extract features for training classifier2
            #extract features: word level tf-idf, ngram level tf-idf, setiment by lexicon and sentiment by emoticon
            (xtrain_tfidf_np, tfidf_word_transformer) = tfidf_word_level(preprocessed_tweets[trainset])        
            (xtrain_tfidf_ngram_np, tfidf_ngram_transformer) = tfidf_ngram_level(preprocessed_tweets[trainset])
            xtrain_sentiment_lexicon = sentiment_by_lexicon(preprocessed_tokenized_tweets[trainset])
            xtrain_sentiment_emoticon = sentiment_by_emoticon(preprocessed_tokenized_tweets[trainset])
            #concatenate features
            xtrain_concat_features = np.concatenate((xtrain_tfidf_np, xtrain_tfidf_ngram_np,xtrain_sentiment_lexicon, xtrain_sentiment_emoticon), axis=1)

            # TODO: train sentiment classifier2
            model_me = LogisticRegression()
            model_me.fit(xtrain_concat_features, Y_train)
            
            pickle_file = open('ME.pkl', 'wb')    
            pickle_dump = model_me,tfidf_word_transformer,tfidf_ngram_transformer
            pickle.dump(pickle_dump, pickle_file)
            pickle_file.close()
            
        
    elif classifier == 'LSTM':
        print('Training ' + classifier)
        
        
        # TODO: extract features for training classifier3  
        # TODO: train sentiment classifier3
        no_tokens = 5000  # Max no of tokens
        embedding_dim = 100  # Embedding dimensionality

        lstm_model_save_file = 'LSTM_model.h5'
        lstm_parameters_save_file = 'LSTM.pkl'
        
        if os.path.isfile(lstm_parameters_save_file) & os.path.isfile(lstm_model_save_file):
            with open(lstm_parameters_save_file, 'rb') as file:
                #retrieve parameters
                tokenizer_lstm, max_length_lstm = pickle.load(file)
                #retrieve model
                lstm_model = load_model(lstm_model_save_file)
        else:
            #fit model
            tokenizer_lstm, max_length_lstm, lstm_model = glove_LSTM_model(preprocessed_tweets[trainset], Y_train, no_tokens , embedding_dim)
            
            #save parameters
            pickle_file = open(lstm_parameters_save_file, 'wb') 
            pickle_dump = tokenizer_lstm, max_length_lstm
            pickle.dump(pickle_dump, pickle_file)
            pickle_file.close()
            
            #save model
            lstm_model.save(lstm_model_save_file)
        

    # Predition performance of the classifiers
    for testset in testsets:
        id_preds = {}
        
        # write the prediction and evaluation code here
        if classifier == 'NaiveBayes':
            #transform test set corpus into features:word level tf-idf, ngram level tf-idf, setiment by lexicon and sentiment by emoticon
            # word level tf-idf
            xtest_tfidf =  tfidf_word_transformer.transform(preprocessed_tweets[testset])
            xtest_tfidf_np = xtest_tfidf.todense()
            xtest_tfidf_np= np.array(xtest_tfidf_np)
            
            # ngram level tf-idf
            xtest_tfidf_ngram =  tfidf_ngram_transformer.transform(preprocessed_tweets[testset])
            xtest_tfidf_ngram_np = xtest_tfidf_ngram.todense()
            xtest_tfidf_ngram_np= np.array(xtest_tfidf_ngram_np)
            
            xtest_sentiment_lexicon = sentiment_by_lexicon(preprocessed_tokenized_tweets[testset])
            xest_sentiment_emoticon = sentiment_by_emoticon(preprocessed_tokenized_tweets[testset])
            
            #concatenate features
            xtest_concat_features = np.concatenate((xtest_tfidf_np, xtest_tfidf_ngram_np,xtest_sentiment_lexicon,xest_sentiment_emoticon), axis=1)
            Y_predicted = model_nb.predict(xtest_concat_features)
            
            
        elif classifier == 'MaxEnt':
            #transform test set corpus into features:word level tf-idf, ngram level tf-idf, setiment by lexicon and sentiment by emoticon
            # word level tf-idf
            xtest_tfidf =  tfidf_word_transformer.transform(preprocessed_tweets[testset])
            xtest_tfidf_np = xtest_tfidf.todense()
            xtest_tfidf_np= np.array(xtest_tfidf_np)
            
            # ngram level tf-idf
            xtest_tfidf_ngram =  tfidf_ngram_transformer.transform(preprocessed_tweets[testset])
            xtest_tfidf_ngram_np = xtest_tfidf_ngram.todense()
            xtest_tfidf_ngram_np= np.array(xtest_tfidf_ngram_np)
            
            xtest_sentiment_lexicon = sentiment_by_lexicon(preprocessed_tokenized_tweets[testset])
            
            xest_sentiment_emoticon = sentiment_by_emoticon(preprocessed_tokenized_tweets[testset])
            
            #concatenate features
            xtest_concat_features = np.concatenate((xtest_tfidf_np, xtest_tfidf_ngram_np,xtest_sentiment_lexicon,xest_sentiment_emoticon), axis=1)
            Y_predicted = model_me.predict(xtest_concat_features)
            
        elif classifier == 'LSTM':
            #tokenize and pad test tweets
            xtest_seq = tokenizer_lstm.texts_to_sequences(preprocessed_tweets[testset])
            xtest_seq_trunc = pad_sequences(xtest_seq, maxlen=max_length_lstm)
            
            predict_lstm_categorical = lstm_model.predict(xtest_seq_trunc)
            Y_predicted = np.argmax(predict_lstm_categorical, axis=1)
           
        
        #cast predicted values back to labels
        numbers_to_labels = {0:"positive" ,  1 :"negative",  2:"neutral"}
        labels = [numbers_to_labels[i] for i in Y_predicted]
        id_preds = dict(zip(tweetids[testset], labels))
        
        evaluate(id_preds, testset, classifier)
        #confusion(id_preds, testset_path, classifier)

Training NaiveBayes
twitter-test1.txt (NaiveBayes): 0.565
twitter-test2.txt (NaiveBayes): 0.593
twitter-test3.txt (NaiveBayes): 0.567
Training MaxEnt
twitter-test1.txt (MaxEnt): 0.571
twitter-test2.txt (MaxEnt): 0.597
twitter-test3.txt (MaxEnt): 0.551
Training LSTM
twitter-test1.txt (LSTM): 0.614
twitter-test2.txt (LSTM): 0.641
twitter-test3.txt (LSTM): 0.578
