In [1]:
import pandas as pd
import numpy as np
import nltk
import itertools
from nltk.corpus import sentiwordnet as swn
from normalization import normalize_accented_characters, html_parser, strip_html
from utils import display_evaluation_metrics, display_confusion_matrix, display_classification_report
nltk.download('averaged_perceptron_tagger')
nltk.download('sentiwordnet')
import dynet as dy
from nltk.corpus import wordnet as wn
from collections import defaultdict
import gc

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /home/ubuntu/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package sentiwordnet to
[nltk_data]     /home/ubuntu/nltk_data...
[nltk_data]   Package sentiwordnet is already up-to-date!


## Movie review dataset

In [2]:
def prepare_movie_dataset(train_start,train_end,test_start,test_end) : 

    dataset = pd.read_csv(r'datasets/movie_reviews.csv')
    print('dataset size : ',dataset.shape[0])

    train_data = dataset[train_start:train_end]
    test_data = dataset[test_start:test_end]
    
    print('Train_X : ',train_data.shape[0])
    print('Test_X  : ',test_data.shape[0])

    test_reviews = np.array(test_data['review'])
    test_sentiments = np.array(test_data['sentiment'])

    return train_data,test_reviews,test_sentiments

def prepare_labeled_data(train_start,train_end,test_start,test_end) : 
    
    labeled_data=open("datasets/labeledTrainData.tsv","r")
    data=labeled_data.readlines()
    data=[d.split("\t") for d in data]
    sa_data=pd.DataFrame(data,columns=['ind','sentiment','review'])
    sa_data=sa_data[['sentiment','review']]
    
    print('dataset size : ',sa_data.shape[0])

    train_data = sa_data[train_start:train_end]
    test_data = sa_data[test_start:test_end]
    
    print('Train_X : ',train_data.shape[0])
    print('Test_X  : ',test_data.shape[0])

    test_reviews = np.array(test_data['review'])
    test_sentiments = np.array(test_data['sentiment'])

    return train_data,test_reviews,test_sentiments

train_x,test_x,test_y=prepare_movie_dataset(1,1000,1000,2000)

dataset size :  50000
Train_X :  999
Test_X  :  1000


## Evaluation for unsupervised Lexicon sentiment tagging

#### compare against the sentence tagging (already provided in the dataset )

[add markdown #11 here]

In [3]:
### BORROWED FROM THE AR_SARKAR METRIC
def analyze_sentiment_sentiwordnet_lexicon(review,verbose=False):
    
    
    #review = normalize_accented_characters(review)
    #review = review.decode('utf-8')
    review = html_parser.unescape(review)
    review = strip_html(review)
    
    text_tokens = nltk.word_tokenize(review)
    tagged_text = nltk.pos_tag(text_tokens)
    pos_score = neg_score = token_count = obj_score = 0

    for word, tag in tagged_text:
        ss_set = None
        if 'NN' in tag and swn.senti_synsets(word, 'n'):
            ss_set = list(swn.senti_synsets(word, 'n'))
            if ss_set : 
                ss_set=ss_set[0]
        elif 'VB' in tag and swn.senti_synsets(word, 'v'):
            ss_set = list(swn.senti_synsets(word, 'v'))
            if ss_set : 
                ss_set=ss_set[0]
        elif 'JJ' in tag and swn.senti_synsets(word, 'a'):
            ss_set = list(swn.senti_synsets(word, 'a'))
            if ss_set : 
                ss_set=ss_set[0]
        elif 'RB' in tag and swn.senti_synsets(word, 'r'):
            ss_set = list(swn.senti_synsets(word, 'r'))
            if ss_set : 
                ss_set=ss_set[0]
        
        if ss_set:
            
            pos_score += ss_set.pos_score()
            neg_score += ss_set.neg_score()
            obj_score += ss_set.obj_score()
            token_count += 1
    
    
    final_score = pos_score - neg_score
    norm_final_score = round(float(final_score) / token_count, 2)
    final_sentiment = 'positive' if norm_final_score >= 0 else 'negative'
    if verbose:
        norm_obj_score = round(float(obj_score) / token_count, 2)
        norm_pos_score = round(float(pos_score) / token_count, 2)
        norm_neg_score = round(float(neg_score) / token_count, 2)
        
        sentiment_frame = pd.DataFrame([[final_sentiment, norm_obj_score,
                                         norm_pos_score, norm_neg_score,
                                         norm_final_score]],
                                         columns=pd.MultiIndex(levels=[['SENTIMENT STATS:'], 
                                                                      ['Predicted Sentiment', 'Objectivity',
                                                                       'Positive', 'Negative', 'Overall']], 
                                                              labels=[[0,0,0,0,0],[0,1,2,3,4]]))
        print (sentiment_frame)   
    return final_sentiment
            
                                                               
def evaluate_lexicons(TRUE_LABELS,PREDICTED_LABELS,POS_CLASS,NEG_CLASS) : 

    print ('Performance metrics:')
    display_evaluation_metrics(true_labels=TRUE_LABELS,
                               predicted_labels=PREDICTED_LABELS,
                               positive_class=str(POS_CLASS))  
    print ('\nConfusion Matrix:'             )              
    display_confusion_matrix(true_labels=TRUE_LABELS,
                             predicted_labels=PREDICTED_LABELS,
                             classes=[str(POS_CLASS),str(NEG_CLASS)])
    print ('\nClassification report:' )                        
    display_classification_report(true_labels=TRUE_LABELS,
                                  predicted_labels=PREDICTED_LABELS,
                                  classes=[str(POS_CLASS),str(NEG_CLASS)])
    return

                               

## Basline lexicon evaluation

#### movie dataset 

In [6]:
train_x,test_x,test_y=prepare_movie_dataset(0,1000,1000,2000)
sentiwordnet_predictions = [analyze_sentiment_sentiwordnet_lexicon(review) for review in test_x]
evaluate_lexicons(test_y.tolist(),sentiwordnet_predictions,'positive','negative')

dataset size :  50000
Train_X :  1000
Test_X  :  1000


KeyboardInterrupt: 

#### labeled dataset 

In [None]:
train_x,test_x,test_y=prepare_labeled_data(0,1000,1000,2000)
sentiwordnet_predictions = [analyze_sentiment_sentiwordnet_lexicon(review) for review in test_x]
binary_predicted=['1' if p=='positive' else '0' for p in sentiwordnet_predictions ]
evaluate_lexicons(test_y.tolist(),binary_predicted,'1','0')

### Simple network for learning (do afterwards)

In [5]:
## SIMPLE NETWORK WITH THE sigma(V*tanh(WX+B)) ## for the XOR problem
# create a parameter collection and add the parameters.
m = dy.ParameterCollection()
W = m.add_parameters((8,2))
V = m.add_parameters((1,8))
b = m.add_parameters((8))

dy.renew_cg() # new computation graph. not strictly needed here, but good practice.
b.value() ## bias values
x=dy.vecInput(2) ## 2 sized inputs 
output=dy.logistic(V*(dy.tanh(W*x)+b)) ## output node

y = dy.scalarInput(0) ## objective function
loss = dy.binary_log_loss(output,y) ## loss function
## trainer with the initialized parameters m 
trainer=dy.SimpleSGDTrainer(m)
x.set([1,0])
y.set(1)
loss_value = loss.value() # this performs a forward through the network.
print("the loss before step is:",loss_value)

loss.backward()  # compute the gradients
trainer.update()

loss_value = loss.value(recalculate=True) 
print("the loss after step is:",loss_value)
pc = dy.ParameterCollection()
NUM_LAYERS=2
INPUT_DIM=50
HIDDEN_DIM=10
builder = dy.LSTMBuilder(NUM_LAYERS, INPUT_DIM, HIDDEN_DIM, pc)
s0 = builder.initial_state()
x1 = dy.vecInput(INPUT_DIM)
s1=s0.add_input(x1)
y1 = s1.output()
s2=s1.add_input(x1) # we can add another input
y2=s2.output()

the loss before step is: 1.0331029891967773
the loss after step is: 0.7983773350715637


### IMPLEMENTING 2.1 SECTION OF GOOGLE PAPER FOR LEXICON EXPANSION [this is for the lexicon expansion ]

In [None]:
ALL_WORDS=list(wn.words())
SCORES=defaultdict()
df=pd.DataFrame(ALL_WORDS,columns=['word'])

swn.all_
    

### Comparing the lexicons from stanford paper "Incuding Domain-Specific Sentiment Lexicons from Unalabeled Copora"

In [4]:
import socialsent_util
def load_lexicon(name, remove_neutral=True):
    lexicon = socialsent_util.load_json("./lexicons_socialsent/"+ name + '.json')
    return {w: p for w, p in lexicon.items() if p != 0} if remove_neutral else lexicon

def compare_lexicons_binary(print_disagreements=False):
    lexicons = {
        "inquirer": load_lexicon("inquirer", False),
        "mpqa": load_lexicon("mpqa", False),
        "bingliu": load_lexicon("bingliu", False),
    }

    for l in lexicons:
        print( l, len(lexicons[l]), len([w for w in lexicons[l] if lexicons[l][w] != 0]))

    for l1, l2 in itertools.combinations(lexicons.keys(), 2):
        ps1, ps2 = lexicons[l1], lexicons[l2]
        common_words = set(ps1.keys()) & set(ps2.keys())
        print( l1, l2, "agreement: {:.2f}".format(
            100.0 * sum(1 if ps1[w] == ps2[w] else 0 for w in common_words) / len(common_words)))
        common_words = set([word for word in ps1.keys() if ps1[word] != 0]) & \
                       set([word for word in ps2.keys() if ps2[word] != 0])  
        print (l1, l2, "agreement ignoring neutral: {:.2f}".format(
            100.0 * sum(1 if ps1[w] * ps2[w] == 1 else 0 for w in common_words) / len(common_words)))
        
        if print_disagreements and l1 == 'opinion' and l2 == 'inquirer':
            for w in common_words:
                if lexicons[l1][w] != lexicons[l2][w]:
                    print (w, lexicons[l1][w], lexicons[l2][w])
      
    
## ALL THESE LEXICONS ARE 2-CLASS SENTIMENTS. 1 = POSITIVE; -1 = NEGATIVE
finance_lexicons=load_lexicon('finance')
bingliu_lexicons=load_lexicon('bingliu')
inquirer_lexicons=load_lexicon('inquirer')
mpqa_lexicons=load_lexicon('mpqa')
twitter_lexicons=load_lexicon('twitter')

In [5]:

ZEEYANG_LEXICONS='lexicons_zeeyang'
def read_zeeyang_lexicons(fname) : 
    
    polarities=defaultdict()
    for line in open(fname,'r') : 
        token=line.split(" ")[0]
        score=line.split(" ")[1]
        polarities[token]=score
        
    return polarities

## THESE LEXICONS HAVE CONTINOUS SCORES (BETWEEN -1 AND 1 )
senti140_lexicons=read_zeeyang_lexicons(ZEEYANG_LEXICONS+"/sentiment140.lex")
sentiwn_lexicons=read_zeeyang_lexicons(ZEEYANG_LEXICONS+"/sentiwordnet.lex")
sst_lexicons=read_zeeyang_lexicons(ZEEYANG_LEXICONS+"/stanford.tree.lexicon")



### Polarity values of imported lexicons

In [11]:
print("POLARTTY VALUES OF IMPORTED LEXICONS")
print("Finance")
print(pd.DataFrame(list(finance_lexicons.values()),columns=['score'])['score'].unique())
print("Bingliu")
print(pd.DataFrame(list(bingliu_lexicons.values()),columns=['score'])['score'].unique())
print("Inquirer")
print(pd.DataFrame(list(inquirer_lexicons.values()),columns=['score'])['score'].unique())
print("Twitter")
print(pd.DataFrame(list(twitter_lexicons.values()),columns=['score'])['score'].unique())

print("Senti140")
print(pd.DataFrame(list(senti140_lexicons.values()),columns=['score'])['score'].unique())
print("SentiWordNet")
print(pd.DataFrame(list(sentiwn_lexicons.values()),columns=['score'])['score'].unique())
print("SST")
print(pd.DataFrame(list(sst_lexicons.values()),columns=['score'])['score'].unique())


LEXICON_LIST=[finance_lexicons,bingliu_lexicons,inquirer_lexicons,twitter_lexicons,senti140_lexicons,sentiwn_lexicons,sst_lexicons]
LEXICON_LABELS=['Finance','Bingliu','Inquirer','Twitter','Senti140','Sentiwordnet','SST']
for i,l in enumerate(LEXICON_LIST) : 
    
    
    print("========== {} ========== ".format(LEXICON_LABELS[i]))
    print(len(LEXICON_LIST[i]))

POLARTTY VALUES OF IMPORTED LEXICONS
Finance
[-1  1]
Bingliu
[-1  1]
Inquirer
[-1  1]
Twitter
[-1  1]
Senti140
['-1.250000\n' '-0.798000\n' '0.049000\n' ... '-1.033000\n' '-1.876000\n'
 '-2.000000\n']
SentiWordNet
['2.250000\n' '0.750000\n' '1.750000\n' '3.250000\n' '1.000000\n'
 '0.250000\n' '1.250000\n' '2.500000\n' '0.500000\n' '1.500000\n'
 '3.000000\n' '2.750000\n' '3.500000\n' '3.750000\n' '4.000000\n'
 '0.000000\n']
SST
['2\n' '3\n' '1\n' '4\n' '0\n']
2709
6785
3457
1277
62468
32980
19465


### Compares the different lexicon repositories through the mutual information between them (common words)

### The comparison is done through looking for words in two lexicon dictionaries L1 AND L2, and how many words are common in them which have the same scores.

In [7]:
## COMPARING THE BINARY LEXICONS 
compare_lexicons_binary()

inquirer 8640 3457
mpqa 6886 6462
bingliu 6785 6785
inquirer mpqa agreement: 82.47
inquirer mpqa agreement ignoring neutral: 98.50
inquirer bingliu agreement: 84.39
inquirer bingliu agreement ignoring neutral: 98.74
mpqa bingliu agreement: 99.19
mpqa bingliu agreement ignoring neutral: 99.44


#### Lexicon Induction : the idea is to generate the lexicons provided the corpus. This method makes sure that the lexicon are sensitive to the context they are drawn from. They may prove useful if we would like to assess them in a simiar context. For instance, financial lexicons will reflect better sentiments than using general lexicons such as SentiWordNet. Three ways purposed for induction 


### POLARITY INDUCTION METHOD : This is used for re-scoring of the lexicons(tokens) by taking information from the word-embeddings (domain-specific), positive and the negative seed words.

In [14]:
import polarity_induction_methods

### THIS IS THE FUNCTION FOR INDUCING LEXICONS GIVEN THE SEEDS, EMBEDDINGS AND THE METHOD.
def run_method(positive_seeds, negative_seeds, embeddings, transform_embeddings=False, post_densify=False,
        method=polarity_induction_methods.densify, **kwargs):
    
    print("THE INTERNAL RUN_METHOD IS RUNNING...")
    
    if transform_embeddings:
        print ("Transforming embeddings...")
        embeddings = embedding_transformer.apply_embedding_transformation(embeddings, positive_seeds, negative_seeds, n_dim=50)
    
    print("AFTER EMBEDDING TRANSFORM ",embeddings)
    
    ## using densify method
    if post_densify:
        polarities = method(embeddings, positive_seeds, negative_seeds, **kwargs)
        top_pos = [word for word in 
                sorted(polarities, key = lambda w : -polarities[w])[:150]]
        top_neg = [word for word in 
                sorted(polarities, key = lambda w : polarities[w])[:150]]
        top_pos.extend(positive_seeds)
        top_neg.extend(negative_seeds)
        return polarity_induction_methods.densify(embeddings, top_pos, top_neg)
    
    
    positive_seeds = [s for s in positive_seeds if s in embeddings]
    negative_seeds = [s for s in negative_seeds if s in embeddings]
    
    
    return method(embeddings, positive_seeds, negative_seeds, **kwargs)


## LEXICON INDUCTION ON STANDARD ENGLISH 

In [15]:
def calculate_new_lexicon_polarities(parent_lexicon,positive_seeds,negative_seeds,technique) : 
    
    ## CHOOSE SEEDS : 

    POSITIVE_SEEDS = ["good", "lovely", "excellent", "fortunate", "pleasant", "delightful", "perfect", "loved", "love", "happy"] 
    NEGATIVE_SEEDS = ["bad", "horrible", "poor",  "unfortunate", "unpleasant", "disgusting", "evil", "hated", "hate", "unhappy"]

    ## LOAD THE WORD-EMBEDDINGS : 

    eval_words = set(parent_lexicon.keys())

    EMBEDDING_TYPE = constants.GLOVE_EMBEDDINGS
    EMBEDDING = create_representation("GIGA", constants.GLOVE_EMBEDDINGS,eval_words.union(POSITIVE_SEEDS).union(NEGATIVE_SEEDS))

    embed_words = set(EMBEDDING.iw)
    eval_words = eval_words.intersection(EMBEDDING)
    eval_words = [word for word in eval_words  if not word in POSITIVE_SEEDS and not word in NEGATIVE_SEEDS]

    ## TRAIN THE BEST ALGORITHM : SENTPROP and get polarities re-scored
    
    polarities=defaultdict()
    if technique=='label_propagate_prob' : 
        
        polarities = run_method(POSITIVE_SEEDS, NEGATIVE_SEEDS, 
                    EMBEDDING.get_subembed(set(eval_words).union(NEGATIVE_SEEDS).union(POSITIVE_SEEDS)),
                    method=polarity_induction_methods.label_propagate_probabilistic,beta=0.99, nn=10)
    
    elif technique == 'pmi' : 
        
        EMBEDDING = create_representation("Explicit", constants.GLOVE_EMBEDDINGS)
        hist_counts = EMBEDDING.get_subembed(set(eval_words).union(positive_seeds).union(negative_seeds),restrict_context=False)
        
        
        print(dir(EMBEDDING))
        
        polarities = run_method(positive_seeds, negative_seeds,
                hist_counts,
                method=polarity_induction_methods.bootstrap,
                score_method=polarity_induction_methods.pmi)
    
    return polarities,eval_words

In [18]:
def evaluate_method_performance(polarities,INITIAL_LEXICON_LIB,domain,eval_words) : 
    
    ## EVALUATING THE EFFECTIVENESS OF THE NEW LEXICON POLARITIES.
    from evaluate_methods import binary_metrics,ternary_metrics

    acc, auc, avg_prec = binary_metrics(polarities, INITIAL_LEXICON_LIB, eval_words)
    if auc < 0.5:
        polarities = {word:-1*polarities[word] for word in polarities}
        acc, auc, avg_prec = binary_metrics(polarities, INITIAL_LEXICON_LIB, eval_words)

    print("============== DOMAIN : {} ==============".format(domain))
    print ("Binary metrics:")
    print( "==============")
    print ("Accuracy with optimal threshold: {:.4f}".format(acc))
    print ("ROC AUC Score: {:.4f}".format(auc))
    print ("Average Precision Score: {:.4f}".format(avg_prec))


    tau, cmn_f1, maj_f1, conf_mat = ternary_metrics(polarities, INITIAL_LEXICON_LIB, eval_words, tau_lexicon=THREE_WAY_LEXICON)
    print ("Ternary metrics:")
    print( "==============")
    print ("Majority macro F1 baseline {:.4f}".format(maj_f1))
    print ("Macro F1 with cmn threshold: {:.4f}".format(cmn_f1))
    if tau:
        print ("Kendall Tau {:.4f}".format(tau))
    print ("Confusion matrix: ")
    print (conf_mat)
    print( "Neg :", float(conf_mat[0,0]) / np.sum(conf_mat[0,:]))
    print ("Neut :", float(conf_mat[1,1]) / np.sum(conf_mat[1,:]))
    print ("Pos :", float(conf_mat[2,2]) / np.sum(conf_mat[2,:]))

In [16]:
from representations.representation_factory import create_representation
import constants
from evaluate_methods import run_method
import polarity_induction_methods


## TRAINING THE LABEL-PROPAGATION FOR THE RE-SCORING OF POLARITIES FROM PRE-DETERMINED LEXICONS (MADE FROM WORD EMBEDDINGS)

INQUIRER = load_lexicon("inquirer", remove_neutral=False)

FINANCE_LEXICONS=load_lexicon('finance')
TWITTER_LEXICONS=load_lexicon('twitter')

THREE_WAY_LEXICON = kuperman = load_lexicon("kuperman", remove_neutral=False)

POSITIVE_FINANCE = ["successful", "excellent", "profit", "beneficial", "improving", "improved", "success", "gains", "positive"]
NEGATIVE_FINANCE = ["negligent", "loss", "volatile", "wrong", "losses", "damages", "bad", "litigation", "failure", "down", "negative"]


POSITIVE_SE = ["good", "lovely", "excellent", "fortunate", "pleasant", "delightful", "perfect", "loved", "love", "happy"] 
NEGATIVE_SE = ["bad", "horrible", "poor",  "unfortunate", "unpleasant", "disgusting", "evil", "hated", "hate", "unhappy"]


### Training financial and standard-english lexicons with glove embeddings using probabilistic label propagation method.

#### Finance lexicon

In [19]:
finance_polarities,finance_eval=calculate_new_lexicon_polarities(FINANCE_LEXICONS,POSITIVE_FINANCE,NEGATIVE_FINANCE,'label_propagate_prob')
evaluate_method_performance(finance_polarities,FINANCE_LEXICONS,'FINANCE',finance_eval)
gc.collect()

Binary metrics:
Accuracy with optimal threshold: 1.7899
ROC AUC Score: 0.9598
Average Precision Score: 0.8393
Ternary metrics:
Majority macro F1 baseline 0.4642
Macro F1 with cmn threshold: 0.1180
Kendall Tau 0.3777
Confusion matrix: 
[[   0    1 2246]
 [   0    0    0]
 [   0    0  347]]
Neg : 0.0
Neut : nan
Pos : 1.0


  'precision', 'predicted', average, warn_for)


0

In [20]:
standard_english_polarities,se_eval=calculate_new_lexicon_polarities(INQUIRER,POSITIVE_SE,NEGATIVE_SE,'label_propagate_prob')
evaluate_method_performance(standard_english_polarities,INQUIRER,' STANDARD ENGLISH ',se_eval)
gc.collect()


Binary metrics:
Accuracy with optimal threshold: 1.1631
ROC AUC Score: 0.8072
Average Precision Score: 0.7743
Ternary metrics:
Majority macro F1 baseline 0.2497
Macro F1 with cmn threshold: 0.1024
Kendall Tau 0.3625
Confusion matrix: 
[[   0    1 1874]
 [   0    0 5106]
 [   0    0 1547]]
Neg : 0.0
Neut : 0.0
Pos : 1.0


  'precision', 'predicted', average, warn_for)


0

In [None]:
sent140_polarities,s140eval = calculate_new_lexicon_polarities(senti140_lexicons,POSITIVE_SE,NEGATIVE_SE,'label_propagate_prob')
gc.collect()

In [None]:
sentiwn_polarities,swn_eval = calculate_new_lexicon_polarities(sentiwn_lexicons,POSITIVE_SE,NEGATIVE_SE,'label_propagate_prob')
gc.collect()

In [None]:
sst_lexicons,sst_eval = calculate_new_lexicon_polarities(sst_lexicons,POSITIVE_SE,NEGATIVE_SE,'label_propagate_prob')
gc.collect()

### Here I tried to make a dictionary of all sentiwordnet and use the evaluation on them. 

In [15]:
ss=swn.all_senti_synsets()
SENTI_LEXICONS=defaultdict()

for s in ss : 
    
    lemmas = s.synset.lemma_names()
    positive=s.pos_score()
    
    negative=s.neg_score()
    
    for l in lemmas : 
        net_sentiment=positive-negative
        ## CONVERTING TO BINARY SENTIMENTS 
        if net_sentiment>0 : 
            SENTI_LEXICONS[l]=1
        elif net_sentiment<0:
            SENTI_LEXICONS[l]=-1
            
            
## RIGHT NOW, RUNNING THIS CRASHES THE KERNEL, PROBABLY NEEDS TO BE RUN SEPARATELY ON A PYTHON FILE I GUESS.
#senti_polarities,senti_eval=calculate_new_lexicon_polarities(SENTI_LEXICONS,POSITIVE_SE,NEGATIVE_SE,'label_propagate_prob')

## EVALUATING THE EFFECTIVENESS OF THE NEW LEXICON POLARITIES.

#### Calculates the ROC auc scores with the new polarities comparing to the earlier lexicon binary classification (1 = positive and 0 = negative).
#### Interpretation of the score. 
#### Higher the score, it means that new polarities (continous sentiment scores) confirms with the binary sentiment scores

In [24]:
### TASKS IN THE DOC : 

## 1. EXPLAIN ABOUT THE LEXICON INDUCING : LABEL PROPAGATION ALGORITHM
## 2. GIVES SOME UNDERLYING MATHEMATICS FROM THE RESEARCH PAPER REGARDING HOW SCORES ARE COMPUTED.

In [17]:
def evaluate_method_performance(polarities,INITIAL_LEXICON_LIB,domain,eval_words) : 
    
    ## EVALUATING THE EFFECTIVENESS OF THE NEW LEXICON POLARITIES.
    from evaluate_methods import binary_metrics,ternary_metrics

    acc, auc, avg_prec = binary_metrics(polarities, INITIAL_LEXICON_LIB, eval_words)
    if auc < 0.5:
        polarities = {word:-1*polarities[word] for word in polarities}
        acc, auc, avg_prec = binary_metrics(polarities, INITIAL_LEXICON_LIB, eval_words)

    print("============== DOMAIN : {} ==============".format(domain))
    print ("Binary metrics:")
    print( "==============")
    print ("Accuracy with optimal threshold: {:.4f}".format(acc))
    print ("ROC AUC Score: {:.4f}".format(auc))
    print ("Average Precision Score: {:.4f}".format(avg_prec))


    tau, cmn_f1, maj_f1, conf_mat = ternary_metrics(polarities, INITIAL_LEXICON_LIB, eval_words, tau_lexicon=THREE_WAY_LEXICON)
    print ("Ternary metrics:")
    print( "==============")
    print ("Majority macro F1 baseline {:.4f}".format(maj_f1))
    print ("Macro F1 with cmn threshold: {:.4f}".format(cmn_f1))
    if tau:
        print ("Kendall Tau {:.4f}".format(tau))
    print ("Confusion matrix: ")
    print (conf_mat)
    print( "Neg :", float(conf_mat[0,0]) / np.sum(conf_mat[0,:]))
    print ("Neut :", float(conf_mat[1,1]) / np.sum(conf_mat[1,:]))
    print ("Pos :", float(conf_mat[2,2]) / np.sum(conf_mat[2,:]))

In [None]:
evaluate_method_performance(standard_english_polarities,INQUIRER,' STANDARD ENGLISH ',se_eval)

## BASELINE : On the Movie Reviews using senti-wordnet lexicons

-  Accuracy: 0.6
-  Precision: 0.56
-  Recall: 0.93
-  F1 Score: 0.7

### This is to compute the effectiveness of binary sentiment scores provided a lexicon library.
### This can be used to see which lexicon libraries help achieving the closest sentiment scores.
### Thus a supervised algorithm and evaluation is the 

In [17]:
## INPUTS : 
## review = single sentence 
## lexicon_dict = dict of the lexicon with key as word and value as the polarity

def analyze_sentiment_domain(review,lexicon_dict,verbose=False):
    
    
    #review = normalize_accented_characters(review)
    #review = review.decode('utf-8')
    review = html_parser.unescape(review)
    review = strip_html(review)
    
    text_tokens = nltk.word_tokenize(review)
    tagged_text = nltk.pos_tag(text_tokens)
    pos_score = neg_score = token_count = obj_score = 0

    ## postitve polarity counts as positive and negative polarities counts as negative
    
    
    for token in text_tokens : 
        
        if token in lexicon_dict : 
            
            if lexicon_dict[token]>0 : 
                pos_score+=1
            elif lexicon_dict[token]<0:
                neg_score+=1

        token_count+=1
            
    final_score = pos_score - neg_score
    norm_final_score = round(float(final_score) / token_count, 2)
    final_sentiment = 'positive' if norm_final_score >= 0 else 'negative'
    if verbose:
        norm_pos_score = round(float(pos_score) / token_count, 2)
        norm_neg_score = round(float(neg_score) / token_count, 2)
        
        
        
        sentiment_frame = pd.DataFrame([[final_sentiment,
                                         norm_pos_score, norm_neg_score,
                                         norm_final_score]],
                                         columns=pd.MultiIndex(levels=[['SENTIMENT STATS:'], 
                                                                      ['Predicted Sentiment',
                                                                       'Positive', 'Negative', 'Overall']], 
                                                              labels=[[0,0,0,0],[0,1,2,3]]))
        print (sentiment_frame)   
    return final_sentiment
            
                               

In [18]:
train_x,test_x,test_y=prepare_movie_dataset(0,1000,1000,2000)

sentiwordnet_predictions = [analyze_sentiment_domain(review,polarities) for review in test_x]
#evaluate_lexicons(test_y.tolist(),sentiwordnet_predictions,'positive','negative')

dataset size :  50000
Train_X :  1000
Test_X  :  1000


NameError: name 'polarities' is not defined

In [25]:
## TO DO : 

## 1. COMPUTE THESE SCORES AGAIN WITH THE NEW DOMAIN SPECIFIC (GLOVE BASED) EMBEDDINGS TO SEE SENTIMENTAL SCORE CHANGE.
## 2. GET THE TWITTER DATASET  AND DO AGAIN THE SAME THING. (TWITTER EMBEDDINGS/GLOVE EMBEDDINGS/SENTIWORDNET + TWITTER DATASET)


In [27]:
### FOR TWO DATASETS : 
### FOR THREE EMBEDDINGS : CUSTOM-MADE / GLOVE (1B) / ACTUAL-DOMAIN ONES.
### GET THESE SCORES (2*3 MATRIX OF SCORES)
### GET THE NEW POLARITIES AND USE THEM TO CALCULATE SENTIMENTAL SCORES(SENTENCE BASED).
### ALSO GET THE BINARY_METRICS FOR THE 

### THE WHOLE USE OF THE METHODS IS THAT TO GET INSIGHT INTO THE CONTEXT-SENSITIVE INFORMATION.


### HOW? 

### 1. ARRANGE THE PRE-TAGGED LEXICONS (ATLEAST POSITIVE/NEGATIVE)

### 2. WORD EMBEDDINGS TRAINED ON THE CONTEXT-MATERIAL. 

### 3. LABEL-PROPAGATE ALGORITHM TO MAKE LEXICONS SCORE LEXICONS TO THE CONTINOUS SENTIMENT SCORES.

### 4. USE SUM (P+V)/T OR NEURAL NETWORK TO OBTAIN THE SCORE FOR THE WHOLE SENTENCE SENTIMENT 

##### Here, we earlier thought that we would be able to implement this phase if we had more time. The paper "Context-Sensitive Lexicon Features for Neural Sentiment Analysis" we can test baseline with normal lexicons and improvement as label-propagated lexicons with LSTM for scores, and evaluate them back on the binary classification scores 

117659