In [1]:
import pandas as pd
import numpy as np
import nltk
import itertools
from nltk.corpus import sentiwordnet as swn
from normalization import normalize_accented_characters, html_parser, strip_html
from utils import display_evaluation_metrics, display_confusion_matrix, display_classification_report
nltk.download('averaged_perceptron_tagger')
nltk.download('sentiwordnet')
import dynet as dy
from nltk.corpus import wordnet as wn
from collections import defaultdict

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /home/ubuntu/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package sentiwordnet to
[nltk_data]     /home/ubuntu/nltk_data...
[nltk_data]   Package sentiwordnet is already up-to-date!


## Movie review dataset

In [2]:
def prepare_movie_dataset(train_start,train_end,test_start,test_end) : 

    dataset = pd.read_csv(r'datasets/movie_reviews.csv')
    print('dataset size : ',dataset.shape[0])

    train_data = dataset[train_start:train_end]
    test_data = dataset[test_start:test_end]
    
    print('Train_X : ',train_data.shape[0])
    print('Test_X  : ',test_data.shape[0])

    test_reviews = np.array(test_data['review'])
    test_sentiments = np.array(test_data['sentiment'])

    return train_data,test_reviews,test_sentiments

def prepare_labeled_data(train_start,train_end,test_start,test_end) : 
    
    labeled_data=open("datasets/labeledTrainData.tsv","r")
    data=labeled_data.readlines()
    data=[d.split("\t") for d in data]
    sa_data=pd.DataFrame(data,columns=['ind','sentiment','review'])
    sa_data=sa_data[['sentiment','review']]
    
    print('dataset size : ',sa_data.shape[0])

    train_data = sa_data[train_start:train_end]
    test_data = sa_data[test_start:test_end]
    
    print('Train_X : ',train_data.shape[0])
    print('Test_X  : ',test_data.shape[0])

    test_reviews = np.array(test_data['review'])
    test_sentiments = np.array(test_data['sentiment'])

    return train_data,test_reviews,test_sentiments

In [3]:
train_x,test_x,test_y=prepare_labeled_data(1,1000,1000,2000)

dataset size :  25001
Train_X :  999
Test_X  :  1000


## Evaluation for unsupervised Lexicon sentiment tagging

#### compare against the sentence tagging (already provided in the dataset )

[add markdown #11 here]

In [4]:
### BORROWED FROM THE AR_SARKAR METRIC
def analyze_sentiment_sentiwordnet_lexicon(review,verbose=False):
    
    
    #review = normalize_accented_characters(review)
    #review = review.decode('utf-8')
    review = html_parser.unescape(review)
    review = strip_html(review)
    
    text_tokens = nltk.word_tokenize(review)
    tagged_text = nltk.pos_tag(text_tokens)
    pos_score = neg_score = token_count = obj_score = 0

    for word, tag in tagged_text:
        ss_set = None
        if 'NN' in tag and swn.senti_synsets(word, 'n'):
            ss_set = list(swn.senti_synsets(word, 'n'))
            if ss_set : 
                ss_set=ss_set[0]
        elif 'VB' in tag and swn.senti_synsets(word, 'v'):
            ss_set = list(swn.senti_synsets(word, 'v'))
            if ss_set : 
                ss_set=ss_set[0]
        elif 'JJ' in tag and swn.senti_synsets(word, 'a'):
            ss_set = list(swn.senti_synsets(word, 'a'))
            if ss_set : 
                ss_set=ss_set[0]
        elif 'RB' in tag and swn.senti_synsets(word, 'r'):
            ss_set = list(swn.senti_synsets(word, 'r'))
            if ss_set : 
                ss_set=ss_set[0]
        
        if ss_set:
            
            pos_score += ss_set.pos_score()
            neg_score += ss_set.neg_score()
            obj_score += ss_set.obj_score()
            token_count += 1
    
    
    final_score = pos_score - neg_score
    norm_final_score = round(float(final_score) / token_count, 2)
    final_sentiment = 'positive' if norm_final_score >= 0 else 'negative'
    if verbose:
        norm_obj_score = round(float(obj_score) / token_count, 2)
        norm_pos_score = round(float(pos_score) / token_count, 2)
        norm_neg_score = round(float(neg_score) / token_count, 2)
        
        sentiment_frame = pd.DataFrame([[final_sentiment, norm_obj_score,
                                         norm_pos_score, norm_neg_score,
                                         norm_final_score]],
                                         columns=pd.MultiIndex(levels=[['SENTIMENT STATS:'], 
                                                                      ['Predicted Sentiment', 'Objectivity',
                                                                       'Positive', 'Negative', 'Overall']], 
                                                              labels=[[0,0,0,0,0],[0,1,2,3,4]]))
        print (sentiment_frame)   
    return final_sentiment
            
                                                               
def evaluate_lexicons(TRUE_LABELS,PREDICTED_LABELS,POS_CLASS,NEG_CLASS) : 

    print ('Performance metrics:')
    display_evaluation_metrics(true_labels=TRUE_LABELS,
                               predicted_labels=PREDICTED_LABELS,
                               positive_class=str(POS_CLASS))  
    print ('\nConfusion Matrix:'             )              
    display_confusion_matrix(true_labels=TRUE_LABELS,
                             predicted_labels=PREDICTED_LABELS,
                             classes=[str(POS_CLASS),str(NEG_CLASS)])
    print ('\nClassification report:' )                        
    display_classification_report(true_labels=TRUE_LABELS,
                                  predicted_labels=PREDICTED_LABELS,
                                  classes=[str(POS_CLASS),str(NEG_CLASS)])
    return

                               

## Basline lexicon evaluation

#### movie dataset 

In [8]:
train_x,test_x,test_y=prepare_movie_dataset(0,1000,1000,2000)
sentiwordnet_predictions = [analyze_sentiment_sentiwordnet_lexicon(review) for review in test_x]
evaluate_lexicons(test_y.tolist(),sentiwordnet_predictions,'positive','negative')

dataset size :  50000
Train_X :  1000
Test_X  :  1000
Performance metrics:
Accuracy: 0.6
Precision: 0.56
Recall: 0.93
F1 Score: 0.7

Confusion Matrix:
                 Predicted:         
                   positive negative
Actual: positive        470       34
        negative        365      131

Classification report:
              precision    recall  f1-score   support

    positive       0.56      0.93      0.70       504
    negative       0.79      0.26      0.40       496

   micro avg       0.60      0.60      0.60      1000
   macro avg       0.68      0.60      0.55      1000
weighted avg       0.68      0.60      0.55      1000



#### labeled dataset 

In [7]:
train_x,test_x,test_y=prepare_labeled_data(0,1000,1000,2000)
sentiwordnet_predictions = [analyze_sentiment_sentiwordnet_lexicon(review) for review in test_x]
binary_predicted=['1' if p=='positive' else '0' for p in sentiwordnet_predictions ]
evaluate_lexicons(test_y.tolist(),binary_predicted,'1','0')

dataset size :  25001
Train_X :  1000
Test_X  :  1000
Performance metrics:
Accuracy: 0.59
Precision: 0.56
Recall: 0.92
F1 Score: 0.7

Confusion Matrix:
          Predicted:     
                   1    0
Actual: 1        476   39
        0        371  114

Classification report:
              precision    recall  f1-score   support

           1       0.56      0.92      0.70       515
           0       0.75      0.24      0.36       485

   micro avg       0.59      0.59      0.59      1000
   macro avg       0.65      0.58      0.53      1000
weighted avg       0.65      0.59      0.53      1000



### Simple network for learning (do afterwards)

In [141]:
## SIMPLE NETWORK WITH THE sigma(V*tanh(WX+B)) ## for the XOR problem
# create a parameter collection and add the parameters.
m = dy.ParameterCollection()
W = m.add_parameters((8,2))
V = m.add_parameters((1,8))
b = m.add_parameters((8))

dy.renew_cg() # new computation graph. not strictly needed here, but good practice.
b.value() ## bias values
x=dy.vecInput(2) ## 2 sized inputs 
output=dy.logistic(V*(dy.tanh(W*x)+b)) ## output node

y = dy.scalarInput(0) ## objective function
loss = dy.binary_log_loss(output,y) ## loss function
## trainer with the initialized parameters m 
trainer=dy.SimpleSGDTrainer(m)
x.set([1,0])
y.set(1)
loss_value = loss.value() # this performs a forward through the network.
print("the loss before step is:",loss_value)

loss.backward()  # compute the gradients
trainer.update()

loss_value = loss.value(recalculate=True) 
print("the loss after step is:",loss_value)
pc = dy.ParameterCollection()
NUM_LAYERS=2
INPUT_DIM=50
HIDDEN_DIM=10
builder = dy.LSTMBuilder(NUM_LAYERS, INPUT_DIM, HIDDEN_DIM, pc)
s0 = builder.initial_state()
x1 = dy.vecInput(INPUT_DIM)
s1=s0.add_input(x1)
y1 = s1.output()
s2=s1.add_input(x1) # we can add another input
y2=s2.output()

the loss before step is: 0.4169560670852661
the loss after step is: 0.3646645247936249


### IMPLEMENTING 2.1 SECTION OF GOOGLE PAPER FOR LEXICON EXPANSION [this is for the lexicon expansion ]

In [251]:
ALL_WORDS=list(wn.words())
SCORES=defaultdict()
df=pd.DataFrame(ALL_WORDS,columns=['word'])

swn.all_
    

### Comparing the lexicons from stanford paper "Incuding Domain-Specific Sentiment Lexicons from Unalabeled Copora"

In [5]:
import socialsent_util
def load_lexicon(name, remove_neutral=True):
    lexicon = socialsent_util.load_json("./lexicons_socialsent/"+ name + '.json')
    return {w: p for w, p in lexicon.items() if p != 0} if remove_neutral else lexicon

def compare_lexicons(print_disagreements=False):
    lexicons = {
        "inquirer": load_lexicon("inquirer", False),
        "mpqa": load_lexicon("mpqa", False),
        "bingliu": load_lexicon("bingliu", False),
    }

    for l in lexicons:
        print( l, len(lexicons[l]), len([w for w in lexicons[l] if lexicons[l][w] != 0]))

    for l1, l2 in itertools.combinations(lexicons.keys(), 2):
        ps1, ps2 = lexicons[l1], lexicons[l2]
        common_words = set(ps1.keys()) & set(ps2.keys())
        print( l1, l2, "agreement: {:.2f}".format(
            100.0 * sum(1 if ps1[w] == ps2[w] else 0 for w in common_words) / len(common_words)))
        common_words = set([word for word in ps1.keys() if ps1[word] != 0]) & \
                       set([word for word in ps2.keys() if ps2[word] != 0])  
        print (l1, l2, "agreement ignoring neutral: {:.2f}".format(
            100.0 * sum(1 if ps1[w] * ps2[w] == 1 else 0 for w in common_words) / len(common_words)))
        
        if print_disagreements and l1 == 'opinion' and l2 == 'inquirer':
            for w in common_words:
                if lexicons[l1][w] != lexicons[l2][w]:
                    print (w, lexicons[l1][w], lexicons[l2][w])
      
    
## ALL THESE LEXICONS ARE 2-CLASS SENTIMENTS. 1 = POSITIVE; -1 = NEGATIVE
finance_lexicons=load_lexicon('finance')
bingliu_lexicons=load_lexicon('bingliu')
inquirer_lexicons=load_lexicon('inquirer')
mpqa_lexicons=load_lexicon('mpqa')
twitter_lexicons=load_lexicon('twitter')

### Compares the different lexicon repositories through the mutual information between them (common words)

In [6]:
twitter_lexicons

{'horrible': -1,
 '#eurgh': -1,
 ':/': -1,
 ':(': -1,
 ':)': 1,
 'cough': -1,
 'whoever': -1,
 'ciao': 1,
 'relieve': 1,
 'ouch': -1,
 'foul': -1,
 'catch': 1,
 'ugh': -1,
 'sleep': 1,
 '#pissedoff': -1,
 'hanging': -1,
 'go': -1,
 'follow': -1,
 'revolting': -1,
 'chillin': 1,
 'children': 1,
 'forget': -1,
 'spoiled': -1,
 'dreadful': -1,
 'yummy': 1,
 'vile': -1,
 'ignorant': -1,
 '#happy': 1,
 'tweet': 1,
 'show': 1,
 'sweetest': 1,
 'young': 1,
 'send': 1,
 'yummm': 1,
 'finally': 1,
 'asking': 1,
 'wants': 1,
 'hola': 1,
 'text': 1,
 'wooooo': 1,
 'smile': 1,
 'sorry': -1,
 'enjoying': 1,
 'ergh': -1,
 '#forgiveme': -1,
 'fly': 1,
 ':o': 1,
 '#dying': -1,
 'garden': 1,
 'song': 1,
 ':d': -1,
 '#hurt': -1,
 'horror': -1,
 'rise': 1,
 '#rich': 1,
 'unfamiliar': -1,
 'anyway': 1,
 ':|': -1,
 '#justwhatineed': 1,
 'fan': 1,
 'updates': 1,
 'fam': 1,
 'telling': -1,
 '#tramps': -1,
 ':p': 1,
 'w00t': 1,
 'cool': 1,
 'school': 1,
 '#nolife': -1,
 '<33': 1,
 'mothers': 1,
 '#pitiful': -

In [6]:
## FIGURE OUT WHAT DOES THE COMPARE_LEXICONS DOES.
compare_lexicons()

inquirer 8640 3457
mpqa 6886 6462
bingliu 6785 6785
inquirer mpqa agreement: 82.47
inquirer mpqa agreement ignoring neutral: 98.50
inquirer bingliu agreement: 84.39
inquirer bingliu agreement ignoring neutral: 98.74
mpqa bingliu agreement: 99.19
mpqa bingliu agreement ignoring neutral: 99.44


#### Lexicon Induction : the idea is to generate the lexicons provided the corpus. This method makes sure that the lexicon are sensitive to the context they are drawn from. They may prove useful if we would like to assess them in a simiar context. For instance, financial lexicons will reflect better sentiments than using general lexicons such as SentiWordNet. Three ways purposed for induction 

- SENTPROP
- DENSIFIER
- Sentiment140

In [7]:
def dist(embeds, positive_seeds, negative_seeds, **kwargs):
    polarities = {}
    sim_mat = similarity_matrix(embeds, **kwargs)
    for i, w in enumerate(embeds.iw):
        if w not in positive_seeds and w not in negative_seeds:
            pol = sum(sim_mat[embeds.wi[p_seed], i] for p_seed in positive_seeds)
            pol -= sum(sim_mat[embeds.wi[n_seed], i] for n_seed in negative_seeds)
            polarities[w] = pol
    return polarities


def pmi(count_embeds, positive_seeds, negative_seeds, smooth=0.01, **kwargs):
    """
    Learns polarity scores using PMI with seed words.
    Adapted from Turney, P. and M. Littman. "Measuring Praise and Criticism: Inference of semantic orientation from assocition".
    ACM Trans. Inf. Sys., 2003. 21(4) 315-346.

    counts is explicit embedding containing raw co-occurrence counts
    """
    w_index = count_embeds.wi
    c_index = count_embeds.ci
    counts = count_embeds.m
    polarities = {}
    for w in count_embeds.iw:
        if w not in positive_seeds and w not in negative_seeds:
            pol = sum(np.log(counts[w_index[w], c_index[seed]] + smooth) 
                    - np.log(counts[w_index[seed],:].sum()) for seed in positive_seeds)
            pol -= sum(np.log(counts[w_index[w], c_index[seed]] + smooth) 
                    - np.log(counts[w_index[seed],:].sum())for seed in negative_seeds)
            polarities[w] = pol
    return polarities

In [8]:
import polarity_induction_methods

### THIS IS THE FUNCTION FOR INDUCING LEXICONS GIVEN THE SEEDS, EMBEDDINGS AND THE METHOD.
def run_method(positive_seeds, negative_seeds, embeddings, transform_embeddings=False, post_densify=False,
        method=polarity_induction_methods.densify, **kwargs):
    
    
    if transform_embeddings:
        print ("Transforming embeddings...")
        embeddings = embedding_transformer.apply_embedding_transformation(embeddings, positive_seeds, negative_seeds, n_dim=50)
    
    
    ## using densify method
    if post_densify:
        polarities = method(embeddings, positive_seeds, negative_seeds, **kwargs)
        top_pos = [word for word in 
                sorted(polarities, key = lambda w : -polarities[w])[:150]]
        top_neg = [word for word in 
                sorted(polarities, key = lambda w : polarities[w])[:150]]
        top_pos.extend(positive_seeds)
        top_neg.extend(negative_seeds)
        return polarity_induction_methods.densify(embeddings, top_pos, top_neg)
    
    
    positive_seeds = [s for s in positive_seeds if s in embeddings]
    negative_seeds = [s for s in negative_seeds if s in embeddings]
    
    
    return method(embeddings, positive_seeds, negative_seeds, **kwargs)


data =  /home/ubuntu/workspace/nlpclass-1187-g-Mad_Titans/sa/embeddings_socialsent/


Using Theano backend.


In [41]:
import seeds
from representations.representation_factory import create_representation
import constants

def evaluate_methods():
    """
    Evaluates different methods on standard English.
    """
    print ("Getting evalution words..")
    np.random.seed(0)
    
    ## inquirer is ternrary -1,0,1
    lexicon = load_lexicon("inquirer", remove_neutral=False)
    
    ## kuperman is continus -5.0 to 5
    kuperman = load_lexicon("kuperman", remove_neutral=False)
    eval_words = set(lexicon.keys())

    qwn = load_lexicon("qwn-scores")
    for word in lexicon:
        if not word in qwn:
            qwn[word] = 0

    positive_seeds, negative_seeds = seeds.hist_seeds()
    
    common_embed = create_representation("GIGA", constants.GLOVE_EMBEDDINGS,eval_words.union(positive_seeds).union(negative_seeds))
    
    
    embed_words = set(common_embed.iw)
    
    
    eval_words = eval_words.intersection(embed_words)

    eval_words = [word for word in eval_words 
            if not word in positive_seeds 
            and not word in negative_seeds]
    
    
    print ("Evaluating with ", len(eval_words), "out of", len(lexicon))
    print ("SentProp:")
    
    
    polarities = run_method(positive_seeds, negative_seeds, 
            common_embed.get_subembed(set(eval_words).union(negative_seeds).union(positive_seeds)),
            method=polarity_induction_methods.label_propagate_probabilistic,beta=0.99, nn=10)
    
    
    evaluate(polarities, lexicon, eval_words, tau_lexicon=kuperman)
    util.write_pickle(polarities, "tmp/gi-cc-walk-pols.pkl")

In [42]:
evaluate_methods()

Getting evalution words..
Evaluating with  8528 out of 8640
SentProp:


NameError: name 'DEFAULT_ARGUMENTS' is not defined

In [10]:
import seeds
from representations.representation_factory import create_representation
import constants


lexicon = load_lexicon("inquirer", remove_neutral=False)
eval_words = set(lexicon.keys())
positive_seeds, negative_seeds = seeds.hist_seeds()
eval_words_with_seeds=eval_words.union(positive_seeds).union(negative_seeds)


common_embed = create_representation("GIGA", constants.GLOVE_EMBEDDINGS,eval_words)



In [11]:
embed_words = set(common_embed.iw)


eval_words = set(eval_words).intersection(embed_words)

eval_words = [word for word in eval_words  if not word in positive_seeds   and not word in negative_seeds]


print ("Evaluating with ", len(eval_words), "out of", len(lexicon))
print ("SentProp:")


polarities = run_method(positive_seeds, negative_seeds, 
        common_embed.get_subembed(set(eval_words).union(negative_seeds).union(positive_seeds)),
        method=polarity_induction_methods.label_propagate_probabilistic,beta=0.99, nn=10)


util.write_pickle(polarities, "tmp/gi-cc-walk-pols.pkl")



Evaluating with  8528 out of 8640
SentProp:


NameError: name 'util' is not defined

In [19]:
fname="tmp/gi-cc-walk-pols.pkl"

def write_pickle(o, fname):
    with open(fname, 'w') as f:
        cPickle.dump(polarities, f, -1)



In [33]:
from operator import itemgetter
from historical import vocab
from sklearn.metrics import roc_auc_score, average_precision_score, confusion_matrix, f1_score
from scipy.stats import kendalltau
from representations.representation_factory import create_representation
from evaluate_methods import evaluate

print ("Getting evalution words..")
np.random.seed(0)

## inquirer is ternrary -1,0,1
lexicon = load_lexicon("inquirer", remove_neutral=False)

## kuperman is continus -5.0 to 5
kuperman = load_lexicon("kuperman", remove_neutral=False)
eval_words = set(lexicon.keys())

qwn = load_lexicon("qwn-scores")
for word in lexicon:
    if not word in qwn:
        qwn[word] = 0
            
evaluate(polarities, lexicon, eval_words, tau_lexicon=kuperman)
util.write_pickle(polarities, "tmp/gi-cc-walk-pols.pkl")


Getting evalution words..


KeyError: 'accurateness'

In [40]:
#list(filter(lambda x:x=='accurateness',eval_words))
polarities['accurateness']

KeyError: 'accurateness'

In [None]:
from socialsent import constants
from socialsent import util
from socialsent import polarity_induction_methods
from socialsent import seeds
from socialsent import lexicons
import sys
import random
import numpy as np
import scipy as sp
import embedding_transformer

from operator import itemgetter
from socialsent.historical import vocab
from sklearn.metrics import roc_auc_score, average_precision_score, confusion_matrix, f1_score
from scipy.stats import kendalltau
from socialsent.representations.representation_factory import create_representation

DEFAULT_ARGUMENTS = dict(
        # for iterative graph algorithms
        similarity_power=1,
        arccos=True,
        max_iter=50,
        epsilon=1e-6,
        sym=True,

        # for learning embeddings transformation
        n_epochs=50,
        force_orthogonal=False,
        batch_size=100,
        cosine=False,

        ## bootstrap
        num_boots=1,
        n_procs=1,
)

def evaluate_methods():
    """
    Evaluates different methods on standard English.
    """
    print ("Getting evalution words..")
    np.random.seed(0)
    lexicon = lexicons.load_lexicon("inquirer", remove_neutral=False)
    kuperman = lexicons.load_lexicon("kuperman", remove_neutral=False)
    eval_words = set(lexicon.keys())

    # load in WordNet lexicon and pad with zeros for missing words
    # (since these are implicitly zero for this method)
    qwn = lexicons.load_lexicon("qwn-scores")
    for word in lexicon:
        if not word in qwn:
            qwn[word] = 0

    positive_seeds, negative_seeds = seeds.hist_seeds()

    common_embed = create_representation("GIGA", constants.GOOGLE_EMBEDDINGS, 
            eval_words.union(positive_seeds).union(negative_seeds))
    embed_words = set(common_embed.iw)
    eval_words = eval_words.intersection(embed_words)

    eval_words = [word for word in eval_words 
            if not word in positive_seeds 
            and not word in negative_seeds]
    print ("Evaluating with ", len(eval_words), "out of", len(lexicon))

#    
#    print "WordNet:"
#    evaluate(qwn, lexicon, eval_words, tau_lexicon=kuperman)
#
#    print "Densifier:"
#    polarities = run_method(positive_seeds, negative_seeds, 
#            common_embed.get_subembed(set(eval_words).union(negative_seeds).union(positive_seeds)),
#            method=polarity_induction_methods.bootstrap, score_method=polarity_induction_methods.densify,
#            **DEFAULT_ARGUMENTS)
#    evaluate(polarities, lexicon, eval_words, tau_lexicon=kuperman)

    print( "SentProp:")
    polarities = run_method(positive_seeds, negative_seeds, 
            common_embed.get_subembed(set(eval_words).union(negative_seeds).union(positive_seeds)),
            method=polarity_induction_methods.label_propagate_probabilistic,
            #method=polarity_induction_methods.bootstrap, 
            beta=0.99, nn=10,

            **DEFAULT_ARGUMENTS)
    evaluate(polarities, lexicon, eval_words, tau_lexicon=kuperman)
    util.write_pickle(polarities, "tmp/gi-cc-walk-pols.pkl")

def hyperparam_eval():
    print ("Getting evaluation words and embeddings")
    lexicon = lexicons.load_lexicon("bingliu", remove_neutral=False)
    eval_words = set(lexicon.keys())

    positive_seeds, negative_seeds = seeds.hist_seeds()

    common_embed = create_representation("GIGA", constants.COMMON_EMBEDDINGS, 
            eval_words.union(positive_seeds).union(negative_seeds))
    common_words = set(common_embed.iw)
    eval_words = eval_words.intersection(common_words)

    hist_embed = create_representation("SVD", constants.SVD_EMBEDDINGS + "1990")
    hist_words = set(hist_embed.iw)
    eval_words = eval_words.intersection(hist_words)

    eval_words = [word for word in eval_words
            if not word in positive_seeds 
            and not word in negative_seeds] 

    print ("SentProp...")
    for nn in [5, 10, 25, 50]:
        for beta in [0.8, 0.9, 0.95, 0.99]:
          print( "Common")
          polarities = run_method(positive_seeds, negative_seeds, 
                    common_embed.get_subembed(set(eval_words).union(negative_seeds).union(positive_seeds)),
                    method=polarity_induction_methods.random_walk, 
                    nn=nn, beta=beta,
                    **DEFAULT_ARGUMENTS)
          evaluate(polarities, lexicon, eval_words)
          print( "Hist")
          polarities = run_method(positive_seeds, negative_seeds, 
                    hist_embed.get_subembed(set(eval_words).union(negative_seeds).union(positive_seeds)),
                    method=polarity_induction_methods.random_walk, 
                    nn=nn, beta=beta,
                    **DEFAULT_ARGUMENTS)
          evaluate(polarities, lexicon, eval_words)

    print ("Densify...")
    for lr in [0.001, 0.01, 0.1, 0.5]:
        for reg in [0.001, 0.01, 0.1, 0.5]:
          print( "LR : ", lr, "Reg: ", reg)
          print ("Common")
          polarities = run_method(positive_seeds, negative_seeds, 
                    common_embed.get_subembed(set(eval_words).union(negative_seeds).union(positive_seeds)),
                    method=polarity_induction_methods.densify, 
                    lr=lr, regularization_strength=reg,
                    **DEFAULT_ARGUMENTS)
          evaluate(polarities, lexicon, eval_words, tern=False)
          print ("Hist")
          polarities = run_method(positive_seeds, negative_seeds, 
                    hist_embed.get_subembed(set(eval_words).union(negative_seeds).union(positive_seeds)),
                    method=polarity_induction_methods.densify, 
                    lr=lr, regularization_strength=reg,
                    **DEFAULT_ARGUMENTS)
          evaluate(polarities, lexicon, eval_words, tern=False)


def evaluate_overlap_methods():
    """
    Evaluate different methods on standard English,
    but restrict to words that are present in the 1990s portion of historical data.
    """
    print ("Getting evalution words and embeddings..")
    np.random.seed(0)
    lexicon = lexicons.load_lexicon("inquirer", remove_neutral=False)
    kuperman = lexicons.load_lexicon("kuperman", remove_neutral=False)
    eval_words = set(lexicon.keys())

    # load in WordNet lexicon and pad with zeros for missing words
    # (since these are implicitly zero for this method)
    qwn = lexicons.load_lexicon("qwn-scores")
    for word in lexicon:
        if not word in qwn:
            qwn[word] = 0

    positive_seeds, negative_seeds = seeds.hist_seeds()

#    common_embed = create_representation("GIGA", constants.COMMON_EMBEDDINGS, 
#            eval_words.union(positive_seeds).union(negative_seeds))
#    common_words = set(common_embed.iw)
#    eval_words = eval_words.intersection(common_words)

    hist_embed = create_representation("SVD", constants.COHA_EMBEDDINGS + "2000")
    hist_counts = create_representation("Explicit", constants.COHA_COUNTS + "2000", normalize=False)
    hist_words = set(hist_embed.iw)
    eval_words = eval_words.intersection(hist_words)

    eval_words = [word for word in eval_words
            if not word in positive_seeds 
            and not word in negative_seeds] 

    hist_counts = hist_counts.get_subembed(set(eval_words).union(positive_seeds).union(negative_seeds), 
            restrict_context=False)

    print( "Evaluating with ", len(eval_words), "out of", len(lexicon))

    print ("PMI")
    polarities = run_method(positive_seeds, negative_seeds,
            hist_counts,
            method=polarity_induction_methods.bootstrap,
            score_method=polarity_induction_methods.pmi,
            **DEFAULT_ARGUMENTS)
    evaluate(polarities, lexicon, eval_words, tau_lexicon=kuperman)

    print
    evaluate(qwn, lexicon, eval_words, tau_lexicon=kuperman)

    print( "SentProp with 1990s Fic embeddings")
    polarities = run_method(positive_seeds, negative_seeds, 
                        hist_embed.get_subembed(set(eval_words).union(negative_seeds).union(positive_seeds)),
                        method=polarity_induction_methods.bootstrap,
                        score_method=polarity_induction_methods.random_walk, 
                        nn=25, beta=0.9,
                        **DEFAULT_ARGUMENTS)
    evaluate(polarities, lexicon, eval_words, tau_lexicon=kuperman)
    print
    
    print( "Densifier with 1990s Fic embeddings")
    polarities = run_method(positive_seeds, negative_seeds, 
                        hist_embed.get_subembed(set(eval_words).union(negative_seeds).union(positive_seeds)),
                        method=polarity_induction_methods.bootstrap,
                        score_method=polarity_induction_methods.densify,
                        **DEFAULT_ARGUMENTS)
    evaluate(polarities, lexicon, eval_words, tau_lexicon=kuperman)
    

    print ("Velikovich with 1990s Fic embeddings")
    hist_counts.normalize()
    polarities = run_method(positive_seeds, negative_seeds, 
                        hist_counts,
                        method=polarity_induction_methods.bootstrap,
                        score_method=polarity_induction_methods.graph_propagate,
                        T=3,
                        **DEFAULT_ARGUMENTS)
    evaluate(polarities, lexicon, eval_words, tau_lexicon=kuperman)
    

#    print "SentProp with CC"
#    polarities = run_method( positive_seeds, negative_seeds, 
#                        common_embed.get_subembed(set(eval_words).union(negative_seeds).union(positive_seeds)),
#                        method=polarity_induction_methods.bootstrap,
#                        score_method=polarity_induction_methods.random_walk,
#                        beta=0.99, nn=10,
#                        **DEFAULT_ARGUMENTS)
#    evaluate(polarities, lexicon, eval_words, tau_lexicon=kuperman)
#
#    print "Densifier with CC"
#    polarities = run_method( positive_seeds, negative_seeds, 
#                        common_embed.get_subembed(set(eval_words).union(negative_seeds).union(positive_seeds)),
#                        method=polarity_induction_methods.bootstrap,
#                        score_method=polarity_induction_methods.densify,
#                        **DEFAULT_ARGUMENTS)
#    evaluate(polarities, lexicon, eval_words, tau_lexicon=kuperman)



def evaluate_adj_methods():
    """
    Evaluate different methods on standard English,
    but restrict to words that are present in the 1990s portion of historical data.
    """
    print ("Getting evalution words and embeddings..")
    np.random.seed(0)
    lexicon = lexicons.load_lexicon("inquirer", remove_neutral=False)
    kuperman = lexicons.load_lexicon("kuperman", remove_neutral=False)
    eval_words = set(lexicon.keys())
    adjs = vocab.pos_words("1990", "ADJ")

    # load in WordNet lexicon and pad with zeros for missing words
    # (since these are implicitly zero for this method)
    qwn = lexicons.load_lexicon("qwn-scores")
    for word in lexicon:
        if not word in qwn:
            qwn[word] = 0

    positive_seeds, negative_seeds = seeds.adj_seeds()

    common_embed = create_representation("GIGA", constants.COMMON_EMBEDDINGS, 
            eval_words.union(positive_seeds).union(negative_seeds))
    common_words = set(common_embed.iw)
    eval_words = eval_words.intersection(common_words)

    hist_embed = create_representation("SVD", constants.COHA_EMBEDDINGS + "2000")
    hist_counts = create_representation("Explicit", constants.COUNTS + "1990", normalize=False)
    hist_words = set(hist_embed.iw)
    eval_words = eval_words.intersection(hist_words)

    embed_words = [word for word in adjs if word in hist_words and word in common_words]
    eval_words = [word for word in eval_words if word in embed_words
            and not word in positive_seeds 
            and not word in negative_seeds] 
    
    hist_counts = hist_counts.get_subembed(set(eval_words).union(positive_seeds).union(negative_seeds), 
            restrict_context=False)

    print ("Evaluating with ", len(eval_words), "out of", len(lexicon))
    print ("Embeddings with ", len(embed_words))

    print ("PMI")
    polarities = run_method(positive_seeds, negative_seeds,
            hist_counts,
            method=polarity_induction_methods.bootstrap,
            score_method=polarity_induction_methods.pmi,
            boot_size=6,
            **DEFAULT_ARGUMENTS)
    evaluate(polarities, lexicon, eval_words, tau_lexicon=kuperman)

    
    evaluate(qwn, lexicon, eval_words, tau_lexicon=kuperman)

    print ("Dist with 1990s Fic embeddings")
    polarities = run_method(positive_seeds, negative_seeds, 
                        hist_embed.get_subembed(set(embed_words).union(negative_seeds).union(positive_seeds)),
                        method=polarity_induction_methods.dist, 
                        **DEFAULT_ARGUMENTS)
    evaluate(polarities, lexicon, eval_words, tau_lexicon=kuperman)
    

    print( "Densifier with 1990s Fic embeddings")
    polarities = run_method(positive_seeds, negative_seeds, 
                        hist_embed.get_subembed(set(embed_words).union(negative_seeds).union(positive_seeds)),
                        method=polarity_induction_methods.bootstrap, 
                        score_method=polarity_induction_methods.densify, 
                        boot_size=6,
                        **DEFAULT_ARGUMENTS)
    evaluate(polarities, lexicon, eval_words, tau_lexicon=kuperman)
    

    print( "SentProp with 1990s Fic embeddings")
    polarities = run_method(positive_seeds, negative_seeds, 
                        hist_embed.get_subembed(set(embed_words).union(negative_seeds).union(positive_seeds)),
                        method=polarity_induction_methods.bootstrap, 
                        nn=25, beta=0.9,
                        boot_size=6,
                        **DEFAULT_ARGUMENTS)
    evaluate(polarities, lexicon, eval_words, tau_lexicon=kuperman)
    

    print ("Velikovich with 1990s Fic embeddings")
    hist_counts.normalize()
    polarities = run_method(positive_seeds, negative_seeds, 
                        hist_counts,
                        method=polarity_induction_methods.bootstrap, 
                        score_method=polarity_induction_methods.graph_propagate,
                        T=3,
                        boot_size=6,
                        **DEFAULT_ARGUMENTS)
    evaluate(polarities, lexicon, eval_words, tau_lexicon=kuperman)
    

    print ("SentProp with CC")
    polarities = run_method( positive_seeds, negative_seeds, 
                        common_embed.get_subembed(set(embed_words).union(negative_seeds).union(positive_seeds)),
                        method=polarity_induction_methods.bootstrap, 
                        score_method=polarity_induction_methods.random_walk,
                        beta=0.99, nn=10,
                        boot_size=6,
                        **DEFAULT_ARGUMENTS)
    evaluate(polarities, lexicon, eval_words, tau_lexicon=kuperman)

    print ("Densifier with CC")
    polarities = run_method( positive_seeds, negative_seeds, 
                        common_embed.get_subembed(set(embed_words).union(negative_seeds).union(positive_seeds)),
                        method=polarity_induction_methods.bootstrap, 
                        score_method=polarity_induction_methods.densify,
                        boot_size=6,
                        **DEFAULT_ARGUMENTS)
    evaluate(polarities, lexicon, eval_words, tau_lexicon=kuperman)


def evaluate_finance_methods():
    np.random.seed(0)
    print ("Getting evalution words and embeddings..")
    gi = lexicons.load_lexicon("inquirer", remove_neutral=False)
    lexicon = lexicons.load_lexicon("finance", remove_neutral=True)

    ### padding in neutrals from GI lexicon
    gi_neut = [word for word in gi if gi[word] == 0]
    gi_neut = np.random.choice(gi_neut, int( (float(len(gi_neut))/(len(gi)-len(gi_neut)) * len(lexicon))))
    for word in gi_neut:
        lexicon[word] = 0
    positive_seeds, negative_seeds = seeds.finance_seeds()
    stock_embed = create_representation("SVD", constants.STOCK_EMBEDDINGS)
    stock_counts = create_representation("Explicit", constants.STOCK_COUNTS)
    common_embed = create_representation("GIGA", constants.COMMON_EMBEDDINGS, set(lexicon.keys()).union(positive_seeds).union(negative_seeds))

    stock_words = set(stock_embed.iw)
    common_words = set(common_embed)
    eval_words = [word for word in lexicon if word in stock_words and
            word in common_words and
            not word in positive_seeds and  
            not word in negative_seeds]

    stock_counts = stock_counts.get_subembed(set(eval_words).union(positive_seeds).union(negative_seeds), restrict_context=False)

    print ("Evaluating with ", len(eval_words), "out of", len(lexicon))

    print ("Velikovich with 1990s Fic embeddings")
    stock_counts.normalize()
    polarities = run_method(positive_seeds, negative_seeds, 
                        stock_counts,
                        method=polarity_induction_methods.bootstrap, 
                        score_method=polarity_induction_methods.graph_propagate,
                        T=3,
                        boot_size=6,
                        **DEFAULT_ARGUMENTS)
    evaluate(polarities, lexicon, eval_words, tau_lexicon=None)


    print ("PMI")
    polarities = run_method(positive_seeds, negative_seeds,
            stock_counts,
            method=polarity_induction_methods.bootstrap, 
            score_method=polarity_induction_methods.pmi,
            **DEFAULT_ARGUMENTS)
    evaluate(polarities, lexicon, eval_words)
    print

    print( "SentProp with stock embeddings")
    polarities = run_method(positive_seeds, negative_seeds, 
                        stock_embed.get_subembed(set(eval_words).union(negative_seeds).union(positive_seeds)),
                        method=polarity_induction_methods.bootstrap, 
                        beta=0.9, nn=25,
                        **DEFAULT_ARGUMENTS)
    evaluate(polarities, lexicon, eval_words)

    print ("Densifier with stock embeddings")
    polarities = run_method(positive_seeds, negative_seeds, 
                        stock_embed.get_subembed(set(eval_words).union(negative_seeds).union(positive_seeds)),
                        method=polarity_induction_methods.bootstrap, 
                        score_method=polarity_induction_methods.densify, 
                        **DEFAULT_ARGUMENTS)
    evaluate(polarities, lexicon, eval_words)


def evaluate_twitter_methods():
    np.random.seed(0)

    print ("Getting evalution words and embeddings..")
    gi = lexicons.load_lexicon("inquirer", remove_neutral=False)
    lexicon = lexicons.load_lexicon("twitter", remove_neutral=True)
    scores = lexicons.load_lexicon("twitter-scores", remove_neutral=True)
    sent140 = lexicons.load_lexicon("140-scores", remove_neutral=False)

    # padding lexicon with neutral from GI
    gi_neut = [word for word in gi if gi[word] == 0]
    gi_neut = np.random.choice(gi_neut, int( (float(len(gi_neut))/(len(gi)-len(gi_neut)) * len(lexicon))))
    for word in gi_neut:
        lexicon[word] = 0

    positive_seeds, negative_seeds = seeds.twitter_seeds()
    embed = create_representation("GIGA", constants.TWITTER_EMBEDDINGS, set(lexicon.keys()).union(positive_seeds).union(negative_seeds))
    print len((set(positive_seeds).union(negative_seeds)).intersection(embed.iw))
    embed_words = set(embed.iw)
    s140_words = set(sent140.keys())
    eval_words = [word for word in lexicon if word in s140_words and
            not word in positive_seeds 
            and not word in negative_seeds
            and word in embed_words] 

    print ("Evaluating with ", len(eval_words), "out of", len(lexicon))

    print( "Sentiment 140")
    evaluate(sent140, lexicon, eval_words, tau_lexicon=scores)
    print

    print( "SentProp")
    polarities = run_method(positive_seeds, negative_seeds, 
                        embed,
                        method=polarity_induction_methods.bootstrap, 
                        score_method=polarity_induction_methods.densify,
                        lr=0.01, regularization_strength=0.5,
                        **DEFAULT_ARGUMENTS)
    util.write_pickle(polarities, "twitter-test.pkl")
    evaluate(polarities, lexicon, eval_words, tau_lexicon=scores)

    print ("SentProp")
    polarities = run_method(positive_seeds, negative_seeds, 
                        embed,
                        method=polarity_induction_methods.bootstrap, 
                        score_method=polarity_induction_methods.random_walk,
                        beta=0.9, nn=25,
                        **DEFAULT_ARGUMENTS)
    evaluate(polarities, lexicon, eval_words, tau_lexicon=scores)


def run_method(positive_seeds, negative_seeds, embeddings, transform_embeddings=False, post_densify=False,
        method=polarity_induction_methods.densify, **kwargs):
    if transform_embeddings:
        print ("Transforming embeddings...")
        embeddings = embedding_transformer.apply_embedding_transformation(embeddings, positive_seeds, negative_seeds, n_dim=50)
    if post_densify:
        polarities = method(embeddings, positive_seeds, negative_seeds, **kwargs)
        top_pos = [word for word in 
                sorted(polarities, key = lambda w : -polarities[w])[:150]]
        top_neg = [word for word in 
                sorted(polarities, key = lambda w : polarities[w])[:150]]
        top_pos.extend(positive_seeds)
        top_neg.extend(negative_seeds)
        return polarity_induction_methods.densify(embeddings, top_pos, top_neg)
    positive_seeds = [s for s in positive_seeds if s in embeddings]
    negative_seeds = [s for s in negative_seeds if s in embeddings]
    return method(embeddings, positive_seeds, negative_seeds, **kwargs)


def print_polarities(polarities, lexicon):
    for w, p in sorted(polarities.items(), key=itemgetter(1), reverse=True):
        print (util.GREEN if lexicon[w] == 1 else util.RED) + \
              "{:}: {:0.5f}".format(w, p) + util.ENDC

def evaluate(polarities, lexicon, eval_words, tau_lexicon=None, tern=True):
    acc, auc, avg_prec = binary_metrics(polarities, lexicon, eval_words)
    if auc < 0.5:
        polarities = {word:-1*polarities[word] for word in polarities}
        acc, auc, avg_prec = binary_metrics(polarities, lexicon, eval_words)
    print ("Binary metrics:")
    print( "==============")
    print ("Accuracy with optimal threshold: {:.4f}".format(acc))
    print ("ROC AUC Score: {:.4f}".format(auc))
    print ("Average Precision Score: {:.4f}".format(avg_prec))
    
    if not tern:
        return 
    tau, cmn_f1, maj_f1, conf_mat = ternary_metrics(polarities, lexicon, eval_words, tau_lexicon=tau_lexicon)
    print ("Ternary metrics:")
    print( "==============")
    print ("Majority macro F1 baseline {:.4f}".format(maj_f1))
    print ("Macro F1 with cmn threshold: {:.4f}".format(cmn_f1))
    if tau:
        print ("Kendall Tau {:.4f}".format(tau))
    print ("Confusion matrix: ")
    print conf_mat
    print( "Neg :", float(conf_mat[0,0]) / np.sum(conf_mat[0,:]))
    print ("Neut :", float(conf_mat[1,1]) / np.sum(conf_mat[1,:]))
    print ("Pos :", float(conf_mat[2,2]) / np.sum(conf_mat[2,:]))
    print
    if tau:
        print( "Latex table line: {:2.1f} & {:2.1f} & {:.2f}\\\\".format(100*auc, 100*cmn_f1, tau))
    else:
        print ("Latex table line: {:2.1f} & {:2.1f}\\\\".format(100*auc, 100*cmn_f1))


def binary_metrics(polarities, lexicon, eval_words, print_predictions=False, top_perc=None):
    eval_words = [word for word in eval_words if lexicon[word] != 0]
    y_prob, y_true = [], []
    if top_perc:
        polarities = {word:polarities[word] for word in 
                sorted(eval_words, key = lambda w : abs(polarities[w]-0.5), reverse=True)[:int(top_perc*len(polarities))]}
    else:
        polarities = {word:polarities[word] for word in eval_words}
    for w in polarities:
        y_prob.append(polarities[w])
        y_true.append(1 + lexicon[w] / 2)

    n = len(y_true)
    ordered_labels = [y_true[i] for i in sorted(range(n), key=lambda i: y_prob[i])]
    positive = sum(ordered_labels)
    cumsum = np.cumsum(ordered_labels)
    best_accuracy = max([(1 + i - cumsum[i] + positive - cumsum[i]) / float(n) for i in range(n)])

    return best_accuracy, roc_auc_score(y_true, y_prob), average_precision_score(y_true, y_prob)

def ternary_metrics(polarities, lexicon, eval_words, tau_lexicon=None):
    if not tau_lexicon == None:
        kendall_words = list(set(eval_words).intersection(tau_lexicon))
    y_prob, y_true = [], []
    polarities = {word:polarities[word] for word in eval_words}
    for w in polarities:
        y_prob.append(polarities[w])
        y_true.append(lexicon[w])
    y_prob = np.array(y_prob)
    y_true = np.array(y_true)
    y_prob = 2*(y_prob - np.min(y_prob)) / (np.max(y_prob) - np.min(y_prob)) - 1
    neg_prop = np.sum(np.array(lexicon.values()) == -1) / float(len(lexicon))
    pos_prop = np.sum(np.array(lexicon.values()) == 1) / float(len(lexicon))
    sorted_probs = sorted(y_prob)
    neg_thresh = sorted_probs[int(np.round(neg_prop*len(sorted_probs)))]
    pos_thresh = sorted_probs[-int(np.round(pos_prop*len(sorted_probs)))]
    cmn_labels = [1 if val >= pos_thresh else -1 if val <= neg_thresh else 0 for val in y_prob]
    if not tau_lexicon == None:
        tau = kendalltau(*zip(*[(polarities[word], tau_lexicon[word]) for word in kendall_words]))[0]
    else:
        tau = None
    maj_f1 = f1_score(y_true, np.repeat(sp.stats.mode(y_true)[0][0], len(y_true)), average="macro")
    cmn_f1 = f1_score(y_true, cmn_labels, average="macro")
    label_func = lambda entry : 1 if entry > pos_thresh else -1 if entry < neg_thresh else 0
    conf_mat = confusion_matrix(y_true, [label_func(entry) for entry in y_prob])
    return tau, cmn_f1, maj_f1, conf_mat

def optimal_tern_acc(polarities, lexicon, eval_words, threshes=np.arange(0.95, 0.0, -0.01)):
    """
    Performs grid search to determine optimal ternary accuracy.
    """
    y_prob, y_true = [], []
    polarities = {word:polarities[word] for word in eval_words}
    for w in polarities:
        y_prob.append(polarities[w])
        y_true.append(lexicon[w])
    y_prob = np.array(y_prob)
    y_true = np.array(y_true)
    y_prob = 2*(y_prob - np.min(y_prob)) / (np.max(y_prob) - np.min(y_prob)) - 1
    f1s = np.zeros((len(threshes)**2,))
    for i, pos_thresh in enumerate(threshes):
        for k, neg_thresh in enumerate(threshes):
            labels = []
            for j in range(len(y_prob)):
                if y_prob[j] > pos_thresh:
                    labels.append(1)
                elif y_prob[j] < -1*neg_thresh:
                    labels.append(-1)
                else:
                    labels.append(0)
            f1s[i*len(threshes)+k] = f1_score(y_true, labels, average="macro")
    print ("(Oracle) majority baseline {:.4f}".format(
            f1_score(y_true, np.repeat(sp.stats.mode(y_true)[0][0], len(y_true)), average="macro")))
    print ("Accuracy with optimal threshold: {:.4f}".format(np.max(f1s)))
    best_iter = int(np.argmax(f1s))
    pos_thresh = threshes[best_iter / len(threshes)]
    neg_thresh = -1*threshes[best_iter % len(threshes)]
    print ("Optimal positive threshold: {:.4f}".format(pos_thresh))
    print ("Optimal negative threshold: {:.4f}".format(neg_thresh))
    print( "Confusion matrix: ")
    label_func = lambda entry : 1 if entry > pos_thresh else -1 if entry < neg_thresh else 0
    conf_mat = confusion_matrix(y_true, [label_func(entry) for entry in y_prob])
    print( conf_mat)
    print( "Neg :", float(conf_mat[0,0]) / np.sum(conf_mat[0,:]))
    print ("Neut :", float(conf_mat[1,1]) / np.sum(conf_mat[1,:]))
    print ("Pos :", float(conf_mat[2,2]) / np.sum(conf_mat[2,:]))


if __name__ == '__main__':
    random.seed(0)
    if sys.argv[1] == "twitter":
        evaluate_twitter_methods()
    elif sys.argv[1] == "finance":
        evaluate_finance_methods()
    elif sys.argv[1] == "overlap":
        evaluate_overlap_methods()
    elif sys.argv[1] == "adj":
        evaluate_adj_methods()
    elif sys.argv[1] == "hyper":
        hyperparam_eval()
    else:
        evaluate_methods()
