In [1]:
import pickle
import numpy as np
import re

### Loading results
Results have the format:

    <dict> {conf:table_of_results},

where **conf** represents the experimental setting and is the tuple 
    
    (0-remove_stopwords,
     1-remove_punctuation,
     2-termsim_metric,
     3-normalization,
     4-termsim_th,
     5-synsets_taken,
     6-wscheme,
     7-sim_metric)
     
The most recent experiments use a different, extended configuration:
Configuration format
0. Removing stopwords flag  :  [True, False]

1. Removing punctuation flag  :  [True, False]

2. Wordnet similarity metrics  :  ["path", "lch", "wup", "res", "jcn", "lin"]

3. Features extracted to compute similarity  :  ["token", "lemma", "stem", "lemmapos"]

4. Features used to extract synsets  :  ["token", "lemma", "lemmapos"]

5. Information Content used in some WordNet metrics  :  ["bnc_ic_2007", "bnc_ic_2000", "semcor_ic", "brown_ic"]

6. Normalization flag : [True, False]

7. Term-Term similarity minimum threshold  :  [0.0, 0.25, 0.5, 0.75]

8. Synsets selection strategy (all-vs-all, first)  :  ["all", "first"]

9. Features weighting scheme  :  ["tf", "binary"]

10. Text similarity method  :  ["mihalcea", "softcosine", "stevenson"]
     
**table_of_results** is a numpy 2d array with one row of scores for each **similarity threshold** in the range **range(0, 101, 5)**, with rows sorted by accuracy (best first). The **scores** (columns) are:

    scores = (0-threshold,
              1-accuracy,
              2-f_measure,
              3-precision,
              4-recall)

In [2]:
# Earlier result set, kept for reference:
#experiments = pickle.load(open("./Results/results_20170620_complex_normalization.pickle", 'rb'))
# NOTE(review): pickle.load can execute arbitrary code; only load result files produced by this project.
# The context manager closes the file handle as soon as the results are loaded.
with open("./Results/results_20170830_normalization_4termsimTh_Rada_Stevenson_Softcosine.pickle", 'rb') as results_file:
    experiments = pickle.load(results_file)
len(experiments)

17280

In [3]:
# res = (threshold, accuracy, f_measure, precision, recall)
max([(conf, res[0][0], res[0][1], res[0][2])
     for conf, res in experiments.items()
     #if conf[2] == "path"
    ]
    , key=lambda x:x[2])
#[(conf, res[0][0], res[0][1]) for conf, res in all_scores.items()]

((False,
  True,
  'lin',
  'lemma',
  'lemma',
  'bnc_ic_2007',
  False,
  0.5,
  'all',
  'binary',
  'mihalcea'),
 0.7,
 0.7409224730127576,
 0.8188679245283019)

In [9]:
def extract_best_results(experiments, ktop=3, metric="accuracy", wordnet_metric=None, text_metric=None):
    """Return the ``ktop`` best score rows over all experiments with their configurations.

    Parameters
    ----------
    experiments : dict
        Maps a configuration tuple to a numpy 2d array of scores whose columns
        are (threshold, accuracy, f_measure, precision, recall).
    ktop : int
        Number of best rows to return.
    metric : str
        Score used for ranking: "accuracy", "f_measure", "precision" or "recall".
    wordnet_metric : str or None
        When given, only configurations whose element 2 equals it are considered.
    text_metric : str or None
        When given, only configurations whose last element equals it are considered.

    Returns
    -------
    (best_results, corresponding_confs)
        ``best_results`` is a ``(ktop, 5)`` float array ranked by ``metric`` in
        descending order; ``corresponding_confs`` is an object array with one
        configuration per row of ``best_results``. Rows that could not be
        filled stay all-zero / all-None.

    Raises
    ------
    KeyError
        If ``metric`` is not one of the known score names.
    """
    # Maps score names to column index in the numpy 2d array
    # containing the results for a particular configuration
    scores_map = {"accuracy": 1, "f_measure": 2, "precision": 3, "recall": 4}
    score_col = scores_map[metric]

    # Infer the configuration width from the data so that both the 8-field and
    # the 11-field configuration formats work; 11 is kept as the fallback width.
    conf_width = len(next(iter(experiments))) if experiments else 11

    # Rows [0, ktop) hold the current best; rows [ktop, 2*ktop) are a scratch
    # area receiving the candidates of the experiment being processed.
    best_results = np.zeros((ktop * 2, 5))
    corresponding_confs = np.full((ktop * 2, conf_width), None, dtype=np.dtype('O'))

    for conf, results in experiments.items():
        # Skip text sim metrics distinct to <text_metric> (always the last field)
        if text_metric and conf[-1] != text_metric:
            continue
        # Skip wordnet sim metrics distinct to <wordnet_metric>
        if wordnet_metric and conf[2] != wordnet_metric:
            continue

        # Sort by desired score in descending order and select best ktop
        sorted_results = results[results[:, score_col].argsort()[::-1]][:ktop, :]
        n_new = sorted_results.shape[0]
        if n_new == 0:
            continue

        # A table may have fewer than ktop rows: write what exists and reset
        # the rest of the scratch area so no row is broadcast or left stale.
        best_results[ktop:ktop + n_new, :] = sorted_results
        best_results[ktop + n_new:, :] = 0.0
        corresponding_confs[ktop:ktop + n_new, :] = np.array([conf] * n_new, dtype=np.dtype('O'))
        corresponding_confs[ktop + n_new:, :] = None

        # Re-rank current best and new candidates together; the first ktop
        # rows are the new best, and configurations follow the same order.
        idx = best_results[:, score_col].argsort(axis=0)[::-1]
        best_results = best_results[idx, :]
        corresponding_confs = corresponding_confs[idx, :]

    return best_results[:ktop, :], corresponding_confs[:ktop, :]

In [10]:
# Three best score rows by accuracy over every configuration (no metric filter),
# printed as the score rows, a blank line, then the matching configurations.
bres, bconf = extract_best_results(
    experiments,
    ktop=3,
    metric="accuracy",
    wordnet_metric=None,
    text_metric=None,
)
print(bres, bconf, sep="\n\n")

[[0.7        0.74092247 0.81886792 0.77575561 0.86705412]
 [0.7        0.74092247 0.81886792 0.77575561 0.86705412]
 [0.7        0.74018646 0.81875749 0.77411003 0.86887032]]

[[False True 'lin' 'lemma' 'lemma' 'bnc_ic_2007' False 0.5 'all' 'binary'
  'mihalcea']
 [False True 'lin' 'lemma' 'lemma' 'bnc_ic_2007' False 0.5 'all' 'tf'
  'mihalcea']
 [False True 'lin' 'stem' 'token' 'bnc_ic_2007' False 0.5 'all' 'tf'
  'mihalcea']]


# Getting some results
In this cell we get the best result (top-1 by accuracy) for each combination of text similarity metric and WordNet metric.

In [55]:
# One entry per (text metric, WordNet metric) pair: the best configuration by
# accuracy followed by its threshold, accuracy and f_measure.
my_results = []
for text_metric in ("mihalcea", "softcosine", "stevenson"):
    for wordnet_metric in ("path", "lch", "wup", "res", "jcn", "lin"):
        print("------------------------------------------------------")
        print(text_metric, wordnet_metric)
        res, conf = extract_best_results(
            experiments,
            ktop=1,
            metric="accuracy",
            wordnet_metric=wordnet_metric,
            text_metric=text_metric,
        )
        # Configuration, scores, then two blank lines
        print(conf, res, "\n", sep="\n")
        # Flat row: configuration fields + first three score columns
        my_results.append(np.concatenate([conf[0], res[0, :3]]))

------------------------------------------------------
mihalcea path
[[False True 'path' 'stem' 'token' None False 0.5 'all' 'tf' 'mihalcea']]
[[0.6        0.7392051  0.82095334 0.76538945 0.88521613]]


------------------------------------------------------
mihalcea lch
[[False True 'lch' 'token' 'lemma' None True 0.0 'all' 'tf' 'mihalcea']]
[[0.7        0.73454367 0.82145215 0.7526459  0.90410461]]


------------------------------------------------------
mihalcea wup
[[False True 'wup' 'stem' 'token' None False 0.75 'all' 'tf' 'mihalcea']]
[[0.65       0.73307164 0.82099375 0.75037594 0.90628405]]


------------------------------------------------------
mihalcea res
[[False True 'res' 'lemma' 'lemma' 'brown_ic' True 0.0 'all' 'tf'
  'mihalcea']]
[[0.65       0.73675172 0.8173617  0.7690583  0.87213948]]


------------------------------------------------------
mihalcea jcn
[[False True 'jcn' 'stem' 'token' 'brown_ic' True 0.5 'all' 'tf'
  'mihalcea']]
[[0.55       0.73478901 0.8244844

In [56]:
# Rank the collected rows by their second-to-last column (accuracy), best first.
res = np.array(
    sorted(my_results, key=lambda entry: entry[-2], reverse=True)
)

# Saving results to latex table format

0. Removing stopwords flag  :  [True, False]

1. Removing punctuation flag  :  [True, False]

2. Wordnet similarity metrics  :  ["path", "lch", "wup", "res", "jcn", "lin"]

3. Features extracted to compute similarity  :  ["token", "lemma", "stem", "lemmapos"]

4. Features used to extract synsets  :  ["token", "lemma", "stem", "lemmapos"]

5. Information Content used in some WordNet metrics  :  ["bnc_ic_2007", "bnc_ic_2000", "semcor_ic", "brown_ic"]

6. Normalization flag : [True, False]

7. Term-Term similarity minimum threshold  :  [0.0, 0.25, 0.5, 0.75]

8. Synsets selection strategy (all-vs-all, first)  :  ["all", "first"]

9. Features weighting scheme  :  ["tf", "binary"]

10. Text similarity method  :  ["mihalcea", "softcosine", "stevenson"]

11. Text similarity threshold : Real

12. Accuracy : Real

13. F1-score : Real

In [57]:
# Short labels used in the LaTeX table for the longer configuration values.
ic_map = {
    None: "",
    "bnc_ic_2007": "bnc07",
    "bnc_ic_2000": "bnc00",
    "semcor_ic": "semcor",
    "brown_ic": "brown",
}
ss_map = {"first": "1", "all": "n"}
ws_map = {"tf": "tf", "binary": "bin"}

with open("latex_out.txt", "w") as fid:
    for row in res:
        fields = list(row)
        # Boolean flags become a check mark (set) or an empty cell (unset)
        for flag_idx in (0, 1, 6):
            fields[flag_idx] = r"\checkmark" if fields[flag_idx] else ""
        # Abbreviate information content, synset strategy and weighting scheme
        fields[5] = ic_map[fields[5]]
        fields[8] = ss_map[fields[8]]
        fields[9] = ws_map[fields[9]]
        # The template reorders the columns; each line ends with a LaTeX row break
        line = "{0} & {1} & {3} & {9} & {4} & {8} & {2} & {5} & {6} & {7:.2f} & {10} & {11:.2f} & {12:.3f} & {13:.3f}".format(
            *fields
        ) + "\\\\\n"
        fid.write(line)

In [58]:
# Drop the three trailing score columns so only the configuration fields
# remain, and turn each row into a tuple.
test_configurations = [tuple(row[:-3]) for row in res]

In [59]:
# Persist the selected configurations to "test_configurations.pickle".
with open("test_configurations.pickle", "wb") as fid:
    pickle.dump(test_configurations, fid)

# Analyzing test scores

In [63]:
# NOTE(review): pickle.load can execute arbitrary code; only load result files produced by this project.
# The context manager closes the file handle as soon as the scores are loaded.
with open("./Results/test_results_20180504.pickle", "rb") as test_scores_file:
    test_scores = pickle.load(test_scores_file)

In [64]:
# Display the loaded mapping: configuration tuple -> 2d array of score rows.
test_scores

{(False,
  True,
  'jcn',
  'stem',
  'token',
  'brown_ic',
  True,
  0.5,
  'all',
  'tf',
  'mihalcea'): array([[0.55      , 0.74086957, 0.82627283, 0.7454418 , 0.92676548],
        [0.6       , 0.73333333, 0.81023102, 0.76898982, 0.85614647],
        [0.5       , 0.72463768, 0.82439926, 0.7156611 , 0.97210113],
        [0.45      , 0.70956522, 0.81984898, 0.69767442, 0.99389712],
        [0.65      , 0.70318841, 0.77081468, 0.79208832, 0.75065388],
        [0.4       , 0.68463768, 0.8079096 , 0.67893175, 0.99738448],
        [0.35      , 0.67246377, 0.80224011, 0.67017544, 0.99912816],
        [0.3       , 0.66608696, 0.79930314, 0.66569936, 1.        ],
        [0.25      , 0.66550725, 0.79902473, 0.66531323, 1.        ],
        [0.1       , 0.66492754, 0.79874652, 0.66492754, 1.        ],
        [0.15      , 0.66492754, 0.79874652, 0.66492754, 1.        ],
        [0.2       , 0.66492754, 0.79874652, 0.66492754, 1.        ],
        [0.05      , 0.66492754, 0.79874652, 0.664927

In [69]:
# Label maps re-declared from the earlier export cell; they are not used below.
ic_map = {None: "", "bnc_ic_2007": "bnc07", "bnc_ic_2000": "bnc00", "semcor_ic":"semcor", "brown_ic":"brown"}
ss_map = {"first":"1", "all":"n"}
ws_map = {"tf":"tf", "binary":"bin"}

# For every selected configuration, look up the test scores at the threshold
# stored in the row (third column from the end).
new_scores = []
for old_row in res:
    conf = tuple(old_row[:-3])
    scores_mat = test_scores[conf]
    # Thresholds come from two separately stored tables, so compare with a
    # tolerance instead of exact float equality. Indexing [0] still raises
    # IndexError when no row matches, as the original lookup did.
    matching_rows = scores_mat[np.isclose(scores_mat[:, 0], float(old_row[-3]))]
    scores = matching_rows[0]
    # (text similarity method, WordNet metric, accuracy, f_measure)
    new_scores.append((conf[-1], conf[2], scores[1], scores[2]))

# Best test accuracy first
new_scores.sort(key=lambda x:x[2], reverse=True)
with open("latex_test_out.txt", "w") as fid:
    for row in new_scores:
        line = "{0} & {1} & {2:.3f} & {3:.3f} \\\\\n".format(*row)
        fid.write(line)