Minor function changes to useful functions and preprocessor and summarising the paper of this research
EdCo95 · Jul 16, 2017 · 5e4b6ad · 5e4b6ad
1 parent a0443f4
commit 5e4b6ad
Show file tree

Hide file tree

Showing 9 changed files with 118 additions and 4 deletions.
diff --git a/DataTools/DataPreprocessing/AbstractNetPreprocessor.py b/DataTools/DataPreprocessing/AbstractNetPreprocessor.py
@@ -123,6 +123,20 @@ def prepare_for_summarisation(self, filename, visualise=False):
 
         prev_section = ""
 
+        try:
+            bow = self.paper_bags_of_words[filename]
+        except KeyError:
+            paper_str = useful_functions.read_in_paper(filename)
+            paper_str = " ".join([val for _, val in paper_str.iteritems()]).lower()
+            paper_bag_of_words = useful_functions.calculate_bag_of_words(paper_str)
+            self.paper_bags_of_words[filename] = paper_bag_of_words
+
+        try:
+            kf = self.keyphrases[filename]
+        except KeyError:
+            kfs = raw_paper["KEYPHRASES"]
+            self.keyphrases[filename] = kfs
+
         for sentence, section in sentences:
 
             sentence_vector = useful_functions.sentence2vec(sentence, self.word2vec)

diff --git a/DataTools/useful_functions.py b/DataTools/useful_functions.py
@@ -539,6 +539,18 @@ def calculate_title_score(sentence, title):
 
     return score
 
+def calculate_bag_of_words(paper_string):
+    """
+    Counts how often each whitespace-separated token occurs in a paper.
+    :param paper_string: the whole paper as a single string.
+    :return: a defaultdict(int) mapping each token to its number of occurrences.
+    """
+    counts = defaultdict(int)
+    for token in paper_string.split():
+        counts[token] = counts[token] + 1
+
+    return counts
+
 
 def bag_of_words_score(sentence, paper_bag_of_words):
     """

diff --git a/Summarisers/AbstractRougeSummariser.py b/Summarisers/AbstractRougeSummariser.py
@@ -119,6 +119,11 @@ def prepare_paper(self, filename):
     # Paper Two: S0141938215300044.txt
     # Paper Three: S0142694X15000423.txt
     summ = AbstractRougeSummariser()
+
+    summ.summarise("our_paper.txt")
+
+    wait()
+
     #summ.summarise("S0142694X15000423.txt")
 
     count = 0

diff --git a/Summarisers/EnsembleSummariser.py b/Summarisers/EnsembleSummariser.py
@@ -183,7 +183,9 @@ def summarise(self, filename, visualise=False):
 
         final_sents_probs = [x for x in reversed(sorted(final_sents_probs, key=itemgetter(2)))]
         final_sents_probs = sorted(final_sents_probs, key=itemgetter(-1))
-        return final_sents_probs
+
+        if visualise:
+            return final_sents_probs
 
         #summary = final_sents_probs[0:self.summary_length]
 
@@ -314,6 +316,11 @@ def prepare_paper(self, filename, visualise=False):
     # Paper Two: S0141938215300044.txt
     # Paper Three: S0142694X15000423.txt
     summ = EnsembleSummariser()
+
+    summ.summarise("our_paper.txt")
+
+    wait()
+
     #summ.summarise("S0142694X15000423.txt")
     #summ.summarise("S0142694X15000423.txt")
     #sys.exit()

diff --git a/Summarisers/EnsembleV2Summariser.py b/Summarisers/EnsembleV2Summariser.py
@@ -298,6 +298,10 @@ def prepare_paper(self, filename):
     # Paper Two: S0141938215300044.txt
     # Paper Three: S0142694X15000423.txt
     summ = EnsembleV2Summariser()
+    summ.summarise("our_paper.txt")
+
+    wait()
+
     #summ.summarise("S0142694X15000423.txt")
     #summ.summarise("S0142694X15000423.txt")
     #sys.exit()

diff --git a/Visualisations/EnsembleVisOurs_index.html b/Visualisations/EnsembleVisOurs_index.html
diff --git a/Visualisations/EnsembleVis_index.html b/Visualisations/EnsembleVis_index.html
diff --git a/Visualisations/ensemble_visualiser.py b/Visualisations/ensemble_visualiser.py
@@ -15,12 +15,20 @@
 from sklearn import linear_model
 from Dev.Evaluation.rouge import Rouge
 
-NAME = "EnsembleVis"
+NAME = "EnsembleVisOurs"
 
 def heatmap(value):
+    # Map value onto an hsla() hue: 0.0 gives hue 240, 1.0 gives hue 0; alpha is fixed at 0.5.
     h = (1.0 - value) * 240
     return "hsla(" + str(h) + ", 100%, 50%, 0.5)"
 
+def heatmap_simple(value):
+    """Return one fixed highlight colour (hue 0, alpha 0.5); value is accepted but does not affect the result."""
+    return "hsla(0, 100%, 50%, 0.5)"
+
+def opacitymap(value):
+    """Return an hsla() colour string at hue 0 whose alpha component is str(value)."""
+    return "".join(("hsla(0, 100%, 50%, ", str(value), ")"))
+
 with open(BASE_DIR + "/Visualisations/base_html.txt", "rb") as f:
     html = f.readlines()
 
@@ -43,6 +51,8 @@ def heatmap(value):
 
 filename = "S0003687013000562.txt"
 
+filename = "our_paper.txt"
+
 paper = useful_functions.read_in_paper(filename, sentences_as_lists=True, preserve_order=True)
 
 html.append("<h1>" + " ".join(paper["MAIN-TITLE"][0][0]) + "</h1>")
@@ -57,6 +67,10 @@ def heatmap(value):
 print("Reading stuff...")
 bag_of_words = defaultdict(float)
 for key, val in paper.iteritems():
+
+    if not val:
+        continue
+
     sents = val[0]
     for sent in sents:
         for word in sent:
@@ -106,7 +120,7 @@ def heatmap(value):
             p_open = True
 
         if prob > 0.5:
-            html.append("<span style=\"background-color:" + heatmap(prob) + "\">&nbsp" + " ".join(sentence) + " </span>")
+            html.append("<span style=\"background-color:" + heatmap_simple(prob) + "\">&nbsp" + " ".join(sentence) + " </span>")
         else:
             html.append(" ".join(sentence))
 

diff --git a/Visualisations/summariser_demo.html b/Visualisations/summariser_demo.html