Skip to content

Commit

Permalink
Minor function changes to useful functions and preprocessor and summa…
Browse files Browse the repository at this point in the history
…rising the paper of this research
  • Loading branch information
EdCo95 committed Jul 16, 2017
1 parent a0443f4 commit 5e4b6ad
Show file tree
Hide file tree
Showing 9 changed files with 118 additions and 4 deletions.
14 changes: 14 additions & 0 deletions DataTools/DataPreprocessing/AbstractNetPreprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,20 @@ def prepare_for_summarisation(self, filename, visualise=False):

prev_section = ""

try:
bow = self.paper_bags_of_words[filename]
except KeyError:
paper_str = useful_functions.read_in_paper(filename)
paper_str = " ".join([val for _, val in paper_str.iteritems()]).lower()
paper_bag_of_words = useful_functions.calculate_bag_of_words(paper_str)
self.paper_bags_of_words[filename] = paper_bag_of_words

try:
kf = self.keyphrases[filename]
except KeyError:
kfs = raw_paper["KEYPHRASES"]
self.keyphrases[filename] = kfs

for sentence, section in sentences:

sentence_vector = useful_functions.sentence2vec(sentence, self.word2vec)
Expand Down
12 changes: 12 additions & 0 deletions DataTools/useful_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -539,6 +539,18 @@ def calculate_title_score(sentence, title):

return score

def calculate_bag_of_words(paper_string):
    """
    Calculates the bag of words representation of a paper and returns a defaultdict.

    The string is tokenised with ``str.split()``, i.e. on runs of whitespace only; no
    lower-casing or punctuation stripping is done here, so callers must normalise the
    text beforehand if they need that.

    :param paper_string: the paper in string representation.
    :return: the paper's bag of words representation as a defaultdict(int) mapping each
             token to the number of times it occurs. Because it is a defaultdict,
             looking up an unseen word yields 0 (and inserts that key).
    """
    bow = defaultdict(int)
    for word in paper_string.split():
        bow[word] += 1

    return bow


def bag_of_words_score(sentence, paper_bag_of_words):
"""
Expand Down
5 changes: 5 additions & 0 deletions Summarisers/AbstractRougeSummariser.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,11 @@ def prepare_paper(self, filename):
# Paper Two: S0141938215300044.txt
# Paper Three: S0142694X15000423.txt
summ = AbstractRougeSummariser()

summ.summarise("our_paper.txt")

wait()

#summ.summarise("S0142694X15000423.txt")

count = 0
Expand Down
9 changes: 8 additions & 1 deletion Summarisers/EnsembleSummariser.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,9 @@ def summarise(self, filename, visualise=False):

final_sents_probs = [x for x in reversed(sorted(final_sents_probs, key=itemgetter(2)))]
final_sents_probs = sorted(final_sents_probs, key=itemgetter(-1))
return final_sents_probs

if visualise:
return final_sents_probs

#summary = final_sents_probs[0:self.summary_length]

Expand Down Expand Up @@ -314,6 +316,11 @@ def prepare_paper(self, filename, visualise=False):
# Paper Two: S0141938215300044.txt
# Paper Three: S0142694X15000423.txt
summ = EnsembleSummariser()

summ.summarise("our_paper.txt")

wait()

#summ.summarise("S0142694X15000423.txt")
#summ.summarise("S0142694X15000423.txt")
#sys.exit()
Expand Down
4 changes: 4 additions & 0 deletions Summarisers/EnsembleV2Summariser.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,10 @@ def prepare_paper(self, filename):
# Paper Two: S0141938215300044.txt
# Paper Three: S0142694X15000423.txt
summ = EnsembleV2Summariser()
summ.summarise("our_paper.txt")

wait()

#summ.summarise("S0142694X15000423.txt")
#summ.summarise("S0142694X15000423.txt")
#sys.exit()
Expand Down
29 changes: 29 additions & 0 deletions Visualisations/EnsembleVisOurs_index.html

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion Visualisations/EnsembleVis_index.html

Large diffs are not rendered by default.

18 changes: 16 additions & 2 deletions Visualisations/ensemble_visualiser.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,20 @@
from sklearn import linear_model
from Dev.Evaluation.rouge import Rouge

NAME = "EnsembleVis"
NAME = "EnsembleVisOurs"

def heatmap(value):
    """
    Maps a score to a CSS ``hsla`` colour string for heat-map style highlighting.

    The hue is computed as ``(1.0 - value) * 240``, so a value of 1.0 gives hue 0 and a
    value of 0.0 gives hue 240. Saturation, lightness and alpha are fixed at
    100%, 50% and 0.5 respectively.

    :param value: the numeric score to colour (the caller passes a probability).
    :return: the colour as a string of the form "hsla(<hue>, 100%, 50%, 0.5)".
    """
    h = (1.0 - value) * 240
    return "hsla(" + str(h) + ", 100%, 50%, 0.5)"

def heatmap_simple(value):
    """
    Returns a single fixed highlight colour, regardless of the score.

    This is a drop-in alternative to a score-dependent colour function: it takes the
    same argument but always returns the same CSS ``hsla`` string (hue 0, 100%
    saturation, 50% lightness, 0.5 alpha).

    :param value: the score of the item being highlighted. It is accepted only so the
                  function can be called the same way as the other colour maps; it does
                  not influence the result.
    :return: the constant string "hsla(0, 100%, 50%, 0.5)".
    """
    # The hue previously computed from `value` was never used, so it is not computed.
    return "hsla(0, 100%, 50%, 0.5)"

def opacitymap(value):
    """
    Maps a score to a CSS ``hsla`` colour string whose opacity is the score itself.

    Hue, saturation and lightness are fixed at 0, 100% and 50%; only the alpha
    component varies, and it is the string form of ``value`` with no clamping or
    rounding applied.

    :param value: the score to use as the alpha component.
    :return: the colour as a string of the form "hsla(0, 100%, 50%, <value>)".
    """
    return "hsla(0, 100%, 50%, " + str(value) + ")"

with open(BASE_DIR + "/Visualisations/base_html.txt", "rb") as f:
html = f.readlines()

Expand All @@ -43,6 +51,8 @@ def heatmap(value):

filename = "S0003687013000562.txt"

filename = "our_paper.txt"

paper = useful_functions.read_in_paper(filename, sentences_as_lists=True, preserve_order=True)

html.append("<h1>" + " ".join(paper["MAIN-TITLE"][0][0]) + "</h1>")
Expand All @@ -57,6 +67,10 @@ def heatmap(value):
print("Reading stuff...")
bag_of_words = defaultdict(float)
for key, val in paper.iteritems():

if not val:
continue

sents = val[0]
for sent in sents:
for word in sent:
Expand Down Expand Up @@ -106,7 +120,7 @@ def heatmap(value):
p_open = True

if prob > 0.5:
html.append("<span style=\"background-color:" + heatmap(prob) + "\">&nbsp" + " ".join(sentence) + " </span>")
html.append("<span style=\"background-color:" + heatmap_simple(prob) + "\">&nbsp" + " ".join(sentence) + " </span>")
else:
html.append(" ".join(sentence))

Expand Down
29 changes: 29 additions & 0 deletions Visualisations/summariser_demo.html

Large diffs are not rendered by default.

0 comments on commit 5e4b6ad

Please sign in to comment.