<a href="https://colab.research.google.com/github/GeorgeM2000/CANE/blob/master/code/Automatic_Keyword_Extraction_for_Citation_Graphs.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# ***Libraries & Tools***

In [None]:
!pip install yake

In [None]:
!pip install pytextrank

In [13]:
import pandas as pd
import numpy as np
import nltk
from itertools import chain, groupby, product
from enum import Enum
from typing import Callable, DefaultDict, Dict, List, Optional, Set, Tuple
from typing import Any
import string
from collections import Counter, defaultdict
import spacy
from nltk.corpus import stopwords
import yake
from sklearn.feature_extraction.text import TfidfVectorizer
import pytextrank
import gc
from tqdm import tqdm
import zipfile

In [None]:
nltk.data.path.append("...")

In [8]:
zip_file_path = '/content/data.zip'
extract_to = '/content/'

# Open and extract the zip file
with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    zip_ref.extractall(extract_to)

print("Extraction complete!")

Extraction complete!


# ***Abstracts Retrieval***

In [9]:
def extract_abstracts(file_path):
    # Read the contents of the file
    with open(file_path, 'r') as file:
        abstracts = file.readlines()

   # Remove any leading or trailing whitespace characters from each line
    abstracts = [abstract.strip() for abstract in abstracts if abstract.strip()]

    # Track the number of abstracts
    num_abstracts = len(abstracts)

    return abstracts, num_abstracts

# Example usage
file_path = 'data.txt'
abstracts, num_abstracts = extract_abstracts(file_path)

# Display the number of extracted abstracts
print(f'Number of extracted abstracts: {num_abstracts}')

Number of extracted abstracts: 88281


In [None]:
abstracts

# ***Keyword Extraction***

## RAKE

In [None]:
# Readability type definitions.
Word = str
Sentence = str
Phrase = Tuple[str, ...]

In [None]:
class Metric(Enum):
    """Different metrics that can be used for ranking."""

    DEGREE_TO_FREQUENCY_RATIO = 0  # Uses d(w)/f(w) as the metric
    WORD_DEGREE = 1  # Uses d(w) alone as the metric
    WORD_FREQUENCY = 2  # Uses f(w) alone as the metric

In [None]:
class Rake:
    """Rapid Automatic Keyword Extraction Algorithm."""

    def __init__(
        self,
        stopwords: Optional[Set[str]] = None,
        punctuations: Optional[Set[str]] = None,
        language: str = 'english',
        ranking_metric: Metric = Metric.DEGREE_TO_FREQUENCY_RATIO,
        max_length: int = 100000,
        min_length: int = 1,
        include_repeated_phrases: bool = True,
        sentence_tokenizer: Optional[Callable[[str], List[str]]] = None,
        word_tokenizer: Optional[Callable[[str], List[str]]] = None
    ):
        """Constructor.

        :param stopwords: Words to be ignored for keyword extraction.
        :param punctuations: Punctuations to be ignored for keyword extraction.
        :param language: Language to be used for stopwords.
        :param max_length: Maximum limit on the number of words in a phrase
                           (Inclusive. Defaults to 100000)
        :param min_length: Minimum limit on the number of words in a phrase
                           (Inclusive. Defaults to 1)
        :param include_repeated_phrases: If phrases repeat in phrase list consider
                            them as is without dropping any phrases for future
                            calculations. (Defaults to True) Ex: "Magic systems is
                            a company. Magic systems was founded by Raul".

                            If repeated phrases are allowed phrase list would be
                            [
                                (magic, systems), (company,), (magic, systems),
                                (founded,), (raul,)
                            ]

                            If they aren't allowed phrase list would be
                            [
                                (magic, systems), (company,),
                                (founded,), (raul,)
                            ]
        :param sentence_tokenizer: Tokenizer used to tokenize the text string into sentences.
        :param word_tokenizer: Tokenizer used to tokenize the sentence string into words.
        """

        # By default use degree to frequency ratio as the metric.
        if isinstance(ranking_metric, Metric):
            self.metric = ranking_metric
        else:
            self.metric = Metric.DEGREE_TO_FREQUENCY_RATIO

        # If stopwords not provided we use language stopwords by default.
        self.stopwords: Set[str]
        if stopwords:
            self.stopwords = stopwords
        else:
            self.stopwords = set(nltk.corpus.stopwords.words(language))

        # If punctuations are not provided we ignore all punctuation symbols.
        self.punctuations: Set[str]
        if punctuations:
            self.punctuations = punctuations
        else:
            self.punctuations = set(string.punctuation)

        # All things which act as sentence breaks during keyword extraction.
        self.to_ignore: Set[str] = set(chain(self.stopwords, self.punctuations))

        # Assign min or max length to the attributes
        self.min_length: int = min_length
        self.max_length: int = max_length

        # Whether we should include repeated phreases in the computation or not.
        self.include_repeated_phrases: bool = include_repeated_phrases

        # Tokenizers.
        self.sentence_tokenizer: Callable[[str], List[str]]
        if sentence_tokenizer:
            self.sentence_tokenizer = sentence_tokenizer
        else:
            self.sentence_tokenizer = nltk.tokenize.sent_tokenize

        self.word_tokenizer: Callable[[str], List[str]]
        if word_tokenizer:
            self.word_tokenizer = word_tokenizer
        else:
            self.word_tokenizer = nltk.tokenize.wordpunct_tokenize

        # Stuff to be extracted from the provided text.
        self.frequency_dist: Dict[Word, int]
        self.degree: Dict[Word, int]
        self.rank_list: List[Tuple[float, Sentence]]
        self.ranked_phrases: List[Sentence]

    def extract_keywords_from_text(self, text: str):
        """Method to extract keywords from the text provided.

        :param text: Text to extract keywords from, provided as a string.
        """
        sentences: List[Sentence] = self._tokenize_text_to_sentences(text)
        self.extract_keywords_from_sentences(sentences)


    def extract_keywords_from_sentences(self, sentences: List[Sentence]):
        """Method to extract keywords from the list of sentences provided.

        :param sentences: Text to extraxt keywords from, provided as a list
                          of strings, where each string is a sentence.
        """
        phrase_list: List[Phrase] = self._generate_phrases(sentences)
        self._build_frequency_dist(phrase_list)
        self._build_word_co_occurance_graph(phrase_list)
        self._build_ranklist(phrase_list)


    def get_ranked_phrases(self) -> List[Sentence]:
        """Method to fetch ranked keyword strings.

        :return: List of strings where each string represents an extracted
                 keyword string.
        """
        return self.ranked_phrases


    def get_ranked_phrases_with_scores(self) -> List[Tuple[float, Sentence]]:
        """Method to fetch ranked keyword strings along with their scores.

        :return: List of tuples where each tuple is formed of an extracted
                 keyword string and its score. Ex: (5.68, 'Four Scoures')
        """
        return self.rank_list


    def get_word_frequency_distribution(self) -> Dict[Word, int]:
        """Method to fetch the word frequency distribution in the given text.

        :return: Dictionary (defaultdict) of the format `word -> frequency`.
        """
        return self.frequency_dist


    def get_word_degrees(self) -> Dict[Word, int]:
        """Method to fetch the degree of words in the given text. Degree can be
        defined as sum of co-occurances of the word with other words in the
        given text.

        :return: Dictionary (defaultdict) of the format `word -> degree`.
        """
        return self.degree


    def _tokenize_text_to_sentences(self, text: str) -> List[Sentence]:
        """Tokenizes the given text string into sentences using the configured
        sentence tokenizer. Configuration uses `nltk.tokenize.sent_tokenize`
        by default.

        :param text: String text to tokenize into sentences.
        :return: List of sentences as per the tokenizer used.
        """
        return self.sentence_tokenizer(text)

    def _tokenize_sentence_to_words(self, sentence: Sentence) -> List[Word]:
        """Tokenizes the given sentence string into words using the configured
        word tokenizer. Configuration uses `nltk.tokenize.wordpunct_tokenize`
        by default.

        :param sentence: String sentence to tokenize into words.
        :return: List of words as per the tokenizer used.
        """
        return self.word_tokenizer(sentence)

    def _build_frequency_dist(self, phrase_list: List[Phrase]) -> None:
        """Builds frequency distribution of the words in the given body of text.

        :param phrase_list: List of List of strings where each sublist is a
                            collection of words which form a contender phrase.
        """
        self.frequency_dist = Counter(chain.from_iterable(phrase_list))

    def _build_word_co_occurance_graph(self, phrase_list: List[Phrase]) -> None:
        """Builds the co-occurance graph of words in the given body of text to
        compute degree of each word.

        :param phrase_list: List of List of strings where each sublist is a
                            collection of words which form a contender phrase.
        """
        co_occurance_graph: DefaultDict[Word, DefaultDict[Word, int]] = defaultdict(lambda: defaultdict(lambda: 0))
        for phrase in phrase_list:
            # For each phrase in the phrase list, count co-occurances of the
            # word with other words in the phrase.
            #
            # Note: Keep the co-occurances graph as is, to help facilitate its
            # use in other creative ways if required later.
            for (word, coword) in product(phrase, phrase):
                co_occurance_graph[word][coword] += 1

        # Print the co-occurance_graph
        """
        for word, co_words in co_occurance_graph.items():
            print(f"{word}:")
            for co_word, count in co_words.items():
                print(f"  {co_word}: {count}")
            print("=" * 30)
        """
        self.degree = defaultdict(lambda: 0)
        for key in co_occurance_graph:
            self.degree[key] = sum(co_occurance_graph[key].values())


    def _build_ranklist(self, phrase_list: List[Phrase]):
        """Method to rank each contender phrase using the formula

              phrase_score = sum of scores of words in the phrase.
              word_score = d(w) or f(w) or d(w)/f(w) where d is degree
                           and f is frequency.

        :param phrase_list: List of List of strings where each sublist is a
                            collection of words which form a contender phrase.
        """

        self.rank_list = []
        for phrase in phrase_list:
            rank = 0.0
            for word in phrase:
                if self.metric == Metric.DEGREE_TO_FREQUENCY_RATIO:
                    rank += 1.0 * self.degree[word] / self.frequency_dist[word]
                elif self.metric == Metric.WORD_DEGREE:
                    rank += 1.0 * self.degree[word]
                else:
                    rank += 1.0 * self.frequency_dist[word]
            self.rank_list.append((rank, ' '.join(phrase)))
        self.rank_list.sort(reverse=True)
        self.ranked_phrases = [ph[1] for ph in self.rank_list]


    # ==========================================================================
    def _generate_phrases(self, sentences: List[Sentence]) -> List[Phrase]:
        """Method to generate contender phrases given the sentences of the text
        document.

        :param sentences: List of strings where each string represents a
                          sentence which forms the text.
        :return: Set of string tuples where each tuple is a collection
                 of words forming a contender phrase.
        """
        phrase_list: List[Phrase] = []
        # Create contender phrases from sentences.
        for sentence in sentences:
            word_list: List[Word] = [word.lower() for word in self._tokenize_sentence_to_words(sentence)]
            phrase_list.extend(self._get_phrase_list_from_words(word_list))

        # Based on user's choice to include or not include repeated phrases
        # we compute the phrase list and return it. If not including repeated
        # phrases, we only include the first occurance of the phrase and drop
        # the rest.
        if not self.include_repeated_phrases:
            unique_phrase_tracker: Set[Phrase] = set()
            non_repeated_phrase_list: List[Phrase] = []
            for phrase in phrase_list:
                if phrase not in unique_phrase_tracker:
                    unique_phrase_tracker.add(phrase)
                    non_repeated_phrase_list.append(phrase)
            return non_repeated_phrase_list

        return phrase_list

    def _get_phrase_list_from_words(self, word_list: List[Word]) -> List[Phrase]:
        """Method to create contender phrases from the list of words that form
        a sentence by dropping stopwords and punctuations and grouping the left
        words into phrases. Only phrases in the given length range (both limits
        inclusive) would be considered to build co-occurrence matrix. Ex:

        Sentence: Red apples, are good in flavour.
        List of words: ['red', 'apples', ",", 'are', 'good', 'in', 'flavour']
        List after dropping punctuations and stopwords.
        List of words: ['red', 'apples', *, *, good, *, 'flavour']
        List of phrases: [('red', 'apples'), ('good',), ('flavour',)]

        List of phrases with a correct length:
        For the range [1, 2]: [('red', 'apples'), ('good',), ('flavour',)]
        For the range [1, 1]: [('good',), ('flavour',)]
        For the range [2, 2]: [('red', 'apples')]

        :param word_list: List of words which form a sentence when joined in
                          the same order.
        :return: List of contender phrases honouring phrase length requirements
                 that are formed after dropping stopwords and punctuations.
        """
        groups = groupby(word_list, lambda x: x not in self.to_ignore)
        phrases: List[Phrase] = [tuple(group[1]) for group in groups if group[0]]
        return list(filter(lambda x: self.min_length <= len(x) <= self.max_length, phrases))

## TF-IDF Function

In [None]:
def extract_keywords_tfidf(abstracts, T):
    # Preprocess the abstracts: remove punctuation and stopwords
    stop_words = set(stopwords.words('english'))
    punctuation = set(string.punctuation)

    def preprocess(text):
        text = text.lower()  # Lowercase
        text = ''.join([ch for ch in text if ch not in punctuation])  # Remove punctuation
        tokens = text.split()
        tokens = [word for word in tokens if word not in stop_words]  # Remove stopwords
        return ' '.join(tokens)

    # Preprocess each abstract
    preprocessed_abstracts = [preprocess(abstract) for abstract in abstracts]

    # Initialize TF-IDF vectorizer
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform(preprocessed_abstracts)

    # Get feature names (i.e., words)
    feature_names = vectorizer.get_feature_names_out()

    # Convert the TF-IDF matrix to a DataFrame
    tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=feature_names)

    # Extract top_n keywords for each abstract
    keywords_per_abstract = []
    for idx, row in tfidf_df.iterrows():
        # Get the top_n words with highest TF-IDF scores
        top_keywords = row.nlargest(T).index.tolist()
        keywords_per_abstract.append(top_keywords)

    return keywords_per_abstract

## Keyword & Keyphrase Methods

In [10]:
def sample_keywords(ranked_keywords, percentage):
    sample_size = int(len(ranked_keywords) * (percentage / 100.0))
    return ranked_keywords[:sample_size]

In [11]:
def save_keywords_to_files(lists_of_keywords, file_names):
    """
    Save the keywords from each list to a separate text file.

    :param lists_of_keywords: A list of lists, where each sublist contains keywords for an abstract
    :param file_names: A list of file names corresponding to each list
    """
    for keywords_list, file_name in zip(lists_of_keywords, file_names):
        with open(file_name, 'w') as file:
            for keywords in keywords_list:
                # Join the keywords for the abstract into a single string
                line = ' '.join(keywords)
                # Write the line to the file
                file.write(line + '\n')

In [12]:
T = 5

In [None]:
tfidf_keywords = extract_keywords_tfidf(abstracts, T)

In [None]:
len(tfidf_keywords)

2277

In [None]:
tfidf_keywords

[['convex', 'heuristic', 'megaprior', 'sequence', 'discovering'],
 ['cure', 'colposuspension', 'rate', 'risk', 'medical'],
 ['channel', 'calls', 'cell', 'call', 'service'],
 ['pomdps', 'mdps', 'observable', 'partially', 'finding'],
 ['variational', 'graphical', 'approximations', 'models', 'bounds'],
 ['realtime', 'goldszmidt', 'algorithms', 'variant', 'decision'],
 ['learning', 'framework', 'speedup', 'solving', 'macrooperator'],
 ['hir', 'hischbergs', 'mmb', 'myers', 'sm'],
 ['offline', 'mistakes', 'elements', 'learner', 'sequence'],
 ['ss', 'anova', 'klein', 'grkpack', 'gu'],
 ['solutions', 'problems', 'potential', 'learning', 'discovering'],
 ['laplacemetropolis', 'estimator', 'effects', 'marginal', 'random'],
 ['trend', 'utility', 'performance', 'paradigms', 'learned'],
 ['hmm', 'protein', 'sequences', 'efhand', 'globin'],
 ['weight', 'initial', 'demonstrate', 'extreme', 'experiments'],
 ['lobe', 'medial', 'alvarez', 'consolidation', 'neocortex'],
 ['eyes', 'stripes', 'ocular', 'do

In [None]:
topicrank = spacy.load("en_core_web_lg-3.7.1-py3-none-any/en_core_web_lg/en_core_web_lg-3.7.1")
positionrank = spacy.load("en_core_web_lg-3.7.1-py3-none-any/en_core_web_lg/en_core_web_lg-3.7.1")
textrank = spacy.load("en_core_web_lg-3.7.1-py3-none-any/en_core_web_lg/en_core_web_lg-3.7.1")

In [None]:
textrank.add_pipe("textrank")
positionrank.add_pipe("positionrank")
topicrank.add_pipe("topicrank")

<pytextrank.topicrank.TopicRankFactory at 0x78ee77db1e10>

In [14]:
"""
===== This cell will execute all baselines =====
"""

#oR = Rake(ranking_metric=Metric.WORD_FREQUENCY)
yake_custom_keyword_extractor = yake.KeywordExtractor(lan='en', n=3, dedupLim=0.9, dedupFunc='seqm', windowsSize=1, top=T, features=None)

#oRake_keywords = []
yake_keywords = []
textrank_keywords = []
positionrank_keywords = []
topicrank_keywords = []



for i in tqdm(range(len(abstracts)), desc="Processing abstracts"):


    # ===== Original Rake =====
    """
    oR.extract_keywords_from_text(abstracts[i])
    ranked_keywords = oR.get_ranked_phrases_with_scores()

    unique_ranked_keywords = list(set(ranked_keywords))
    unique_ranked_keywords = sorted(unique_ranked_keywords, key=lambda x: x[0], reverse=True)
    oRake_sampled_keywords = sample_keywords(unique_ranked_keywords, 100.0)[:T]
    oRake_keywords.append([keyphrase for _, keyphrase in oRake_sampled_keywords])
    """

    # ===== YAKE =====

    yake_res = yake_custom_keyword_extractor.extract_keywords(abstracts[i])
    unique_ranked_keywords = list(set(yake_res))
    yake_sampled_keywords = sorted(unique_ranked_keywords, key=lambda x: x[1], reverse=True)
    yake_keywords.append([keyphrase for keyphrase, _ in yake_sampled_keywords])

    # ===== TextRank =====

    """
    textrank_res = textrank(abstracts[i])
    if len(textrank_res._.phrases) >= T:
        textrank_keywords.append([textrank_res._.phrases[j].text for j in range(T)])
    else:
        textrank_keywords.append([keyphrase.text for keyphrase in textrank_res._.phrases])

    # ===== PositionRank =====

    positionrank_res = positionrank(abstracts[i])
    if len(positionrank_res._.phrases) >= T:
        positionrank_keywords.append([positionrank_res._.phrases[j].text for j in range(T)])
    else:
        positionrank_keywords.append([keyphrase.text for keyphrase in positionrank_res._.phrases])

    # ===== TopicRank =====

    topicrank_res = topicrank(abstracts[i])
    if len(topicrank_res._.phrases) >= T:
        topicrank_keywords.append([topicrank_res._.phrases[j].text for j in range(T)])
    else:
        topicrank_keywords.append([keyphrase.text for keyphrase in topicrank_res._.phrases])

    """

    #print(f'Abstract: {i}')


Processing abstracts: 100%|██████████| 88281/88281 [39:30<00:00, 37.23it/s]


In [None]:
#print(len(oRake_keywords))
print(len(yake_keywords))
print(len(textrank_keywords))
print(len(positionrank_keywords))
print(len(topicrank_keywords))


2277
2277
2277
2277


### Print Keywords (for evaluation)

In [None]:
oRake_keywords

In [15]:
yake_keywords

[['entropy',
  'unit cost',
  'channel capacity',
  'capacity per unit',
  'Feldmann and Kosloff'],
 ['On-line Viterbi',
  'Viterbi Algorithm',
  'Relationship to Random',
  'On-line Viterbi Algorithm',
  'Random Walks'],
 ['Sparsely-spread CDMA',
  'Code Division Multiple',
  'mechanics based analysis',
  'Division Multiple Access',
  'statistical mechanics based'],
 ['polynomial time reduction',
  'reduction of SAT',
  'Reducing',
  'SAT',
  'Reducing SAT'],
 ['generalized Littlewood-Richardson coefficient',
  'Complexity Theory',
  'Complexity Theory III',
  'Geometric Complexity',
  'Geometric Complexity Theory'],
 ['Morphic',
  'Periodicity',
  'Morphic Sequences',
  'Criteria for Morphic',
  'Periodicity Criteria'],
 ['Geometric Complexity',
  'positive integer programming',
  'representation theory',
  'Complexity Theory',
  'Geometric Complexity Theory'],
 ['World',
  'Evolving',
  'Information',
  'Evolving Information',
  'World as Evolving'],
 ['HCP',
  'Complexity of HCP',


In [None]:
textrank_keywords

[['based fitting parameters statistical model group related sequences',
  'part statistical model',
  'patterns groups protein sequences',
  'column final model',
  'prior proportion size sequence dataset'],
 ['colposuspension cure rate',
  'differences cure rate',
  'learning strategies',
  'useful role large scale medical problem',
  'risk factors'],
 ['channel available call',
  'free channel',
  'call requests service',
  'channel reuse constraint',
  'number blocked calls'],
 ['finitememory controller extracted solution',
  'pomdps line show cases',
  'techniques operations research bear problem',
  'exact solutions pomdps possibilities',
  'previous work complexity'],
 ['Graphical models',
  'graphical models',
  'applicability graphical models',
  'representational power probability models',
  'variational methods'],
 ['test domain realtime decision algorithms',
  'Incremental Probabilisitic Inference DAmbrosio variant algorithm',
  'broader applicability algorithms',
  'experim

In [None]:
positionrank_keywords

[['Several computer algorithms',
  'patterns groups protein sequences',
  'based fitting parameters statistical model group related sequences',
  'part statistical model',
  'Gibbs sampler algorithms'],
 ['learning strategies',
  'colposuspension cure rate',
  'differences cure rate',
  'risk factors',
  'useful role large scale medical problem'],
 ['cellular communication systems important problem',
  'cellular telephone systems',
  'large cellular system',
  'communication resource channels',
  'channel available call'],
 ['techniques operations research bear problem',
  'paper',
  'optimal actions',
  'pomdps line show cases',
  'mdps pomdps'],
 ['Graphical models',
  'graphical models',
  'applicability graphical models',
  'representational power probability models',
  'variational methods'],
 ['test domain realtime decision algorithms',
  'broader applicability algorithms',
  'Incremental Probabilisitic Inference DAmbrosio variant algorithm',
  'influence diagrams',
  'decisionev

In [None]:
topicrank_keywords

[['Several computer algorithms',
  'form heuristic',
  'statistical model produced situation convex combination',
  'megaprior',
  'convex combinations'],
 ['colposuspension cure rate',
  'risk factors',
  'machine',
  'algorithmsR FOIL',
  'set rules'],
 ['disjoint cells cell',
  'channel',
  'calls',
  'large cellular system',
  'service'],
 ['pomdps line show cases',
  'novel algorithm',
  'previous work complexity',
  'techniques operations research bear problem',
  'discussion approach'],
 ['variational methods',
  'Graphical models',
  'mere approximations',
  'information',
  'representations'],
 ['results',
  'algorithms',
  'decisionevaluation variant',
  'Goldszmidt',
  'implications'],
 ['formal framework',
  'paper',
  'sufficient conditions',
  'representation',
  'different representations'],
 ['previous paper',
  'objective functions',
  'Hischbergs',
  'linear space algorithm Hir',
  'result'],
 ['mistakes',
  'possible elements',
  'studied online model learner offline

### Create Keyword Text Files

In [None]:
yake_new_keywords = []
for keywords in yake_keywords:
  paper_keywords = []
  for i, keyword in enumerate(keywords):
    paper_keywords.append(''.join(char for char in keyword if char not in string.punctuation))

  yake_new_keywords.append(paper_keywords)

In [18]:
# Combine the lists and provide corresponding file names
#lists_of_keywords = [yake_keywords, tfidf_keywords, textrank_keywords, positionrank_keywords, topicrank_keywords]
#file_names = ['cora/YAKE.txt', 'cora/TFIDF.txt', 'cora/TextRank.txt', 'cora/PositionRank.txt', 'cora/TopicRank.txt']

lists_of_keywords = [yake_new_keywords]
file_names = ['YAKE.txt']

# Save the keywords to separate text files
save_keywords_to_files(lists_of_keywords, file_names)
