Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions strings/max_k_most_frequent_words.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
require_relative '../data_structures/heaps/max_heap'

##
# This class represents word count information
# (i.e. how many occurrences there are of a word).

class WordCount
  include Comparable

  # The counted word, and the number of times it occurs.
  attr_reader :word, :occurrences

  # Builds a word count entry.
  #
  # word::        the element being counted
  # occurrences:: how many times +word+ occurs
  def initialize(word, occurrences)
    @word = word
    @occurrences = occurrences
  end

  # Orders entries by their number of occurrences only; the word itself does
  # not take part in the comparison, so two entries with the same count
  # compare as equal.
  #
  # Returns +nil+ when +other+ does not expose +occurrences+. That is the
  # Comparable convention for incomparable operands: it lets +==+ answer
  # +false+ (and +<+, +>+ etc. raise ArgumentError) instead of failing with
  # a NoMethodError.
  def <=>(other)
    return nil unless other.respond_to?(:occurrences)

    occurrences <=> other.occurrences
  end
end

##
# Returns the `k` most frequently occurring words, in non-increasing order of occurrence.
# In this context, a word is defined as an element in the provided list.
#
# If `k` is greater than the number of distinct words, every distinct word is
# returned (i.e. `k` is capped at the number of distinct words).

def max_k_most_frequent_words(words, k)
  # Count each distinct element, then load one entry per element into a max
  # heap so the most frequent one is always extracted first.
  frequencies = words.tally
  entries = frequencies.map { |word, occurrences| WordCount.new(word, occurrences) }
  heap = MaxHeap.new(entries)

  # Never extract more entries than there are distinct words.
  limit = [k, frequencies.size].min
  limit.times.map { heap.extract_max.word }
end
28 changes: 28 additions & 0 deletions strings/max_k_most_frequent_words_test.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
require 'minitest/autorun'
require_relative 'max_k_most_frequent_words'

# Exercises max_k_most_frequent_words for typical values of `k` and for the
# boundary cases (zero `k`, empty input, `k` larger than the distinct words).
class TestMaxKMostFrequentWords < Minitest::Test
  # Shared fixture: 'c' occurs three times, 'a' twice and 'b' once, so the
  # expected ranking contains no ties.
  WORDS = %w[a b c a c c].freeze

  # assert_equal (rather than `assert a == b`) is used throughout so that a
  # failure reports both the expected and the actual list.

  def test_top_3_frequent_words
    assert_equal %w[c a b], max_k_most_frequent_words(WORDS, 3)
  end

  def test_top_2_frequent_words
    assert_equal %w[c a], max_k_most_frequent_words(WORDS, 2)
  end

  def test_top_frequent_word
    assert_equal %w[c], max_k_most_frequent_words(WORDS, 1)
  end

  def test_no_frequent_word_given_zero_k
    assert_equal [], max_k_most_frequent_words(WORDS, 0)
  end

  def test_no_frequent_word_given_empty_word_list
    assert_equal [], max_k_most_frequent_words([], 1)
  end

  def test_all_frequent_words_given_k_too_large
    assert_equal %w[a], max_k_most_frequent_words(%w[a a], 2)
  end
end