In [None]:
!pip install pronouncing

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import pronouncing
import nltk
from nltk.corpus import brown

# Make sure the Brown corpus is available locally before reading it.
nltk.download('brown')

# Case-insensitive frequency table over every token of the Brown corpus.
freqs = nltk.FreqDist(map(str.lower, brown.words()))

[nltk_data] Downloading package brown to /root/nltk_data...
[nltk_data]   Package brown is already up-to-date!


In [None]:
# NOTE(review): the query word is an empty string, so this cell presumably
# prints nothing until a real word is filled in -- confirm this is intended.
wordlist = set(pronouncing.rhymes(""))

# Most frequent Brown-corpus words first. Ties are broken alphabetically so the
# listing no longer depends on the (arbitrary) iteration order of the set.
wordlist_sorted = sorted(wordlist, key=lambda x: (-freqs[x.lower()], x))
# print the sorted list
for w in wordlist_sorted:
    print(w)

In [None]:
# Quick look at the pronunciation data for one word; the cell output shows the
# result as a list of space-separated phone strings.
pronouncing.phones_for_word("sentence")

['S EH1 N T AH0 N S']

In [None]:
import re

# Matches any single character that is neither an ASCII letter nor a space.
regex = re.compile('[^a-zA-Z ]')

In [None]:
from collections import OrderedDict
from termcolor import colored

def _letters_only(token):
  """Return `token` lower-cased with every non-ASCII-letter character removed."""
  return re.sub(r'[^a-zA-Z]', '', token).lower()


def _render_token(token, styles):
  """Return `token` with its ASCII letters wrapped in their colour styles.

  `styles[k]` is the list of `(text_color, background)` pairs for the k-th
  ASCII letter of `token`; pairs are applied in order, so the first pair ends
  up innermost. Non-letter characters are emitted unchanged.
  """
  rendered = []
  letter_index = 0
  for char in token:
    if 'a' <= char <= 'z' or 'A' <= char <= 'Z':
      for text_color, background in styles[letter_index]:
        char = colored(char, text_color, background)
      letter_index += 1
    rendered.append(char)
  return ''.join(rendered)


def print_colored_rhymes(sentence, more=True, top=-1):
  """Print `sentence` with letters highlighted according to shared phones.

  Every whitespace-separated token is reduced to its ASCII letters and looked
  up with `pronouncing.phones_for_word`; only the first pronunciation is used
  and tokens without one are printed unchanged. Phones are ranked by the number
  of distinct words containing them (ties keep first-appearance order, so the
  colouring is reproducible), and phones found in a single word are ignored.
  Each phone is mapped proportionally onto the letters of the word, which is
  only an approximation of where the sound is spelled.

  Args:
    sentence: Text to analyse; may span several lines.
    more: If true, use up to twice as many phone groups as there are background
      colours (the second pass reuses the backgrounds with red text);
      otherwise use at most one group per background colour.
    top: -1 highlights every selected group. A positive value highlights only
      the group with that 1-based rank and appends a summary line; if fewer
      groups exist, nothing is printed. Other values are not supported.

  Returns:
    None. The result is written to standard output, with original
    capitalisation and punctuation preserved.
  """
  # First pronunciation of every distinct word that has one. Tokenising with
  # split() here matches the printing loop below, so words on either side of a
  # bare newline are never fused together.
  word_to_phone = {}
  for token in sentence.split():
    word = _letters_only(token)
    if not word or word in word_to_phone:
      continue
    pronunciations = pronouncing.phones_for_word(word)
    if pronunciations:  # an empty list means the word is unknown
      word_to_phone[word] = pronunciations[0].split()

  # phone -> distinct words containing it. Relies on dicts preserving insertion
  # order so that phones are kept in order of first appearance.
  phone_to_words = {}
  for word, phones in word_to_phone.items():
    for phone in phones:
      words_with_phone = phone_to_words.setdefault(phone, [])
      # All phones of one word are visited consecutively, so comparing with the
      # last entry is enough to avoid listing a word twice.
      if not words_with_phone or words_with_phone[-1] != word:
        words_with_phone.append(word)

  # Stable sort: most widely shared phone first, ties in first-appearance order.
  ranked = sorted(phone_to_words.items(), key=lambda item: len(item[1]), reverse=True)

  colors = ['on_yellow', 'on_magenta', 'on_green', 'on_blue', 'on_grey']

  limit = len(colors) * 2 if more else len(colors)
  # The ranking is descending, so filtering the head of the list is the same as
  # stopping at the first phone that occurs in only one word.
  top_rhymes = [(phone, words) for phone, words in ranked[:limit] if len(words) > 1]

  if len(top_rhymes) < top:
    return

  # word -> one list of (text_color, background) pairs per letter.
  word_styles = {}
  for color_index, (phone, words) in enumerate(top_rhymes):
    if top != -1 and top - 1 != color_index:
      continue

    if color_index == 0:
      text_color = 'grey'
    elif color_index < len(colors):
      text_color = 'white'
    else:
      text_color = 'red'
    background = colors[color_index % len(colors)]

    for word in words:
      phones = word_to_phone[word]
      styles = word_styles.setdefault(word, [[] for _ in word])
      for phone_index, candidate in enumerate(phones):
        if candidate != phone:
          continue
        # Scale the phone's position within the pronunciation onto the letters.
        start = round(phone_index / len(phones) * len(word))
        end = round((phone_index + 1) / len(phones) * len(word))
        if start == len(word):
          start -= 1
        if end == 0:
          end = 1
        for letter_index in range(start, end):
          styles[letter_index].append((text_color, background))

  count = 0
  for line in sentence.splitlines():
    for token in line.split():
      styles = word_styles.get(_letters_only(token))
      if styles is None:
        print(token, end=' ')
      else:
        count += 1
        print(_render_token(token, styles), end=' ')
    print()

  if top != -1:
    print(f'{top_rhymes[top-1][0]} counted {count} times')

In [None]:
# Demo input: a five-line lyric with explicit newline separators.
sentence = "Well if you want Shady, then this is what I’ll give ya \n A little bit of weed mixed with some hard liquor \n Some vodka that will jumpstart my heart quicker \n Then a shot when I get shocked at the hospital \n By the doctor when I’m not cooperating."

# Full highlighting first, then the reduced palette.
print_colored_rhymes(sentence, more=True)
print()
print_colored_rhymes(sentence, more=False)

# One pass per rank, 1 through 5, each preceded by a blank line.
for i in range(1, 6):
    print()
    print_colored_rhymes(sentence, top=i)

[44m[37mw[0mel[43m[31ml[0m [45m[37mi[0mf you [44m[37mw[0m[42m[37ma[0mn[43m[30mt[0m sh[44m[31ma[0m[44m[31md[0my [45m[31mt[0mhen [45m[31mt[0m[45m[37mh[0m[45m[37mi[0m[42m[31ms[0m [45m[37mi[0ms [44m[37mw[0mha[43m[30mt[0m [45m[37mi[0m[45m[37ml[0m[43m[31ml[0m g[45m[37mi[0m[45m[37mv[0me y[42m[37ma[0m 
A [43m[31ml[0m[45m[37mi[0m[43m[30mt[0m[43m[30mt[0ml[43m[31me[0m b[45m[37mi[0m[43m[30mt[0m of [44m[37mw[0mee[44m[31md[0m [40m[31mm[0m[45m[37mi[0m[40m[37mx[0m[42m[31me[0m[43m[30md[0m [44m[37mw[0m[45m[37mi[0m[45m[37mt[0m[45m[31mh[0m [42m[31ms[0mom[40m[31me[0m h[42m[37ma[0mr[44m[31md[0m [43m[31ml[0m[43m[31mi[0m[45m[37mq[0m[40m[37mu[0mor 
[42m[31ms[0mom[40m[31me[0m v[42m[37mo[0m[44m[31md[0m[40m[37mk[0ma [45m[31mt[0mha[43m[30mt[0m [44m[37mw[0m[45m[37mi[0m[45m[37ml[0m[43m[31ml[0m ju[40m[31mm[0mp[42m[31ms[0m[43m[30mt[0ma

In [None]:
# Control input: a sentence that was not written to rhyme.
sentence = 'This is a random sentence that I am not trying to rhyme'

# Full highlighting, the reduced palette, then only the top-ranked group.
print_colored_rhymes(sentence, more=True)
print()
print_colored_rhymes(sentence, more=False)
print()
print_colored_rhymes(sentence, top=1)

[44m[31mt[0m[42m[31mh[0m[42m[31mi[0m[40m[31ms[0m [42m[31mi[0ms [45m[31ma[0m [45m[37mr[0m[43m[31ma[0m[40m[37mn[0md[45m[31mo[0m[42m[37mm[0m [40m[31ms[0me[40m[37mn[0m[43m[30mt[0m[43m[30me[0m[45m[31mn[0m[40m[37mc[0m[40m[31me[0m [44m[31mt[0m[43m[31mh[0m[43m[31ma[0m[43m[30mt[0m [44m[37mi[0m [43m[31ma[0m[42m[37mm[0m [40m[37mn[0mo[43m[30mt[0m [43m[30mt[0m[45m[37mr[0m[44m[37my[0m[44m[37mi[0mng [43m[30mt[0mo [45m[37mr[0m[45m[37mh[0m[44m[37my[0m[42m[37mm[0m[42m[37me[0m 

This is a [45m[37mr[0ma[40m[37mn[0mdo[42m[37mm[0m se[40m[37mn[0m[43m[30mt[0m[43m[30me[0mn[40m[37mc[0me tha[43m[30mt[0m [44m[37mi[0m a[42m[37mm[0m [40m[37mn[0mo[43m[30mt[0m [43m[30mt[0m[45m[37mr[0m[44m[37my[0m[44m[37mi[0mng [43m[30mt[0mo [45m[37mr[0m[45m[37mh[0m[44m[37my[0m[42m[37mm[0m[42m[37me[0m 

This is a random sen[43m[30mt[0m[43m[30me[0mnce tha

In [None]:
# Interactive variant: read the text to analyse from the user.
sentence = input('input: ')

# Full highlighting first, then the reduced palette.
print_colored_rhymes(sentence, more=True)
print()
print_colored_rhymes(sentence, more=False)

# One pass per rank, 1 through 5, each preceded by a blank line.
for i in range(1, 6):
    print()
    print_colored_rhymes(sentence, top=i)

input: a b c d e f g h i j k l m n o p q r s t u v w x y z
a [42m[31m[43m[30mb[0m[0m [44m[37m[43m[30mc[0m[0m [43m[30md[0m [43m[30me[0m [45m[37mf[0m [43m[30mg[0m [43m[31mh[0m [45m[31mi[0m [43m[31mj[0m [43m[31m[42m[37mk[0m[0m [44m[31m[45m[37ml[0m[0m [45m[37mm[0m [45m[37mn[0m o [43m[30mp[0m [40m[31m[40m[37m[42m[37mq[0m[0m[0m r [44m[37m[45m[37ms[0m[0m [43m[30mt[0m [40m[31m[40m[37mu[0m[0m [43m[30mv[0m [44m[31m[42m[31m[40m[37mw[0m[0m[0m [44m[37m[42m[37m[45m[37mx[0m[0m[0m [45m[31my[0m [43m[30mz[0m 

a [43m[30mb[0m [44m[37m[43m[30mc[0m[0m [43m[30md[0m [43m[30me[0m [45m[37mf[0m [43m[30mg[0m h i j [42m[37mk[0m [45m[37ml[0m [45m[37mm[0m [45m[37mn[0m o [43m[30mp[0m [40m[37m[42m[37mq[0m[0m r [44m[37m[45m[37ms[0m[0m [43m[30mt[0m [40m[37mu[0m [43m[30mv[0m [40m[37mw[0m [44m[37m[42m[37m[45m[37mx[0m[0m[0m y [43m[30mz[0m 

a [43m