In [1]:
import re
from pprint import pprint
import numpy as np

worddict = {}

content = open(r"RevisedDictionary.txt").read()
content2 = re.findall("(.*?)\n", content)
for j in content2:
    a = j.split("  ")
    worddict[a[0]] = a[1].split(" ")

wordlist = list(worddict.keys())
phonelist = list(worddict.values())
phonedict = {}

for i, phonemes in enumerate(phonelist):
    for phonm in phonemes:
        if phonm in phonedict:
            phonedict[phonm].append(wordlist[i])
        else:
            phonedict[phonm] = [wordlist[i]]

In [2]:
def searchfn(phoneme):
    return phonedict[phoneme]

def get_phoneme(word):
    word = word.upper()
    if word in worddict:
        return worddict[word]
    else:
        raise Exception("Word does not exist.")

In [3]:
def contains(small, big):
    for i in range(len(big)-len(small)+1):
        for j in range(len(small)):
            if big[i+j] != small[j]:
                break
        else:
            return i, i+len(small)
    return False

def searchlistfn(phone_set):
    wds = None
    for phonm in phone_set:
        if wds:
            wds = wds.intersection(set(phonedict[phonm]))
        else:
            wds = set(phonedict[phonm])

    phones = [worddict[i] for i in wds]
    return list(zip(list(wds), phones))

def orderedlistfn(phone_set):
    wds = []
    
    for i, phonm in enumerate(phonelist):
        idx = contains(phone_set, phonm)
        if idx:
            wds.append([wordlist[i], phonm, idx])
            
    return wds

def rank_similar(word, phone_set):
    word = word.upper()
    sim_dict = {}
    tot_len = len(phone_set)
    set_len = len(phone_set)
    rank = 1
    copy_set = phone_set[:]
    rem_set = None
    
    while set_len > 1:
        if set_len != tot_len:
            rem_set = phone_set[set_len:][0]

        wds = orderedlistfn(copy_set)
        for ord_wd in wds:
            if ord_wd[0] not in sim_dict:
                sim_dict[ord_wd[0]] = rank

                if rem_set in ord_wd[1][ord_wd[2][1]:]:
                    sim_dict[ord_wd[0]] = rank - 0.5
                
        copy_set = copy_set[:-1]
        rank += 2
        set_len -= 1
    
    set_len = len(phone_set) - 1
    rank = 2
    copy_set = phone_set[1:]
    
    while set_len > 1:
        rem_len = tot_len - set_len
        rem_set = phone_set[:rem_len][-1]

        wds = orderedlistfn(copy_set)
        for ord_wd in wds:
            if ord_wd[0] not in sim_dict:                
                sim_dict[ord_wd[0]] = rank
                
                if rem_set in ord_wd[1][:ord_wd[2][0]]:
                    sim_dict[ord_wd[0]] = rank - 0.5
                
        copy_set = copy_set[1:]
        rank += 2
        set_len -= 1
    
    
    for wd in sim_dict.keys():
        if word in wd:
            sim_dict[wd] = -1
    
    simwds = sim_dict.keys()
    simvals = sim_dict.values()
    
    simwds = sorted(simwds, key = lambda x: sim_dict[x])
    simvals = sorted(simvals)
    
    return list(zip(simwds, simvals)), sorted(list(set(simvals)))

def get_pun_wds(word):
    res, vals = rank_similar(word, get_phoneme(word))
    
    if vals[0] == -1:
        vals = vals[1:]
    
    while len(vals):
        finalRes = [i[0] for i in res if i[1] == vals[0]]
        print("Type " + str(vals[0]) + ":")
        print(*clean_res(finalRes))
        vals = vals[1:]

def get_pun_wds_p(word, phoneme):
    res, vals = rank_similar(word, phoneme)
    
    if vals[0] == -1:
        vals = vals[1:]
    
    while len(vals):
        finalRes = [i[0] for i in res if i[1] == vals[0]]
        print("Type " + str(vals[0]) + ":")
        print(*clean_res(finalRes))
        vals = vals[1:]
    
def clean_res(res):
    for i in res:
        if (i + "S") in res:
            res.remove(i + "S")
        if (i + "IES") in res:
            res.remove(i + "IES")
        if (i + "LY") in res:
            res.remove(i + "LY")
        if (i + "NESS") in res:
            res.remove(i + "NESS")
        if (i + "ING") in res:
            res.remove(i + "ING")
        if i[-1] == "Y":
            if (i[:-1] + "IES") in res:
                res.remove(i[:-1] + "IES")
        if (i + "Y") in res:
            res.remove(i + "Y")
        if (i + "ED") in res:
            res.remove(i + "ED")
        if i[-1] == "E":
            if (i + "D") in res:
                res.remove(i + "D")
            if (i + "R") in res:
                res.remove(i + "R")
            if (i + "RS") in res:
                res.remove(i + "RS")
            if (i[:-1] + "ING") in res:
                res.remove(i[:-1] + "ING")
        if i[-1] == "H":
            if (i + "ES") in res:
                res.remove(i + "ES")
    
    if len(res) > 30:
        res = np.random.choice(res, 30)
    return res

In [7]:
get_phoneme("orange")

['AO1', 'R', 'AH0', 'N', 'JH']

In [12]:
get_phoneme("uh")

['AH1']

In [15]:
get_pun_wds("armadillo")

Type 8:
CAUDILLO
Type 9:
HARMATTAN HARMONIZATION PHARMACEUTICAL PHARMACOLOGICAL PHARMACOLOGIST PHARMACOLOGY UNDERGARMENT
Type 10:
BILLOW KILOBYTE PILLOW TRILLO
Type 11:
FIREARM GENDARME PHILHARMONIC
Type 11.5:
ACYCLOVIR DISLOCATE GIGOLO
Type 12:
LOGICIAN MARTELLO MARSHMALLOW SOLOS VELODROME MALLOW YELLOWEST FLOTATION NOLO MEATLOAF FELLOWSHIP FLOTILLA LOGICIAN IDEOLOGUE FLOTATION FILO MEATLOAF CELLO APOLLO FLOTILLA SILO YELLOWISH MEGALOMANIAC APOLLO MEGALOMANIAC CYTOMEGALOVIRUS FOLLOWUP SWALLOW FOLLOWER DELO
Type 12.5:
ARBORETUM ARGUMENTATION ARGUMENTATIVE CARCINOMA CARDIOPULMONARY COMPARTMENTAL COMPARTMENTALIZE DEPARTMENTAL DEPARTMENTALIZE PARLIAMENTARIAN PARLIAMENTARY PARSIMONIOUS
Type 13:
FLUORSPAR SKYLARK LARYNGEAL EMBARCADERO ARCHANGEL LIFEGUARD IMPARTIALITY OLIGARCH SIDEBAR RAMPARTS CARCINOGENIC ARCHEOLOGY OUTSMART PARTICULARLY RAILCAR BIRTHMARK RHUBARB GUARANI ARCHEOLOGICAL JAGUAR UPMARKET RADAR MARGINALIZATION CARBOHYDRATE MAGYAR LEOTARD FLOWCHART ARBITRATION MARKETEER ARTIODACT

In [17]:
pnm = get_phoneme("han")

In [18]:
pnm

['HH', 'AA1', 'N']

In [21]:
pnm = ['SH', 'ER0', 'IY1', 'HH', 'AA1', 'N']

In [23]:
get_pun_wds_p("SHRIHAN", pnm)

Type 6:
HAN HON HONDAS
Type 7:
SHARIF
Type 7.5:
HARMONIC HARMONICA HEDONIC HEGEMONIC HISTRIONIC HORIZONTAL HWAN HYALURONIC HYDROPONIC HYPERSONIC HYPERTONIC HYPOCHONDRIA HYPOCHONDRIAC PHILHARMONIC
Type 8:
CONSCIENCE INFANTE ONBOARD ENTREE PNEUMONIC NONALIGNED INCONTINENT ECONOMY OVONIC CONTROVERSY DEBUTANTE MONISM INCONSPICUOUS SYNCHRONIC GENDARME SONIC MONOPLANE NONPROFITS RESPONSE ANA CONSTITUTE LLANO THEREUPON FROND FONDLING CONTEXT PONCHO ULTRASONIC SWAN CONQUESTS
Type 9:
FLESHER HARSHER DEPRESSURIZE DEPRESSURIZE SLASHER HABERDASHERY PUSHER FLESHER BRASHER DASHER THRESHER CENSURE FISSURE FISHERMAN FRESHER PRESSURE GAUCHER PUNISHER THRESHER PHOTOFINISHER HARSHER PUBLISHER EXTINGUISHER UNPRESSURIZED FINISHER PHOTOFINISHER CHESHIRE UNSTRUCTURED HABERDASHERY WASHER


In [309]:
phonedict.keys()

dict_keys(['EY2', 'EY1', 'AA1', 'R', 'D', 'V', 'AA2', 'K', 'S', 'G', 'T', 'IY2', 'AE1', 'B', 'AH0', 'AE2', 'L', 'OW1', 'N', 'IY0', 'Z', 'IH0', 'NG', 'M', 'SH', 'ZH', 'Y', 'IY1', 'AE0', 'AH1', 'AH2', 'ER0', 'EH1', 'EH2', 'TH', 'HH', 'AO1', 'AY1', 'IH1', 'JH', 'EH0', 'UW1', 'AO0', 'OW0', 'IH2', 'AO2', 'F', 'AW1', 'P', 'UW2', 'CH', 'ER1', 'AA0', 'DH', 'UW0', 'EY0', 'AY2', 'OW2', 'UH1', 'W', 'OY2', 'OY1', 'ER2', 'UH2', 'AY0', 'AW2', 'UH0', 'OY0', 'AW0'])

In [102]:
get_phoneme("fire")

['F', 'AY1', 'ER0']

In [299]:
i = "RESERVE"
res = "RESERVES"
if (i + "S") in res:
    res.remove(i + "S")
if (i + "LY") in res:
    res.remove(i + "LY")
if (i + "NESS") in res:
    res.remove(i + "NESS")
if i[-1] == "Y":
    if (i[:-1] + "IES") in res:
        res.remove(i[:-1] + "IES")
if (i + "Y") in res:
    res.remove(i + "Y")
if (i + "ED") in res:
    res.remove(i + "ED")
if i[-1] == "E":
    if (i + "D") in res:
        res.remove(i + "D")
    if (i + "R") in res:
        res.remove(i + "R")
    if (i + "RS") in res:
        res.remove(i + "RS")
    if (i + "ING") in res:
        res.remove(i + "ING")

AttributeError: 'str' object has no attribute 'remove'