In [1]:
import numpy as np
from collections import defaultdict

wordToSupertag = dict()
with open('../../NLP_Resources/supertags.txt') as f:
    temp = [tuple(x.split()) for x in f]
    wordToSupertag = {word: tag for (word, tag) in temp}

In [2]:
wordToBase = dict()

with open('../../NLP_Resources/superbazy.txt') as f:
    temp = [tuple(x.split()) for x in f]
    wordToBase = {word: base for (word, base) in temp}

In [3]:
vowels = {'a', 'e', 'i', 'o', 'u', 'ę', 'ą', 'ó', 'y'}

def getRhyme(s):
    count = 0
    for i in range(len(s)-1, -1, -1):
        if s[i] == 'i' and i+1 < len(s) and s[i+1] in vowels:
            continue
        if s[i] in vowels:
            count += 1
        if count == 2:
            return s[i:]
    return s

def countSyllables(s):
    count = 0

    for i in range(len(s)):
        if s[i-1] == 'i' and s[i] in vowels:
            continue

        if s[i] in vowels:
            count += 1
    return count

In [4]:
import random

findRhymeDict = defaultdict(lambda: defaultdict(lambda: defaultdict(list))) #sylaby->tag->rym->[lista]

candidatesDict = defaultdict(lambda: defaultdict(list))

with open('../../NLP_Resources/supertags.txt') as f:
    for line in f:
        word, tag = line.split()
        
        sylaby = countSyllables(word)

        rym = getRhyme(word)

        findRhymeDict[sylaby][tag][rym].append(word)

        candidatesDict[sylaby][tag].append(word)


In [5]:
sentences = []

with open('../../NLP_Resources/rytmiczne_zdania_z_korpusu.txt') as f:
    for line in f:
        startIndex = line.find(':') + 1
        line = line[startIndex:len(line)-2].strip()
        sentences.append(line)
        

In [6]:
import sklearn.preprocessing

base_vectors = {}
with open('../../NLP_Resources/poleval_base_vectors.txt') as f:
    for line in f:
        lineSplit = line.split()
        vec = np.array([[np.float64(x) for x in lineSplit[1:]]])
        sklearn.preprocessing.normalize(vec, copy=False)
        base_vectors[lineSplit[0]] = vec[0]


In [7]:
def findWord(word, rhyme = False):
    base = wordToBase[word]
    tag = wordToSupertag[word]
    sylables = countSyllables(word)
    
    if rhyme:
        rym = getRhyme(word)
        candidates = findRhymeDict[sylables][tag][rym]
    else:
        candidates = candidatesDict[sylables][tag]

    bestOption = chooseCandidate(candidates, base)
    if bestOption == base:
        return word
    #print("best: ", bestOption, "<- replacing: ", word)
    return bestOption


def chooseCandidate(candidates, wordReplaced):
    results = []
    for word in candidates:
        if word not in wordToBase:
            continue
        base = wordToBase[word]
        if base not in base_vectors:
            continue
        score = np.dot(base_vectors[base], base_vectors[wordReplaced])
        results.append((score, word))
    results = sorted(results, reverse=True)

    if len(results) < 2:
        return wordReplaced
    return results[1][1]


In [8]:
def replaceLastWord():
    success = False
    while not success:
        sentence = np.random.choice(sentences)

        wersy = [wers.split() for wers in sentence.split('[*]')]

        wordsToChange = [wersy[0][-1], wersy[1][-1]]

        newWords = [findWord(wordsToChange[0], True), findWord(wordsToChange[1], True)]

        if newWords[1] != wordsToChange[1]:
            wersy[1][-1] = newWords[1]
        if newWords[0] != wordsToChange[0]:
            wersy[0][-1] = newWords[0]
        if wersy[0][-1] == wordsToChange[0] and wersy[1][-1] == wordsToChange[1]:
            continue

        print(f"OLD: {sentence}")
        print(f"NEW: {' '.join(wersy[0])} [*] {' '.join(wersy[1])}")
        success = True
    

replaceLastWord()

OLD: mam konkretne przykłady związane z trudnymi [*] problemami prawnymi i politycznymi
NEW: mam konkretne przykłady związane z łatwymi [*] problemami prawnymi i opozycyjnymi


In [9]:
def replaceChosenWords():
    #sentence = np.random.choice(sentences)
    sentence = "seria dziecięcych skarpet antypoślizgowych [*] z motywami mieszkańców obszarów polarnych"

    wersy = [wers.split() for wers in sentence.split('[*]')]
    temp = 0
    for wers in wersy:
        for i in range(len(wers)):
            wers[i] = (wers[i], i + temp)
        temp += len(wers)
    print(wersy[0])
    print(wersy[1])

    wordsIndices = [int(index) for index in input("Type the numbers of words you would like to replace: ", ).split()]
    wordsToChange = []
    for index in wordsIndices:
        if index < len(wersy[0]):
            wordsToChange.append(wersy[0][index])
        else:
            wordsToChange.append(wersy[1][index-len(wersy[0])])

    #print(wordsToChange)
    for i in range(len(wordsToChange)):
        word, index = wordsToChange[i]
        if index == len(wers[0]) -1 or index == len(wers[0]) + len(wers[1]) -1:
            word = findWord(word, True)
        else:
            word = findWord(word)
        wordsToChange[i] = (word, index)

    for (word, index) in wordsToChange:
         if index < len(wersy[0]):
            wersy[0][index] = word
         else:
            wersy[1][index - len(wersy[0])] = word

    for wers in wersy:
        for i in range(len(wers)):
            x = wers[i]
            if isinstance(x, tuple):
                x = x[0]
            wers[i] = x

    print(f"OLD: {sentence}")
    print(f"NEW: {' '.join(wersy[0])} [*] {' '.join(wersy[1])}")

replaceChosenWords()

[('seria', 0), ('dziecięcych', 1), ('skarpet', 2), ('antypoślizgowych', 3)]
[('z', 4), ('motywami', 5), ('mieszkańców', 6), ('obszarów', 7), ('polarnych', 8)]
OLD: seria dziecięcych skarpet antypoślizgowych [*] z motywami mieszkańców obszarów polarnych
NEW: seria dziewczęcych skarpet antypoślizgowych [*] z sztafażami gorliczan terenów arktycznych


In [10]:
def replaceEverythingYouCan():
    sentence = np.random.choice(sentences)
    #sentence = "seria dziecięcych skarpet antypoślizgowych [*] z motywami mieszkańców obszarów polarnych"

    wersy = [wers.split() for wers in sentence.split('[*]')]
    
    for wers in wersy:
        for i in range(len(wers)):
            if not wers[i].isalnum():
                continue
            if i == len(wers) -1:
                wers[i] = findWord(wers[i], True)
            else:
                wers[i] = findWord(wers[i])

    print(f"OLD: {sentence}")
    print(f"NEW: {' '.join(wersy[0])} [*] {' '.join(wersy[1])}")

replaceEverythingYouCan()

OLD: strażnik będzie miał prawo posiadania broni [*] na podstawie decyzji komendanta straży
NEW: złodziej będzie miał światło zaświadczania broni [*] po przesłance aprobacji kwatermistrza straży
