In [1]:
from datasets import load_dataset
import markovify


dataset = load_dataset("biglam/gutenberg-poetry-corpus")
poetry_text = '\n'.join(dataset['train']['line'])
model = markovify.Text(poetry_text)
poetry_stanza = model.make_sentence()

print(poetry_stanza)


South, where the Ships sail to the slab before you, surging to the farther shore Saw a maiden Fleet of foot was fleetest; Thy voice in voice Discern'd, when one tries to teach the people of that fell capsize, As half in joy and be humble; I was delayed three hours -- what drove to flight; And men laugh To encounter such chaff.


In [2]:
from textblob import TextBlob

fav_poetry = "All about you And your brother the prince, kidnapped by his inclined body, flying, who seeks the good for us."
sentiment = TextBlob(fav_poetry).sentiment
print("Sentiment analysis:")
print("Polaritate:", sentiment.polarity)
print("Subiectivitate:", sentiment.subjectivity)

Sentiment analysis:
Polaritate: 0.7
Subiectivitate: 0.6000000000000001


In [3]:
import nltk
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\x\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [4]:
import spacy
import random
from nltk.corpus import wordnet
#import en_core_web_sm

#nlp = en_core_web_sm.load()

nlp = spacy.load(r'C:\Users\x\AppData\Local\Programs\Python\Python310\Lib\site-packages\en_core_web_sm\en_core_web_sm-3.7.1')



def get_synonyms(word):
    synonyms = []
    for syn in wordnet.synsets(word):
        for lemma in syn.lemmas():
            synonyms.append(lemma.name())
    return list(set(synonyms))

def generate_synonym(word):
    synonyms = get_synonyms(word)
    if synonyms:
        return random.choice(synonyms)
    else:
        return word

doc = nlp(poetry_stanza)
for token in doc:
    # fara cuvinte cheie (nu este un punctuație sau spațiu)
    if token.text.strip() and not token.is_punct and not token.is_space:
        synonym = generate_synonym(token.text)
        poetry_stanza = poetry_stanza.replace(token.text, synonym, 1) 


print("Poetry with synonyms replaced:")
print(poetry_stanza)


Poetry with synonyms replaced:
to_the_south, where the embArk voyage to the slab earlier you, surgiodinendianag to the farther land go_out a indiumitiative fade of animal_foot follow fleet; Thy sound in vox Discern'd, when unity try_on to instruct the multitude of that devolve turn_turtle, American_Samoa one-half in joy and equal humble; I follow check three hours -- what swarm to trajectory; And human_being gag To happen such chaff.


In [5]:
fav_poetry = "All about you And your brother the prince, kidnapped by his inclined body, flying, who seeks the good for us."

In [6]:
from nltk.translate.bleu_score import sentence_bleu
import random

class LantMarkov:

    def __init__(self, file):
        self.input_file = file
        self.lant_markov = {}
        
        self.cuvinte = []
        
        self._citeste_text()
        self._construieste_lant_markov()

    def _citeste_text(self):
        with open(self.input_file, 'r', encoding='utf-8') as fisier:
            text = fisier.read()
        text = text.lower() 
        self.cuvinte = text.split() 
    
    def _construieste_lant_markov(self):
        for i in range(len(self.cuvinte) - 1):
            cuvant = self.cuvinte[i]
            urmatorul_cuvant = self.cuvinte[i + 1]
            if cuvant in self.lant_markov:
                self.lant_markov[cuvant].append(urmatorul_cuvant)
            else:
                self.lant_markov[cuvant] = [urmatorul_cuvant]
    
    def genereaza_text(self,cuvant_initial,  lungime):
        text_generat = [cuvant_initial]
        for _ in range(lungime):
            urmatorul_cuvant = random.choice(self.lant_markov[text_generat[-1]])
            text_generat.append(urmatorul_cuvant)
        return ' '.join(text_generat)


lm = LantMarkov('D:\Facultate - Github\Facultate\AI\Tema8\data\proverbe.txt')
generated_poem = lm.genereaza_text("baba", 6)
print(generated_poem)
reference_poem = "baba nu aude dar le potriveste."


reference_tokens = nltk.word_tokenize(reference_poem.lower())
generated_tokens = nltk.word_tokenize(generated_poem.lower())

bleu_score = sentence_bleu([reference_tokens], generated_tokens)

# Afișarea scorului BLEU
print("BLEU Score:", bleu_score)


baba se face. cine se face primavara.
BLEU Score: 1.2508498911928379e-231


The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


In [7]:
import nltk
nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\x\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True