<h4> Lanțurile Markov având n stări </h4>

<h1>Importuri</h1>

In [38]:
import numpy as np
import pandas as pd
import os
import re
import string
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import random

<h1>Citirea datelor</h1>

In [39]:
def read_all(path):
    """Read the non-empty lines of a UTF-8 text file up to a separator line.

    Every line is stripped of surrounding whitespace. Reading stops at the
    first line that, once stripped, equals ``'----------'``; that separator
    line and everything after it are ignored. Blank lines are skipped.

    Args:
        path: path of the text file to read.

    Returns:
        list[str]: the stripped, non-empty lines in file order.
    """
    separator = '----------'
    kept = []
    with open(path, mode='r', encoding='utf-8') as source:
        # map() is lazy, so the file is still consumed line by line and
        # nothing past the separator is read into the result.
        for stripped in map(str.strip, source):
            if stripped == separator:
                break
            if not stripped:
                continue
            kept.append(stripped)
    return kept

# NOTE(review): absolute, machine-specific location of the proverbs corpus;
# anyone running this notebook elsewhere has to adjust it.
path = "D:/FACULTATE/SEM4/AI/Lab8/data/proverbe.txt"
stories = read_all(path)
print(f"Numărul de linii = {len(stories)}")

Numărul de linii = 330


<h1>Cleaning DataSet</h1>

In [40]:
def clean_txt(txt):
    """Normalize raw text lines into one flat list of lowercase words.

    Each line is lowercased, the punctuation characters listed in the
    pattern below are deleted, the remainder is tokenized with NLTK's
    ``word_tokenize`` and only tokens for which ``str.isalpha()`` is true
    are kept.

    Because hyphens are deleted (not replaced by a space), the two halves of
    a hyphenated form are joined into a single token, e.g. ``"s-a"`` becomes
    ``"sa"``.

    Args:
        txt: iterable of strings, one entry per line.

    Returns:
        list[str]: the kept words of all lines, in their original order.
    """
    # The hyphen must be escaped inside the character class. Written as
    # `=-\\` it is parsed as the range '=' .. '\', which never matches '-'
    # itself and accidentally matches '>', '?', '@', 'A'-'Z' and '['.
    # Compiled once here instead of being re-parsed for every line.
    punctuation = re.compile(r"[,.\"\'!@#$%^&*(){}?/;`~:<>+=\-\\]")

    cleaned_txt = []
    for line in txt:
        line = punctuation.sub("", line.lower())
        tokens = word_tokenize(line)  # split the line into tokens
        # keep only purely alphabetic tokens (drops digits and leftovers)
        cleaned_txt.extend(word for word in tokens if word.isalpha())
    return cleaned_txt

# Flatten the corpus into one word list and show its size and a short sample.
cleaned_stories = clean_txt(stories)
print(f"Numărul de cuvinte = {len(cleaned_stories)}")
print(cleaned_stories[:50])

Numărul de cuvinte = 2211
['a', 'aduce', 'apa', 'dupa', 'ce', 'stins', 'focul', 'a', 'aduna', 'nuiele', 'pentru', 'spinarea', 'sa', 'a', 'ajunge', 'cutitul', 'la', 'os', 'a', 'auzit', 'clopotul', 'dar', 'nu', 'stie', 'de', 'la', 'ce', 'biserica', 'a', 'avea', 'ac', 'de', 'cojocul', 'cuiva', 'a', 'avea', 'mai', 'mult', 'noroc', 'decat', 'minte', 'a', 'bate', 'apa', 'in', 'piua', 'a', 'bate', 'calul', 'care']


<h1>Crearea modelului Markov</h1>

In [41]:
def make_markov_model(cleaned_stories, n_gram):
    """Build a Markov model whose states are runs of ``n_gram`` words.

    For every start index ``i`` in ``range(len(cleaned_stories) - n_gram)``
    the current state is the ``n_gram`` words beginning at ``i`` and the
    next state is the (up to) ``n_gram`` words that immediately follow it.
    Near the end of the sequence the next state is shorter, because only the
    words that actually exist are used. All states are space-joined strings.

    Args:
        cleaned_stories: sequence of word strings.
        n_gram: number of words per state.

    Returns:
        dict[str, dict[str, float]]: for each current state, a mapping from
        next state to its transition probability (count / total count of
        transitions leaving the current state).
    """
    total_words = len(cleaned_stories)
    counts = {}
    for begin in range(total_words - n_gram):
        follow = begin + n_gram
        curr_state = " ".join(cleaned_stories[begin + k] for k in range(n_gram))
        # Indices past the end of the sequence are skipped, so the last few
        # windows yield a truncated next state.
        next_state = " ".join(
            cleaned_stories[k]
            for k in range(follow, follow + n_gram)
            if k < total_words
        )
        successors = counts.setdefault(curr_state, {})
        successors[next_state] = successors.get(next_state, 0) + 1

    # calculating transition probabilities
    markov_model = {}
    for state, successors in counts.items():
        total = sum(successors.values())
        markov_model[state] = {
            nxt: seen / total for nxt, seen in successors.items()
        }

    return markov_model

In [49]:
# Number of words per Markov state.
n = 4
markov_model = make_markov_model(cleaned_stories, n_gram=n)

In [50]:
# One key per distinct current state in the model.
print("number of states = ", len(markov_model))

number of states =  2164


In [51]:
# The label names the state that is actually looked up below; it previously
# said 'a face', which is not the key being inspected.
print("All possible transitions from 'a face pe cineva' state: \n")
print(markov_model['a face pe cineva'])

All possible transitions from 'a face' state: 

{'cu ou si cu': 0.5, 'de doua parale a': 0.5}


<h1>Generarea de proverbe</h1>

In [52]:
def generate_story(markov_model, limit, start):
    """Generate text by a random walk over a Markov model.

    Starting from ``start``, up to ``limit`` successor states are drawn with
    ``random.choices``, weighted by the stored transition probabilities.
    The walk stops early when it reaches a state that has no outgoing
    transitions in ``markov_model`` (a dead end) instead of failing.

    Args:
        markov_model: mapping ``state -> {next_state: probability}``.
        limit: maximum number of transitions to take.
        start: initial state; must be a key of ``markov_model`` whenever at
            least one transition is requested.

    Returns:
        str: the visited states, each followed by a single space (so the
        result ends with a trailing space).

    Raises:
        KeyError: if a transition is requested and ``start`` is not a state
            of the model.
    """
    visited = [start]
    curr_state = start
    steps = 0
    while steps < limit:
        transitions = markov_model.get(curr_state)
        if transitions is None and steps == 0:
            # An unknown start state is a caller error, not a dead end.
            raise KeyError(f"start state {start!r} is not in the Markov model")
        if not transitions:
            # Dead end reached mid-walk: return what has been generated.
            break
        candidates = list(transitions.keys())
        weights = list(transitions.values())
        # random.choices returns a list of k=1 elements; take the single pick.
        curr_state = random.choices(candidates, weights)[0]
        visited.append(curr_state)
        steps += 1
    return " ".join(visited) + " "

<h1>Utilizare</h1>

In [53]:
# Print 20 generated texts, each taking at most 8 transitions from the
# same start state.
for i in range(20):
    print(f"{i}. ", generate_story(markov_model, 8, "a face pe cineva"))

0.  a face pe cineva de doua parale a face treaba in doi peri a face umbra pamantului degeaba a face un bine inseamna a isi bate cuie in talpa a fi cu ochii in patru a 
1.  a face pe cineva cu ou si cu otet a face pe cineva de doua parale a face treaba in doi peri a face umbra pamantului degeaba a face un bine inseamna a isi bate cuie 
2.  a face pe cineva cu ou si cu otet a face pe cineva de doua parale a face treaba in doi peri a face umbra pamantului degeaba a face un bine inseamna a isi bate cuie 
3.  a face pe cineva cu ou si cu otet a face pe cineva de doua parale a face treaba in doi peri a face umbra pamantului degeaba a face un bine inseamna a isi bate cuie 
4.  a face pe cineva de doua parale a face treaba in doi peri a face umbra pamantului degeaba a face un bine inseamna a isi bate cuie in talpa a fi cu ochii in patru a 
5.  a face pe cineva de doua parale a face treaba in doi peri a face umbra pamantului degeaba a face un bine inseamna a isi bate cuie in talpa a fi cu ochi