## Lyrics Generation using Markov Models

In [31]:
# function to generate a table from a text
# the table has the k-character strings found in the text as keys, and the value for each key is
# another dictionary
# this dictionary has as keys all the characters that follow that k-character string
# and its values are the number of times each of those characters follows it

def generate_table(data, k=4):
    """Count which character follows every k-character window of `data`.

    Returns a dict mapping each k-character slice to an inner dict of
    {following character: number of occurrences}. If `data` has k or fewer
    items there is no window with a follower and the table is empty.
    """
    counts = {}

    for begin in range(len(data) - k):
        end = begin + k
        window = data[begin:end]
        follower = data[end]

        # create the inner dict on first sight of this window, then bump the counter
        followers = counts.setdefault(window, {})
        followers[follower] = followers.get(follower, 0) + 1

    return counts
                    
    

In [32]:
# function to create a table of probabilities from an original table
# the function will go over each key in the table
# for each key it will count the total number of appearances
# then, for each character that follows the key, it will divide its number of appearances by
# the total sum
# this gives the probability of each character following a certain k-length string


def table_to_probs(table):
    """Turn follower counts into probabilities.

    For every key of `table`, each count in its inner dict is divided by the
    sum of that inner dict's counts. The conversion is done in place: the
    same `table` object is modified and returned.
    """
    for followers in table.values():

        # total number of times this key was seen with any follower
        total = 0
        for count in followers.values():
            total += count

        # only existing keys are reassigned, so iterating while writing is safe
        for char in followers:
            followers[char] = followers[char] / total

    return table

In [33]:
# read the data

text_path = "manele.txt"


def load_text(filename):
    """Read `filename` as UTF-8 and return its whole content lower-cased."""
    with open(filename, encoding='utf8') as handle:
        contents = handle.read()
    return contents.lower()
    
# load the lower-cased corpus and flatten it onto one line (every newline becomes a space)
text = load_text(text_path).replace("\n", " ")

print('Loaded the dataset.')

Loaded the dataset.


In [34]:
# function to create a markov chain
# this function will produce a table of probabilities from a text

def markov_chain(text, k=4):
    """Build a table of next-character probabilities from `text`.

    Parameters:
        text: the training text.
        k: length of the context window used as a key in the table. It must
           match the `k` later passed to sample_next / generate_text,
           otherwise no context is ever found in the model. Defaults to 4,
           the same default generate_table uses.

    Returns:
        The table produced by generate_table(text, k) after table_to_probs
        has converted its counts into probabilities.
    """
    # forward k so that models with a context length other than 4 can be built
    table = generate_table(text, k)

    return table_to_probs(table)

In [35]:
# build a table of probabilities named model

# `text` is the lower-cased corpus loaded above; no k is passed here, so the table is
# built with the default context length of 4 characters
model = markov_chain(text)

# debugging aid: uncomment to print the whole probability table
#print(model)

In [36]:
import numpy as np


# function to return the next character from a starting sentence
# it will use only the last k characters from the sentence and will 
# get all the possible characters and their probabilities that follow the sentence
# after that, the function will sample randomly and return a character based on their 
# probabilities



def sample_next (context, model, k):
    """Sample the character that follows `context` according to `model`.

    Only the last k characters of `context` are used for the lookup. When
    that window is not a key of `model`, a single space is returned.
    Otherwise one of the follower characters is drawn with
    np.random.choice, weighted by the values stored in the model.
    """
    followers = model.get(context[-k:])

    # unseen context: fall back to a space
    if followers is None:
        return " "

    candidates = list(followers)
    weights = list(followers.values())

    return np.random.choice(candidates, p=weights)

In [37]:
# function to generate text based on a start sentence
# it will generate the character that follows the starting sentence
# then it will append that predicted character to the original sentence
# and it will continue to generate characters in this manner

def generate_text(start, k = 4, max_len = 1000):
    """Extend `start` by `max_len` sampled characters.

    Each new character is obtained from sample_next using the last k
    characters of the text produced so far and the module-level `model`.
    Returns the starting text followed by the generated characters.
    """
    generated = start
    window = start[-k:]

    for _ in range(max_len):
        generated = generated + sample_next(window, model, k)
        # slide the context window to the end of the text generated so far
        window = generated[-k:]

    return generated
    

In [38]:
# keep the result under its own name: `text` holds the training corpus, and overwriting it
# would make a re-run of the model-building cell train on generated output instead
generated_text = generate_text("langa", k=4, max_len=2000)

print(generated_text)

langa cine, pentru tine ai dau seama cat e de ce-mi planga cine, lange de dor daca pleci  lang noapte auuuu uuuu  ce-ti face langa tine bine vis vis vis vis si orice ai fost ce simt eu nu te-as paradis dis dis dis iti faci sa tem ca je tàime cu tine, lang in urma te iubitu' ce-mi place iubesc cat doresc oriunde azi il voi uita chiar cand si oriunde azi il voi uita chiar daca nu-ti arat cat te-as paradis dis dar mult, mai pot doar pe tine nu te-as paris  auuuu uuu ce-mi iei iubire  niciodata zic : hai in vis vis vis si ti-am gresit  te iubire pentru cine, doare mineata  nici o sa pot dor de ai fac viata? de vis vis  ce-mi plang noaptea  as fac viata nu ma satur de orice aveam mai mult cu fie cand stapanesti de feri..  toate prima sa ma singurata paris  am stiut ca doar pe jumatatea sora imi va fi sigur niciodata in vis vis  am sa mor de orice astept in viata in viata in brate e cand te iubesc si ti-am cuvinte si orice ai face  parca ai fi si alt pont sa-ti ia locul, numai planga tine zi

In [39]:
# the generated words may sometimes seem to have no relationship between them, because the
# model only stores syntactic information (which characters tend to follow which)