# Used Lbarires

In [57]:
import random
import re
import pandas as pd

import warnings

# Ignore FutureWarning
warnings.filterwarnings("ignore", category=FutureWarning)

# Loading the dataset

In [58]:
df_new  = pd.read_csv("/content/names.csv")

In [59]:
df_new.head(5)

Unnamed: 0,Proprietary Name
0,amyvid
1,tauvid
2,trulicity
3,emgality
4,taltz


# Preprocessing

In [60]:
# # Convert all names in 'Proprietary Name' column to lowercase
# df_new['Proprietary Name'] = df_new['Proprietary Name'].str.lower()

# Extracting the list of medicine names from the DataFrame
medicine_names = df_new['Proprietary Name'].tolist()

#Builder functions

In [61]:


# building chain from a list of names
def  create_markov_chain(names_list):
    chain = {
        '_start': {}  # the start state
    }

    for name in names_list:
        word = name.strip().lower()  # convert to lowercase and remove white spaces

        word = word + '.'  # adding the end character

        # adding first character to first characters count
        if word[0] not in chain['_start']:
            chain['_start'][word[0]] = 1
        else:
            # incrementing frequency
            chain['_start'][word[0]] += 1



        for ix in range(len(word) - 1):
            if word[ix] in chain:
                # current letter IS in chain

                if word[ix+1] not in chain[word[ix]]:
                    # first time seeing the next character following this character.
                    chain[word[ix]][word[ix+1]] = 1
                else:
                    # incrementing frequency
                    chain[word[ix]][word[ix+1]] += 1

            else:
                # current letter IS NOT in chain
                chain[word[ix]] = {word[ix+1]: 1}
   #print("chain is",chain)
    return chain



In [62]:
# true if character is a vowel
def isVowel(char: str):
    return char in 'aeiouy'

# true if character is a consonant
def isConson(char: str):
    return char.isalpha() and not isVowel(char)


In [63]:

# picks a character given dict of frequencies
def select_character(probs: dict):
    total_next_chars = sum(probs.values())
   #print("total_start_sum is" ,total_next_chars )
    rand = random.random() * total_next_chars
    #print("random" , rand)
    for char, freq in probs.items():
        rand -= freq
        #print("rand = rand -freq is " , rand)
        if rand < 0:
            return char

    raise ValueError("Failed to pick a character")


In [64]:

# picks next state given previous state and current state
def next_char(prev: str, current: str, chain: dict):
    #print("current_chain" , chain[current])
    if current == '.':
        # base case of recursion
        return ''
    else:
        picked_char = select_character(chain[current])
        #print("picked char is" , picked_char)
        prevTwoVowel = isVowel(prev) and isVowel(current)
        prevTwoConson = isConson(prev) and isConson(current)

        # making sure there are no more than two consecutive consonants/vowels
        while (prevTwoVowel and isVowel(picked_char)) or (prevTwoConson and isConson(picked_char)):
            picked_char = select_character(chain[current])

        # recursively building the string
        return current + next_char(current, picked_char, chain)



In [65]:
def make_word(chain: dict):
    first_char = select_character(chain['_start'])
    #print("first_char Is" ,first_char )
    return next_char('', first_char, chain)



In [66]:
# checks if word contains a zangy letter
def is_zangy(word: str):
    return bool(re.match(r'.*[qxzjr]', word))



In [67]:
# generates num names between min_length and max_length.

def generate_from_names(num: int, min_length: int, max_length: int, names_list: list):
    chain =  create_markov_chain(names_list)
    print(chain)

    print("***** genarates medicine Names******* ")
    for i in range(num):
        word = make_word(chain)

        while (len(word) < min_length or len(word) > max_length) or not is_zangy(word):
            word = make_word(chain)

            # print("discarded word " , word)

        print("accepted word after satisfying all condition :=>" , word)





In [68]:

# Using the modified function to generate names from the list of medicine names
generate_from_names(5, 6, 9, medicine_names)

{'_start': {'a': 310, 't': 223, 'e': 173, 'r': 134, 's': 199, 'c': 322, 'z': 102, 'o': 118, 'v': 133, 'b': 134, 'l': 166, 'h': 60, 'g': 92, 'p': 246, 'f': 135, 'n': 161, 'y': 10, 'd': 199, 'k': 58, 'i': 105, 'x': 46, 'j': 27, 'm': 188, 'u': 31, 'q': 25, 'w': 6}, 'a': {'m': 253, 'u': 12, 'l': 294, 't': 208, '.': 390, 'x': 69, 'n': 353, 'q': 14, 'r': 267, 's': 127, 'g': 44, 'z': 205, 'b': 83, 'c': 218, 'v': 77, 'a': 4, 'i': 42, 'p': 118, 'y': 27, 'f': 58, 'd': 103, 'h': 5, 'k': 26, 'j': 1, 'e': 2}, 'm': {'y': 61, 'g': 2, 'o': 87, 'b': 22, 'i': 207, 'a': 117, 'z': 6, 't': 11, 'j': 3, 'l': 10, 'u': 27, 'p': 39, '.': 151, 'r': 7, 'e': 191, 'c': 10, 'n': 8, 'm': 6, 'v': 7, 'f': 8, 'h': 1, 's': 7, 'd': 4}, 'y': {'v': 12, '.': 47, 'm': 33, 'a': 24, 'p': 11, 'r': 37, 'u': 8, 'c': 90, 'd': 35, 'e': 5, 't': 50, 's': 50, 'z': 10, 'l': 65, 'g': 8, 'n': 47, 'k': 4, 'x': 12, 'f': 4, 'q': 1, 'b': 13, 'h': 1, 'o': 18, 'i': 3, 'w': 1}, 'v': {'i': 163, 'm': 1, 'o': 66, 'e': 105, '.': 20, 'a': 165, 'y': 2