In [4]:
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.lm import MLE
from nltk.util import everygrams

In [5]:
#pre processing

def preprocess_data(text, language='english'):
    """Clean raw text: strip HTML tags, lowercase, drop punctuation and stop words.

    Args:
        text: Raw input string, possibly containing HTML markup.
        language: Name of the NLTK stop-word list used for filtering.
            Defaults to ``'english'``. Passing ``None`` falls back to
            ``stopwords.words()`` with no language, i.e. the stop words of
            every language NLTK ships.

    Returns:
        The cleaned text as one string of space-separated tokens.

    Note:
        Needs the NLTK ``stopwords`` corpus and the ``punkt`` tokenizer data
        to be downloaded before it is called.
    """
    # remove html tags
    text = re.sub(r'<.*?>', '', text)

    # convert text to lower
    text = text.lower()

    # remove all punctuation (anything that is neither a word char nor whitespace)
    text = re.sub(r'[^\w\s]', '', text)

    # remove stop words
    # NOTE: punctuation is stripped first, so contractions arrive here without
    # their apostrophe ("don't" -> "dont") and will not match the stop list.
    stop_words = set(stopwords.words(language))
    tokens = word_tokenize(text)
    kept_words = [word for word in tokens if word not in stop_words]

    # Join back into a string so the result can be fed to code that expects
    # sentence text (and so the cleaned value is actually returned).
    return ' '.join(kept_words)


In [6]:
# nltk installs
# - punkt / punkt_tab: tokenizer data used by word_tokenize
#   (newer NLTK releases look for punkt_tab instead of punkt)
# - stopwords: corpus read by preprocess_data

nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to C:\Users\Abhishek
[nltk_data]     Chintapalli\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [7]:
# language model function

def language_model(text_data):
    """Fit a bigram maximum-likelihood language model on a small corpus.

    Args:
        text_data: Iterable of sentence strings. A single string is treated
            as one sentence instead of being iterated character by character.

    Returns:
        A fitted ``MLE`` model of order 2, or ``None`` (after printing a
        message) when the input produces no tokens at all.
    """
    if isinstance(text_data, str):
        text_data = [text_data]

    tokenized_sentences = [word_tokenize(sentence) for sentence in text_data]
    all_tokens = [word for sentence in tokenized_sentences for word in sentence]

    # No tokens means no n-grams of any order, so there is nothing to fit.
    if not all_tokens:
        print("No bigrams found. Please input valid text data.")
        return None

    # Build unigrams + bigrams sentence by sentence so that no bigram spans
    # the end of one sentence and the start of the next.
    training_ngrams = [
        list(everygrams(sentence, max_len=2))
        for sentence in tokenized_sentences
    ]

    model = MLE(2)
    # Hand the raw tokens to fit(); the model builds its own vocabulary from
    # them, keeping the real token counts.
    model.fit(training_ngrams, all_tokens)
    return model
    
# Small demo corpus: one sentence per list entry.
text_data = [
    "The quick brown fox jumps over the lazy dog.",
    "She sells sea shells on the sea shore.",
    "The cat in the hat plays with the dog in the fog.",
    "To be or not to be, that is the question.",
    "All's well that ends well.",
    "A stitch in time saves nine.",
    "The early bird catches the worm."
]

# NOTE(review): this rebinds the name `language_model` from the function to
# its return value, so the function is no longer callable until the `def`
# above is executed again. Later cells read `language_model` as the model.
language_model = language_model(text_data)

In [8]:
#generate mad libs

def generate_mad_libs(template, language_model=None):
    """Fill every ``{placeholder}`` in ``template`` with a word typed by the user.

    Placeholders are processed left to right; the user is prompted once per
    occurrence, so a placeholder that appears twice is asked for twice.

    Args:
        template: Text containing placeholders such as ``{noun}``, ``{verb}``,
            ``{adjective}`` or ``{adverb}``. Any other placeholder name gets a
            generic "random word" prompt.
        language_model: Accepted for call compatibility; not used when
            filling the template.

    Returns:
        The template with each placeholder replaced by the user's answer.
    """
    prompts = {
        "noun": "Enter a noun: ",
        "verb": "Enter a verb: ",
        "adjective": "Enter an adjective: ",
        "adverb": "Enter an adverb: ",
    }

    def ask_for_word(match):
        # match.group(1) is the placeholder name between the braces.
        return input(prompts.get(match.group(1), "Enter a random word: "))

    # A single re.sub pass replaces each placeholder where it stands, so text
    # the user types (even something like "{verb}") is never substituted again.
    return re.sub(r'\{(.*?)\}', ask_for_word, template)

# Two example templates; only `spiderman` is passed to generate_mad_libs below.
# NOTE(review): the recorded output under this cell has the shape of
# `template`, not `spiderman`, so it appears to come from an earlier run.
template = "The {adjective} {noun} {verb} {adverb} through the {adjective} forest."
spiderman = "Spider-Man's Uncle once said 'With great {noun} comes great {noun}.'"
finished_mad_libs = generate_mad_libs(spiderman, language_model)
print(finished_mad_libs)

The bat bat bat bat through the bat forest.
