## Markov Chain predictions

### Imports

In [3]:
import pandas as pd
import re
import markovify
import nltk
import spacy

In [2]:
# !python -m spacy download en

In [29]:
df = pd.read_csv('../data/recipes.csv').drop(columns=['Unnamed: 0'])
print(df.shape)

(761, 17)


---
### Cleaning for markovify

In [30]:
# text cleaning function credit: https://towardsdatascience.com/creating-word-embeddings-coding-the-word2vec-algorithm-in-python-using-deep-learning-b337d0ba17a8
def clean_text(
    string: str, 
    punctuations=r'''!()-[]{};:'"\,<>/?@#$%^&*_~''',
    stop_words=['the', 'a', 'and', 'is', 'be', 'will']) -> str:
    """
    A method to clean text 
    """
    # Cleaning the urls
    string = re.sub(r'https?://\S+|www\.\S+', '', string)

    # Cleaning the html elements
    string = re.sub(r'<.*?>', '', string)

    # Removing the punctuations
    for x in string.lower(): 
        if x in punctuations: 
            string = string.replace(x, "") 

    # Converting the text to lower
    string = string.lower()

    # not this time
    # Removing stop words
    # string = ' '.join([word for word in string.split() if word not in stop_words])

    # Cleaning the whitespaces
    string = re.sub(r'\s+', ' ', string).strip()
    
    # Remove linebreak
    string = string.replace('\r\n', '')

    return string  

In [31]:
df.head(1)

Unnamed: 0,vegetarian,vegan,glutenFree,dairyFree,veryHealthy,cheap,sustainable,lowFodmap,title,readyInMinutes,summary,cuisines,dishTypes,occasions,instructions,simplifiedIngredients,simplifiedInstructions
0,1,0,0,0,0,0,0,0,Gingerbread Mummies,45,Gingerbread Mummies might be just the dessert ...,[],['dessert'],['christmas'],"In a bowl of an electric mixer, beat the butte...","['unsalted butter', 'wheat flour', 'sugar', 'm...","['In a bowl of an electric mixer, beat the but..."


In [32]:
df['instructions'] = [clean_text(str(row)) for row in df['instructions']]

In [33]:
df['instructions']

0      in a bowl of an electric mixer beat the butter...
1      preheat oven to 375 degrees f. in a large bowl...
2      preheat oven to 350 degrees f. sift flour onto...
3      preheat your oven to 400 degrees f. add the sl...
4      beat butter in large bowl in an electric mixer...
                             ...                        
756    prepare the graham crust preheat the oven to 3...
757    preheat oven to 375 degrees. in a large skille...
758    preheat oven into 180c. gently pound the chick...
759    rinse the chickpeas and soak for 8 hours or ov...
760    oven 325f place six ramekins in a water bath. ...
Name: instructions, Length: 761, dtype: object

---
### Parse

Originally going to use [spaCy](https://spacy.io/usage/models), but did it manually above in cleaning instead

In [34]:
# accuracy
# nlp = spacy.load("en_core_web_sm")

# efficiency
nlp = spacy.load('en_core_web_sm')

In [51]:
recipe_soup = ''
for instr in df['instructions']:
    recipe_soup += instr
# verify: oh it worked
# recipe_soup

---
### Markovify model

In [52]:
# 'fit' the markovify model
mark = markovify.Text(recipe_soup, state_size=2)

In [53]:
print(mark.make_sentence())

3. in a heavy 10 ovenproof skillet. melt butter and sugars. slowly add 1 teaspoon salt and pepper.dip the chicken into the prepared cookie sheets spacing them a light coat of glaze on the baking dish microwaved so it does i am visiting her right now. remove the chicken and set aside.for the bchamel sauce add 14 c.of batter to cling to the tomato sauce and assemble slidersadd drained meatballs to tomato sauce parmesan cheese ketchup brown sugar flour almond flour cornstarch baking powder sugar and salt cook through. season and serve.crepeswhisk the eggs.add the wet ingredients into a large stockpot and add bitters and grenadine.use one of the saucepan. cook over medium heat. add the garlic in a separate bowl set aside. empty enchilada sauce mushroom soup and mix well and pour in your spinach.let the spinach and artichoke hearts.pour in large pot heat olive oil. pour bruschetta mixture in and combine wheat and corn flour salt and whirl for 5 minutes.stir in two cups flour cover with foil