#### Notes

Regular expression to find every instance of a weight, usually with whatever unit of measurement comes after it. Saved an example below.

Current expressions are omitting the 1/2 and 1/4 weights, would need to account for decimals too.

Then, a loop to replace each one...

Current Issues:

1. The part in recipe where it says 'to make 1/2 to 3/4 cup', regex is only finding the second one
2. The worst problem is that regex is finding lone fractions e.g. 1/4 tsp salt, and working fine, but with "1 1/2 tbsp", it only captures "2 tbsp"

In [75]:
# https://www.recipetineats.com/greek-chicken-gyros-with-tzatziki/comment-page-34/#recipe
recipe = '''Ingredients
▢2 lb / 1 kg chicken thigh fillets , boneless skinless
Marinade
▢3 large garlic cloves , minced (~ 3 tsp)
▢1 tbsp white wine vinegar (or red wine or apple cider vinegar)
▢3 tbsp lemon juice
▢1 tbsp extra virgin olive oil
▢3 tbsp Greek yogurt , preferably full fat
▢1 1/2 tbsp dried oregano
▢1 tsp salt
▢Black pepper
Tzatziki
▢2 cucumbers (to make about 1/2 – 3/4 cup grated cucumber after squeezing out juice)
▢1 1/4 cups Greek yoghurt , preferably full fat
▢1 tbsp lemon juice
▢1 tbsp extra virgin olive oil (or more if you want richer)
▢1 garlic clove , minced
▢1/4 tsp salt
▢Black pepper
Salad
▢3 tomatoes , desseeded and diced
▢3 cucumbers , diced
▢1/2 red spanish onion , peeled and finely chopped
▢1/4 cup fresh parsley leaves (optional)
▢Salt and pepper
To Serve
▢4 to 6 pita breads or flat breads'''


recipe2 = '''1 (12-count) package) Hawaiian rolls

1 cup apple pie filling

2 (0.74 ounce) pouches apple cider powdered drink mix (such as Alpine Spiced Apple Cider®)

5 tablespoons butter, divided

1/2 cup sugar

1 teaspoon cinnamon'''

In [2]:
import re

#### Lbs

Clear this needs some reworking as recipes can start with just the fraction

In [78]:
lb_reg = re.compile(r'''((\d+|\d+/\d+) #Initial digit, captured for conversion
(\.\d+)?                               #Optional decimal
(?:\s)*?                               #Optional whitespace
(\d+/\d+)?                             #Optional fraction
(lb(?:s)?|pound(?:s)?))''',re.X|re.I)  #For one or more lbs, captured for conversion

In [80]:
lb_reg.findall(recipe2)

[]

#### tsp

In [70]:
tsp_reg = re.compile(r'''((\d+|\d+/\d+) #Initial digit, captured for conversion
(\.\d+)?                       #Optional decimal
(?:\s)*?                       #Optional whitespace
(\d+/\d+)?                     #Optional fraction
(tsp(?:s)?|teaspoon(?:s)?))''',re.X|re.I)             #For one or more lbs, captured for conversion

In [71]:
tsp_reg.findall(recipe)

[('3 tsp', '3', '', '', 'tsp', ''),
 ('1 tsp', '1', '', '', 'tsp', ''),
 ('4 tsp', '4', '', '', 'tsp', '')]

#### TBSP

In [72]:
tbsp_reg = re.compile(r'''((\d+|\d+/\d+) #Initial digit, captured for conversion
(\.\d+)?                       #Optional decimal
(?:\s)*?                       #Optional whitespace
(\d+/\d+)?                     #Optional fraction
(tbsp|tbsps|tablespoon(s)?))''',re.X|re.I)             #For one or more lbs, captured for conversion

In [73]:
tbsp_reg.findall(recipe)

[('1 tbsp', '1', '', '', 'tbsp', ''),
 ('3 tbsp', '3', '', '', 'tbsp', ''),
 ('1 tbsp', '1', '', '', 'tbsp', ''),
 ('3 tbsp', '3', '', '', 'tbsp', ''),
 ('2 tbsp', '2', '', '', 'tbsp', ''),
 ('1 tbsp', '1', '', '', 'tbsp', ''),
 ('1 tbsp', '1', '', '', 'tbsp', '')]

#### Cups

In [83]:
cup_reg = re.compile(r'''((\d+|\d+/\d+) #Initial digit, captured for conversion
(\.\d+)?                       #Optional decimal
(?:\s)*?                       #Optional whitespace
(\d+/\d+)?                     #Optional fraction
(cup|cups|c))''',re.X|re.I)             #For one or more lbs, captured for conversion

In [82]:
cup_reg.findall(recipe2)

[('1 cup', '1', '', '', 'cup'), ('1/2 cup', '1/2', '', '', 'cup')]

#### Ounces

In [84]:
cup_reg = re.compile(
r'''((\d+|\d+/\d+)             #Initial digit or fraction, captured for conversion
(\.\d+)?                       #Optional decimal
(?:\s)*?                       #Optional whitespace
(\d+/\d+)?                     #Optional fraction
(ounce(?:s)?|oz(?:s)?))''',re.X|re.I) #For one or more lbs, captured for conversion

#### Master

In [274]:
unit_reg = re.compile(
r'''((\d+|\d+/\d+)             #Initial digit or fraction, captured for conversion
((\.\d+)?|(?:\s)*?                       #Optional decimal
(\d+/\d+)?                     #Optional fraction
(\w+))''',re.X|re.I) #For one or more lbs, captured for conversion

In [296]:
unit_reg = re.compile(r'''
    (
        (\d+\.\d+           # decimal, e.g. 2.5
        | \d+\s+(\d+/\d+)       # mixed fraction, e.g. 2 1/2
        | \d+/\d+             # simple fraction, e.g. 2/3
        | \d+ )               # whole number, e.g. 2
        \s*                   # optional whitespace
        (\w+)                   # unit (any word)
    )
''', re.X | re.I)

#### Testing the Regex for digit and fraction

In [272]:
msg = '2 3/4 cups of something'
test_reg = re.compile(r'(\d+|\d+/\d+)(?:\s*)?(\d+/\d+)?') #This works...

In [278]:
res = test_reg.search(msg)
res.group()

'2 3/4 cups'

In [287]:
msg_d = '1 1/4 cups Greek yoghurt'
res_d = test_reg.search(msg)
res_d.group()

'2 3/4 cups'

In [291]:
unit_reg.findall(recipe2)

[('12', '1', '', '', '2'),
 ('1 cup', '1', '', '', 'cup'),
 ('0.74 ounce', '0.74', '', '', 'ounce'),
 ('5 tablespoons', '5', '', '', 'tablespoons'),
 ('1/2 cup', '1/2', '', '', 'cup'),
 ('1 teaspoon', '1', '', '', 'teaspoon')]

In [297]:
unit_reg.findall(recipe)

[('2 lb', '2', '', 'lb'),
 ('1 kg', '1', '', 'kg'),
 ('3 large', '3', '', 'large'),
 ('3 tsp', '3', '', 'tsp'),
 ('1 tbsp', '1', '', 'tbsp'),
 ('3 tbsp', '3', '', 'tbsp'),
 ('1 tbsp', '1', '', 'tbsp'),
 ('3 tbsp', '3', '', 'tbsp'),
 ('1 1/2 tbsp', '1 1/2', '1/2', 'tbsp'),
 ('1 tsp', '1', '', 'tsp'),
 ('2 cucumbers', '2', '', 'cucumbers'),
 ('3/4 cup', '3/4', '', 'cup'),
 ('1 1/4 cups', '1 1/4', '1/4', 'cups'),
 ('1 tbsp', '1', '', 'tbsp'),
 ('1 tbsp', '1', '', 'tbsp'),
 ('1 garlic', '1', '', 'garlic'),
 ('1/4 tsp', '1/4', '', 'tsp'),
 ('3 tomatoes', '3', '', 'tomatoes'),
 ('3 cucumbers', '3', '', 'cucumbers'),
 ('1/2 red', '1/2', '', 'red'),
 ('1/4 cup', '1/4', '', 'cup'),
 ('4 to', '4', '', 'to'),
 ('6 pita', '6', '', 'pita')]

In [121]:
test_w = None
if '/' in unit_reg.findall(recipe2)[4][1]:
    test_w = int(unit_reg.findall(recipe2)[4][1].split('/')[0]) / int(unit_reg.findall(recipe2)[4][1].split('/')[1])

test_w

0.5

In [153]:
unit_reg.findall(recipe2)[1][4]

'cup'

In [111]:
unit_reg.findall(recipe)

[('2 lb', '2', '', '', 'lb'),
 ('1 kg', '1', '', '', 'kg'),
 ('3 large', '3', '', '', 'large'),
 ('3 tsp', '3', '', '', 'tsp'),
 ('1 tbsp', '1', '', '', 'tbsp'),
 ('3 tbsp', '3', '', '', 'tbsp'),
 ('1 tbsp', '1', '', '', 'tbsp'),
 ('3 tbsp', '3', '', '', 'tbsp'),
 ('1 1', '1', '', '', '1'),
 ('2 tbsp', '2', '', '', 'tbsp'),
 ('1 tsp', '1', '', '', 'tsp'),
 ('2 cucumbers', '2', '', '', 'cucumbers'),
 ('3/4 cup', '3/4', '', '', 'cup'),
 ('1 1', '1', '', '', '1'),
 ('4 cups', '4', '', '', 'cups'),
 ('1 tbsp', '1', '', '', 'tbsp'),
 ('1 tbsp', '1', '', '', 'tbsp'),
 ('1 garlic', '1', '', '', 'garlic'),
 ('1/4 tsp', '1/4', '', '', 'tsp'),
 ('3 tomatoes', '3', '', '', 'tomatoes'),
 ('3 cucumbers', '3', '', '', 'cucumbers'),
 ('1/2 red', '1/2', '', '', 'red'),
 ('1/4 cup', '1/4', '', '', 'cup'),
 ('4 to', '4', '', '', 'to'),
 ('6 pita', '6', '', '', 'pita')]

## Conversions

In [124]:
#Original, only changed the last one so far
def cup_to_ml(cups):
    ml = cups * 237
    return round(ml, 2)

def oz_to_g(oz):
    g = oz * 28.3495
    return round(g, 2)

def tbsp_to_ml(tbsp):
    ml = tbsp * 14.7868
    return round(ml, 2)

def tsp_to_ml(tsp):
    ml = tsp * 4.92892
    return round(ml, 2)

def lbs_to_metric(lbs):
    if lbs > 2.2:
        metric = str(round(lbs * 0.453592, 2)) + ' kg'
    else:
        metric = str(round(lbs * 453.592, 2)) + ' g'
    return metric

In [164]:
def cup_to_ml(cups):
    ml = str(round(cups * 237,2)) + ' ml'
    return ml

def oz_to_g(oz):
    g = str(round(oz * 28.3495, 2)) + ' g'
    return g

def tbsp_to_ml(tbsp):
    ml = str(round(tbsp * 14.7868,2)) + ' ml'
    return ml

def tsp_to_ml(tsp):
    ml = str(round(tsp * 4.92892, 2)) + ' ml'
    return ml

def lbs_to_metric(lbs):
    if lbs > 2.2:
        metric = str(round(lbs * 0.453592, 2)) + ' kg'
    else:
        metric = str(round(lbs * 453.592, 2)) + ' g'
    return metric

In [125]:
lbs_to_metric(250)

'113.4 kg'

In [102]:
cup_to_ml(25)

5925

In [103]:
oz_to_g(12)

340.19

In [104]:
tsp_to_ml(3)

14.79

So, I think the best way I could do it is to run each sub in sequence and the conversion. This makes replacing the units easier. e.g. Run the sub for cups and the conversion for cups and finish that, then the program repeats for the next weight unit.

OR

Put them all together

Structure:

Full string[0] | First Unit[1] | Decimal or Fraction if not in first[2] | whitespace[3] | unit[4]

In [None]:
def cup_to_ml(cups):
    ml = str(round(cups * 237,2)) + ' ml'
    return ml

def oz_to_g(oz):
    g = str(round(oz * 28.3495, 2)) + ' g'
    return g

def tbsp_to_ml(tbsp):
    ml = str(round(tbsp * 14.7868,2)) + ' ml'
    return ml

def tsp_to_ml(tsp):
    ml = str(round(tsp * 4.92892, 2)) + ' ml'
    return ml

def lbs_to_metric(lbs):
    if lbs > 2.2:
        metric = str(round(lbs * 0.453592, 2)) + ' kg'
    else:
        metric = str(round(lbs * 453.592, 2)) + ' g'
    return metric

In [293]:
#Generally working as is, some troubleshooting to go over!

import pprint

def convert_weight_units(text):
    unit_reg = re.compile(
    r'''((\d+|\d+/\d+)             #Initial digit or fraction, captured for conversion
    (\.\d+)?                       #Optional decimal
    (?:\s)*?                       #Optional whitespace
    (\d+/\d+)?                     #Optional fraction
    (\w+))''',re.X|re.I) #For one or more lbs, captured for conversion

    weights = unit_reg.findall(text)

    #Next, loop and convert...

    for w in weights:                                #Determining the first number
        if '.' in w[1]:
            m = float(w[1])                  #If a decimal is found, m = that as a float
        elif '/' in w[1]:
            m = int(w[1].split('/')[0]) / int(w[1].split('/')[1])    #If a slash is present, split either side and calculate the value
        else:
            m = int(w[1])                    #Otherwise, it's a whole number

        if '.' in w[2]:                     # If decimals are found in position [2]
            d = float(w[2])                 # Make it a float
        elif '/' in w[2]:
            d = int(w[2].split('/')[0]) / int(w[2].split('/')[1]) # Same as above for fractions
        else:
            d = 0                                  # If neither are present, we'll assume it's empty and d==0

        no_to_convert = m + d                           #The actual amount that needs to be converted e.g. 1.5
#Having some trouble here on where everything should go, but this has created the number to convert within the loop, so I can work with it
    #If I remain within the loop

        unit = w[4]
        pattern = str(w[0])
       
        if unit in ('lbs','pounds','lb','pound'):
            text= re.sub(pattern, lbs_to_metric(no_to_convert), text)                #pounds to g or kg
        elif unit in ('tsp','tsps','teaspoon','teaspoons'):
            text= re.sub(pattern, tsp_to_ml(no_to_convert), text)                      #teaspoons to ml
        elif unit in ('tbsp','tbsps','tablespoon','tablespoons'):
            text= re.sub(pattern, tbsp_to_ml(no_to_convert), text)                      #Tablespoons to ml
        elif unit in ('cup','cups','c'):
            text= re.sub(pattern, cup_to_ml(no_to_convert), text)                       #Cups to ml
        elif unit in ('oz','ozs','ounce','ounces'):
            text= re.sub(pattern, oz_to_g(no_to_convert), text)                        #Ounces to grams

    pprint.pprint(text)

In [294]:
#The issue is with converting the fractions
#After seeing the issue might be with the regex, have changed the order it looks for the initial digit, looking for the fraction first, then an integer
#This must be the issue as the lone 1/4tsp of salt gets converted fine

import pprint
def convert_weight_units(text):

    weights = unit_reg.findall(text)

    #Next, loop and convert...

    for w in weights:                                #Determining the first number
        if '.' in w[1]:
            m = float(w[1])                  #If a decimal is found, m = that as a float
        elif '/' in w[1]:
            m = int(w[1].split('/')[0]) / int(w[1].split('/')[1])    #If a slash is present, split either side and calculate the value
        else:
            m = int(w[1])                    #Otherwise, it's a whole number

        if '.' in w[2]:                     # If decimals are found in position [2]
            d = float(w[2])                 # Make it a float
        elif '/' in w[2]:
            d = int(w[2].split('/')[0]) / int(w[2].split('/')[1]) # Same as above for fractions
        else:
            d = 0                                  # If neither are present, we'll assume it's empty and d==0

        no_to_convert = m + d                           #The actual amount that needs to be converted e.g. 1.5
#Having some trouble here on where everything should go, but this has created the number to convert within the loop, so I can work with it
    #If I remain within the loop

        unit = w[4]                     # unit will equal the measurement unit e.g. lbs
        pattern = str(w[0])             # The pattern to replace is equal to the entire string captured in the [0] group
       
        if unit in ('lbs','pounds','lb','pound'):
            text= re.sub(pattern, lbs_to_metric(no_to_convert), text)                #pounds to g or kg
        elif unit in ('tsp','tsps','teaspoon','teaspoons'):
            text= re.sub(pattern, tsp_to_ml(no_to_convert), text)                      #teaspoons to ml
        elif unit in ('tbsp','tbsps','tablespoon','tablespoons'):
            text= re.sub(pattern, tbsp_to_ml(no_to_convert), text)                      #Tablespoons to ml
        elif unit in ('cup','cups','c'):
            text= re.sub(pattern, cup_to_ml(no_to_convert), text)                       #Cups to ml
        elif unit in ('oz','ozs','ounce','ounces'):
            text= re.sub(pattern, oz_to_g(no_to_convert), text)                        #Ounces to grams

    pprint.pprint(text)

In [226]:
test_w = None
oregano = unit_reg.findall(recipe)[6][2]
if '/' in oregano:
    test_w = int(oregano.split('/')[0]) / int(oregano.split('/')[1])

test_w

In [213]:
oregano

''

In [261]:
unit_reg.findall(recipe)[9] # I think this is it... which means it's the regex causing the issue?

('2 tbsp', '2', '', '', 'tbsp')

In [257]:
unit_reg.findall(recipe)[6][1]

'1'

In [295]:
convert_weight_units(recipe)

ValueError: invalid literal for int() with base 10: '1 1'

In [181]:
convert_weight_units(recipe2)

1 (237 ml-count) package) Hawaiian rolls

237 ml apple pie filling

2 (237 ml) pouches apple cider powdered drink mix (such as Alpine Spiced Apple Cider®)

237 ml butter, divided

237 ml sugar

237 ml cinnamon


In [151]:
unit_reg.sub(lbs_to_metric(2), recipe2)  

'1 (907.18 g-count) package) Hawaiian rolls\n\n907.18 g apple pie filling\n\n2 (907.18 g) pouches apple cider powdered drink mix (such as Alpine Spiced Apple Cider®)\n\n907.18 g butter, divided\n\n907.18 g sugar\n\n907.18 g cinnamon'

In [159]:
if cup_reg.findall(recipe2)[0][4] in ('cup','cups','c'):
    print(unit_reg.sub('hullabalooh', recipe2))