In [1]:
# lets start by getting pandas, regex, and numpy
import pandas as pd
import re
import numpy as np
import requests
from bs4 import BeautifulSoup

In [2]:
recipes = pd.read_csv('recipes_3.csv')
ingredients_df = pd.read_csv('ingredient_df.csv')
pantry = pd.read_csv('pantry_2.csv')
# lets go to the download folder and import the ingredients_3.csv file
conversion = pd.read_csv('conversion.csv')


In [3]:

def calculate_cost_df(df):
    """
    Calculate the total cost of purchasing ingredients for a dish.

    Args:
    df (pd.DataFrame): DataFrame containing dish ingredients and prices.

    Returns:
    pd.DataFrame: DataFrame with an additional column for the total cost of purchasing ingredients for each dish.
    """
    # Calculate the cost for purchasing each ingredient
    # Assuming 'price' is the price for 'purchase_size' amount of the ingredient
    for item in df['ingredients'].unique():
        price = df[df['ingredients'] == item]['price'].values[0]
        amount = df[df['ingredients'] == item]['amounts'].values[0]
        purchase_size = df[df['ingredients'] == item]['purchase_size'].values[0]

        if amount > purchase_size:
            upper = np.ceil(amount / purchase_size)
            purchase_cost = upper * price
        else:
            purchase_cost = price

        df.loc[df['ingredients'] == item, 'price'] = purchase_cost
            
    # Calculate the total cost for each dish
    total_cost = df.groupby('urls')['price'].sum().reset_index()

    # Merge total cost back into the original dataframe
    df = df.merge(total_cost, on='urls', how='left', suffixes=('', '_total'))

    return df

def ingredient_divider(url):
    local_ingredient_amount = len(pantry[pantry.urls == url])
    local_matches = len(pantry[(pantry.urls == url) & (pantry.in_recipe == True)])
    if local_ingredient_amount == 0 or local_matches == 0:
        return 0
    else:
        return local_matches / local_ingredient_amount

def matching_ingredients(url):
    sample_ingredients = pantry[pantry.urls == url].ingredients.tolist()
    pantry['in_recipe'] = False
    pantry.loc[pantry.ingredients.isin(sample_ingredients), 'in_recipe'] = True
    recipes['matches'] = recipes.urls.apply(ingredient_divider)

def matching_ingredient_df(df):
    sample_ingredients = df.ingredients.tolist()
    pantry['in_recipe'] = False
    pantry.loc[pantry.ingredients.isin(sample_ingredients), 'in_recipe'] = True
    recipes['matches'] = recipes.urls.apply(ingredient_divider)

def check_fridge_availability(ingredient, amount_needed):
    """
    Check if the ingredient is available in the fridge and if there is enough of it.

    Args:
    ingredient (str): The name of the ingredient.
    amount_needed (float): The amount of the ingredient needed.

    Returns:
    float: The amount that still needs to be purchased.
    """
    if ingredient in fridge['ingredients'].values:
        available_amount = fridge[fridge['ingredients'] == ingredient]['amounts'].values[0]
        if available_amount >= amount_needed and amount_needed > 0:
            # Enough available, no need to purchase more
            return 0
        else:
            # Some amount still needs to be purchased
            return amount_needed - available_amount
    else:
        # Ingredient not in fridge, need to purchase the entire amount
        return amount_needed

def calculate_cost_and_remaining(row):
    amount_to_purchase = check_fridge_availability(row['ingredients'], row['amounts'])
    if row['purchase_size'] == 0 or pd.isna(row['purchase_size']):
        # Handle the case where purchase_size is zero or NaN
        # You can return a default value or handle it as per your requirement
        return pd.Series([0, 0], index=['ingredient_cost', 'remaining'])
    
    if amount_to_purchase > 0:
        if amount_to_purchase > row['purchase_size']:
            num_units = np.ceil(amount_to_purchase / row['purchase_size'])
            cost = num_units * row['local_price']
            remaining = (num_units * row['purchase_size']) - amount_to_purchase
        else:
            cost = row['local_price']
            remaining = row['purchase_size'] - amount_to_purchase
    else:
        cost = 0
        remaining = 0  # Or adjust based on what's left in the fridge
    
    return pd.Series([cost, remaining], index=['ingredient_cost', 'remaining'])

# Existing function
def calculate_cost_of_urls(urls):
    # Assuming combined_df is your DataFrame containing the grocery list
    combined_df = pantry[pantry.urls.isin(urls)].copy()
    # Group by ingredients to sum the amounts needed for each
    grouped_df = combined_df.groupby('ingredients').agg({'amounts': 'sum', 'purchase_size': 'first', 'local_price': 'first'}).reset_index()

    # Apply the function to calculate the cost and remaining amount for each ingredient
    grouped_df[['ingredient_cost', 'remaining']] = grouped_df.apply(calculate_cost_and_remaining, axis=1)

    # Summing up the total cost
    total_cost = grouped_df['ingredient_cost'].sum()

    # print(f"Total Cost: {total_cost}")
    grouped_df['cumsum'] = grouped_df['ingredient_cost'].cumsum()
    return grouped_df


In [9]:
pantry

Unnamed: 0,urls,amounts,units,ingredients,purchase_unit,spice,price,purchase_size,local_price,pareto,in_recipe,matches
0,https://www.valdemarsro.dk/indisk-vegetar-bowl/,1.0,spsk,mayonnaise,g,0.0,40.0,350.0,40.0,True,True,0.173913
1,https://www.valdemarsro.dk/indisk-vegetar-bowl/,1.0,stk,æg,stk,0.0,36.0,10.0,36.0,True,True,0.173913
2,https://www.valdemarsro.dk/indisk-vegetar-bowl/,0.5,tsk,røget paprika,stk,1.0,0.0,1.0,0.0,True,False,0.173913
3,https://www.valdemarsro.dk/indisk-vegetar-bowl/,250.0,g,blomkål,stk,0.0,30.0,1.0,30.0,True,False,0.173913
4,https://www.valdemarsro.dk/indisk-vegetar-bowl/,50.0,g,pankorasp,,,,,0.0,False,False,0.173913
...,...,...,...,...,...,...,...,...,...,...,...,...
27052,Kikærte og avocado salat med persille og tomat,50,g,bredbladet persille,,0.0,0.0,0.0,0.0,0,False,0.000000
27053,Kikærte og avocado salat med persille og tomat,1/2,stk,rød chili,,0.0,0.0,0.0,0.0,0,False,0.000000
27054,Kikærte og avocado salat med persille og tomat,2,fed,hvidløg,,0.0,0.0,0.0,0.0,0,False,0.000000
27055,Kikærte og avocado salat med persille og tomat,1/2,stk,citron,,0.0,0.0,0.0,0.0,0,False,0.000000


In [4]:
priority = recipes[(recipes.vegeterian == True) & (recipes.dinner == True)].urls.to_list()

In [5]:
recipes

Unnamed: 0,urls,text,fullfillment,dinner,addon,vegeterian,price_add,matches
0,https://www.valdemarsro.dk/indisk-vegetar-bowl/,Indisk vegetarbowl med rispilaf og sprød blomk...,0.0,1,0,1,7714.0,0.434783
1,https://www.valdemarsro.dk/pasta-med-graeskar/,Pasta med græskar og masser af god smag - opsk...,0.0,1,0,1,6415.0,0.363636
2,https://www.valdemarsro.dk/blomkaal-og-halloum...,Blomkål og halloumi i sursød sauce med dejlig ...,0.0,1,0,1,6278.0,0.250000
3,https://www.valdemarsro.dk/graeskarmuffins/,Græskarmuffins - opskrift på saftige muffins m...,0.0,0,0,0,5100.0,0.200000
4,https://www.valdemarsro.dk/vegetar-taco/,Vegetar taco med krydret bagt blomkål og mango...,0.0,1,0,1,4967.0,0.250000
...,...,...,...,...,...,...,...,...
3255,Kikærte og mango curry med basmati ris,Navn: Kikærte og mango curry med basmati ris\n...,0.0,0,0,0,0.0,0.000000
3256,Kikærte- og kyllingefad med avocado og cherryt...,Navn: Kikærte- og kyllingefad med avocado og c...,0.0,0,0,0,0.0,0.000000
3257,Persille-ris med kikærter og yoghurt-dressing,Navn: Persille-ris med kikærter og yoghurt-dre...,0.0,0,0,0,0.0,0.000000
3258,Avocado og kikærte tacos,Navn: Avocado og kikærte tacos\n\nIngredienser...,0.0,0,0,0,0.0,0.000000


In [6]:
# now lets pick a random vegeterian recipe and see how many matches it has
url = recipes[recipes.vegeterian == True].sample().urls.values[0]
# now lets make a column in the recipe dataframe that shows the matching ingredients
# we will do this by giving a function a url, and then it will iterate through the pantry, matching the ingredients against the original recipe
# then it will count how many ingredients match, and return that number

# now lets pick a random vegeterian recipe and see how many matches it has
url = recipes[recipes.vegeterian == True].sample().urls.values[0]

url = 'https://www.valdemarsro.dk/fransk-bondesalat/'
matching_ingredients(url)

In [7]:
recipes[recipes.dinner == True].sort_values(by='matches', ascending=False).head(10)

Unnamed: 0,urls,text,fullfillment,dinner,addon,vegeterian,price_add,matches
543,https://www.valdemarsro.dk/fransk-bondesalat/,Fransk bondesalat - opskrift på en virkelig fa...,0.0,1,0,1,0.0,1.0
660,https://www.valdemarsro.dk/salade-nicoise/,Salade Nicoise - Opskrift på den klassiske sal...,0.0,1,0,0,40.0,0.727273
529,https://www.valdemarsro.dk/salade-lyonnaise/,Salade Lyonnaise Opskrift - en skøn frokost sa...,0.0,1,0,0,70.0,0.692308
963,https://www.valdemarsro.dk/blt-sandwich/,BLT sandwich – opskrift på sandwich med bacon ...,0.0,1,0,0,59.0,0.625
799,https://www.valdemarsro.dk/salade-chevre-chaud/,Salade Chevre Chaud - Opskrift på lækker salat...,0.0,1,0,1,50.0,0.6
1186,https://www.valdemarsro.dk/aeggekage-opskrift/,Æggekage med kartofler og purløg - opskrift\nO...,,1,0,0,39.0,0.571429
413,https://www.valdemarsro.dk/bagt-torsk-med-kape...,Bagt torsk med kapersvinaigrette og grønt\nOps...,0.0,1,0,0,157.0,0.545455
1273,https://www.valdemarsro.dk/svinekrone/,Svinekrone med honning og timian – opskrift ti...,0.0,1,0,0,0.0,0.5
878,https://www.valdemarsro.dk/pariserbof/,Pariserbøf - Opskrift på den bedste klassiske ...,0.0,1,0,0,97.0,0.5
816,https://www.valdemarsro.dk/roedbedeboeffer-ala...,Rødbedebøffer - Opskrift på lækre vegetar bøff...,0.0,1,0,1,68.0,0.461538


In [None]:
pantry

In [8]:
price_cap = 400
test_var = True
local_urls = ['https://www.valdemarsro.dk/fransk-bondesalat/']

fridge = pd.DataFrame(columns=['ingredients', 'amounts'])
fridge.loc[0] = ['olivenolie',  990]

test_df = calculate_cost_of_urls(local_urls)

test_df

TypeError: '>' not supported between instances of 'str' and 'int'

In [37]:
ingredients_df[ingredients_df.ingredients == 'smør']

Unnamed: 0,ingredients,pareto,purchase_unit,type,price,purchase_size,count
0,smør,True,g,0.0,26.0,200.0,577


In [42]:
def set_individual_price(row):
    amount = row['amounts']
    purchase_size = row['purchase_size']
    price = row['price']
    input_units = row['units']
    purchase_unit = row['purchase_unit']
    if len(conversion[(conversion.units == input_units) & (conversion.purchase_unit == purchase_unit)] ) > 0:
        faktor = conversion[(conversion.units == input_units) & (conversion.purchase_unit == purchase_unit)].rate.values[0]
    else:
        return 0

    if purchase_size == 0:
        return 0
    else:
        if input_units == purchase_unit:
            y = np.ceil(amount / purchase_size) * price if amount > purchase_size else price
        else:
            if input_units == 'g' and purchase_unit == 'stk':
                y = price
            else:
                # we need to make it so that it looks in the conversion table and then returns the 
                y = np.ceil(amount * faktor / purchase_size) * price  if amount > purchase_size else price
    
    # lets also round the product of faktor by purchase_size and amount
    z = np.ceil(amount / purchase_size) * faktor if amount > purchase_size else faktor * amount
    
    return y

pantry['local_price'] = pantry.apply(set_individual_price, axis=1)

In [43]:
local_urls = recipes[(recipes.dinner == True) & (recipes.vegeterian == True)].sort_values('matches', ascending=False).head(10).sample(1).urls.tolist()

In [44]:
local_urls

['https://www.valdemarsro.dk/aspargessuppe/']

In [54]:
fridge = pd.DataFrame(columns=['ingredients', 'amounts'])
fridge.loc[0] = ['olivenolie',  990]


local_urls = find_recipes(local_urls=local_urls, price_cap=500, dishes=3)

In [57]:
gpt_test = calculate_cost_of_urls(local_urls)
# now lets make sure that each ingredient has their purchase_unit
gpt_test = gpt_test.merge(ingredients_df[['ingredients', 'purchase_unit']], on='ingredients', how='left')

gpt_test[['ingredients', 'remaining', 'purchase_unit']].to_clipboard(index=False)

In [58]:
local_urls

['https://www.valdemarsro.dk/melanzane-alla-parmigiana/',
 'https://www.valdemarsro.dk/wraps-med-ristede-kikaerter-og-blomkaal-med-sumak/',
 'https://www.valdemarsro.dk/pasta-aglio-e-olio/',
 'https://www.valdemarsro.dk/citronpasta/']

Unnamed: 0,ingredients,amounts
0,olivenolie,985.75
1,agurk,0.0
2,avocado,33.5
3,bredbladet persille,178.5
4,burgerboller,2.0
5,cayenne peber,0.75
6,frisk basilikum,0.25
7,hvidløg,3.5
8,kikærter,399.0
9,løg,3.5


In [46]:
fridge = pd.DataFrame(columns=['ingredients', 'amounts'])
fridge.loc[0] = ['olivenolie',  990]

def find_recipes(local_urls, price_cap = 450, dishes = 2):
    test_var = True

    tries = 0

    test_df = calculate_cost_of_urls(local_urls)
    current_price = test_df['cumsum'].max()

    while test_var:
        next_url = recipes[(recipes.vegeterian == True) &(recipes.dinner == True) & (~recipes.urls.isin(local_urls))].sort_values(['matches', 'price_add'], ascending=False).head(40).sample(1).urls.values[0]

        local_urls.append(next_url)

        potential_df = calculate_cost_of_urls(local_urls)

        if potential_df['cumsum'].max() < price_cap:
            if len(local_urls) > dishes - 1:
                test_var = False
                # print(local_urls)
        else:
            local_urls.remove(next_url)
            test_var = True

        if tries > 1000:
            print('i tried a thousand times')
            test_var = False

        tries += 1
    return local_urls

def set_fridge(potential_df):
    for ingredient in potential_df.ingredients:
        if isinstance(ingredient, str):
        # if the value is in the fridge we need to subtract the amounts from the fridge
            if ingredient in fridge.ingredients.values:
                amount = potential_df[potential_df.ingredients == ingredient].amounts.values[0]
                if fridge[fridge.ingredients == ingredient].amounts.values[0] == 0:
                    pass
                else:
                    if fridge[fridge.ingredients == ingredient].amounts.values[0] - amount < 0:
                        pass
                    else:
                        fridge.loc[fridge.ingredients == ingredient, 'amounts'] = fridge[fridge.ingredients == ingredient].amounts.values[0] - amount
            if ingredient not in fridge.ingredients.values:
            # if the value is not in the fridge we need to add it to the fridge
                remainder = potential_df[potential_df.ingredients == ingredient].remaining.values[0]
                if remainder == 0:
                    pass
                else:
                    fridge.loc[len(fridge)] = [ingredient, remainder]
    return fridge
           
local_urls = find_recipes(local_urls=local_urls, price_cap=500, dishes=2)

history = pd.DataFrame(columns=['dish_1', 'dish_2', 'dish_3', 'price'])

old_urls = []

for i in range(3):
    local_urls = find_recipes(local_urls=local_urls, price_cap=500, dishes=3)
    if len(local_urls) == 1:
        local_urls = recipes[(recipes.dinner == True) & (recipes.vegeterian == True) & (~recipes.urls.isin(old_urls))].sort_values('matches', ascending=False).head(10).sample(1).urls.tolist()
        find_recipes(local_urls=local_urls, price_cap=500, dishes=3)
    local_groceries = calculate_cost_of_urls(local_urls)
    price = local_groceries['cumsum'].max()
    # fridge = set_fridge(local_groceries)
    sample = local_urls
    history.loc[len(history)] = [local_urls[0], local_urls[1], local_urls[2], price]
    old_urls = local_urls
    matching_ingredient_df(fridge)
    local_urls = recipes[(recipes.dinner == True) & (recipes.vegeterian == True) & (~recipes.urls.isin(old_urls))].sort_values('matches', ascending=False).head(10).sample(1).urls.tolist()

i tried a thousand times
i tried a thousand times


In [47]:
sample = calculate_cost_of_urls(history.loc[0].values.tolist()[:3])

In [49]:
fridge

Unnamed: 0,ingredients,amounts
0,olivenolie,985.75
1,agurk,0.0
2,avocado,33.5
3,bredbladet persille,178.5
4,burgerboller,2.0
5,cayenne peber,0.75
6,frisk basilikum,0.25
7,hvidløg,3.5
8,kikærter,399.0
9,løg,3.5


In [48]:
sample

Unnamed: 0,ingredients,amounts,purchase_size,local_price,ingredient_cost,remaining,cumsum
0,agurk,0.5,1.0,9.0,9.0,0.5,9.0
1,avocado,1.0,35.0,5.0,0.0,0.0,9.0
2,bredbladet persille,0.5,180.0,21.0,0.0,0.0,9.0
3,burgerboller,2.0,6.0,40.0,0.0,0.0,9.0
4,cayenne peber,0.25,1.0,0.0,0.0,0.0,9.0
5,chiliflager,0.25,,0.0,0.0,0.0,9.0
6,dryp ketchup,2.0,,0.0,0.0,0.0,9.0
7,frisk basilikum,0.5,1.0,0.0,0.0,0.75,9.0
8,grøn salat,0.5,,0.0,0.0,0.0,9.0
9,hvidløg,2.5,12.0,30.0,0.0,0.0,9.0


In [63]:
sample

Unnamed: 0,ingredients,amounts,purchase_size,local_price,ingredient_cost,remaining,cumsum,cumsum_local_price
0,aubergine,1.0,1.0,16.0,16.0,0.5,16.0,16.0
1,bredbladet persille,1.0,180.0,21.0,0.0,0.0,16.0,37.0
2,butterbeans,0.5,,0.0,0.0,0.0,16.0,37.0
3,cayenne peber,1.0,1.0,0.0,0.0,0.5,16.0,37.0
4,citron,0.25,6.0,15.0,0.0,0.0,16.0,52.0
5,dåser hakkede tomater,1.5,,0.0,0.0,0.0,16.0,52.0
6,feta,25.0,150.0,33.0,33.0,137.5,49.0,85.0
7,frisk basilikum,0.5,1.0,0.0,0.0,0.5,49.0,85.0
8,grøntsagsbouillon,0.5,200.0,45.0,0.0,0.0,49.0,130.0
9,gulerod,1.5,8.0,20.0,0.0,0.0,49.0,150.0


In [45]:
calculate_cost_of_urls(local_urls)

Unnamed: 0,ingredients,amounts,purchase_size,local_price,ingredient_cost,remaining,cumsum
0,champignon,200.0,200.0,18.0,18.0,0.0,18.0
1,grøntsagsbouillon,0.5,200.0,45.0,0.0,0.0,18.0
2,løg,0.5,6.0,15.0,0.0,0.0,18.0
3,olivenolie,0.5,1000.0,160.0,0.0,0.0,18.0
4,persille,0.5,180.0,21.0,0.0,0.0,18.0
5,piskefløde,0.5,27.0,500.0,0.0,0.0,18.0
6,smør,10.0,5.0,24.0,48.0,0.0,66.0
7,squash,150.0,1.0,8.0,1200.0,0.0,1266.0
8,svampe,100.0,,0.0,0.0,0.0,1266.0
9,tarteletter,6.0,,0.0,0.0,0.0,1266.0


In [64]:
history

Unnamed: 0,dish_1,dish_2,dish_3,price
0,https://www.valdemarsro.dk/cremet-tomatsuppe-m...,https://www.valdemarsro.dk/pita-med-aubergine-...,https://www.valdemarsro.dk/krydrede-boennedeller/,405.0
1,https://www.valdemarsro.dk/malfatti/,https://www.valdemarsro.dk/gnocchi-sommersalat/,https://www.valdemarsro.dk/pizza-stromboli/,197.5
2,https://www.valdemarsro.dk/risotto-med-svampe/,https://www.valdemarsro.dk/ravioli-med-svampe-...,https://www.valdemarsro.dk/bradepandepizza-med...,99.0
3,https://www.valdemarsro.dk/cremet-tomatsuppe-m...,https://www.valdemarsro.dk/quinoadeller/,https://www.valdemarsro.dk/gnocchi-sommersalat/,128.0
4,https://www.valdemarsro.dk/risotto-med-svampe/,https://www.valdemarsro.dk/vegetar-fyldte-pebe...,https://www.valdemarsro.dk/krydrede-boennedeller/,82.0
5,https://www.valdemarsro.dk/ravioli-med-svampe-...,https://www.valdemarsro.dk/citronpasta/,https://www.valdemarsro.dk/vegetar-lady-og-vag...,167.0
6,https://www.valdemarsro.dk/spansk-tortilla/,https://www.valdemarsro.dk/quinoadeller/,https://www.valdemarsro.dk/bradepandepizza-med...,66.0
7,https://www.valdemarsro.dk/malfatti/,https://www.valdemarsro.dk/peinirli/,https://www.valdemarsro.dk/feta-pasta-med-tomat/,254.0
8,https://www.valdemarsro.dk/spansk-tortilla/,https://www.valdemarsro.dk/pizza-stromboli/,https://www.valdemarsro.dk/grontsagsfad/,222.0
9,https://www.valdemarsro.dk/vegetar-lady-og-vag...,https://www.valdemarsro.dk/vegetarisk-shepards...,https://www.valdemarsro.dk/citronpasta/,282.0


In [65]:
ingredients_df.to_csv('ingredient_df.csv', index=False)
recipes.to_csv('recipes_2.csv', index=False)
pantry.to_csv('matching.csv', index=False)
fridge.to_csv('fridge.csv', index=False)
history.to_csv('history.csv', index=False)
conversion.to_csv('conversion.csv', index=False)

In [None]:
# this might be sufficient first step in getting meal planner to work

# next step is to find a way to incrase the overlap
# or just give the as

In [283]:
potential_df[]

Unnamed: 0,ingredients,amounts,purchase_size,local_price,ingredient_cost,remaining,cumsum
0,agurk,0.25,1.0,9.0,0.0,0.0,0.0
1,avocado,0.5,35.0,5.0,0.0,0.0,0.0
2,bacon,30.0,100.0,20.0,20.0,70.0,20.0
3,blandet salat,62.5,100.0,20.0,20.0,75.0,40.0
4,cayenne peber,0.25,1.0,0.0,0.0,0.75,40.0
5,cornichoner,25.0,,0.0,0.0,0.0,40.0
6,cremefraiche 8 %,0.5,,0.0,0.0,0.0,40.0
7,dijon sennep,0.5,380.0,35.0,0.0,0.0,40.0
8,dryp ketchup,2.0,,0.0,0.0,0.0,40.0
9,frisk dild,0.5,180.0,9.0,9.0,179.5,49.0


In [284]:
fridge

Unnamed: 0,ingredients,amounts
0,olivenolie,988.5
1,agurk,0.5
2,avocado,34.5
3,babykartofler,825.0
4,blandet salat,37.5
5,burgerboller,4.0
6,dijon sennep,378.5
7,grønne asparges,150.0
8,gulerod,7.5
9,havregryn,999.5


In [268]:
# now lets make sure that we take the remaining ingredients and add them to the fridge
# if the remaining val is nan we pass them
# if the remaining val is 0 we take the amounts and subtract them from the fridge values






Unnamed: 0,ingredients,amounts
0,olivenolie,988.5
1,agurk,0.5
2,avocado,34.5
3,babykartofler,825.0
4,blandet salat,37.5
5,burgerboller,4.0
6,dijon sennep,378.5
7,grønne asparges,150.0
8,gulerod,7.5
9,havregryn,999.5


In [176]:
new_test_df = calculate_cost_df(pantry[pantry.urls == next_url[0]])
new_test_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


Unnamed: 0,urls,amounts,units,ingredients,purchase_unit,spice,price,purchase_size,local_price,pareto,price_total
0,https://www.valdemarsro.dk/roedbedeslaw/,200.0,g,rødbede,g,0.0,15.0,1000.0,15.0,True,331.0
1,https://www.valdemarsro.dk/roedbedeslaw/,0.5,stk,rødløg,stk,0.0,12.0,7.0,12.0,True,331.0
2,https://www.valdemarsro.dk/roedbedeslaw/,0.5,stk,æble,stk,0.0,18.0,9.0,18.0,True,331.0
3,https://www.valdemarsro.dk/roedbedeslaw/,0.5,stk,citron,stk,0.0,15.0,6.0,15.0,True,331.0
4,https://www.valdemarsro.dk/roedbedeslaw/,12.5,g,tranebær,,,,,0.0,False,331.0
5,https://www.valdemarsro.dk/roedbedeslaw/,0.5,spsk,olivenolie,ml,0.0,160.0,1000.0,160.0,True,331.0
6,https://www.valdemarsro.dk/roedbedeslaw/,0.5,tsk,dijon sennep,g,0.0,35.0,380.0,35.0,True,331.0
7,https://www.valdemarsro.dk/roedbedeslaw/,0.5,spsk,ingefær,g,0.0,15.0,200.0,15.0,True,331.0
8,https://www.valdemarsro.dk/roedbedeslaw/,0.5,tsk,honning,g,0.0,25.0,400.0,25.0,True,331.0
9,https://www.valdemarsro.dk/roedbedeslaw/,0.5,spsk,solsikkekerner,g,0.0,11.0,250.0,11.0,True,331.0


In [211]:
import pandas as pd
import numpy as np
import math
def calculate_cost_of_urls(urls):
    # Assuming combined_df is your DataFrame containing the grocery list
    combined_df = pantry[pantry.urls.isin(urls)].copy()
    # Group by ingredients to sum the amounts needed for each
    grouped_df = combined_df.groupby('ingredients').agg({'amounts': 'sum', 'purchase_size': 'first', 'local_price': 'first'}).reset_index()

    # Function to calculate the cost and remaining amount
    def calculate_cost_and_remaining(row):
        if row['amounts'] > row['purchase_size']:
            num_units = np.ceil(row['amounts'] / row['purchase_size'])
            cost = num_units * row['local_price']
            remaining = (num_units * row['purchase_size']) - row['amounts']
        else:
            cost = row['local_price']
            remaining = row['purchase_size'] - row['amounts']
        return pd.Series([cost, remaining], index=['ingredient_cost', 'remaining'])

    # Apply the function to calculate the cost and remaining amount for each ingredient
    grouped_df[['ingredient_cost', 'remaining']] = grouped_df.apply(calculate_cost_and_remaining, axis=1)

    # Summing up the total cost
    total_cost = grouped_df['ingredient_cost'].sum()

    print(f"Total Cost: {total_cost}")
    grouped_df['cumsum'] = grouped_df['ingredient_cost'].cumsum()
    return grouped_df

groceries = calculate_cost_of_urls(local_urls)
groceries

Total Cost: 541.0


Unnamed: 0,ingredients,amounts,purchase_size,local_price,ingredient_cost,remaining,cumsum
0,babykartofler,175.0,1000.0,25.0,25.0,825.0,25.0
1,citron,0.5,6.0,15.0,15.0,5.5,40.0
2,dijon sennep,1.5,380.0,35.0,35.0,378.5,75.0
3,grøn salat,125.0,,0.0,0.0,,75.0
4,grønne asparges,100.0,250.0,35.0,35.0,150.0,110.0
5,haricots verts,100.0,,0.0,0.0,,110.0
6,honning,1.5,400.0,25.0,25.0,398.5,135.0
7,ingefær,0.5,200.0,15.0,15.0,199.5,150.0
8,mandler,0.5,150.0,25.0,25.0,149.5,175.0
9,mayonnaise,1.0,350.0,40.0,40.0,349.0,215.0


In [207]:
local_urls

['https://www.valdemarsro.dk/fransk-bondesalat/',
 'https://www.valdemarsro.dk/roedbedeslaw/']

In [205]:
grouped_df[['ingredients', 'remaining']].to_clipboard(index=False)

In [198]:
def matching_ingredients_df(dataframe):
    sample_ingredients = dataframe.ingredients.tolist()
    sample_ingredients = set(sample_ingredients)
    recipes['matches'] = 0
    for url in recipes.urls:
        matches = 0
        recipe_ingredients = pantry[(pantry.urls == url) & (pantry.pareto == True)].ingredients.tolist()
        recipe_ingredients = set(recipe_ingredients)
        if sample_ingredients == recipe_ingredients:
            matches = len(sample_ingredients)
        else:
            for ingredient in sample_ingredients:
                if ingredient in recipe_ingredients:
                    matches += 1

        local_ingredients = (len(pantry[pantry.urls == url]))
        if local_ingredients == 0:
            recipes.loc[recipes.urls == url, 'matches'] = 0
        else:
            recipes.loc[recipes.urls == url, 'matches'] = matches / local_ingredients


matching_ingredients_df(grouped_df)

In [203]:
recipes[recipes.dinner == True].sort_values('matches', ascending=False).head(20)

Unnamed: 0,urls,text,fullfillment,dinner,addon,vegeterian,price_add,matches,percentile
543,https://www.valdemarsro.dk/fransk-bondesalat/,Fransk bondesalat - opskrift på en virkelig fa...,0.0,True,False,True,0.0,1.0,1
884,https://www.valdemarsro.dk/roedbedeslaw/,Rødbedeslaw - opskrift på en skøn råkost med r...,0.0,True,False,True,99.0,0.909091,1
660,https://www.valdemarsro.dk/salade-nicoise/,Salade Nicoise - Opskrift på den klassiske sal...,0.0,True,False,False,40.0,0.727273,1
529,https://www.valdemarsro.dk/salade-lyonnaise/,Salade Lyonnaise Opskrift - en skøn frokost sa...,0.0,True,False,False,70.0,0.692308,1
963,https://www.valdemarsro.dk/blt-sandwich/,BLT sandwich – opskrift på sandwich med bacon ...,0.0,True,False,False,59.0,0.625,1
799,https://www.valdemarsro.dk/salade-chevre-chaud/,Salade Chevre Chaud - Opskrift på lækker salat...,0.0,True,False,True,50.0,0.6,1
1614,https://www.valdemarsro.dk/citron-og-rosmarin-...,Ovnstegt kylling med citron og rosmarin - Opsk...,0.0,True,False,False,15.0,0.6,0
1186,https://www.valdemarsro.dk/aeggekage-opskrift/,Æggekage med kartofler og purløg - opskrift\nO...,0.0,True,False,False,39.0,0.571429,1
1242,https://www.valdemarsro.dk/pink-tundeller/,Tundeller – opskrift på sunde og lækre pink de...,0.0,True,False,False,72.0,0.545455,1
413,https://www.valdemarsro.dk/bagt-torsk-med-kape...,Bagt torsk med kapersvinaigrette og grønt\nOps...,0.0,True,False,False,157.0,0.545455,1


In [182]:
matching_ingredients()

Unnamed: 0,urls,amounts,units,ingredients,purchase_unit,spice,price,purchase_size,local_price,pareto,price_total,ingredient_cost
0,https://www.valdemarsro.dk/fransk-bondesalat/,2.0,stk,æg,stk,0.0,36.0,10.0,36.0,True,442.0,72.0
1,https://www.valdemarsro.dk/fransk-bondesalat/,1.0,spsk,æblecidereddike,ml,0.0,53.0,750.0,53.0,True,442.0,53.0
2,https://www.valdemarsro.dk/fransk-bondesalat/,2.0,stk,skiver bacon,,,,,0.0,True,442.0,0.0
3,https://www.valdemarsro.dk/fransk-bondesalat/,1.0,tsk,smør,g,0.0,12.0,5.0,12.0,True,442.0,12.0
4,https://www.valdemarsro.dk/fransk-bondesalat/,1.0,stk,skiver brød,,,,,0.0,True,442.0,0.0
5,https://www.valdemarsro.dk/fransk-bondesalat/,100.0,g,grønne asparges,g,0.0,35.0,250.0,35.0,True,442.0,3500.0
6,https://www.valdemarsro.dk/fransk-bondesalat/,100.0,g,haricots verts,,,,,0.0,True,442.0,0.0
7,https://www.valdemarsro.dk/fransk-bondesalat/,125.0,g,grøn salat,,,,,0.0,True,442.0,0.0
8,https://www.valdemarsro.dk/fransk-bondesalat/,0.5,stk,rødløg,stk,0.0,12.0,7.0,12.0,True,442.0,6.0
9,https://www.valdemarsro.dk/fransk-bondesalat/,175.0,g,babykartofler,g,0.0,25.0,1000.0,25.0,True,442.0,4375.0


In [179]:
# now lets make a function that takes a list of urls and returns a grocery list
input_df = pantry[pantry.urls.isin(local_urls)].copy().reset_index(drop=True)
grocery_df = pd.DataFrame(columns=['ingredients', 'amounts', 'purchase_size', 'price', 'purchase_cost', 'urls'])

for i in range(len(input_df)):
    row = input_df.iloc[i]
    if row['ingredients'] in grocery_df['ingredients'].values:
        grocery_df.loc[grocery_df['ingredients'] == row['ingredients'], 'amounts'] += row['amounts']
    else:
        grocery_df = grocery_df.append(row)

test = grocery_df[grocery_df.columns[:4]].copy()
test['cumsum'] = test.price.cumsum()
test

Unnamed: 0,ingredients,amounts,purchase_size,price,cumsum
0,æg,2.0,10.0,36.0,36.0
1,æblecidereddike,1.0,750.0,53.0,89.0
2,skiver bacon,2.0,,,
3,smør,1.0,5.0,12.0,101.0
4,skiver brød,1.0,,,
5,grønne asparges,100.0,250.0,35.0,136.0
6,haricots verts,100.0,,,
7,grøn salat,125.0,,,
8,rødløg,1.0,7.0,12.0,148.0
9,babykartofler,175.0,1000.0,25.0,173.0


In [180]:
test.sort_values('price')

Unnamed: 0,ingredients,amounts,purchase_size,price,cumsum
11,purløg,0.5,180.0,9.0,182.0
25,solsikkekerner,0.5,250.0,11.0,516.0
3,smør,1.0,5.0,12.0,101.0
8,rødløg,1.0,7.0,12.0,148.0
23,ingefær,0.5,200.0,15.0,505.0
19,citron,0.5,6.0,15.0,490.0
16,rødbede,200.0,1000.0,15.0,457.0
18,æble,0.5,9.0,18.0,475.0
13,honning,1.5,400.0,25.0,242.0
26,mandler,0.5,150.0,25.0,541.0


In [60]:
ingredients_df.to_csv('ingredients_df.csv', index=False)
recipes.to_csv('recipes_2.csv', index=False)
pantry.to_csv('matching.csv', index=False)
# grocery_df.to_csv('grocery_list.csv', index=False)
conversion.to_csv('conversion.csv', index=False)    