## Preprocessing Merged Dataset

### Import Dependencies

In [1]:
import pandas as pd
from IPython.display import Image

# Taste preprocessing
import re
import ast

### Import dataset

In [2]:
# Load Dataset Allrecipes
food_df = pd.read_csv('Food_Dataset.csv', index_col=None)
food_df.head()

Unnamed: 0,name,ingredients,steps,calories,carbs,protein,fat,servings,image_url,link
0,15 Best Air Fryer Thanksgiving Recipes,,,,,,,,https://www.allrecipes.com/thmb/zNe_lQRZgjj1rS...,https://www.allrecipes.com/gallery/best-air-fr...
1,Air Fryer Turkey Breast,"{'quantity': '1', 'unit': 'tablespoon', 'name'...",,263.0,0g,40g,10g,6.0,https://www.allrecipes.com/thmb/PaF8nNOY0bLCvo...,https://www.allrecipes.com/recipe/275372/air-f...
2,16 Quick-and-Easy Side Dish Recipes for the Ai...,,,,,,,,https://www.allrecipes.com/thmb/91y3R4leqrUtBV...,https://www.allrecipes.com/gallery/air-fryer-s...
3,Best Holiday Party Appetizers to Make in the A...,,,,,,,,https://cdn.jwplayer.com/v2/media/rggkwMPu/pos...,https://www.allrecipes.com/article/best-holida...
4,Air Fryer Lemon Garlic Parmesan Chicken,"{'quantity': '1 1/2', 'unit': 'pounds', 'name'...",Gather all ingredients.. Preheat an air fryer ...,365.0,8g,46g,17g,4.0,https://www.allrecipes.com/thmb/5nJvgXENSeFx82...,https://www.allrecipes.com/air-fryer-lemon-gar...


In [3]:
food_df.describe()

Unnamed: 0,name,ingredients,steps,calories,carbs,protein,fat,servings,image_url,link
count,24191,23120,20624,22870,22847,22803,22648,22863,24047,24191
unique,19311,18356,17071,2874,902,750,721,201,19120,19346
top,Grilled Asparagus,"{'quantity': '1', 'unit': 'pound', 'name': 'fr...","Gather the ingredients.. Place whitefish, sour...",'0',1g,3g,0g,4,https://cdn.jwplayer.com/v2/media/Ug0PzrYB/thu...,https://www.allrecipes.com/recipe/17445/grille...
freq,9,9,8,88,415,1490,813,4792,10,9


### Remove NaN

In [4]:
# Count of NaN values in each column
nan_count = food_df.isna().sum()

# Print the count of NaN values
print("Count of NaN values in each column:")
print(nan_count)

Count of NaN values in each column:
name              0
ingredients    1071
steps          3567
calories       1321
carbs          1344
protein        1388
fat            1543
servings       1328
image_url       144
link              0
dtype: int64


In [5]:
# Drop rows with any NaN values
food_df = food_df.dropna()

# Display the cleaned DataFrame
print("DataFrame after dropping rows with NaN values:")
food_df.describe()


DataFrame after dropping rows with NaN values:


Unnamed: 0,name,ingredients,steps,calories,carbs,protein,fat,servings,image_url,link
count,20089,20089,20089,20089,20089,20089,20089,20089,20089,20089
unique,16615,16628,16633,2825,893,740,708,196,16621,16634
top,Smoked Fish Dip,"{'quantity': '2', 'unit': 'cups', 'name': 'fla...","Gather the ingredients.. Place whitefish, sour...",'0',1g,3g,0g,4,https://cdn.jwplayer.com/v2/media/Ug0PzrYB/thu...,https://www.allrecipes.com/recipe/45291/smoked...
freq,8,8,8,88,336,1295,719,4353,10,8


### Remove duplicate

In [6]:
# Print the sum of duplicate rows
duplicate_count = food_df.duplicated().sum()
print(f"Number of duplicate rows: {duplicate_count}")

Number of duplicate rows: 3455


In [7]:
# Remove the duplicate rows
food_df= food_df.drop_duplicates()

# Display the cleaned DataFrame
print("\nDataFrame after removing duplicate rows:")
food_df.describe()


DataFrame after removing duplicate rows:


Unnamed: 0,name,ingredients,steps,calories,carbs,protein,fat,servings,image_url,link
count,16634,16634,16634,16634,16634,16634,16634,16634,16634,16634
unique,16615,16628,16633,2825,893,740,708,196,16621,16634
top,Sriracha Deviled Eggs,[],Pour the 1/2 cup of water or milk into a small...,'0',1g,3g,0g,4,https://cdn.jwplayer.com/v2/media/Ug0PzrYB/thu...,https://www.allrecipes.com/air-fryer-lemon-gar...
freq,2,4,2,88,256,1030,540,3710,4,1


### Clean '[]' ingridients

In [8]:
# Remove rows where 'ingredients' is '[]'
food_df = food_df[food_df['ingredients'] != '[]']

# Display the cleaned DataFrame
print("DataFrame after removing rows with '[]' in the 'ingredients' column:")
food_df.describe()

DataFrame after removing rows with '[]' in the 'ingredients' column:


Unnamed: 0,name,ingredients,steps,calories,carbs,protein,fat,servings,image_url,link
count,16630,16630,16630,16630,16630,16630,16630,16630,16630,16630
unique,16611,16627,16629,2824,892,739,706,196,16617,16630
top,Pecan Shortbread Cookies,"{'quantity': '2 ¼', 'unit': 'cups', 'name': 'I...",Pour the 1/2 cup of water or milk into a small...,'0',1g,3g,0g,4,https://cdn.jwplayer.com/v2/media/Ug0PzrYB/thu...,https://www.allrecipes.com/air-fryer-lemon-gar...
freq,2,2,2,88,256,1030,540,3709,4,1


### Lowercase all name

In [9]:
# Convert all food names to lowercase
food_df['name'] = food_df['name'].str.lower()

# Display the updated DataFrame
print("DataFrame after converting food names to lowercase:")
food_df.head()

DataFrame after converting food names to lowercase:


Unnamed: 0,name,ingredients,steps,calories,carbs,protein,fat,servings,image_url,link
4,air fryer lemon garlic parmesan chicken,"{'quantity': '1 1/2', 'unit': 'pounds', 'name'...",Gather all ingredients.. Preheat an air fryer ...,365,8g,46g,17g,4,https://www.allrecipes.com/thmb/5nJvgXENSeFx82...,https://www.allrecipes.com/air-fryer-lemon-gar...
6,air fryer s’mores,"{'quantity': '1', 'unit': 'sleeve', 'name': 'g...",Preheat an air fryer to 380 degrees F (193 deg...,143,20g,2g,6g,10,https://www.allrecipes.com/thmb/_EDaiFt0gIGQOL...,https://www.allrecipes.com/air-fryer-s-mores-r...
7,air fryer baked yams,"{'quantity': '1', 'unit': None, 'name': 'yam'}...",Preheat an air fryer to 400 degrees F (200 deg...,283,62g,3g,3g,1,https://www.allrecipes.com/thmb/156WNgRfzvGn-s...,https://www.allrecipes.com/air-fryer-baked-yam...
8,lemon garlic butter chicken spiedini,"{'quantity': '1/2', 'unit': 'cup', 'name': 'ex...","Whisk together olive oil, wine, 2 tablespoons ...",636,21g,43g,41g,6,https://cdn.jwplayer.com/v2/media/ahbYMLcr/thu...,https://www.allrecipes.com/lemon-garlic-butter...
9,air fryer grilled pimento cheese,"{'quantity': '4', 'unit': 'slices', 'name': 'F...",Preheat the air fryer to 370 degrees F (188 de...,902,108g,29g,40g,2,https://www.allrecipes.com/thmb/cdL3DKZH3beUk5...,https://www.allrecipes.com/air-fryer-grilled-p...


### Remove duplicated recipe name

In [10]:
# Remove duplicate food names, keeping the first occurrence
food_df = food_df.drop_duplicates(subset='name', keep='first')

# Display the cleaned DataFrame
print("DataFrame after removing duplicate food names:")
food_df.describe()

DataFrame after removing duplicate food names:


Unnamed: 0,name,ingredients,steps,calories,carbs,protein,fat,servings,image_url,link
count,16598,16598,16598,16598,16598,16598,16598,16598,16598,16598
unique,16598,16595,16597,2809,885,733,703,195,16586,16598
top,air fryer lemon garlic parmesan chicken,"{'quantity': '12', 'unit': '', 'name': 'eggs'}",Pour the 1/2 cup of water or milk into a small...,'0',1g,3g,0g,4,https://cdn.jwplayer.com/v2/media/Ug0PzrYB/thu...,https://www.allrecipes.com/air-fryer-lemon-gar...
freq,1,2,2,88,255,1030,540,3701,4,1


### Check top frequency image_url

In [11]:
# Getting the mode
mode_image_url = food_df['image_url'].mode()

# Get the frequency of the most common image_url
mode_count = food_df['image_url'].value_counts().iloc[0]

# Print the full mode and the frequency (mode number)
print("Full mode of image_url column:")
print(mode_image_url.iloc[0])

print("\nMode count (number of occurrences):")
print(mode_count)

Full mode of image_url column:
https://cdn.jwplayer.com/v2/media/Ug0PzrYB/thumbnails/RflzUhLv.jpg

Mode count (number of occurrences):
4


In [12]:
# URL of the image
image_url = "https://cdn.jwplayer.com/v2/media/Ug0PzrYB/thumbnails/RflzUhLv.jpg"

# Display the image
Image(url=image_url)

No need to remove duplicate image

### Splitting into 3 df for to transform each website dataset into a same format

In [13]:
# Ensure there are no leading or trailing spaces in the link column
food_df['link'] = food_df['link'].str.strip()

# Split the DataFrame based on the 'link' column
Allrecipe_df = food_df[food_df['link'].str.contains('https://www.allrecipes.com', na=False)]
BBC_df = food_df[food_df['link'].str.contains('https://www.bbcgoodfood.com', na=False)]
Taste_df = food_df[food_df['link'].str.contains('https://www.taste.com.au', na=False)]

# Reset the index
Allrecipe_df.reset_index(drop=True, inplace=True)
BBC_df.reset_index(drop=True, inplace=True)
Taste_df.reset_index(drop=True, inplace=True)

### Transform ALLrecipe dataset into a same format (We use ALLrecipe_df 's format as the model for ther 2 other website)

In [14]:
Allrecipe_df.head()

Unnamed: 0,name,ingredients,steps,calories,carbs,protein,fat,servings,image_url,link
0,air fryer lemon garlic parmesan chicken,"{'quantity': '1 1/2', 'unit': 'pounds', 'name'...",Gather all ingredients.. Preheat an air fryer ...,365,8g,46g,17g,4,https://www.allrecipes.com/thmb/5nJvgXENSeFx82...,https://www.allrecipes.com/air-fryer-lemon-gar...
1,air fryer s’mores,"{'quantity': '1', 'unit': 'sleeve', 'name': 'g...",Preheat an air fryer to 380 degrees F (193 deg...,143,20g,2g,6g,10,https://www.allrecipes.com/thmb/_EDaiFt0gIGQOL...,https://www.allrecipes.com/air-fryer-s-mores-r...
2,air fryer baked yams,"{'quantity': '1', 'unit': None, 'name': 'yam'}...",Preheat an air fryer to 400 degrees F (200 deg...,283,62g,3g,3g,1,https://www.allrecipes.com/thmb/156WNgRfzvGn-s...,https://www.allrecipes.com/air-fryer-baked-yam...
3,lemon garlic butter chicken spiedini,"{'quantity': '1/2', 'unit': 'cup', 'name': 'ex...","Whisk together olive oil, wine, 2 tablespoons ...",636,21g,43g,41g,6,https://cdn.jwplayer.com/v2/media/ahbYMLcr/thu...,https://www.allrecipes.com/lemon-garlic-butter...
4,air fryer grilled pimento cheese,"{'quantity': '4', 'unit': 'slices', 'name': 'F...",Preheat the air fryer to 370 degrees F (188 de...,902,108g,29g,40g,2,https://www.allrecipes.com/thmb/cdL3DKZH3beUk5...,https://www.allrecipes.com/air-fryer-grilled-p...


In [15]:
Allrecipe_df.describe()

Unnamed: 0,name,ingredients,steps,calories,carbs,protein,fat,servings,image_url,link
count,12685,12685,12685,12685,12685,12685,12685,12685,12685,12685
unique,12685,12682,12684,1058,174,105,112,76,12673,12685
top,air fryer lemon garlic parmesan chicken,"{'quantity': '2 ¼', 'unit': 'cups', 'name': 'I...",Pour the 1/2 cup of water or milk into a small...,215,1g,3g,0g,8,https://cdn.jwplayer.com/v2/media/Ug0PzrYB/thu...,https://www.allrecipes.com/air-fryer-lemon-gar...
freq,1,2,2,38,255,1030,540,2369,4,1


In [16]:
print(Allrecipe_df['ingredients'][1])

{'quantity': '1', 'unit': 'sleeve', 'name': 'graham crackers'}, {'quantity': '5', 'unit': '(1.5 ounce)', 'name': 'chocolate candy bars'}, {'quantity': '10', 'unit': None, 'name': 's’more-sized marshmallows'}


### Transform BBCGoodFood dataset into a same format

In [17]:
#Check what format we're dealing with
BBC_df.head()

Unnamed: 0,name,ingredients,steps,calories,carbs,protein,fat,servings,image_url,link
0,"('salsa verde baked eggs',)","([{'ingredient': 'tbspolive oil', 'quantity': ...",(['Step 1: Drizzle 1 tbsp of the olive oil in ...,'268low','7g','12g','21g',4,https://images.immediate.co.uk/production/vola...,https://www.bbcgoodfood.com/recipes/salsa-verd...
1,"('sausage & fennel orecchiette',)","([{'ingredient': 'and finely sliced', 'quantit...","(['Step 1: Boil a large pan of salted water, t...",'527','62g','21g','19g',4,https://images.immediate.co.uk/production/vola...,https://www.bbcgoodfood.com/recipes/sausage-fe...
2,"('roasted cod with zingy beetroot salad',)","([{'ingredient': 'new potatoes', 'quantity': '...",(['Step 1: Heat the oven to 200C/180C fan/gas ...,'262low','21g','36g','3glow',4,https://images.immediate.co.uk/production/vola...,https://www.bbcgoodfood.com/recipes/roasted-co...
3,"('tomato soup & hummus crispbreads',)","([{'ingredient': 'tbsprapeseed oil', 'quantity...",(['Step 1: Heat the oil in a large non-stick p...,'403','51g','15g','12g',4,https://images.immediate.co.uk/production/vola...,https://www.bbcgoodfood.com/recipes/tomato-sou...
4,"('puy lentils with seared salmon',)","([{'ingredient': 'puy lentils', 'quantity': '1...",(['Step 1: Put the lentils in a pan of water w...,'519','29g','38g','25g',2,https://images.immediate.co.uk/production/vola...,https://www.bbcgoodfood.com/recipes/puy-lentil...


In [18]:
BBC_df.describe()

Unnamed: 0,name,ingredients,steps,calories,carbs,protein,fat,servings,image_url,link
count,3094,3094,3094,3094,3094,3094,3094,3094,3094,3094
unique,3094,3094,3094,1007,178,183,151,168,3094,3094
top,"('salsa verde baked eggs',)","([{'ingredient': 'tbspolive oil', 'quantity': ...",(['Step 1: Drizzle 1 tbsp of the olive oil in ...,'0','0g','2g','0g',4,https://images.immediate.co.uk/production/vola...,https://www.bbcgoodfood.com/recipes/salsa-verd...
freq,1,1,1,88,111,196,182,875,1,1


In [19]:
# Function to clean up the columns
def clean_column(value):
    return value.strip("(['),]").replace("'", "").replace("low", "").strip()

# Apply the function to the relevant columns
BBC_df['name'] = BBC_df['name'].apply(clean_column)
BBC_df['ingredients'] = BBC_df['ingredients'].apply(lambda x: x.strip("(['),])"))
BBC_df['steps'] = BBC_df['steps'].apply(lambda x: x.strip("(['),])"))
BBC_df['calories'] = BBC_df['calories'].apply(clean_column)
BBC_df['carbs'] = BBC_df['carbs'].apply(clean_column)
BBC_df['protein'] = BBC_df['protein'].apply(clean_column)
BBC_df['fat'] = BBC_df['fat'].apply(clean_column)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  BBC_df['name'] = BBC_df['name'].apply(clean_column)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  BBC_df['ingredients'] = BBC_df['ingredients'].apply(lambda x: x.strip("(['),])"))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  BBC_df['steps'] = BBC_df['steps'].apply(lambda x: x.strip("(['),])"))
A

In [20]:
print(BBC_df['ingredients'][1])

{'ingredient': 'and finely sliced', 'quantity': '1', 'unit': 'leekhalved', 'item_note': 'halved and finely sliced'}, {'ingredient': 'smallfennel bulb', 'quantity': '1', 'unit': None, 'item_note': 'halved and finely sliced, fronds reserved'}, {'ingredient': 'long-stemmedbroccoli', 'quantity': '250', 'unit': 'g', 'item_note': 'larger stalks finely sliced'}, {'ingredient': 'any short pasta', 'quantity': '300', 'unit': 'gorecchietteor', 'item_note': 'or any short pasta'}, {'ingredient': 'tbspolive oil', 'quantity': '1', 'unit': None, 'item_note': None}, {'ingredient': '', 'quantity': '6', 'unit': 'sausages', 'item_note': None}, {'ingredient': 'cloves', 'quantity': '2', 'unit': 'garlic', 'item_note': 'finely chopped'}, {'ingredient': 'tspfennel seeds', 'quantity': '2', 'unit': None, 'item_note': None}, {'ingredient': 'tbsptomato purée', 'quantity': '1', 'unit': None, 'item_note': None}, {'ingredient': '', 'quantity': '2', 'unit': 'carrotsgrated', 'item_note': 'grated'}, {'ingredient': 'canc

In [21]:
#Check result
BBC_df.head()

Unnamed: 0,name,ingredients,steps,calories,carbs,protein,fat,servings,image_url,link
0,salsa verde baked eggs,"{'ingredient': 'tbspolive oil', 'quantity': '5...",Step 1: Drizzle 1 tbsp of the olive oil in afr...,268,7g,12g,21g,4,https://images.immediate.co.uk/production/vola...,https://www.bbcgoodfood.com/recipes/salsa-verd...
1,sausage & fennel orecchiette,"{'ingredient': 'and finely sliced', 'quantity'...","Step 1: Boil a large pan of salted water, turn...",527,62g,21g,19g,4,https://images.immediate.co.uk/production/vola...,https://www.bbcgoodfood.com/recipes/sausage-fe...
2,roasted cod with zingy beetroot salad,"{'ingredient': 'new potatoes', 'quantity': '20...",Step 1: Heat the oven to 200C/180C fan/gas 6. ...,262,21g,36g,3g,4,https://images.immediate.co.uk/production/vola...,https://www.bbcgoodfood.com/recipes/roasted-co...
3,tomato soup & hummus crispbreads,"{'ingredient': 'tbsprapeseed oil', 'quantity':...",Step 1: Heat the oil in a large non-stick pan ...,403,51g,15g,12g,4,https://images.immediate.co.uk/production/vola...,https://www.bbcgoodfood.com/recipes/tomato-sou...
4,puy lentils with seared salmon,"{'ingredient': 'puy lentils', 'quantity': '160...",Step 1: Put the lentils in a pan of water with...,519,29g,38g,25g,2,https://images.immediate.co.uk/production/vola...,https://www.bbcgoodfood.com/recipes/puy-lentil...


### Transform Taste dataset into a same format

In [22]:
# Function to parse a list of ingredients
def parse_ingredients(ingredient_list_str):
    # Convert the string representation of a list into an actual list
    ingredient_list = eval(ingredient_list_str)
    return [parse_ingredient(ingredient) for ingredient in ingredient_list]

In [23]:
# Function to extract the calorie value from the string
def extract_calories(energy_str):
    match = re.search(r'(\d+)\s*cal', energy_str)
    if match:
        return match.group(1)  # Extracts the calorie value
    return None

In [24]:

## drop fibre
# Taste_df.drop('fibre', axis=1, inplace=True)
# Taste_df.drop('sodium', axis=1, inplace=True)

# Handle the 'steps' column
# Check for null or invalid values before applying the transformation
if 'steps' in Taste_df.columns:
    Taste_df['steps'] = Taste_df['steps'].apply(
        lambda step_list: ', '.join(eval(step_list)) if isinstance(step_list, str) and step_list.startswith('[') else step_list
    )


# test dataframe

# Convert the 'ingredient' column to lists and parse
if 'ingredient' in Taste_df.columns:
    # Convert the string representation of a list to an actual list
    Taste_df['ingredient'] = Taste_df['ingredient'].apply(
        lambda ing_list: [
            parse_ingredient(item) for item in ast.literal_eval(ing_list)
        ] if isinstance(ing_list, str) and ing_list.startswith('[') else ing_list
    )


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  Taste_df['steps'] = Taste_df['steps'].apply(


In [25]:
print(Taste_df['ingredients'][1])

['1 egg, lightly whisked', '500g Coles Beef 3 Star Regular Mince', '50g (1 cup) panko breadcrumbs', '40g packet French onion soup mix', '80g Colby cheese, cut into 20 small pieces', '1 tbsp vegetable oil', '200g portobello mushrooms, thickly sliced', '2 tbsp tomato paste', '125ml (1 ⁄2 cup) thickened cream', '60g baby spinach', '4 crusty bread slices']


In [26]:
Taste_df.head()

Unnamed: 0,name,ingredients,steps,calories,carbs,protein,fat,servings,image_url,link
0,easiest-ever fried rice,"['1 cup (200g) brown rice', '1 cup (200g) whit...",Boil brown and white rice separately following...,2272 kj (543cal),87.4g,26.8g,8.7g,4,https://content.api.news/v3/images/bin/640b903...,https://www.taste.com.au/recipes/unique-fried-...
1,cheesy french onion meatballs and mushrooms,"['1 egg, lightly whisked', '500g Coles Beef 3 ...","Combine the egg, beef, breadcrumbs and 1&1/2 t...",3138 kj (750cal),39.0g,35.9g,49.4g,4,https://img.taste.com.au/gJ9jKwKI/w720-h480-cf...,https://www.taste.com.au/recipes/cheesy-french...
2,thai chicken lettuce cups with avocado and lime,"[""50g Chang's rice vermicelli noodles"", '1 1/2...",Bring a large saucepan of salted water to a bo...,4775 kj (1141cal),80.9g,83.5g,56.8g,4,https://img.taste.com.au/fqti94HE/w720-h480-cf...,https://www.taste.com.au/recipes/thai-chicken-...
3,oven-baked mushroom risotto with pesto,"['1 tbsp olive oil', '1 leek, pale section onl...",Preheat oven to 150°C. Heat half the oil in a ...,2685 kj (642cal),69.8g,15.2g,31.4g,6,https://img.taste.com.au/-botctrO/w720-h480-cf...,https://www.taste.com.au/recipes/oven-baked-mu...
4,tomato-tofu salad with creamy ginger dressing,"['1 sheet nori (3g)', '3 tbsp toasted sesame s...",Preheat the oven to 220°C (200°C fan-forced). ...,1367 kj (327cal),16.0g,8.1g,27.2g,4,https://img.taste.com.au/AWH3XZcf/w720-h480-cf...,https://www.taste.com.au/recipes/tomato-tofu-s...


In [27]:
Taste_df.describe()

Unnamed: 0,name,ingredients,steps,calories,carbs,protein,fat,servings,image_url,link
count,819,819,819,819,819,819,819,819,819,819
unique,819,819,819,744,574,483,476,14,819,819
top,easiest-ever fried rice,"['1 cup (200g) brown rice', '1 cup (200g) whit...",Boil brown and white rice separately following...,2294 kj (548cal),35.3g,6.7g,0.2g,4,https://content.api.news/v3/images/bin/640b903...,https://www.taste.com.au/recipes/unique-fried-...
freq,1,1,1,3,6,9,6,490,1,1


### Merge back formated datasets

In [28]:
#Merge back
new_df = pd.concat([Allrecipe_df, BBC_df, Taste_df], ignore_index=True)

new_df.describe()

Unnamed: 0,name,ingredients,steps,calories,carbs,protein,fat,servings,image_url,link
count,16598,16598,16598,16598,16598,16598,16598,16598,16598,16598
unique,16543,16595,16597,1829,730,616,565,195,16586,16598
top,red lentil soup,"{'quantity': '12', 'unit': '', 'name': 'eggs'}",Pour the 1/2 cup of water or milk into a small...,0,3g,3g,0g,4,https://cdn.jwplayer.com/v2/media/Ug0PzrYB/thu...,https://www.allrecipes.com/air-fryer-lemon-gar...
freq,2,2,2,88,329,1204,756,3701,4,1


### Change the metrics to gram and mililiters

### Make The Ingredent Column For each Ingredient

### Export to cleaned.csv

In [29]:
# Save the DataFrame to a CSV file
new_df.to_csv('cleaned.csv', index=False)

# Confirm the saving
print("DataFrame has been saved to 'cleaned.csv'")

DataFrame has been saved to 'cleaned.csv'
