In [3]:
import pandas as pd
import numpy as np

In [4]:
recipes = pd.read_csv('recipes.csv')
recipes.head()


Unnamed: 0,name,rating,tagline,prep_time,cook_time,total_time,servings,cuisine,category,sub_category
0,Vegetable Biryani,4.5,A very tasty and interesting biryani; the colo...,35 mins,30 mins,1 hr 5 mins,8,indian,main-dishes,rice
1,Our 12 Best Biryani Recipes,0.0,,0,0,0,1,indian,main-dishes,rice
2,Curried Pork Chops and Cauliflower with Basmat...,4.4,Pork chops are flavored with curry and apple c...,20 mins,40 mins,1 hr,4,indian,main-dishes,rice
3,Slow-Cooked Chicken Biryani,4.0,This slow cooker biryani is a variation of two...,20 mins,2 hrs 10 mins,30 mins,3 hrs,indian,main-dishes,rice
4,Shyam's Goat Biryani,4.5,This is an excellent homemade biryani recipe!,15 mins,1 hr 55 mins,9 hrs,11 hrs 10 mins,indian,main-dishes,rice


In [5]:
#Check for empty or null values
recipes.isna().sum()

name             0
rating           0
tagline         19
prep_time        0
cook_time        0
total_time       0
servings         0
cuisine          0
category         0
sub_category     0
dtype: int64

In [6]:
#Recipes without prep time are not really recipes 
recipes[recipes['prep_time'] == '0']

Unnamed: 0,name,rating,tagline,prep_time,cook_time,total_time,servings,cuisine,category,sub_category
1,Our 12 Best Biryani Recipes,0.0,,0,0,0,1,indian,main-dishes,rice
143,8 Favorite Indian Butter Chicken Recipes,0.0,,0,0,0,1,indian,main-dishes,chicken
180,7 Chicken Biryani Recipes That Are Full of Flavor,0.0,,0,0,0,1,indian,main-dishes,chicken
221,9 Sweet Recipes to Complete Your Indian Dinner,0.0,,0,0,0,1,asian,indian,desserts
238,"10 Easy, Healthy Indian Side Dishes to Complet...",0.0,,0,0,0,1,asian,indian,side-dishes
297,10 Rich and Flavorful Lassi Recipes,0.0,,0,0,0,1,asian,indian,drinks
310,How to Make Homemade Sushi,0.0,Roll call!,0,0,0,1,asian,japanese,main-dishes
314,15 Unbeatable Udon Noodle Recipes,0.0,,0,0,0,1,asian,japanese,main-dishes
329,20 Easy Japanese Recipes for Quick Weeknight D...,0.0,,0,0,0,1,asian,japanese,main-dishes
355,12 Japanese Beef Recipes for Simple Comforting...,0.0,,0,0,0,1,asian,japanese,main-dishes


In [7]:
#drop these from recipes
drop_index = recipes[recipes['prep_time'] == '0'].index
cleaned_recipes = recipes.drop(drop_index)
cleaned_recipes.reset_index()
cleaned_recipes.head()

Unnamed: 0,name,rating,tagline,prep_time,cook_time,total_time,servings,cuisine,category,sub_category
0,Vegetable Biryani,4.5,A very tasty and interesting biryani; the colo...,35 mins,30 mins,1 hr 5 mins,8,indian,main-dishes,rice
2,Curried Pork Chops and Cauliflower with Basmat...,4.4,Pork chops are flavored with curry and apple c...,20 mins,40 mins,1 hr,4,indian,main-dishes,rice
3,Slow-Cooked Chicken Biryani,4.0,This slow cooker biryani is a variation of two...,20 mins,2 hrs 10 mins,30 mins,3 hrs,indian,main-dishes,rice
4,Shyam's Goat Biryani,4.5,This is an excellent homemade biryani recipe!,15 mins,1 hr 55 mins,9 hrs,11 hrs 10 mins,indian,main-dishes,rice
5,Bombay Chicken and Rice,4.1,Chicken parts are brushed with a butter and cu...,20 mins,1 hr,1 hr 20 mins,6,indian,main-dishes,rice


In [8]:
cleaned_recipes.isna().sum()

name            0
rating          0
tagline         0
prep_time       0
cook_time       0
total_time      0
servings        0
cuisine         0
category        0
sub_category    0
dtype: int64

In [9]:
def convert_time_to_mins(time: str):
    split = time.split()

    if len(split) < 2: #Just a number , return it
        return int(split[0])

    elif len(split) == 2: #Either hours or mins
        if split[1] == 'mins':
            return int(split[0])  # Return minutes as it is
        else:
            return int(split[0])*60 #Convert hours to minutes
    
    else: # Both hours and mins
        if split[1] !='hr':
            return np.nan #Invalid time
        else:
            return int(split[0])*60 + int(split[2])



        

In [10]:
cleaned_recipes['prep_time'] = cleaned_recipes['prep_time'].apply(convert_time_to_mins)

In [11]:
cleaned_recipes['cook_time'].apply(lambda x:len(x.split())).value_counts()

cook_time
2    1017
4     152
6       1
5       1
8       1
Name: count, dtype: int64

In [12]:
cleaned_recipes['cook_time'] = cleaned_recipes['cook_time'].apply(convert_time_to_mins)
print(cleaned_recipes['cook_time'].isna().sum(),' invalid cooking times')

48  invalid cooking times


In [13]:
#Fill invalid cooking times with their prep times
cleaned_recipes['cook_time'] = cleaned_recipes['cook_time'].fillna(cleaned_recipes['prep_time']).astype(int)

In [14]:
def append_categories(row):
    cat = row['category']
    sub_cat = row['sub_category']
    if cat == 'main-dishes' or cat == 'side-dishes':
        cat = cat[:-2]
    if sub_cat == 'main-dishes' or sub_cat == 'side-dishes':
        sub_cat = sub_cat[:-2]
    
    return [cat,sub_cat]

In [15]:
cleaned_recipes['categories'] = cleaned_recipes.apply(append_categories,axis=1)

In [16]:
#Check unique cuisines
cleaned_recipes['cuisine'].value_counts()

cuisine
european     414
italian      264
asian        196
indian       184
main-dish     57
250           57
Name: count, dtype: int64

In [17]:
#main-dish represents italian lasagna and 250 represents italian pizza

cleaned_recipes['cuisine'] = cleaned_recipes['cuisine'].apply(lambda x: x.replace('main-dish','italian').replace('250','italian'))

In [18]:
#Check ratings
cleaned_recipes['rating'].describe()

count    1172.000000
mean        4.423891
std         0.779066
min         0.000000
25%         4.400000
50%         4.600000
75%         4.700000
max         5.000000
Name: rating, dtype: float64

In [23]:
#Check duplicate recipes
cleaned_recipes[cleaned_recipes['name'].duplicated()]

Unnamed: 0,name,rating,tagline,prep_time,cook_time,total_time,servings,cuisine,category,sub_category,categories
61,Curried Pork Chops and Cauliflower with Basmat...,4.4,Pork chops are flavored with curry and apple c...,20,40,1 hr,4,indian,main-dishes,curry,"[main-dish, curry]"
69,Vegan Sweet Potato Chickpea Curry,4.8,This sweet potato chickpea curry is a yummy ve...,10,20,30 mins,6,indian,main-dishes,vegetarian,"[main-dish, vegetarian]"
70,Vegetarian Korma,4.6,This vegetable,25,30,55 mins,4,indian,main-dishes,vegetarian,"[main-dish, vegetarian]"
73,Spicy Vegan Potato Curry,4.5,This vegan potato curry has abundant spices th...,30,30,1 hr,6,indian,main-dishes,vegetarian,"[main-dish, vegetarian]"
75,Chickpea Curry,4.3,We usually recommend preparing the beans at ho...,10,30,40 mins,8,indian,main-dishes,vegetarian,"[main-dish, vegetarian]"
...,...,...,...,...,...,...,...,...,...,...,...
1135,"Roasted Lettuce, Radicchio, and Endive",5.0,A Sicilian grandmother taught me to make roast...,20,10,30 mins,6,european,italian,salads,"[italian, salads]"
1145,Insalata Caprese I,4.7,This tastes like the salad that is served at a...,15,60,1 hr 15 mins,4,european,italian,salads,"[italian, salads]"
1193,Naan Bread Margherita Pizza with Prosciutto,4.8,This naan bread pizza recipe is one of my favo...,15,10,25 mins,2,italian,main-dish,pizza,"[main-dish, pizza]"
1204,Sicilian Christmas Pizza (Sfincione),4.9,Topping a thick-crust pizza with bread crumbs ...,25,65,1 hr 30 mins,3 hrs,italian,main-dish,pizza,"[main-dish, pizza]"


In [35]:
#Drop duplicate recipes
duplicate_index = cleaned_recipes[cleaned_recipes['name'].duplicated()].index
cleaned_recipes = cleaned_recipes.drop(duplicate_index)

In [54]:
#Keep only required columns
final_cleaned_recipes = cleaned_recipes[['name','rating','tagline','prep_time','cook_time','cuisine']]
final_cleaned_recipes = final_cleaned_recipes.reset_index(drop=True)
final_cleaned_recipes

Unnamed: 0,name,rating,tagline,prep_time,cook_time,cuisine
0,Vegetable Biryani,4.5,A very tasty and interesting biryani; the colo...,35,30,indian
1,Curried Pork Chops and Cauliflower with Basmat...,4.4,Pork chops are flavored with curry and apple c...,20,40,indian
2,Slow-Cooked Chicken Biryani,4.0,This slow cooker biryani is a variation of two...,20,20,indian
3,Shyam's Goat Biryani,4.5,This is an excellent homemade biryani recipe!,15,115,indian
4,Bombay Chicken and Rice,4.1,Chicken parts are brushed with a butter and cu...,20,60,indian
...,...,...,...,...,...,...
1036,Crab Rangoon Pizza,4.8,True crab rangoon flavor on a pizza. Easily ma...,10,17,italian
1037,Taco Pizza,4.5,I was looking for a recipe for a taco style pi...,120,20,italian
1038,Pepperoni Meatza,4.5,"For people that are gluten-intolerant, low-car...",30,15,italian
1039,Pizza Pinwheels,4.4,This recipe came from my Aunt when I was about...,20,12,italian


In [55]:
final_cleaned_recipes.to_csv('cleaned_recipes.csv',index=False)

In [50]:
def clean_category(cat : str):
    cat = cat.replace('-',' ')
    cat = cat.split()
    cat = [c[0].upper()+c[1:] for c in cat]
    
    return ' '.join(cat)

In [51]:
import csv
with open('recipe_categories.csv','w') as f:
    writer = csv.writer(f)
    for idx,row in cleaned_recipes.iterrows():
        for cat in row['categories']:
            
            writer.writerow((row['name'],clean_category(cat)))

In [45]:
for idx,row in cleaned_recipes.iterrows():
    print(row['categories'])
    break

['main-dish', 'rice']
