# Library

In [None]:
import numpy as np
import pandas as pd
import re

# EDA

In [None]:
# Load the recipe dataset into a DataFrame.
# NOTE(review): the path is absolute ('/content/...', presumably a Colab
# upload location) — adjust it when running in another environment.
data = pd.read_csv('/content/Indonesian_Food_Recipes.csv')

In [None]:
# Summarize the loaded frame: column names, non-null counts, and dtypes.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14945 entries, 0 to 14944
Data columns (total 10 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   Title                14945 non-null  object
 1   Ingredients          14945 non-null  object
 2   Steps                14945 non-null  object
 3   Loves                14945 non-null  int64 
 4   URL                  14945 non-null  object
 5   Category             14945 non-null  object
 6   Title Cleaned        14925 non-null  object
 7   Total Ingredients    14945 non-null  int64 
 8   Ingredients Cleaned  14945 non-null  object
 9   Total Steps          14945 non-null  int64 
dtypes: int64(3), object(7)
memory usage: 1.1+ MB


In [None]:
# Count the missing values in each column.
data.isna().sum()

Unnamed: 0,0
Title,0
Ingredients,0
Steps,0
Loves,0
URL,0
Category,0
Title Cleaned,20
Total Ingredients,0
Ingredients Cleaned,0
Total Steps,0


In [None]:
# Report how many rows repeat an earlier value, first for the cleaned titles
# and then for the cleaned ingredient lists (one printed line per column).
for column in ('Title Cleaned', 'Ingredients Cleaned'):
    duplicate_count = data.duplicated(column).sum()
    print('jumlah duplikat', duplicate_count)

jumlah duplikat 3610
jumlah duplikat 9


# Data preparation

In [None]:
# Discard every row that contains at least one missing value, then print the
# per-column missing counts that remain.
data = data.dropna()
remaining_missing = data.isna().sum()
print('jumlah missing value : ', remaining_missing)

jumlah missing value :  Title                  0
Ingredients            0
Steps                  0
Loves                  0
URL                    0
Category               0
Title Cleaned          0
Total Ingredients      0
Ingredients Cleaned    0
Total Steps            0
dtype: int64


In [None]:
# Keep only the first recipe for each distinct cleaned title, and then only the
# first recipe for each distinct cleaned ingredient list.
data = (
    data
    .drop_duplicates(subset='Title Cleaned')
    .drop_duplicates(subset='Ingredients Cleaned')
)

# Verify each column on its own. A combined check on
# ['Title Cleaned', 'Ingredients Cleaned'] only flags rows that repeat BOTH
# values, so it reports 0 as soon as titles are unique and cannot detect
# leftover duplicate ingredient lists.
remaining_duplicates = (
    data.duplicated('Title Cleaned').sum()
    + data.duplicated('Ingredients Cleaned').sum()
)
print('jumlah duplikat', remaining_duplicates)

jumlah duplikat 0


In [None]:
# Preview the first rows of the frame.
data.head()

Unnamed: 0,Title,Ingredients,Steps,Loves,URL,Category,Title Cleaned,Total Ingredients,Ingredients Cleaned,Total Steps
0,Ayam Woku Manado,1 Ekor Ayam Kampung (potong 12)--2 Buah Jeruk ...,1) Cuci bersih ayam dan tiriskan. Lalu peras j...,1,https://cookpad.com/id/resep/4473027-ayam-woku...,ayam,ayam woku manado,14,"ayam kampung potong , jeruk nipis , garam , ku...",7
1,Ayam goreng tulang lunak,1 kg ayam (dipotong sesuai selera jangan kecil...,"1) Haluskan bumbu2nya (BaPut, ketumbar, kemiri...",1,https://cookpad.com/id/resep/4471956-ayam-gore...,ayam,ayam goreng tulang lunak,11,"ayam dipotong , serai , daun jeruk , bawang pu...",5
2,Ayam cabai kawin,1/4 kg ayam--3 buah cabai hijau besar--7 buah ...,1) Panaskan minyak di dalam wajan. Setelah min...,2,https://cookpad.com/id/resep/4473057-ayam-caba...,ayam,ayam cabai kawin,10,"ayam , cabai hijau , cabai merah rawit , bawan...",3
3,Ayam Geprek,250 gr daging ayam (saya pakai fillet)--Secuku...,1) Goreng ayam seperti ayam krispi\n2) Ulek se...,10,https://cookpad.com/id/resep/4473023-ayam-geprek,ayam,ayam geprek,7,"daging ayam fillet , gula garam , tepung ayam ...",3
4,Minyak Ayam,400 gr kulit ayam & lemaknya--8 siung bawang p...,1) Cuci bersih kulit ayam. Sisihkan\n2) Ambil ...,4,https://cookpad.com/id/resep/4427438-minyak-ayam,ayam,minyak ayam,5,"kulit ayam & lemaknya , bawang putih , cincang...",6


In [None]:
# Remove the raw title/ingredient text, the URL, and the two count columns;
# the cleaned title and ingredient columns are kept.
unused_columns = ['Title', 'Ingredients', 'URL', 'Total Ingredients', 'Total Steps']
data = data.drop(columns=unused_columns)


In [None]:
# Re-inspect the frame to confirm which columns and how many rows remain.
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 11334 entries, 0 to 14943
Data columns (total 5 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   Steps                11334 non-null  object
 1   Loves                11334 non-null  int64 
 2   Category             11334 non-null  object
 3   Title Cleaned        11334 non-null  object
 4   Ingredients Cleaned  11334 non-null  object
dtypes: int64(1), object(4)
memory usage: 531.3+ KB


In [None]:
# Give the cleaned columns the shorter names 'Title' and 'Ingredients', then
# show the resulting schema.
column_renames = {'Title Cleaned': 'Title', 'Ingredients Cleaned': 'Ingredients'}
data = data.rename(columns=column_renames)
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 11334 entries, 0 to 14943
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Steps        11334 non-null  object
 1   Loves        11334 non-null  int64 
 2   Category     11334 non-null  object
 3   Title        11334 non-null  object
 4   Ingredients  11334 non-null  object
dtypes: int64(1), object(4)
memory usage: 531.3+ KB


In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Fit a TF-IDF vectorizer on the ingredient strings and encode every recipe as
# one row of `tfidf_matrix` (same row order as `data`).
ingredient_corpus = data['Ingredients']
tfidf = TfidfVectorizer()
tfidf_matrix = tfidf.fit_transform(ingredient_corpus)

In [None]:
def recommend_recipes(user_input, top_n=5):
    """Return the recipes whose ingredients best match a free-text query.

    The query is lower-cased, vectorized with the module-level fitted `tfidf`,
    and compared by cosine similarity against every row of the module-level
    `tfidf_matrix`. Matching rows are then looked up in `data` by position.

    Args:
        user_input: Query string listing ingredients, e.g. 'ayam, cabai, bawang'.
        top_n: Maximum number of recipes to return. Must be non-negative.

    Returns:
        A DataFrame copy with the 'Title', 'Ingredients' and 'Steps' columns
        plus a 'Similarity' column, ordered from most to least similar.
        Recipes with a similarity of zero are omitted, so fewer than `top_n`
        rows (possibly none) are returned when the query matches little or
        nothing.

    Raises:
        ValueError: If `top_n` is negative.
    """
    # A negative value would silently turn the `[:top_n]` slice below into
    # "everything except the last |top_n| rows", so reject it explicitly.
    if top_n < 0:
        raise ValueError(f'top_n must be non-negative, got {top_n}')

    query_vec = tfidf.transform([user_input.lower()])

    similarity_scores = cosine_similarity(query_vec, tfidf_matrix).flatten()

    # Highest scores first, truncated to the requested count.
    top_indices = similarity_scores.argsort()[::-1][:top_n]

    # Drop zero-score rows: they have nothing in common with the query and
    # would otherwise be returned as arbitrary "recommendations" when the
    # query is empty or contains only unknown words.
    top_indices = top_indices[similarity_scores[top_indices] > 0]

    # `.iloc` is positional, matching the row order of `tfidf_matrix`.
    recommendations = data.iloc[top_indices][['Title', 'Ingredients', 'Steps']].copy()
    recommendations['Similarity'] = similarity_scores[top_indices]

    return recommendations


In [None]:
# Example query: the five recipes most similar to "ayam, cabai, bawang".
recommend_recipes('ayam, cabai, bawang', top_n=5)

Unnamed: 0,Title,Ingredients,Steps,Similarity
10862,telur sambel korek,"telur ayam , bawang putih , cabai rawit , gula...","1) Haluskan cabai, bawang putih, gula dan gara...",0.712582
9755,telur dadar hijau,"telur ayam , daun cabai muda , cabai merah , c...","1) Ulek halus cabai, bawang merah dan jahe\n2)...",0.709089
2,ayam cabai kawin,"ayam , cabai hijau , cabai merah rawit , bawan...",1) Panaskan minyak di dalam wajan. Setelah min...,0.651378
10300,telur ceplok sambal matah,"telur ayam , bawang merah , bawang putih , cab...","1) Iris semua bumbu sambal matah nya ya, tamba...",0.633811
13197,tumis buncis wortel udang,"udang , buncis , wortel , bawang putih , bawan...",1) Cuci semua bahan Iris bawang dan cabai sisi...,0.594116


In [None]:
import joblib

# Persist the fitted vectorizer and the recipe TF-IDF matrix with joblib.
for artifact, filename in (
    (tfidf, 'tfidf_vectorizer.pkl'),
    (tfidf_matrix, 'tfidf_matrix.pkl'),
):
    joblib.dump(artifact, filename)

# Save the cleaned recipes without the index column.
data.to_csv('data_clean.csv', index=False)