In [None]:
# Restrict the Kaggle credentials file to owner read/write only (mode 600)
! chmod 600 /content/kaggle.json

In [None]:
# Download the sinusinu/indonesian-food dataset archive; KAGGLE_CONFIG_DIR points the Kaggle CLI at /content/ for its config
! KAGGLE_CONFIG_DIR=/content/ kaggle datasets download -d sinusinu/indonesian-food

Downloading indonesian-food.zip to /content
  0% 0.00/12.1k [00:00<?, ?B/s]
100% 12.1k/12.1k [00:00<00:00, 9.80MB/s]


## Data Understanding

In [None]:
# Import library
import pandas as pd
import numpy as np 
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from pathlib import Path
import matplotlib.pyplot as plt

In [None]:
import zipfile

# Extract the downloaded dataset archive into /tmp/.
# The context manager closes the archive handle even if extraction raises,
# which the explicit open/close sequence did not guarantee.
with zipfile.ZipFile('/content/indonesian-food.zip') as zip_file:
    zip_file.extractall('/tmp/')

In [None]:
# Load the extracted CSV into a dataframe and display it
food = pd.read_csv('/tmp/indonesian_food.csv')
food

Unnamed: 0,foodId,Nama,Tipe
0,1,Sosis Bakar,ayam-daging
1,2,Ngohiong Ayam Udang,ayam-daging
2,3,Rawon Ayam,ayam-daging
3,4,Usus Goreng Crispy,ayam-daging
4,5,Ceker Rica Rica,ayam-daging
...,...,...,...
1268,1269,Es Cincau,buah-minuman
1269,1270,Asinan Rambutan,buah-minuman
1270,1271,Asinan Buah,buah-minuman
1271,1272,Sop Buah,buah-minuman


In [None]:
# Summarize the columns: dtypes and non-null counts
food.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1273 entries, 0 to 1272
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   foodId  1273 non-null   int64 
 1   Nama    1273 non-null   object
 2   Tipe    1273 non-null   object
dtypes: int64(1), object(2)
memory usage: 30.0+ KB


In [None]:
# Preview the first 10 rows
food.head(10)

Unnamed: 0,foodId,Nama,Tipe
0,1,Sosis Bakar,ayam-daging
1,2,Ngohiong Ayam Udang,ayam-daging
2,3,Rawon Ayam,ayam-daging
3,4,Usus Goreng Crispy,ayam-daging
4,5,Ceker Rica Rica,ayam-daging
5,6,Ceker Kecap Pedas,ayam-daging
6,7,Ayam Suwir Kecap,ayam-daging
7,8,Burung Puyuh Bakar,ayam-daging
8,9,Dangkot Ayam,ayam-daging
9,10,Ayam Bumbu Merah,ayam-daging


In [None]:
# Descriptive statistics for the dataframe
food.describe()

Unnamed: 0,foodId
count,1273.0
mean,637.0
std,367.627756
min,1.0
25%,319.0
50%,637.0
75%,955.0
max,1273.0


In [None]:
# Count missing values in each column
food.isnull().sum()

foodId    0
Nama      0
Tipe      0
dtype: int64

In [None]:
# Total number of rows in the dataset
len(food)

1273

In [None]:
# Order the foods by ascending id and keep the sorted frame as fix_food
fix_food = food.sort_values(by=['foodId'])
fix_food

Unnamed: 0,foodId,Nama,Tipe
0,1,Sosis Bakar,ayam-daging
1,2,Ngohiong Ayam Udang,ayam-daging
2,3,Rawon Ayam,ayam-daging
3,4,Usus Goreng Crispy,ayam-daging
4,5,Ceker Rica Rica,ayam-daging
...,...,...,...
1268,1269,Es Cincau,buah-minuman
1269,1270,Asinan Rambutan,buah-minuman
1270,1271,Asinan Buah,buah-minuman
1271,1272,Sop Buah,buah-minuman


In [None]:
# Count how many distinct food ids fix_food contains
len(pd.unique(fix_food['foodId']))

1273

In [None]:
# Pull the 'foodId', 'Nama' and 'Tipe' series out of fix_food as plain Python lists
food_id, food_nama, food_tipe = (
    fix_food[column].tolist() for column in ('foodId', 'Nama', 'Tipe')
)

# Print the length of each list, one per line
print(len(food_id), len(food_nama), len(food_tipe), sep='\n')

1273
1273
1273


In [None]:
# Assemble a new dataframe from the three lists, using the column names
# 'foodId', 'name' and 'tipe'
food_new = pd.DataFrame(dict(foodId=food_id, name=food_nama, tipe=food_tipe))
food_new

Unnamed: 0,foodId,name,tipe
0,1,Sosis Bakar,ayam-daging
1,2,Ngohiong Ayam Udang,ayam-daging
2,3,Rawon Ayam,ayam-daging
3,4,Usus Goreng Crispy,ayam-daging
4,5,Ceker Rica Rica,ayam-daging
...,...,...,...
1268,1269,Es Cincau,buah-minuman
1269,1270,Asinan Rambutan,buah-minuman
1270,1271,Asinan Buah,buah-minuman
1271,1272,Sop Buah,buah-minuman


In [None]:
# Alias food_new as `data` (same object, not a copy) and show 5 random rows
data = food_new
data.sample(5)

Unnamed: 0,foodId,name,tipe
218,219,Iga Bakar Madu,ayam-daging
154,155,Chicken (Ayam) Teriyaki,ayam-daging
783,784,Sup Kacang Merah Daging Sapi,sop-soto-bakso
196,197,Selat Solo,ayam-daging
586,587,Sambal Petis,sambal


In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Initialize the TfidfVectorizer.
# NOTE: this rebinds `tf`, shadowing the `import tensorflow as tf` alias from
# the import cell; the name is kept because later cells call methods on it.
tf = TfidfVectorizer()

# Learn the vocabulary and idf weights from the food type column
tf.fit(data['tipe'])

# List the vocabulary terms in feature-index order.
# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2;
# get_feature_names_out() replaces it, and .tolist() keeps the plain-list display.
tf.get_feature_names_out().tolist()



['ayam',
 'bakso',
 'buah',
 'daging',
 'ikan',
 'jajanan',
 'jeli',
 'keripik',
 'kerupuk',
 'kue',
 'mie',
 'minuman',
 'nasi',
 'pasar',
 'pasta',
 'puding',
 'roti',
 'sambal',
 'sayur',
 'seafood',
 'sop',
 'soto',
 'tahu',
 'telur',
 'tempe']

In [None]:
# Fit the vectorizer on the food types and transform them into a tf-idf matrix
tfidf_matrix = tf.fit_transform(data['tipe']) 
 
# Check the size of the tf-idf matrix
tfidf_matrix.shape 

(1273, 25)

In [None]:
# Convert the tf-idf vectors into a dense matrix with todense() for inspection
tfidf_matrix.todense()

matrix([[0.70710678, 0.        , 0.        , ..., 0.        , 0.        ,
         0.        ],
        [0.70710678, 0.        , 0.        , ..., 0.        , 0.        ,
         0.        ],
        [0.70710678, 0.        , 0.        , ..., 0.        , 0.        ,
         0.        ],
        ...,
        [0.        , 0.        , 0.70710678, ..., 0.        , 0.        ,
         0.        ],
        [0.        , 0.        , 0.70710678, ..., 0.        , 0.        ,
         0.        ],
        [0.        , 0.        , 0.70710678, ..., 0.        , 0.        ,
         0.        ]])

In [None]:
# Build a dataframe to inspect the tf-idf matrix:
# columns are the food-type vocabulary terms, rows are the food names.
# sample() then shows a random subset of 22 columns and 10 rows.
# get_feature_names_out() is used because get_feature_names() was removed
# in scikit-learn 1.2.

pd.DataFrame(
    tfidf_matrix.todense(),
    columns=tf.get_feature_names_out(),
    index=data.name
).sample(22, axis=1).sample(10, axis=0)

Unnamed: 0_level_0,sambal,ayam,jeli,kerupuk,minuman,nasi,ikan,roti,sop,telur,...,puding,tahu,jajanan,bakso,mie,buah,pasar,seafood,pasta,kue
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Urap Bali,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Red Velvet,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.707107,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.707107
Sate Ikan Tongkol,0.0,0.0,0.0,0.0,0.0,0.0,0.707107,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.707107,0.0,0.0
Nasi Jinggo,0.0,0.707107,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Nasi Gemuk,0.0,0.0,0.0,0.0,0.0,0.57735,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.57735,0.0,0.0,0.0,0.57735,0.0
Lasagna Panggang,0.0,0.0,0.0,0.0,0.0,0.57735,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.57735,0.0,0.0,0.0,0.57735,0.0
Soto Tangkar,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.57735,0.0,...,0.0,0.0,0.0,0.57735,0.0,0.0,0.0,0.0,0.0,0.0
Kerang Dara Saus Pedas,0.0,0.0,0.0,0.0,0.0,0.0,0.707107,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.707107,0.0,0.0
Nasi Tutug Oncom,0.0,0.0,0.0,0.0,0.0,0.57735,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.57735,0.0,0.0,0.0,0.57735,0.0
Silky Puding,0.0,0.0,0.707107,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.707107,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
from sklearn.metrics.pairwise import cosine_similarity
 
# Compute the cosine similarity on the tf-idf matrix
cosine_sim = cosine_similarity(tfidf_matrix) 
cosine_sim

array([[1., 1., 1., ..., 0., 0., 0.],
       [1., 1., 1., ..., 0., 0., 0.],
       [1., 1., 1., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 1., 1.],
       [0., 0., 0., ..., 1., 1., 1.],
       [0., 0., 0., ..., 1., 1., 1.]])

In [None]:
# Build a dataframe from cosine_sim with food names as both the row and column labels
cosine_sim_df = pd.DataFrame(cosine_sim, index=data['name'], columns=data['name'])
print('Shape:', cosine_sim_df.shape)
 
# Inspect a random slice (10 rows x 5 columns) of the food similarity matrix
cosine_sim_df.sample(5, axis=1).sample(10, axis=0)

Shape: (1273, 1273)


name,Mie Godog Jawa (Mie Rebus Jawa),Pastel Isi Ayam,Pilus Cikur,Pepes Udang,Sambal Belut
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Bubur Korea (Dakjuk),1.0,0.0,0.0,0.0,0.0
Kue Lapis Beras,0.0,1.0,1.0,0.0,0.0
Semur Telur,0.0,0.0,0.0,0.0,0.0
Balado Kentang,0.0,0.0,0.0,0.0,1.0
Kue Ketan Hitam Kukus,0.0,1.0,1.0,0.0,0.0
Iga Penyet,0.0,0.0,0.0,0.0,0.0
Laksa Bogor,1.0,0.0,0.0,0.0,0.0
Sempol Ikan Tenggiri,0.0,0.0,0.0,0.0,0.0
Karipap (Curry Puff),0.0,1.0,1.0,0.0,0.0
Martabak Telur,0.0,0.0,0.0,0.0,0.0


In [None]:
from os import name
def food_recommendations(name, similarity_data=cosine_sim_df, items=data[['name', 'tipe']], k=20):
    """
    Recommend the foods most similar to a given food.

    Parameters
    ----------
    name : str
        Food name to look up; must be a column label of ``similarity_data``.
    similarity_data : pd.DataFrame
        Symmetric similarity matrix with food names as both index and columns.
        The default is bound to ``cosine_sim_df`` at the time this cell runs.
    items : pd.DataFrame
        Frame holding the food names and the other features shown next to
        each recommendation; it is joined to the result with ``merge``.
        The default is bound to ``data[['name', 'tipe']]`` at the time this
        cell runs.
    k : int
        Maximum number of recommendations to return.

    Returns
    -------
    pd.DataFrame
        Up to ``k`` rows of ``items`` for the most similar foods, ordered
        from most to least similar, with ``name`` itself excluded.

    Raises
    ------
    KeyError
        If ``name`` is not a column of ``similarity_data``.
    ValueError
        If ``k`` is negative.
    """
    if k < 0:
        raise ValueError(f'k must be non-negative, got {k}')
    if name not in similarity_data.columns:
        raise KeyError(f'{name!r} is not a column of similarity_data')

    scores = similarity_data.loc[:, name]
    if isinstance(scores, pd.DataFrame):
        # Duplicate column labels make .loc return one column per duplicate;
        # fall back to the first so the ranking below stays one-dimensional.
        scores = scores.iloc[:, 0]

    # Rank every candidate from most to least similar. A stable full sort is
    # used instead of a partial partition so that every returned position is
    # correctly ranked, ties keep their dataset order, and a k larger than
    # the number of foods is handled without error.
    ranking = (-scores.to_numpy()).argsort(kind='stable')

    # Take k + 1 candidates because `name` itself may be among them
    closest = similarity_data.columns[ranking[:k + 1]]

    # Drop the queried food so it is never recommended to itself
    closest = closest.drop(name, errors='ignore')

    return pd.DataFrame(closest).merge(items).head(k)

In [None]:
# Look up the row whose name is exactly 'Peanut Butter Cookies'
data[data.name.eq('Peanut Butter Cookies')]

Unnamed: 0,foodId,name,tipe
832,833,Peanut Butter Cookies,kue-roti


In [None]:
# Get recommendations for 'Semur Telur' using the function's default arguments
food_recommendations('Semur Telur')

Unnamed: 0,name,tipe
0,Papeda Gulung,tahu-tempe-telur
1,Omelet Telur,tahu-tempe-telur
2,Kupat Tahu Magelang,tahu-tempe-telur
3,Tahu Teriyaki,tahu-tempe-telur
4,Pepes Telur Asin,tahu-tempe-telur
5,Tahu Jeletot,tahu-tempe-telur
6,Egg Roll Korea,tahu-tempe-telur
7,Chawanmushi,tahu-tempe-telur
8,Mun Tahu,tahu-tempe-telur
9,Tahu Lontong,tahu-tempe-telur
