# NUSANTAMART
 *(Nusantara market) Website UMKM dengan Sistem Rekomendasi*

## Data Understanding

In [1]:
# Import library
import pandas as pd
import numpy as np 
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from pathlib import Path
import pickle

Data Loading

In [2]:
# Read 'indonesian_food.csv' (path relative to the working directory) into a DataFrame.
food = pd.read_csv('indonesian_food.csv')
# Show the loaded frame as the cell output.
food

Unnamed: 0,foodId,Nama,Tipe
0,1,Sosis Bakar,ayam-daging
1,2,Ngohiong Ayam Udang,ayam-daging
2,3,Rawon Ayam,ayam-daging
3,4,Usus Goreng Crispy,ayam-daging
4,5,Ceker Rica Rica,ayam-daging
...,...,...,...
1268,1269,Es Cincau,buah-minuman
1269,1270,Asinan Rambutan,buah-minuman
1270,1271,Asinan Buah,buah-minuman
1271,1272,Sop Buah,buah-minuman


## **Data Preprocessing**

melihat informasi pada data

In [3]:
# Print the column names, non-null counts and dtypes of the food frame.
food.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1273 entries, 0 to 1272
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   foodId  1273 non-null   int64 
 1   Nama    1273 non-null   object
 2   Tipe    1273 non-null   object
dtypes: int64(1), object(2)
memory usage: 30.0+ KB


In [4]:
# Preview the first rows of the food frame.
food.head()

Unnamed: 0,foodId,Nama,Tipe
0,1,Sosis Bakar,ayam-daging
1,2,Ngohiong Ayam Udang,ayam-daging
2,3,Rawon Ayam,ayam-daging
3,4,Usus Goreng Crispy,ayam-daging
4,5,Ceker Rica Rica,ayam-daging


In [5]:
# Summary statistics of the food frame (rendered output covers the foodId column).
food.describe()

Unnamed: 0,foodId
count,1273.0
mean,637.0
std,367.627756
min,1.0
25%,319.0
50%,637.0
75%,955.0
max,1273.0


## **Data Preparation**

Memeriksa Missing Value Pada dataset

In [6]:
# Count the missing values in each column.
food.isna().sum()

foodId    0
Nama      0
Tipe      0
dtype: int64

In [7]:
# Number of rows in the food frame.
food.shape[0]

1273

In [8]:
# Order the foods by ascending foodId and keep the sorted frame as fix_food.
fix_food = food.sort_values(by='foodId')
fix_food

Unnamed: 0,foodId,Nama,Tipe
0,1,Sosis Bakar,ayam-daging
1,2,Ngohiong Ayam Udang,ayam-daging
2,3,Rawon Ayam,ayam-daging
3,4,Usus Goreng Crispy,ayam-daging
4,5,Ceker Rica Rica,ayam-daging
...,...,...,...
1268,1269,Es Cincau,buah-minuman
1269,1270,Asinan Rambutan,buah-minuman
1270,1271,Asinan Buah,buah-minuman
1271,1272,Sop Buah,buah-minuman


In [9]:
# Count how many distinct foodId values fix_food contains.
fix_food['foodId'].unique().size

1273

In [10]:
# Convert the 'foodId', 'Nama' and 'Tipe' columns of fix_food into plain Python lists.
food_id, food_nama, food_tipe = (
    fix_food[column].tolist() for column in ('foodId', 'Nama', 'Tipe')
)

# Print the length of each list, one per line.
print(len(food_id), len(food_nama), len(food_tipe), sep='\n')

1273
1273
1273


In [11]:
# Assemble a new DataFrame with the columns 'foodId', 'name' and 'tipe' from the three lists.
food_new = pd.DataFrame(
    dict(foodId=food_id, name=food_nama, tipe=food_tipe)
)
food_new

Unnamed: 0,foodId,name,tipe
0,1,Sosis Bakar,ayam-daging
1,2,Ngohiong Ayam Udang,ayam-daging
2,3,Rawon Ayam,ayam-daging
3,4,Usus Goreng Crispy,ayam-daging
4,5,Ceker Rica Rica,ayam-daging
...,...,...,...
1268,1269,Es Cincau,buah-minuman
1269,1270,Asinan Rambutan,buah-minuman
1270,1271,Asinan Buah,buah-minuman
1271,1272,Sop Buah,buah-minuman


In [12]:
# `data` is another name for the same DataFrame object as food_new (no copy is made).
data = food_new
# Show 5 randomly chosen rows; no random_state is passed, so the rows differ between runs.
data.sample(5)

Unnamed: 0,foodId,name,tipe
796,797,Sop Sayur,sop-soto-bakso
1205,1206,Keripik Bayam,keripik-kerupuk
193,194,Ayam Serundeng,ayam-daging
688,689,Ayam Hainan,nasi-mie-pasta
983,984,Kue Ku,kue-roti


TF-IDF Vectorizer

In [13]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Create the TF-IDF vectorizer.
# NOTE(review): this rebinds `tf`, which the import cell bound to the tensorflow
# module. The name is kept because later cells refer to the vectorizer as `tf`.
tf = TfidfVectorizer()

# Learn the vocabulary and idf weights from the 'tipe' column.
tf.fit(data['tipe'])

# List the learned feature names.
# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2;
# get_feature_names_out() is its replacement. tolist() keeps the plain-list display.
tf.get_feature_names_out().tolist()



['ayam',
 'bakso',
 'buah',
 'daging',
 'ikan',
 'jajanan',
 'jeli',
 'keripik',
 'kerupuk',
 'kue',
 'mie',
 'minuman',
 'nasi',
 'pasar',
 'pasta',
 'puding',
 'roti',
 'sambal',
 'sayur',
 'seafood',
 'sop',
 'soto',
 'tahu',
 'telur',
 'tempe']

In [14]:
# Fit the vectorizer on the 'tipe' column and transform it into a TF-IDF matrix.
tfidf_matrix = tf.fit_transform(data['tipe']) 
 
# Show the dimensions of the TF-IDF matrix.
tfidf_matrix.shape 

(1273, 25)

In [15]:
# Convert the TF-IDF matrix to a dense matrix with todense() so its values can be displayed.
tfidf_matrix.todense()

matrix([[0.70710678, 0.        , 0.        , ..., 0.        , 0.        ,
         0.        ],
        [0.70710678, 0.        , 0.        , ..., 0.        , 0.        ,
         0.        ],
        [0.70710678, 0.        , 0.        , ..., 0.        , 0.        ,
         0.        ],
        ...,
        [0.        , 0.        , 0.70710678, ..., 0.        , 0.        ,
         0.        ],
        [0.        , 0.        , 0.70710678, ..., 0.        , 0.        ,
         0.        ],
        [0.        , 0.        , 0.70710678, ..., 0.        , 0.        ,
         0.        ]])

In [16]:
# Build a DataFrame view of the TF-IDF matrix for inspection:
# columns are the terms learned from the 'tipe' column, rows are the food names.
# get_feature_names_out() replaces get_feature_names(), which was removed in
# scikit-learn 1.2.
# A random 22-column by 10-row slice is displayed.
pd.DataFrame(
    tfidf_matrix.todense(),
    columns=tf.get_feature_names_out(),
    index=data.name
).sample(22, axis=1).sample(10, axis=0)

Unnamed: 0_level_0,kerupuk,keripik,sambal,seafood,pasta,soto,ikan,mie,telur,ayam,...,jeli,roti,sop,bakso,sayur,kue,tahu,daging,minuman,buah
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Keripik Singkong,0.707107,0.707107,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Mie Goreng Jawa,0.0,0.0,0.0,0.0,0.57735,0.0,0.0,0.57735,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Teh Susu Telur (Talua) Khas Medan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.707107,0.707107
Sup Merah,0.0,0.0,0.0,0.0,0.0,0.57735,0.0,0.0,0.0,0.0,...,0.0,0.0,0.57735,0.57735,0.0,0.0,0.0,0.0,0.0,0.0
Sambal Soto,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Lele Goreng Kremes,0.0,0.0,0.0,0.707107,0.0,0.0,0.707107,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Sambal Pencok Kacang Panjang,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Bakso Keju,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.707107,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.707107,0.0,0.0
Es Gempol Pleret,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.707107,0.707107
Pempek Kulit,0.0,0.0,0.0,0.707107,0.0,0.0,0.707107,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [17]:
from sklearn.metrics.pairwise import cosine_similarity
 
# Compute the cosine similarity between the rows of the TF-IDF matrix.
cosine_sim = cosine_similarity(tfidf_matrix) 
cosine_sim

array([[1., 1., 1., ..., 0., 0., 0.],
       [1., 1., 1., ..., 0., 0., 0.],
       [1., 1., 1., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 1., 1.],
       [0., 0., 0., ..., 1., 1., 1.],
       [0., 0., 0., ..., 1., 1., 1.]])

In [18]:
# Wrap cosine_sim in a DataFrame whose rows and columns are both labelled by food name.
cosine_sim_df = pd.DataFrame(cosine_sim, index=data['name'], columns=data['name'])
print('Shape:', cosine_sim_df.shape)

# Display a random slice of the similarity matrix: 5 columns by 10 rows of foods.
cosine_sim_df.sample(5, axis=1).sample(10, axis=0)

Shape: (1273, 1273)


name,Tumis Kacang Panjang,Lasagna Panggang,Nasi Telur Pontianak,Ayam Panggang Pecel,Sandwich Homemade
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Es Krim Strawberry,0.0,0.0,0.0,0.0,0.0
Tape Ketan,0.0,0.0,0.0,0.0,0.0
Pizza Seafood,0.0,0.0,0.0,0.0,1.0
Lupis,0.0,0.0,0.0,0.0,0.0
Trancam,1.0,0.0,0.0,0.0,0.0
Arsik Ikan Mas,0.0,0.0,0.0,0.0,0.0
Soto Ayam Kuning Lamongan,0.0,0.0,0.0,0.0,0.0
Tahu Gimbal Udang,0.0,0.0,1.0,0.0,0.0
Udang Bakar Madu,0.0,0.0,0.0,0.0,0.0
Burgo,1.0,0.0,0.0,0.0,0.0


In [24]:
from os import name
def food_recommendations(name, similarity_data=None, items=None, k=20):
    """
    Recommend the foods most similar to a given food.

    Parameters
    ----------
    name : str
        Food name to look up; must be a column label of `similarity_data`.
    similarity_data : pd.DataFrame, optional
        Similarity matrix whose columns are labelled by food name. Rows are
        assumed to be in the same order as the columns. Defaults to the
        notebook-level `cosine_sim_df`, resolved when the function is called.
    items : pd.DataFrame, optional
        Frame merged onto the result to describe each recommendation. It must
        share a column with the name of `similarity_data.columns` ('name' in
        this notebook). Defaults to `data[['name', 'tipe']]`, resolved when the
        function is called.
    k : int
        Maximum number of recommendations to return.

    Returns
    -------
    pd.DataFrame
        At most `k` rows of `items`, ordered from most to least similar, with
        `name` itself excluded.

    Raises
    ------
    ValueError
        If `k` is negative.
    KeyError
        If `name` is not a column of `similarity_data`.
    """
    if k < 0:
        raise ValueError(f"k must be non-negative, got {k}")

    # Defaults are looked up at call time so that re-running the cells that
    # build cosine_sim_df / data is picked up without redefining this function.
    if similarity_data is None:
        similarity_data = cosine_sim_df
    if items is None:
        items = data[['name', 'tipe']]

    if name not in similarity_data.columns:
        raise KeyError(f"{name!r} is not a column of similarity_data")

    scores = similarity_data.loc[:, name]
    if isinstance(scores, pd.DataFrame):
        # A duplicated column label makes .loc return one column per duplicate;
        # use the first one.
        scores = scores.iloc[:, 0]
    scores = scores.to_numpy()

    # Take k + 1 candidates because the queried food itself is usually among the
    # top scores and is dropped below. Clamp to the number of available items.
    n_candidates = min(k + 1, scores.size)

    # Stable sort on the negated scores: highest similarity first, ties kept in
    # column order. This fully orders the candidates, unlike a partial partition.
    order = (-scores).argsort(kind='stable')[:n_candidates]
    closest = similarity_data.columns[order]

    # Remove the queried food so it is never recommended to itself.
    closest = closest.drop(name, errors='ignore')

    return pd.DataFrame(closest).merge(items).head(k)

In [None]:
# Look up the row(s) whose name is exactly 'Kue Ketan Hitam Kukus'.
data.loc[data['name'] == 'Kue Ketan Hitam Kukus']

In [None]:
# Request recommendations for 'Kue Ketan Hitam Kukus' using the default arguments.
food_recommendations('Kue Ketan Hitam Kukus')