In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
data = pd.read_csv('https://raw.githubusercontent.com/rfajri27/prototype-text-classifier-app/refs/heads/main/resto.csv')

In [4]:
data.head(2)

Unnamed: 0,id,resto_name,cuisine
0,132560,puesto de gorditas,Regional
1,132572,Cafe Chaires,Cafeteria


In [6]:
tfidf = TfidfVectorizer()
tfidf_matrix = tfidf.fit_transform(data["cuisine"])
tfidf_matrix.shape

(95, 22)

In [7]:
tfidf_matrix.todense()

matrix([[0., 0., 0., ..., 1., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]])

In [8]:
tfidf.get_feature_names_out()

array(['american', 'armenian', 'bar', 'bar_pub_brewery', 'breakfast',
       'brunch', 'burgers', 'cafe', 'cafeteria', 'chinese', 'coffee_shop',
       'contemporary', 'family', 'fast_food', 'international', 'italian',
       'japanese', 'mexican', 'pizzeria', 'regional', 'seafood',
       'vietnamese'], dtype=object)

In [9]:
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)
cosine_sim

array([[1., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 1., 0.],
       [0., 0., 0., ..., 1., 1., 0.],
       [0., 0., 0., ..., 0., 0., 1.]])

In [10]:
cosine_sim_df = pd.DataFrame(cosine_sim, index=data['resto_name'], columns=data['resto_name'])

In [12]:
cosine_sim_df.head(2)

resto_name,puesto de gorditas,Cafe Chaires,McDonalds Centro,Gorditas Dona Tota,tacos de barbacoa enfrente del Tec,Hamburguesas La perica,Pollo_Frito_Buenos_Aires,carnitas_mata,la perica hamburguesa,palomo tec,...,Restaurante Bar El Gallinero,Restaurante la Parroquia Potosina,Mariscos El Pescador,Koye Sushi,Tortas Locas Hipocampo,Mcdonalds Parque Tangamanga,Cafeteria cenidet,vips,El Rincón de San Francisco,Paniroles
resto_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
puesto de gorditas,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Cafe Chaires,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


In [13]:
def resto_recommendations(nama_resto, similarity_data=cosine_sim_df, items=data[['resto_name', 'cuisine']], k=5):
    """
    Rekomendasi Resto berdasarkan kemiripan dataframe

    Parameter:
    ---
    nama_resto : tipe data string (str)
                Nama Restoran (index kemiripan dataframe)
    similarity_data : tipe data pd.DataFrame (object)
                      Kesamaan dataframe, simetrik, dengan resto sebagai
                      indeks dan kolom
    items : tipe data pd.DataFrame (object)
            Mengandung kedua nama dan fitur lainnya yang digunakan untuk mendefinisikan kemiripan
    k : tipe data integer (int)
        Banyaknya jumlah rekomendasi yang diberikan
    ---


    Pada index ini, kita mengambil k dengan nilai similarity terbesar
    pada index matrix yang diberikan (i).
    """

    index = similarity_data.loc[:,nama_resto].to_numpy().argpartition(
        range(-1, -k, -1))

    closest = similarity_data.columns[index[-1:-(k+2):-1]]
    closest = closest.drop(nama_resto, errors='ignore')

    return pd.DataFrame(closest).merge(items).head(k)

In [14]:
data[data.resto_name.eq("McDonalds Centro")]

Unnamed: 0,id,resto_name,cuisine
2,132583,McDonalds Centro,American


In [15]:
resto_recommendations('KFC', k=10)

Unnamed: 0,resto_name,cuisine
0,VIPS,American
1,McDonalds Centro,American
2,tacos los volcanes,American
3,Pizzeria Julios,American
4,Sirlone,International
5,Unicols Pizza,Italian
6,Hamburguesas Valle Dorado,Burgers
7,Gorditas Doa Gloria,Mexican
8,puesto de tacos,Mexican
9,Cenaduria El RincÃ³n de Tlaquepaque,Mexican
