In [23]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler

# Load data
data = pd.read_csv("makanan4.csv", sep=";")

In [16]:
import pandas as pd
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer
import numpy as np

def preprocess_data(filepath):
    # Baca data dari CSV
    data = pd.read_csv(filepath)

    # Drop kolom yang tidak diperlukan
    # Kolom tetap digunakan pada saat pembuatan API
    data = data.drop(['id', 'kode', 'sumber', 'gambar', 'satuan'], axis=1)

    # Ubah nilai yang asalnya ',' menjadi '.'
    numeric_cols = ['air_gram', 'energi_kal', 'protein_gram', 'lemak_gram', 'karbohidrat_gram', 'serat_gram',
                    'kalsium_mg', 'fosfor_mg', 'zatbesi_mg', 'natrium_mg', 'kalium_mg', 'tembaga_mg', 'vitc_mg']

    data[numeric_cols] = data[numeric_cols].replace({',': '.'}, regex=True).astype(float)

    return data, numeric_cols

def scaling(data, numeric_cols):
    scaler = StandardScaler()
    data_normalized = scaler.fit_transform(data[numeric_cols])
    return data_normalized, scaler

def nn_predictor(prep_data):
    neigh = NearestNeighbors(metric='cosine', algorithm='brute')
    neigh.fit(prep_data)
    return neigh

def build_pipeline(neigh, params):
    transformer = FunctionTransformer(neigh.kneighbors, kw_args=params)
    pipeline = Pipeline([('NN', transformer)])
    return pipeline

def extract_data(data, allergy_list):
    filtered_data = data.copy()
    if allergy_list is not None:
        for allergy in allergy_list:
            filtered_data = filtered_data[~filtered_data['nama_bahan'].str.contains(allergy, case=False, na=False)]
            filtered_data = filtered_data[~filtered_data['jenis_pangan'].str.contains(allergy, case=False, na=False)]
    return filtered_data

def apply_pipeline(pipeline, food_indices, extracted_data):
    return extracted_data.iloc[pipeline.transform(food_indices)[0]]

def recommend(data, food_names, numeric_cols, allergy_list=None, params={'n_neighbors': 10, 'return_distance': True}):
    extracted_data = extract_data(data, allergy_list)
    data_normalized, scaler = scaling(extracted_data, numeric_cols)
    neigh = nn_predictor(data_normalized)
    pipeline = build_pipeline(neigh, params)
    
    # Cari indeks makanan yang cocok dengan nama makanan yang diberikan
    food_indices = [data[data['nama_bahan'] == food_name].index[0] for food_name in food_names]
    
    distances, recommended_indices = pipeline.transform(data_normalized[food_indices])
    recommended_foods = extracted_data.iloc[recommended_indices[0]]['nama_bahan'].tolist()
    
    return recommended_foods, distances[0]


# Contoh penggunaan
filepath = "makanan8.csv"
allergy_list = ['susu']  # Ganti dengan alergi yang dimiliki user
food_names = ['alpukat segar']  # Ganti dengan makanan yang dimiliki user

data, numeric_cols = preprocess_data(filepath)
recommendations, distances = recommend(data, food_names, numeric_cols, allergy_list)

print("Rekomendasi makanan:")
for idx, (food, score) in enumerate(zip(recommendations, distances)):
    print(f"{idx+1}. {food} (Distance Score: {score})")


Rekomendasi makanan:
1. alpukat segar (Distance Score: 0.0)
2. tomat muda segar (Distance Score: 0.04009696706143839)
3. baligo segar (Distance Score: 0.04387945652807579)
4. labu air segar (Distance Score: 0.04562444757059858)
5. wortel rebus (Distance Score: 0.05351870932735969)
6. jeruk nipis segar (Distance Score: 0.05421769551528588)
7. Cabai merah, segar (Distance Score: 0.0550044558585433)
8. wortel kukus (Distance Score: 0.05660230833328639)
9. semangka segar (Distance Score: 0.05747077364039577)
10. bengkuang segar (Distance Score: 0.059636190301770586)


In [12]:
import pandas as pd
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer
from sklearn.metrics import mean_squared_error, mean_absolute_error
import numpy as np

def preprocess_data(filepath):
    # Baca data dari CSV
    data = pd.read_csv(filepath)

    # Drop kolom yang tidak diperlukan
    data = data.drop(['id', 'kode', 'sumber', 'gambar'], axis=1)

    # Ubah nilai yang asalnya ',' menjadi '.'
    numeric_cols = ['air_gram', 'energi_kal', 'protein_gram', 'lemak_gram', 'karbohidrat_gram', 'serat_gram',
                    'kalsium_mg', 'fosfor_mg', 'zatbesi_mg', 'natrium_mg', 'kalium_mg', 'tembaga_mg', 'vitc_mg']

    data[numeric_cols] = data[numeric_cols].replace({',': '.'}, regex=True)

    return data, numeric_cols

def scaling(data, numeric_cols):
    scaler = StandardScaler()
    data_normalized = scaler.fit_transform(data[numeric_cols])
    return data_normalized, scaler

def nn_predictor(prep_data):
    neigh = NearestNeighbors(metric='cosine', algorithm='brute')
    neigh.fit(prep_data)
    return neigh

def build_pipeline(neigh, params):
    transformer = FunctionTransformer(neigh.kneighbors, kw_args=params)
    pipeline = Pipeline([('NN', transformer)])
    return pipeline

def extract_data(data, allergy_list):
    filtered_data = data.copy()
    if allergy_list is not None:
        for allergy in allergy_list:
            filtered_data = filtered_data[~filtered_data['nama_bahan'].str.contains(allergy, case=False)]
            filtered_data = filtered_data[~filtered_data['jenis_pangan'].str.contains(allergy, case=False)]
    return filtered_data

def apply_pipeline(pipeline, food_indices, extracted_data):
    return extracted_data.iloc[pipeline.transform(food_indices)[0]]

def recommend(data, food_names, numeric_cols, allergy_list=None, params={'n_neighbors': 11, 'return_distance': True}):
    extracted_data = extract_data(data, allergy_list)
    data_normalized, scaler = scaling(extracted_data, numeric_cols)
    neigh = nn_predictor(data_normalized)
    pipeline = build_pipeline(neigh, params)
    
    # Cari indeks makanan yang cocok dengan nama makanan yang diberikan
    food_indices = [data[data['nama_bahan'] == food_name].index[0] for food_name in food_names]
    
    distances, recommended_indices = pipeline.transform(data_normalized[food_indices])
    recommended_foods = extracted_data.iloc[recommended_indices[0]]['nama_bahan'].tolist()
    
    return recommended_foods, distances[0]


# Contoh penggunaan
filepath = "makanan6.csv"
allergy_list = ['susu']  # Ganti dengan alergi yang dimiliki user
food_names = ['apel segar']  # Ganti dengan makanan yang dimiliki user

data, numeric_cols = preprocess_data(filepath)
recommendations, distances = recommend(data, food_names, numeric_cols, allergy_list)

print("Rekomendasi makanan:")
for idx, (food, score) in enumerate(zip(recommendations, distances)):
    print(f"{idx+1}. {food} (Distance Score: {score})")


Rekomendasi makanan:
1. apel segar (Distance Score: 2.220446049250313e-16)
2. buah pir / pear (Distance Score: 0.013623297389274258)
3. apel malang segar (Distance Score: 0.02400054498041615)
4. terung panjang kukus (Distance Score: 0.0792637987823811)
5. mangga harumanis segar (Distance Score: 0.10840654271227768)
6. duwet segar (Distance Score: 0.11193915313777125)
7. manggis segar (Distance Score: 0.11286731721600163)
8. buah naga putih segar (Distance Score: 0.11663718656015554)
9. nanas palembang segar (Distance Score: 0.11917290374288991)
10. terung bengkulu segar (Distance Score: 0.11929719291674545)
11. jambu air segar (Distance Score: 0.12250635908213015)


In [8]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Baca data dari CSV
data = pd.read_csv("makanan8.csv")

# Drop kolom yang tidak diperlukan
data = data.drop(['id', 'kode', 'sumber', 'gambar', 'satuan'], axis=1)

# Ubah nilai yang asalnya ',' menjadi '.'
numeric_cols = ['air_gram', 'energi_kal', 'protein_gram', 'lemak_gram', 'karbohidrat_gram', 'serat_gram',
                'kalsium_mg', 'fosfor_mg', 'zatbesi_mg', 'natrium_mg', 'kalium_mg', 'tembaga_mg', 'vitc_mg']

data[numeric_cols] = data[numeric_cols].replace({',': '.'}, regex=True)

# Normalisasi data
scaler = StandardScaler()
data_normalized = scaler.fit_transform(data[numeric_cols])

# Hitung similarity matrix (Cosine Similarity)
cosine_sim = cosine_similarity(data_normalized, data_normalized)


# Fungsi untuk mendapatkan rekomendasi makanan
def get_recommendations(food_names, allergy_list):
    # Cari indeks makanan yang cocok dengan nama makanan yang diberikan
    food_indices = [data[data['nama_bahan'] == food_name].index[0] for food_name in food_names]
    
    print("Makanan tersebut berada di indeks ke-", food_indices)
    
    # Filter makanan berdasarkan alergi
    filtered_data = data.copy()
    for allergy in allergy_list:
        filtered_data = filtered_data[~filtered_data['nama_bahan'].str.contains(allergy, case=False)]
        filtered_data = filtered_data[~filtered_data['jenis_pangan'].str.contains(allergy, case=False)]
    
    # Hitung similarity antara makanan yang dimiliki user dengan makanan yang tersedia
    sim_scores = []
    for food_index in food_indices:
        sim_scores.extend(list(enumerate(cosine_sim[food_index])))
    
    # Hitung rata-rata similarity score untuk setiap makanan yang tersedia
    sim_scores_df = pd.DataFrame(sim_scores, columns=['index', 'score'])
    avg_sim_scores = sim_scores_df.groupby('index')['score'].mean().reset_index()
    
    # Urutkan makanan berdasarkan similarity score
    avg_sim_scores = avg_sim_scores.sort_values(by='score', ascending=False)
    
    
    # Ambil 10 makanan dengan similarity score tertinggi (kecuali makanan yang dimiliki user)
    top_similar_food_indices = avg_sim_scores['index'].iloc[0:11].tolist()  # [1:11] karena urutan 0 adalah makanan itu sendiri
    top_similar_food_names = data['nama_bahan'].iloc[top_similar_food_indices].tolist()
    top_similar_food_scores = avg_sim_scores['score'].iloc[1:11].tolist()  # Nilai similarity
    
    return top_similar_food_names, top_similar_food_scores

# Contoh penggunaan
allergy_list = ['susu']  # Ganti dengan alergi yang dimiliki user
food_names = ['apel segar']  # Ganti dengan makanan yang dimiliki user
recommendations, similarity_scores = get_recommendations(food_names, allergy_list)
print("Rekomendasi makanan:")
for idx, (food, score) in enumerate(zip(recommendations, similarity_scores)):
    print(f"{idx+1}. {food} (Similarity Score: {score})")


Makanan tersebut berada di indeks ke- [613]
Rekomendasi makanan:
1. apel segar (Similarity Score: 0.9862510103217272)
2. buah pir / pear (Similarity Score: 0.9757989328132398)
3. apel malang segar (Similarity Score: 0.9199825149140869)
4. terung panjang kukus (Similarity Score: 0.8904701611419836)
5. mangga harumanis segar (Similarity Score: 0.8872752353477718)
6. duwet segar (Similarity Score: 0.8859794515311835)
7. manggis segar (Similarity Score: 0.8821784400315614)
8. buah naga putih segar (Similarity Score: 0.8796076647008712)
9. nanas palembang segar (Similarity Score: 0.8794966664084582)
10. terung bengkulu segar (Similarity Score: 0.8762427434845895)


In [1]:
from sklearn.metrics import mean_squared_error, mean_absolute_error
import numpy as np

# Misalnya kita memiliki data sebenarnya dan prediksi untuk ilustrasi
actual_scores = [4, 3, 5, 2, 1]  # Contoh nilai sebenarnya dari user
predicted_scores = [3.5, 2.8, 4.9, 2.2, 1.1]  # Contoh nilai prediksi dari model

# Menghitung MSE, MAE, dan RMSE
mse = mean_squared_error(actual_scores, predicted_scores)
mae = mean_absolute_error(actual_scores, predicted_scores)
rmse = np.sqrt(mse)

print(f"MSE: {mse}")
print(f"MAE: {mae}")
print(f"RMSE: {rmse}")


MSE: 0.07000000000000003
MAE: 0.22000000000000003
RMSE: 0.26457513110645914
