In [1]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import NearestNeighbors

In [2]:
# Load the per-food nutrient table and preview its first rows.
DATA_PATH = './data/foods_nutrients.csv'
df = pd.read_csv(DATA_PATH)
df.head()

Unnamed: 0,식품코드,식품명,식품중량,에너지(kcal),수분(g),단백질(g),지방(g),회분(g),탄수화물(g),당류(g),...,비타민 C(mg),비타민 D(μg),콜레스테롤(mg),포화지방산(g),트랜스지방산(g),비타민 B12(μg),엽산(μg DFE),비타민 E(mg α-TE),과당(g),맥아당(g)
0,G00001,국밥_돼지머리,900.0,137,71.6,6.7,5.16,0.63,15.94,0.16,...,0.04,0.0,23.82,1.47,0.03,0.13,5.3,0.0,0.16,0.0
1,G00002,국밥_순대국밥,900.0,75,83.7,3.17,2.28,0.48,10.38,0.17,...,0.21,0.0,48.69,1.26,0.01,0.0,6.59,0.0,0.17,0.0
2,G00003,국밥_콩나물,780.0,52,86.8,1.45,0.24,0.56,10.93,0.0,...,1.26,0.0,0.0,0.12,0.0,0.0,5.27,0.0,0.0,0.0
3,G00004,기장밥,200.0,166,59.1,3.44,0.57,0.15,36.77,0.0,...,0.29,0.0,0.0,0.08,0.0,0.0,2.52,0.0,0.0,0.0
4,G00005,김밥,230.0,140,69.5,4.84,4.55,1.11,19.98,0.0,...,3.76,0.0,19.3,1.1,0.02,0.87,21.47,0.0,0.0,0.0


# 유사한 음식 상위 10개 추출

In [4]:
import os

# Pin the CPU count that loky (joblib's process backend) reads from the
# environment to a fixed value of 8.
os.environ.update(LOKY_MAX_CPU_COUNT="8")

In [None]:
# Feature matrix: every column except the identifier columns (food code and
# food name); missing values are replaced with 0.
features = df.drop(columns=['식품코드', '식품명']).fillna(0)

# Standardize the features before fitting the neighbor model.
scaler = StandardScaler()
scaled_features = scaler.fit_transform(features)

# Fit the KNN model (n_neighbors = 11 -> the query food itself + 10 similar foods).
knn = NearestNeighbors(metric='euclidean', n_neighbors=11).fit(scaled_features)

# Similar-food lookup: return the top_k nearest foods to a named food.
def recommend_similar_foods(food_name, top_k=10):
    """Return the foods whose scaled feature vector is closest to ``food_name``.

    The first row of ``df`` whose '식품명' equals ``food_name`` is used as the
    query. Its vector from ``scaled_features`` is passed to the fitted ``knn``
    model and the matching rows of ``df`` are returned nearest-first, with the
    query row itself removed.

    Args:
        food_name: Exact value of the '식품명' column to search from.
        top_k: Number of similar foods to return. Values larger than the
            number of other rows are capped; negative values are treated as 0.

    Returns:
        A slice of ``df`` with at most ``top_k`` rows.

    Raises:
        IndexError: If no row of ``df`` has '식품명' equal to ``food_name``.
    """
    # Work with row positions rather than index labels: both scaled_features
    # and the indices returned by knn.kneighbors are positional.
    positions = np.flatnonzero((df['식품명'] == food_name).to_numpy())
    if positions.size == 0:
        raise IndexError(f"no food named {food_name!r} in df['식품명']")
    query_pos = positions[0]

    top_k = max(int(top_k), 0)
    # Request one extra neighbor (the query row itself) and pass n_neighbors
    # explicitly so top_k is not silently capped by the value used at fit time.
    n_query = min(top_k + 1, len(scaled_features))
    _, indices = knn.kneighbors([scaled_features[query_pos]], n_neighbors=n_query)

    # Remove the query row by position instead of assuming it is neighbor #0:
    # when several foods share an identical vector the ordering among those
    # zero-distance rows is not guaranteed.
    similar_indices = [pos for pos in indices[0] if pos != query_pos][:top_k]
    return df.iloc[similar_indices]


# Example run: similar foods for "김밥" using the default top_k.
recommend_similar_foods("김밥")

Unnamed: 0,식품코드,식품명,식품중량,에너지(kcal),수분(g),단백질(g),지방(g),회분(g),탄수화물(g),당류(g),...,비타민 C(mg),비타민 D(μg),콜레스테롤(mg),포화지방산(g),트랜스지방산(g),비타민 B12(μg),엽산(μg DFE),비타민 E(mg α-TE),과당(g),맥아당(g)
12,G00013,김밥_풋고추,290.0,169,61.9,4.88,4.41,1.32,27.52,0.09,...,2.92,0.0,37.81,1.24,0.03,1.28,21.44,0.0,0.0,0.0
10,G00011,김밥_채소,280.0,158,63.8,4.6,3.65,1.28,26.65,0.14,...,1.81,0.0,29.93,0.99,0.02,1.05,19.05,0.0,0.06,0.0
9,G00010,김밥_참치,250.0,174,64.2,7.0,7.22,1.32,20.26,0.71,...,3.06,0.0,33.45,1.4,0.04,0.58,25.04,0.0,0.01,0.55
5,G00006,김밥_김치,270.0,130,71.2,4.3,4.03,1.29,19.17,0.29,...,3.43,0.0,47.38,1.06,0.02,0.91,36.62,0.0,0.0,0.0
6,G00007,김밥_날치알,260.0,177,59.8,6.1,4.26,1.13,28.66,1.43,...,0.78,0.0,55.15,1.2,0.03,1.49,20.09,0.0,0.23,0.28
27,G00028,비빔밥,450.0,142,68.9,6.86,4.32,1.11,18.84,0.05,...,2.02,0.0,42.26,0.95,0.06,0.42,40.59,0.0,0.05,0.0
464,D00009,얼갈이배추김치,80.0,63,84.8,1.59,1.45,1.23,10.94,0.38,...,0.04,0.0,0.0,0.05,0.0,0.0,18.13,0.0,0.14,0.01
440,J00037,양장피,220.0,133,73.3,7.3,6.63,1.85,10.95,0.44,...,2.28,0.0,65.89,1.21,0.03,1.01,9.55,0.0,0.44,0.0
1597,H00204,잡채_소고기,118.7,112,39.8,2.5,4.18,0.0,16.38,2.93,...,6.42,0.02,6.66,1.21,0.0,0.0,22.63,0.26,0.0,0.0
17,G00018,덮밥_오징어,360.0,135,68.1,7.18,2.01,0.81,21.94,0.66,...,0.0,0.0,46.44,0.24,0.0,0.57,12.03,0.0,0.23,0.0


# 약간의 검색을 활용함.

In [9]:
def recommend_by_keyword(keyword, top_k=10):
    """Recommend foods close to the average profile of foods matching a keyword.

    All rows of ``df`` whose '식품명' contains ``keyword`` (case-insensitive)
    are averaged in the scaled feature space, and the ``top_k`` rows nearest
    to that average are returned. Matching foods themselves are not excluded
    from the result.

    Args:
        keyword: Pattern searched for in the '식품명' column (interpreted by
            ``Series.str.contains`` with its default regex handling).
        top_k: Number of foods to return; capped at the number of rows.

    Returns:
        A slice of ``df`` with at most ``top_k`` rows, or a message string
        when no food name matches ``keyword``.
    """
    # 1. Find foods whose name contains the keyword.
    name_hits = df['식품명'].str.contains(keyword, case=False, na=False)

    if not name_hits.any():
        return f"'{keyword}'와 일치하는 음식이 없습니다."

    # 2. Average the matched rows. Reuse the vectors the KNN model was fitted
    #    on instead of re-transforming raw rows of df: those raw rows may
    #    still contain NaN (the fit-time fillna(0) was applied to a copy), and
    #    the global name `scaler` may no longer refer to the scaler that
    #    produced scaled_features.
    avg_vector = scaled_features[name_hits.to_numpy()].mean(axis=0)

    # 3. Query the neighbors, asking for exactly top_k so the request is not
    #    silently capped by the n_neighbors value chosen at fit time.
    n_query = min(max(int(top_k), 0), len(scaled_features))
    if n_query == 0:
        return df.iloc[[]]
    _, indices = knn.kneighbors([avg_vector], n_neighbors=n_query)

    return df.iloc[indices[0]]


In [10]:
# Example run: 10 foods nearest to the average of all foods whose name contains "김밥".
recommend_by_keyword("김밥", top_k=10)


Unnamed: 0,식품코드,식품명,식품중량,에너지(kcal),수분(g),단백질(g),지방(g),회분(g),탄수화물(g),당류(g),...,비타민 C(mg),비타민 D(μg),콜레스테롤(mg),포화지방산(g),트랜스지방산(g),비타민 B12(μg),엽산(μg DFE),비타민 E(mg α-TE),과당(g),맥아당(g)
5,G00006,김밥_김치,270.0,130,71.2,4.3,4.03,1.29,19.17,0.29,...,3.43,0.0,47.38,1.06,0.02,0.91,36.62,0.0,0.0,0.0
1829,G00189,김밥_소고기,400.0,110,45.6,4.1,2.3,0.0,17.46,1.08,...,2.97,0.1,23.09,0.72,0.0,0.0,16.07,0.22,0.0,0.0
19,G00020,볶음밥,350.0,183,56.9,5.56,2.76,0.84,33.97,0.0,...,4.13,0.0,29.28,1.1,0.03,0.0,13.91,0.0,0.0,0.0
6,G00007,김밥_날치알,260.0,177,59.8,6.1,4.26,1.13,28.66,1.43,...,0.78,0.0,55.15,1.2,0.03,1.49,20.09,0.0,0.23,0.28
505,G00075,비빔밥_열무_양념장,280.0,114,35.7,3.35,1.28,0.0,22.08,1.94,...,1.15,0.06,11.27,0.32,0.0,0.0,22.58,0.0,0.0,0.0
1830,G00190,김밥_참치,400.0,128,47.7,5.3,3.93,0.0,17.49,0.57,...,3.4,0.63,22.86,0.64,0.0,0.0,18.9,0.61,0.0,0.0
22,G00023,볶음밥_새우,370.0,172,64.1,6.31,6.14,0.61,22.85,0.71,...,4.12,0.0,70.72,1.19,0.03,0.24,36.54,0.0,0.15,0.0
1831,G00191,김밥_채소,400.0,105,45.8,3.3,1.96,0.0,18.08,0.59,...,4.22,0.06,19.23,0.51,0.0,0.0,19.02,0.3,0.0,0.0
25,G00026,볶음밥_채소,260.0,182,59.7,4.72,5.39,1.44,28.74,0.29,...,0.76,0.0,46.24,0.84,0.02,0.35,20.64,0.0,0.11,0.0
504,G00074,비빔밥_열무,258.6,144,27.2,3.5,3.07,0.0,24.3,0.34,...,0.45,0.0,4.5,0.9,0.0,0.0,22.89,0.42,0.0,0.0


# 질병을 적용한 추천

In [11]:
# Nutrient column names per condition. A `neg_*` list holds nutrients to be
# penalised and a `pos_*` list holds nutrients to be rewarded when these lists
# are turned into a weight vector.
# NOTE(review): "diabate" presumably means diabetes; the names are kept as-is
# because other cells reference them.
neg_diabate = [
    '과당(g)', '포도당(g)', '당류(g)', '자당(g)',
    '콜레스테롤(mg)', '에너지(kcal)', '탄수화물(g)', '트랜스지방산(g)',
]
pos_diabate = [
    '단백질(g)', '오메가3 지방산(g)', '식이섬유(g)',
    '아연(mg)', '불포화지방(g)', '마그네슘(mg)',
]

neg_obesity = [
    '지방(g)', '당류(g)', '포화지방산(g)', '콜레스테롤(mg)', '불포화지방(g)',
    '에너지(kcal)', '탄수화물(g)', '트랜스지방산(g)', '카페인(mg)',
]
pos_obesity = [
    '에리스리톨(g)', '알룰로오스(g)', '단백질(g)',
    '오메가3 지방산(g)', '식이섬유(g)',
]

neg_hypertension = [
    '카페인(mg)', '나트륨(mg)', '콜레스테롤(mg)',
    '포화지방산(g)', '트랜스지방산(g)',
]
pos_hypertension = [
    '비타민 C(mg)', '오메가3 지방산(g)', '칼륨(mg)', '식이섬유(g)',
    '불포화지방(g)', '칼슘(mg)', '마그네슘(mg)',
]

In [13]:
# Display the full table (pandas truncates the rendering) to inspect its columns.
df

Unnamed: 0,식품코드,식품명,식품중량,에너지(kcal),수분(g),단백질(g),지방(g),회분(g),탄수화물(g),당류(g),...,비타민 C(mg),비타민 D(μg),콜레스테롤(mg),포화지방산(g),트랜스지방산(g),비타민 B12(μg),엽산(μg DFE),비타민 E(mg α-TE),과당(g),맥아당(g)
0,G00001,국밥_돼지머리,900.0,137,71.6,6.70,5.16,0.63,15.94,0.16,...,0.04,0.00,23.82,1.47,0.03,0.13,5.30,0.00,0.16,0.0
1,G00002,국밥_순대국밥,900.0,75,83.7,3.17,2.28,0.48,10.38,0.17,...,0.21,0.00,48.69,1.26,0.01,0.00,6.59,0.00,0.17,0.0
2,G00003,국밥_콩나물,780.0,52,86.8,1.45,0.24,0.56,10.93,0.00,...,1.26,0.00,0.00,0.12,0.00,0.00,5.27,0.00,0.00,0.0
3,G00004,기장밥,200.0,166,59.1,3.44,0.57,0.15,36.77,0.00,...,0.29,0.00,0.00,0.08,0.00,0.00,2.52,0.00,0.00,0.0
4,G00005,김밥,230.0,140,69.5,4.84,4.55,1.11,19.98,0.00,...,3.76,0.00,19.30,1.10,0.02,0.87,21.47,0.00,0.00,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2609,M00007,딸기바나나 스무디,355.0,54,62.5,1.10,0.68,0.00,11.75,9.34,...,19.51,0.02,1.77,0.40,0.00,0.00,22.34,0.12,0.00,0.0
2610,M00008,미숫가루(선식)음료,100.0,128,0.4,2.90,1.10,0.00,27.73,12.67,...,0.00,0.00,0.00,0.22,0.00,0.00,25.40,0.20,0.00,0.0
2611,M00009,수박화채,200.0,42,89.9,1.20,0.87,0.00,7.79,5.78,...,0.20,0.03,2.47,0.56,0.00,0.00,0.51,0.04,0.00,0.0
2612,M00010,율무차,100.0,59,0.6,0.80,0.74,0.00,12.64,8.32,...,0.00,0.00,0.00,0.07,0.00,0.00,0.00,0.14,0.00,0.0


In [14]:
# Target columns: everything except the identifier / serving-weight columns.
non_nutrient_cols = ['식품코드', '식품명', '식품중량']
nutrient_cols = list(filter(lambda col: col not in non_nutrient_cols, df.columns))

# Replace missing nutrient values with 0 (this writes back into df).
df[nutrient_cols] = df[nutrient_cols].fillna(0)

# Standardize the nutrient columns into a new frame with the same column names.
# NOTE: this rebinds the global `scaler`, replacing the one fitted earlier for
# the KNN features; anything that still needs that earlier scaler must not
# rely on the global name after this cell has run.
scaler = StandardScaler().fit(df[nutrient_cols])
df_scaled = pd.DataFrame(scaler.transform(df[nutrient_cols]), columns=nutrient_cols)

# Weight-vector builder
def build_weight_vector(columns, pos_list, neg_list):
    """Map every column to a weight of +1, -1 or 0.

    Columns named in ``pos_list`` get +1 and columns named in ``neg_list``
    get -1; a column named in both ends up at -1 because the negative list is
    applied last. Names that are not in ``columns`` are ignored, and every
    other column keeps a weight of 0.

    Args:
        columns: Iterable of column names to create weights for.
        pos_list: Names to weight positively.
        neg_list: Names to weight negatively.

    Returns:
        dict of column name -> weight, in the order of ``columns``.
    """
    weights = dict.fromkeys(columns, 0)
    # Positive names first, negative names second, so negatives win on overlap.
    for sign, names in ((1, pos_list), (-1, neg_list)):
        weights.update({name: sign for name in names if name in weights})
    return weights

# Score function
def compute_health_score(df_scaled, weights):
    """Compute a per-row score as the weighted sum of selected columns.

    Args:
        df_scaled: DataFrame of (scaled) nutrient values, one row per food.
        weights: Mapping of column name -> numeric weight. Names that are not
            columns of ``df_scaled`` are ignored.

    Returns:
        A float Series indexed like ``df_scaled`` holding
        ``sum(df_scaled[col] * weight)`` over the columns with non-zero weight.
    """
    # Float accumulator so the result dtype does not depend on whether any
    # column ends up contributing.
    score = pd.Series(0.0, index=df_scaled.index)
    for col, weight in weights.items():
        # A zero weight means "ignore this nutrient": skip it entirely so a
        # NaN/inf in an ignored column cannot leak into the score
        # (NaN * 0 is NaN), and to avoid pointless column arithmetic.
        if weight == 0 or col not in df_scaled.columns:
            continue
        score += df_scaled[col] * weight
    return score

# Build the weights from the *_diabate lists and score every food.
weights = build_weight_vector(df_scaled.columns, pos_diabate, neg_diabate)
scores = compute_health_score(df_scaled, weights)

# Recommend the 10 highest-scoring foods.
# df_scaled carries a default integer index, so the labels taken from `scores`
# are looked up in df by label.
top_k = 10
recommended = df.loc[scores.sort_values(ascending=False).head(top_k).index]

In [17]:
# Show the rows of df selected as the top-scoring foods.
recommended

Unnamed: 0,식품코드,식품명,식품중량,에너지(kcal),수분(g),단백질(g),지방(g),회분(g),탄수화물(g),당류(g),...,비타민 C(mg),비타민 D(μg),콜레스테롤(mg),포화지방산(g),트랜스지방산(g),비타민 B12(μg),엽산(μg DFE),비타민 E(mg α-TE),과당(g),맥아당(g)
2393,V00091,미역튀각,15.3,363,10.6,14.4,30.22,0.0,28.31,3.26,...,11.76,0.0,0.0,4.72,0.0,0.0,295.42,3.31,0.0,0.0
244,B00006,김구이_조미김,5.0,470,2.2,20.4,30.2,18.0,29.0,0.2,...,69.8,0.0,0.0,3.0,0.2,28.0,523.0,0.0,0.0,0.2
411,J00008,김무침,30.0,259,41.6,21.17,10.73,7.1,19.33,5.73,...,11.33,0.0,0.0,1.53,0.07,46.27,561.03,0.0,0.27,0.0
368,R00026,코다리조림,100.0,86,76.9,16.46,0.4,2.08,4.16,3.67,...,6.72,0.0,66.8,0.17,0.02,1.77,27.39,0.0,0.79,2.01
369,R00027,콩조림(콩자반),30.0,281,35.3,17.97,7.73,4.1,34.83,3.53,...,3.67,0.0,0.0,0.67,0.0,0.0,98.63,0.0,0.07,1.3
241,B00003,갈치구이_기름,200.0,138,70.6,21.54,5.48,1.81,0.6,0.0,...,0.96,2.29,87.96,1.31,0.05,3.03,10.81,0.0,0.0,0.0
281,P00013,산적_돼지고기,170.0,200,58.5,29.06,8.56,2.31,1.55,0.06,...,0.29,0.0,56.15,2.41,0.05,0.21,5.16,0.0,0.06,0.0
260,B00022,스테이크_소안심,200.0,183,58.9,34.82,4.83,1.34,0.13,0.0,...,0.0,0.0,80.25,0.22,0.0,3.04,15.28,0.0,0.0,0.0
253,B00015,버섯구이표고버섯,80.0,100,74.3,4.34,1.06,2.13,18.14,1.4,...,0.0,0.0,0.0,0.65,0.04,0.0,35.85,0.0,0.0,1.4
414,J00011,다시마무침,90.0,48,82.0,2.57,1.18,7.42,6.8,0.0,...,0.21,0.0,13.03,0.28,0.0,0.37,13.12,0.0,0.0,0.0
