# 1. 데이터 준비

### 1) 라이브러리 호출

In [1]:
import pandas as pd
import numpy as np
import sklearn
from sklearn.neighbors import NearestNeighbors

### 2) 데이터 불러오기

In [3]:
# Load the nutrition table; the file is decoded as EUC-KR.
df = pd.read_csv(
    'data/nutrition_data.csv',
    encoding='euc-kr',
)

### 3) EDA

In [4]:
# Preview the first rows of the loaded table.
df.head()

Unnamed: 0,num,food_name,standard_amount,kcal,protein,fat,carb,sugar,sodium
0,1,월남쌈,100g,118.0,5.94,4.97,12.25,5.5,230.0
1,2,와플,100g,292.4,4.52,3.37,40.35,15.12,214.8
2,3,토르티야,100g,218.0,5.7,4.7,43.4,0.8,417.0
3,4,타코,100g,246.0,15.31,13.56,15.62,0.33,367.0
4,5,티라미수,100g,353.0,5.65,23.86,29.44,18.7,173.0


In [5]:
# Summary statistics of the numeric columns.
df.describe()

Unnamed: 0,num,kcal,protein,fat,carb,sugar,sodium
count,93.0,93.0,93.0,93.0,93.0,92.0,93.0
mean,47.0,224.906237,8.179785,9.134409,25.115161,6.485435,327.368172
std,26.990739,123.48771,5.897311,7.415152,20.413844,9.707216,240.2812
min,1.0,2.21,0.0,0.0,0.0,0.0,0.19
25%,24.0,126.45,4.52,3.39,7.82,0.7175,185.0
50%,47.0,218.0,7.1,6.43,22.02,2.46,302.0
75%,70.0,300.5,9.63,14.0,39.75,5.535,409.0
max,93.0,535.0,32.52,29.66,82.2,51.5,1480.0


In [7]:
# Display the table (the notebook renders a truncated view).
df

Unnamed: 0,num,food_name,standard_amount,kcal,protein,fat,carb,sugar,sodium
0,1,월남쌈,100g,118.00,5.94,4.97,12.25,5.50,230.00
1,2,와플,100g,292.40,4.52,3.37,40.35,15.12,214.80
2,3,토르티야,100g,218.00,5.70,4.70,43.40,0.80,417.00
3,4,타코,100g,246.00,15.31,13.56,15.62,0.33,367.00
4,5,티라미수,100g,353.00,5.65,23.86,29.44,18.70,173.00
...,...,...,...,...,...,...,...,...,...
88,89,치킨,100g,285.71,21.57,12.20,12.14,3.18,401.79
89,90,칠리크랩,100g,187.90,2.90,8.90,24.10,19.40,1044.40
90,91,초콜릿,100g,535.00,7.65,29.66,59.40,51.50,79.00
91,92,츄러스,100g,358.00,0.00,0.00,0.00,0.01,0.19


### 4) 결측값 처리

In [6]:
# Count missing values per column.
df.isnull().sum()

num                0
food_name          0
standard_amount    0
kcal               0
protein            0
fat                0
carb               0
sugar              1
sodium             0
dtype: int64

In [8]:
# Only ``sugar`` has a missing value, so restrict the fill to that column
# instead of writing the constant into any column that happens to hold NaN.
# NOTE(review): 12.23 is a hard-coded replacement whose source is not
# documented here — confirm where it comes from.
df_filled = df.fillna({'sugar': 12.23})

In [10]:
# Re-count missing values per column after filling.
print(df_filled.isnull().sum())

num                0
food_name          0
standard_amount    0
kcal               0
protein            0
fat                0
carb               0
sugar              0
sodium             0
dtype: int64


In [11]:
# Show column dtypes and non-null counts of the filled table.
df_filled.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 93 entries, 0 to 92
Data columns (total 9 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   num              93 non-null     int64  
 1   food_name        93 non-null     object 
 2   standard_amount  93 non-null     object 
 3   kcal             93 non-null     float64
 4   protein          93 non-null     float64
 5   fat              93 non-null     float64
 6   carb             93 non-null     float64
 7   sugar            93 non-null     float64
 8   sodium           93 non-null     float64
dtypes: float64(6), int64(1), object(2)
memory usage: 6.7+ KB


In [12]:
# Summary statistics of the numeric columns after filling.
df_filled.describe()

Unnamed: 0,num,kcal,protein,fat,carb,sugar,sodium
count,93.0,93.0,93.0,93.0,93.0,93.0,93.0
mean,47.0,224.906237,8.179785,9.134409,25.115161,6.547204,327.368172
std,26.990739,123.48771,5.897311,7.415152,20.413844,9.672675,240.2812
min,1.0,2.21,0.0,0.0,0.0,0.0,0.19
25%,24.0,126.45,4.52,3.39,7.82,0.72,185.0
50%,47.0,218.0,7.1,6.43,22.02,2.47,302.0
75%,70.0,300.5,9.63,14.0,39.75,5.64,409.0
max,93.0,535.0,32.52,29.66,82.2,51.5,1480.0


# 2. KNN - 음식 추천

In [18]:
from sklearn.neighbors import NearestNeighbors

### 1) 클래스 초기화

In [None]:
def __init__(self, profiles, recent_activity, dataset):
    """Store the data frames the recommender works on.

    Args:
        profiles: User-profile data frame.
        recent_activity: Data frame of the users' recent activity.
        dataset: Full food/meal data frame; kept as ``self.df``.
    """
    # Keep the frame handed in by the caller instead of reaching for the
    # notebook-global ``df_filled``, so the object does not depend on which
    # cells happen to have been executed.
    self.df = dataset
    self.profiles = profiles
    self.recent_activity = recent_activity

<small>

>>* profiles: 사용자 프로필 데이터프레임.
>>* recent_activity: 사용자의 최근 활동 데이터프레임.
>>* dataset: 전체 식사 데이터셋

### 2) 특징 추출

In [None]:
def get_features(self, df_filled):
    """One-hot encode the text columns of *df_filled*.

    Args:
        df_filled: Data frame with string columns ``food_name`` and
            ``standard_amount``.

    Returns:
        A data frame of dummy columns: those derived from ``food_name``
        followed by those derived from ``standard_amount``.
    """
    # The loaded table names its columns in lower case (``food_name``,
    # ``standard_amount``), so they are looked up with exactly that spelling.
    food_name = df_filled['food_name'].str.get_dummies()
    # Values are split on spaces, so each token becomes its own column.
    standard_amount = df_filled['standard_amount'].str.get_dummies(sep=' ')
    feature_df = pd.concat([food_name, standard_amount], axis=1)
    return feature_df

<small>

- get_features 메서드 = 데이터프레임에서 특징을 추출
- food_name, standard_amount(object 형)열 → 원-핫 인코딩하여 더미 변수로 변환
- 변환된 더미 변수들을 pd.concat → 하나의 데이터프레임 feature_df로 결합
- 최종적으로 feature_df를 반환

### 3) 이웃 찾기

In [None]:
def find_neighbors(self, df_filled, features, k):
    """Find the rows of *df_filled* most similar to a set of features.

    Args:
        df_filled: Data frame to search; passed to ``self.get_features``.
        features: Iterable of feature names (one-hot column labels) to
            switch on in the query vector.
        k: Number of neighbours, forwarded to ``self.k_neighbor``.

    Returns:
        Whatever ``self.k_neighbor`` returns for the query.
    """
    features_df = self.get_features(df_filled)
    wanted = set(features)
    # Exactly one slot per feature column. Requested names that are not
    # columns are ignored, so the query always has the same width as the
    # feature matrix it is compared against.
    final_input = [1 if column in wanted else 0 for column in features_df.columns]
    similar_neighbors = self.k_neighbor([final_input], features_df, df_filled, k)
    return similar_neighbors


<small>

>>* 주어진 특징을 기반으로 가장 유사한 이웃 찾기

### 4) 사용자 기반 추천 (user_based 메서드)

In [None]:
def user_based(self, features, user_id):
    """Recommend meals taken from the recent activity of similar users.

    Args:
        features: Feature names describing the target user; forwarded to
            ``self.find_neighbors``.
        user_id: ``User_Id`` whose own activity is excluded.

    Returns:
        Rows of ``self.df`` whose ``Meal_Id`` occurs in the recent activity
        of the 10 nearest profiles (other than ``user_id``), restricted to
        the listed columns, de-duplicated by ``Name``, with a fresh index.
    """
    neighbours = self.find_neighbors(self.profiles, features, 10)
    neighbour_ids = list(neighbours.User_Id)

    # Activity rows that belong to a neighbour but not to the user themself.
    activity = self.recent_activity
    from_neighbours = activity.User_Id.isin(neighbour_ids)
    from_others = activity['User_Id'] != user_id
    meal_ids = list(activity[from_neighbours & from_others].Meal_Id.unique())

    catalogue = self.df
    return (
        catalogue[catalogue.Meal_Id.isin(meal_ids)]
        .filter(['Meal_Id', 'Name', 'Nutrient', 'Veg_Non', 'description', 'Price', 'Review'])
        .drop_duplicates(subset=['Name'])
        .reset_index(drop=True)
    )


<small>

* KNN 사용 → 유사한 항목 찾기
>>* user_id : 우리 알고리즘에 맞게 수정 필요
>>* `.filter(['Meal_Id', 'Name', 'Nutrient', 'Veg_Non', 'description', 'Price', 'Review'])` → 반환할 열 목록도 BMI, 칼로리, 영양성분 등의 기준으로 수정 필요

### 5) 주변 식당 추천

### 6) 최종 추천

In [None]:
def recommend(self, user_id):
    """Build the final recommendation table for one user.

    The user's ``Nutrient`` value plus the whitespace-separated tokens of
    ``Disease`` and ``Diet`` form the feature list for ``self.user_based``;
    its result is concatenated with ``self.recent_activity_based(user_id)``.

    Args:
        user_id: ``User_Id`` value to look up in ``self.profiles``.

    Returns:
        The concatenated recommendations, de-duplicated by ``description``
        and re-indexed from 0.

    Raises:
        IndexError: If ``self.profiles`` has no row for ``user_id``.
    """
    profile = self.profiles[self.profiles['User_Id'] == user_id]
    # Fail with a readable message instead of numpy's "index 0 is out of
    # bounds" when the user is unknown; the exception type is unchanged.
    if profile.empty:
        raise IndexError("no profile found for User_Id {!r}".format(user_id))
    features = []
    features.append(profile['Nutrient'].values[0])
    features.extend(profile['Disease'].values[0].split())
    features.extend(profile['Diet'].values[0].split())
    df1 = self.user_based(features, user_id)
    df2 = self.recent_activity_based(user_id)
    df = pd.concat([df1, df2])
    df = df.drop_duplicates('description').reset_index(drop=True)
    return df

<small>

>* 아래 항목은 수정 필요
>>* `features.append(profile['Nutrient'].values[0])`
>>* `features.extend(profile['Disease'].values[0].split())`
>>* `features.extend(profile['Diet'].values[0].split())`
>>* `df1 = self.user_based(features, user_id)`
>>* `df2 = self.recent_activity_based(user_id)`
>>* `df = pd.concat([df1, df2])`
>>* `df = df.drop_duplicates('description').reset_index(drop=True)`

<small>

* 아직 지도 API 넣는 방법은 고려 중