In [1]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd

In [2]:
# Load the dataset from a CSV file on the local machine.
# NOTE(review): this is an absolute, user-specific path; the notebook only runs
# where this exact file exists.
file_path = '/Users/hailey/Desktop/DS Project/KFT/KFT Dataset - Sheet1.csv'
dataset = pd.read_csv(filepath_or_buffer=file_path)

### 1. 데이터 전처리 및 TF-IDF 벡터화

In [4]:
# Build one text field per row by joining 'Flavor Tags' and 'Base Type' with a
# single space. Missing values become empty strings first so a NaN in either
# column does not turn the combined text into NaN.
flavor_text = dataset['Flavor Tags'].fillna('')
base_text = dataset['Base Type'].fillna('')
dataset['Combined Features'] = flavor_text + ' ' + base_text

# Fit a TF-IDF vectorizer (English stop words removed) on the combined text and
# keep the fitted vectorizer in `tfidf` so later input can be transformed with
# the same vocabulary.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(dataset['Combined Features'])

# Sanity check: report the shape of the resulting TF-IDF matrix.
print(f"TF-IDF Matrix Shape: {tfidf_matrix.shape}")

TF-IDF Matrix Shape: (136, 36)


### 2. 사용자 입력과 유사도 계산

In [6]:
# User preferences, given with the same two fields used to build
# 'Combined Features' on the dataset side.
user_input = {
    "Flavor Tags": "Sweet Fruity",
    "Base Type": "Green"
}
# Join the two fields with a single space, mirroring how the dataset text was built.
user_profile = ' '.join([user_input['Flavor Tags'], user_input['Base Type']])

# Transform the user text with the already-fitted vectorizer (no refit).
user_vector = tfidf.transform([user_profile])

# Cosine similarity between the single user vector and every dataset row;
# the result has one row, indexed as [0] below.
cosine_similarities = cosine_similarity(user_vector, tfidf_matrix)

# Recommend the top 6 drinks: argsort is ascending, so take the last six
# positions and reverse them to get the highest similarity first.
ascending_order = cosine_similarities[0].argsort()
top_indices = ascending_order[-6:][::-1]

# Select the ranked rows by position, then keep only the columns to display.
display_columns = ['Menu', 'Category', 'Base Type', 'Flavor Tags']
ranked_rows = dataset.iloc[top_indices]
recommended_drinks = ranked_rows[display_columns]
print("Recommended Drinks:", recommended_drinks, sep="\n")

Recommended Drinks:
                     Menu Category Base Type                Flavor Tags
22           Lychee Punch    Punch     Green              Sweet, Fruity
14        Apple Green Tea    Punch     Green      Fruity, Sweet, Normal
24      White Grape Punch    Punch     Green  Sweet, Refreshing, Fruity
27        Mango Green Tea    Punch     Green  Sweet, Refreshing, Fruity
21        Peach Green Tea    Punch     Green         Fruity, Refreshing
30  Sunshine Pinapple Tea    Punch     Green         Refreshing, Fruity
