In [None]:
from google.colab import drive

# Mount Google Drive inside the Colab runtime at /content/drive.
drive.mount('/content/drive')
# Switch the working directory to the project folder on Drive so that
# relative file paths used afterwards resolve against it.
%cd /content/drive/MyDrive/recipe

Mounted at /content/drive
/content/drive/MyDrive/recipe


In [None]:
!pip install selenium
!pip install --upgrade webdriver-manager




In [None]:
import requests
import urllib.parse
import pickle
import torch
from transformers import BertTokenizer, BertModel
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
import re
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
import time

# Restore the (model, embeddings) pair that was pickled together
def load_model_and_embeddings(file_path='model_embeddings.pkl'):
    """Unpickle a model and its embeddings from a single file.

    Args:
        file_path: Path to a pickle file whose top-level object unpacks into
            exactly two items, ``(model, embeddings)``.

    Returns:
        tuple: ``(model, embeddings)`` as stored in the file.

    Note:
        ``pickle.load`` can execute arbitrary code while deserializing, so
        only point this at files from a trusted source.
    """
    with open(file_path, 'rb') as pickle_file:
        payload = pickle.load(pickle_file)
    # Unpacking enforces the two-item layout (raises if the payload differs).
    model, embeddings = payload
    print(f"Model and embeddings loaded from {file_path}")
    return model, embeddings

# Function to recommend recipe based on user input
def recommend_recipe(user_input, model, data_embeddings, tokenizer, file_path='Recipe_Info.txt', top_k=5):
    """Rank recipes by cosine similarity between the query and stored embeddings.

    The query is tokenized, passed through ``model``, and its
    ``last_hidden_state`` is averaged over dim 1 to obtain one vector. That
    vector is compared with every row of ``data_embeddings`` and the rows with
    the highest similarity are returned, best match first.

    Args:
        user_input: Free-text query string.
        model: Callable accepting the tokenizer output as keyword arguments and
            returning an object with a ``last_hidden_state`` tensor.
        data_embeddings: 2-D array-like of precomputed recipe embeddings.
            NOTE(review): row ``i`` is assumed to correspond to row ``i`` of
            ``file_path`` -- confirm against the code that built the pickle.
        tokenizer: Callable tokenizer invoked with ``return_tensors='pt'``.
        file_path: Tab-delimited, header-less text file that is loaded into a
            single ``combined_features`` column.
        top_k: Maximum number of recommendations to return (default 5, the
            previously hard-coded value). ``0`` yields an empty list.

    Returns:
        list[tuple]: ``(combined_features, similarity_score)`` pairs ordered
        from most to least similar; at most ``top_k`` entries.

    Raises:
        ValueError: If ``top_k`` is negative.
    """
    if top_k < 0:
        raise ValueError(f"top_k must be non-negative, got {top_k}")

    # Define get_bert_embeddings function inside recommend_recipe
    def get_bert_embeddings(text_list, tokenizer, model):
        """Return one mean-pooled embedding (NumPy row) per input text."""
        inputs = tokenizer(text_list, return_tensors='pt', padding=True, truncation=True, max_length=512)
        # Inference only: no gradients are needed.
        with torch.no_grad():
            outputs = model(**inputs)
        return outputs.last_hidden_state.mean(dim=1).numpy()

    data = pd.read_csv(file_path, delimiter='\t', header=None, names=["combined_features"])
    user_embedding = get_bert_embeddings([user_input], tokenizer, model)
    sim_scores = cosine_similarity(user_embedding, data_embeddings).flatten()

    # Reverse the ascending argsort first and then truncate. For top_k=5 this
    # picks the same indices in the same order as `[-5:][::-1]`, but it also
    # behaves correctly for top_k=0 (a `[-0:]` slice would select every row).
    sim_indices = sim_scores.argsort()[::-1][:top_k]
    recommendations = [(data.iloc[i]['combined_features'], sim_scores[i]) for i in sim_indices]
    return recommendations


def youtube_crawl(recommendations):
    """Search YouTube for the top recommended recipe and scrape the first hit.

    The recipe title is pulled out of the first recommendation's text with the
    pattern ``요리 이름: <title>,``; the title plus the word ``레시피`` becomes
    the search query. The results page is opened in headless Chrome and the
    first element matching each XPath (thumbnail, title, view count, upload
    period) is read.

    Args:
        recommendations: Non-empty sequence whose first item is indexable and
            has the recipe description text at position 0 (the shape returned
            by ``recommend_recipe``).

    Returns:
        dict: Keys ``'Title'``, ``'Views'``, ``'Period'`` and ``'Image URL'``.

    Raises:
        IndexError: If ``recommendations`` is empty.
        Exception: Errors raised by selenium while starting the browser,
            loading the page or locating elements propagate to the caller;
            the browser is shut down first.
    """
    def extract_recipe_title(text):
        """Return the text captured after '요리 이름: ' up to the next comma."""
        match = re.search(r"요리 이름: ([^,]+)", text)
        if match:
            return match.group(1).strip()
        # NOTE(review): this placeholder is subsequently used as the search
        # keyword as well -- confirm that is the intended fallback.
        return "제목을 찾을 수 없습니다."

    if len(recommendations) == 0:
        raise IndexError("recommendations is empty; there is no recipe to search for")

    # Extract the title of the top recommended recipe
    top_recommendation = recommendations[0][0]
    recipe_title = extract_recipe_title(top_recommendation)
    print(f"Recommended Recipe Title: {recipe_title}")

    # Build the search URL. quote_plus turns spaces into '+' (as the manual
    # replace did) and additionally percent-encodes characters such as '&',
    # '#' or '+' that would otherwise corrupt the query string.
    search_keyword = urllib.parse.quote_plus(f"{recipe_title} 레시피")
    url = "https://www.youtube.com/results?search_query=" + search_keyword

    # Chrome options: keep the browser detached, silence driver logging, and
    # run headless with the flags commonly needed in containerized runtimes.
    chrome_options = Options()
    chrome_options.add_experimental_option("detach", True)
    chrome_options.add_experimental_option("excludeSwitches", ["enable-logging"])
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")

    # XPaths of the pieces of a search result that are scraped
    img_xpath = '//*[@id="thumbnail"]/yt-image/img'
    title_xpath = '//*[@id="video-title"]'
    viewcnt_xpath = '//*[@id="metadata-line"]/span[1]'
    period_xpath = '//*[@id="metadata-line"]/span[2]'

    # Start the Chrome driver
    browser = webdriver.Chrome(options=chrome_options)
    try:
        # Open the results page and give it time to render
        browser.get(url)
        time.sleep(3)

        # find_element returns the first match for each XPath.
        # NOTE(review): the four lookups are independent, so they are only
        # presumed to belong to the same video -- verify on the live page.
        img_url = browser.find_element(By.XPATH, img_xpath).get_attribute('src')
        title = browser.find_element(By.XPATH, title_xpath)
        view = browser.find_element(By.XPATH, viewcnt_xpath)
        period = browser.find_element(By.XPATH, period_xpath)

        # Read the text while the session is still alive
        result = {'Title': title.text, 'Views': view.text, 'Period': period.text, 'Image URL': img_url}
    finally:
        # Always close the browser, even when loading or scraping fails;
        # otherwise the Chrome process would be leaked.
        browser.quit()

    return result

if __name__=='__main__':
  # Load the pickled model and precomputed embeddings (default file path)
  model, data_embeddings = load_model_and_embeddings()

  # Initialize the tokenizer used to encode the user's query.
  # NOTE(review): presumably the same checkpoint the pickled model was built
  # from -- confirm.
  tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')

  # Get user input and recommend recipes
  user_input = input("입력하세요: ")  # code that collects the user's input can be added here
  recommendations = recommend_recipe(user_input, model, data_embeddings, tokenizer)
  # Scrape YouTube for the top-ranked recipe and print the resulting dict
  print(youtube_crawl(recommendations))

Model and embeddings loaded from model_embeddings.pkl
입력하세요새우
Recommended Recipe Title: 홍시아이스크림
{'Title': '아이스크림보다 맛있는 아이스홍시 만들기. 홍시 보관법까지 알아보기.', 'Views': '22K views', 'Period': '6 years ago', 'Image URL': 'https://i.ytimg.com/vi/YJ9mg0mEjwQ/hqdefault.jpg?sqp=-oaymwEcCOADEI4CSFXyq4qpAw4IARUAAIhCGAFwAcABBg==&rs=AOn4CLAk459smsiphxPe8AjhQuOZd1WYyA'}
