In [17]:
# 필요한 패키지 설치 (코드셀 1)
!pip install konlpy
!pip install mecab-python3
!sudo apt-get install mecab libmecab-dev mecab-ipadic mecab-ipadic-utf8
!bash <(curl -s https://raw.githubusercontent.com/konlpy/konlpy/master/scripts/mecab.sh)

# konlpy의 Mecab 클래스 import (코드셀 1)
from konlpy.tag import Mecab

# 사전이 설치된 경로를 찾기 (코드셀 1)
!find / -name mecab-ko-dic 2>/dev/null

# 찾아낸 사전 경로를 바탕으로 Mecab 형태소 분석기 초기화 (코드셀 1)
dicpath = '/usr/lib/x86_64-linux-gnu/mecab/dic/mecab-ko-dic'  # 찾아낸 사전 경로
mecab = Mecab(dicpath)

[31mERROR: Operation cancelled by user[0m[31m
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
libmecab-dev is already the newest version (0.996-14build9).
mecab-ipadic is already the newest version (2.7.0-20070801+main-3).
mecab-ipadic-utf8 is already the newest version (2.7.0-20070801+main-3).
mecab is already the newest version (0.996-14build9).
0 upgraded, 0 newly installed, 0 to remove and 24 not upgraded.
mecab-ko is already installed
Install mecab-ko-dic
Install mecab-ko-dic
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100 47.4M  100 47.4M    0     0  44.3M      0  0:00:01  0:00:01 --:--:-- 92.7M
mecab-ko-dic-2.1.1-20180720/
mecab-ko-dic-2.1.1-20180720/configure
mecab-ko-dic-2.1.1-20180720/COPYING
mecab-ko-dic-2.1.1-20180720/autogen.sh
meca

In [48]:
import pandas as pd
from konlpy.tag import Mecab
from collections import defaultdict
import os

# Mount Google Drive; the CSV inputs read later live under /content/drive
from google.colab import drive
drive.mount('/content/drive')

# Create the Mecab morphological analyzer, pointing it at the mecab-ko-dic directory
dicpath = '/usr/lib/x86_64-linux-gnu/mecab/dic/mecab-ko-dic'
mecab = Mecab(dicpath=dicpath)

# Load the verb list.
# The encodings are tried in order. CP949 is a superset of EUC-KR, so the
# EUC-KR attempt alone cannot rescue a file that CP949 rejected; UTF-8 (with or
# without a BOM) is therefore tried last. If every attempt fails, the last
# UnicodeDecodeError is re-raised, as the original fallback would have done.
verb_list_path = '/content/drive/My Drive/verblist/동사리스트.csv'
_decode_error = None
for _encoding in ('CP949', 'euc-kr', 'utf-8-sig'):
    try:
        verb_list_df = pd.read_csv(verb_list_path, encoding=_encoding)
        break
    except UnicodeDecodeError as exc:
        _decode_error = exc
else:
    # No encoding worked: surface the decoding failure instead of continuing
    # without a verb list.
    raise _decode_error

# Show the column names of the verb-list DataFrame
print(verb_list_df.columns)

# Collect every distinct classification value
all_classifications = set(verb_list_df['classification'].unique())

# File path for each cooking process.
# Every file lives in the same Drive folder and follows the pattern
# "<process name>1000_2.csv", so the table is generated from the list of
# process names instead of repeating the folder and suffix thirteen times.
RECIPE_DIR = '/content/drive/My Drive/recipe'
PROCESS_NAMES = [
    '끓이기',
    '굽기',
    '데치기',
    '무침',
    '볶음',
    '부침',
    '비빔',
    '삶기',
    '절임',
    '조림',
    '찜',
    '튀김',
    '회',
]
cooking_processes = {
    name: f'{RECIPE_DIR}/{name}1000_2.csv' for name in PROCESS_NAMES
}

# Cooking processes chosen by the user
selected_processes = ['회']  # example selection

# Score table: recipe id -> {classification: accumulated weight}.
# Every classification starts at 0. all_classifications is a set, whose
# iteration order for strings can differ between interpreter sessions, so a
# sorted order is fixed here to keep the key order of each score dict (and
# anything derived from it, such as print order or tie-breaking in a stable
# sort) identical on every run.
classification_order = sorted(all_classifications, key=str)
recipe_points = defaultdict(lambda: dict.fromkeys(classification_order, 0))

# Lookup built once: verb -> [(classification, weight), ...] in verb-list row
# order. This replaces filtering the whole verb DataFrame for every verb of
# every recipe, while adding the weights in the same order as before.
verb_weights = defaultdict(list)
for _verb, _classification, _weight in zip(
    verb_list_df['verb'], verb_list_df['classification'], verb_list_df['weighted']
):
    verb_weights[_verb].append((_classification, _weight))

# Process the file of each selected cooking process
for process in selected_processes:
    path = cooking_processes.get(process)
    if not path:
        print(f"선택한 조리 과정 '{process}'에 대한 파일 경로가 없습니다.")
        continue
    try:
        data = pd.read_csv(path, encoding='CP949')
        data['recipe'] = data['recipe'].astype(str)  # force text so mecab.pos always gets a str
        for recipe_id, recipe_text in zip(data['id'], data['recipe']):
            for word, tag in mecab.pos(recipe_text):
                # Only tokens whose tag starts with 'VV' are scored.
                if not tag.startswith('VV'):
                    continue
                # .get avoids inserting empty entries for unknown verbs.
                for classification, weight in verb_weights.get(word, ()):
                    recipe_points[recipe_id][classification] += weight
    except Exception as e:
        # Best-effort: report the problem and move on to the next process.
        # Note this also catches errors raised while scoring, not only while reading.
        print(f"파일 {path}을(를) 읽는 중 에러가 발생했습니다: {e}")

# Print the final result: one line per recipe with its id, the score of every
# classification, and finally the names of the three highest-scoring classifications.
for recipe_id, points in recipe_points.items():
    sorted_points = sorted(points.items(), key=lambda x: x[1], reverse=True)
    top_3_classes = sorted_points[:3]

    # Compose the whole line first, then emit it with a single print call.
    score_fields = "".join(
        f"{classification}: {point}, " for classification, point in points.items()
    )
    top_names = ", ".join(cls for cls, _ in top_3_classes)
    print(f"{recipe_id}, {score_fields}{top_names}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Index(['id', 'verb', 'weighted', 'classification'], dtype='object')
1, 끓이기: 0.20255642500000007, 부침: 0.15936877, 찜: 0.23430311999999998, 삶기: 0.256530849, 무침: 0.159404636, 절임: 0.24028924799999996, 굽기: 0.6274604819999999, 데치기: 0.28021089, 볶음: 0.126938492, 튀김: 0.220231159, 비빔: 0.24909540600000002, 회: 0.31330827599999994, 조림: 0.18521848800000001, 굽기, 회, 데치기
2, 끓이기: 0.06828769, 부침: 0.12548985099999999, 찜: 0.101076713, 삶기: 0.07019778099999999, 무침: 0.125311908, 절임: 0.060569164, 굽기: 0.068083716, 데치기: 0.096312597, 볶음: 0.061331662, 튀김: 0.051881985, 비빔: 0.117387976, 회: 0.07370386, 조림: 0.061075758, 부침, 무침, 비빔
3, 끓이기: 0.065448566, 부침: 0.082513543, 찜: 0.08820103000000001, 삶기: 0.063911953, 무침: 0.082651783, 절임: 0.11726847899999998, 굽기: 0.15292344000000002, 데치기: 0.083159067, 볶음: 0.211345675, 튀김: 0.038622972, 비빔: 0.12354815, 회: 0.17182676800000002, 조림: 0.146839993, 볶음, 회, 굽기
4