In [None]:
# Reinstall KoNLPy and Mecab.
# The leading `!` runs each line as a shell command from the notebook.
# NOTE(review): no versions are pinned, so each run installs whatever is
# current; the recorded output below resolved mecab-python3 1.0.8 and
# konlpy 0.6.0 — consider pinning those for reproducibility.
!pip install --force-reinstall mecab-python3
!pip install --force-reinstall konlpy
# Fetch the mecab.sh installer script from the konlpy repository and run it with bash.
# NOTE(review): the script comes from the `master` branch and is executed
# without inspection — consider pinning the URL to a specific commit.
!bash <(curl -s https://raw.githubusercontent.com/konlpy/konlpy/master/scripts/mecab.sh)


Collecting mecab-python3
  Downloading mecab_python3-1.0.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (581 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/581.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m174.1/581.7 kB[0m [31m5.1 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m [32m573.4/581.7 kB[0m [31m9.0 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m581.7/581.7 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: mecab-python3
Successfully installed mecab-python3-1.0.8
Collecting konlpy
  Downloading konlpy-0.6.0-py2.py3-none-any.whl (19.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m19.4/19.4 MB[0m [31m59.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting JPype1>=0.7.0 (from konlpy)
  Downloading JPype1-1.4.1-cp

In [None]:
import pandas as pd
from konlpy.tag import Mecab
from collections import defaultdict

# Mount Google Drive at /content/drive; the recipe CSV paths used further
# down in this cell all live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

# Create the Mecab morphological analyzer with its default settings.
# It is reused for every recipe text in the loop below.
mecab = Mecab()

# CSV file path for each cooking method, keyed by the method's Korean label.
# All files sit in the same Drive folder and follow the "<method>1000_2.csv"
# naming pattern, so the mapping is generated from the ordered list of labels.
cooking_processes = {
    method: f'/content/drive/My Drive/recipe/{method}1000_2.csv'
    for method in (
        '끓이기',
        '굽기',
        '데치기',
        '무침',
        '볶음',
        '부침',
        '비빔',
        '삶기',
        '절임',
        '조림',
        '찜',
        '튀김',
        '회',
    )
}

# Verb frequencies per cooking method: process_verbs[method][verb] -> count.
process_verbs = defaultdict(lambda: defaultdict(int))

# Frames that loaded successfully are collected here and concatenated once
# after the loop. Calling pd.concat on a growing DataFrame inside the loop
# re-copies every previously loaded row on each iteration.
loaded_frames = []

# Read each method's CSV, tag its rows with the method, and count every
# morpheme whose Mecab POS tag starts with 'VV'.
for process, path in cooking_processes.items():
    try:
        data = pd.read_csv(path, encoding='CP949')
        data['process'] = process  # remember which cooking method each row came from
        # Force text so mecab.pos always receives a str (missing values become 'nan').
        data['recipe'] = data['recipe'].astype(str)
        # Register the frame before tagging: as before, a file counts as loaded
        # even if morphological analysis fails part-way through it.
        loaded_frames.append(data)
        for recipe in data['recipe']:
            for word, tag in mecab.pos(recipe):
                if tag.startswith('VV'):
                    process_verbs[process][word] += 1
    except FileNotFoundError:
        print(f"파일 {path}을(를) 찾을 수 없습니다.")
    except Exception as e:
        # Best-effort: report the problem and continue with the remaining files.
        print(f"파일 {path}을(를) 읽는 중 다음과 같은 에러가 발생했습니다: {e}")

# pd.concat raises on an empty list, so fall back to an empty frame when no
# file could be loaded.
all_recipes = pd.concat(loaded_frames, ignore_index=True) if loaded_frames else pd.DataFrame()

# Corpus-wide verb totals: add up each verb's count over every cooking method.
total_verbs = defaultdict(int)
for per_process_counts in process_verbs.values():
    for verb in per_process_counts:
        total_verbs[verb] += per_process_counts[verb]

# Number of recipe rows across all loaded files.
total_recipes = all_recipes.shape[0]

# Bayes' rule per (method, verb):
#   P(method | verb) = P(verb | method) * P(method) / P(verb)
# The verb counts are occurrence counts while the denominators are recipe
# counts; the recipe-count terms cancel algebraically, so each value equals
# count / total_verbs[verb] up to floating-point rounding.
conditional_probabilities = defaultdict(dict)
for process, verbs in process_verbs.items():
    # Rows belonging to this method, counted from a boolean mask.
    process_count = int((all_recipes['process'] == process).sum())
    # The prior depends only on the method, so compute it once per method.
    prior = process_count / total_recipes
    for verb, count in verbs.items():
        likelihood = count / process_count
        evidence = total_verbs[verb] / total_recipes
        conditional_probabilities[process][verb] = (likelihood * prior) / evidence

# Print the results: one header line per cooking method, followed by one
# line per verb with the probability formatted to four decimal places.
for process in conditional_probabilities:
    print(f"\n{process} 과정의 조건부 확률:")
    for verb, probability in conditional_probabilities[process].items():
        print(f"P({process} | '{verb}') = {probability:.4f}")



Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).

회 과정의 조건부 확률:
P(회 | '잘라') = 0.0424
P(회 | '고를') = 0.2857
P(회 | '나야') = 1.0000
P(회 | '하') = 0.0415
P(회 | '부서지') = 0.0260
P(회 | '골라') = 0.0377
P(회 | '산') = 0.0769
P(회 | '벗겨진') = 0.0588
P(회 | '드셔도') = 0.0525
P(회 | '되') = 0.0397
P(회 | '지나') = 0.0183
P(회 | '나') = 0.0296
P(회 | '변하') = 0.0323
P(회 | '드') = 0.0530
P(회 | '뿌려') = 0.0494
P(회 | '덮일') = 0.5000
P(회 | '구워') = 0.0116
P(회 | '썰') = 0.0525
P(회 | '부서집니다') = 0.2000
P(회 | '구워진') = 0.0052
P(회 | '씌') = 0.1000
P(회 | '얼릴') = 1.0000
P(회 | '넣') = 0.0202
P(회 | '그래야') = 0.0266
P(회 | '나오') = 0.0347
P(회 | '있') = 0.0508
P(회 | '타') = 0.0229
P(회 | '만들') = 0.0538
P(회 | '갈아주') = 0.0208
P(회 | '들어가') = 0.0436
P(회 | '가') = 0.0557
P(회 | '바라') = 0.0833
P(회 | '꺼라') = 0.0588
P(회 | '꽂') = 0.0376
P(회 | '곁들일') = 0.1613
P(회 | '위해') = 0.0466
P(회 | '헹궈') = 0.0231
P(회 | '건져') = 0.0203
P(회 | '됩니다') = 0.0359
P(회 | '얼려') = 0.1098
P(회 | '풀') = 0.0