In [1]:
from collections import Counter

def compute_tf(text: str) -> Counter:
    """Return the relative frequency of every distinct element of ``text``.

    ``text`` is handed directly to ``Counter``, so the elements counted are
    whatever iterating it yields: single characters for a plain string,
    tokens for a list of tokens.

    Args:
        text: A sized iterable of hashable elements.

    Returns:
        A ``Counter`` mapping each element to ``count / len(text)``, in
        first-seen order. An empty input yields an empty ``Counter``.
    """
    total = float(len(text))
    occurrences = Counter(text)
    # Turn raw counts into fractions of the input length.
    return Counter({item: count / total for item, count in occurrences.items()})

In [2]:
import math
from typing import List

def compute_idf(word: str, corpus: List[str]) -> float:
    """Return the base-10 inverse document frequency of ``word`` in ``corpus``.

    The value is ``log10(N / df)``, where ``N`` is ``len(corpus)`` and ``df``
    is the number of documents for which ``word in document`` is true.
    Membership follows each document's own ``in`` semantics: exact token
    match when a document is a list of tokens, substring containment when it
    is a plain string.

    Args:
        word: The term to look up.
        corpus: The documents to scan.

    Returns:
        The IDF of ``word``. When no document contains ``word`` (which
        includes an empty corpus) the result is ``0.0`` rather than a
        ``ZeroDivisionError``.
    """
    document_frequency = sum(1 for text in corpus if word in text)
    if document_frequency == 0:
        # An unseen term has no finite IDF; score it as neutral instead of
        # dividing by zero.
        return 0.0
    return math.log10(len(corpus) / document_frequency)

In [3]:
from typing import List, Dict

def compute_tf_idf(corpus: List[str]) -> List[Dict[str, float]]:
    """Compute TF-IDF scores for every word of every document in ``corpus``.

    Each document string is split on whitespace into tokens. For each
    document, the term frequency of a token (from ``compute_tf``) is
    multiplied by the token's inverse document frequency over the whole
    tokenized corpus (from ``compute_idf``).

    Args:
        corpus: The documents, one string per document.

    Returns:
        A list with one dict per document, in corpus order, mapping each
        distinct token of that document to its TF-IDF score.
    """
    documents_list = [text.split() for text in corpus]
    # IDF depends only on the word and the corpus, so compute it once per
    # distinct word instead of rescanning the corpus for every occurrence.
    idf_by_word: Dict[str, float] = {}
    tf_idf = []
    for tokens in documents_list:
        tf_idf_text = {}
        for word, term_frequency in compute_tf(tokens).items():
            if word not in idf_by_word:
                idf_by_word[word] = compute_idf(word, documents_list)
            tf_idf_text[word] = term_frequency * idf_by_word[word]
        tf_idf.append(tf_idf_text)
    return tf_idf

In [4]:
from typing import List, Dict, Tuple

def get_scores_for_word(
    word: str, tf_idf: List[Dict[str, float]]
) -> List[Tuple[int, float]]:
    """Collect the score of ``word`` in every document.

    Args:
        word: The term to look up.
        tf_idf: One ``{word: score}`` mapping per document.

    Returns:
        A list of ``(document_number, score)`` pairs in document order.
        Document numbers start at 1, and a document that does not contain
        ``word`` is reported with a score of ``0.0``.
    """
    return [
        (doc_number, doc_scores.get(word, 0.0))
        for doc_number, doc_scores in enumerate(tf_idf, start=1)
    ]

In [5]:
# Toy corpus: each string is one document; compute_tf_idf splits it on whitespace.
corpus = [
    "고소한 갈릭 팝콘",
    "청량감 넘치는 콜라",
    "카라멜과 오리지널 반반 팝콘",
]
# One {word: tf-idf score} dict per document, in corpus order.
tf_idf = compute_tf_idf(corpus)
# Show every document's scores, numbering documents from 1.
for i, doc in enumerate(tf_idf):
    print(f"Document {i+1}: {doc}")

Document 1: {'고소한': 0.15904041823988746, '갈릭': 0.15904041823988746, '팝콘': 0.058697086351893746}
Document 2: {'청량감': 0.15904041823988746, '넘치는': 0.15904041823988746, '콜라': 0.15904041823988746}
Document 3: {'카라멜과': 0.11928031367991561, '오리지널': 0.11928031367991561, '반반': 0.11928031367991561, '팝콘': 0.04402281476392031}


In [6]:
# Word to search for
search_word = "팝콘"
# (document number, score) pairs for every document; 0.0 where the word is absent.
scores = get_scores_for_word(search_word, tf_idf)

In [7]:
# Print the search results: one line per document with its score for search_word
print(f"\nScores for the word '{search_word}':")
for doc_num, score in scores:
    print(f"Document {doc_num}: {score}")


Scores for the word '팝콘':
Document 1: 0.058697086351893746
Document 2: 0.0
Document 3: 0.04402281476392031
