In [None]:
import json
from gensim.models import KeyedVectors
import numpy as np
import random

In [None]:
# Read the grouped vocabulary: a JSON object whose values are each a collection of words.
with open("korean_frequency.json", encoding="utf-8") as f:
    data = json.load(f)

# Merge every group into one vocabulary: de-duplicated via a set union, then sorted.
word_list = sorted(set().union(*data.values()))
word_list

['가게',
 '가격',
 '가구',
 '가까워지다',
 '가까이',
 '가깝다',
 '가꾸다',
 '가끔',
 '가난',
 '가난하다',
 '가늘다',
 '가능',
 '가능성',
 '가능하다',
 '가능해지다',
 '가다',
 '가득',
 '가득하다',
 '가득히',
 '가라앉다',
 '가려지다',
 '가령',
 '가로',
 '가로등',
 '가로막다',
 '가로수',
 '가루',
 '가르다',
 '가르치다',
 '가르침',
 '가리다',
 '가리키다',
 '가만',
 '가만있다',
 '가만히',
 '가뭄',
 '가방',
 '가볍다',
 '가사',
 '가상',
 '가수',
 '가스',
 '가슴',
 '가슴속',
 '가요',
 '가운데',
 '가위',
 '가을',
 '가이드',
 '가입',
 '가입자',
 '가입하다',
 '가장',
 '가정',
 '가정교사',
 '가져가다',
 '가져다주다',
 '가져오다',
 '가족',
 '가죽',
 '가지',
 '가지다',
 '가짜',
 '가치',
 '가치관',
 '가톨릭',
 '가하다',
 '각각',
 '각국',
 '각기',
 '각오',
 '각자',
 '각종',
 '간격',
 '간단하다',
 '간단히',
 '간부',
 '간섭',
 '간식',
 '간신히',
 '간장',
 '간접',
 '간접적',
 '간판',
 '간편하다',
 '간호',
 '간호사',
 '간혹',
 '갇히다',
 '갈다',
 '갈등',
 '갈비',
 '갈비탕',
 '갈색',
 '갈수록',
 '갈아입다',
 '갈아타다',
 '갈증',
 '감각',
 '감기',
 '감다',
 '감독',
 '감동',
 '감동적',
 '감사',
 '감사하다',
 '감상',
 '감상하다',
 '감소',
 '감소되다',
 '감소하다',
 '감수성',
 '감싸다',
 '감옥',
 '감자',
 '감정',
 '감정적',
 '감추다',
 '감히',
 '갑자기',
 '갑작스럽다',
 '값싸다',
 '강남',
 '강당',
 '강도',
 '강력하다',
 '강력히',
 '강렬하다',
 '강물',
 '강

In [3]:
# Load the saved word vectors as a gensim KeyedVectors object. The functions
# defined below read this module-level `model` directly, both for vocabulary
# membership checks (`word in model`) and for `model.similarity(...)` calls.
# NOTE(review): the file name suggests the 300-dimensional Korean FastText
# vectors — confirm the source.
model = KeyedVectors.load("cc.ko.300.kv")

In [8]:
# updating candidates
def update_candidates(candidates, guess, similarity, tolerance=5.0):
    """Keep only the candidates consistent with an observed similarity score.

    A candidate survives when its similarity to ``guess`` (multiplied by 100)
    lies within ``tolerance`` of the reported ``similarity``.

    Args:
        candidates: Iterable of candidate words.
        guess: The word that was just guessed.
        similarity: Observed similarity between ``guess`` and the hidden
            answer, on the same x100 scale.
        tolerance: Largest absolute difference (in x100 units) that still
            counts as consistent.

    Returns:
        A new list holding the surviving candidates in their original order.
        Candidates missing from ``model`` are dropped, and the result is empty
        when ``guess`` itself is missing from ``model``.
    """
    # Whether the guess is in the vocabulary does not depend on the candidate,
    # so decide it once instead of re-checking it for every candidate.
    if guess not in model:
        return []
    return [
        cand
        for cand in candidates
        if cand in model
        and abs(model.similarity(cand, guess) * 100 - similarity) <= tolerance
    ]
  
# selecting best
def select_best_guess(candidates, tried):
    """Pick the next word to guess from ``candidates``.

    Every eligible candidate gets a score: the negated sum of the logs of its
    similarities to the already tried words, with each similarity clipped to
    ``[1e-5, 1]``. Candidates that are dissimilar to everything tried so far
    therefore score highest (the value is a dissimilarity score rather than a
    true entropy). The first candidate reaching the highest score wins.

    A candidate is eligible when it is in ``model`` and has not been tried
    yet, so a previous guess is never proposed a second time.

    Args:
        candidates: Sequence of candidate words.
        tried: Words guessed so far; entries missing from ``model`` are
            ignored when scoring.

    Returns:
        The chosen word. When no candidate is eligible, a random element of
        ``candidates`` is returned instead.

    Raises:
        IndexError: If no candidate is eligible and ``candidates`` is empty
            (raised by ``random.choice``).
    """
    tried_words = list(tried)
    # Which tried words can be scored against is the same for every candidate,
    # so filter them once up front.
    references = [word for word in tried_words if word in model]

    best_word = None
    best_score = -1  # every score is >= 0, so any eligible candidate beats it
    for word in candidates:
        # Re-guessing a word that was already tried would waste a turn.
        if word in tried_words or word not in model:
            continue
        sims = [model.similarity(word, ref) for ref in references]
        score = -np.sum(np.log(np.clip(sims, 1e-5, 1))) if sims else 0
        if score > best_score:
            best_score = score
            best_word = word

    if best_word is None:
        return random.choice(candidates)
    return best_word

In [None]:
def simulate_game(answer, max_steps=20):
    """Play one guessing game against ``answer`` and return the guesses made.

    The pool starts as every ``word_list`` entry found in ``model``. On each
    step the solver asks ``select_best_guess`` for a word, prints that word's
    similarity to ``answer`` (multiplied by 100), and narrows the pool with
    ``update_candidates``. Progress and the final outcome are printed.

    Args:
        answer: The hidden target word.
        max_steps: Maximum number of guesses before giving up.

    Returns:
        The guessed words in order. The last element equals ``answer`` only
        when the game was won.
    """
    candidates = [w for w in word_list if w in model]
    tried = []
    for step in range(1, max_steps + 1):
        # An answer outside the pool can end up eliminating every candidate;
        # stop cleanly instead of letting the selector fail on an empty pool.
        if not candidates:
            print("❌ 실패: 남은 후보 단어가 없습니다.")
            return tried

        guess = select_best_guess(candidates, tried)
        tried.append(guess)

        if guess == answer:
            print(f"✅ 정답 '{guess}'을 {step}번 만에 맞췄습니다!")
            return tried

        similarity = model.similarity(guess, answer) * 100
        print(f"{step}. '{guess}' → 유사도: {similarity:.2f}")
        candidates = update_candidates(candidates, guess, similarity)

    print(f"❌ 실패: {max_steps}회 이내에 정답을 찾지 못했습니다.")
    return tried

In [12]:
# Simulate a single game; the returned list of guesses is shown as the cell's output.
simulate_game("사랑스럽다")

1. '가게' → 유사도: 0.45
2. '가입자' → 유사도: -3.68
3. '거칠다' → 유사도: 35.18
4. '자랑하다' → 유사도: 23.63
5. '힘차다' → 유사도: 27.99
✅ 정답 '사랑스럽다'을 6번 만에 맞췄습니다!


['가게', '가입자', '거칠다', '자랑하다', '힘차다', '사랑스럽다']