In [1]:
!pip install jieba gensim
import jieba
from gensim.models import Word2Vec
import re
import random



In [2]:
# Toy training corpus: nine short Chinese sentences about NLP and machine
# learning. Each string is one sentence; they are tokenised by preprocess()
# and fed to Word2Vec further down in the notebook.
corpus = [
    "自然语言处理是人工智能的重要分支",
    "Word2Vec可以将词语转换为向量",
    "机器学习需要大量的数据训练模型",
    "深度学习是机器学习的一个子领域",
    "神经网络由多个神经元相互连接组成",
    "语义相似度是自然语言处理的重要任务",
    "CBOW和Skip-gram是Word2Vec的两种模型",
    "词向量能够捕捉词语的语义信息",
    "分词是文本预处理的关键步骤"
]

In [3]:
# Function words removed from every tokenised sentence. Built once at
# definition time instead of on every preprocess() call. The single-character
# entries are already covered by the length filter below; they are kept so the
# set still reads as a complete stop-word list.
DEFAULT_STOPWORDS = frozenset({'的', '是', '在', '中', '由', '为', '可以', '将', '需要'})


def preprocess(text, stopwords=None):
    """Turn one raw sentence into a list of content tokens.

    Steps:
      1. Delete every character that is neither a word character nor
         whitespace (i.e. punctuation, including the hyphen in "Skip-gram").
      2. Segment the cleaned text with ``jieba.lcut``.
      3. Keep only tokens that are longer than one character, are not
         whitespace-only, and are not stop words.

    Args:
        text: The sentence to tokenise.
        stopwords: Optional collection of tokens to drop. When ``None`` the
            module-level ``DEFAULT_STOPWORDS`` is used, which reproduces the
            original hard-coded list.

    Returns:
        A list of token strings in their original order (possibly empty).
    """
    if stopwords is None:
        stopwords = DEFAULT_STOPWORDS

    cleaned = re.sub(r'[^\w\s]', '', text)

    # `len(w) > 1` alone lets multi-character whitespace tokens through
    # (such as a "\r\n" line break, which jieba may return as one token),
    # so whitespace-only tokens are rejected explicitly with `w.strip()`.
    return [
        w for w in jieba.lcut(cleaned)
        if len(w) > 1 and w.strip() and w not in stopwords
    ]

# Tokenise every sentence of the corpus, then train the embedding model.
sentences = [preprocess(text) for text in corpus]

# sg=1 selects the Skip-gram architecture; min_count=1 keeps every token
# because the corpus is tiny. A fixed seed together with a single worker
# thread makes the learned vectors (and so the game's hints) repeatable from
# run to run; gensim notes that full reproducibility across interpreter
# launches additionally requires PYTHONHASHSEED to be set.
model = Word2Vec(
    sentences,
    vector_size=100,
    window=3,
    min_count=1,
    sg=1,
    seed=42,
    workers=1,
)

Building prefix dict from the default dictionary ...
DEBUG:jieba:Building prefix dict from the default dictionary ...
Dumping model to file cache /tmp/jieba.cache
DEBUG:jieba:Dumping model to file cache /tmp/jieba.cache
Loading model cost 1.088 seconds.
DEBUG:jieba:Loading model cost 1.088 seconds.
Prefix dict has been built successfully.
DEBUG:jieba:Prefix dict has been built successfully.


In [4]:
def word_guess_game(w2v_model=None, input_fn=None, rng=None):
    """Play one round of "guess the word from its nearest neighbours".

    A target word is drawn at random from the model vocabulary, up to three
    of its most similar words are shown as a hint, and the player's typed
    guess is compared with the target.

    Args:
        w2v_model: Object exposing ``.wv.key_to_index``, ``.wv.most_similar``
            and ``.wv.similarity``. When ``None`` the notebook-global
            ``model`` is used, which is the original behaviour.
        input_fn: Callable taking the prompt string and returning the guess.
            Defaults to the built-in ``input`` (resolved at call time).
        rng: Object with a ``choice(seq)`` method used to pick the target.
            Defaults to the ``random`` module.

    Returns:
        None. All results are reported through ``print``.
    """
    if w2v_model is None:
        w2v_model = model  # trained in an earlier notebook cell
    ask = input if input_fn is None else input_fn
    chooser = random if rng is None else rng

    # Pick the target word at random from the vocabulary.
    vocabulary = list(w2v_model.wv.key_to_index)
    if not vocabulary:
        # choice() on an empty list would raise IndexError.
        print("游戏初始化失败，换一个目标词重试")
        return
    target = chooser.choice(vocabulary)

    # Fetch the nearest neighbours and keep the top three (excluding the
    # target itself). `Exception` rather than a bare `except` so that
    # Ctrl-C / SystemExit still propagate.
    try:
        neighbours = w2v_model.wv.most_similar(target, topn=5)
    except Exception as err:
        print(f"游戏初始化失败，换一个目标词重试（{err}）")
        return
    similar_words = [w for w, _ in neighbours if w != target][:3]
    if not similar_words:
        # Without neighbours there is no hint to show, and the
        # similar_words[0] lookup below would raise IndexError.
        print("游戏初始化失败，换一个目标词重试")
        return

    # Show the hint.
    print("\n🎮 词义猜谜游戏：根据以下相似词，猜出目标词（中文）")
    print(f"提示：与这些词最相似 -> {similar_words}")

    # Read the player's answer; surrounding whitespace is ignored so that a
    # stray space or newline does not turn a correct answer into a miss.
    guess = ask("请输入你的猜测：").strip()

    # Check the answer.
    if guess == target:
        # Report the model's similarity score for the first hint word.
        similarity = w2v_model.wv.similarity(target, similar_words[0])
        print(f"🎉 猜对了！目标词就是「{target}」")
        print(f"相似度参考：与「{similar_words[0]}」的相似度为 {similarity:.2f}")
    else:
        print(f"❌ 猜错啦，正确答案是「{target}」")

# Launch the game: play a single round when this cell/script is executed as
# the main module (the recorded output below shows it running in the notebook).
if __name__ == "__main__":
    word_guess_game()



🎮 词义猜谜游戏：根据以下相似词，猜出目标词（中文）
提示：与这些词最相似 -> ['CBOW', '词语', '预处理']
请输入你的猜测：AI
❌ 猜错啦，正确答案是「重要」
