<a href="https://colab.research.google.com/github/SHUDengwei/Pkmer-Docs/blob/main/03_miniduolingo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 模块3｜Mini Duolingo：题库生成 + 间隔记忆练习

目标：把任意学习材料转成可练习题库，并用 SM-2 简化算法调度复习。

In [None]:
import os, pandas as pd, google.generativeai as genai
from io import StringIO
from google.colab import userdata
# Fetch the Gemini API key from the Colab secret store and configure the client.
# The key is validated BEFORE it is written to os.environ: assigning None to
# os.environ raises TypeError, which would hide the setup hint below.
try:
    _api_key = userdata.get('GEMINI_API_KEY')
except Exception as err:
    # NOTE(review): Colab appears to raise its own error types when the secret
    # is missing or notebook access is not granted - confirm against the
    # google.colab.userdata docs. Either way, surface the setup hint.
    raise RuntimeError('请先在模块0中设置GEMINI_API_KEY') from err
if not _api_key:
    raise RuntimeError('请先在模块0中设置GEMINI_API_KEY')
os.environ['GEMINI_API_KEY'] = _api_key
genai.configure(api_key=_api_key)
model = genai.GenerativeModel('gemini-2.5-pro')


### 3.1 上传或粘贴材料（已提供 sample/material.txt，可替换）

In [None]:
# Load the study material, writing a built-in sample first when the file is absent.
material_path = 'sample/material.txt'
if not os.path.exists(material_path):
    # Fallback: write a sample material file. The parent directory must be
    # created first, otherwise open(..., 'w') raises FileNotFoundError.
    os.makedirs(os.path.dirname(material_path) or '.', exist_ok=True)
    # Explicit UTF-8 so the Chinese text round-trips with the read below
    # regardless of the platform's default encoding.
    with open(material_path, 'w', encoding='utf-8') as fh:
        fh.write('''            现在完成进行时（Present Perfect Continuous）用于表达从过去某一时间开始一直持续到现在的动作，通常强调动作的持续性和结果。            结构：主语 + have/has + been + 动词现在分词（V-ing）。            例如：I have been studying English for two hours.''')
with open(material_path, 'r', encoding='utf-8') as fh:
    text = fh.read()
# Preview only the first 300 characters.
print(text[:300])


FileNotFoundError: [Errno 2] No such file or directory: 'sample/material.txt'

### 3.2 调用 Gemini 生成题库（CSV 格式）
列：type,question,options,answer,explain,tag,diff

In [None]:
# Ask Gemini for a CSV question bank built from `text`, save it, and preview it.
schema = '以CSV输出，列: type,question,options,answer,explain,tag,diff(1-5)。题型仅可为[mcq,cloze,short]；mcq需A-D四个选项(以;分隔)。总题数=15。'
csv_text = model.generate_content(f'基于以下材料生成高质量题库：\n{text}\n\n{schema}').text
# Remove any markdown code fences, then trim: stripping last also drops the
# newline that is left behind once a leading ```csv fence has been removed.
csv_text = csv_text.replace('```csv', '').replace('```', '').strip()
# Make sure the output directory exists before writing.
os.makedirs('data', exist_ok=True)
with open('data/qa_bank.csv', 'w', encoding='utf-8') as fh:
    fh.write(csv_text)
print('已生成题库 data/qa_bank.csv\n')
# Preview. Best effort only: model output is not guaranteed to be valid CSV,
# and the raw text has already been saved above.
try:
    df_preview = pd.read_csv(StringIO(csv_text))
    display(df_preview.head())
except Exception as e:
    print('CSV 预览失败，但文件已写入。错误：', e)


### 3.3 间隔记忆引擎（SM-2 简化）
- 第一次正确：间隔1天
- 第二次正确：间隔6天
- 之后：间隔 = 上次间隔 * EF（易度因子）
- 评分低于3：连续正确次数清零，间隔重置为0天（当天再次复习）

In [None]:
import datetime as dt
import numpy as np
import pandas as pd

# Load the question bank and make sure the scheduling columns exist.
# The path must match the file written in section 3.2 (data/qa_bank.csv).
path = 'data/qa_bank.csv'
df = pd.read_csv(path)
# Initialise scheduling columns that are missing: ease factor, current interval
# in days, count of consecutive successful reviews, and next review date
# (stored as a string, defaulting to today).
for col, val in [('ef', 2.5), ('interval', 0), ('reps', 0), ('next', str(dt.date.today()))]:
    if col not in df.columns:
        df[col] = val
# `result` holds the outcome of the latest review; NaN means not reviewed yet.
if 'result' not in df.columns:
    df['result'] = np.nan

def review_row(row):
    """Quiz the user on one question and apply a simplified SM-2 update.

    Prints the question (and the options for ``mcq`` items), reads the answer
    from stdin and compares it case-insensitively with ``row['answer']``.
    A correct answer is graded 5; otherwise the user is asked for a 0-5
    self-assessment. An empty or non-numeric grade falls back to 2, and
    out-of-range grades are clamped to 0-5.

    Args:
        row: Mapping-like record (e.g. a pandas Series) with the keys
            ``question``, ``type``, ``answer``, ``ef``, ``reps`` and
            ``interval``; ``options`` and ``explain`` are optional.

    Returns:
        A copy of ``row`` with ``ef``, ``reps``, ``interval``, ``next``
        (ISO date string) and ``result`` (1 correct / 0 wrong) updated.
        The argument itself is left untouched.
    """
    # Work on a copy so the caller's object is not mutated behind its back.
    row = row.copy()

    print('\n题目：', row['question'])
    if row['type'] == 'mcq' and isinstance(row.get('options', ''), str):
        print('选项：', row['options'])
    ans = input('你的答案：').strip()
    # Case-insensitive so that e.g. "a" matches the MCQ key "A".
    is_correct = ans.casefold() == str(row['answer']).strip().casefold()

    if is_correct:
        grade = 5
    else:
        raw_grade = input('不正确，请给自己一个评分(0-5)：').strip()
        try:
            grade = int(raw_grade) if raw_grade else 2
        except ValueError:
            # Unparseable input is treated like an empty one.
            grade = 2
        # Keep the grade inside the range the EF formula is defined for.
        grade = min(5, max(0, grade))

    # SM-2 update: adjust the ease factor, never letting it drop below 1.3.
    miss = 5 - grade
    ef = max(1.3, row['ef'] + (0.1 - miss * (0.08 + miss * 0.02)))
    # A grade below 3 resets the streak of successful repetitions.
    reps = row['reps'] + 1 if grade >= 3 else 0
    if reps == 0:
        interval = 0
    elif reps == 1:
        interval = 1
    elif reps == 2:
        interval = 6
    else:
        interval = int(row['interval'] * ef) if row['interval'] else 6

    next_day = dt.date.today() + dt.timedelta(days=int(interval))

    row['ef'] = round(float(ef), 2)
    row['reps'] = int(reps)
    row['interval'] = int(interval)
    row['next'] = str(next_day)
    row['result'] = int(is_correct)

    # A missing explanation is read back from CSV as NaN; show the placeholder.
    explain = row.get('explain', '(无)')
    if not isinstance(explain, str) or not explain.strip():
        explain = '(无)'
    print(f"{'✅ 正确' if is_correct else '❌ 错误'}｜EF={row['ef']}｜下次复习：{row['next']}\n解析：{explain}\n")
    return row

# Pick at most 10 questions whose review date has arrived. Dates are kept as
# strings and compared as strings against today's date.
today = dt.date.today().isoformat()
is_due = df['next'] <= today
due = df.loc[is_due].head(10).copy()
if len(due) == 0:
    print('今日暂无到期题。系统将抽取前5题进行首次学习。')
    due = df.head(5).copy()

# Review each selected question and store the updated record back into `due`.
for idx in due.index:
    reviewed = review_row(due.loc[idx])
    due.loc[idx] = reviewed

# Merge the reviewed rows into the full bank and write it back to disk.
df.update(due)
df.to_csv(path, index=False)
print('\n已更新题库并保存：', path)
