In [2]:
import fasttext
import pandas as pd
import os

# 1. 检查文件路径
print("当前工作目录:", os.getcwd())
print("cooking文件存在:", os.path.exists("cooking.stackexchange.txt"))
print("csv文件存在:", os.path.exists("fake_news.csv"))

# 2. 检查文件格式
if os.path.exists("cooking.stackexchange.txt"):
    with open("cooking.stackexchange.txt", "r", encoding="utf-8") as f:
        print("文件前3行:", [line.strip() for line in f.readlines()[:3]])

# 3. 训练第一个模型
if os.path.exists("cooking.stackexchange.txt"):
    try:
        model = fasttext.train_supervised(
            input="cooking.stackexchange.txt",
            epoch=10,
            wordNgrams=2
        )
        print("第一个模型训练成功")
        
        # 测试
        if os.path.exists("cooking.valid.txt"):
            print("验证结果:", model.test("cooking.valid.txt"))
        print("预测示例:", model.predict("How to bake a cake?", k=2))
    except Exception as e:
        print("训练出错:", str(e))

# 4. 处理CSV数据
if os.path.exists("fake_news.csv"):
    try:
        df = pd.read_csv("fake_news.csv")
        # 确保列名正确
        if 'label' in df.columns and 'text' in df.columns:
            df['formatted_text'] = df.apply(lambda x: f'__label__{x.label} {x.text}', axis=1)
            df['formatted_text'].to_csv("fake_news.train", index=False, header=False)
            print("CSV预处理完成")
            
            # 训练第二个模型
            model2 = fasttext.train_supervised(
                input="fake_news.train",
                epoch=15,
                wordNgrams=3
            )
            print("第二个模型训练成功")
        else:
            print("CSV文件缺少必要的列（需要'label'和'text'列）")
    except Exception as e:
        print("CSV处理出错:", str(e))
else:
    print("未找到fake_news.csv文件")

当前工作目录: d:\1.八斗学院0\2.复习
cooking文件存在: True
csv文件存在: True
文件前3行: ['__label__sauce __label__cheese How much does potato starch affect a cheese sauce recipe?', '__label__food-safety __label__acidity Dangerous pathogens capable of growing in acidic environments', '__label__cast-iron __label__stove How do I cover up the white spots on my cast iron stove?']
第一个模型训练成功
训练出错: Unable to avoid copy while creating an array as requested.
If using `np.array(obj, copy=False)` replace it with `np.asarray(obj)` to allow a copy when needed (no behavior change in NumPy 1.x).
For more details, see https://numpy.org/devdocs/numpy_2_0_migration_guide.html#adapting-to-changes-in-the-copy-keyword.
CSV预处理完成
第二个模型训练成功
