In [1]:
import pandas as pd
import os

# Path of the original CSV file
file_path = "/home/jesse/Projects/myprojs/MT_1/01_Data_Collection/Dogecoin_CSV/Dogecoin_Reddit_2019-07-05_to_2024-12-31.csv"

# Destination directory and file name for the sample
save_dir = "/home/jesse/Projects/myprojs/MT_1/00_Testing/Dogecoin_Comments_HF"
save_path = os.path.join(save_dir, "test.csv")

# Create the destination directory if it does not exist yet
os.makedirs(save_dir, exist_ok=True)

# Load only the first 1000 data rows, then write them to the new CSV
# without the pandas index column
df = pd.read_csv(file_path, nrows=1000)
df.to_csv(save_path, index=False)

print(f"✅ 已保存前 1000 条评论至：{save_path}")


✅ 已保存前 1000 条评论至：/home/jesse/Projects/myprojs/MT_1/00_Testing/Dogecoin_Comments_HF/test.csv


In [1]:
import csv
import requests
import time

# Ollama model settings
OLLAMA_API_URL = "http://localhost:11434/api/generate"
MODEL_NAME = "llama3.3:70b-instruct-q3_K_S"

# Input / output paths
input_csv = "/home/jesse/Projects/myprojs/MT_1/00_Testing/Dogecoin_Comments_HF/test.csv"
output_csv = "/home/jesse/Projects/myprojs/MT_1/00_Testing/Dogecoin_Comments_HF/labeled_test.csv"

# Prompt template: one list entry per prompt line (empty entries are blank
# lines); `{text}` is filled in with the comment via str.format.
PROMPT_TEMPLATE = "\n".join([
    "You are a financial language analysis assistant.",
    "Your task is to analyze the following Reddit comment and classify it with financial relevance and sentiment.",
    "",
    "If the comment is NOT related to markets, simply return:",
    "relevance: false",
    "",
    "If it IS related to markets, return in the following structured format:",
    (
        "relevance: true, sentiment: [positive/neutral/negative], "
        "emotion_type: [euphoria, fear, anger, FOMO, uncertainty, etc.], "
        "volatility_signal: [low/medium/high], stance: [bullish/bearish/neutral]"
    ),
    "",
    "Be concise and always output in a single line.",
    "",
    'Comment: "{text}"',
])

def _unlabeled():
    """Return the placeholder labels used for irrelevant or unprocessable comments."""
    return {
        "sentiment": "none",
        "relevance": "false",
        "emotion_type": "none",
        "volatility_signal": "none",
        "stance": "none"
    }


def classify_comment(text, timeout=300):
    """Ask the Ollama model to label one comment and parse its reply.

    Args:
        text: Raw comment text. Newlines are flattened to spaces before the
            text is inserted into PROMPT_TEMPLATE; None is treated as "".
        timeout: Seconds to wait for the HTTP response before giving up, so a
            stalled server cannot block the whole run.

    Returns:
        dict with the keys "sentiment", "relevance", "emotion_type",
        "volatility_signal" and "stance". If the reply contains
        "relevance: false", or if the request or parsing fails, every label is
        "none" and "relevance" is "false". Labels missing from an otherwise
        parseable reply default to "none" ("relevance" defaults to "true").
    """
    cleaned = (text or "").strip().replace("\n", " ")
    prompt = PROMPT_TEMPLATE.format(text=cleaned)

    try:
        # The request lives inside the try so that a connection error or
        # timeout degrades to the placeholder labels instead of aborting
        # the caller's loop.
        response = requests.post(
            OLLAMA_API_URL,
            json={
                "model": MODEL_NAME,
                "prompt": prompt,
                "stream": False
            },
            timeout=timeout,
        )
        # Surface HTTP errors with their status instead of a bare KeyError
        response.raise_for_status()
        result = response.json()["response"]

        if "relevance: false" in result.lower():
            return _unlabeled()

        # Parse "key: value" fragments. Newlines are treated like commas so a
        # multi-line reply still parses; partition splits on the first colon
        # only, so a value containing ":" no longer raises and discards the
        # whole reply. Keys are lower-cased to tolerate "Sentiment: ...".
        parts = {}
        for fragment in result.replace("\n", ",").split(","):
            key, sep, value = fragment.partition(":")
            if sep:
                parts[key.strip().lower()] = value.strip()

        return {
            "sentiment": parts.get("sentiment", "none"),
            "relevance": parts.get("relevance", "true"),  # relevant by default
            "emotion_type": parts.get("emotion_type", "none"),
            "volatility_signal": parts.get("volatility_signal", "none"),
            "stance": parts.get("stance", "none")
        }

    except Exception as e:
        # Best-effort labelling: report the problem and fall back to the
        # placeholder labels so one bad comment does not stop the batch.
        print(f"Error processing comment: {e}")
        return _unlabeled()

# Main processing: label every non-empty comment and write each row, plus its
# labels, to the output CSV.
with open(input_csv, newline='', encoding='utf-8') as infile, \
     open(output_csv, 'w', newline='', encoding='utf-8') as outfile:

    reader = csv.DictReader(infile)
    # reader.fieldnames is None when the input file is empty; fall back to an
    # empty header so only the label columns are written.
    fieldnames = list(reader.fieldnames or []) + ["sentiment", "relevance", "emotion_type", "volatility_signal", "stance"]
    writer = csv.DictWriter(outfile, fieldnames=fieldnames)
    writer.writeheader()

    for idx, row in enumerate(reader):
        # DictReader fills cells missing from a short row with None, so
        # coerce to "" before calling .strip().
        comment = row.get("body") or ""
        if not comment.strip():
            continue

        print(f"[{idx+1}] Processing comment: {comment[:50]}...")
        tags = classify_comment(comment)
        row.update(tags)
        writer.writerow(row)
        # Flush after every row so an interrupted run keeps the rows labelled so far
        outfile.flush()
        time.sleep(0.5)  # throttle requests so the model is not overloaded; tune as needed

print("✅ 所有评论处理完毕，结构化标签已写入输出文件。")

[1] Processing comment: Robin Hood...
[2] Processing comment: [deleted]...
[3] Processing comment: to the mooooooon! 🌕...
[4] Processing comment: Doing what you gotta do is what you gotta do. Just...
[5] Processing comment: Wow. Much beauty. So doge. +u/sodogetip random10 d...
[6] Processing comment: Thanks again for the info shibe! +u/sodogetip rand...
[7] Processing comment: Thanks for the info and explanation shibe +u/sodog...
[8] Processing comment: Here ya go +u/sodogetip random10 doge verify...
[9] Processing comment: 300,000 doge, and counting......
[10] Processing comment: Thx...
[11] Processing comment: /u/banano_tipbot 119 Here you are some banano seed...
[12] Processing comment: ruffvalv3r isn't registered, so I made an account ...
[13] Processing comment: 
__^[wow ^so ^verify]__: ^/u/mikeadair101 ^-&gt; ^...
[14] Processing comment: 
__^[wow ^so ^verify]__: ^/u/mikeadair101 ^-&gt; ^...
[15] Processing comment: 
__^[wow ^so ^verify]__: ^/u/mikeadair101 ^-&gt; ^...
[16] Proce