In [2]:
import FinanceDataReader as fdr
import pandas as pd
from pymongo import MongoClient
from tqdm import tqdm
import FinanceDataReader as fdr
import json
from datetime import datetime
import os

In [3]:
def classify_change(rate):
    """Map a fractional price change onto one of six textual buckets.

    Args:
        rate: Rate of change as a fraction (e.g. 0.03 means +3%).

    Returns:
        The label of the first bucket, scanning from the highest threshold
        down, whose lower bound ``rate`` satisfies. Non-negative bounds are
        inclusive and negative bounds are exclusive. Anything that satisfies
        no bound (including NaN, for which every comparison is false) falls
        through to the "Strong Negative" label.
    """
    # (lower bound, whether the bound itself belongs to the bucket, label)
    buckets = (
        (0.05, True, "Strong Positive (≥ 0.05)"),
        (0.02, True, "Positive (0.02–0.0499)"),
        (0.0, True, "Slight Positive (0–0.0199)"),
        (-0.02, False, "Slight Negative (–0.0199 to 0)"),
        (-0.05, False, "Negative (–0.0499 to –0.02)"),
    )
    for lower_bound, inclusive, label in buckets:
        if rate > lower_bound or (inclusive and rate == lower_bound):
            return label
    return "Strong Negative (≤ –0.05)"


In [5]:

# 2. Build an instruction-tuning JSONL file from the top KOSPI stocks by
#    market capitalization.
#
# Each example shows WINDOW consecutive trading days of
# (date, open, high, low, close, volume, change) and asks for the bucketed
# close-to-close change of the trading day immediately after the window.
TOP_N = 50                                   # number of largest-cap tickers to use
WINDOW = 10                                  # trading days of context per example
START_DATE, END_DATE = '2022-01-01', '2024-12-31'
OUTPUT_PATH = "kospi_fdr_output_10_days.jsonl"

kospi = fdr.StockListing('KOSPI')
top50 = kospi.sort_values(by='Marcap', ascending=False).head(TOP_N)
codes = top50[['Code', 'Name']].reset_index(drop=True)

# Collect serialized examples in a list and write them once at the end;
# repeated `str +=` re-copies the growing buffer on every iteration.
jsonl_lines = []

for company, ticker in tqdm(zip(codes['Name'], codes['Code']), total=len(codes), desc="tickers"):
    prices = fdr.DataReader(ticker, START_DATE, END_DATE).reset_index()
    if prices.empty:
        # No data came back for this ticker in the requested range.
        continue

    # Same-day close-to-close change: row t holds close[t] / close[t-1] - 1.
    # The value must NOT be shifted forward: a forward-shifted column would
    # place the next day's return inside the context rows (look-ahead leak)
    # and push the label one day past "tomorrow".
    # fill_method=None keeps missing closes as NaN instead of forward-filling
    # them, so the null checks below can skip the affected windows.
    prices["Change"] = prices["Close"].pct_change(fill_method=None)
    # The first row has no previous close, so its change is undefined; drop it
    # rather than letting it invalidate the whole first window.
    prices = prices.iloc[1:].reset_index(drop=True)

    # Non-overlapping windows; each needs one extra row after it for the label.
    for start in range(0, len(prices) - WINDOW, WINDOW):
        context_df = prices.iloc[start:start + WINDOW]
        # Trading day right after the window, i.e. "tomorrow" for its last row.
        target_row = prices.iloc[start + WINDOW]

        if context_df.isnull().values.any() or pd.isna(target_row["Change"]):
            continue

        context_lines = [
            f"{row['Date'].date()}, {row['Open']}, {row['High']}, {row['Low']}, {row['Close']}, {row['Volume']}, {round(row['Change'], 6)}"
            for _, row in context_df.iterrows()
        ]
        context_block = "\n".join(context_lines)

        output_label = classify_change(target_row["Change"])

        instruction = (
            f"Assess the data to estimate how the closing price of {company} will change tomorrow.\n"
            "Respond with one of the following levels based on the rate of change: \n"
            "Strong Positive (≥ 0.05), Positive (0.02–0.0499), Slight Positive (0–0.0199), Slight Negative (–0.0199 to 0), \n"
            "Negative (–0.0499 to –0.02), or Strong Negative (≤ –0.05).\n\n"
            "Context: date, open, high, low, close, volume, change.\n"
            f"{context_block}\n"
            "Answer:"
        )

        json_obj = {
            "instruction": instruction,
            "output": output_label
        }

        jsonl_lines.append(json.dumps(json_obj, ensure_ascii=False) + "\n")

# Save: one JSON object per line, written only after every ticker succeeded.
with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
    f.writelines(jsonl_lines)

print("✅ FDR 기반 JSONL 생성 완료")


✅ FDR 기반 JSONL 생성 완료
