# In [5]:
import FinanceDataReader as fdr
import pandas as pd
import json

def classify_change(rate):
    """Map a fractional rate of change onto one of six labelled buckets.

    Args:
        rate: Rate of change expressed as a fraction (``0.05`` means +5 %).

    Returns:
        The label string of the bucket that contains ``rate``.

    Note:
        A NaN ``rate`` fails every comparison below and therefore falls
        through to the final "Strong Negative" label.
    """
    # Non-negative buckets: the lower bound is inclusive.
    rising = (
        (0.05, "Strong Positive (≥ 0.05)"),
        (0.02, "Positive (0.02–0.0499)"),
        (0.0, "Slight Positive (0–0.0199)"),
    )
    for floor, label in rising:
        if rate >= floor:
            return label

    # Negative buckets: the lower bound is exclusive, so exactly -0.02 is
    # "Negative" and exactly -0.05 is "Strong Negative".
    falling = (
        (-0.02, "Slight Negative (–0.0199 to 0)"),
        (-0.05, "Negative (–0.0499 to –0.02)"),
    )
    for floor, label in falling:
        if rate > floor:
            return label

    return "Strong Negative (≤ –0.05)"

# Top 50 KOSPI listings ranked by market capitalisation ('Marcap').
kospi = fdr.StockListing('KOSPI')
top50 = kospi.sort_values(by='Marcap', ascending=False).head(50)
codes = top50[['Code', 'Name']].reset_index(drop=True)

# Number of consecutive months shown as context before the month to classify.
CONTEXT_MONTHS = 10


def _monthly_bars(df):
    """Collapse a price frame into month-end OHLCV bars with a ``Change`` column.

    Args:
        df: Frame with a datetime index and ``Open``/``High``/``Low``/
            ``Close``/``Volume`` columns (presumably daily bars as returned
            by ``fdr.DataReader`` -- confirm against the data source).

    Returns:
        A frame with one row per month, the former index as a regular column,
        and ``Change`` holding the month-over-month fractional change of
        ``Close``.
    """
    # An offset object is used instead of the 'M' string alias, which recent
    # pandas versions deprecate in favour of 'ME'; the object form yields the
    # same month-end bins on old and new versions alike.
    monthly_df = df.resample(pd.offsets.MonthEnd()).agg({
        'Open': 'first',
        'High': 'max',
        'Low': 'min',
        'Close': 'last',
        'Volume': 'sum',
    }).dropna()

    # Each row carries the return realised *in that month* (close vs. the
    # previous month's close). Shifting this column backwards would place a
    # future return on every row, leaking the answer for the target month
    # into the last context row and mislabelling the target.
    monthly_df['Change'] = monthly_df['Close'].pct_change()

    # The first month has no previous close, hence no return; drop it.
    return monthly_df.dropna(subset=['Change']).reset_index()


def _build_examples(company, monthly_df):
    """Build JSONL lines from sliding windows over ``monthly_df``.

    Every window consists of ``CONTEXT_MONTHS`` consecutive rows used as
    context, followed by one target row whose ``Change`` is classified with
    ``classify_change`` to produce the expected output.

    Args:
        company: Company name interpolated into the prompt.
        monthly_df: Output of ``_monthly_bars``; must contain a ``Date``
            column (assumes the source index was named ``Date`` -- confirm).

    Returns:
        A list of newline-terminated JSON strings, one per usable window.
    """
    lines = []
    for i in range(len(monthly_df) - CONTEXT_MONTHS):
        context_df = monthly_df.iloc[i:i + CONTEXT_MONTHS]
        target_row = monthly_df.iloc[i + CONTEXT_MONTHS]

        # Skip windows with any missing value in the context or the label.
        if context_df.isnull().values.any() or pd.isna(target_row["Change"]):
            continue

        context_lines = [
            f"{row['Date'].date()}, {row['Open']}, {row['High']}, {row['Low']}, {row['Close']}, {row['Volume']}, {round(row['Change'], 6)}"
            for _, row in context_df.iterrows()
        ]

        output_label = classify_change(target_row["Change"])

        # chr(10) is a newline; it is used because older Python versions
        # reject backslashes inside f-string expressions.
        instruction = f"""Assess the data to estimate how the closing price of {company} will change on {target_row['Date'].date()}.\nRespond with one of the following levels based on the rate of change: \nStrong Positive (≥ 0.05), Positive (0.02–0.0499), Slight Positive (0–0.0199), Slight Negative (–0.0199 to 0), \nNegative (–0.0499 to –0.02), or Strong Negative (≤ –0.05).\n\nContext: date, open, high, low, close, volume, change.\n{chr(10).join(context_lines)}\nAnswer:"""
        json_obj = {
            "instruction": instruction,
            "output": output_label,
        }
        lines.append(json.dumps(json_obj, ensure_ascii=False) + "\n")
    return lines


jsonl_lines = []

for company, ticker in zip(codes['Name'], codes['Code']):
    try:
        df = fdr.DataReader(ticker, '2022-01-01', '2024-12-31')
        # Require at least 250 rows of history (this also covers an empty frame).
        if len(df) < 250:
            continue

        # Examples are added only once the whole ticker has been processed,
        # so a failure part-way through never leaves a partial set behind.
        jsonl_lines.extend(_build_examples(company, _monthly_bars(df)))

    except Exception as e:
        # Best-effort per ticker: report the failure and move on to the next.
        print(f"❗ Error for {company} ({ticker}): {e}")
        continue

# Joined once at the end instead of growing a string inside the loop.
monthly_jsonl_str = "".join(jsonl_lines)

# Save the dataset.
with open("kospi_fdr_output_10_months.jsonl", "w", encoding="utf-8") as f:
    f.write(monthly_jsonl_str)

print("✅ 월간 JSONL 생성 완료!")


# Notebook cell output: ✅ 월간 JSONL 생성 완료!
