In [6]:
import FinanceDataReader as fdr
import pandas as pd
from tqdm import tqdm
import FinanceDataReader as fdr
import json
from datetime import datetime

# 일간 데이터

In [7]:

# 2. Build the daily regression dataset for the 50 largest KOSPI stocks.
#
# Every sample is a prompt holding CONTEXT_DAYS consecutive trading days
# (date, OHLCV and that day's close-to-close return) and a label equal to the
# close-to-close return of the trading day that immediately follows the
# context, i.e. the date named in the prompt.
kospi = fdr.StockListing('KOSPI')
top50 = kospi.sort_values(by='Marcap', ascending=False).head(50)
codes = top50[['Code', 'Name']].reset_index(drop=True)

CONTEXT_DAYS = 10  # rows of history per prompt; also the stride between samples

daily_lines = []  # one serialized JSON line per sample, joined once at the end

for company, ticker in zip(codes['Name'], codes['Code']):
    try:
        df = fdr.DataReader(ticker, '2022-01-01', '2024-12-31')
        if df.empty:
            continue
        df = df.reset_index()

        # Same-day close-to-close return. It is deliberately NOT shifted:
        # with a shift(-1) the last context row would contain the return of
        # the target date itself, and the label would belong to the day after
        # the date the prompt asks about.
        df["Change"] = df["Close"].pct_change()
        # The first row has no previous close, so it cannot carry a return.
        df = df.dropna(subset=["Change"]).reset_index(drop=True)

        # Non-overlapping windows: rows [i, i + CONTEXT_DAYS) are the context,
        # row i + CONTEXT_DAYS is the prediction target.
        for i in range(0, len(df) - CONTEXT_DAYS, CONTEXT_DAYS):
            context_df = df.iloc[i:i + CONTEXT_DAYS]
            target_row = df.iloc[i + CONTEXT_DAYS]

            # Skip windows with any missing value in the context or the label.
            if context_df.isnull().values.any() or pd.isna(target_row["Change"]):
                continue

            context_lines = [
                f"{row['Date'].date()}, {row['Open']}, {row['High']}, {row['Low']}, {row['Close']}, {row['Volume']}, {round(row['Change'], 6)}"
                for _, row in context_df.iterrows()
            ]
            context_block = "\n".join(context_lines)

            output_label = round(target_row["Change"], 6)

            instruction = (
                f"Using the context below, estimate the rate of change in the closing price of {company} on {target_row['Date'].date()}.\n"
                "Return the expected value of change as a decimal.\n"
                "\n"
                "Context: date, open, high, low, close, volume, change.\n"
                f"{context_block}\n"
                "Answer:"
            )

            json_obj = {
                "instruction": instruction,
                "output": output_label
            }

            daily_lines.append(json.dumps(json_obj, ensure_ascii=False) + "\n")
    except Exception as e:
        # Best effort, matching the weekly/monthly cells: one failing ticker
        # must not discard the samples already built for the others.
        print(f"❗ Error for {company} ({ticker}): {e}")
        continue

# Kept as a module-level string so later cells that read `jsonl_str` still work.
jsonl_str = "".join(daily_lines)

# Save
with open("kospi_10_days_reg_output_.jsonl", "w", encoding="utf-8") as f:
    f.write(jsonl_str)

print("✅ FDR 기반 Regression JSONL 생성 완료")


✅ FDR 기반 Regression JSONL 생성 완료


# 주간 데이터

In [8]:
import FinanceDataReader as fdr
import pandas as pd
import json

# Build the weekly regression dataset for the 50 largest KOSPI stocks.
#
# Daily bars are aggregated into weekly bars; every sample is a prompt holding
# CONTEXT_WEEKS consecutive weekly bars (with each week's close-to-close
# return) and a label equal to the return of the week that follows the
# context, i.e. the week whose date is named in the prompt.
kospi = fdr.StockListing('KOSPI')
top50 = kospi.sort_values(by='Marcap', ascending=False).head(50)
codes = top50[['Code', 'Name']].reset_index(drop=True)

CONTEXT_WEEKS = 10            # weekly bars shown in each prompt
WEEKLY_MIN_TRADING_DAYS = 60  # tickers with fewer daily rows are skipped

weekly_lines = []  # one serialized JSON line per sample, joined once at the end

for company, ticker in zip(codes['Name'], codes['Code']):
    try:
        df = fdr.DataReader(ticker, '2022-01-01', '2024-12-31')
        if df.empty or len(df) < WEEKLY_MIN_TRADING_DAYS:
            continue

        # Weekly OHLCV bars; weeks without any trading day are dropped.
        weekly_df = df.resample('W').agg({
            'Open': 'first',
            'High': 'max',
            'Low': 'min',
            'Close': 'last',
            'Volume': 'sum'
        }).dropna()

        # Same-week close-to-close return. It is deliberately NOT shifted:
        # with a shift(-1) the last context row would contain the return of
        # the target week itself, and the label would belong to the week
        # after the date the prompt asks about.
        weekly_df['Change'] = weekly_df['Close'].pct_change()
        # The first bar has no previous close, so it cannot carry a return.
        weekly_df = weekly_df.dropna(subset=['Change']).reset_index()

        # Sliding window with stride 1: rows [i, i + CONTEXT_WEEKS) are the
        # context, row i + CONTEXT_WEEKS is the prediction target.
        for i in range(len(weekly_df) - CONTEXT_WEEKS):
            context_df = weekly_df.iloc[i:i + CONTEXT_WEEKS]
            target_row = weekly_df.iloc[i + CONTEXT_WEEKS]

            # Skip windows with any missing value in the context or the label.
            if context_df.isnull().values.any() or pd.isna(target_row["Change"]):
                continue

            context_lines = [
                f"{row['Date'].date()}, {row['Open']}, {row['High']}, {row['Low']}, {row['Close']}, {row['Volume']}, {round(row['Change'], 6)}"
                for _, row in context_df.iterrows()
            ]
            context_block = "\n".join(context_lines)

            output_label = round(target_row["Change"], 6)

            # Assembled line by line: a triple-quoted literal written inside
            # this indented block would leak the source indentation into the
            # prompt text.
            instruction = (
                f"Using the context below, estimate the rate of change in the closing price of {company} on {target_row['Date'].date()}.\n"
                "Return the expected value of change as a decimal.\n"
                "\n"
                "Context: date, open, high, low, close, volume, change.\n"
                f"{context_block}\n"
                "Answer:"
            )

            json_obj = {
                "instruction": instruction,
                "output": output_label
            }

            weekly_lines.append(json.dumps(json_obj, ensure_ascii=False) + "\n")
    except Exception as e:
        # Best effort: report the failing ticker and carry on with the rest.
        print(f"❗ Error for {company} ({ticker}): {e}")
        continue

# Kept as a module-level string so later cells that read it still work.
weekly_jsonl_str = "".join(weekly_lines)

with open("kospi_10_weeks_reg_output_.jsonl", "w", encoding="utf-8") as f:
    f.write(weekly_jsonl_str)

print("✅ 주간 JSONL 생성 완료!")


✅ 주간 JSONL 생성 완료!


# 월간 데이터

In [9]:
import FinanceDataReader as fdr
import pandas as pd
import json

# Build the monthly regression dataset for the 50 largest KOSPI stocks.
#
# Daily bars are aggregated into monthly bars; every sample is a prompt
# holding CONTEXT_MONTHS consecutive monthly bars (with each month's
# close-to-close return) and a label equal to the return of the month that
# follows the context, i.e. the month whose date is named in the prompt.
kospi = fdr.StockListing('KOSPI')
top50 = kospi.sort_values(by='Marcap', ascending=False).head(50)
codes = top50[['Code', 'Name']].reset_index(drop=True)

CONTEXT_MONTHS = 10             # monthly bars shown in each prompt
MONTHLY_MIN_TRADING_DAYS = 250  # tickers with fewer daily rows are skipped

monthly_lines = []  # one serialized JSON line per sample, joined once at the end

for company, ticker in zip(codes['Name'], codes['Code']):
    try:
        df = fdr.DataReader(ticker, '2022-01-01', '2024-12-31')
        if df.empty or len(df) < MONTHLY_MIN_TRADING_DAYS:
            continue

        # Monthly resampling into OHLCV bars; months without data are dropped.
        # NOTE: pandas >= 2.2 prefers the alias 'ME' and warns on 'M'; 'M' is
        # kept so the cell also runs on older pandas versions.
        monthly_df = df.resample('M').agg({
            'Open': 'first',
            'High': 'max',
            'Low': 'min',
            'Close': 'last',
            'Volume': 'sum'
        }).dropna()

        # Monthly return: same-month close-to-close change. It is deliberately
        # NOT shifted: with a shift(-1) the last context row would contain the
        # return of the target month itself, and the label would belong to the
        # month after the date the prompt asks about.
        monthly_df['Change'] = monthly_df['Close'].pct_change()
        # The first bar has no previous close, so it cannot carry a return.
        monthly_df = monthly_df.dropna(subset=['Change']).reset_index()

        # Sliding window with stride 1: rows [i, i + CONTEXT_MONTHS) are the
        # context, row i + CONTEXT_MONTHS is the prediction target.
        for i in range(len(monthly_df) - CONTEXT_MONTHS):
            context_df = monthly_df.iloc[i:i + CONTEXT_MONTHS]
            target_row = monthly_df.iloc[i + CONTEXT_MONTHS]

            # Skip windows with any missing value in the context or the label.
            if context_df.isnull().values.any() or pd.isna(target_row["Change"]):
                continue

            context_lines = [
                f"{row['Date'].date()}, {row['Open']}, {row['High']}, {row['Low']}, {row['Close']}, {row['Volume']}, {round(row['Change'], 6)}"
                for _, row in context_df.iterrows()
            ]
            context_block = "\n".join(context_lines)

            output_label = round(target_row["Change"], 6)

            # Assembled line by line: a triple-quoted literal written inside
            # this indented block would leak the source indentation into the
            # prompt text.
            instruction = (
                f"Using the context below, estimate the rate of change in the closing price of {company} on {target_row['Date'].date()}.\n"
                "Return the expected value of change as a decimal.\n"
                "\n"
                "Context: date, open, high, low, close, volume, change.\n"
                f"{context_block}\n"
                "Answer:"
            )

            json_obj = {
                "instruction": instruction,
                "output": output_label
            }

            monthly_lines.append(json.dumps(json_obj, ensure_ascii=False) + "\n")

    except Exception as e:
        # Best effort: report the failing ticker and carry on with the rest.
        print(f"❗ Error for {company} ({ticker}): {e}")
        continue

# Kept as a module-level string so later cells that read it still work.
monthly_jsonl_str = "".join(monthly_lines)

# Save
with open("kospi_10_months_reg_output_.jsonl", "w", encoding="utf-8") as f:
    f.write(monthly_jsonl_str)

print("✅ 월간 JSONL 생성 완료!")


✅ 월간 JSONL 생성 완료!
