In [None]:
import pandas as pd
from dataDownloader import DataDownloader
from db_financialStatement import DB_FinancialStatement
from db_stock import DB_Stock
from db_nyse import DB_NYSE
from commonHelper import EDateType, EFinancialStatementType
from datetime import datetime
import yfinance as yf
from portfolio import Portfolio
import numpy as np

# Notebook display settings: render DataFrames in full, without truncation.
pd.options.display.max_rows = None            # print every row
pd.options.display.max_columns = None         # print every column
pd.options.display.expand_frame_repr = False  # do not wrap wide frames across lines
pd.options.display.max_colwidth = None        # no limit on the text length of a single column
pd.options.display.width = None               # no fixed display width


### 1. Scoring utilities

def score_positive(val, min_val, max_val):
    """Linearly map ``val`` from ``[min_val, max_val]`` onto a 0-10 score.

    Values at or below ``min_val`` score 0, values at or above ``max_val``
    score 10, and values in between are interpolated linearly.

    Args:
        val: The value to score. Null values (``None``/NaN) score 0.
        min_val: Value that maps to a score of 0.
        max_val: Value that maps to a score of 10.

    Returns:
        A number in ``[0, 10]``.
    """
    if pd.isnull(val):
        return 0

    span = max_val - min_val
    if span == 0:
        # Degenerate range: interpolation would divide by zero, so fall back
        # to a step function at the single reference point.
        return 10 if val >= max_val else 0

    return max(0, min(10, 10 * (val - min_val) / span))

# Reference ranges keyed as metric name -> sector name -> (min, max).
# Looked up through get_sector_range(); a metric or sector that is not listed
# here falls back to the defaults passed to that function.
sector_metric_ranges = {
    "PER": {
        "Technology": (10, 100),
        "Financial Services": (5, 20),
        "Consumer Cyclical": (8, 40),
        "Healthcare": (10, 60),
        "Communication Services": (10, 50),
        "Industrials": (8, 30),
        "Consumer Defensive": (10, 25),
        "Energy": (5, 20),
        "Basic Materials": (5, 25),
        "Real Estate": (5, 30),
        "Utilities": (5, 25)
    },
    "PSR": {
        "Technology": (0.5, 20),
        "Financial Services": (0.5, 5),
        "Consumer Cyclical": (0.5, 6),
        "Healthcare": (0.5, 8),
        "Communication Services": (0.5, 6),
        "Industrials": (0.5, 5),
        "Consumer Defensive": (0.5, 5),
        "Energy": (0.5, 3),
        "Basic Materials": (0.5, 4),
        "Real Estate": (0.5, 8),
        "Utilities": (0.5, 4)
    },
    "DebtToEquityRatio": {
        "Technology": (0, 1),
        "Financial Services": (0, 10),
        "Consumer Cyclical": (0, 2),
        "Healthcare": (0, 1.5),
        "Communication Services": (0, 1.5),
        "Industrials": (0, 2),
        "Consumer Defensive": (0, 1.5),
        "Energy": (0, 2),
        "Basic Materials": (0, 2),
        "Real Estate": (0, 3),
        "Utilities": (0, 2)
    },
    "EV/EBIT": {
        "Technology": (5, 80),
        "Financial Services": (5, 20),
        "Consumer Cyclical": (5, 30),
        "Healthcare": (5, 50),
        "Communication Services": (5, 40),
        "Industrials": (5, 25),
        "Consumer Defensive": (5, 20),
        "Energy": (3, 15),
        "Basic Materials": (3, 20),
        "Real Estate": (5, 25),
        "Utilities": (4, 15)
    },
    "PBR": {
        "Technology": (2, 15),
        "Financial Services": (0.5, 2),
        "Consumer Cyclical": (1, 5),
        "Healthcare": (2, 6),
        "Communication Services": (1, 6),
        "Industrials": (1, 4),
        "Consumer Defensive": (1, 4),
        "Energy": (0.5, 2),
        "Basic Materials": (0.5, 2),
        "Real Estate": (0.5, 3),
        "Utilities": (0.5, 2)
    },
    "GP/A": {
        "Technology": (0.2, 0.7),
        "Financial Services": (0.05, 0.2),
        "Consumer Cyclical": (0.1, 0.4),
        "Healthcare": (0.1, 0.5),
        "Communication Services": (0.1, 0.4),
        "Industrials": (0.05, 0.25),
        "Consumer Defensive": (0.05, 0.25),
        "Energy": (0.05, 0.2),
        "Basic Materials": (0.05, 0.2),
        "Real Estate": (0.01, 0.1),
        "Utilities": (0.01, 0.1)
    }
}

def get_sector_range(sector, metric, default_min=0, default_max=10):
    """Look up the ``(min, max)`` reference range of ``metric`` for ``sector``.

    Args:
        sector: Sector name used as the inner key of ``sector_metric_ranges``.
        metric: Metric name used as the outer key of ``sector_metric_ranges``.
        default_min: Lower bound returned when no entry exists.
        default_max: Upper bound returned when no entry exists.

    Returns:
        The configured ``(min, max)`` tuple, or ``(default_min, default_max)``
        when either the metric or the sector is not in the table.
    """
    fallback = (default_min, default_max)
    ranges_for_metric = sector_metric_ranges.get(metric)
    if ranges_for_metric is None:
        return fallback
    return ranges_for_metric.get(sector, fallback)

# Fallback (min, max) ranges used when sector_metric_ranges has no entry for a
# metric/sector pair. They mirror the ranges row_score() scores each metric on,
# so that an imputed value lands where the fill policy intends: "high" beyond
# the worst end of the scoring range, "median" in its middle. With the generic
# (0, 10) default a missing PCR, for example, would be imputed as 15 and then
# receive a near-perfect score.
_IMPUTE_FALLBACK_RANGES = {
    "PER": (10, 50),
    "PSR": (0.5, 20),
    "PBR": (0.5, 25),
    "EV/EBIT": (5, 400),
    "PCR": (10, 150),
    "PFCR": (10, 150),
    "GP/A": (0, 0.7),
    "DividendYieldProxy": (0, 0.05),
    "DebtToEquityRatio": (0, 1),
    "CurrentRatio": (1, 3),
}


def safe_impute(row, metric, fill_policy):
    """Return ``row[metric]``, substituting a policy-based value when it is null.

    Args:
        row: Mapping/Series holding at least ``"Sector"`` and ``metric``.
        metric: Name of the metric to read.
        fill_policy: How to fill a null value:
            ``"high"``   -> 1.5 x the upper bound of the reference range,
            ``"low"``    -> 0,
            ``"median"`` -> midpoint of the reference range.
            Any other policy leaves the value missing.

    Returns:
        The original value when present, otherwise the imputed value
        (``numpy.nan`` for an unknown policy).
    """
    sector = row["Sector"]
    val = row[metric]
    if not pd.isnull(val):
        return val

    # Prefer the sector-specific range; otherwise use the same range the metric
    # is scored on instead of a generic (0, 10).
    fallback_min, fallback_max = _IMPUTE_FALLBACK_RANGES.get(metric, (0, 10))
    min_val, max_val = get_sector_range(sector, metric, fallback_min, fallback_max)

    if fill_policy == "high":
        return max_val * 1.5
    if fill_policy == "low":
        return 0
    if fill_policy == "median":
        return (min_val + max_val) / 2
    return np.nan

def preprocess_financials(row):
    """Fill missing metric values of one row according to a per-metric policy.

    Each listed metric is passed through ``safe_impute`` with its policy and
    written back into ``row``; the same (modified) row object is returned.
    """
    fill_policy_by_metric = (
        ("PER", "high"),
        ("PSR", "high"),
        ("PBR", "high"),
        ("EV/EBIT", "high"),
        ("PCR", "high"),
        ("PFCR", "high"),
        ("GP/A", "low"),
        ("DividendYieldProxy", "low"),
        ("DebtToEquityRatio", "high"),
        ("CurrentRatio", "median"),
    )
    for name, how in fill_policy_by_metric:
        row[name] = safe_impute(row, name, how)
    return row

def score_financials_by_sector_extended(df):
    """Score every row of an annual-financials frame on a 0-10 scale per metric.

    The input is not modified. A ``DividendYieldProxy`` column is derived,
    missing metric values are imputed with ``preprocess_financials``, and each
    row is then scored metric by metric (sector-specific ranges where
    ``get_sector_range`` has them).

    Args:
        df: DataFrame with at least the columns ``Symbol``, ``Date``,
            ``Sector``, ``CommonStockDividendPaid``, ``MarketCap``,
            ``IncomeGrowth``, ``PSR``, ``GP/A``, ``EV/EBIT``, ``PER``,
            ``CurrentRatio``, ``PBR``, ``DebtToEquityRatio``, ``PCR`` and
            ``PFCR``.

    Returns:
        DataFrame with ``Symbol``, ``Date``, one ``*_score`` column per metric
        and ``TotalScore`` (the plain average of the metric scores).
    """
    df = df.copy()

    # Derived variable: dividends paid relative to market capitalisation.
    # The columns are converted to numeric explicitly first; the notebook
    # output shows a pandas warning on the original fillna line, presumably
    # because the column arrives as object dtype.
    dividends = pd.to_numeric(df["CommonStockDividendPaid"], errors="coerce").fillna(0)
    market_cap = pd.to_numeric(df["MarketCap"], errors="coerce")
    df["CommonStockDividendPaid"] = dividends
    df["MarketCap"] = market_cap.fillna(0)
    # A missing or non-positive market cap must not be divided by: x / 0 gives
    # +/-inf, and +inf would earn the maximum dividend score. Leaving the yield
    # as NaN lets the "low" fill policy impute it to 0 instead.
    # NOTE(review): cash-flow sources often report dividends paid as a negative
    # outflow; if that holds here the yield is negative and Dividend_score is
    # always 0 - confirm the sign convention.
    df["DividendYieldProxy"] = dividends / market_cap.where(market_cap > 0)

    # Fill remaining missing values according to the per-metric policy.
    df = df.apply(preprocess_financials, axis=1)

    def row_score(row):
        """Compute all metric scores and their average for a single row."""
        sector = row["Sector"]

        def lower_is_better(metric, default_min, default_max):
            # Invert the metric so that a value at the range minimum scores 10
            # and a value at the range maximum scores 0.
            lo, hi = get_sector_range(sector, metric, default_min, default_max)
            return score_positive(hi - row[metric], 0, hi - lo)

        gp_min, gp_max = get_sector_range(sector, "GP/A", 0, 0.7)

        # Insertion order defines the column order of the result.
        r = {}
        r["IncomeGrowth_score"] = score_positive(row["IncomeGrowth"], -1, 5)
        r["PSR_score"] = lower_is_better("PSR", 0.5, 20)
        r["GP/A_score"] = score_positive(row["GP/A"], gp_min, gp_max)
        r["EV/EBIT_score"] = lower_is_better("EV/EBIT", 5, 400)
        r["PER_score"] = lower_is_better("PER", 10, 50)
        r["CurrentRatio_score"] = score_positive(row["CurrentRatio"], 1, 3)
        r["PBR_score"] = lower_is_better("PBR", 0.5, 25)
        r["DebtToEquity_score"] = lower_is_better("DebtToEquityRatio", 0, 1)
        r["PCR_score"] = score_positive(150 - row["PCR"], 0, 140)
        r["PFCR_score"] = score_positive(150 - row["PFCR"], 0, 140)
        r["Dividend_score"] = score_positive(row["DividendYieldProxy"], 0, 0.05)

        # Average is taken before TotalScore itself is added to the dict.
        r["TotalScore"] = sum(r.values()) / len(r)
        return pd.Series(r)

    score_result = df.apply(row_score, axis=1)
    return pd.concat([df[["Symbol", "Date"]], score_result], axis=1)

### 3. Quarterly earnings trend filter
def apply_quarterly_trend_filters(df_quarters):
    """Flag, per symbol, whether the three most recent quarters show an uptrend.

    Args:
        df_quarters: DataFrame with the columns ``Symbol``, ``Date``,
            ``TotalRevenue``, ``OperatingIncome``, ``NetIncome`` and
            ``GrossProfitMargin`` (one row per symbol and quarter).

    Returns:
        DataFrame with one row per symbol and the columns ``Symbol``,
        ``RevenueUp``, ``OperatingIncomeUp``, ``NetIncomePositive``,
        ``GrossMarginUp`` and ``TrendQualified`` (all four flags true).
        Symbols with fewer than three quarters get ``False`` everywhere.
    """
    flag_names = [
        "RevenueUp",
        "OperatingIncomeUp",
        "NetIncomePositive",
        "GrossMarginUp",
        "TrendQualified",
    ]
    value_cols = ["TotalRevenue", "OperatingIncome", "NetIncome", "GrossProfitMargin"]

    if df_quarters.empty:
        # Nothing to group; return an empty frame that still has the expected columns.
        return pd.DataFrame(columns=["Symbol"] + flag_names)

    df_sorted = df_quarters.sort_values(by=["Symbol", "Date"])

    def strictly_rising(series):
        # Compare absolute changes rather than pct_change(): a percentage
        # change has the wrong sign when the previous value is negative
        # (e.g. -30 -> -20 is an improvement but pct_change is negative).
        # A NaN change compares as False, i.e. "not rising".
        return bool(series.diff().iloc[1:].gt(0).all())

    def trend_filter(group):
        if len(group) < 3:
            return pd.Series(dict.fromkeys(flag_names, False))

        recent = group.iloc[-3:]
        latest_ni = recent["NetIncome"].iloc[-1]
        previous_ni = recent["NetIncome"].iloc[-2]

        result = {
            "RevenueUp": strictly_rising(recent["TotalRevenue"]),
            "OperatingIncomeUp": strictly_rising(recent["OperatingIncome"]),
            "NetIncomePositive": bool(latest_ni > 0 and latest_ni > previous_ni),
            "GrossMarginUp": strictly_rising(recent["GrossProfitMargin"]),
        }
        result["TrendQualified"] = all(result.values())
        return pd.Series(result)

    # Selecting the value columns explicitly keeps the grouping column out of
    # the frames handed to apply(), which pandas has deprecated.
    trend_df = df_sorted.groupby("Symbol")[value_cols].apply(trend_filter).reset_index()
    return trend_df

### 4. Run the integrated pipeline

def run_full_pipeline(df_annual, df_quarterly, top_n=500, final_n=20, use_trend_score=True):
    """Select stocks by annual fundamentals, then rank them by quarterly trend.

    Steps:
        1. Score the annual data and drop rows with too many missing inputs.
        2. Keep the latest annual row per symbol and take the ``top_n`` by
           ``TotalScore``.
        3. Evaluate the quarterly trend of those symbols.
        4. Return the best ``final_n`` symbols.

    Args:
        df_annual: Annual financials, as accepted by
            ``score_financials_by_sector_extended``.
        df_quarterly: Quarterly financials with a ``Symbol`` column, as
            accepted by the quarterly trend functions.
        top_n: Number of symbols kept after annual scoring.
        final_n: Number of symbols returned.
        use_trend_score: When true (default, the previous hard-wired
            behaviour) rank by ``TrendScore``; when false use the boolean
            ``TrendQualified`` filter instead.

    Returns:
        DataFrame of the selected symbols with their trend columns.
    """
    # Step 1: annual scoring.
    scored_annual = score_financials_by_sector_extended(df_annual)

    # The score columns themselves are never null (a missing value is scored
    # as 0), so measuring missingness on them would make the filter below a
    # no-op. Measure it on the raw scoring inputs instead. The scored frame
    # has one row per input row in the same order, hence positional alignment.
    scoring_inputs = (
        "IncomeGrowth", "PSR", "GP/A", "EV/EBIT", "PER",
        "CurrentRatio", "PBR", "DebtToEquityRatio", "PCR", "PFCR",
    )
    present_inputs = [col for col in scoring_inputs if col in df_annual.columns]
    if present_inputs:
        missing_ratio = df_annual[present_inputs].isnull().mean(axis=1).to_numpy()
    else:
        missing_ratio = 0.0
    scored_annual["MissingRatio"] = missing_ratio
    scored_annual = scored_annual[scored_annual["MissingRatio"] < 0.3]

    # Notebook output of the quarterly input (display is provided by IPython).
    display(df_quarterly)

    # Keep only the most recent annual row of each symbol.
    latest_rows = (
        scored_annual
        .sort_values(by=["Symbol", "Date"], ascending=[True, False])
        .drop_duplicates(subset=["Symbol"], keep="first")
    )

    # Top N symbols by total score.
    top_scored = latest_rows.sort_values(by="TotalScore", ascending=False).head(top_n)

    # Step 2: restrict the quarterly data to those symbols.
    tickers = top_scored["Symbol"].unique()
    df_quarters_filtered = df_quarterly[df_quarterly["Symbol"].isin(tickers)]

    if use_trend_score:
        # Steps 3-4 (score-based ranking): best final_n by TrendScore.
        trend_filtered = apply_quarterly_trend_scores(df_quarters_filtered)
        final_selection = trend_filtered.sort_values(by="TrendScore", ascending=False).head(final_n)
    else:
        # Steps 3-4 (plain boolean filter): first final_n qualified symbols.
        trend_filtered = apply_quarterly_trend_filters(df_quarters_filtered)
        qualified = trend_filtered[trend_filtered["TrendQualified"].astype(bool)]
        final_selection = qualified.head(final_n)

    return final_selection



def _tier_score(value, tiers):
    """Return the score of the first ``(threshold, score)`` tier that ``value`` reaches, else 0."""
    for threshold, score in tiers:
        if value >= threshold:
            return score
    return 0


def _relative_growth(series):
    """Period-over-period change relative to the magnitude of the previous value.

    Identical to ``pct_change()`` for positive bases, but keeps the correct
    sign when the previous value is negative (-30 -> -20 is +33%, not -33%).
    Undefined changes are treated as 0 and the first (baseline) element is
    dropped.
    """
    return (series.diff() / series.shift().abs()).fillna(0).iloc[1:]


def score_trend_filter(group):
    """Score the recent quarterly trend of a single symbol on a 0-10 scale.

    Args:
        group: DataFrame of one symbol's quarters with the columns ``Date``,
            ``TotalRevenue``, ``OperatingIncome``, ``NetIncome`` and
            ``GrossProfitMargin``. Rows are sorted by ``Date`` here.

    Returns:
        Series with ``RevenueScore``, ``OperatingIncomeScore``,
        ``NetIncomeScore``, ``GPMScore`` and ``TrendScore`` (their average).
        All scores are 0 when fewer than three quarters are available.
    """
    group = group.sort_values(by="Date").reset_index(drop=True)

    if len(group) < 3:
        return pd.Series({
            "RevenueScore": 0,
            "OperatingIncomeScore": 0,
            "NetIncomeScore": 0,
            "GPMScore": 0,
            "TrendScore": 0
        })

    recent = group.iloc[-3:]
    result = {}

    # Revenue: average growth over the last two quarter-to-quarter steps.
    rev_avg = _relative_growth(recent["TotalRevenue"]).mean()
    result["RevenueScore"] = _tier_score(
        rev_avg, ((0.2, 10), (0.1, 7), (0.05, 5), (0, 3))
    )

    # Operating income: same idea with lower thresholds.
    op_avg = _relative_growth(recent["OperatingIncome"]).mean()
    result["OperatingIncomeScore"] = _tier_score(
        op_avg, ((0.15, 10), (0.07, 7), (0.03, 5), (0, 3))
    )

    # Net income: reward being profitable and improving on the prior quarter.
    ni_now = recent["NetIncome"].iloc[-1]
    ni_prev = recent["NetIncome"].iloc[-2]
    if ni_now > 0 and ni_now > ni_prev:
        result["NetIncomeScore"] = 10
    elif ni_now > 0:
        result["NetIncomeScore"] = 7
    elif ni_now > ni_prev:
        result["NetIncomeScore"] = 5
    else:
        result["NetIncomeScore"] = 0

    # Gross profit margin: average absolute change of the margin.
    gpm_avg = recent["GrossProfitMargin"].diff().fillna(0).iloc[1:].mean()
    result["GPMScore"] = _tier_score(gpm_avg, ((0.03, 10), (0.01, 7), (0, 5)))

    # Final trend score: plain average of the four component scores.
    result["TrendScore"] = (
        result["RevenueScore"]
        + result["OperatingIncomeScore"]
        + result["NetIncomeScore"]
        + result["GPMScore"]
    ) / 4

    return pd.Series(result)

def apply_quarterly_trend_scores(df_quarters):
    """Compute the quarterly trend scores of every symbol.

    Args:
        df_quarters: DataFrame with the columns ``Symbol``, ``Date``,
            ``TotalRevenue``, ``OperatingIncome``, ``NetIncome`` and
            ``GrossProfitMargin``.

    Returns:
        DataFrame with one row per symbol: ``Symbol`` plus the score columns
        produced by ``score_trend_filter`` (``RevenueScore``,
        ``OperatingIncomeScore``, ``NetIncomeScore``, ``GPMScore``,
        ``TrendScore``).
    """
    score_cols = [
        "RevenueScore",
        "OperatingIncomeScore",
        "NetIncomeScore",
        "GPMScore",
        "TrendScore",
    ]
    if df_quarters.empty:
        # Without groups apply() yields no score columns, and callers sort by
        # "TrendScore"; return an empty frame that still has those columns.
        return pd.DataFrame(columns=["Symbol"] + score_cols)

    # Only these columns are read by score_trend_filter. Selecting them
    # explicitly keeps the grouping column out of the frames handed to
    # apply(), which pandas has deprecated.
    input_cols = ["Date", "TotalRevenue", "OperatingIncome", "NetIncome", "GrossProfitMargin"]

    df_sorted = df_quarters.sort_values(by=["Symbol", "Date"])
    trend_scores = df_sorted.groupby("Symbol")[input_cols].apply(score_trend_filter).reset_index()
    return trend_scores


# Driver: collect candidate symbols, keep those with complete financial data
# that are not flagged as SPACs, then run the selection pipeline on them.
symbols = []
with DB_NYSE() as nyse:
  symbols = nyse.getSymbolList()


with DB_FinancialStatement() as fs:

    symbols = symbols  # no-op self-assignment
    # NOTE(review): this overrides the list fetched from DB_NYSE with a single
    # hard-coded ticker - looks like a debugging leftover; confirm before
    # running on the full universe.
    symbols = ['AMD']
    df = fs.get_symbols_data_existence(symbols)

    # Keep only rows where every column equals 1 (checked over all columns except 'symbol')
    valid_symbols_df = df[df.drop(columns='symbol').eq(1).all(axis=1)]

    # Extract the 'symbol' column as a plain list
    symbols = valid_symbols_df['symbol'].tolist()

    # Drop symbols whose 'IsSPAC' flag (added by get_mark_spac) is set
    df = fs.get_company(symbols)
    df = fs.get_mark_spac(df)
    df = df[['symbol','IsSPAC']]
    symbols = df[df['IsSPAC'] == False]['symbol'].tolist()

    # Load yearly and quarterly data and run the pipeline; 'df' ends up holding the final selection
    df_year = fs.get_data(symbols, EDateType.YEAR)
    df_quarter = fs.get_data(symbols, EDateType.QUARTER)
    df = run_full_pipeline(df_annual=df_year, df_quarterly=df_quarter)
    # display(df)
    



  df["CommonStockDividendPaid"] = df["CommonStockDividendPaid"].fillna(0)


Unnamed: 0,Symbol,Date,IncomeGrowth_score,PSR_score,GP/A_score,EV/EBIT_score,PER_score,CurrentRatio_score,PBR_score,DebtToEquity_score,PCR_score,PFCR_score,Dividend_score,TotalScore,MissingRatio
4,AMD,2024-12-31,8.666667,6.359862,0.0,0.0,0.0,8.081308,8.920536,9.626912,6.112384,6.908361,0.0,4.970548,0.0


  trend_scores = df_sorted.groupby("Symbol").apply(score_trend_filter).reset_index()
