In [None]:
import os
import time
import requests
import pandas as pd
from pandarallel import pandarallel
import multiprocessing as mp

# macOS에서 fork 관련 문제 방지를 위해 'spawn' 모드를 사용합니다.
mp.set_start_method('spawn', force=True)

# 새 API 키 및 PDF Generator API URL 설정
API_KEY = '@@'
PDF_GENERATOR_API = 'https://api.sec-api.io/filing-reader'

def download_pdf(row):
    ticker = row['ticker']
    filing_url = row['filingUrl']
    max_retries = 5
    delay = 5  # 초기 지연 시간 (초)
    for attempt in range(max_retries):
        try:
            # 저장할 폴더: ./pdf_filings/<ticker>
            new_folder = os.path.join('./pdf_filings', ticker)
            os.makedirs(new_folder, exist_ok=True)
            
            # filing 날짜 (YYYY-MM-DD)
            date = row['filedAt'][:10]
            file_name = f"{date}_{row['formType']}_{filing_url.split('/')[-1]}.pdf"
            
            # API 호출 URL 구성
            api_url = f"{PDF_GENERATOR_API}?token={API_KEY}&type=pdf&url={filing_url}"
            headers = {"User-Agent": "Your Name (your.email@example.com)"}
            print(f"Requesting PDF from: {api_url}")
            
            response = requests.get(api_url, stream=True, headers=headers)
            if response.status_code == 429:
                raise requests.exceptions.HTTPError("429 Too Many Requests")
            response.raise_for_status()
            
            pdf_path = os.path.join(new_folder, file_name)
            with open(pdf_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
            print(f"✅ Downloaded PDF for {ticker}: {file_name}")
            break  # 성공하면 루프 종료
        except requests.exceptions.HTTPError as err:
            if response.status_code == 429:
                print(f"❌ {ticker}: 429 Too Many Requests. Retrying in {delay} seconds... (Attempt {attempt+1}/{max_retries})")
                time.sleep(delay)
                delay *= 2  # 지연 시간 증가 (exponential backoff)
            else:
                print(f"❌ {ticker}: PDF download failed for {filing_url} ({err})")
                break
        except Exception as e:
            print(f"❌ {ticker}: PDF download failed for {filing_url} ({e})")
            break

# CSV 파일에서 메타데이터 읽기 (파일명: metadata_filings.csv)
metadata = pd.read_csv('metadata_filings.csv')

# pandarallel 초기화: 워커 수를 10개로 설정 (병렬 다운로드)
pandarallel.initialize(progress_bar=True, nb_workers=10, verbose=0)

# DataFrame의 각 행에 대해 병렬로 PDF 다운로드 실행
metadata.parallel_apply(download_pdf, axis=1)

print("✅ PDF Download completed")


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=7), Label(value='0 / 7'))), HBox(c…

Requesting PDF from: https://api.sec-api.io/filing-reader?token=848005f5dea221a713fe0dda7e82fc0c3f905ee827db967973c9818500287810&type=pdf&url=https://www.sec.gov/Archives/edgar/data/320193/000032019323000077/aapl-20230701.htmRequesting PDF from: https://api.sec-api.io/filing-reader?token=848005f5dea221a713fe0dda7e82fc0c3f905ee827db967973c9818500287810&type=pdf&url=https://www.sec.gov/Archives/edgar/data/1730168/000173016823000008/avgo-20230129.htmRequesting PDF from: https://api.sec-api.io/filing-reader?token=848005f5dea221a713fe0dda7e82fc0c3f905ee827db967973c9818500287810&type=pdf&url=https://www.sec.gov/Archives/edgar/data/1318605/000095017023001409/tsla-20221231.htmRequesting PDF from: https://api.sec-api.io/filing-reader?token=848005f5dea221a713fe0dda7e82fc0c3f905ee827db967973c9818500287810&type=pdf&url=https://www.sec.gov/Archives/edgar/data/1326801/000132680124000012/meta-20231231.htmRequesting PDF from: https://api.sec-api.io/filing-reader?token=848005f5dea221a713fe0dda7e82fc0c3

In [None]:
import os
import time
from datetime import datetime
import fitz  # PyMuPDF
import pandas as pd
import matplotlib.pyplot as plt
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline

# 1. PDF에서 텍스트 추출하는 함수
def extract_text_from_pdf(pdf_path):
    """주어진 PDF 파일에서 전체 텍스트 추출"""
    doc = fitz.open(pdf_path)
    full_text = ""
    for page in doc:
        full_text += page.get_text()
    return full_text

# 2. PDF 파일의 텍스트를 FinBERT로 감정 분석하는 함수
def analyze_sentiment_pdf(pdf_path, tokenizer, sentiment_analyzer):
    """
    주어진 PDF 파일에서 텍스트를 추출하고, 이를 500 토큰 이하 청크로 분할하여
    FinBERT로 감정 분석을 수행한 후, 각 청크의 감정 점수(긍정, 부정, 중립)를 평균내어 반환.
    전체 점수는 (긍정 - 부정)로 계산.
    """
    # PDF 텍스트 추출
    full_text = extract_text_from_pdf(pdf_path)
    
    # 전체 텍스트를 500 토큰 청크로 분할 (stride=50)
    encoding = tokenizer(
        full_text,
        max_length=500,
        truncation=True,
        return_overflowing_tokens=True,
        stride=50
    )
    
    # 각 청크를 안전하게 500 토큰 이하로 제한 (여기서는 처음 500 토큰 사용)
    safe_chunks = [tokenizer.decode(enc.ids[:500], skip_special_tokens=True) for enc in encoding.encodings]
    
    # 각 청크에 대해 감정 분석 실행
    results = []
    for chunk in safe_chunks:
        try:
            res = sentiment_analyzer(chunk)
            results.append(res)
        except Exception as e:
            print(f"Error analyzing chunk: {e}")
    
    # 각 청크 결과 집계 (각 청크 결과는 리스트 형태, 보통 하나의 dict가 포함됨)
    positive_sum = 0.0
    negative_sum = 0.0
    neutral_sum  = 0.0
    count = 0
    for res in results:
        for r in res:
            label = r['label'].lower()
            score = r['score']
            if label == 'positive':
                positive_sum += score
            elif label == 'negative':
                negative_sum += score
            elif label == 'neutral':
                neutral_sum += score
        count += 1
    if count == 0:
        return {'positive': None, 'negative': None, 'neutral': None, 'overall': None}
    
    avg_pos = positive_sum / count
    avg_neg = negative_sum / count
    avg_neu = neutral_sum / count
    overall = avg_pos - avg_neg
    return {'positive': avg_pos, 'negative': avg_neg, 'neutral': avg_neu, 'overall': overall}

In [7]:
import os
import time
import pandas as pd
import requests
from sec_api import QueryApi, RenderApi

# multiprocessing 관련 설정 (macOS fork 문제 방지를 위해)
import multiprocessing as mp
mp.set_start_method('spawn', force=True)

# 새 API 키 설정 (JSON 예시 참고)
API_KEY = '28e5da007c35965e6f51e9e7b145f39151f5f3bf573231ac9f69e0b86b2b52e8'
queryApi = QueryApi(api_key=API_KEY)
renderApi = RenderApi(api_key=API_KEY)

def standardize_filing_url(url):
    """iXBRL URL에서 불필요한 부분 제거"""
    return url.replace('ix?doc=/', '')

def get_BRK_GOOG_metadata(start_year=2023, end_year=2025):
    """
    지정한 연도 범위(2023~2025) 동안 cik가 1652044 (Alphabet Inc.) 또는 1067983 (Berkshire Hathaway Inc.)인 filing의 메타데이터를 수집합니다.
    10-K와 10-Q (amended 및 NT 제외)만 대상으로 합니다.
    """
    frames = []
    # cik 필터 (선행 0 없이 사용)
    cik_filter = '(cik:"1652044" OR cik:"1067983")'
    
    for year in range(start_year, end_year + 1):
        objects_downloaded = 0
        for month in range(1, 13):
            padded_month = str(month).zfill(2)
            date_range_filter = f'filedAt:[{year}-{padded_month}-01 TO {year}-{padded_month}-31]'
            form_filter = '((formType:"10-K" AND NOT formType:("10-K/A", NT)) OR (formType:"10-Q" AND NOT formType:("10-Q/A", NT)))'
            lucene_query = date_range_filter + ' AND ' + cik_filter + ' AND ' + form_filter

            query_from = 0
            query_size = 200

            while True:
                query = {
                    "query": lucene_query,
                    "from": query_from,
                    "size": query_size,
                    "sort": [{ "filedAt": { "order": "desc" } }]
                }
                response = queryApi.get_filings(query)
                filings = response.get('filings', [])
                if len(filings) == 0:
                    break
                else:
                    query_from += query_size

                metadata = list(map(lambda f: {
                    'ticker': f.get('ticker', ''),
                    'cik': f.get('cik', ''),
                    'formType': f.get('formType', ''),
                    'filedAt': f.get('filedAt', ''),
                    'filingUrl': standardize_filing_url(f.get('linkToFilingDetails', ''))
                }, filings))
                df = pd.DataFrame.from_records(metadata)
                # 티커 정보가 없으면 CIK 기반으로 나중에 수동 수정할 예정입니다.
                frames.append(df)
                objects_downloaded += len(df)
        print(f"✅ Downloaded {objects_downloaded} metadata objects for year {year}")
    if frames:
        result = pd.concat(frames, ignore_index=True)
    else:
        result = pd.DataFrame()
    print(f"✅ Download completed. Metadata downloaded for {len(result)} filings.")
    return result

# 메타데이터 수집 (2023~2025)
metadata_BRK_GOOG = get_BRK_GOOG_metadata(start_year=2023, end_year=2025)

# 티커 필드 수동 업데이트: 
# cik 1652044 -> "GOOGL", cik 1067983 -> "BRK-A"
def update_ticker(row):
    cik = str(row['cik'])
    if cik == "1652044":
        return "GOOGL"
    elif cik == "1067983":
        return "BRK-A"
    else:
        return row['ticker']

metadata_BRK_GOOG['ticker'] = metadata_BRK_GOOG.apply(update_ticker, axis=1)

# CSV 파일로 저장
output_csv = "metadata_BRK_GOOG.csv"
metadata_BRK_GOOG.to_csv(output_csv, index=False)
print(f"✅ CSV 파일이 생성되었습니다: {output_csv}")

##############################
# PDF 다운로드
##############################

def download_pdf(row):
    ticker = row['ticker']
    filing_url = row['filingUrl']
    max_retries = 5
    delay = 5  # 초기 지연 (초)
    for attempt in range(max_retries):
        try:
            new_folder = os.path.join('./pdf_filings', ticker)
            os.makedirs(new_folder, exist_ok=True)
            date = row['filedAt'][:10]
            file_name = f"{date}_{row['formType']}_{filing_url.split('/')[-1]}.pdf"
            api_url = f"https://api.sec-api.io/filing-reader?token={API_KEY}&type=pdf&url={filing_url}"
            headers = {"User-Agent": "Your Name (your.email@example.com)"}
            print(f"Requesting PDF from: {api_url}")
            response = requests.get(api_url, stream=True, headers=headers)
            if response.status_code == 429:
                raise requests.exceptions.HTTPError("429 Too Many Requests")
            response.raise_for_status()
            pdf_path = os.path.join(new_folder, file_name)
            with open(pdf_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
            print(f"✅ Downloaded PDF for {ticker}: {file_name}")
            break  # 성공하면 종료
        except requests.exceptions.HTTPError as err:
            if response.status_code == 429:
                print(f"❌ {ticker}: 429 Too Many Requests. Retrying in {delay} seconds... (Attempt {attempt+1}/{max_retries})")
                time.sleep(delay)
                delay *= 2
            else:
                print(f"❌ {ticker}: PDF download failed for {filing_url} ({err})")
                break
        except Exception as e:
            print(f"❌ {ticker}: PDF download failed for {filing_url} ({e})")
            break

# CSV 파일을 읽어와서 PDF 다운로드 진행
metadata_BRK_GOOG = pd.read_csv(output_csv)

# 병렬 처리를 위해 pandarallel 초기화 (워커 수 10개)
from pandarallel import pandarallel
pandarallel.initialize(progress_bar=True, nb_workers=10, verbose=0)

# 각 행에 대해 병렬로 PDF 다운로드 실행
metadata_BRK_GOOG.parallel_apply(download_pdf, axis=1)
print("✅ PDF Download completed")


✅ Downloaded 8 metadata objects for year 2023
✅ Downloaded 8 metadata objects for year 2024
✅ Downloaded 1 metadata objects for year 2025
✅ Download completed. Metadata downloaded for 17 filings.
✅ CSV 파일이 생성되었습니다: metadata_BRK_GOOG.csv


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=2), Label(value='0 / 2'))), HBox(c…

Requesting PDF from: https://api.sec-api.io/filing-reader?token=28e5da007c35965e6f51e9e7b145f39151f5f3bf573231ac9f69e0b86b2b52e8&type=pdf&url=https://www.sec.gov/Archives/edgar/data/1652044/000165204423000045/goog-20230331.htmRequesting PDF from: https://api.sec-api.io/filing-reader?token=28e5da007c35965e6f51e9e7b145f39151f5f3bf573231ac9f69e0b86b2b52e8&type=pdf&url=https://www.sec.gov/Archives/edgar/data/1652044/000165204424000022/goog-20231231.htmRequesting PDF from: https://api.sec-api.io/filing-reader?token=28e5da007c35965e6f51e9e7b145f39151f5f3bf573231ac9f69e0b86b2b52e8&type=pdf&url=https://www.sec.gov/Archives/edgar/data/1067983/000095017023004451/brka-20221231.htmRequesting PDF from: https://api.sec-api.io/filing-reader?token=28e5da007c35965e6f51e9e7b145f39151f5f3bf573231ac9f69e0b86b2b52e8&type=pdf&url=https://www.sec.gov/Archives/edgar/data/1067983/000095017024120241/brka-20240930.htmRequesting PDF from: https://api.sec-api.io/filing-reader?token=28e5da007c35965e6f51e9e7b145f391

In [1]:
import os
import time
import pandas as pd
import requests
from sec_api import QueryApi, RenderApi
import multiprocessing as mp
mp.set_start_method('spawn', force=True)  # macOS 환경 대응

# API 키 설정
API_KEY = "8d165a6b1dc73095662db4f8bbcd57c2d23239e7de653ae887dc10836512481f"
queryApi = QueryApi(api_key=API_KEY)
renderApi = RenderApi(api_key=API_KEY)

def standardize_filing_url(url):
    """iXBRL URL에서 불필요한 부분 제거"""
    return url.replace('ix?doc=/', '')

def get_8K_metadata(start_year=2024, end_year=2025):
    """
    2024년부터 2025년까지 대상 티커의 8-K 보고서 메타데이터를 수집합니다.
    대상 티커: AAPL, AMZN, BRK-A, GOOGL, META, MSFT, TSLA, WMT, AVGO, NFLX
    """
    tickers = ["AAPL", "AMZN", "BRK-A", "GOOGL", "META", "MSFT", "TSLA", "WMT", "AVGO", "NFLX"]
    # 티커 필터: 각 티커에 대해 ticker:"<티커>"를 OR로 연결
    ticker_filter = "(" + " OR ".join([f'ticker:"{t}"' for t in tickers]) + ")"
    # 8-K 보고서만 대상으로 함
    form_filter = 'formType:"8-K"'
    
    frames = []
    for year in range(start_year, end_year + 1):
        objects_downloaded = 0
        for month in range(1, 13):
            padded_month = str(month).zfill(2)
            date_range_filter = f'filedAt:[{year}-{padded_month}-01 TO {year}-{padded_month}-31]'
            lucene_query = date_range_filter + " AND " + ticker_filter + " AND " + form_filter
            
            query_from = 0
            query_size = 200
            
            while True:
                query = {
                    "query": lucene_query,
                    "from": query_from,
                    "size": query_size,
                    "sort": [{ "filedAt": { "order": "desc" } }]
                }
                response = queryApi.get_filings(query)
                filings = response.get('filings', [])
                if len(filings) == 0:
                    break
                else:
                    query_from += query_size
                metadata = list(map(lambda f: {
                    'ticker': f.get('ticker', ''),
                    'cik': f.get('cik', ''),
                    'formType': f.get('formType', ''),
                    'filedAt': f.get('filedAt', ''),
                    'filingUrl': standardize_filing_url(f.get('linkToFilingDetails', ''))
                }, filings))
                df = pd.DataFrame.from_records(metadata)
                # 티커 정보가 없는 경우 그대로 두고, 나중에 CSV에 기록됨
                frames.append(df)
                objects_downloaded += len(df)
        print(f"✅ Downloaded {objects_downloaded} metadata objects for year {year}")
    if frames:
        result = pd.concat(frames, ignore_index=True)
    else:
        result = pd.DataFrame()
    print(f"✅ Download completed. Metadata downloaded for {len(result)} filings.")
    return result

# 1. 8-K 보고서 메타데이터 수집 (2024~2025)
metadata_8K = get_8K_metadata(start_year=2024, end_year=2025)
output_csv = "metadata_8K.csv"
metadata_8K.to_csv(output_csv, index=False)
print(f"✅ CSV 파일이 생성되었습니다: {output_csv}")

##############################
# PDF 다운로드
##############################

def download_pdf(row):
    ticker = row['ticker']
    filing_url = row['filingUrl']
    max_retries = 5
    delay = 5  # 초기 지연 (초)
    for attempt in range(max_retries):
        try:
            new_folder = os.path.join('./pdf_filings', ticker)
            os.makedirs(new_folder, exist_ok=True)
            date = row['filedAt'][:10]
            file_name = f"{date}_{row['formType']}_{filing_url.split('/')[-1]}.pdf"
            api_url = f"https://api.sec-api.io/filing-reader?token={API_KEY}&type=pdf&url={filing_url}"
            headers = {"User-Agent": "Your Name (your.email@example.com)"}
            print(f"Requesting PDF from: {api_url}")
            response = requests.get(api_url, stream=True, headers=headers)
            if response.status_code == 429:
                raise requests.exceptions.HTTPError("429 Too Many Requests")
            response.raise_for_status()
            pdf_path = os.path.join(new_folder, file_name)
            with open(pdf_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
            print(f"✅ Downloaded PDF for {ticker}: {file_name}")
            break  # 성공하면 종료
        except requests.exceptions.HTTPError as err:
            if response.status_code == 429:
                print(f"❌ {ticker}: 429 Too Many Requests. Retrying in {delay} seconds... (Attempt {attempt+1}/{max_retries})")
                time.sleep(delay)
                delay *= 2
            else:
                print(f"❌ {ticker}: PDF download failed for {filing_url} ({err})")
                break
        except Exception as e:
            print(f"❌ {ticker}: PDF download failed for {filing_url} ({e})")
            break

# CSV 파일 읽어서 PDF 다운로드 진행
metadata_8K = pd.read_csv(output_csv)

from pandarallel import pandarallel
pandarallel.initialize(progress_bar=True, nb_workers=10, verbose=0)

metadata_8K.parallel_apply(download_pdf, axis=1)
print("✅ PDF Download completed")


✅ Downloaded 74 metadata objects for year 2024
✅ Downloaded 14 metadata objects for year 2025
✅ Download completed. Metadata downloaded for 88 filings.
✅ CSV 파일이 생성되었습니다: metadata_8K.csv


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=9), Label(value='0 / 9'))), HBox(c…

Requesting PDF from: https://api.sec-api.io/filing-reader?token=8d165a6b1dc73095662db4f8bbcd57c2d23239e7de653ae887dc10836512481f&type=pdf&url=https://www.sec.gov/Archives/edgar/data/789019/000119312524062997/d808756d8ka.htmRequesting PDF from: https://api.sec-api.io/filing-reader?token=8d165a6b1dc73095662db4f8bbcd57c2d23239e7de653ae887dc10836512481f&type=pdf&url=https://www.sec.gov/Archives/edgar/data/1318605/000110465924071439/tm2413800d31_8k.htmRequesting PDF from: https://api.sec-api.io/filing-reader?token=8d165a6b1dc73095662db4f8bbcd57c2d23239e7de653ae887dc10836512481f&type=pdf&url=https://www.sec.gov/Archives/edgar/data/1065280/000114036124035325/ny20032950x5_8k.htmRequesting PDF from: https://api.sec-api.io/filing-reader?token=8d165a6b1dc73095662db4f8bbcd57c2d23239e7de653ae887dc10836512481f&type=pdf&url=https://www.sec.gov/Archives/edgar/data/104169/000010416924000033/wmt-20240223.htmRequesting PDF from: https://api.sec-api.io/filing-reader?token=8d165a6b1dc73095662db4f8bbcd57c2d

In [1]:
import os
import time
import pandas as pd
import requests
from sec_api import QueryApi, RenderApi
import multiprocessing as mp
mp.set_start_method('spawn', force=True)  # macOS 환경 대응

# API 키 및 API 인스턴스 생성
API_KEY = "8d165a6b1dc73095662db4f8bbcd57c2d23239e7de653ae887dc10836512481f"
queryApi = QueryApi(api_key=API_KEY)
renderApi = RenderApi(api_key=API_KEY)

def standardize_filing_url(url):
    """iXBRL URL에서 불필요한 부분 제거"""
    return url.replace('ix?doc=/', '')

def get_NFLX_metadata(start_year=2024, end_year=2025):
    """
    NFLX의 10-K와 10-Q 보고서 메타데이터를 2024년부터 2025년까지 수집합니다.
    """
    ticker_filter = 'ticker:"NFLX"'
    # 10-K와 10-Q (amended 및 NT 제외)
    form_filter = '((formType:"10-K" AND NOT formType:("10-K/A", NT)) OR (formType:"10-Q" AND NOT formType:("10-Q/A", NT)))'
    
    frames = []
    for year in range(start_year, end_year + 1):
        objects_downloaded = 0
        for month in range(1, 13):
            padded_month = str(month).zfill(2)
            date_range_filter = f'filedAt:[{year}-{padded_month}-01 TO {year}-{padded_month}-31]'
            lucene_query = date_range_filter + " AND " + ticker_filter + " AND " + form_filter
            
            query_from = 0
            query_size = 200
            while True:
                query = {
                    "query": lucene_query,
                    "from": query_from,
                    "size": query_size,
                    "sort": [{ "filedAt": { "order": "desc" } }]
                }
                response = queryApi.get_filings(query)
                filings = response.get('filings', [])
                if len(filings) == 0:
                    break
                else:
                    query_from += query_size
                metadata = list(map(lambda f: {
                    "ticker": f.get("ticker", ""),
                    "cik": f.get("cik", ""),
                    "formType": f.get("formType", ""),
                    "filedAt": f.get("filedAt", ""),
                    "filingUrl": standardize_filing_url(f.get("linkToFilingDetails", ""))
                }, filings))
                df = pd.DataFrame.from_records(metadata)
                frames.append(df)
                objects_downloaded += len(df)
        print(f"✅ Downloaded {objects_downloaded} metadata objects for year {year}")
    if frames:
        result = pd.concat(frames, ignore_index=True)
    else:
        result = pd.DataFrame()
    print(f"✅ Download completed. Total metadata filings: {len(result)}")
    return result

# 1. NFLX 메타데이터 수집 (2024~2025)
metadata_NFLX = get_NFLX_metadata(start_year=2024, end_year=2025)
output_csv = "metadata_NFLX.csv"
metadata_NFLX.to_csv(output_csv, index=False)
print(f"✅ CSV 파일이 생성되었습니다: {output_csv}")

##############################
# PDF 다운로드
##############################

def download_pdf(row):
    ticker = row["ticker"]
    filing_url = row["filingUrl"]
    max_retries = 5
    delay = 5  # 초기 지연 (초)
    for attempt in range(max_retries):
        try:
            new_folder = os.path.join("./pdf_filings", ticker)
            os.makedirs(new_folder, exist_ok=True)
            date = row["filedAt"][:10]
            file_name = f"{date}_{row['formType']}_{filing_url.split('/')[-1]}.pdf"
            api_url = f"https://api.sec-api.io/filing-reader?token={API_KEY}&type=pdf&url={filing_url}"
            headers = {"User-Agent": "Your Name (your.email@example.com)"}
            print(f"Requesting PDF from: {api_url}")
            response = requests.get(api_url, stream=True, headers=headers)
            if response.status_code == 429:
                raise requests.exceptions.HTTPError("429 Too Many Requests")
            response.raise_for_status()
            pdf_path = os.path.join(new_folder, file_name)
            with open(pdf_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
            print(f"✅ Downloaded PDF for {ticker}: {file_name}")
            break
        except requests.exceptions.HTTPError as err:
            if response.status_code == 429:
                print(f"❌ {ticker}: 429 Too Many Requests. Retrying in {delay} seconds... (Attempt {attempt+1}/{max_retries})")
                time.sleep(delay)
                delay *= 2
            else:
                print(f"❌ {ticker}: PDF download failed for {filing_url} ({err})")
                break
        except Exception as e:
            print(f"❌ {ticker}: PDF download failed for {filing_url} ({e})")
            break

# 2. CSV 파일 읽기 및 NFLX PDF 다운로드 진행
metadata_NFLX = pd.read_csv(output_csv)

from pandarallel import pandarallel
pandarallel.initialize(progress_bar=True, nb_workers=4, verbose=0)

metadata_NFLX.parallel_apply(download_pdf, axis=1)
print("✅ NFLX PDF Download completed")


SyntaxError: incomplete input (2128086095.py, line 91)