# 정당성 검정

1. 각 소속사 주가 + 사건 데이터를 읽고 소속사 컬럼 붙여서 하나로 병합

In [2]:
import pandas as pd
import numpy as np
from scipy import stats

# 주가 & 사건 파일 목록
companies = ['HYBE', 'JYP', 'SM', 'YG']
stock_files = {c: f'../데이터/{c}_주가_전처리.csv' for c in companies}
incident_files = {c: f'../데이터/{c}_사건_전처리.csv' for c in companies}

# 로그수익률 계산 함수
def add_return(df):
    df = df.sort_values('날짜').copy()
    df['수익률'] = np.log(df['종가'] / df['종가'].shift(1))
    return df

# 모든 사건 정보 병합
all_incidents = []

for company in companies:
    # 사건 데이터 불러오기
    inc = pd.read_csv(incident_files[company], parse_dates=['사건 날짜'])
    inc['소속사'] = company
    all_incidents.append(inc)

incident_df = pd.concat(all_incidents, ignore_index=True)

2. 벤치마크 지수도 KOSDAQ_ENT, KOSDAQ 각각 준비

In [3]:
# 벤치마크 파일 (KOSDAQ & 엔터테인먼트)
kosdaq_ent_file = '../데이터/KOSDAQ Entertainment & Culture_전처리.csv'
kosdaq_file = '../데이터/코스닥_전처리.csv'

# 벤치마크 데이터 불러오기
kosdaq_ent = pd.read_csv(kosdaq_ent_file, parse_dates=['날짜'])
kosdaq = pd.read_csv(kosdaq_file, parse_dates=['날짜'])

# 벤치마크 수익률 추가
kosdaq_ent = add_return(kosdaq_ent)
kosdaq = add_return(kosdaq)

In [4]:
kosdaq_ent

Unnamed: 0,날짜,종가,시가,고가,저가,거래량,변동 %,수익률
0,2014-03-10,675.86,656.91,675.86,656.31,17.64K,4.20%,
1,2014-03-11,672.68,679.36,692.03,670.19,30.91K,-0.47%,-0.004716
2,2014-03-12,673.39,677.49,680.32,668.75,33.55K,0.11%,0.001055
3,2014-03-13,679.69,678.71,679.74,661.38,35.02K,0.94%,0.009312
4,2014-03-14,659.55,669.19,677.63,659.55,24.24K,-2.96%,-0.030079
...,...,...,...,...,...,...,...,...
2732,2025-04-23,510.44,515.67,515.67,508.00,45.06M,-0.77%,-0.007689
2733,2025-04-24,511.60,509.18,513.37,507.86,52.67M,0.23%,0.002270
2734,2025-04-25,523.41,512.62,526.83,510.55,19.17M,2.31%,0.022822
2735,2025-04-28,526.39,524.93,529.88,519.90,31.46M,0.57%,0.005677


3. 소속사별 + 사건별로:
    - 사건일 ±7일 윈도우 추출
    - 로그수익률 & 초과수익률(AR) 계산
    - CAR 계산
4. 결과 데이터프레임 구성:
    - 소속사, 그룹, 연예인, 사건일, CAR_KOSDAQ_ENT, CAR_KOSDAQ, 사건명

In [5]:

# 결과 저장용 리스트
results = []

for company in companies:
    # 주가 데이터 불러오기 & 수익률 계산
    stock = pd.read_csv(stock_files[company], parse_dates=['날짜'])
    stock = add_return(stock)
    
    # 해당 소속사 사건 데이터
    incidents = incident_df[incident_df['소속사'] == company]
    
    for _, row in incidents.iterrows():
        event_date = row['사건 날짜']
        
        # 윈도우 설정 (±7일)
        mask = (stock['날짜'] >= event_date - pd.Timedelta(days=7)) & (stock['날짜'] <= event_date + pd.Timedelta(days=7))
        event_window = stock.loc[mask].copy()

        # 벤치마크도 같은 기간 추출
        bm_ent = kosdaq_ent[kosdaq_ent['날짜'].isin(event_window['날짜'])]
        bm_kosdaq = kosdaq[kosdaq['날짜'].isin(event_window['날짜'])]

        # 병합
        event_window = event_window.merge(bm_ent[['날짜', '수익률']], on='날짜', how='left', suffixes=('', '_KOSDAQ_ENT'))
        event_window = event_window.merge(bm_kosdaq[['날짜', '수익률']], on='날짜', how='left', suffixes=('', '_KOSDAQ'))

        # 초과수익률(AR) 계산
        event_window['AR_KOSDAQ_ENT'] = event_window['수익률'] - event_window['수익률_KOSDAQ_ENT']
        event_window['AR_KOSDAQ'] = event_window['수익률'] - event_window['수익률_KOSDAQ']

        # CAR 계산
        CAR_ent = event_window['AR_KOSDAQ_ENT'].sum(skipna=True)
        CAR_kosdaq = event_window['AR_KOSDAQ'].sum(skipna=True)

        # 결과 저장
        results.append({
            '소속사': company,
            '그룹': row['그룹'],
            '연예인': row['연예인 이름'],
            '사건일': event_date,
            'CAR_KOSDAQ_ENT': CAR_ent,
            'CAR_KOSDAQ': CAR_kosdaq
        })

# 최종 데이터프레임
results_df = pd.DataFrame(results)

# 결과 확인
print(results_df)




      소속사     그룹  연예인        사건일  CAR_KOSDAQ_ENT  CAR_KOSDAQ
0    HYBE  방탄소년단   슈가 2020-05-17        0.000000    0.000000
1    HYBE  방탄소년단   슈가 2020-05-29        0.000000    0.000000
2    HYBE  방탄소년단   RM 2016-07-06        0.000000    0.000000
3    HYBE  방탄소년단   지민 2022-04-25       -0.043926    0.000000
4    HYBE  방탄소년단   정국 2019-09-16        0.000000    0.000000
..    ...    ...  ...        ...             ...         ...
261    YG     위너  송민호 2023-03-24        0.072161    0.000000
262    YG     위너  이승훈 2019-06-12       -0.126170   -0.113548
263    YG     위너  이승훈 2016-08-22       -0.027001   -0.012066
264    YG    트레저  윤재혁 2023-04-02        0.084693    0.000000
265    YG    트레저  하루토 2019-04-06       -0.023170   -0.051496

[266 rows x 6 columns]


In [6]:
event_window

Unnamed: 0,날짜,종가,시가,고가,저가,거래량,변동 %,수익률,수익률_KOSDAQ_ENT,수익률_KOSDAQ,AR_KOSDAQ_ENT,AR_KOSDAQ
0,2019-04-01,37050.0,38250,38300,36650,395.56K,-1.72%,-0.017392,0.006545,0.010231,-0.023936,-0.027623
1,2019-04-02,36650.0,37000,37550,36550,291.86K,-1.08%,-0.010855,0.004147,0.003739,-0.015002,-0.014594
2,2019-04-03,37850.0,37000,38000,36800,331.29K,3.27%,0.032218,0.012357,0.013071,0.019861,0.019147
3,2019-04-04,38050.0,37850,38700,37350,318.71K,0.53%,0.00527,-0.005656,0.003038,0.010926,0.002232
4,2019-04-05,37750.0,38500,39000,37550,470.91K,-0.79%,-0.007916,0.005501,0.000173,-0.013417,-0.008089
5,2019-04-08,38700.0,38500,39450,38000,588.85K,2.52%,0.024854,-0.005137,0.000279,0.029991,0.024575
6,2019-04-09,38200.0,38950,38950,37500,470.81K,-1.29%,-0.013004,0.003255,0.006482,-0.016259,-0.019486
7,2019-04-10,37650.0,38150,38450,37500,226.46K,-1.44%,-0.014503,-0.000882,0.004404,-0.013621,-0.018906
8,2019-04-11,38050.0,38050,38250,37600,252.39K,1.06%,0.010568,0.015091,0.008306,-0.004523,0.002262
9,2019-04-12,37700.0,37700,38150,37600,204.95K,-0.92%,-0.009241,-0.012051,0.001773,0.00281,-0.011014


5. 전체 CAR 리스트로 평균 검정 (t-test)

In [7]:
# t-test: CAR 평균이 0인가?
for col in ['CAR_KOSDAQ_ENT', 'CAR_KOSDAQ']:
    car_list = results_df[col].dropna()
    t_stat, p_value = stats.ttest_1samp(car_list, 0)
    print(f'\n{col} 검정결과: t={t_stat:.3f}, p={p_value:.3f}, n={len(car_list)}')


CAR_KOSDAQ_ENT 검정결과: t=1.371, p=0.171, n=266

CAR_KOSDAQ 검정결과: t=-1.136, p=0.257, n=266


incident_df.to_csv('../데이터/incidents_merged.csv', index=False, encoding='utf-8-sig')

In [18]:
incident_df['연예인 이름'].unique()

array(['슈가', 'RM', '지민', '정국', '정원', '제이', '니키', '김채원', '홍은채', '조슈아',
       '민규', '도겸', '민지', '다니엘', '준케이', '닉쿤', '택연', '나연', '지효', '채영', '방찬',
       '현진', '필릭스', '리아', '백현', '첸', '찬열', '카이', '유노윤호', '이특', '희철', '신동',
       '은혁', '시원', '태연', '써니', '티파니', '유리', '수영', '서현', '아이린', '슬기', '웬디',
       '예리', '쟈니', '태용', '윈윈', '런쥔', '해찬', '루카스', '태일', '지젤', '윈터', '닝닝',
       '승리', '탑', '지드래곤', '대성', '제니', '리사', '송민호', '이승훈', '윤재혁', '하루토'],
      dtype=object)