In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [3]:
def label_encode_string_columns(df, copy=False):
    """Replace every string-like column of ``df`` with integer label codes.

    Each distinct value of a column is mapped to its rank in the sorted list
    of that column's distinct values (``'a' -> 0, 'b' -> 1, ...``). Missing
    values receive one extra code placed after all real labels.

    The encoding is done with ``pd.factorize`` so the function only depends
    on pandas/numpy, both imported at the top of the notebook.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose object / string dtype columns should be encoded.
    copy : bool, default False
        When False the columns of ``df`` are overwritten in place and ``df``
        itself is returned. Pass True to leave ``df`` untouched and get an
        encoded copy back instead.

    Returns
    -------
    pandas.DataFrame
        ``df`` itself (``copy=False``) or a new frame (``copy=True``) with
        the string-like columns replaced by integer codes.
    """
    encoded = df.copy() if copy else df
    for col in encoded.columns:
        col_dtype = encoded[col].dtype
        # Text columns are `object` in classic pandas and a StringDtype when
        # the dedicated string dtype is in use; handle both.
        is_text = pd.api.types.is_object_dtype(col_dtype) or isinstance(col_dtype, pd.StringDtype)
        if not is_text:
            continue
        # sort=True makes the codes follow the sorted order of the labels.
        codes, uniques = pd.factorize(encoded[col], sort=True)
        # factorize marks missing values with -1; move them to a dedicated
        # code after the last real label so every code is non-negative.
        encoded[col] = np.where(codes == -1, len(uniques), codes)
    return encoded

In [4]:
def plot_heatmap(data, columns):
    """Show an annotated correlation heatmap for a selection of columns.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding the columns to correlate.
    columns : list
        Column labels of ``data`` to include in the correlation matrix.

    The figure is shown without blocking (``plt.show(block=False)``).
    """
    # Compute the matrix before opening a figure, as the original did.
    corr = data[columns].corr()
    _fig, ax = plt.subplots()
    sns.heatmap(
        corr,
        annot=True,
        cmap='coolwarm',
        linewidths=0.5,
        fmt=".2f",
        ax=ax,
    )
    ax.set_title('Feature Correlation Heatmap')
    plt.show(block=False)

In [5]:
def plot_feature(data, column):
    """Scatter one column's values against their row position.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing ``column``.
    column : str
        Label of the column to plot on the y-axis.

    The x-axis is the positional index ``0 .. len(data) - 1``; the figure is
    shown without blocking.
    """
    _fig, ax = plt.subplots()
    row_positions = np.arange(len(data))
    # Tiny markers (s=0.1) keep large frames readable.
    ax.scatter(row_positions, data[column], s=0.1, alpha=0.8)
    ax.set_title(f'Plot {column}')
    ax.set_xlabel('Index')
    ax.set_ylabel(column)
    ax.grid(True)
    plt.show(block=False)

In [6]:
def plot_feature_distribution(data, column):
    """Plot a filled kernel-density estimate of one column.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing ``column``.
    column : str
        Label of the column whose distribution is drawn.

    The figure is shown without blocking.
    """
    _fig, ax = plt.subplots()
    values = data[column]
    sns.kdeplot(values, fill=True, ax=ax)
    ax.set_title(f'Distribution of {column}')
    ax.set_xlabel(column)
    ax.set_ylabel('Density')
    ax.grid(True)
    plt.show(block=False)

In [7]:
def plot_feature_to_target(data, column, target):
    """Scatter a feature column against a target column.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing both ``column`` and ``target``.
    column : str
        Label of the feature column (x-axis).
    target : str
        Label of the target column (y-axis).

    The figure is shown without blocking.
    """
    plt.figure()
    plt.scatter(data[column], data[target], c='blue', s=0.2, marker='o')
    plt.title(f'Feature({column}) to Target({target})')
    plt.xlabel(column)
    # Label the y-axis with the actual target column name, matching the
    # title and the x-axis, rather than the fixed word 'target'.
    plt.ylabel(target)
    plt.grid(True)
    plt.show(block=False)

In [8]:
def heatmap(data):
    """Show an annotated correlation heatmap of the numeric columns of ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to correlate. Non-numeric columns (e.g. ticker codes or names)
        are ignored.

    Raises
    ------
    ValueError
        If ``data`` has no numeric or boolean column to correlate.
    """
    # pandas >= 2.0 no longer drops non-numeric columns inside corr() and
    # raises instead; select the numeric/bool columns explicitly so the
    # function works on mixed frames across pandas versions.
    numeric = data.select_dtypes(include=['number', 'bool'])
    if numeric.shape[1] == 0:
        raise ValueError('heatmap() needs at least one numeric column to correlate')
    correlation_matrix = numeric.corr()
    plt.figure()
    sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', linewidths=0.5, fmt=".2f")
    plt.title('Feature Correlation Heatmap')
    plt.show()

In [9]:
def feature_graph(data, column_name):
    """Scatter a sequence of values against their position.

    Parameters
    ----------
    data : sequence
        Values to plot on the y-axis; ``len(data)`` sets the x range.
    column_name : str
        Name used in the title and as the y-axis label.
    """
    _fig, ax = plt.subplots()
    positions = np.arange(len(data))
    ax.scatter(positions, data, alpha=0.8)
    ax.set_title(f'Distribution of {column_name} in Coordinate Space')
    ax.set_xlabel('Index')
    ax.set_ylabel(column_name)
    ax.grid(True)
    plt.show()

In [10]:
def compare_two_column(data1, data2, data1_name, data2_name):
    """Scatter one series of values against another.

    Parameters
    ----------
    data1, data2 : sequence
        Values for the x-axis and y-axis respectively.
    data1_name, data2_name : str
        Names used in the title and as the axis labels.
    """
    _fig, ax = plt.subplots()
    ax.scatter(data1, data2, c='blue', marker='o')
    ax.set_title(f'Scatter Plot of {data1_name} & {data2_name}')
    ax.set_xlabel(f'{data1_name}')
    ax.set_ylabel(f'{data2_name}')
    ax.grid(True)
    plt.show()

In [11]:
def plot_func(df, column, target):
    """Draw a stacked bar chart of the target's share within each category.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing ``column`` and ``target``.
    column : str
        Label of the grouping column; one bar per distinct value.
    target : str
        Label of the column whose value proportions are stacked in each bar.

    Every segment is annotated with its proportion (two decimals), centred
    vertically inside the segment.
    """
    # Rows: values of `column`; columns: values of `target`; cells: counts.
    counts = df.groupby([column, target]).size().unstack(fill_value=0)
    # Normalise each row so its segments sum to 1.
    shares = counts.div(counts.sum(axis=1), axis=0)

    ax = shares.plot(kind='bar', stacked=True, color=['salmon', 'skyblue'], figsize=(8, 4))
    ax.set_xlabel(f'{column}')
    ax.set_ylabel('Proportion')
    ax.set_title(f'Proportion of Target by {column}')
    ax.legend(title='Target', loc='upper right')
    ax.set_ylim(0, 1)
    ax.grid(axis='y', linestyle='--', alpha=0.7)

    n_bars, n_segments = shares.shape
    for bar_pos in range(n_bars):
        bar = shares.iloc[bar_pos]
        for seg_pos in range(n_segments):
            height = bar.iloc[seg_pos]
            # Bottom of this segment = total height of the segments below it.
            base = bar.iloc[:seg_pos].sum()
            ax.text(bar_pos, height / 2 + base, f"{height:.2f}", ha='center', color='black')
    plt.show()

In [12]:
# Load the historical dividend records (path is relative to the working directory).
# The first CSV column has no header, so pandas labels it 'Unnamed: 0'
# (it looks like a saved row index — TODO confirm before dropping it).
df_baedang = pd.read_csv('open (1)/NH_CONTEST_DATA_HISTORICAL_DIVIDEND.csv')

In [13]:
# Display the dividend frame; the notebook renders a truncated head/tail preview.
df_baedang

Unnamed: 0,etf_tck_cd,ediv_dt,ddn_amt,aed_stkp_ddn_amt,ddn_bse_dt,ddn_pym_dt,pba_dt,ddn_pym_fcy_cd
0,AAON,20230608,0.12,0.08,20230609,20230630,20230518,Quarterly
1,AAON,20240315,0.08,0.08,20240318,20240329,20240301,Quarterly
2,AAON,20230907,0.08,0.08,20230908,20230929,20230818,Quarterly
3,AAON,20230310,0.12,0.08,20230313,20230331,20230301,Quarterly
4,AAON,20221125,0.24,0.16,20221128,20221216,20221108,SemiAnnual
...,...,...,...,...,...,...,...,...
12852,ZION,20230816,0.41,0.41,20230817,20230824,20230728,Quarterly
12853,ZION,20240214,0.41,0.41,20240215,20240222,20240202,Quarterly
12854,ZION,20230215,0.41,0.41,20230216,20230223,20230127,Quarterly
12855,ZION,20230517,0.41,0.41,20230518,20230525,20230505,Quarterly


In [14]:
# Load the customer-type table (path is relative to the working directory).
# The first CSV column has no header, so pandas labels it 'Unnamed: 0'.
df_peopledata = pd.read_csv('open (1)/NH_CONTEST_NHDATA_CUS_TP_IFO.csv')

In [15]:
# Display the customer-type frame; the notebook renders a truncated head/tail preview.
df_peopledata

Unnamed: 0,bse_dt,tck_iem_cd,cus_cgr_llf_cd,cus_cgr_mlf_cd,cus_cgr_act_cnt_rt,cus_cgr_ivs_rt
0,20240826,AAL,2,22,37.2608,37.5675
1,20240826,AAL,2,25,4.8405,11.8565
2,20240826,AAL,2,21,21.4259,11.5923
3,20240826,AAL,2,23,23.9400,21.7531
4,20240816,AAL,2,21,21.4740,11.7051
...,...,...,...,...,...,...
1194995,20240612,ZS,2,24,25.3165,23.7650
1194996,20240529,ZS,3,31,36.8421,11.7457
1194997,20240819,ZS,2,21,14.7059,7.2295
1194998,20240819,ZS,2,23,28.8235,25.3286


In [18]:
# Count how many dividend rows each ticker code has, most frequent first.
df_baedang['etf_tck_cd'].value_counts()

etf_tck_cd
QYLD     25
XYLD     25
RYLD     25
EFC      25
DX       24
         ..
ARHS      1
CONL      1
GOOGL     1
FIZZ      1
AIRS      1
Name: count, Length: 1631, dtype: int64

In [20]:
# Load the ETF holdings table (path is relative to the working directory).
# cp949 is passed explicitly; the file contains Korean security names (fc_sec_krl_nm).
df_info = pd.read_csv('open (1)/NH_CONTEST_DATA_ETF_HOLDINGS.csv', encoding='cp949')

In [21]:
# Display the ETF holdings frame; the notebook renders a truncated head/tail preview.
df_info

Unnamed: 0,etf_tck_cd,tck_iem_cd,mkt_vlu,fc_sec_eng_nm,fc_sec_krl_nm,stk_qty,wht_pct,sec_tp
0,AAPB,AAPL,36858666,AAPL,애플,215737,66.778,ST
1,AAPB,USD,18337384,US Dollars,프로셰어즈 울트라 반도체,0,33.222,EF
2,AMDL,USD,3257693,US Dollars,프로셰어즈 울트라 반도체,0,33.282,EF
3,AMDL,AMD,6530355,AMD,에이엠디,36558,66.718,ST
4,CLOU,TWLO,15400502,TWILIO INC - A,트윌리오,254933,4.266,ST
...,...,...,...,...,...,...,...,...
40714,TMF,-,2981494930,ISHARES 20+ YEAR TREASURY BOND ETF SWAP,-,31278797,14.837,-
40715,TMF,-,3838866,DREYFUS TRSRY SECURITIES CASH MGMT,-,0,0.019,-
40716,TMF,-,693619858,GOLDMAN FINL SQ TRSRY INST 506,-,0,3.452,-
40717,TMF,-,1146532591,DREYFUS GOVT CASH MAN INS,-,0,5.706,-


In [22]:
# Load the stock item information table (path is relative to the working directory).
# cp949 is passed explicitly; the file contains Korean security names (fc_sec_krl_nm).
df_sample = pd.read_csv('open (1)/NH_CONTEST_NW_FC_STK_IEM_IFO.csv', encoding='cp949')

In [23]:
# Display the stock item information frame; the notebook renders a truncated head/tail preview.
df_sample

Unnamed: 0,tck_iem_cd,fc_sec_krl_nm,fc_sec_eng_nm,stk_etf_dit_cd,ltg_tot_stk_qty,fc_mkt_dit_cd,co_adr,web_adr,btp_cfc_nm,ceo_nm,eng_utk_otl_cts,ser_cfc_nm,ids_nm,mkt_pr_tot_amt
0,AA,알코아 ...,Alcoa Corp ...,주식,258340140,NYSE,"201 Isabella Street, Suite 500 PITTSBURGH PA...",https://www.alcoa.com/global/en/home,Aluminum (NEC),William Oplinger,Alcoa Corporation is a vertically integrated a...,Basic Materials,Aluminum,8928.24
1,AAL,아메리칸 에어라인스 그룹 ...,American Airline ...,주식,656703913,NASD,1 Skyview Drive FORT WORTH TX 76155 United S...,https://americanairlines.gcs-web.com/,Regional Airlines,Robert Isom,American Airlines Group Inc. is a holding comp...,Industrials,Airlines,6711.51
2,AAN,Aaron's Company ...,Aaron's Company ...,주식,31550767,NYSE,"400 Galleria Parkway Se, Suite 300 ATLANTA G...",https://investor.aarons.com/,Consumer Leasing,Stephen Olsen,"The Aaron?? Company, Inc. is a technology-enab...",Industrials,Rental & Leasing Services,318.03
3,AAOI,어플라이드 옵토일렉트로닉스 ...,AOI ...,주식,42644441,NASD,13139 Jess Pirtle Blvd SUGAR LAND TX 77478 U...,https://ao-inc.com/,Electronic Components,Chih-Hsiang Lin,"Applied Optoelectronics, Inc. is a vertically ...",Technology,Communication Equipment,393.42
4,AAON,에이에이온 ...,AAON ...,주식,81013148,NASD,2425 South Yukon Ave. TULSA OK 74107 United ...,https://www.aaon.com/,"Heating, Ventilation & Air Conditioning",Matt Tobolski,"AAON, Inc. is a producer of heating, ventilati...",Industrials,Building Products & Equipment,7605.92
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3091,ZURA,Zura Bio ...,Zura Bio ...,주식,63207088,NASD,"4225 Executive Square, Suite - 600 SAN DIEGO...",https://zurabio.com/,Bio Therapeutic Drugs,Robert Lisicki,"Zura Bio Limited is a clinical-stage, multi-as...",Healthcare,Biotechnology,255.99
3092,ZVRA,Zevra ...,Zevra ...,주식,52617789,NASD,"1180 CELEBRATION BOULEVARD, SUITE 103 CELEBR...",https://zevra.com/,Proprietary & Advanced Pharmaceuticals,Neil Mcfarlane,"Zevra Therapeutics, Inc. is a rare disease com...",Healthcare,Biotechnology,372.01
3093,ZWS,Zurn Elkay Wtr ...,Zurn Elkay Wtr ...,주식,170928495,NYSE,511 West Freshwater Way MILWAUKEE WI 53204 U...,https://zurn-elkay.com/,Construction Supplies & Fixtures (NEC),Todd Adams,Zurn Elkay Water Solutions Corp is a pure-play...,Industrials,Pollution & Treatment Controls,5485.10
3094,ZYME,Zymeworks ...,Zymeworks ...,주식,71618027,NASD,"108 Patriot Drive, Suite A MIDDLETOWN DE 197...",https://www.zymeworks.com/,Bio Therapeutic Drugs,Kenneth Galbraith,Zymeworks Inc. is a global biotechnology compa...,Healthcare,Biotechnology,819.89
