In [11]:
import pandas as pd  # pandas 라이브러리 import
import os
from sklearn.model_selection import train_test_split  # 데이터 분할을 위한 함수 import
from sklearn.linear_model import LogisticRegression  # 로지스틱 회귀 모델 import
from sklearn.ensemble import RandomForestClassifier  # 랜덤 포레스트 모델 import
from sklearn.metrics import accuracy_score, f1_score  # 모델 평가 지표 import

In [12]:
# 1. Load the team record data, player record data, and player head-to-head record data.
# This cell handles the team records; the path is relative to the current working directory.
team_df = pd.read_csv(filepath_or_buffer='baseball/team_stats/team_hitter.csv')

In [13]:
# Load the per-team hitter record data.

# Team identifiers; each one is used both as the sub-directory name and as the
# CSV file-name prefix under baseball/player_stats_csv/.
teams = ['doosan', 'hanwha', 'kia', 'kiwoom', 'kt', 'lg', 'lotte', 'nc', 'samsung', 'ssg']
for team in teams:
    # Paths are relative to the current working directory.
    temp_df = pd.read_csv(f'baseball/player_stats_csv/{team}/{team}_hitter.csv')
    # Bind the frame to a module-level name such as `doosan_hitter_df`.
    # Writing through globals() avoids exec() on a formatted string and matches
    # how the head-to-head loader cell creates its dynamic names.
    globals()[f"{team}_hitter_df"] = temp_df

In [18]:
# Per-team hitter frames, in the order they are stacked below.
dfs = [
    doosan_hitter_df,
    hanwha_hitter_df,
    kia_hitter_df,
    kiwoom_hitter_df,
    kt_hitter_df,
    lg_hitter_df,
    lotte_hitter_df,
    nc_hitter_df,
    samsung_hitter_df,
    ssg_hitter_df,
]

# Stack every team frame under a fresh 0..n-1 index, then keep only the rows
# whose '연도' (year) column equals 19. The original row labels from the
# concatenated frame are preserved by the filter.
all_hitter_df = pd.concat(dfs, ignore_index=True).loc[
    lambda merged: merged['연도'] == 19
]

In [15]:
# Load the pitcher-vs-hitter head-to-head record data.

# Directory holding the head-to-head CSV files (relative to the current working directory).
work_dir = 'baseball/pitcher_vs_hitter/2019_pitcher_vs_hitter'

# Keep the original best-effort behaviour: a missing directory is created
# (and therefore yields no files) instead of raising.
os.makedirs(work_dir, exist_ok=True)

# NOTE: the working directory is deliberately NOT changed with os.chdir().
# Changing it made every other relative 'baseball/...' path in the notebook
# fail on re-run, and re-running this cell created an empty nested directory.
# Any later cell that relied on the changed working directory must use
# os.path.join(work_dir, ...) instead.

# Names (not full paths) of every CSV file in the directory.
csv_files = [f for f in os.listdir(work_dir) if f.endswith('.csv')]

# Read each CSV into its own module-level DataFrame named '<file stem>_df'.
for csv_file in csv_files:
    globals()[os.path.splitext(csv_file)[0] + "_df"] = pd.read_csv(os.path.join(work_dir, csv_file))

In [19]:
# Display the merged hitter table (only rows where '연도' == 19 remain after the filter cell).
all_hitter_df

Unnamed: 0,이름,연도,포지션,WAR,oWAR,dWAR,G,PA,ePA,AB,...,SO,GDP,SH,SF,AVG,OBP,SLG,OPS,R/ePA,WRC+
1,박세혁,19,C,6.03,2.98,3.05,137,505,495,441,...,68,5,10,6,0.279,0.345,0.390,0.735,0.009,111.2
2,페르난데스,19,DH,5.73,7.49,-1.76,144,645,643,572,...,54,16,0,6,0.344,0.409,0.483,0.892,0.069,162.0
4,오재일,19,1B,5.14,4.97,0.17,130,529,529,467,...,99,5,0,4,0.293,0.369,0.495,0.864,0.048,144.6
5,박건우,19,RF,5.13,5.59,-0.46,127,537,536,458,...,57,23,1,11,0.319,0.397,0.465,0.862,0.053,148.4
10,김재호,19,SS,4.53,2.82,1.71,130,470,460,377,...,58,10,8,11,0.268,0.379,0.358,0.737,0.012,114.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1506,최경모,19,2B,-0.18,-0.25,0.07,17,16,16,16,...,2,0,0,0,0.063,0.063,0.125,0.188,-0.227,-89.2
1510,남태혁,19,1B,-0.19,-0.06,-0.13,12,24,24,22,...,3,3,0,0,0.227,0.292,0.227,0.519,-0.077,38.8
1513,정진기,19,RF,-0.22,-0.17,-0.05,13,17,17,16,...,2,0,0,0,0.188,0.235,0.188,0.423,-0.124,-1.3
1517,김창평,19,2B,-0.27,-0.06,-0.21,18,53,50,45,...,11,1,3,0,0.178,0.260,0.222,0.482,-0.077,42.1


In [17]:
# Display one head-to-head frame. This name is created dynamically by the
# head-to-head loader cell ('<file stem>_df'), so it exists only if a file
# named '이대호_2019.csv' was present in that directory.
이대호_2019_df

Unnamed: 0,Pitcher,PA,ePA,AB,R,H,2B,3B,HR,TB,...,SH,SF,AVG,OBP,SLG,OPS,NP,avLI,RE24,WPA
0,정찬헌,2,2,2,0,1,1,0,0,2,...,0,0,0.500,0.500,1.000,1.500,3,3.60,2.10,0.447
1,함덕주,2,2,1,0,1,0,0,0,1,...,0,0,1.000,1.000,1.000,2.000,7,2.97,1.54,0.384
2,장현식,3,3,2,1,1,1,0,0,2,...,0,0,0.500,0.667,1.000,1.667,11,2.25,3.33,0.290
3,정성곤,2,2,2,0,1,0,0,0,1,...,0,0,0.500,0.500,0.500,1.000,5,1.94,0.65,0.271
4,양현종,9,9,9,3,3,1,0,1,7,...,0,0,0.333,0.333,0.778,1.111,20,0.80,1.85,0.244
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
130,이용찬,9,8,8,0,1,0,0,0,1,...,0,0,0.125,0.222,0.125,0.347,21,1.19,-1.79,-0.173
131,김민우,7,7,7,1,1,0,0,0,1,...,0,0,0.143,0.143,0.143,0.286,21,1.07,-1.35,-0.177
132,브리검,9,9,8,0,1,0,0,0,1,...,0,0,0.125,0.222,0.125,0.347,39,1.26,-0.75,-0.183
133,서진용,4,4,4,0,1,1,0,0,2,...,0,0,0.250,0.250,0.500,0.750,10,1.76,0.70,-0.263
