## 라이브러리

In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
import itertools
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

## 데이터 불러오기 및 전처리

In [8]:
# Load the LG hitter file (cp949-encoded CSV) and keep only the columns used below.
df = pd.read_csv('/content/drive/MyDrive/DATATHON/data/LG/LG_hitter_2024.csv', encoding='cp949')
columns_to_keep = ['선수명', '팀명', '타율', '경기', '타수', '홈런', '득점', '장타율', '출루율', '득점권타율', '고의4구', '희생플라이', '희생번트']
df = df[columns_to_keep]

# Load the runner file; note that the name df3 is reused later for a different frame.
df3 = pd.read_csv('/content/drive/MyDrive/DATATHON/data/LG/LG_runner_2024.csv', encoding='cp949')

# Only the two stolen-base columns are carried over.
df3 = df3[['선수명', '도루허용', '도루저지']]

# Left join: every hitter row is kept even when no runner row matches.
df = pd.merge(df, df3, on='선수명', how='left')

# URL of the Statiz fielding page (source of the defensive win contribution).
url = "https://statiz.sporki.com/stats/?m=main&m2=fielding&m3=default&so=&ob=&year=2024&sy=&ey=&te=5002&po=&lt=10100&reg=A&pe=&ds=&de=&we=&hr=&ha=&ct=&st=&vp=&bo=&pt=&pp=&ii=&vc=&um=&oo=&rr=&sc=&bc=&ba=&li=&as=&ae=&pl=&gc=&lr=&pr=1000&ph=&hs=&us=&na=&ls=1&sf1=G&sk1=&sv1=&sf2=G&sk2=&sv2="

# Parse every HTML table found on the page (requires network access).
dfs = pd.read_html(url)

# Use the first table on the page.
df5 = dfs[0]

# Persist the scraped table and read it back.
# NOTE(review): presumably the round trip is there to normalise headers/dtypes - confirm.
df5.to_csv('New_defense.csv', index=False)

df5 = pd.read_csv('New_defense.csv')

#### Column clean-up ####
# Drop rows whose 'Rank' cell is the literal text 'Rank'
# (presumably header rows repeated inside the table body - confirm).
df5= df5[df5['Rank'] != 'Rank']

# Keep four columns and rename them to the Korean names used in the rest of the notebook.
columns_to_keep = ['Name', 'Team', 'Sort▼', 'IP']
df5 = df5[columns_to_keep].rename(columns={'Sort▼': '수비 승리 기여도', 'Name': '선수명', 'Team': '포지션',
                                            'IP' : '수비이닝'})

# Drop the first three characters of the position column.
df5['포지션'] = df5['포지션'].str[3:]

# English position code -> Korean position name.
position_mapping = {
    '1B': '1루수',    '2B': '2루수',    '3B': '3루수',    'SS': '유격수',
    'C': '포수',    'RF': '우익수',    'CF': '중견수',    'LF': '좌익수'
}

# Translate the position codes; codes missing from the mapping become NaN.
df5['포지션'] = df5['포지션'].map(position_mapping)

# Convert innings and defensive win contribution to numbers; unparsable cells become NaN.
df5['수비이닝'] = pd.to_numeric(df5['수비이닝'], errors='coerce')
df5['수비 승리 기여도'] = pd.to_numeric(df5['수비 승리 기여도'], errors='coerce')

# Replace NaN positions with the empty string and make the column string-typed.
df5['포지션'] = df5['포지션'].fillna('').astype(str)

# 수비이닝을 기준으로 포지션 정렬 함수
def aggregate_positions(group):
    """Join one player's fielding positions into a single string.

    Args:
        group: DataFrame holding one player's fielding rows. It must contain
            the '수비이닝' column (used for ordering) and the '포지션' column
            (position labels).

    Returns:
        str: the position labels separated by ', ', ordered from most to
        fewest '수비이닝'. Blank or non-string labels are skipped; the result
        is '' when no usable label exists.
    """
    sorted_positions = group.sort_values(by='수비이닝', ascending=False)
    # Positions that could not be mapped are stored as '' upstream. Joining
    # them would leave dangling separators (', 1루수') and could make the
    # first, "main" position an empty string.
    labels = [
        label
        for label in sorted_positions['포지션']
        if isinstance(label, str) and label
    ]
    return ', '.join(labels)

def _summarize_player(player_rows):
    """Collapse one player's fielding rows into a single summary row."""
    return pd.Series({
        '포지션': aggregate_positions(player_rows),
        '수비 승리 기여도': player_rows['수비 승리 기여도'].mean(),
        '수비이닝': player_rows['수비이닝'].sum()
    })


# One row per player: joined positions, mean defensive win contribution, total innings.
df5 = df5.groupby('선수명').apply(_summarize_player).reset_index()

# Attach the fielding summary to the hitters, then discard rows with any missing value.
df = df.merge(df5, on='선수명', how='left').dropna()

# Keep hitters with at least 10 games and at least 31 at-bats.
df = df[(df['경기'] >= 10) & (df['타수'] >= 31)]

df

Unnamed: 0,선수명,팀명,타율,경기,타수,홈런,득점,장타율,출루율,득점권타율,고의4구,희생플라이,희생번트,도루허용,도루저지,포지션,수비 승리 기여도,수비이닝
3,문성주,LG,0.331,75,248,0,41,0.411,0.431,0.418,0,3,1,13,7,"좌익수, 우익수, 중견수",0.053,562.0
4,홍창기,LG,0.315,74,270,4,54,0.396,0.447,0.365,2,2,1,8,8,"우익수, 중견수",0.229,594.0
5,김현수,LG,0.303,75,294,7,39,0.439,0.365,0.234,1,4,0,2,1,"좌익수, 1루수",-0.223,202.0
7,오스틴,LG,0.3,77,287,16,48,0.54,0.372,0.301,4,10,0,10,3,1루수,-1.036,558.0
8,문보경,LG,0.288,78,264,9,36,0.455,0.365,0.224,3,10,1,3,5,"3루수, 1루수",-0.567,620.0
9,김범석,LG,0.287,38,108,5,12,0.463,0.353,0.28,2,0,0,0,0,"1루수, 포수",-0.057,110.1
10,박동원,LG,0.279,68,215,13,34,0.516,0.375,0.281,1,5,1,1,0,포수,1.049,471.1
11,신민재,LG,0.276,76,214,0,41,0.318,0.38,0.333,0,3,6,24,5,2루수,-0.203,582.0
12,박해민,LG,0.272,78,272,1,40,0.346,0.337,0.268,0,7,4,26,7,중견수,0.527,605.0
13,구본혁,LG,0.272,70,184,2,28,0.348,0.341,0.3,0,3,1,3,3,"유격수, 2루수, 3루수",0.315,458.0


batting order에 도루허용, 도루저지, 고의4구, 희생플라이, 희생번트 값 넣어서 추정득점 구하기

In [10]:
# Load the per-batting-order records (cp949-encoded CSV).
df2 = pd.read_csv('/content/drive/MyDrive/DATATHON/data/LG/LG_batting_order_2024.csv', encoding='cp949')

# Keep only records of players that are present in df.
filtered_names = df['선수명'].unique()
df2 = df2[df2['선수명'].isin(filtered_names)]

# Singles = hits - doubles - triples - home runs
df2['1루타'] = df2['안타'] - df2['2루타'] - df2['3루타'] - df2['홈런']
# Bring the season-level columns from df onto every batting-order record of the same player.
df2 = pd.merge(df2, df[['선수명', '도루허용', '도루저지', '고의4구', '희생플라이', '희생번트', '수비 승리 기여도', '득점권타율']], on='선수명', how='left')

# XR 계산 함수 정의
def calculate_XR(row):
    """Return the XR value of one batting record as a weighted sum of its counts.

    Args:
        row: mapping (e.g. a DataFrame row) with the keys '1루타', '2루타',
            '3루타', '홈런', '사구', '볼넷', '고의4구', '도루허용', '도루저지',
            '타수', '안타', '삼진', '병살타', '희생플라이' and '희생번트'.

    Returns:
        The sum of every (count * weight) pair listed below.
    """
    # Two counts are derived from several columns before weighting.
    non_intentional_free_passes = row['사구'] + row['볼넷'] - row['고의4구']
    at_bats_without_hit_or_strikeout = row['타수'] - row['안타'] - row['삼진']

    # (count, weight) pairs; negative weights are the terms that are subtracted.
    weighted_counts = (
        (row['1루타'], 0.5),
        (row['2루타'], 0.72),
        (row['3루타'], 1.04),
        (row['홈런'], 1.44),
        (non_intentional_free_passes, 0.34),
        (row['고의4구'], 0.25),
        (row['도루허용'], 0.18),
        (row['도루저지'], -0.32),
        (at_bats_without_hit_or_strikeout, -0.09),
        (row['삼진'], -0.098),
        (row['병살타'], -0.37),
        (row['희생플라이'], 0.37),
        (row['희생번트'], 0.04),
    )

    XR = 0.0
    for count, weight in weighted_counts:
        XR += count * weight
    return XR

# 출루율 계산 함수 정의
def calculate_obp(row):
    """Return the on-base percentage of one batting record.

    Args:
        row: mapping with the keys '안타', '볼넷', '사구', '타수' and '희생플라이'.

    Returns:
        (안타 + 볼넷 + 사구) / (타수 + 볼넷 + 사구 + 희생플라이), or 0 when the
        denominator is not positive.
    """
    times_on_base = row['안타'] + row['볼넷'] + row['사구']
    opportunities = row['타수'] + row['볼넷'] + row['사구'] + row['희생플라이']
    if opportunities > 0:
        return times_on_base / opportunities
    return 0

# 장타율 계산 함수 정의
def calculate_slg(row):
    """Return the slugging percentage of one batting record.

    Args:
        row: mapping with the keys '안타', '2루타', '3루타', '홈런' and '타수'.

    Returns:
        Total bases divided by '타수', or 0 when '타수' is not positive.
    """
    if not row['타수'] > 0:
        return 0
    # Singles are the hits that were not doubles, triples or home runs.
    one_base_hits = row['안타'] - row['2루타'] - row['3루타'] - row['홈런']
    total_bases = one_base_hits + 2*row['2루타'] + 3*row['3루타'] + 4*row['홈런']
    return total_bases / row['타수']

# Derive XR ('추정득점'), on-base percentage and slugging percentage for every record.
df2['추정득점'] = df2.apply(calculate_XR, axis=1)
df2['출루율'] = df2.apply(calculate_obp, axis=1)
df2['장타율'] = df2.apply(calculate_slg, axis=1)

# Drop the '순위' column (columns= alone is enough; axis is not needed with it).
df2 = df2.drop(columns=['순위'])

# Remove the grouped batting-order rows ('상위(1~2번)', '중심(3~5번)', '하위(6~9번)').
# .copy() gives an independent frame, so the column assignment below no longer
# raises pandas' SettingWithCopyWarning.
excluded_orders = ['상위(1~2번)', '중심(3~5번)', '하위(6~9번)']
df2 = df2[~df2['타순'].isin(excluded_orders)].copy()

# Convert '타율' to float; the placeholder '-' becomes NaN.
df2['타율'] = df2['타율'].replace('-', np.nan).astype(float)

# Keep only records with at least 10 at-bats.
df2 = df2[df2['타수']>= 10]

# Display the result.
df2

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['타율'] = df2['타율'].replace('-', np.nan).astype(float)


Unnamed: 0,선수명,팀명,타율,타수,안타,2루타,3루타,홈런,타점,볼넷,...,도루허용,도루저지,고의4구,희생플라이,희생번트,수비 승리 기여도,득점권타율,추정득점,출루율,장타율
0,홍창기,LG,0.307,215,66,3,3,2,28,48,...,8,8,2,2,1,0.229,0.365,38.386,0.438662,0.376744
1,박해민,LG,0.28,82,23,0,1,0,10,6,...,26,7,0,7,4,0.527,0.268,13.802,0.3125,0.304878
5,문성주,LG,0.345,142,49,8,2,0,27,28,...,13,7,0,3,1,0.053,0.418,29.574,0.448276,0.429577
6,김현수,LG,0.333,21,7,0,0,1,4,0,...,2,1,1,4,0,-0.223,0.234,4.586,0.28,0.47619
7,홍창기,LG,0.326,46,15,1,0,2,8,8,...,8,8,2,2,1,0.229,0.365,9.588,0.440678,0.478261
8,박해민,LG,0.287,87,25,2,2,1,11,10,...,26,7,0,7,4,0.527,0.268,17.82,0.342857,0.390805
12,김범석,LG,0.318,22,7,1,0,0,3,0,...,0,0,2,0,0,-0.057,0.28,1.756,0.318182,0.363636
13,문보경,LG,0.308,13,4,2,0,0,2,2,...,3,5,3,10,1,-0.567,0.224,4.334,0.24,0.461538
14,김현수,LG,0.3,267,80,15,2,6,39,28,...,2,1,1,4,0,-0.223,0.234,42.296,0.365449,0.438202
21,오스틴,LG,0.3,287,86,17,2,16,66,36,...,10,3,4,10,0,-1.036,0.301,60.676,0.372024,0.54007


In [11]:
# 가중 평균 계산 함수
def calculate_weighted_average(records):
    """Return the at-bat-weighted average of every numeric column.

    Args:
        records: DataFrame containing a '타수' column, which supplies the weights.

    Returns:
        dict mapping each numeric column name (never '선수명', '팀명' or '타순')
        to sum(column * 타수) / sum(타수).
    """
    at_bats = records['타수']
    total_weight = at_bats.sum()
    label_columns = ['선수명', '팀명', '타순']
    return {
        column: (records[column] * at_bats).sum() / total_weight
        for column in records.columns
        if column not in label_columns and pd.api.types.is_numeric_dtype(records[column])
    }

# For every player, create a synthetic record for each batting-order slot
# ('1번'..'9번') that has no record, using the at-bat-weighted average of the
# player's existing records.
unique_players = df2['선수명'].unique()
all_new_records = []
all_orders = [f'{i}번' for i in range(1, 10)]  # identical for every player

for player in unique_players:
    player_records = df2[df2['선수명'] == player]
    existing_orders = player_records['타순'].unique()
    missing_orders = [order for order in all_orders if order not in existing_orders]
    if not missing_orders:
        continue

    # The weighted average does not depend on the slot being filled, so it is
    # computed once per player instead of once per missing slot.
    weighted_avg = calculate_weighted_average(player_records)
    weighted_avg['타율'] = weighted_avg['안타'] / weighted_avg['타수'] if weighted_avg['타수'] != 0 else 0
    team_name = player_records['팀명'].iloc[0]

    for order in missing_orders:
        # A new dict per slot keeps the appended records independent of each other.
        all_new_records.append({**weighted_avg, '선수명': player, '팀명': team_name, '타순': order})

# Append the synthetic records to the observed ones.
new_records_df = pd.DataFrame(all_new_records)
df3 = pd.concat([df2, new_records_df], ignore_index=True)


def _batting_average(row):
    """Hits per at-bat; the stored '타율' is kept when there are no at-bats."""
    if row['타수'] != 0:
        return row['안타'] / row['타수']
    return row['타율']


df3['타율'] = df3.apply(_batting_average, axis=1)

# Keep only records with at least 10 at-bats.
df3 = df3[df3['타수'] >= 10]

# Columns rounded to three decimals.
decimal_columns = ['타율', '수비 승리 기여도', '득점권타율', '추정득점', '출루율', '장타율']

# Columns shown as whole numbers.
integer_columns = ['타수', '안타', '2루타', '3루타', '홈런', '타점', '볼넷', '사구', '삼진', '병살타', '1루타', '도루허용', '도루저지', '고의4구', '희생플라이', '희생번트']

df3[decimal_columns] = df3[decimal_columns].round(3)

# Missing counts become 0 before the cast to int.
df3[integer_columns] = df3[integer_columns].fillna(0).astype(int)

# From here on df2 refers to the completed table.
df2 = df3
df2

Unnamed: 0,선수명,팀명,타율,타수,안타,2루타,3루타,홈런,타점,볼넷,...,도루허용,도루저지,고의4구,희생플라이,희생번트,수비 승리 기여도,득점권타율,추정득점,출루율,장타율
0,홍창기,LG,0.307,215,66,3,3,2,28,48,...,8,8,2,2,1,0.229,0.365,38.386,0.439,0.377
1,박해민,LG,0.280,82,23,0,1,0,10,6,...,26,7,0,7,4,0.527,0.268,13.802,0.312,0.305
2,문성주,LG,0.345,142,49,8,2,0,27,28,...,13,7,0,3,1,0.053,0.418,29.574,0.448,0.430
3,김현수,LG,0.333,21,7,0,0,1,4,0,...,2,1,1,4,0,-0.223,0.234,4.586,0.280,0.476
4,홍창기,LG,0.326,46,15,1,0,2,8,8,...,8,8,2,2,1,0.229,0.365,9.588,0.441,0.478
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
103,허도환,LG,0.178,22,4,0,0,0,3,2,...,0,0,0,1,5,-0.246,0.316,2.785,0.318,0.205
104,허도환,LG,0.178,22,4,0,0,0,3,2,...,0,0,0,1,5,-0.246,0.316,2.785,0.318,0.205
105,허도환,LG,0.178,22,4,0,0,0,3,2,...,0,0,0,1,5,-0.246,0.316,2.785,0.318,0.205
106,허도환,LG,0.178,22,4,0,0,0,3,2,...,0,0,0,1,5,-0.246,0.316,2.785,0.318,0.205


## 라인업 만들기

In [12]:
# Turn the '-' placeholder into NaN, then make the three rate columns float.
df.replace('-', np.nan, inplace=True)

for rate_column in ('장타율', '출루율', '타율'):
    df[rate_column] = df[rate_column].astype(float)

### 중심타선

In [13]:
# Standardize slugging percentage, average with runners in scoring position and home runs.
scaler = StandardScaler()
center_inputs = df[['장타율', '득점권타율', '홈런']]
df[['장타율_표준화', '득점권 타율_표준화', '홈런_표준화']] = scaler.fit_transform(center_inputs)

# Weighted sum of the standardized columns.
df['중심타선 합산 지표'] = (
    df['장타율_표준화']*0.46
    + df['득점권 타율_표준화']*0.33
    + df['홈런_표준화']*0.56
)

# The three players with the highest combined score form the middle of the order.
center_hitters_1 = df.nlargest(3, '중심타선 합산 지표')

# Display the selection.
center_hitters_1

Unnamed: 0,선수명,팀명,타율,경기,타수,홈런,득점,장타율,출루율,득점권타율,...,희생번트,도루허용,도루저지,포지션,수비 승리 기여도,수비이닝,장타율_표준화,득점권 타율_표준화,홈런_표준화,중심타선 합산 지표
7,오스틴,LG,0.3,77,287,16,48,0.54,0.372,0.301,...,0,10,3,1루수,-1.036,558.0,1.583076,0.177314,2.163531,1.998306
10,박동원,LG,0.279,68,215,13,34,0.516,0.375,0.281,...,1,1,0,포수,1.049,471.1,1.308422,-0.126653,1.577914,1.443711
8,문보경,LG,0.288,78,264,9,36,0.455,0.365,0.224,...,1,3,5,"3루수, 1루수",-0.567,620.0,0.610343,-0.992956,0.797091,0.399453


In [14]:
# Records in df2 that belong to the selected middle-order players
# and sit in the '3번', '4번' or '5번' slot.
center_hitters_names = center_hitters_1['선수명'].values
is_center_player = df2['선수명'].isin(center_hitters_names)
is_center_slot = df2['타순'].isin(['3번', '4번', '5번'])
filtered_df2 = df2[is_center_player & is_center_slot]

# Attach each player's position string, joined on the player name.
center_hitters_1_position = center_hitters_1[['선수명', '포지션']]
center_hitters = filtered_df2.merge(center_hitters_1_position, on='선수명', how='left')

center_hitters

Unnamed: 0,선수명,팀명,타율,타수,안타,2루타,3루타,홈런,타점,볼넷,...,도루저지,고의4구,희생플라이,희생번트,수비 승리 기여도,득점권타율,추정득점,출루율,장타율,포지션
0,문보경,LG,0.308,13,4,2,0,0,2,2,...,5,3,10,1,-0.567,0.224,4.334,0.24,0.462,"3루수, 1루수"
1,오스틴,LG,0.3,287,86,17,2,16,66,36,...,3,4,10,0,-1.036,0.301,60.676,0.372,0.54,1루수
2,문보경,LG,0.311,151,47,8,0,7,26,19,...,5,3,10,1,-0.567,0.224,29.236,0.367,0.503,"3루수, 1루수"
3,박동원,LG,0.291,55,16,3,0,6,14,11,...,0,1,5,1,1.049,0.281,15.642,0.38,0.673,포수
4,문보경,LG,0.298,120,36,6,0,4,20,15,...,5,3,10,1,-0.567,0.224,22.747,0.342,0.466,"3루수, 1루수"
5,오스틴,LG,0.3,287,86,17,2,16,66,36,...,3,4,10,0,-1.036,0.301,60.676,0.372,0.54,1루수
6,오스틴,LG,0.3,287,86,17,2,16,66,36,...,3,4,10,0,-1.036,0.301,60.676,0.372,0.54,1루수
7,박동원,LG,0.275,53,14,2,0,3,10,9,...,0,1,5,1,1.049,0.281,12.137,0.353,0.526,포수
8,박동원,LG,0.275,53,14,2,0,3,10,9,...,0,1,5,1,1.049,0.281,12.137,0.353,0.526,포수


In [15]:
# Collect, per player, the list of batting-order slots present in filtered_df2
# (one row per player, with columns '선수명' and '타순').
center_per_player = filtered_df2.groupby('선수명')['타순'].apply(list).reset_index()
center_per_player

Unnamed: 0,선수명,타순
0,문보경,"[3번, 5번, 4번]"
1,박동원,"[5번, 3번, 4번]"
2,오스틴,"[4번, 3번, 5번]"


### 2번타자

In [16]:
# Create a fresh standard scaler.
scaler = StandardScaler()

# Standardize slugging percentage, on-base percentage, '도루허용' and home runs.
df[['장타율_표준화', '출루율_표준화', '도루_표준화', '홈런_표준화']] = scaler.fit_transform(df[['장타율', '출루율', '도루허용', '홈런']])

# Combined score for the #2 hitter (the weights are example values; adjust as needed).
df['2번타자 합산 지표'] = df['장타율_표준화']*0.46 + df['출루율_표준화']*0.35 + df['도루_표준화']*0.33 + df['홈런_표준화']*0.56

# Exclude the players already present in center_hitters.
exclude_names = center_hitters['선수명'].tolist()
filtered_df = df[~df['선수명'].isin(exclude_names)]

# Main position of each center hitter = the text before the first comma of '포지션'.
center_main_positions = center_hitters['포지션'].apply(lambda x: x.split(',')[0]).unique()

# 포지션별 필터링 함수
def filter_by_position(df, position, min_count, max_count, excluded_positions):
    """Return the candidates whose position string matches ``position``.

    Args:
        df: candidate DataFrame with the columns '포지션' and '2번타자 합산 지표'.
        position: pattern handed to ``str.contains`` on '포지션'.
        min_count: when fewer rows than this match, the matches are returned
            ordered by '2번타자 합산 지표' (highest first).
        max_count: when more rows than this match, only the ``max_count``
            rows with the highest '2번타자 합산 지표' are returned.
        excluded_positions: container of positions; if ``position`` itself is
            in it, an empty DataFrame is returned.

    Returns:
        DataFrame of the selected candidates (possibly empty).
    """
    score_column = '2번타자 합산 지표'

    # An excluded position yields no candidates at all.
    if position in excluded_positions:
        return pd.DataFrame()

    matches = df[df['포지션'].str.contains(position)]
    match_count = len(matches)

    if match_count > max_count:
        return matches.nlargest(max_count, score_column)
    if match_count < min_count:
        return matches.nlargest(min_count, score_column)
    return matches

# Build the candidate pools per position group.
# NOTE(review): filter_by_position tests `position in excluded_positions` with the
# whole pattern string, so the two multi-position patterns below can only be
# excluded if that exact pattern string is in the list - confirm this is intended.
catcher = filter_by_position(filtered_df, '포수', 1, 1, center_main_positions)
outfielders = filter_by_position(filtered_df, '좌익수|우익수|중견수', 1, 3, center_main_positions)
infielders = filter_by_position(filtered_df, '1루수|2루수|3루수|유격수', 1, 4, center_main_positions)

# Combine the pools; identical rows are kept once.
final_candidates = pd.concat([catcher, outfielders, infielders]).drop_duplicates()

# Pick the single candidate with the highest #2-hitter score.
second_hitter_1 = final_candidates.nlargest(1, '2번타자 합산 지표')
second_hitter_1

Unnamed: 0,선수명,팀명,타율,경기,타수,홈런,득점,장타율,출루율,득점권타율,...,포지션,수비 승리 기여도,수비이닝,장타율_표준화,득점권 타율_표준화,홈런_표준화,중심타선 합산 지표,출루율_표준화,도루_표준화,2번타자 합산 지표
4,홍창기,LG,0.315,74,270,4,54,0.396,0.447,0.365,...,"우익수, 중견수",0.229,594.0,-0.064849,1.150005,-0.178939,0.249465,2.138567,-0.048686,0.602396


In [17]:
# Records in df2 of the selected #2 hitter, restricted to the '2번' slot.
second_hitter_names = second_hitter_1['선수명'].values
is_second_player = df2['선수명'].isin(second_hitter_names)
is_second_slot = df2['타순'].isin(['2번'])
filtered_df2 = df2[is_second_player & is_second_slot]

# Attach the player's position string, joined on the player name.
second_hitter_1_position = second_hitter_1[['선수명', '포지션']]
second_hitter = filtered_df2.merge(second_hitter_1_position, on='선수명', how='left')
second_hitter

Unnamed: 0,선수명,팀명,타율,타수,안타,2루타,3루타,홈런,타점,볼넷,...,도루저지,고의4구,희생플라이,희생번트,수비 승리 기여도,득점권타율,추정득점,출루율,장타율,포지션
0,홍창기,LG,0.326,46,15,1,0,2,8,8,...,8,2,2,1,0.229,0.365,9.588,0.441,0.478,"우익수, 중견수"


### 1번 타자

In [18]:
# Create a fresh standard scaler.
scaler = StandardScaler()

# Standardize on-base percentage and '도루허용' (this overwrites the two columns
# of the same names written by the previous step).
df[['출루율_표준화', '도루_표준화']] = scaler.fit_transform(df[['출루율', '도루허용']])

# Combined score for the lead-off hitter (the weights are example values; adjust as needed).
df['1번타자 합산 지표'] = df['출루율_표준화']*0.35 + df['도루_표준화']*0.33

# Exclude the players already present in center_hitters and second_hitter_1.
exclude_names = center_hitters['선수명'].tolist() + second_hitter_1['선수명'].tolist()
filtered_df = df[~df['선수명'].isin(exclude_names)]

# Main positions (text before the first comma of '포지션') of the players chosen so far.
center_main_positions = center_hitters['포지션'].apply(lambda x: x.split(',')[0]).unique()
second_hitter_main_positions = second_hitter_1['포지션'].apply(lambda x: x.split(',')[0]).unique()
excluded_positions = list(center_main_positions) + list(second_hitter_main_positions)

# 포지션별 필터링 함수
def filter_by_position(df, position, min_count, max_count, excluded_positions):
    """Return the candidates whose position string matches ``position``.

    Args:
        df: candidate DataFrame with the columns '포지션' and '1번타자 합산 지표'.
        position: pattern handed to ``str.contains`` on '포지션'.
        min_count: when fewer rows than this match, the matches are returned
            ordered by '1번타자 합산 지표' (highest first).
        max_count: when more rows than this match, only the ``max_count``
            rows with the highest '1번타자 합산 지표' are returned.
        excluded_positions: container of positions; if ``position`` itself is
            in it, an empty DataFrame is returned.

    Returns:
        DataFrame of the selected candidates (possibly empty).
    """
    score_column = '1번타자 합산 지표'

    # An excluded position yields no candidates at all.
    if position in excluded_positions:
        return pd.DataFrame()

    matches = df[df['포지션'].str.contains(position)]
    match_count = len(matches)

    if match_count > max_count:
        return matches.nlargest(max_count, score_column)
    if match_count < min_count:
        return matches.nlargest(min_count, score_column)
    return matches

# Build the candidate pools per position group.
# NOTE(review): filter_by_position tests `position in excluded_positions` with the
# whole pattern string, so the two multi-position patterns below can only be
# excluded if that exact pattern string is in the list - confirm this is intended.
catcher = filter_by_position(filtered_df, '포수', 1, 1, excluded_positions)
outfielders = filter_by_position(filtered_df, '좌익수|우익수|중견수', 1, 3, excluded_positions)
infielders = filter_by_position(filtered_df, '1루수|2루수|3루수|유격수', 1, 4, excluded_positions)

# Combine the pools; identical rows are kept once.
final_candidates = pd.concat([catcher, outfielders, infielders]).drop_duplicates()

# Pick the single candidate with the highest lead-off score.
first_hitter_1 = final_candidates.nlargest(1, '1번타자 합산 지표')
first_hitter_1

Unnamed: 0,선수명,팀명,타율,경기,타수,홈런,득점,장타율,출루율,득점권타율,...,수비 승리 기여도,수비이닝,장타율_표준화,득점권 타율_표준화,홈런_표준화,중심타선 합산 지표,출루율_표준화,도루_표준화,2번타자 합산 지표,1번타자 합산 지표
3,문성주,LG,0.331,75,248,0,41,0.411,0.431,0.418,...,0.053,562.0,0.10681,1.955515,-0.959762,0.156986,1.700822,0.535548,0.283684,0.772018


In [19]:
# Records in df2 of the selected lead-off hitter, restricted to the '1번' slot.
first_hitter_names = first_hitter_1['선수명'].values
is_first_player = df2['선수명'].isin(first_hitter_names)
is_first_slot = df2['타순'].isin(['1번'])
filtered_df2 = df2[is_first_player & is_first_slot]

# Attach the player's position string, joined on the player name.
first_hitter_1_position = first_hitter_1[['선수명', '포지션']]
first_hitter = filtered_df2.merge(first_hitter_1_position, on='선수명', how='left')

first_hitter

Unnamed: 0,선수명,팀명,타율,타수,안타,2루타,3루타,홈런,타점,볼넷,...,도루저지,고의4구,희생플라이,희생번트,수비 승리 기여도,득점권타율,추정득점,출루율,장타율,포지션
0,문성주,LG,0.333,111,37,5,1,0,20,20,...,7,0,3,1,0.053,0.418,21.873,0.407,0.399,"좌익수, 우익수, 중견수"


### 하위타선

In [20]:
# Standardize on-base percentage and defensive win contribution.
scaler = StandardScaler()
df[['출루율_표준화', '수비_표준화']] = scaler.fit_transform(df[['출루율', '수비 승리 기여도']])

# Combined score for the bottom of the order.
df['하위타선 합산 지표'] = df['출루율_표준화']*0.35 + df['수비_표준화']*0.10

# Exclude the players already present in center_hitters, second_hitter_1 and first_hitter_1.
exclude_names = center_hitters['선수명'].tolist() + second_hitter_1['선수명'].tolist() + first_hitter_1['선수명'].tolist()
filtered_df = df[~df['선수명'].isin(exclude_names)]

# Main positions (text before the first comma of '포지션') of every player chosen so far.
center_main_positions = center_hitters['포지션'].apply(lambda x: x.split(',')[0]).unique()
second_hitter_main_positions = second_hitter_1['포지션'].apply(lambda x: x.split(',')[0]).unique()
first_hitter_main_positions = first_hitter_1['포지션'].apply(lambda x: x.split(',')[0]).unique()
excluded_positions = list(center_main_positions) + list(second_hitter_main_positions) + list(first_hitter_main_positions)

# 포지션별 필터링 함수
def filter_by_position(df, position, excluded_positions):
    """Return the rows of ``df`` whose '포지션' matches ``position``.

    Args:
        df: candidate DataFrame with a '포지션' string column.
        position: pattern handed to ``str.contains`` on '포지션'.
        excluded_positions: container of positions; if ``position`` itself is
            in it, an empty DataFrame is returned.

    Returns:
        DataFrame with the matching rows, or an empty DataFrame.
    """
    if position not in excluded_positions:
        position_mask = df['포지션'].str.contains(position)
        return df[position_mask]
    # An excluded position yields no candidates at all.
    return pd.DataFrame()

# Build the candidate pools per position group.
catchers = filter_by_position(filtered_df, '포수', excluded_positions)
outfielders = filter_by_position(filtered_df, '좌익수|우익수|중견수', excluded_positions)
infielders = filter_by_position(filtered_df, '1루수|2루수|3루수|유격수', excluded_positions)

# Combine the pools; identical rows are kept once.
bottom_candidates = pd.concat([catchers, outfielders, infielders]).drop_duplicates()

# When no catcher is among the main positions of the players chosen so far...
if not any('포수' in pos for pos in center_main_positions) and \
   not any('포수' in pos for pos in second_hitter_main_positions) and \
   not any('포수' in pos for pos in first_hitter_main_positions):
    # ...and no candidate's position string is exactly '포수':
    if '포수' not in bottom_candidates['포지션'].values:
        # Drop the last candidate row to make room.
        bottom_candidates = bottom_candidates.iloc[:-1]
        # Add the highest-scoring player whose position string contains '포수'.
        catcher = filtered_df[filtered_df['포지션'].str.contains('포수')].nlargest(1, '하위타선 합산 지표')
        bottom_candidates = pd.concat([bottom_candidates, catcher])

# When more than one candidate can catch, keep only the best catcher.
if bottom_candidates['포지션'].str.contains('포수').sum() > 1:
    # Catcher with the highest bottom-order score.
    best_catcher = bottom_candidates[bottom_candidates['포지션'].str.contains('포수')].nlargest(1, '하위타선 합산 지표')
    # Top three of the remaining (non-catcher) candidates.
    bottom_candidates = bottom_candidates[~bottom_candidates['포지션'].str.contains('포수')].nlargest(3, '하위타선 합산 지표')
    # Put the catcher back, giving four players.
    bottom_candidates = pd.concat([bottom_candidates, best_catcher])

# Provisional bottom four by score.
bottom_hitters_1 = bottom_candidates.nlargest(4, '하위타선 합산 지표').drop_duplicates()

# Main positions already covered by the players chosen earlier.
selected_positions = set(center_main_positions) | set(second_hitter_main_positions) | set(first_hitter_main_positions)

# Positions that none of those main positions covers.
missing_positions = {'포수', '좌익수', '우익수', '중견수', '1루수', '2루수', '3루수', '유격수'} - selected_positions

# Fill each uncovered position. The loop body rewrites bottom_hitters_1, so the
# visiting order affects the outcome; iterating the set directly would follow
# string-hash order, which can differ between interpreter sessions. Sorting
# makes the selection reproducible.
for position in sorted(missing_positions):
    if not any(position in pos for pos in bottom_hitters_1['포지션']):
        # Remove the lowest-scoring player...
        lowest_score_player = bottom_hitters_1.nsmallest(1, '하위타선 합산 지표')
        bottom_hitters_1 = bottom_hitters_1.drop(lowest_score_player.index)
        # ...and add the player at that position with the most at-bats.
        position_candidate = filtered_df[filtered_df['포지션'].str.contains(position)].nlargest(1, '타수')
        bottom_hitters_1 = pd.concat([bottom_hitters_1, position_candidate])

# Final bottom four by score.
bottom_hitters_1 = bottom_hitters_1.nlargest(4, '하위타선 합산 지표').drop_duplicates()
bottom_hitters_1

Unnamed: 0,선수명,팀명,타율,경기,타수,홈런,득점,장타율,출루율,득점권타율,...,장타율_표준화,득점권 타율_표준화,홈런_표준화,중심타선 합산 지표,출루율_표준화,도루_표준화,2번타자 합산 지표,1번타자 합산 지표,수비_표준화,하위타선 합산 지표
11,신민재,LG,0.276,76,214,0,41,0.318,0.38,0.333,...,-0.957475,0.663659,-0.959762,-0.758898,0.30551,1.820862,-0.270092,0.707813,-0.427164,0.064212
5,김현수,LG,0.303,75,294,7,39,0.439,0.365,0.234,...,0.42724,-0.840973,0.406679,0.146749,-0.104876,-0.749767,0.140141,-0.28413,-0.465763,-0.083283
16,오지환,LG,0.238,54,168,2,31,0.381,0.35,0.152,...,-0.236508,-2.087233,-0.56935,-1.116417,-0.515262,0.301854,-0.50836,-0.08073,0.696072,-0.110735
12,박해민,LG,0.272,78,272,1,40,0.346,0.337,0.268,...,-0.637045,-0.32423,-0.764556,-0.828188,-0.87093,2.054556,-0.348014,0.373178,0.981706,-0.206655


In [21]:
# Records in df2 of the selected bottom-order players, restricted to the '6번'-'9번' slots.
bottom_hitters_names = bottom_hitters_1['선수명'].values
is_bottom_player = df2['선수명'].isin(bottom_hitters_names)
is_bottom_slot = df2['타순'].isin(['6번', '7번', '8번', '9번'])
filtered_df2 = df2[is_bottom_player & is_bottom_slot]

# Attach each player's position string, joined on the player name.
bottom_hitters_1_position = bottom_hitters_1[['선수명', '포지션']]
bottom_hitters = filtered_df2.merge(bottom_hitters_1_position, on='선수명', how='left')

bottom_hitters

Unnamed: 0,선수명,팀명,타율,타수,안타,2루타,3루타,홈런,타점,볼넷,...,도루저지,고의4구,희생플라이,희생번트,수비 승리 기여도,득점권타율,추정득점,출루율,장타율,포지션
0,오지환,LG,0.242,62,15,6,0,0,6,11,...,3,0,0,1,0.379,0.152,8.82,0.356,0.339,유격수
1,박해민,LG,0.333,21,7,2,0,0,2,0,...,7,0,7,4,0.527,0.268,7.83,0.25,0.429,중견수
2,오지환,LG,0.25,56,14,6,1,1,7,12,...,3,0,0,1,0.379,0.152,10.994,0.391,0.446,유격수
3,박해민,LG,0.246,69,17,3,2,0,7,7,...,7,0,7,4,0.527,0.268,14.022,0.314,0.348,중견수
4,오지환,LG,0.231,13,3,2,0,1,2,1,...,3,0,0,1,0.379,0.152,3.68,0.333,0.615,유격수
5,신민재,LG,0.2,10,2,0,0,0,0,3,...,5,0,3,6,-0.203,0.333,5.354,0.312,0.2,2루수
6,신민재,LG,0.282,195,55,7,1,0,24,32,...,5,0,3,6,-0.203,0.333,30.59,0.381,0.328,2루수
7,박해민,LG,0.276,75,20,1,1,0,8,7,...,7,0,7,4,0.527,0.268,14.726,0.318,0.355,중견수
8,박해민,LG,0.276,75,20,1,1,0,8,7,...,7,0,7,4,0.527,0.268,14.726,0.318,0.355,중견수
9,김현수,LG,0.3,249,74,13,1,5,36,25,...,1,1,4,0,-0.223,0.234,39.546,0.359,0.441,"좌익수, 1루수"


In [22]:
# Collect, per player, the list of batting-order slots present in filtered_df2
# (one row per player, with columns '선수명' and '타순').
bottom_per_player = filtered_df2.groupby('선수명')['타순'].apply(list).reset_index()
bottom_per_player

Unnamed: 0,선수명,타순
0,김현수,"[6번, 7번, 8번, 9번]"
1,박해민,"[7번, 8번, 6번, 9번]"
2,신민재,"[8번, 9번, 6번, 7번]"
3,오지환,"[6번, 7번, 8번, 9번]"


## 만들어진 라인업 결합

### 상위타선

In [23]:
# Stack first_hitter and second_hitter into one frame.
# Note: this rebinds df, which previously held the season-level hitter table.
df = pd.concat([first_hitter, second_hitter], ignore_index=True)
df

Unnamed: 0,선수명,팀명,타율,타수,안타,2루타,3루타,홈런,타점,볼넷,...,도루저지,고의4구,희생플라이,희생번트,수비 승리 기여도,득점권타율,추정득점,출루율,장타율,포지션
0,문성주,LG,0.333,111,37,5,1,0,20,20,...,7,0,3,1,0.053,0.418,21.873,0.407,0.399,"좌익수, 우익수, 중견수"
1,홍창기,LG,0.326,46,15,1,0,2,8,8,...,8,2,2,1,0.229,0.365,9.588,0.441,0.478,"우익수, 중견수"


### 중심타선

In [24]:
# Map each middle-order player to the list of slots he has records for.
players = center_per_player.set_index('선수명')['타순'].to_dict()

# Every way of picking one slot per player...
possible_orders = list(itertools.product(*players.values()))

# ...keeping only the picks in which no slot is used twice.
valid_orders = [order for order in possible_orders if len(set(order)) == len(players)]

# One {slot: player} dictionary per valid pick.
combinations = [dict(zip(order, players.keys())) for order in valid_orders]

# Look up the center_hitters rows for each assignment; keep the assignment
# only when every player has a row for his slot.
results = []
for comb in combinations:
    row_matches = center_hitters.apply(lambda row: comb.get(row['타순']) == row['선수명'], axis=1)
    filtered_center_hitters = center_hitters[row_matches]
    if len(filtered_center_hitters) == len(players):
        results.append(filtered_center_hitters)

# Store the results as center_1, center_2, ... and in center_dfs.
center_dfs = [result.reset_index(drop=True) for result in results]
for i, center_df in enumerate(center_dfs):
    globals()[f'center_{i+1}'] = center_df

In [25]:
# Put the top of the order (df) in front of every middle-order arrangement.
concat_dfs = []
for i, center_df in enumerate(center_dfs):
    concat_df = pd.concat([df, center_df], ignore_index=True)
    concat_dfs.append(concat_df)
    # Also exposed as the module-level variables df_1, df_2, ...
    globals()[f'df_{i+1}'] = concat_df

### 하위타선

In [26]:
# Map each bottom-order player to the list of slots he has records for.
players = bottom_per_player.set_index('선수명')['타순'].to_dict()

# Every way of picking one slot per player...
possible_orders = list(itertools.product(*players.values()))

# ...keeping only the picks in which no slot is used twice.
valid_orders = [order for order in possible_orders if len(set(order)) == len(players)]

# One {slot: player} dictionary per valid pick.
combinations = [dict(zip(order, players.keys())) for order in valid_orders]

# Look up the bottom_hitters rows for each assignment; keep the assignment
# only when every player has a row for his slot.
results = []
for comb in combinations:
    row_matches = bottom_hitters.apply(lambda row: comb.get(row['타순']) == row['선수명'], axis=1)
    filtered_bottom_hitters = bottom_hitters[row_matches]
    if len(filtered_bottom_hitters) == len(players):
        results.append(filtered_bottom_hitters)

# Store the results as bottom_1, bottom_2, ... and in bottom_dfs.
bottom_dfs = [result.reset_index(drop=True) for result in results]
for i, bottom_df in enumerate(bottom_dfs):
    globals()[f'bottom_{i+1}'] = bottom_df

In [27]:
# Pair every top+middle arrangement with every bottom arrangement:
# len(concat_dfs) * len(bottom_dfs) full lineups.
final_dfs = []
count = 1
for i in range(len(concat_dfs)):
    for j in range(len(bottom_dfs)):
        final_df = pd.concat([concat_dfs[i], bottom_dfs[j]], ignore_index=True)
        final_dfs.append(final_df)
        # Temporarily exposed as final_df_1, final_df_2, ...
        globals()[f'final_df_{count}'] = final_df
        count += 1

# Rename final_df_n to df_n; this overwrites the df_n variables created earlier.
for i in range(1, len(final_dfs) + 1):
    globals()[f'df_{i}'] = globals().pop(f'final_df_{i}')

## 베스트 라인업

In [28]:
# Load the training data for the model.
df = pd.read_csv('/content/drive/MyDrive/DATATHON/data/@KBO_5개구단/KBO_top5.csv', encoding='cp949')

df = df.drop(columns=['선수명', '년도'])

# Drop rows whose '타율' is the '-' placeholder, then turn every remaining '-' into 0.
# Chaining .replace() returns a new frame instead of mutating a filtered slice in place.
df = df[df['타율'] != '-'].replace('-', 0)

# Features and target.
features = ['출루율', '장타율', '홈런', '수비 승리 기여도', '득점권타율', '도루허용']
target = '추정득점'

X = df[features]
y = df[target]

# Train/test split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=100)

# Fit the model.
model = RandomForestRegressor(n_estimators=50, max_depth=20, max_features=0.3, random_state=42)


model.fit(X_train, y_train)
predictions = model.predict(X_test)

# Score every generated lineup df_1 .. df_N. N comes from final_dfs instead of a
# hard-coded 144, so the loop stays correct when the number of lineups changes.
predicted_scores = {}
lineup_predictions = {}
for i in range(1, len(final_dfs) + 1):
    lineup_name = f'df_{i}'
    lineup_df = globals()[lineup_name]
    per_player_scores = model.predict(lineup_df[features])
    lineup_predictions[lineup_name] = per_player_scores
    # A lineup's score is the mean of its players' predicted values.
    predicted_scores[lineup_name] = per_player_scores.mean()

if not predicted_scores:
    raise ValueError('no lineups were generated; final_dfs is empty')

best_lineup = max(predicted_scores, key=predicted_scores.get)
best_score = predicted_scores[best_lineup]
# Per-player predictions of the best lineup. Previously the spread printed below
# used whatever lineup the loop happened to visit last.
predicted_score = lineup_predictions[best_lineup]

# MSE
mse = mean_squared_error(y_test, predictions)
print(f"MSE: {mse:.2f}")
# RMSE: square root of the MSE. The `squared=False` argument of
# mean_squared_error is not available in newer scikit-learn releases.
rmse = np.sqrt(mse)
print(f"RMSE: {rmse:.2f}")
# MAE
mae = mean_absolute_error(y_test, predictions)
print(f"MAE: {mae:.2f}")
# R2
r2 = r2_score(y_test, predictions)
print(f"R2 Score: {r2:.4f}")

# Re-index the best lineup from 1 so the index reads as the batting order.
best_lineup_df = globals()[best_lineup].copy()
best_lineup_df.index = range(1, len(best_lineup_df) + 1)

print(f"\n가장 높은 점수를 받은 라인업: {best_lineup} with score {best_score}")
print(f'편차: {predicted_score.max()-predicted_score.min()}')
best_lineup_df

MSE: 158.59
RMSE: 12.59
MAE: 8.09
R2 Score: 0.8294

가장 높은 점수를 받은 라인업: df_51 with score 52.830666666666666
편차: 61.97200000000002


Unnamed: 0,선수명,팀명,타율,타수,안타,2루타,3루타,홈런,타점,볼넷,...,도루저지,고의4구,희생플라이,희생번트,수비 승리 기여도,득점권타율,추정득점,출루율,장타율,포지션
1,문성주,LG,0.333,111,37,5,1,0,20,20,...,7,0,3,1,0.053,0.418,21.873,0.407,0.399,"좌익수, 우익수, 중견수"
2,홍창기,LG,0.326,46,15,1,0,2,8,8,...,8,2,2,1,0.229,0.365,9.588,0.441,0.478,"우익수, 중견수"
3,오스틴,LG,0.3,287,86,17,2,16,66,36,...,3,4,10,0,-1.036,0.301,60.676,0.372,0.54,1루수
4,문보경,LG,0.311,151,47,8,0,7,26,19,...,5,3,10,1,-0.567,0.224,29.236,0.367,0.503,"3루수, 1루수"
5,박동원,LG,0.275,53,14,2,0,3,10,9,...,0,1,5,1,1.049,0.281,12.137,0.353,0.526,포수
6,오지환,LG,0.25,56,14,6,1,1,7,12,...,3,0,0,1,0.379,0.152,10.994,0.391,0.446,유격수
7,박해민,LG,0.246,69,17,3,2,0,7,7,...,7,0,7,4,0.527,0.268,14.022,0.314,0.348,중견수
8,신민재,LG,0.282,195,55,7,1,0,24,32,...,5,0,3,6,-0.203,0.333,30.59,0.381,0.328,2루수
9,김현수,LG,0.3,249,74,13,1,5,36,25,...,1,1,4,0,-0.223,0.234,39.546,0.359,0.441,"좌익수, 1루수"


In [29]:
# Count how often each position appears across the best lineup.
from collections import Counter

# Split every player's ', '-separated position string into single positions.
all_positions = [
    single_position
    for positions in best_lineup_df['포지션']
    for single_position in positions.split(', ')
]

# Tally the positions and show the result.
position_counts = Counter(all_positions)
print(position_counts)

Counter({'중견수': 3, '1루수': 3, '좌익수': 2, '우익수': 2, '3루수': 1, '포수': 1, '유격수': 1, '2루수': 1})


# 기준라인업

```
LG
1. 홍창기
2. 문성주
3. 김현수
4. 오스틴
5. 문보경
6. 박동원
7. 구본혁
8. 박해민
9. 신민재
```

In [30]:
# Optimal lineup: look the DataFrame up by name instead of eval()-ing the string,
# the same way the best lineup is retrieved where it is selected.
optimal_lineup = globals()[best_lineup]['선수명'].tolist()

# Reference lineup.
standard_lineup = ['홍창기', '문성주', '김현수', '오스틴', '문보경', '박동원', '구본혁', '박해민', '신민재']

# Share of optimal-lineup players that also appear in the reference lineup.
common_players = set(optimal_lineup) & set(standard_lineup)
name_match_rate = len(common_players) / len(optimal_lineup) * 100

# Share of slots holding the same player in both lineups.
exact_match_count = sum(
    optimal_player == standard_player
    for optimal_player, standard_player in zip(optimal_lineup, standard_lineup)
)
exact_match_rate = exact_match_count / len(optimal_lineup) * 100

# Agreement based on how far each shared player moved in the order.
worst_case_difference = len(optimal_lineup) - 1
if common_players:
    total_position_difference = sum(
        abs(optimal_lineup.index(player) - standard_lineup.index(player))
        for player in common_players
    )
    average_position_difference = total_position_difference / len(common_players)
    position_difference_match_rate = (1 - (average_position_difference / worst_case_difference)) * 100
else:
    # With no shared player there is nothing to average; report 0% rather than
    # dividing by zero.
    total_position_difference = 0
    average_position_difference = 0.0
    position_difference_match_rate = 0.0

# Report.
print(f"선수 이름 중복 일치율: {name_match_rate:.2f}%")
print(f"타순까지 고려한 일치율: {exact_match_rate:.2f}%")
print(f"타순 차이를 고려한 일치율: {position_difference_match_rate:.2f}%")

선수 이름 중복 일치율: 88.89%
타순까지 고려한 일치율: 0.00%
타순 차이를 고려한 일치율: 79.69%


In [31]:
# Reference lineup as (player, batting-order slot) pairs.
standard_lineup = [
    ('홍창기', '1번'),
    ('문성주', '2번'),
    ('김현수', '3번'),
    ('오스틴', '4번'),
    ('문보경', '5번'),
    ('박동원', '6번'),
    ('구본혁', '7번'),
    ('박해민', '8번'),
    ('신민재', '9번')
]

# Pick each player's record at his slot, then concatenate once. Growing a frame
# by repeated concat onto an empty DataFrame is slower and triggers pandas'
# deprecation warning about concatenating empty entries.
matched_rows = [
    df2[(df2['선수명'] == player) & (df2['타순'] == order)]
    for player, order in standard_lineup
]
standard_df = pd.concat(matched_rows, ignore_index=True)

# Display the result.
standard_df

Unnamed: 0,선수명,팀명,타율,타수,안타,2루타,3루타,홈런,타점,볼넷,...,도루허용,도루저지,고의4구,희생플라이,희생번트,수비 승리 기여도,득점권타율,추정득점,출루율,장타율
0,홍창기,LG,0.307,215,66,3,3,2,28,48,...,8,8,2,2,1,0.229,0.365,38.386,0.439,0.377
1,문성주,LG,0.345,142,49,8,2,0,27,28,...,13,7,0,3,1,0.053,0.418,29.574,0.448,0.43
2,김현수,LG,0.3,267,80,15,2,6,39,28,...,2,1,1,4,0,-0.223,0.234,42.296,0.365,0.438
3,오스틴,LG,0.3,287,86,17,2,16,66,36,...,10,3,4,10,0,-1.036,0.301,60.676,0.372,0.54
4,문보경,LG,0.311,151,47,8,0,7,26,19,...,3,5,3,10,1,-0.567,0.224,29.236,0.367,0.503
5,박동원,LG,0.375,48,18,3,0,4,12,9,...,1,0,1,5,1,1.049,0.281,14.916,0.435,0.688
6,구본혁,LG,0.296,98,29,6,0,1,12,9,...,3,3,0,3,1,0.315,0.3,13.504,0.345,0.388
7,박해민,LG,0.246,69,17,3,2,0,7,7,...,26,7,0,7,4,0.527,0.268,14.022,0.314,0.348
8,신민재,LG,0.282,195,55,7,1,0,24,32,...,24,5,0,3,6,-0.203,0.333,30.59,0.381,0.328


In [32]:
# Load the training data and drop the two columns that are not used.
df = (
    pd.read_csv('/content/drive/MyDrive/DATATHON/data/@KBO_5개구단/KBO_top5.csv', encoding='cp949')
    .drop(columns=['선수명', '년도'])
)

# Remove rows whose '타율' is the '-' placeholder; every other '-' becomes 0.
df = df[df['타율'] != '-']
df.replace('-', 0, inplace=True)

# Features and target.
features = ['출루율', '장타율', '홈런', '수비 승리 기여도', '득점권타율', '도루허용']
target = '추정득점'
X, y = df[features], df[target]

# Same split and model settings as used for scoring the generated lineups.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=100)
model = RandomForestRegressor(n_estimators=50, max_depth=20, max_features=0.3, random_state=42)
model.fit(X_train, y_train)

# Predict for the reference lineup; its score is the mean of the per-player predictions.
standard_features = standard_df[features]
predicted_score = model.predict(standard_features)
total_predicted_score = predicted_score.mean()

print(f'standard_df에 대한 총 추정득점: {total_predicted_score}')

standard_df에 대한 총 추정득점: 52.93
