In [1]:
# 계산
import pandas as pd

# 차트
import matplotlib.pyplot as plt

# 데이터 검정
from scipy.stats import f_oneway

In [2]:
# Silence warning messages (e.g. those emitted while drawing charts).
# Note: this ignores every warning category for the whole notebook session.
import warnings
warnings.simplefilter('ignore')

In [3]:
# Helper that makes Korean text render correctly in matplotlib charts
def fontKorea():
    """Select a Korean-capable matplotlib font based on the host OS.

    macOS uses the 'AppleGothic' family and Windows uses the family name read
    from the Malgun Gothic font file. On any other system nothing is changed
    apart from the minus-sign setting and "Unknown System" is printed.
    """
    # platform: used to detect the operating system
    import platform

    # font_manager: font lookup helpers, rc: runtime configuration setter
    from matplotlib import font_manager, rc

    # Unicode minus setting (applied on every OS, before the font is chosen)
    plt.rcParams['axes.unicode_minus'] = False

    os_name = platform.system()
    if os_name not in ('Darwin', 'Windows'):
        print("Unknown System")
        return

    if os_name == 'Darwin':
        # macOS
        family = 'AppleGothic'
    else:
        # Windows: resolve the family name from the font file itself
        malgun_path = 'c:/Windows/Fonts/malgun.ttf'
        family = font_manager.FontProperties(fname=malgun_path).get_name()

    rc('font', family=family)

In [4]:
# Load the Kiwoom game records that carry no attendance figures
kiwoom_no_crowd = pd.read_csv(filepath_or_buffer='./Data/kiwoom.csv')
kiwoom_no_crowd

Unnamed: 0,날짜,시작시간,종료시간,경기시간,홈팀 선발,어웨이팀 선발,구장
0,2022.04.02(토),14:00,17:12,3:12,안우진,반즈,고척
1,2022.04.03(일),14:00,17:55,3:55,요키시,박세웅,고척
2,2022.04.05(화),18:30,21:34,3:04,애플러,임찬규,고척
3,2022.04.06(수),18:30,22:13,3:43,최원태,손주영,고척
4,2022.04.07(목),18:30,21:01,2:31,정찬헌,김윤식,고척
...,...,...,...,...,...,...,...
344,2024.06.04(화),18:30,21:32,3:02,이믿음,헤이수스,잠실
345,2024.06.05(수),18:30,21:57,3:27,최원태,하영민,잠실
346,2024.06.06(목),17:00,20:17,3:17,손주영,김인범,잠실
347,2024.06.07(금),18:31,21:42,3:11,조영건,원태인,고척


In [5]:
# Keep only the rows whose stadium column ('구장') equals '고척' (Gocheok)
kiwoom_no_crowd = kiwoom_no_crowd.loc[kiwoom_no_crowd['구장'].eq('고척')]
kiwoom_no_crowd

Unnamed: 0,날짜,시작시간,종료시간,경기시간,홈팀 선발,어웨이팀 선발,구장
0,2022.04.02(토),14:00,17:12,3:12,안우진,반즈,고척
1,2022.04.03(일),14:00,17:55,3:55,요키시,박세웅,고척
2,2022.04.05(화),18:30,21:34,3:04,애플러,임찬규,고척
3,2022.04.06(수),18:30,22:13,3:43,최원태,손주영,고척
4,2022.04.07(목),18:30,21:01,2:31,정찬헌,김윤식,고척
...,...,...,...,...,...,...,...
341,2024.05.31(금),18:30,21:23,2:53,김인범,오원석,고척
342,2024.06.01(토),17:01,19:53,2:52,조영건,시라카와,고척
343,2024.06.02(일),14:00,17:22,3:22,전준표,박종훈,고척
347,2024.06.07(금),18:31,21:42,3:11,조영건,원태인,고척


In [6]:
# Strip the trailing weekday marker ('2022.04.02(토)' -> '2022.04.02') and parse
# the remainder as a date.
# .assign builds a new frame instead of writing a column into the filtered
# frame produced by the previous cell (that write is the SettingWithCopy
# pattern, whose warning is hidden by the notebook-wide warnings filter).
# The explicit format makes the 'YYYY.MM.DD' parsing unambiguous.
kiwoom_no_crowd = kiwoom_no_crowd.assign(
    날짜=pd.to_datetime(kiwoom_no_crowd['날짜'].str[:10], format='%Y.%m.%d')
)
kiwoom_no_crowd

Unnamed: 0,날짜,시작시간,종료시간,경기시간,홈팀 선발,어웨이팀 선발,구장
0,2022-04-02,14:00,17:12,3:12,안우진,반즈,고척
1,2022-04-03,14:00,17:55,3:55,요키시,박세웅,고척
2,2022-04-05,18:30,21:34,3:04,애플러,임찬규,고척
3,2022-04-06,18:30,22:13,3:43,최원태,손주영,고척
4,2022-04-07,18:30,21:01,2:31,정찬헌,김윤식,고척
...,...,...,...,...,...,...,...
341,2024-05-31,18:30,21:23,2:53,김인범,오원석,고척
342,2024-06-01,17:01,19:53,2:52,조영건,시라카와,고척
343,2024-06-02,14:00,17:22,3:22,전준표,박종훈,고척
347,2024-06-07,18:31,21:42,3:11,조영건,원태인,고척


In [7]:
# Inspect the column dtypes and non-null counts after the date conversion
kiwoom_no_crowd.info()

<class 'pandas.core.frame.DataFrame'>
Index: 175 entries, 0 to 348
Data columns (total 7 columns):
 #   Column   Non-Null Count  Dtype         
---  ------   --------------  -----         
 0   날짜       175 non-null    datetime64[ns]
 1   시작시간     175 non-null    object        
 2   종료시간     175 non-null    object        
 3   경기시간     175 non-null    object        
 4   홈팀 선발    175 non-null    object        
 5   어웨이팀 선발  175 non-null    object        
 6   구장       175 non-null    object        
dtypes: datetime64[ns](1), object(6)
memory usage: 10.9+ KB


In [8]:
# Keep only games dated before June 2024 (strictly earlier than 2024-06-01).
# NOTE(review): the saved output of this cell ends at 2024-05-23 although the
# input shows a 2024-05-31 game that satisfies this filter - the output looks
# stale; re-run the notebook to confirm.
kiwoom_no_crowd = kiwoom_no_crowd.loc[
    kiwoom_no_crowd['날짜'] < pd.Timestamp('2024-06-01')
]
kiwoom_no_crowd

Unnamed: 0,날짜,시작시간,종료시간,경기시간,홈팀 선발,어웨이팀 선발,구장
0,2022-04-02,14:00,17:12,3:12,안우진,반즈,고척
1,2022-04-03,14:00,17:55,3:55,요키시,박세웅,고척
2,2022-04-05,18:30,21:34,3:04,애플러,임찬규,고척
3,2022-04-06,18:30,22:13,3:43,최원태,손주영,고척
4,2022-04-07,18:30,21:01,2:31,정찬헌,김윤식,고척
...,...,...,...,...,...,...,...
332,2024-05-19,14:01,17:13,3:12,김인범,오원석,고척
333,2024-05-21,18:31,21:53,3:22,하영민,신민혁,고척
334,2024-05-22,18:31,21:54,3:23,후라도,하트,고척
335,2024-05-23,18:30,21:14,2:44,헤이수스,김시훈,고척


In [9]:
# Load the Kiwoom home-game records that include attendance figures
kiwoom_crowd = pd.read_csv(filepath_or_buffer='./Data/kiwoom_heroes_info(2022~2024).csv')
kiwoom_crowd

Unnamed: 0,날짜,요일,홈팀,원정팀,장소,관중 수
0,2022-04-02,토,키움,롯데,고척,8257
1,2022-04-03,일,키움,롯데,고척,6115
2,2022-04-05,화,키움,LG,고척,2298
3,2022-04-06,수,키움,LG,고척,2304
4,2022-04-07,목,키움,LG,고척,2055
...,...,...,...,...,...,...
168,2024-05-22,수,키움,NC,고척,4337
169,2024-05-23,목,키움,NC,고척,3897
170,2024-05-31,금,키움,SSG,고척,5680
171,2024-06-01,토,키움,SSG,고척,10462


In [10]:
# Inspect the column dtypes and non-null counts of the attendance data
kiwoom_crowd.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 173 entries, 0 to 172
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   날짜      173 non-null    object
 1   요일      173 non-null    object
 2   홈팀      173 non-null    object
 3   원정팀     173 non-null    object
 4   장소      173 non-null    object
 5   관중 수    173 non-null    int64 
dtypes: int64(1), object(5)
memory usage: 8.2+ KB


In [11]:
# Convert the date column ('날짜') from string to datetime
kiwoom_crowd = kiwoom_crowd.astype({'날짜': 'datetime64[ns]'})
kiwoom_crowd

Unnamed: 0,날짜,요일,홈팀,원정팀,장소,관중 수
0,2022-04-02,토,키움,롯데,고척,8257
1,2022-04-03,일,키움,롯데,고척,6115
2,2022-04-05,화,키움,LG,고척,2298
3,2022-04-06,수,키움,LG,고척,2304
4,2022-04-07,목,키움,LG,고척,2055
...,...,...,...,...,...,...
168,2024-05-22,수,키움,NC,고척,4337
169,2024-05-23,목,키움,NC,고척,3897
170,2024-05-31,금,키움,SSG,고척,5680
171,2024-06-01,토,키움,SSG,고척,10462


In [12]:
# Confirm that the date column is now datetime64[ns]
kiwoom_crowd.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 173 entries, 0 to 172
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   날짜      173 non-null    datetime64[ns]
 1   요일      173 non-null    object        
 2   홈팀      173 non-null    object        
 3   원정팀     173 non-null    object        
 4   장소      173 non-null    object        
 5   관중 수    173 non-null    int64         
dtypes: datetime64[ns](1), int64(1), object(4)
memory usage: 8.2+ KB


In [13]:
# Keep only games dated before June 2024 (strictly earlier than 2024-06-01).
# NOTE(review): the saved output of this cell ends at 2024-05-23 although the
# input shows a 2024-05-31 game that satisfies this filter - the output looks
# stale; re-run the notebook to confirm.
kiwoom_crowd = kiwoom_crowd.loc[
    kiwoom_crowd['날짜'] < pd.Timestamp('2024-06-01')
]
kiwoom_crowd

Unnamed: 0,날짜,요일,홈팀,원정팀,장소,관중 수
0,2022-04-02,토,키움,롯데,고척,8257
1,2022-04-03,일,키움,롯데,고척,6115
2,2022-04-05,화,키움,LG,고척,2298
3,2022-04-06,수,키움,LG,고척,2304
4,2022-04-07,목,키움,LG,고척,2055
...,...,...,...,...,...,...
166,2024-05-19,일,키움,SSG,고척,10049
167,2024-05-21,화,키움,NC,고척,3940
168,2024-05-22,수,키움,NC,고척,4337
169,2024-05-23,목,키움,NC,고척,3897


In [14]:
# Show attendance rows whose date already appeared in an earlier row
kiwoom_crowd.loc[kiwoom_crowd['날짜'].duplicated(keep='first')]

Unnamed: 0,날짜,요일,홈팀,원정팀,장소,관중 수
136,2023-09-09,토,키움,한화,고척,10028


In [15]:
# Inspect every attendance row recorded for 2023-09-09
kiwoom_crowd.loc[kiwoom_crowd['날짜'] == '2023-09-09']

Unnamed: 0,날짜,요일,홈팀,원정팀,장소,관중 수
135,2023-09-09,토,키움,한화,고척,9962
136,2023-09-09,토,키움,한화,고척,10028


> 2023-09-09에는 경기가 2개 있었음을 확인함. (실제 데이터 확인 결과)

In [16]:
# Renumber the rows from 0; drop=True discards the old index left over from filtering
kiwoom_no_crowd = kiwoom_no_crowd.reset_index(drop=True)
kiwoom_no_crowd

Unnamed: 0,날짜,시작시간,종료시간,경기시간,홈팀 선발,어웨이팀 선발,구장
0,2022-04-02,14:00,17:12,3:12,안우진,반즈,고척
1,2022-04-03,14:00,17:55,3:55,요키시,박세웅,고척
2,2022-04-05,18:30,21:34,3:04,애플러,임찬규,고척
3,2022-04-06,18:30,22:13,3:43,최원태,손주영,고척
4,2022-04-07,18:30,21:01,2:31,정찬헌,김윤식,고척
...,...,...,...,...,...,...,...
166,2024-05-19,14:01,17:13,3:12,김인범,오원석,고척
167,2024-05-21,18:31,21:53,3:22,하영민,신민혁,고척
168,2024-05-22,18:31,21:54,3:23,후라도,하트,고척
169,2024-05-23,18:30,21:14,2:44,헤이수스,김시훈,고척


In [17]:
# Show game rows whose date already appeared in an earlier row
kiwoom_no_crowd.loc[kiwoom_no_crowd['날짜'].duplicated(keep='first')]

Unnamed: 0,날짜,시작시간,종료시간,경기시간,홈팀 선발,어웨이팀 선발,구장
136,2023-09-09,17:47,21:25,3:38,이명종,남지민,고척


In [18]:
# Inspect every game row recorded for 2023-09-09
kiwoom_no_crowd.loc[kiwoom_no_crowd['날짜'] == '2023-09-09']

Unnamed: 0,날짜,시작시간,종료시간,경기시간,홈팀 선발,어웨이팀 선발,구장
135,2023-09-09,14:00,17:16,3:16,김동혁,김기중,고척
136,2023-09-09,17:47,21:25,3:38,이명종,남지민,고척


In [19]:
# Outer-join the attendance records with the game details on the date column.
# A date that holds several games in both frames (the 2023-09-09 case checked
# above) produces every attendance x game combination; the surplus rows are
# removed in a later cell.
kiwoom = kiwoom_crowd.merge(kiwoom_no_crowd, on='날짜', how='outer')
kiwoom

Unnamed: 0,날짜,요일,홈팀,원정팀,장소,관중 수,시작시간,종료시간,경기시간,홈팀 선발,어웨이팀 선발,구장
0,2022-04-02,토,키움,롯데,고척,8257,14:00,17:12,3:12,안우진,반즈,고척
1,2022-04-03,일,키움,롯데,고척,6115,14:00,17:55,3:55,요키시,박세웅,고척
2,2022-04-05,화,키움,LG,고척,2298,18:30,21:34,3:04,애플러,임찬규,고척
3,2022-04-06,수,키움,LG,고척,2304,18:30,22:13,3:43,최원태,손주영,고척
4,2022-04-07,목,키움,LG,고척,2055,18:30,21:01,2:31,정찬헌,김윤식,고척
...,...,...,...,...,...,...,...,...,...,...,...,...
168,2024-05-19,일,키움,SSG,고척,10049,14:01,17:13,3:12,김인범,오원석,고척
169,2024-05-21,화,키움,NC,고척,3940,18:31,21:53,3:22,하영민,신민혁,고척
170,2024-05-22,수,키움,NC,고척,4337,18:31,21:54,3:23,후라도,하트,고척
171,2024-05-23,목,키움,NC,고척,3897,18:30,21:14,2:44,헤이수스,김시훈,고척


In [20]:
# Inspect the merged rows for the date that had two games
kiwoom.loc[kiwoom['날짜'] == '2023-09-09']

Unnamed: 0,날짜,요일,홈팀,원정팀,장소,관중 수,시작시간,종료시간,경기시간,홈팀 선발,어웨이팀 선발,구장
135,2023-09-09,토,키움,한화,고척,9962,14:00,17:16,3:16,김동혁,김기중,고척
136,2023-09-09,토,키움,한화,고척,9962,17:47,21:25,3:38,이명종,남지민,고척
137,2023-09-09,토,키움,한화,고척,10028,14:00,17:16,3:16,김동혁,김기중,고척
138,2023-09-09,토,키움,한화,고척,10028,17:47,21:25,3:38,이명종,남지민,고척


In [21]:
# The date-only merge crossed the two 2023-09-09 games (2 x 2 = 4 rows).
# kiwoom_crowd and kiwoom_no_crowd are both ordered by date and start time, so
# the first attendance record (9962) belongs to the 14:00 game and the second
# (10028) to the 17:47 game:
#   135: 9962  + 14:00 -> correct
#   136: 9962  + 17:47 -> false pairing
#   137: 10028 + 14:00 -> false pairing
#   138: 10028 + 17:47 -> correct
# Dropping 136 and 138 (as before) discarded the real second game and kept a
# wrong pairing; the rows to remove are 136 and 137.
kiwoom = kiwoom.drop(index=[136, 137])
kiwoom

Unnamed: 0,날짜,요일,홈팀,원정팀,장소,관중 수,시작시간,종료시간,경기시간,홈팀 선발,어웨이팀 선발,구장
0,2022-04-02,토,키움,롯데,고척,8257,14:00,17:12,3:12,안우진,반즈,고척
1,2022-04-03,일,키움,롯데,고척,6115,14:00,17:55,3:55,요키시,박세웅,고척
2,2022-04-05,화,키움,LG,고척,2298,18:30,21:34,3:04,애플러,임찬규,고척
3,2022-04-06,수,키움,LG,고척,2304,18:30,22:13,3:43,최원태,손주영,고척
4,2022-04-07,목,키움,LG,고척,2055,18:30,21:01,2:31,정찬헌,김윤식,고척
...,...,...,...,...,...,...,...,...,...,...,...,...
168,2024-05-19,일,키움,SSG,고척,10049,14:01,17:13,3:12,김인범,오원석,고척
169,2024-05-21,화,키움,NC,고척,3940,18:31,21:53,3:22,하영민,신민혁,고척
170,2024-05-22,수,키움,NC,고척,4337,18:31,21:54,3:23,후라도,하트,고척
171,2024-05-23,목,키움,NC,고척,3897,18:30,21:14,2:44,헤이수스,김시훈,고척


In [22]:
# Renumber the rows from 0 after the drop; drop=True discards the old index
kiwoom = kiwoom.reset_index(drop=True)
kiwoom

Unnamed: 0,날짜,요일,홈팀,원정팀,장소,관중 수,시작시간,종료시간,경기시간,홈팀 선발,어웨이팀 선발,구장
0,2022-04-02,토,키움,롯데,고척,8257,14:00,17:12,3:12,안우진,반즈,고척
1,2022-04-03,일,키움,롯데,고척,6115,14:00,17:55,3:55,요키시,박세웅,고척
2,2022-04-05,화,키움,LG,고척,2298,18:30,21:34,3:04,애플러,임찬규,고척
3,2022-04-06,수,키움,LG,고척,2304,18:30,22:13,3:43,최원태,손주영,고척
4,2022-04-07,목,키움,LG,고척,2055,18:30,21:01,2:31,정찬헌,김윤식,고척
...,...,...,...,...,...,...,...,...,...,...,...,...
166,2024-05-19,일,키움,SSG,고척,10049,14:01,17:13,3:12,김인범,오원석,고척
167,2024-05-21,화,키움,NC,고척,3940,18:31,21:53,3:22,하영민,신민혁,고척
168,2024-05-22,수,키움,NC,고척,4337,18:31,21:54,3:23,후라도,하트,고척
169,2024-05-23,목,키움,NC,고척,3897,18:30,21:14,2:44,헤이수스,김시훈,고척


In [23]:
# Load the Kiwoom standings / win-loss record data
kiwoom_rank = pd.read_csv(filepath_or_buffer='./Data/rank/키움_rank.csv')
kiwoom_rank

Unnamed: 0,날짜,팀명,순위,총 경기수,승리,패배,무승부,승률,게임차,최근 10경기 전적,연속 승패 현황,홈 경기 전적,원정 경기 전적
0,2022-04-02,키움,6,1,0,1,0,0.000,1.0,0승0무1패,1패,0-0-1,0-0-0
1,2022-04-03,키움,4,2,1,1,0,0.500,1.0,1승0무1패,1승,1-0-1,0-0-0
2,2022-04-05,키움,6,3,1,2,0,0.333,2.0,1승0무2패,1패,1-0-2,0-0-0
3,2022-04-06,키움,7,4,1,3,0,0.250,3.0,1승0무3패,2패,1-0-3,0-0-0
4,2022-04-07,키움,7,5,1,4,0,0.200,4.0,1승0무4패,3패,1-0-4,0-0-0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
383,2024-05-30,키움,9,53,22,31,0,0.415,11.5,4승0무6패,1패,11-0-16,11-0-15
384,2024-05-31,키움,9,54,22,32,0,0.407,11.5,4승0무6패,2패,11-0-17,11-0-15
385,2024-06-01,키움,9,55,22,33,0,0.400,12.5,3승0무7패,3패,11-0-18,11-0-15
386,2024-06-02,키움,10,56,22,34,0,0.393,12.5,3승0무7패,4패,11-0-19,11-0-15


In [24]:
# Inspect the column dtypes and non-null counts of the standings data
kiwoom_rank.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 388 entries, 0 to 387
Data columns (total 13 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   날짜          388 non-null    object 
 1   팀명          388 non-null    object 
 2   순위          388 non-null    int64  
 3   총 경기수       388 non-null    int64  
 4   승리          388 non-null    int64  
 5   패배          388 non-null    int64  
 6   무승부         388 non-null    int64  
 7   승률          388 non-null    float64
 8   게임차         388 non-null    float64
 9   최근 10경기 전적  388 non-null    object 
 10  연속 승패 현황    388 non-null    object 
 11  홈 경기 전적     388 non-null    object 
 12  원정 경기 전적    388 non-null    object 
dtypes: float64(2), int64(5), object(6)
memory usage: 39.5+ KB


In [25]:
# Convert the date column to datetime so it can serve as the merge key
kiwoom_rank = kiwoom_rank.astype({'날짜': 'datetime64[ns]'})
kiwoom_rank

Unnamed: 0,날짜,팀명,순위,총 경기수,승리,패배,무승부,승률,게임차,최근 10경기 전적,연속 승패 현황,홈 경기 전적,원정 경기 전적
0,2022-04-02,키움,6,1,0,1,0,0.000,1.0,0승0무1패,1패,0-0-1,0-0-0
1,2022-04-03,키움,4,2,1,1,0,0.500,1.0,1승0무1패,1승,1-0-1,0-0-0
2,2022-04-05,키움,6,3,1,2,0,0.333,2.0,1승0무2패,1패,1-0-2,0-0-0
3,2022-04-06,키움,7,4,1,3,0,0.250,3.0,1승0무3패,2패,1-0-3,0-0-0
4,2022-04-07,키움,7,5,1,4,0,0.200,4.0,1승0무4패,3패,1-0-4,0-0-0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
383,2024-05-30,키움,9,53,22,31,0,0.415,11.5,4승0무6패,1패,11-0-16,11-0-15
384,2024-05-31,키움,9,54,22,32,0,0.407,11.5,4승0무6패,2패,11-0-17,11-0-15
385,2024-06-01,키움,9,55,22,33,0,0.400,12.5,3승0무7패,3패,11-0-18,11-0-15
386,2024-06-02,키움,10,56,22,34,0,0.393,12.5,3승0무7패,4패,11-0-19,11-0-15


In [26]:
# Confirm that the date column is now datetime64[ns]
kiwoom_rank.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 388 entries, 0 to 387
Data columns (total 13 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   날짜          388 non-null    datetime64[ns]
 1   팀명          388 non-null    object        
 2   순위          388 non-null    int64         
 3   총 경기수       388 non-null    int64         
 4   승리          388 non-null    int64         
 5   패배          388 non-null    int64         
 6   무승부         388 non-null    int64         
 7   승률          388 non-null    float64       
 8   게임차         388 non-null    float64       
 9   최근 10경기 전적  388 non-null    object        
 10  연속 승패 현황    388 non-null    object        
 11  홈 경기 전적     388 non-null    object        
 12  원정 경기 전적    388 non-null    object        
dtypes: datetime64[ns](1), float64(2), int64(5), object(5)
memory usage: 39.5+ KB


In [27]:
# Inner-join the standings (left) with the merged game data (right) on the date,
# keeping only dates present in both frames
kiwoom_new = pd.merge(kiwoom_rank, kiwoom, on="날짜", how="inner")
kiwoom_new

Unnamed: 0,날짜,팀명,순위,총 경기수,승리,패배,무승부,승률,게임차,최근 10경기 전적,...,홈팀,원정팀,장소,관중 수,시작시간,종료시간,경기시간,홈팀 선발,어웨이팀 선발,구장
0,2022-04-02,키움,6,1,0,1,0,0.000,1.0,0승0무1패,...,키움,롯데,고척,8257,14:00,17:12,3:12,안우진,반즈,고척
1,2022-04-03,키움,4,2,1,1,0,0.500,1.0,1승0무1패,...,키움,롯데,고척,6115,14:00,17:55,3:55,요키시,박세웅,고척
2,2022-04-05,키움,6,3,1,2,0,0.333,2.0,1승0무2패,...,키움,LG,고척,2298,18:30,21:34,3:04,애플러,임찬규,고척
3,2022-04-06,키움,7,4,1,3,0,0.250,3.0,1승0무3패,...,키움,LG,고척,2304,18:30,22:13,3:43,최원태,손주영,고척
4,2022-04-07,키움,7,5,1,4,0,0.200,4.0,1승0무4패,...,키움,LG,고척,2055,18:30,21:01,2:31,정찬헌,김윤식,고척
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
166,2024-05-19,키움,7,45,19,26,0,0.422,10.0,4승0무6패,...,키움,SSG,고척,10049,14:01,17:13,3:12,김인범,오원석,고척
167,2024-05-21,키움,8,46,19,27,0,0.413,10.0,4승0무6패,...,키움,NC,고척,3940,18:31,21:53,3:22,하영민,신민혁,고척
168,2024-05-22,키움,8,47,19,28,0,0.404,10.0,4승0무6패,...,키움,NC,고척,4337,18:31,21:54,3:23,후라도,하트,고척
169,2024-05-23,키움,8,48,20,28,0,0.417,9.0,5승0무5패,...,키움,NC,고척,3897,18:30,21:14,2:44,헤이수스,김시훈,고척


In [29]:
# Write the combined dataset to CSV without the row index
kiwoom_new.to_csv('./Data/kiwoom_new.csv', index=False)