### '###' 검색하여 문제점 수정할 것

### import

In [2]:
import my_utils as mu
import pandas as pd
import time
import json

from tqdm import tqdm

### 사용 함수들

In [4]:
def rawdataLoader(datas: str):
    return json.loads(datas.replace("'", '"').replace('True', '1').replace('False', '0'))


def parseRawdata(rawdata: pd.DataFrame, parse_to: int, use_tqdm: bool=False) -> pd.DataFrame:
    """
    `parse_to`: 파싱 대상 컬럼의 시작 인덱스\n
    `use_tqdm`: tqdm 출력의 사용 여부
    """
    matches = {a:[] for a in rawdata.columns[parse_to:]}

    if use_tqdm: parse_data = tqdm(rawdata.iloc)
    else: parse_data = rawdata.iloc

    for rec in parse_data:
        for col in [a for a in rawdata.columns][parse_to:]:
            matches[col].append(rawdataLoader(rec[col]))
    for col in [a for a in rawdata.columns][parse_to:]:
        rawdata[col] = matches[col]
    return rawdata


def noneDropper(dataframe: pd.DataFrame) -> pd.DataFrame:
    """
    라인 정보에 문제가 있는 레코드를 제거하는 함수\n
    `dataframe`에 `lane` 컬럼 필요
    """
    return dataframe.drop(dataframe[dataframe['lane']=='NONE'].index).reset_index(drop=True)


def ripDropper(dataframe: pd.DataFrame, rip_cut: int=50) -> pd.DataFrame:
    """
    표본수가 너무 적은 레코드를 제거하는 함수\n
    `dataframe`에 `game_count` 컬럼 필요
    """
    return dataframe.drop(dataframe[dataframe['game_count'] < rip_cut].index).reset_index(drop=True)


def winRateAdder(dataframe: pd.DataFrame) -> pd.DataFrame:
    """
    챔피언, 라인별 승률을 추가하는 함수\n
    `dataframe`에 `win`, `game_count` 컬럼 필요
    """
    dataframe['win_rate'] = round(dataframe['win'] / dataframe['game_count'] * 100, 2)
    return dataframe


def pickRateAdder(dataframe: pd.DataFrame) -> pd.DataFrame:
    """
    챔피언, 라인별 픽률을 추가하는 함수\n
    `dataframe`에 `champ_id`, `lane`, `game_count` 컬럼 필요
    """
    champ_id = dataframe['champ_id'].tolist()
    lane = dataframe['lane'].tolist()
    game_count = dataframe['game_count'].tolist()
    
    ### tolist()와 Series 사용 간의 속도 차이 검증 필요.
    dataframe['pick_rate'] = [round(game_count[idx] / (dataframe[(dataframe['champ_id']==cid)&(dataframe['lane']==lane[idx])]['game_count'].sum()) * 100, 2)
                              for idx, cid in enumerate(champ_id)]
    return dataframe

### DB 데이터 파싱

In [59]:
need_cols = ['VERSION','GAME_ID','PARTICIPANT_NUMBER','CHAMPION_ID','LANE','GAME','CHAMPION','SPELL','SKILLTREE','RUNE','STARTITEM','ITEMTREE','ITEM','KDA','DAMAGE']
version = '13.9.506.4846'
update_time = time.strftime('%Y-%m-%d %H:%M:%S')

mu.rawdataIntegrityKeeper()
rawdata = mu.oracle_totalExecute(f"SELECT {','.join(need_cols)} FROM RAWDATA WHERE VERSION = '{version}'", debug_print=False)
rawdata.sort_values(['GAME_ID', 'PARTICIPANT_NUMBER'], ascending=[False, True], ignore_index=True, inplace=True)
rawdata = parseRawdata(rawdata, 5)
print('총 레코드 수:', len(rawdata))

RawData 테이블 무결성 상태 확인!
총 레코드 수: 5310


In [60]:
rawdata.head(1)

Unnamed: 0,VERSION,GAME_ID,PARTICIPANT_NUMBER,CHAMPION_ID,LANE,GAME,CHAMPION,SPELL,SKILLTREE,RUNE,STARTITEM,ITEMTREE,ITEM,KDA,DAMAGE
0,13.9.506.4846,KR_6497321016,1,69,TOP,"{'gameCreation': 1684169752418, 'gameStartTime...","{'championId': 69, 'championName': 'Cassiopeia...","{'summoner1Id': 12, 'summoner2Id': 4}","[3, 1, 2, 3, 3, 4, 3, 1, 1, 3, 4, 1, 1, 2, 2, 4]","{'runePrimaryStyle': 8200, 'runeCorePerk': 823...","[1056, 2003, 2003]","[6657, 3003, 3193]","{'item0': 1011, 'item1': 3193, 'item2': 1052, ...","{'kills': 6, 'deaths': 4, 'assists': 5, 'kda':...","{'teamDamagePercentage': 0.2929071651223167, '..."


In [91]:
rip = rawdata[['CHAMPION_ID','LANE']]
rip['GAME_COUNT'] = 1
rip = rip.groupby(['CHAMPION_ID','LANE']).sum().reset_index()
rip = rip.drop(rip[rip['GAME_COUNT']>=10].index).reset_index(drop=True)
rip
# for a in rip

# l = rip['LANE'].tolist()
# rip_list = [(c,l[i]) for i,c in enumerate(rip['CHAMPION_ID'].tolist())]
# rip_list

Unnamed: 0,CHAMPION_ID,LANE,GAME_COUNT
0,1,UTILITY,2
1,2,JUNGLE,2
2,2,TOP,7
3,3,UTILITY,2
4,4,JUNGLE,1
...,...,...,...
227,875,UTILITY,1
228,876,TOP,2
229,888,UTILITY,4
230,895,BOTTOM,5


In [65]:
la = rawdata['LANE'].tolist()
rawdata['rip_base'] = [(c, la[i]) for i,c in enumerate(rawdata['CHAMPION_ID'].tolist())]

In [66]:
rawdata.drop(rawdata[rawdata['rip_base'] in rip_list])

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [None]:
rawdata['rip_base'] == rip_list

In [68]:
rip_list

[(1, 'UTILITY'),
 (2, 'JUNGLE'),
 (2, 'TOP'),
 (3, 'UTILITY'),
 (4, 'JUNGLE'),
 (4, 'TOP'),
 (4, 'UTILITY'),
 (5, 'JUNGLE'),
 (5, 'TOP'),
 (6, 'JUNGLE'),
 (6, 'MIDDLE'),
 (7, 'TOP'),
 (7, 'UTILITY'),
 (8, 'TOP'),
 (9, 'MIDDLE'),
 (9, 'TOP'),
 (9, 'UTILITY'),
 (10, 'MIDDLE'),
 (11, 'TOP'),
 (12, 'MIDDLE'),
 (12, 'TOP'),
 (13, 'BOTTOM'),
 (13, 'MIDDLE'),
 (13, 'TOP'),
 (14, 'MIDDLE'),
 (14, 'UTILITY'),
 (17, 'JUNGLE'),
 (17, 'MIDDLE'),
 (17, 'UTILITY'),
 (18, 'MIDDLE'),
 (18, 'TOP'),
 (19, 'JUNGLE'),
 (19, 'TOP'),
 (19, 'UTILITY'),
 (20, 'MIDDLE'),
 (22, 'TOP'),
 (23, 'JUNGLE'),
 (23, 'MIDDLE'),
 (23, 'TOP'),
 (24, 'JUNGLE'),
 (24, 'MIDDLE'),
 (25, 'MIDDLE'),
 (26, 'MIDDLE'),
 (27, 'MIDDLE'),
 (27, 'TOP'),
 (27, 'UTILITY'),
 (29, 'BOTTOM'),
 (29, 'JUNGLE'),
 (29, 'UTILITY'),
 (30, 'TOP'),
 (31, 'BOTTOM'),
 (31, 'JUNGLE'),
 (31, 'MIDDLE'),
 (31, 'TOP'),
 (31, 'UTILITY'),
 (33, 'JUNGLE'),
 (33, 'MIDDLE'),
 (34, 'UTILITY'),
 (35, 'MIDDLE'),
 (35, 'TOP'),
 (36, 'JUNGLE'),
 (37, 'UTILITY'),
 

## 인터벌 테이블 리스트

### 챔피언/초기 정보 (완료!)

In [4]:
def getChampionInfo(rawdata: pd.DataFrame, version: str, update_time: str) -> pd.DataFrame:

    # 속도 개선을 위한 리스트화
    gameid_list = rawdata['GAME_ID'].tolist()
    champid_list = rawdata['CHAMPION_ID'].tolist()
    lane_list = rawdata['LANE'].tolist()
    win_list = [int(a['win']) for a in rawdata['CHAMPION'].tolist()]
    ban_list = [[b['championId'] for b in a['bans']] for a in rawdata['GAME'].tolist()]

    # 필요 데이터 추출
    need_datas = [{
        'version': version,
        'update_time': update_time,
        'game_id': game_id,
        'champ_id': champid_list[idx],
        'lane': lane_list[idx],
        'win': win_list[idx],
        'bans': ban_list[idx],
        'game_count': 1
    } for idx, game_id in enumerate(gameid_list)]
    champ_info = pd.DataFrame(need_datas)

    # 그룹화
    result = champ_info.groupby(['version','update_time','champ_id','lane']).sum().reset_index()

    # 라인 정보에 문제가 있는 레코드 제거
    result = noneDropper(result)
    # 표본수가 부족한 레코드 제거
    result = ripDropper(result, 50)
    # 승률 정보 추가
    result = winRateAdder(result)
    # 픽률 정보 추가
    result['pick_rate'] = round((result['game_count'] / len(rawdata)/10)*100, 2)

    # 밴률 정보 추가
    champs = mu.oracle_totalExecute('SELECT CHAMP_ID FROM CHAMPDATA', debug_print=False)
    champs = {a:0 for a in champs['CHAMP_ID']}
    champs[-1] = 0
    for idx in range(0, len(rawdata), 10):
        for cid in set(ban_list[idx]):
            champs[cid] += 1
    result['ban_rate'] = result['champ_id'].apply(lambda x: round(champs[x]/len(champ_info)*100, 2))

    ### 티어 산출 알고리즘 적용 필요!
    result['tier'] = 0

    result_cols = [
    'version','update_time','champ_id','lane',
    'tier','win_rate','pick_rate','ban_rate']

    return result[result_cols]

### 챔피언/룬 (완료!)

In [5]:
def getChampionRune(rawdata: pd.DataFrame, version: str, update_time: str) -> pd.DataFrame:
    
    # 속도 개선을 위한 리스트화
    champid_list = rawdata['CHAMPION_ID'].tolist()
    lane_list = rawdata['LANE'].tolist()
    rune_list =  rawdata['RUNE'].tolist()
    win_list = [int(a['win']) for a in rawdata['CHAMPION'].tolist()]

    # 필요 데이터 추출
    need_datas = [{
        'version': version,
        'update_time': update_time,
        'champ_id': champ_id,
        'lane': lane_list[idx],
        'runetype_core': rune_list[idx]['runePrimaryStyle'],
        'core_mainrune': rune_list[idx]['runeCorePerk'],
        'core_rune1': rune_list[idx]['runePrimaryPerk1'],
        'core_rune2': rune_list[idx]['runePrimaryPerk2'],
        'core_rune3': rune_list[idx]['runePrimaryPerk3'],
        'runetype_sub': rune_list[idx]['runeSubStyle'],
        'sub_rune1': rune_list[idx]['runeSubPerk1'],
        'sub_rune2': rune_list[idx]['runeSubPerk2'],
        'shard_offense': rune_list[idx]['runeShardOffense'],
        'shard_flex': rune_list[idx]['runeShardFlex'],
        'shard_defence': rune_list[idx]['runeShardDefense'],
        'win': win_list[idx],
        'game_count': 1
    } for idx, champ_id in enumerate(champid_list)]
    champ_rune = pd.DataFrame(need_datas)

    # 그룹화
    result = champ_rune.groupby([
        'version','update_time','champ_id','lane',
        'runetype_core','core_mainrune','core_rune1','core_rune2','core_rune3',
        'runetype_sub','sub_rune1','sub_rune2',
        'shard_offense','shard_flex','shard_defence']).sum().reset_index()

    # 라인 정보에 문제가 있는 레코드 제거
    result = noneDropper(result)
    # 표본수가 부족한 레코드 제거
    result = ripDropper(result, 50)
    # 승률 정보 추가
    result = winRateAdder(result)
    # 픽률 정보 추가
    result = pickRateAdder(result)
    
    # 테이블의 컬럼에 맞춰 정렬 후 리턴
    result_cols = [
        'version','update_time','champ_id','lane',
        'runetype_core','core_mainrune','core_rune1','core_rune2','core_rune3',
        'runetype_sub','sub_rune1','sub_rune2',
        'shard_offense','shard_flex','shard_defence',
        'pick_rate','game_count','win_rate']

    return result[result_cols]

### 챔피언/스킬 (완료!)

In [6]:
def getChampionSkill(rawdata: pd.DataFrame, version: str, update_time: str) -> pd.DataFrame:

    # 속도 개선을 위한 리스트화
    champid_list = rawdata['CHAMPION_ID'].tolist()
    lane_list = rawdata['LANE'].tolist()
    win_list = [int(a['win']) for a in rawdata['CHAMPION']]
    skilltree_list = rawdata['SKILLTREE'].tolist()

    # 필요 데이터 추출
    need_datas = [{
        'version': version,
        'update_time': update_time,
        'champ_id': champ_id,
        'lane': lane_list[idx],
        'skill_build': str(skilltree_list[idx][:15]),
        'level': len(skilltree_list[idx]), # 15레벨 이상의 데이터만 뽑기 위해 임의로 추가한 데이터
        'win': win_list[idx],
        'game_count': 1
    } for idx, champ_id in enumerate(champid_list)]
    champ_skilltree = pd.DataFrame(need_datas)

    # 15레벨 이하의 레코드 제거 후 불필요해진 level 컬럼 제거
    champ_skilltree.drop(champ_skilltree[champ_skilltree['level']<15].index, inplace=True)
    champ_skilltree.drop('level', axis=1, inplace=True)
    champ_skilltree.reset_index(drop=True, inplace=True)

    # 그룹화
    result = champ_skilltree.groupby(['version','update_time','champ_id','lane','skill_build']).sum().reset_index()

    # 라인 정보에 문제가 있는 레코드 제거
    result = noneDropper(result)
    # 표본수가 부족한 레코드 제거
    result = ripDropper(result, 50)
    # 승률 정보 추가
    result = winRateAdder(result)
    # 픽률 정보 추가
    result = pickRateAdder(result)

    # 테이블의 컬럼에 맞춰 정렬 후 리턴
    result_cols = [
        'version','update_time','champ_id','lane',
        'skill_build','pick_rate','game_count','win_rate']

    return result[result_cols]

### 챔피언/아이템/빌드 (완료!)

In [7]:
def getChampionItemBuild(rawdata: pd.DataFrame, version: str, update_time: str) -> pd.DataFrame:

    # 속도 개선을 위한 리스트화
    champid_list = rawdata['CHAMPION_ID'].to_list()
    lane_list = rawdata['LANE'].to_list()
    win_list = [int(a['win']) for a in rawdata['CHAMPION'].tolist()]
    itemtree_list = rawdata['ITEMTREE'].to_list()
    
    # 필요 데이터 추출
    need_datas = [{
        'version': version,
        'update_time': update_time,
        'champ_id': champ_id,
        'lane': lane_list[idx],
        'item_count': len(itemtree_list[idx]), # 아이템이 3개 이상인 데이터만 뽑기 위해 임의로 추가한 데이터
        'win': win_list[idx],
        'game_count': 1
    } for idx, champ_id in enumerate(champid_list)]
    champ_item_build = pd.DataFrame(need_datas)
    
    # 아이템 갯수가 3개 미만인 레코드 제거 후 불필요해진 item_count 컬럼 제거
    champ_item_build.drop(champ_item_build[champ_item_build['item_count']!=3].index, inplace=True)
    champ_item_build.drop('item_count', axis=1, inplace=True)

    # item1, 2, 3 컬럼 추가
    champ_item_build['item1'] = [itemtree_list[a][0] for a in champ_item_build.index]
    champ_item_build['item2'] = [itemtree_list[a][1] for a in champ_item_build.index]
    champ_item_build['item3'] = [itemtree_list[a][2] for a in champ_item_build.index]
    champ_item_build.reset_index(drop=True, inplace=True)

    # 그룹화
    result = champ_item_build.groupby(['version','update_time','champ_id','lane','item1','item2','item3']).sum().reset_index()
    
    # 라인 정보에 문제가 있는 레코드 제거
    result = noneDropper(result)
    # 표본수가 부족한 레코드 제거
    result = ripDropper(result, 50)
    # 승률 정보 추가
    result = winRateAdder(result)
    # 픽률 정보 추가
    result = pickRateAdder(result)

    # 테이블의 컬럼에 맞춰 정렬 후 리턴
    result_cols = [
        'version','update_time','champ_id','lane',
        'item1','item2','item3',
        'pick_rate','game_count','win_rate']
    
    return result[result_cols]

### 챔피언/아이템/신발 (완료!)

In [8]:
def getChampionItemBoots(rawdata: pd.DataFrame, version: str, update_time: str) -> pd.DataFrame:
    
    # 속도 개선을 위한 리스트화
    champid_list = rawdata['CHAMPION_ID'].tolist()
    lane_list = rawdata['LANE'].tolist()
    win_list = [int(a['win']) for a in rawdata['CHAMPION'].tolist()]
    item_list = rawdata['ITEM'].tolist()

    # 신발 id 리스트
    boots_id_list = [1001,2422,3006,3009,3020,3047,3111,3117,3158]

    # 신발이 있는 레코드의 인덱스와 해당 레코드에 있는 신발의 id를 리스트로 저장
    boots_list = []
    has_boots_index_list = []
    for idx, items in enumerate(item_list):
        is_boots = 0
        for item in items.values():
            if item in boots_id_list:
                is_boots = item
                has_boots_index_list.append(idx)
                break
        boots_list.append(is_boots)

    # 필요 데이터 추출
    need_datas = [{
        'version': version,
        'update_time': update_time,
        'champ_id': champid_list[idx],
        'lane': lane_list[idx],
        'boots': boots_list[idx],
        'win': win_list[idx],
        'game_count': 1
    } for idx in has_boots_index_list]
    champ_item_boots = pd.DataFrame(need_datas)

    # 그룹화
    result = champ_item_boots.groupby(['version','update_time','champ_id','lane','boots']).sum().reset_index()

    # 라인 정보에 문제가 있는 레코드 제거
    result = noneDropper(result)
    # 표본수가 부족한 레코드 제거
    result = ripDropper(result, 50)
    # 승률 정보 추가
    result = winRateAdder(result)
    # 픽률 정보 추가
    result = pickRateAdder(result)

    # 테이블의 컬럼에 맞춰 정렬 후 리턴
    result_cols = [
        'version','update_time','champ_id','lane',
        'boots','pick_rate','game_count','win_rate']
    
    return result[result_cols]

### 챔피언/아이템/시작 (완료!)

In [9]:
def getChampionItemStart(rawdata: pd.DataFrame, version: str, update_time: str) -> pd.DataFrame:

    # 속도 개선을 위한 리스트화
    champid_list = rawdata['CHAMPION_ID'].to_list()
    lane_list = rawdata['LANE'].tolist()
    win_list = [int(a['win']) for a in rawdata['CHAMPION']]

    # 정렬용 함수
    def sortItems(items: list) -> list:
        items.sort()
        return items
    # 시작 아이템은 그룹화를 위해 정렬 상태로 리스트화
    start_item_list = [sortItems(item) for item in rawdata['STARTITEM'].tolist()]
    
    # 필요 데이터 추출
    need_datas = [{
        'version': version,
        'update_time': update_time,
        'champ_id': champ_id,
        'lane': lane_list[idx],
        'start_item': str(start_item_list[idx]),
        'win': win_list[idx],
        'game_count': 1
    } for idx, champ_id in enumerate(champid_list)]
    champ_item_start = pd.DataFrame(need_datas)

    # 그룹화
    result = champ_item_start.groupby(['version','update_time','champ_id','lane','start_item']).sum().reset_index()
    
    # 라인 정보에 문제가 있는 레코드 제거
    result = noneDropper(result)
    # 표본수가 부족한 레코드 제거
    result = ripDropper(result, 50)
    # 승률 정보 추가
    result = winRateAdder(result)
    # 픽률 정보 추가
    result = pickRateAdder(result)

    # 테이블의 컬럼에 맞춰 정렬 후 리턴
    result_cols = [
        'version','update_time','champ_id','lane',
        'start_item','pick_rate','game_count','win_rate']
    
    return result[result_cols]

### 챔피언/스펠 (완료!)

In [10]:
def getChampionSpell(rawdata: pd.DataFrame, version: str, update_time: str) -> pd.DataFrame:
    
    # 속도 개선을 위한 리스트화
    champid_list = rawdata['CHAMPION_ID'].tolist()
    lane_list = rawdata['LANE'].tolist()
    win_list = [int(a['win']) for a in rawdata['CHAMPION']]
    spell_list = rawdata['SPELL'].tolist()

    # 필요 데이터 추출
    need_datas = [{
        'version': version,
        'update_time': update_time,
        'champ_id': champ_id,
        'lane': lane_list[idx],
        'spell1': spell_list[idx]['summoner1Id'],
        'spell2': spell_list[idx]['summoner2Id'],
        'win': win_list[idx],
        'game_count': 1
    } for idx, champ_id in enumerate(champid_list)]
    champ_spell = pd.DataFrame(need_datas)
    
    # 그룹화
    result = champ_spell.groupby(['version','update_time','champ_id','lane','spell1','spell2']).sum().reset_index()
    
    # 라인 정보에 문제가 있는 레코드 제거
    result = noneDropper(result)
    # 표본수가 부족한 레코드 제거
    result = ripDropper(result, 50)
    # 승률 정보 추가
    result = winRateAdder(result)
    # 픽률 정보 추가
    result = pickRateAdder(result)
    
    # 테이블의 컬럼에 맞춰 정렬 후 리턴
    result_cols = [
        'version','update_time','champ_id','lane',
        'spell1','spell2','pick_rate','game_count','win_rate']

    return result[result_cols]

### 챔피언/매치업 (작성 필요)

In [155]:
def getChampionMatchup(rawdata: pd.DataFrame, version: str, update_time: str) -> pd.DataFrame:

    #list화
    gameid_list = rawdata['GAME_ID'].tolist()
    champid_list = rawdata['CHAMPION_ID'].tolist()
    champion_list =  rawdata['CHAMPION'].tolist()
    kda_list = rawdata['KDA'].tolist()
    damage_list = rawdata['DAMAGE'].tolist()

    need_datas = [{
        'version': version,
        'update_time': update_time,
        'gameid': gameid_list[idx],
        'champ_id': champ_id,
        'lane': champion_list[idx]['teamPosition'],
        'win': int(champion_list[idx]['win']),
        'teamId': champion_list[idx]['teamId'],
        'kda': round(kda_list[idx]['kda'], 2),
        'killParticipation': round(kda_list[idx]['killParticipation'], 2),
        'totalDamageDealtToChampions': damage_list[idx]['totalDamageDealtToChampions']
    } for idx, champ_id in enumerate(champid_list)]
    champ_match = pd.DataFrame(need_datas)
    
    # 그룹화
    champ_match_100 = champ_match[champ_match['teamId']==100]
    champ_match_100.drop('teamId', axis=1, inplace=True)
    champ_match_200 = champ_match[champ_match['teamId']==200]
    champ_match_200.drop(['teamId','win'], axis=1, inplace=True)
    
    # champ_match_200 = champ_match_200.rename(columns={'champ_id':'opponent_Id','win':'opponent_win','teamId':'opponent_teamId',
    #                                              'kda':'opponent_kda','killParticipation':'opponent_killParticipation',
    #                                              'totalDamageDealtToChampions':'opponent_totalDamageDealtToChampions'})

    match_data = pd.merge(champ_match_100,champ_match_200, on=['version','update_time','gameid','lane'])
    match_data['matchup_count'] = 1
    xid = match_data['champ_id_x'].tolist()
    yid = match_data['champ_id_y'].tolist()
    def sort_callback(data: list):
        data.sort()
        return data
    match_data['champs'] = [str(sort_callback([x, yid[i]])) for i,x in enumerate(xid)]

    group_cols = ['version','update_time','gameid','lane','champs']

    match_win = match_data[group_cols+['win','matchup_count']].groupby(group_cols).sum()

    mean_cols = [
        'kda_x','kda_y',
        'killParticipation_x','killParticipation_y',
        'totalDamageDealtToChampions_x','totalDamageDealtToChampions_y']

    # return match_data.groupby(group_cols).mean()

    return match_win

    #둘이한 게임수
    match_info_game = match_info_rawdata.groupby(['version','update_time','lane','champ_id','opponent_Id']).count()[['win']]\
        .rename(columns = {'win':'game_count'}).reset_index()
    
    #승리한 100이 200을 승리한 수
    match_info_win = match_info_rawdata.groupby(['version','update_time','lane','champ_id','opponent_Id'])\
        .sum()[['win']].reset_index()
    match_info_game_win = match_info_game.merge(match_info_win)

    champ_match = winRateAdder(match_info_game_win)

    # return champ_match
    
    match_info_game_win['win_rate'] = round((match_info_game_win['win'] / match_info_game_win['game_count']) * 100, 2)
    match_info_game_win['pick_rate'] = round(match_info_game_win['game_count'] / (sum(match_info_game_win['game_count']) / 10) * 100, 2)
    
    # 새로운 열 추가
    match_info_game_win['champ_opponent'] = match_info_game_win[['champ_id', 'opponent_Id']].apply(lambda x: tuple(sorted(x)), axis=1)

    # 중복 제거
    match_info_game_win_unique = match_info_game_win.drop_duplicates(subset=['update_time', 'lane', 'champ_opponent'])
    
    result = match_info_game_win_unique[['update_time','lane','champ_id','opponent_Id','game_count','win_rate']]
    
    return result

In [156]:
champion_matchup = getChampionMatchup(rawdata, version, update_time)

In [158]:
# .sort_values('game_count', ascending=False)
champion_matchup.sort_values('win')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,win,matchup_count
version,update_time,gameid,lane,champs,Unnamed: 5_level_1,Unnamed: 6_level_1
13.9.506.4846,2023-05-17 13:57:20,KR_6492175791,MIDDLE,"[4, 112]",0,1
13.9.506.4846,2023-05-17 13:57:20,KR_6495517003,BOTTOM,"[81, 202]",0,1
13.9.506.4846,2023-05-17 13:57:20,KR_6495517003,JUNGLE,"[30, 79]",0,1
13.9.506.4846,2023-05-17 13:57:20,KR_6495517003,MIDDLE,"[99, 103]",0,1
13.9.506.4846,2023-05-17 13:57:20,KR_6495517003,TOP,"[78, 86]",0,1
13.9.506.4846,2023-05-17 13:57:20,...,...,...,...,...
13.9.506.4846,2023-05-17 13:57:20,KR_6491401584,TOP,"[106, 875]",1,1
13.9.506.4846,2023-05-17 13:57:20,KR_6491401584,MIDDLE,"[8, 166]",1,1
13.9.506.4846,2023-05-17 13:57:20,KR_6491401584,JUNGLE,"[200, 517]",1,1
13.9.506.4846,2023-05-17 13:57:20,KR_6491487384,MIDDLE,"[55, 127]",1,1


## 작동 테스트

In [120]:
# info = getChampionInfo(rawdata, version, update_time)
# rune = getChampionRune(rawdata, version, update_time)
# skill = getChampionSkill(rawdata, version, update_time)
# item_build = getChampionItemBuild(rawdata, version, update_time)
# item_boots = getChampionItemBoots(rawdata, version, update_time)
# item_start = getChampionItemStart(rawdata, version, update_time)
# spell = getChampionSpell(rawdata, version, update_time)

In [121]:
def updateIntervalTables(version: str):
    
    print('>> RawData 테이블 무결성 상태 확인 중...')
    mu.rawdataIntegrityKeeper()

    print('>> RawData에서 데이터 가져오는 중...')
    need_cols = ['VERSION','GAME_ID','PARTICIPANT_NUMBER','CHAMPION_ID','LANE','GAME','CHAMPION','SPELL','SKILLTREE','RUNE','STARTITEM','ITEMTREE','ITEM']
    update_time = time.strftime('%Y-%m-%d %H:%M:%S')
    rawdata = mu.oracle_totalExecute(f"SELECT {','.join(need_cols)} FROM RAWDATA WHERE VERSION = '{version}'", debug_print=False)
    print('>> 총 레코드 수:', len(rawdata))

    print('>> 가져온 데이터 정렬 및 파싱 중...')
    rawdata.sort_values(['GAME_ID', 'PARTICIPANT_NUMBER'], ascending=[False, True], ignore_index=True, inplace=True)
    rawdata = parseRawdata(rawdata, 5)
    
    print('>> CHAMPION_INFO 삽입 중...')
    mu.insertDataFrameIntoTable(getChampionInfo(rawdata, version, update_time), 'CHAMPION_INFO', True)
    print('>> CHAMPION_RUNE 삽입 중...')
    mu.insertDataFrameIntoTable(getChampionRune(rawdata, version, update_time), 'CHAMPION_RUNE', True)
    print('>> CHAMPION_SKILL 삽입 중...')
    mu.insertDataFrameIntoTable(getChampionSkill(rawdata, version, update_time), 'CHAMPION_SKILL', True)
    print('>> CHAMPION_ITEM_BUILD 삽입 중...')
    mu.insertDataFrameIntoTable(getChampionItemBuild(rawdata, version, update_time), 'CHAMPION_ITEM_BUILD', True)
    print('>> CHAMPION_ITEM_BOOTS 삽입 중...')
    mu.insertDataFrameIntoTable(getChampionItemBoots(rawdata, version, update_time), 'CHAMPION_ITEM_BOOTS', True)
    print('>> CHAMPION_ITEM_START 삽입 중...')
    mu.insertDataFrameIntoTable(getChampionItemStart(rawdata, version, update_time), 'CHAMPION_ITEM_START', True)
    print('>> CHAMPION_SPELL 삽입 중...')
    mu.insertDataFrameIntoTable(getChampionSpell(rawdata, version, update_time), 'CHAMPION_SPELL', True)
    # print('>> CHAMPION_MATCHUP 삽입 중...')
    # mu.insertDataFrameIntoTable(getChampionMatchup(rawdata, version, update_time), 'CHAMPION_MATCHUP', True)
    print('>> 모든 인터벌 테이블 삽입 완료!')

In [None]:
updateIntervalTables(version='13.9.506.4846')