# OWL 赛事数据统计

根据 liquipedia 的数据, 统计 OWL 解说以及比赛比分等信息

### 基础模板解析

In [1]:
from datetime import datetime, timedelta
import re
# import wikitextparser as wtp
# https://github.com/5j9/wikitextparser#miscellaneous
from collections import defaultdict

In [45]:
# Scratch check of the timestamp format string: parse a liquipedia-style date and
# subtract a -15 hour offset (i.e. add 15 hours) to see the shifted datetime.
datetime.strptime('May 5, 2022 - 12:00', '%B %d, %Y - %H:%M') - timedelta(hours=-15)

datetime.datetime(2022, 5, 6, 3, 0)

In [49]:
from datetime import datetime, timedelta
files = ['liquipedia/KickoffClash_Qualifiers.txt',
         'liquipedia/KickoffClash_Tournament.txt',
         'liquipedia/MidseasonMadness_Qualifiers.txt',
         'liquipedia/MidseasonMadness_Tournament.txt',
         'liquipedia/SummerShowdown_Qualifiers.txt']

# Collect every time-zone abbreviation used in the {{Abbr/<TZ>}} markers.
timezones = set()
for file in files:
    with open(file) as f:
        data = f.read()
    timezones |= set(re.findall(r'\{\{Abbr/(.+?)\}\}', data))
print(sorted(timezones))

# UTC offset in hours for each abbreviation. The mapping is explicit so that an
# abbreviation appearing or disappearing in the data can never shift the offsets
# of the others; an unknown abbreviation raises KeyError below instead.
# Reference: https://blog.csdn.net/whatday/article/details/109856495
tz_utc_offset = {
    'BST': +1,   # British Summer Time
    'CDT': -5,   # US Central Daylight Time
    'CEST': +2,  # Central European Summer Time
    'CST': +8,   # treated here as Beijing time, not US Central Standard Time
    'EDT': -4,   # US Eastern Daylight Time
    'KST': +9,   # Korea Standard Time
    'PDT': -7,   # US Pacific Daylight Time
    'UTC': 0,    # Coordinated Universal Time
}
# Kept for backward compatibility: offsets in sorted-abbreviation order.
deltas = [tz_utc_offset[k] for k in sorted(timezones)]

tdelta = {k: timedelta(hours=tz_utc_offset[k] - 8) for k in sorted(timezones)}
tdelta
# To normalise everything to Beijing time (UTC+8), subtract tdelta[tz] from the
# timestamp given on liquipedia.

['BST', 'CDT', 'CEST', 'CST', 'EDT', 'KST', 'PDT', 'UTC']


{'BST': datetime.timedelta(days=-1, seconds=61200),
 'CDT': datetime.timedelta(days=-1, seconds=39600),
 'CEST': datetime.timedelta(days=-1, seconds=64800),
 'CST': datetime.timedelta(0),
 'EDT': datetime.timedelta(days=-1, seconds=43200),
 'KST': datetime.timedelta(seconds=3600),
 'PDT': datetime.timedelta(days=-1, seconds=32400),
 'UTC': datetime.timedelta(days=-1, seconds=57600)}

In [3]:
# Sample bracket entry in the "|<slot>={{Match ...}}" layout, used to exercise the parser.
s_liq1 = '''|M3={{Match
    |date=September 2, 2022 - 12:00 {{Abbr/PDT}}
    |youtube=overwatchleague
    |opponent1={{TeamOpponent|atl}}
    |opponent2={{TeamOpponent|gla}}
    |map1={{Map|map=Lijiang Tower|mode=Control|score1=1|score2=2|winner=2}}
    |map2={{Map|map=Hollywood|mode=Hybrid|score1=2|score2=3|winner=2}}
    |map3={{Map|map=Dorado|mode=Escort|score1=0|score2=1|winner=2}}
    |map4={{Map|map=Colosseo|mode=Push|score1=|score2=|winner=Skip}}
    |map5={{Map|map=Nepal|mode=Control|score1=|score2=|winner=Skip}}
	|mvp=Happy
	|comment=
	|owl=39227
	|overgg=
	|vod=
}}
'''
# w = wtp.parse(re.sub('\n( |\t)+', '', s_liq))
# print(w.templates[4].arguments)

In [50]:
def wiki2dict(s):
    """Turn a flat ``name|key=value|key=value`` wikitext fragment into a dict.

    All curly braces are removed, the text is split on ``|`` and the first
    segment (the template name) is discarded. Every remaining segment that
    contains ``=`` is split at its first ``=``; the value is whitespace-stripped,
    the key is kept as-is. Segments without ``=`` are ignored.

    Returns a ``defaultdict(str)``, so looking up a missing key yields ``''``.
    """
    fields = defaultdict(str)
    flattened = s.replace('{', '').replace('}', '')
    for segment in flattened.split('|')[1:]:
        key, separator, value = segment.partition('=')
        if not separator:
            continue  # positional argument, nothing to store
        fields[key] = value.strip()
    return fields

def parse_wiki(s_liq, is_name=True):
    """Parse one liquipedia match entry into a flat ``defaultdict(str)``.

    Parameters
    ----------
    s_liq : str
        Wikitext of a single match. It must contain a ``{{Abbr/<TZ>}}`` marker
        whose abbreviation is a key of the module-level ``tdelta`` mapping.
    is_name : bool, default True
        True when the text has the form ``|<slot>={{Match ...}}``; the slot
        label (e.g. ``M3``) is stored under ``'name'``. False when the text
        starts directly with the template; ``'name'`` is then ``''``.

    Returns
    -------
    defaultdict(str)
        The top-level ``key=value`` fields plus:
        ``name``; ``bestof`` (taken from the text, otherwise the highest
        ``|mapN`` index found in this entry); ``date`` as a ``datetime`` with
        ``tdelta[tz]`` subtracted; ``team1``/``team2``; ``comment1``/``comment2``
        when the comment field is non-empty; and one ``<field>_map<k>`` entry
        per field of each map template. Score and winner fields are converted
        to numbers where possible (``skip`` -> ``''``, ``draw`` -> ``-1``).

    Raises
    ------
    IndexError
        If no ``{{Abbr/...}}`` marker is present or fewer templates than
        expected are found.
    KeyError
        If the time-zone abbreviation is not in ``tdelta``.
    """
    # Join continuation lines (newline followed by whitespace) into one line.
    s_wiki = re.sub(r'\n\s+', '', s_liq).strip()
    s_wiki = s_wiki.replace('opponent', 'team')  # compatible with former data
    tz = re.findall(r'\{\{Abbr/(.+?)\}\}', s_wiki)[0]
    s_wiki = s_wiki.replace('{{Abbr/' + tz + '}}', '')

    if is_name:
        i = s_wiki.find("=")
        name = s_wiki[1:i]
        body = s_wiki[i+3:]  # skip the '={{' that opens the match template
    else:
        name = ''
        body = s_wiki
    # Inner templates in order of appearance: team1, team2, map1..N, casters.
    s_list = re.findall(r'\{\{.+?\}\}', body)
    s = re.sub(r'\{\{.+?\}\}', '', body)

    def get_name(template):
        """Return the first positional argument of a ``{{T|arg|...}}`` template."""
        return template.replace('{', '').replace('}', '').split('|')[1]

    d = wiki2dict(s)
    # bestof: derive it from THIS entry's map slots (the previous code looked at
    # the global sample `s_liq1`, which always yielded 5).
    if 'bestof' not in s_wiki:
        d['bestof'] = max((int(n) for n in re.findall(r'\|map([1-9])', s_wiki)), default=0)
    # name
    d['name'] = name
    # date, e.g. "June 2, 2022 - 19:00" once the {{Abbr/UTC}} marker is removed
    d['date'] = datetime.strptime(d['date'].strip(), '%B %d, %Y - %H:%M')
    d['date'] = d['date'] - tdelta[tz]
    # opponent / team: the first two templates
    for i in [1, 2]:
        d[f'team{i}'] = get_name(s_list[i-1])
    # comment: the two caster templates are the last two templates of the entry
    if 'comment' in s_wiki:
        if not re.search(r'\|comment=\|', s_wiki):
            for i in [1, 2]:
                d[f'comment{i}'] = get_name(s_list[i - 3])
        del d['comment']
    # flatten every map template into '<field>_map<k>' entries
    for i in range(int(d['bestof'])):
        del d[f'map{i+1}']
        dmap = wiki2dict(s_list[i + 2])  # maps start right after the two teams
        for kmap, v in dmap.items():
            if 'score' in kmap or 'winner' in kmap:
                if len(v) == 1:
                    v = int(v)
                elif v.strip().lower() == 'skip':
                    v = ''
                elif v.strip().lower() == 'draw':
                    v = -1
                elif len(v) != 0:
                    # drop one trailing non-digit character (e.g. a unit suffix)
                    v = float(v) if v[-1].isdigit() else float(v[:-1])
            d[f'{kmap}_map{i+1}'] = v
    return d
        
# Smoke test: parse the sample bracket entry and display the resulting dict.
parse_wiki(s_liq1)

defaultdict(str,
            {'date': datetime.datetime(2022, 9, 3, 3, 0),
             'youtube': 'overwatchleague',
             'team1': 'atl',
             'team2': 'gla',
             'mvp': 'Happy',
             'owl': '39227',
             'overgg': '',
             'vod': '',
             'bestof': 5,
             'name': 'M3',
             'map_map1': 'Lijiang Tower',
             'mode_map1': 'Control',
             'score1_map1': 1,
             'score2_map1': 2,
             'winner_map1': 2,
             'map_map2': 'Hollywood',
             'mode_map2': 'Hybrid',
             'score1_map2': 2,
             'score2_map2': 3,
             'winner_map2': 2,
             'map_map3': 'Dorado',
             'mode_map3': 'Escort',
             'score1_map3': 0,
             'score2_map3': 1,
             'winner_map3': 2,
             'map_map4': 'Colosseo',
             'mode_map4': 'Push',
             'score1_map4': '',
             'score2_map4': '',
             'winner_map

In [5]:
# The data format is quite different in `liquipedia/KickoffClash_Qualifiers.txt`:
# a {{MatchMaps}} wrapper with flat |mapN=...|mapNscore=...|mapNwin=... fields.
s_liq2 = '''{{MatchMaps
|date=May 5, 2022 - West
|team1=nye|games1=1
|team2=gla|games2=3
|winner=2
|details={{BracketMatchSummary
|date=May 5, 2022 - 12:00 {{Abbr/PDT}}
|youtube=overwatchleague
|finished=true
|comment=Casters: {{player|Uber|flag=au}} & {{player|Mr X|flag=us}}
|map1=Lijiang Tower|map1score=1-2|map1win=2
|map2=Midtown|map2score=0-2|map2win=2
|map3=Circuit Royal|map3score=3-2|map3win=1
|map4=New Queen Street|map4score=71.89-103.57|map4win=2
|map5=Ilios|map5score=|map5win=skip
|vodgame1=https://youtu.be/4HC6Ev_dlyI?t=234
|vodgame2=https://youtu.be/4HC6Ev_dlyI?t=1613
|vodgame3=https://youtu.be/4HC6Ev_dlyI?t=3233
|vodgame4=https://youtu.be/4HC6Ev_dlyI?t=5011
|mvp=Kevster
|owl=38971
|overgg=15902
|pf=
|wl=
}}
'''

liquipedia/KickoffClash_Qualifiers.txt 文件中存储比赛的格式与其余文件都不相同, 需要额外做标准化处理

In [6]:
def fix(s_liq2):
    """Rewrite a legacy ``{{MatchMaps ...}}`` entry towards the ``{{Match}}`` layout.

    Steps, in order:
    1. drop the ``|details={{BracketMatchSummary`` line;
    2. turn ``|teamN=<code>|gamesN=..`` lines into
       ``|opponentN={{TeamOpponent|<code>|score=}}``;
    3. move the ``|comment=...`` line to the end of the entry, so that the
       caster templates become the last templates;
    4. wrap each of the five ``|mapN=..|mapNscore=a-b|mapNwin=w`` lines into a
       ``{{Map|map=..|mode=..|score1=a|score2=b|winner=w}}`` template, using a
       fixed mode order (Control, Hybrid, Escort, Push, Control).

    Raises ``IndexError`` if one of the five map lines is missing.
    """
    s = s_liq2.replace('|details={{BracketMatchSummary\n', '')
    s = re.sub(r'\|team([0-9])=(.+?)\|\S+\n',
               r'|opponent\1={{TeamOpponent|\2|score=}}\n',
               s)
    # NOTE(review): on entries shaped like the sample, the substitution above has
    # already renamed every '|team' to '|opponent', so this pattern finds nothing
    # and 'bestof=5' is not injected. Left as-is to keep the current output;
    # confirm the intended behaviour before changing it.
    s = re.sub(r'\{\{MatchMaps\n.+\n\|team',
               '{{Match\n|bestof=5\n',
               s)
    s = re.sub(r'\|comment=(.+)\n((\s|\S)+)\n\}\}',
               r'\2\n|comment=\1\n}}',
               s)

    maps_type = ['Control', 'Hybrid', 'Escort', 'Push', 'Control']
    for i, mode in enumerate(maps_type, start=1):
        pattern = rf'\|map{i}=(.+)?\|map{i}score=(.+)?\|map{i}win=(.+)?'
        map_name, score, winner = re.findall(pattern, s)[0]
        # An empty score (skipped map) yields two empty score fields.
        s1, s2 = score.split('-') if '-' in score else ('', '')
        before = f"|map{i}={map_name}|map{i}score={score}|map{i}win={winner}"
        after = f'|map{i}={{{{Map|map={map_name}|mode={mode}|score1={s1}|score2={s2}|winner={winner}}}}}'
        s = s.replace(before, after)
    return s

# Smoke test: normalise the legacy sample entry, then parse it (it has no "|<slot>=" prefix).
parse_wiki(fix(s_liq2), is_name=False)

defaultdict(str,
            {'date': datetime.datetime(2022, 5, 5, 20, 0),
             'team1': 'nye',
             'team2': 'gla',
             'winner': '2',
             'youtube': 'overwatchleague',
             'finished': 'true',
             'vodgame1': 'https://youtu.be/4HC6Ev_dlyI?t=234',
             'vodgame2': 'https://youtu.be/4HC6Ev_dlyI?t=1613',
             'vodgame3': 'https://youtu.be/4HC6Ev_dlyI?t=3233',
             'vodgame4': 'https://youtu.be/4HC6Ev_dlyI?t=5011',
             'mvp': 'Kevster',
             'owl': '38971',
             'overgg': '15902',
             'pf': '',
             'wl': '',
             'bestof': 5,
             'name': '',
             'comment1': 'Uber',
             'comment2': 'Mr X',
             'map_map1': 'Lijiang Tower',
             'mode_map1': 'Control',
             'score1_map1': 1,
             'score2_map1': 2,
             'winner_map1': 2,
             'map_map2': 'Midtown',
             'mode_map2': 'Hybrid',
        

### 整理汇总各阶段数据

In [7]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
# Source files, one per tournament stage.
files = [
    'liquipedia/KickoffClash_Qualifiers.txt',      # 37 / 60 comment data given, fixed manually
    'liquipedia/KickoffClash_Tournament.txt',      # R2M2, R4M2, R5M1(Grand Final) data in file on liquipedia is not standard, fixed manually
    'liquipedia/MidseasonMadness_Qualifiers.txt',
    'liquipedia/MidseasonMadness_Tournament.txt',
    'liquipedia/SummerShowdown_Qualifiers.txt',
]
# Columns extracted from every parsed match: fixed fields first, then the
# winner of each of up to seven maps.
keys = ['bestof', 'date', 'team1', 'team2', 'owl', 'overgg', 'mvp', 'vod', 'comment1', 'comment2']
keys = keys + ['winner_map%d' % n for n in range(1, 8)]

#### 测试各文件中数据是否正常获得

In [8]:
# Smoke test for the legacy-format file: every {{MatchMaps}} entry must survive
# fix() + parse_wiki() without raising.
with open('liquipedia/KickoffClash_Qualifiers.txt') as f:
    data = f.read()
matches = re.findall(r'\{\{MatchMaps\n[\s\S]*?\n\}\}\n', data)
print(len(matches))
for m in matches:
    d = parse_wiki(fix(m), is_name=False)
    # NOTE: `d` is a defaultdict(str), so these look-ups can never fail; they
    # only insert '' for absent keys. The real check is that parsing succeeded.
    for k in keys:
        d[k]

60


In [9]:
# Smoke test for one file in the "|<slot>={{Match ...}}" layout; switch `file`
# to check the others.
file = 'liquipedia/KickoffClash_Tournament.txt'
# file = 'liquipedia/MidseasonMadness_Qualifiers.txt'
# file = 'liquipedia/MidseasonMadness_Tournament.txt'
# file = 'liquipedia/SummerShowdown_Qualifiers.txt'
with open(file) as f:
    data = f.read()
matches = re.findall(r'\|.+\{\{[\s\S]*?\n\}\}\n', data)
print(len(matches))
for m in matches:
    d = parse_wiki(m)
    # NOTE: `d` is a defaultdict(str), so these look-ups can never fail; they
    # only insert '' for absent keys. The real check is that parsing succeeded.
    for k in keys:
        d[k]

14


#### 汇总

In [193]:
# One accumulator list per output column, in the same order as `keys`
# (these lists are reused later when the merged table is built).
N = len(keys)
lb, ld, lt1, lt2, lo, lgg, lmvp, lv, lc1, lc2, lwm1, lwm2, lwm3, lwm4, lwm5, lwm6, lwm7 = [list() for _ in range(N)]
ls1, ls2, lm = [list() for _ in range(3)]
key_columns = [lb, ld, lt1, lt2, lo, lgg, lmvp, lv, lc1, lc2, lwm1, lwm2, lwm3, lwm4, lwm5, lwm6, lwm7]

# Legacy-format entries come first; remember how many there are instead of
# hard-coding the count.
with open('liquipedia/KickoffClash_Qualifiers.txt') as f:
    data = f.read()
matches = re.findall(r'\{\{MatchMaps\n[\s\S]*?\n\}\}\n', data)
n_legacy = len(matches)

prefix = 'liquipedia'
files = ['KickoffClash_Tournament.txt', 'MidseasonMadness_Qualifiers.txt', 'MidseasonMadness_Tournament.txt', 'SummerShowdown_Qualifiers.txt']
for file in files:
    with open(f'{prefix}/{file}') as f:
        data = f.read()
    matches += re.findall(r'\|.+\{\{[\s\S]*?\n\}\}\n', data)

for i, m in enumerate(matches):
    if i < n_legacy:    # entries from KickoffClash_Qualifiers.txt need normalising first
        d = parse_wiki(fix(m), is_name=False)
    else:
        d = parse_wiki(m)
    # Only matches with an MVP recorded are kept.
    if len(d['mvp']) != 0:
        for column, key in zip(key_columns, keys):
            column.append(d[key])
        # Series score and number of maps played. Map slots are numbered 1..7
        # (the previous range(7) looked at 0..6 and never saw map 7). A skipped
        # or absent map has winner '' and is not counted as played.
        winners = [d[f'winner_map{j}'] for j in range(1, 8)]
        ls1.append(sum(1 for w in winners if w == 1))
        ls2.append(sum(1 for w in winners if w == 2))
        lm.append(sum(1 for w in winners if w != ''))
# create data frame
df = pd.DataFrame(np.c_[lb, ld, lt1, lt2, lo, lgg, lmvp, lv, lc1, lc2, lwm1, lwm2, lwm3, lwm4, lwm5, lwm6, lwm7, ls1, ls2, lm],
                  columns=keys + ['score1', 'score2', 'maps'])
df

Unnamed: 0,bestof,date,team1,team2,owl,overgg,mvp,vod,comment1,comment2,winner_map1,winner_map2,winner_map3,winner_map4,winner_map5,winner_map6,winner_map7,score1,score2,maps
0,5,2022-05-06 03:00:00,nye,gla,38971,15902,Kevster,,Uber,Mr X,2,2,1,2,,,,1,3,4
1,5,2022-05-06 04:30:00,sfs,par,38972,15903,FiNN,,Uber,Mr X,1,1,1,,,,,3,0,3
2,5,2022-05-06 06:00:00,fla,atl,38973,15904,UltraViolet,,Jaws,Nekkra,1,2,2,2,,,,1,3,4
3,5,2022-05-07 03:00:00,ldn,sfs,38975,15908,s9mm,,Jaws,Nekkra,2,2,2,,,,,0,3,3
4,5,2022-05-07 04:30:00,bos,van,38974,15907,Punk,,LemonKiwi,LEGDAY,1,2,1,2,1,,,3,2,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
207,5,2022-09-02 18:00:00,gzc,cdh,39249,,ChoiSehwan,,,,1,1,1,,,,,3,0,3
208,5,2022-09-02 19:30:00,shd,val,39250,,WhoRU,,,,1,1,1,,,,,3,0,3
209,5,2022-09-03 03:00:00,atl,gla,39227,,Happy,,,,2,2,2,,,,,0,3,3
210,5,2022-09-03 04:30:00,par,nye,39228,,Yaki,,,,2,2,1,2,,,,1,3,4


#### 和以前结果合并

In [37]:
# Team code (as used on liquipedia) -> Chinese team name (as used in the older data set).
dteam = {'atl': '亚特兰大君临队',
 'bos': '波士顿崛起队',
 'cdh': '成都猎人队',
 'dal': '达拉斯燃料队',
 'fla': '佛罗里达狂欢队',
 'gla': '洛杉矶角斗士队',
 'gzc': '广州冲锋队',
 'hou': '休斯敦神枪手队',
 'hzs': '杭州闪电队',
 'ldn': '伦敦喷火战斗机队',
 'nye': '纽约九霄天擎队',
 'par': '巴黎永生队',
 'phi': '费城融合队',
 'seo': '首尔王朝队',
 'sfs': '旧金山震动队',
 'shd': '上海龙之队',
 'tor': '多伦多捍卫者队',
 'val': '洛杉矶英勇队',
 'van': '温哥华泰坦队',
 'was': '华盛顿正义队'}

In [192]:
df_old = pd.read_excel('owl_data.xlsx', usecols=range(1, 13))       # Some titles misspelled '休斯敦神枪手队' as '休斯顿神枪手队'; all were replaced with the former
df_old

Unnamed: 0,month,day,play,team1,team2,length,bvid,name1,name2,score1,score2,maps
0,8,20,7935,洛杉矶英勇队,费城融合队,117.600000,BV1eT411c71L,狂疯小逗,老陈,2,3,5
1,8,20,9753,上海龙之队,杭州闪电队,70.350000,BV1rd4y1w72n,九朵,老陈,3,0,3
2,8,20,15333,首尔王朝队,成都猎人队,82.700000,BV1qU4y1r7M1,九朵,狂疯小逗,3,0,3
3,8,20,8725,多伦多捍卫者队,达拉斯燃料队,85.450000,BV1rT411c7db,娓娓,杨小龙,1,3,4
4,8,20,5075,旧金山震动队,波士顿崛起队,57.116667,BV13P411j7as,杨小龙,时光,3,0,3
...,...,...,...,...,...,...,...,...,...,...,...,...
178,5,7,35572,波士顿崛起队,温哥华泰坦队,117.050000,BV1KA4y1S7Jh,九朵,Alan,3,2,5
179,5,7,36078,伦敦喷火战斗机队,旧金山震动队,65.916667,BV1GA4y1S76n,九朵,时光,0,3,3
180,5,6,68021,佛罗里达狂欢队,亚特兰大君临队,83.733333,BV1XL4y1c74j,木子,赤小兔,1,3,4
181,5,6,53709,旧金山震动队,巴黎永生队,56.133333,BV1Y54y1f7j8,木子,赤小兔,3,0,3


In [191]:
# The first len(df_old) parsed matches are the ones the older data set covers;
# sort them newest-first so they can be compared positionally with df_old.
# (Derived from df_old rather than hard-coding the row count.)
df_mini = df[:len(df_old)].sort_values('date', ascending=False)
df_mini

Unnamed: 0,bestof,date,team1,team2,owl,overgg,mvp,vod,comment1,comment2,winner_map1,winner_map2,winner_map3,winner_map4,winner_map5,winner_map6,winner_map7,score1,score2,maps
182,5,2022-08-20 21:00:00,val,phi,39059,16042,ZEST,https://www.youtube.com/watch?v=eyOqneIx2l0,Achilios,AVRL,1,2,2,1,2,,,2,3,5
181,5,2022-08-20 19:30:00,shd,hzs,39058,16041,LIP,https://www.youtube.com/watch?v=ABvOrx-Lx0g,Achilios,AVRL,1,1,1,,,,,3,0,3
180,5,2022-08-20 18:00:00,seo,cdh,39057,16040,Stalk3r,https://www.youtube.com/watch?v=7L_IoxIhjgU,Achilios,AVRL,1,1,1,,,,,3,0,3
179,5,2022-08-20 06:00:00,tor,dal,39211,16039,Edison,https://www.youtube.com/watch?v=O56NzdSUWlA,Jaws,Mr X,2,1,2,2,,,,1,3,4
178,5,2022-08-20 04:30:00,sfs,bos,39210,16038,Proper,https://www.youtube.com/watch?v=KjQ9SoOCpC8,Jaws,Mr X,1,1,1,,,,,3,0,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4,5,2022-05-07 04:30:00,bos,van,38974,15907,Punk,,LemonKiwi,LEGDAY,1,2,1,2,1,,,3,2,5
3,5,2022-05-07 03:00:00,ldn,sfs,38975,15908,s9mm,,Jaws,Nekkra,2,2,2,,,,,0,3,3
2,5,2022-05-06 06:00:00,fla,atl,38973,15904,UltraViolet,,Jaws,Nekkra,1,2,2,2,,,,1,3,4
1,5,2022-05-06 04:30:00,sfs,par,38972,15903,FiNN,,Uber,Mr X,1,1,1,,,,,3,0,3


Switch team
```
\|opponent1=\{\{TeamOpponent\|(.+)\|score=\}\}\n\s+\|opponent2=\{\{TeamOpponent\|(.+)\|score=\}\}
|opponent1={{TeamOpponent|$2|score=}}\n    |opponent2={{TeamOpponent|$1|score=}}
```

Switch score
```
Map\|map=(.+)\|mode=(.+)\|score1=(.+)\|score2=(.+)\|
Map|map=$1|mode=$2|score1=$4|score2=$3|
```

In [188]:
# Positional consistency check between the parsed matches and the older table:
# print the row position once for every field that disagrees (teams compared
# order-insensitively, then score1, score2 and the map count).
paired_columns = zip(df_mini['team1'], df_mini['team2'],
                     df_mini['score1'], df_mini['score2'], df_mini['maps'])
for i, (t1, t2, s1, s2, nm) in enumerate(paired_columns):
    r = df_old.iloc[i]
    disagreements = (
        {dteam[t1], dteam[t2]} != {r['team1'], r['team2']},
        s1 != r['score1'],
        s2 != r['score2'],
        nm != r['maps'],
    )
    for differs in disagreements:
        if differs:
            print(i)

In [197]:
# Number of parsed matches that the older table has no caster names for.
# Computed from the data so the cell keeps working as new matches are added.
dN = len(lb) - len(df_old)
# df_old is ordered newest-first, the parsed lists oldest-first: reverse the
# caster names and pad the uncovered matches with empty strings.
old_name1 = df_old['name1'][::-1].tolist() + [""] * dN
old_name2 = df_old['name2'][::-1].tolist() + [""] * dN
df_total = pd.DataFrame(np.c_[lb, ld, lt1, lt2, lo, lgg, lmvp, lv, lc1, lc2, lwm1, lwm2, lwm3, lwm4, lwm5, lwm6, lwm7, ls1, ls2, lm,
                              old_name1, old_name2],
                        columns=keys + ['score1', 'score2', 'maps', 'name1', 'name2'])
df_total.to_excel('owl_raw_keep_update.xlsx')

### 分析

In [204]:
# Reload the merged table from disk. usecols=range(1, 23) reads columns 1..22 and
# skips column 0 (presumably the index column written by to_excel).
df = pd.read_excel('owl_raw_keep_update.xlsx', usecols=range(1, 23))
df[0:1]

Unnamed: 0,bestof,date,team1,team2,owl,overgg,mvp,vod,comment1,comment2,...,winner_map3,winner_map4,winner_map5,winner_map6,winner_map7,score1,score2,maps,name1,name2
0,5,2022-05-06 03:00:00,nye,gla,38971.0,15902.0,Kevster,,Uber,Mr X,...,1,2.0,,,,1,3,4,Alan,木子


In [205]:
# Every caster that appears in either seat. Rows without caster names come back
# from Excel as NaN; drop them so the set holds only strings (otherwise the
# join below raises TypeError and NaN would be treated as a caster).
names = set(df['name1'].dropna()) | set(df['name2'].dropna())
print(', '.join(names))

银河, 杨小龙, 花猫, 赤小兔, 狂疯小逗, 蛋壳, 九朵, 桃叽, Alan, Roy, 小霜, Youngjin, 木子, 老陈, 2dm, 时光, 娓娓


In [207]:
# Per-caster statistics, one entry per caster in iteration order of `names`:
#   total  - matches cast            maps   - maps cast in total
#   zero / one / two - matches that lasted 3 / 4 / 5 maps
#   mzeros / mones / mtwos - longest run of consecutive 3- / 4- / 5-map matches
# (The unused `all` accumulator was dropped: it shadowed the builtin `all`.)
total, maps, zero, mzeros, one, mones, two, mtwos = [], [], [], [], [], [], [], []
for name in names:
    total_match, total_map = 0, 0
    counts = {3: 0, 4: 0, 5: 0}    # matches by number of maps played
    longest = {3: 0, 4: 0, 5: 0}   # longest streak per map count
    previous, run = None, 0        # map count of the last counted match / current streak
    for n1, n2, m in zip(df['name1'], df['name2'], df['maps']):
        if name != n1 and name != n2:
            continue
        total_match += 1
        total_map += m
        if m not in counts:
            # Other map counts are tallied above but neither extend nor break a streak.
            continue
        counts[m] += 1
        run = run + 1 if m == previous else 1
        previous = m
        longest[m] = max(longest[m], run)
    total.append(total_match)
    zero.append(counts[3])
    one.append(counts[4])
    two.append(counts[5])
    mzeros.append(longest[3])
    mones.append(longest[4])
    mtwos.append(longest[5])
    maps.append(total_map)

total, maps, zero, one, two = np.array(total), np.array(maps), np.array(zero), np.array(one), np.array(two)

In [210]:
# Assemble the per-caster summary table: raw counts, longest streaks, the
# average number of maps per match and the share of 3-/4-/5-map matches.
summary_columns = {
    'names': names,
    'total': total,
    'maps': maps,
    'ave': maps/total,
    'zero': zero,
    'one': one,
    'two': two,
    'max0': mzeros,
    'max1': mones,
    'max2': mtwos,
    'zero_rate': zero/total,
    'one_rate': one/total,
    'two_rate': two/total,
}
df_names = pd.DataFrame(list(zip(*summary_columns.values())),
                        columns=list(summary_columns))

# Shown sorted by the share of five-map matches; other useful sort keys are
# 'zero_rate', 'one_rate', 'max0' and 'max2'.
df_names.sort_values(by='two_rate', ascending=False)

Unnamed: 0,names,total,maps,ave,zero,one,two,max0,max1,max2,zero_rate,one_rate,two_rate
0,银河,6,25,4.166667,2,1,3,1,1,3,0.333333,0.166667,0.5
5,蛋壳,8,30,3.75,5,0,3,2,0,2,0.625,0.0,0.375
11,Youngjin,22,92,4.181818,4,10,8,3,4,2,0.181818,0.454545,0.363636
13,老陈,22,84,3.818182,11,4,7,3,1,2,0.5,0.181818,0.318182
8,Alan,67,264,3.940299,22,27,18,3,3,2,0.328358,0.402985,0.268657
4,狂疯小逗,4,14,3.5,3,0,1,2,0,1,0.75,0.0,0.25
6,九朵,56,216,3.857143,22,20,14,7,3,3,0.392857,0.357143,0.25
3,赤小兔,16,64,4.0,4,8,4,1,3,1,0.25,0.5,0.25
15,时光,52,201,3.865385,21,18,12,5,5,3,0.403846,0.346154,0.230769
9,Roy,18,67,3.722222,9,5,4,3,2,2,0.5,0.277778,0.222222


In [211]:
# plot
import plotly.express as px

In [212]:
# Long-format table with one row per (caster, map count): the share of that
# caster's 3-, 4- and 5-map matches. Local names are chosen so that the cell no
# longer overwrites `names`, `maps`, `total`, `zero`, `one`, `two` (inputs of
# the summary table) or shadows the builtin `map`.
cum_names, cum_maps, cum_rates = [], [], []
for _, row in df_names.sort_values(by='one_rate', ascending=False).iterrows():
    by_map_count = {3: row['zero'], 4: row['one'], 5: row['two']}
    n_matches = row['zero'] + row['one'] + row['two']
    for n_maps, n_with_count in by_map_count.items():
        cum_names.append(row['names'])
        cum_maps.append(n_maps)
        cum_rates.append(n_with_count / n_matches)
df_cum = pd.DataFrame(list(zip(cum_names, cum_maps, cum_rates)),
                      columns=['names', 'maps', 'rate'])

fig = px.histogram(df_cum,
                   x="names", y="rate",
                   color='maps',
                   labels={'names':'解说', 'rate':'解说比赛地图数占比'},
                   height=400)
fig.show()

In [213]:
# Share of five-map matches per caster, limited to casters with at least 10
# matches; bars are coloured by the longest five-map streak.
fig = px.histogram(
    df_names.loc[df_names['total'].ge(10)].sort_values('two_rate', ascending=False),
    x="names",
    y="two_rate",
    color='max2',
    histfunc='avg',
    labels={'names':'解说', 'two_rate':'解说到第 5 张地图占比'},
    height=400,
)
fig.show()

In [214]:
# Share of five-map matches per caster, all casters; bars are coloured by the
# longest five-map streak.
fig = px.histogram(
    df_names.sort_values('two_rate', ascending=False),
    x="names",
    y="two_rate",
    color='max2',
    histfunc='avg',
    labels={'names':'解说', 'two_rate':'解说到第 5 张地图占比'},
    height=400,
)
fig.show()

In [215]:
# Share of four-map matches per caster, limited to casters with at least 10
# matches; bars are coloured by the longest four-map streak.
fig = px.histogram(
    df_names.loc[df_names['total'].ge(10)].sort_values('one_rate', ascending=False),
    x="names",
    y="one_rate",
    color='max1',
    histfunc='avg',
    labels={'names':'解说', 'one_rate':'解说比赛三一重工占比'},
    height=400,
)
fig.show()

In [216]:
# Share of four-map matches per caster, all casters; bars are coloured by the
# longest four-map streak.
fig = px.histogram(
    df_names.sort_values('one_rate', ascending=False),
    x="names",
    y="one_rate",
    color='max1',
    histfunc='avg',
    labels={'names':'解说', 'one_rate':'解说比赛共 4 张地图占比'},
    height=400,
)
fig.show()

In [217]:
# Longest run of consecutive three-map matches per caster.
fig = px.histogram(
    df_names.sort_values('max0', ascending=False),
    x="names",
    y="max0",
    histfunc='avg',
    labels={'names':'解说', 'max0':'连续不加班次数'},
    height=400,
)
fig.show()

In [218]:
# Share of three-map matches per caster as a line chart; each point is
# labelled with its y value formatted to two decimals.
fig = px.line(
    df_names.sort_values('zero_rate', ascending=False),
    x="names",
    y="zero_rate",
    labels={'names':'解说', 'zero_rate':'3-0占比'},
    text="ave",
)
fig.update_traces(textposition="top right", texttemplate="%{y:.2f}")
fig.show()

In [219]:
# Average number of maps per match for each caster as a line chart; each point
# is labelled with its y value formatted to two decimals.
fig = px.line(
    df_names.sort_values('ave', ascending=False),
    x="names",
    y="ave",
    labels={'names':'解说', 'ave':'平均地图数'},
    text="ave",
)
fig.update_traces(textposition="top right", texttemplate="%{y:.2f}")
fig.show()

In [227]:
# 站位决定成败
names = set(df['name1']) | set(df['name2'])
print(', '.join(list(names)))

total, win = [], []
for name in names:
    t, w = 0, 0
    for _, row in df.iterrows():
        s1, s2 = row['score1'], row['score2']
        cond1 = (name == row['name1']) and (s1 > s2)
        cond2 = (name == row['name2']) and (s1 < s2)
        if name in [row['name1'], row['name2']]:
            t += 1
        if cond1 or cond2:
            w += 1
    total.append(t)
    win.append(w)

total, win = np.array(total), np.array(win)

银河, 杨小龙, 花猫, 赤小兔, 狂疯小逗, 蛋壳, 九朵, 桃叽, Alan, Roy, 小霜, Youngjin, 木子, 老陈, 2dm, 时光, 娓娓


In [228]:
# Win-rate table per caster, shown with the highest win rate first.
win_columns = {
    'names': names,
    'total': total,
    'win': win,
    'win_rate': win/total,
}
df_names = pd.DataFrame(list(zip(*win_columns.values())),
                        columns=list(win_columns))
df_names.sort_values(by='win_rate', ascending=False)

Unnamed: 0,names,total,win,win_rate
1,杨小龙,16,11,0.6875
10,小霜,6,4,0.666667
11,Youngjin,22,13,0.590909
6,九朵,56,32,0.571429
3,赤小兔,16,9,0.5625
13,老陈,22,12,0.545455
7,桃叽,28,15,0.535714
15,时光,52,27,0.519231
16,娓娓,14,7,0.5
4,狂疯小逗,4,2,0.5
