-
Notifications
You must be signed in to change notification settings - Fork 0
/
get_liquipedia_data.py
165 lines (145 loc) · 7.59 KB
/
get_liquipedia_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
import json
import os
import numpy as np
import pandas as pd
import pathlib
import re
import requests
import time
from datetime import datetime
from math import ceil
# IMPORTANT:
# API Terms of use: https://liquipedia.net/api-terms-of-use
# - Rate limit all HTTP requests to no more than 1 request per 2 seconds.
# Check if we already gathered data from the API in the last `hours_trigger` hours.
# Both result files written by the refresh step have to exist and be recent enough,
# because both of them are read back unconditionally further down in this script.
hours_trigger = 36
use_cache = False
cache_files = (pathlib.Path('player_results.json'), pathlib.Path('team_results.json'))
try:
    # The oldest file decides whether the cache as a whole is still fresh.
    m_time = min(datetime.fromtimestamp(f_name.stat().st_mtime) for f_name in cache_files)
except FileNotFoundError as err:
    # Report the file that is actually missing instead of a hard-coded name.
    print("file '{}' does not exist. Retrieving data from Liquipedia API".format(err.filename))
else:
    # Age of the cache in hours.
    time_diff = (datetime.now() - m_time).total_seconds() / 60 / 60
    if time_diff < hours_trigger:
        use_cache = True
    else:
        print('Cache is outdated (at least {} hours old). Retrieving data from Liquipedia API'.format(hours_trigger))
def _parse_sponsors(raw):
    """
    Convert the raw 'Has sponsor' string into a list of tuples.

    Square brackets are stripped, the string is split on '<br>' and every entry is
    split on whitespace. Each tuple is (remaining tokens concatenated without a
    separator, first token). Presumably the first token is the sponsor's link and
    the rest its name -- verify against real API output.

    :param raw: sponsor string as returned by the API
    :return: list of 2-tuples, one per non-empty sponsor entry
    """
    entries = re.sub(r'\[|\]', '', raw).split('<br>')
    sponsors = []
    for entry in entries:
        tokens = entry.split()
        # an empty entry (e.g. a trailing '<br>') has no tokens and is skipped
        if tokens:
            sponsors.append((''.join(tokens[1:]), tokens[0]))
    return sponsors


def _normalise_printout(prop, value):
    """
    Reduce the first value of one printout property to the form stored in the cache.

    :param prop: name of the property, e.g. 'Has birth day'
    :param value: first element of the property's value list
    :return: simplified value; properties without a special rule are returned unchanged
    """
    if prop == 'Has ids':
        return value['fulltext']
    if prop in ('Has birth day', 'Modification date'):
        return value['raw']
    if prop == 'Has sponsor':
        return _parse_sponsors(value)
    if prop == 'Is active':
        # the API encodes the boolean as the string 't'
        return value == 't'
    return value


def get_entity_data(entity_list, printouts, sort_property):
    """
    Query the Liquipedia (Overwatch) 'ask' API for a list of entities.

    The entities are requested in batches of 15 and the function sleeps for
    2 seconds after every request to follow the API terms of use. Requests that
    fail and responses that cannot be parsed are reported on stdout and skipped.

    :param entity_list: list of all unique entities you want to query in Liquipedia API
    :param printouts: properties the API shall return for each entity
    :param sort_property: property used for sorting entities
    :return: dictionary, key: lower-cased name of entity, value: dictionary of
             properties and values of the entity (properties without a value are omitted)
    """
    base_uri = 'https://liquipedia.net/overwatch/api.php?'
    # Limit the amount of entities queried to 15 at a time. More than this is not possible in the API.
    batch_size = 15
    # API terms of use: no more than 1 request per 2 seconds
    request_delay = 2
    # players are looked up by their (lower-cased) sort id, everything else by id
    by_sort_id = sort_property == 'Has_id_sort'
    lookup_property = 'Has_id_sort' if by_sort_id else 'Has_id'
    headers = {
        'User-Agent': 'UniversityOfBergen-INFO216-Group1',
        'Content-Type': 'application/x-www-form-urlencoded'
    }

    all_requests = []
    for start in range(0, len(entity_list), batch_size):
        entities = entity_list[start:start + batch_size]
        query = '||'.join(entities)
        if by_sort_id:
            query = query.lower()
        params = {
            'action': 'ask',
            'query': '[[{}::{}]]|{}|sort={}'.format(lookup_property, query, printouts, sort_property),
            'format': 'json'
        }
        try:
            # a timeout keeps one hanging connection from blocking the whole run
            r = requests.get(base_uri, params=params, headers=headers, timeout=30)
            all_requests.append(r)
        except requests.RequestException as err:
            print('failed reee:', query, err)
        # sleep after every request (also the last one) so that a following call
        # of this function still respects the rate limit
        time.sleep(request_delay)

    # loop over all entities and retrieve only necessary info
    results = {}
    for request in all_requests:
        try:
            found = request.json()['query']['results']
        except (ValueError, KeyError, TypeError) as err:
            print('unexpected API response, skipping batch:', err)
            continue
        # NOTE(review): an empty result set may be returned as a list instead of
        # a mapping; anything that is not a mapping carries no entities.
        if not isinstance(found, dict):
            continue
        for entity, data in found.items():
            properties = data['printouts']
            ids = properties.get('Has id')
            # prefer the entity's own id when it differs from the key used by the API
            if ids and entity.lower() != str(ids[0]).lower():
                entity = ids[0]
            record = results[str(entity).lower()] = {}
            for prop, values in properties.items():
                if values:
                    record[prop] = _normalise_printout(prop, values[0])
    return results


if not use_cache:
    # import datasets to one big df
    all_dfs = []
    phs_dir = os.path.join(os.getcwd(), 'phs_data')
    for sub_dir in os.listdir(phs_dir):
        # define full path to the sub_dir
        full_path = os.path.join(phs_dir, sub_dir)
        # if sub_dir is not a directory, it's the map stats dataset
        if os.path.isfile(full_path):
            map_stats = pd.read_csv(full_path)
            continue
        # get new dfs
        df = pd.concat([pd.read_csv(os.path.join(full_path, x)) for x in os.listdir(full_path)])
        # the 2020 and 2021 datasets use different column names, align them with the others
        if sub_dir in ('phs_2020', 'phs_2021'):
            df = df.rename(columns={'esports_match_id': 'match_id', 'tournament_title': 'stage',
                                    'team_name': 'team', 'player_name': 'player',
                                    'hero_name': 'hero'})
        # add df_new to df
        all_dfs.append(df)
    dfs = pd.concat(all_dfs)
    # Map the player ids used in the datasets to the ids used for the API lookup.
    # Assigning the result back avoids mutating a temporary column object in place.
    dfs['player'] = dfs['player'].replace({'blase': 'blasé', 'frd': 'frdwnr', 'lr1s': 'ir1s'})

    # collect entity lists (missing values cannot be queried, so they are dropped)
    player_list = list(dfs['player'].dropna().unique())
    team_list = list(dfs['team'].dropna().unique())

    # players
    printouts = '?Has name|?Has birth day|?Has age|?Has nationality|?Has id|?Has role|?Modification date'
    player_results = get_entity_data(player_list, printouts, 'Has_id_sort')

    # teams
    printouts = '?Has name|?Has region|?Has location|?Has site|?Has twitter|?Has instagram profile|?Was created|?Is active|?Is tier|?Has sponsor|?Modification date'
    team_results = get_entity_data(team_list, printouts, 'Has_name')

    # output results from API to .json files as cache
    with open('team_results.json', 'w', encoding='utf-8') as f:
        json.dump(team_results, f)
    with open('player_results.json', 'w', encoding='utf-8') as f:
        json.dump(player_results, f)
# Read the cached results back from disk (player, team and map data, in that order).
_cached_payloads = []
for _cache_name in ('player_results.json', 'team_results.json', 'map_results.json'):
    with open(_cache_name, 'r', encoding='utf-8') as _cache_file:
        _cached_payloads.append(json.load(_cache_file))
player_results, team_results, map_results = _cached_payloads

# Some players have changed id since they played in OWL.
# The Liquipedia API could not provide this data automatically, so the entries
# below were researched manually and are added to (or replace) the cached ones.
player_results.update({
    'frd': {
        "Has name": "Nathan Goebel", "Has birth day": "1/1999/8/11", "Has age": 21,
        "Has nationality": "United States", "Has id": "frd", "Has role": "Tank",
    },
    'blasé': {
        "Has name": "Jeffrey Tsang", "Has birth day": "1/1999/2/22", "Has age": 22,
        "Has nationality": "South Korea", "Has id": "blasé", "Has role": "DPS",
    },
    'lr1s': {
        "Has name": "Kim Seung-hyun", "Has birth day": "1/2000/9/10", "Has age": 20,
        "Has nationality": "South Korea", "Has id": "lr1s", "Has role": "Support",
    },
    'freefeel': {
        "Has name": "Xu Peixuan", "Has birth day": "1/1995/7/29", "Has age": 25,
        "Has nationality": "China", "Has id": "freefeel", "Has role": "Support",
    },
    'snt': {
        "Has name": "Kim Sung-hoon", "Has birth day": "1/1994/4/13", "Has age": 27,
        "Has nationality": "South Korea", "Has id": "snt", "Has role": "Coach",
    },
    # no birth day / age known for this player
    'mouffin': {
        "Has name": "Walid Bassal",
        "Has nationality": "North America", "Has id": "mouffin", "Has role": "Tank",
    },
    'fiveking': {
        "Has name": "Chen Zhaoyu", "Has birth day": "1/1997/12/8", "Has age": 23,
        "Has nationality": "China", "Has id": "fiveking", "Has role": "Support",
    },
})