In [None]:
import pandas as pd
import json
import requests
from tqdm import tqdm
from google.colab import drive
# Mount Google Drive at /content/drive; the CSV paths used below live under this mount point.
drive.mount('/content/drive')

Сначала нужно получить данные о нужных матчах: их можно взять GET-запросом к API https://analytics.deadlock-api.com/ (эндпоинт /v1/matches/search).

Далее проходимся по каждому матчу: запрашиваем данные по всем временным отметкам игры (эндпоинт /v1/matches/{match_id}/timestamps), считываем их в DataFrame и присоединяем к данным уже обработанных матчей.

In [None]:
# Sequentially download the per-match timestamp data and accumulate it in `df`.
DATA_DIR = '/content/drive/MyDrive/deadlock_data'
CHECKPOINT_EVERY = 1000  # write a snapshot after this many successfully fetched matches
REQUEST_TIMEOUT = 30     # seconds; requests waits indefinitely unless a timeout is given

data = pd.read_csv(f'{DATA_DIR}/clean_data.csv')
match_ids = data['match_id']
i = 0          # number of matches fetched successfully so far
frames = []    # one DataFrame per match; concatenated only at checkpoints
df = pd.DataFrame()
try:
    for match_id in tqdm(match_ids):
        url = f'https://analytics.deadlock-api.com/v1/matches/{match_id}/timestamps'
        try:
            response = requests.get(url, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            # A single network failure should not abort the whole multi-hour run.
            print(f'{match_id}: {exc}')
            continue
        if response.status_code != 200:
            print(response.status_code)
            continue
        i += 1
        match_info = json.loads(response.text)
        # Appending to a list avoids re-copying the whole accumulated frame on
        # every iteration, which pd.concat inside the loop would do.
        frames.append(pd.DataFrame(match_info))
        # Checkpoint on the match counter: the row count grows by a whole match
        # at a time, so it practically never lands exactly on a multiple of 1000.
        if i % CHECKPOINT_EVERY == 0:
            df = pd.concat(frames)
            df.to_csv(f'{DATA_DIR}/df_{i}.csv', index=False)
finally:
    # Runs on normal completion and on interruption (e.g. KeyboardInterrupt),
    # so `df` always reflects everything fetched so far and the tail after the
    # last checkpoint is not lost.
    if frames:
        df = pd.concat(frames)
        df.to_csv(f'{DATA_DIR}/df_{i}.csv', index=False)

  0%|          | 154/63702 [00:37<4:17:08,  4.12it/s]


KeyboardInterrupt: 

Аналогичный код, но с параллельной загрузкой в несколько потоков с помощью модуля concurrent.futures (ThreadPoolExecutor).

In [None]:
from concurrent.futures import ThreadPoolExecutor, as_completed
import requests
import pandas as pd
import json
from tqdm import tqdm

def fetch_match_data(match_id, timeout=30):
    """Fetch the timestamp data of one match from the Deadlock analytics API.

    Args:
        match_id: Identifier substituted into the
            ``/v1/matches/{match_id}/timestamps`` URL.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block forever and stall a worker.

    Returns:
        The decoded JSON payload when the server answers with HTTP 200, or
        ``None`` when the request fails, the status is not 200, or the body
        is not valid JSON.
    """
    url = f'https://analytics.deadlock-api.com/v1/matches/{match_id}/timestamps'
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Connection errors and timeouts are treated like any other failed
        # fetch so that one bad request cannot abort the whole batch.
        return None
    if response.status_code != 200:
        return None
    try:
        return json.loads(response.text)
    except ValueError:
        # json.JSONDecodeError is a ValueError: a 200 response with a broken body.
        return None

# Download the remaining matches in parallel and accumulate the results in `df`.
DATA_DIR = '/content/drive/MyDrive/deadlock_data'
RESUME_FROM = 51295      # positional offset into match_ids; presumably where an earlier run stopped — confirm
MAX_WORKERS = 10         # size of the thread pool
CHECKPOINT_EVERY = 1000  # write a snapshot after this many successfully fetched matches

match_ids = data['match_id']
frames = []  # one DataFrame per match; concatenated only at checkpoints
df = pd.DataFrame()

try:
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        futures = [
            executor.submit(fetch_match_data, match_id)
            for match_id in match_ids.iloc[RESUME_FROM:]
        ]
        for future in tqdm(as_completed(futures), total=len(futures)):
            try:
                match_info = future.result()
            except requests.RequestException as exc:
                # future.result() re-raises whatever the worker raised; skip
                # this match instead of losing everything collected so far.
                print(f'request failed: {exc}')
                continue
            if not match_info:
                continue
            # Appending to a list avoids re-copying the whole accumulated
            # frame for every match, which pd.concat inside the loop would do.
            frames.append(pd.DataFrame(match_info))
            # Checkpoint on the number of matches: the row count grows by a
            # whole match at a time and practically never hits an exact
            # multiple of a fixed step.
            if len(frames) % CHECKPOINT_EVERY == 0:
                df = pd.concat(frames)
                df.to_csv(f'{DATA_DIR}/df_batch_{len(df)}.csv', index=False)
finally:
    # Keep `df` up to date even if the loop is interrupted.
    if frames:
        df = pd.concat(frames)

# Final snapshot containing everything that was fetched.
df.to_csv(f'{DATA_DIR}/df_batch_{len(df)}.csv', index=False)


100%|██████████| 12407/12407 [41:32<00:00,  4.98it/s]
