In [1]:
import pandas as pd

In [2]:
# Read the CSV file into a DataFrame and preview its first rows.
file = "Worlds_2021.csv"
df = pd.read_csv(file)
df.head()

Unnamed: 0,game_id,game_date,game_length,blue_teamname,blue_kills,blue_deaths,blue_assists,blue_tower_kills,blue_inhibitor_kills,blue_herald_kills,...,red_jun_totalDamageDealtToChampions,red_jun_totalDamageTaken,red_jun_totalTimeCrowdControlDealt,red_jun_firstBloodAssist,red_jun_firstBloodKill,red_jun_firstInhibitorAssist,red_jun_firstInhibitorKill,red_jun_firstTowerAssist,red_jun_firstTowerKill,blue_wins
0,1800,2021-11-06T21:16:55.000Z,2139,DK,4,16,10,4,0,1,...,11083,36466,524,0,0,1,0,0,1,0
1,1788,2021-11-06T22:19:44.000Z,1938,DK,22,3,53,10,1,1,...,7687,34912,569,0,0,0,0,0,0,1
2,1787,2021-11-06T23:15:09.000Z,2162,EDG,7,17,20,3,0,1,...,6597,30946,546,0,0,0,1,0,0,0
3,1786,2021-11-07T00:15:10.000Z,1988,EDG,6,3,13,9,1,2,...,4113,25888,258,0,0,0,0,0,0,1
4,1768,2021-11-07T01:12:34.000Z,2479,DK,10,21,18,6,0,2,...,12436,52899,719,0,0,1,0,0,0,0


In [3]:
def convert_to_teamdata(df):
    """
    Reshape a one-row-per-game table into a one-row-per-team table.

    Every column whose name starts with ``blue_`` or ``red_`` (except the
    game-level ``blue_wins`` flag) is copied into a side-neutral column with
    that prefix removed. The red-side rows are stacked underneath the
    blue-side rows, and the game-level columns ``game_date``, ``game_length``
    and ``blue_wins`` are attached to both rows of every game.

    Parameters
    ----------
    df : pandas.DataFrame
        Game-level data holding ``game_date``, ``game_length``, ``blue_wins``
        and the ``blue_*`` / ``red_*`` columns. The frame is left unmodified.

    Returns
    -------
    pandas.DataFrame
        Two rows per input game, sorted by ``game_date`` from newest to
        oldest. ``game_date`` is converted with ``pd.to_datetime``. Note that
        ``blue_wins`` keeps its game-level meaning on the red-side rows too.
    """
    general_attr = ["game_date", "game_length", "blue_wins"]

    def side_frame(prefix):
        # Match on the prefix only, so a name that merely contains "blue" or
        # "red" somewhere else is neither picked up nor mangled.
        # NOTE: if the sides are ever renamed (e.g. to a_/b_), the prefixes
        # passed to this helper must change accordingly.
        cols = [
            col for col in df.columns
            if col.startswith(prefix) and col not in general_attr
        ]
        side = df[cols].copy()  # copy: relabel without touching the caller's frame
        side.columns = [col[len(prefix):] for col in cols]
        return side

    # Give blue and red the same, side-neutral column names.
    blue = side_frame("blue_")
    red = side_frame("red_")

    # Parse the dates on a new frame instead of overwriting df["game_date"].
    general = df[general_attr].assign(game_date=pd.to_datetime(df["game_date"]))

    # Stack the blue rows on top of the red rows; the game-level columns are
    # repeated once per side so they line up with the stacked rows.
    team_data = pd.concat([blue, red], axis=0, ignore_index=True)
    general_twice = pd.concat([general, general], axis=0, ignore_index=True)
    for col in general_attr:
        team_data[col] = general_twice[col]

    # A stable sort keeps the blue row ahead of the red row of the same game.
    return team_data.sort_values("game_date", ascending=False, kind="mergesort")

# Build the per-team table from the loaded game table and preview its first rows.
team_data = convert_to_teamdata(df)
team_data.head()

Unnamed: 0,teamname,kills,deaths,assists,tower_kills,inhibitor_kills,herald_kills,dragon_kills,elder_dragon_kills,baron_kills,...,jun_totalTimeCrowdControlDealt,jun_firstBloodAssist,jun_firstBloodKill,jun_firstInhibitorAssist,jun_firstInhibitorKill,jun_firstTowerAssist,jun_firstTowerKill,game_date,game_length,blue_wins
4,DK,10,21,18,6,0,2,1,0,1,...,344,0,0,0,0,0,1,2021-11-07 01:12:34+00:00,2479,0
125,EDG,21,10,51,11,3,0,5,1,1,...,719,0,0,1,0,0,0,2021-11-07 01:12:34+00:00,2479,0
3,EDG,6,3,13,9,1,2,4,0,1,...,173,0,0,0,0,0,0,2021-11-07 00:15:10+00:00,1988,1
124,DK,3,6,7,2,0,0,0,0,0,...,258,0,0,0,0,0,0,2021-11-07 00:15:10+00:00,1988,1
2,EDG,7,17,20,3,0,1,0,0,0,...,101,0,0,0,0,0,0,2021-11-06 23:15:09+00:00,2162,0


In [4]:
# Drop repeated column names, keeping only the first occurrence of each,
# so that every remaining column label in team_data is unique.
team_data = team_data.loc[:, ~team_data.columns.duplicated()]

def find_most_recent_games(df, team_name, n_games=3, compete_date=None):
    """
    Return the most recent games of one team, optionally before a cut-off date.

    Parameters
    ----------
    df : pandas.DataFrame
        Per-team game rows with at least ``teamname`` and ``game_date``.
    team_name : str
        Value of ``teamname`` to select.
    n_games : int, default 3
        Maximum number of rows to return.
    compete_date : optional
        When given, only rows whose ``game_date`` is strictly earlier than
        this value are considered.

    Returns
    -------
    pandas.DataFrame
        At most ``n_games`` rows for ``team_name``, newest first.
    """
    selected = df["teamname"] == team_name
    if compete_date is not None:
        selected = selected & (df["game_date"] < compete_date)

    # Sort here instead of trusting the caller to pass a pre-sorted frame;
    # a stable sort leaves an already newest-first frame in its given order.
    return (
        df[selected]
        .sort_values("game_date", ascending=False, kind="mergesort")
        .head(n_games)
    )

def produce_match_list(team_data_1, team_data_2):
    """
    Pair every game of one team with every game of another team.

    Columns coming from ``team_data_1`` are prefixed with ``a_`` and columns
    coming from ``team_data_2`` with ``b_``; a column that already starts
    with the prefix it would receive is left unchanged.

    Parameters
    ----------
    team_data_1, team_data_2 : pandas.DataFrame
        Per-team game rows to combine.

    Returns
    -------
    pandas.DataFrame
        One row per (row of ``team_data_1``, row of ``team_data_2``) pair,
        ordered with ``team_data_1`` as the outer loop. If either input has
        no rows the result is empty but still carries the prefixed columns.
    """
    def with_prefix(frame, prefix):
        # Rename once per frame rather than once per pair of rows.
        return frame.rename(
            columns=lambda name: name if name.startswith(prefix) else prefix + name
        )

    side_a = with_prefix(team_data_1, "a_")
    side_b = with_prefix(team_data_2, "b_")

    # A cross join produces the full Cartesian product in one vectorised step.
    return side_a.merge(side_b, how="cross")

# Select up to three games per team dated before 2021-10-15 and pair them up.
team_1_game_data = find_most_recent_games(team_data, "T1", 3, "2021-10-15")
team_2_game_data = find_most_recent_games(team_data, "HLE", 3, "2021-10-15")
match_list = produce_match_list(team_1_game_data, team_2_game_data)

# Write the file first and end the cell on the preview: only the last
# expression of a cell is displayed, so a head() call placed before to_csv()
# would show nothing.
match_list.to_csv("match_list.csv", index=False)
match_list.head()