In [1]:
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px

In [2]:
# colour blind palette
colour_palette = ["#e69f00", "#56b4e9", "#009e74", "#f0e442", "#0072b2", "#d55e00", "#cc79a7"]

In [3]:
# loads in scrim day data for the specified day
def load_day_data(day_num):
    """Read the CSV for one day and tag every row with its day number.

    Day 7 is read from ``data/MagwatchVSaikyoS5_T.csv``; every other day is
    read from ``data/MagwatchVSaikyoS5_SD{day_num}.csv``.  The chosen path is
    printed before the file is read.

    Parameters
    ----------
    day_num : int
        Day number; selects the file and becomes the value of the ``Day`` column.

    Returns
    -------
    pandas.DataFrame
        The file contents with a constant ``Day`` column inserted at position 0.
    """
    file_name = (
        "data/MagwatchVSaikyoS5_T.csv"
        if day_num == 7
        else f"data/MagwatchVSaikyoS5_SD{day_num}.csv"
    )
    print(f"Reading {file_name}")
    day_frame = pd.read_csv(file_name, encoding="latin-1")
    # put the day number in front of the columns read from the file
    day_frame.insert(0, "Day", day_num)
    return day_frame

In [4]:
# load in all the scrim data: one DataFrame per day, from_day..to_day inclusive
# (load_day_data reads the tournament file for day 7)
from_day = 1
to_day = 7

dfs = [load_day_data(day_num) for day_num in range(from_day, to_day + 1)]

# ignore_index=True: every CSV restarts its row labels at 0, so a plain concat
# would leave duplicate index labels (one run per day) in the combined frame
df_appended = pd.concat(dfs, ignore_index=True)
df_appended.head()

Reading data/MagwatchVSaikyoS5_SD1.csv
Reading data/MagwatchVSaikyoS5_SD2.csv
Reading data/MagwatchVSaikyoS5_SD3.csv
Reading data/MagwatchVSaikyoS5_SD4.csv
Reading data/MagwatchVSaikyoS5_SD5.csv
Reading data/MagwatchVSaikyoS5_SD6.csv
Reading data/MagwatchVSaikyoS5_T.csv


Unnamed: 0,Day,Game,Round,Ring Closing,Time Remaining,Time Stamp Value,Match Elapsed,Hit,Target,Target Team,Comments
0,1,1,2,No,2:29,587,442,No,,,VS. Jewel Box
1,1,1,2,No,2:03,613,468,No,,,
2,1,1,2,No,1:04,673,528,Yes,Tsuna Nekota,Jewel Box,First friend lost: his CR Cup 10 teammate
3,1,1,2,Yes,0:08,803,658,No,,,
4,1,1,3,No,1:32,854,709,No,,,"VS. Epicenter, Make Way and Dekani9"


In [5]:
# team sheet (team name, leader, two members, coach per row); used later to
# attach a team name to every targeted member
df_teams = pd.read_csv(
    "data/MagwatchVSaikyoS5_TeamSheet.csv",
    encoding="latin-1",
)
df_teams.head()

Unnamed: 0,Team Name,Leader,Member 1,Member 2,Coach
0,StarReiDogs,Shirayuki Reid,Hoshikawa Sara,Aruse Inu,BobSappAim
1,Jewel Box,Tsuna Nekota,Tokoyami Towa,Yakumo Beni,Cpt
2,Kansai Genki Hoan Kyoukai,Honma Himawari,Shiina Yuika,Utai Meika,Mukai
3,Pink Brain,Hanabusa Lisa,Fura Kanato,Makaino Ririmu,YufuNa
4,Sadame Gaming,Ichinose Uruha,Komori Met,Ibrahim,uruca


In [6]:
def group_strings(x):
    """Collect the non-missing values of a group into a plain list.

    Used as the aggregation function for the ``Target`` column when the rows
    of one drill are collapsed into a single row.

    Every missing value is dropped, not only a leading one, so a group never
    yields NaN entries and an empty or all-missing group yields ``[]``.

    Parameters
    ----------
    x : pandas.Series
        The values of one group.

    Returns
    -------
    list
        The non-missing values of ``x`` in their original order.
    """
    return x.dropna().tolist()

# Data Cleaning

`df_appended`

The least-modified DataFrame: every day's CSV concatenated, keeping the same rows as the original dataset and adding a `Day` column.

`df_drills`

Contains one row per individual drill; a drill that hit several targets is collapsed into a single row holding a list of its targets.

`df_teams_collapsed`

Contains one row per team member (leader, member 1 and member 2) alongside the name of their team.

`df_target_count`

Contains the number of hits received by each member and by their team as a whole.

In [7]:
# collapse the rows of a multi-hit drill into one row per drill: rows sharing
# (Day, Game, Match Elapsed) are merged, keeping the first Hit / Target Team
# value and gathering the targets into a list
df_drills = (
    df_appended[["Day", "Game", "Match Elapsed", "Hit", "Target Team", "Target"]]
    .groupby(["Day", "Game", "Match Elapsed"])
    .aggregate(
        {
            "Hit": "first",
            "Target Team": "first",
            "Target": group_strings,
        }
    )
    .rename({"Target": "Targets"}, axis=1)
    .reset_index()
)
df_drills.head()

Unnamed: 0,Day,Game,Match Elapsed,Hit,Target Team,Targets
0,1,1,442,No,,[]
1,1,1,468,No,,[]
2,1,1,528,Yes,Jewel Box,[Tsuna Nekota]
3,1,1,658,No,,[]
4,1,1,709,No,,[]


In [8]:
# reshape the team sheet to long form: the Leader, Member 1 and Member 2
# columns become separate rows, giving one (Team Name, Member) row per player
df_teams_collapsed = (
    df_teams
    .melt(id_vars="Team Name", value_vars=["Leader", "Member 1", "Member 2"])
    .drop(columns="variable")
    .sort_values("Team Name")
    .rename(columns={"value": "Member"})
)

In [9]:
# count the hits and misses of every (Day, Game)
# The counts are named explicitly: the default name of the value_counts result
# differs between pandas versions, so renaming a column called 0 afterwards
# only works on some of them.
df_accuracy = (
    df_drills[["Day", "Game", "Hit"]]
    .groupby(["Day", "Game"])
    .value_counts()
    .rename("Count")
    .reset_index()
)
# friendlier labels for the charts
df_accuracy["Hit"] = df_accuracy["Hit"].replace({"No": "Miss", "Yes": "Hit"})
df_accuracy.head()

Unnamed: 0,Day,Game,Hit,Count
0,1,1,Miss,9
1,1,1,Hit,2
2,1,2,Miss,6
3,1,2,Hit,2
4,1,3,Miss,7


# Tournament Day Analysis

In [10]:
# create a dataframe containing the number of times each member got hit on the tournament day (Day 7)
# The index/value names are set explicitly so the resulting columns are always
# "Member" and "Targeted Count", whatever names value_counts picks by default.
df_target_count_t_day = (
    df_appended.loc[df_appended["Day"] == 7, "Target"]
    .dropna()
    .value_counts()
    .rename_axis("Member")
    .rename("Targeted Count")
    .reset_index()
)

# merge the dataframes to include team name; the outer merge keeps members who
# were never hit, and fillna(0) gives them a count of 0 (it also puts 0 in
# "Team Name" for any target that is missing from the team sheet)
df_target_count_t_day = df_teams_collapsed.merge(df_target_count_t_day, how="outer", on="Member").fillna(0)

# add a column for the number of times each team got hit: select the count
# column first, then broadcast the per-team sum back onto every member row
df_target_count_t_day["Team Targeted Count"] = (
    df_target_count_t_day.groupby("Team Name")["Targeted Count"].transform("sum")
)
df_target_count_t_day = df_target_count_t_day.set_index(["Team Name", "Member"]).sort_values(["Team Targeted Count", "Team Name", "Targeted Count"], ascending=[True, True, False]).reset_index()
df_target_count_t_day.head()

Unnamed: 0,Team Name,Member,Targeted Count,Team Targeted Count
0,AQF,Ex Albio,0.0,0.0
1,AQF,Kuzuha,0.0,0.0
2,AQF,Fuwa Minato,0.0,0.0
3,Jewel Box,Tokoyami Towa,0.0,0.0
4,Jewel Box,Yakumo Beni,0.0,0.0


In [35]:
# [T Day Only] bar chart of the number of hits per member, restricted to members
# hit at least `target_count_threshold` times and ordered from most to least hit
target_count_threshold = 2
df_target_count_thresholded = (
    df_target_count_t_day
    .loc[df_target_count_t_day["Targeted Count"] >= target_count_threshold]
    .sort_values("Targeted Count", ascending=False)
)

fig = px.bar(
    df_target_count_thresholded,
    x="Member",
    y="Targeted Count",
    color_discrete_sequence=[colour_palette[5]],
    title="Targeted Count",  # replaced by the layout title set below
)

fig.update_layout(
    title="[T DAY] Number of Times Hit (Individual)",
    title_font_size=30,
    xaxis_title="Victim Name",
    yaxis_title="Number of Times Hit",
    xaxis={
        "tickmode": "array",
        # one tick per bar, labelled with the last space-separated word of the name
        "tickvals": list(range(len(df_target_count_thresholded))),
        "ticktext": df_target_count_thresholded["Member"].map(lambda name: name.split(" ")[-1]),
        "showgrid": False,
    },
    yaxis={
        "tickmode": "linear",
        "tick0": 0,
        "dtick": 1,
    },
    height=1000,
    width=1200,
    font={"color": "white", "size": 24},
    paper_bgcolor="rgba(0,0,0,0)",
    plot_bgcolor="rgba(255,255,255,0.1)",
)

# pad the y tick labels away from the axis
fig.update_yaxes(ticksuffix="  ")

fig.show()

In [37]:
# [T Day Only] horizontal stacked bar chart: one bar per team, one segment per
# member, each segment labelled with the last word of the member's name
fig = px.bar(
    df_target_count_t_day,
    x="Targeted Count",
    y="Team Name",
    color="Member",
    orientation="h",
    color_discrete_sequence=colour_palette[:3],
    text=df_target_count_t_day["Member"].map(lambda name: name.split(" ")[-1]),
)

fig.update_layout(
    height=1000,
    width=1800,
    title="[T DAY] Number of Times Hit (Team)",
    title_font_size=30,
    xaxis_title="Number of Times Hit",
    showlegend=False,
    font={"size": 24, "color": "white"},
    paper_bgcolor="rgba(0,0,0,0)",
    plot_bgcolor="rgba(255,255,255,0.1)",
)

# pad the team names away from the axis
fig.update_yaxes(ticksuffix="  ")

fig.show()

In [13]:
# [T Day Only] pie chart of drill hits vs misses (Day 7 rows of df_accuracy,
# summed over all games)
# The colours are pinned per label instead of relying on the order in which the
# slices are drawn, so "Hit" and "Miss" use the same colours as in the scatter
# plot and the overall pie regardless of which slice is larger.
fig = px.pie(
    df_accuracy[df_accuracy["Day"] == 7][["Hit", "Count"]].groupby("Hit").sum().reset_index(),
    values="Count",
    names="Hit",
    color="Hit",
    color_discrete_map={"Hit": colour_palette[5], "Miss": colour_palette[1]},
)

# show the raw counts inside the slices
fig.update_traces(textposition="inside",
                  textinfo="value")

fig.update_layout(height=500,
                  width=500,
                  title="[T DAY] Drill Accuracy",
                  title_font_size=30,
                  font=dict(size=24,
                            color="white"),
                  paper_bgcolor='rgba(0,0,0,0)',
                  )
fig.show()

# Overall Analysis

In [14]:
# scatter plot of every drill: game time in minutes on x, scrim day on y
fig = go.Figure()

# one trace per outcome, misses first and hits second; each entry is
# (value in the "Hit" column, legend name, colour, marker size, symbol, opacity)
for hit_flag, legend_name, marker_colour, marker_size, marker_symbol, trace_opacity in (
    ("No", "Miss", colour_palette[1], 20, "circle", 0.5),
    ("Yes", "Hit", colour_palette[5], 24, "hexagram", 0.7),
):
    filter_cond = df_drills["Hit"] == hit_flag
    fig.add_trace(
        go.Scatter(
            x=df_drills[filter_cond]["Match Elapsed"] / 60,
            y=df_drills[filter_cond]["Day"],
            mode="markers",
            opacity=trace_opacity,
            marker={
                "color": marker_colour,
                "size": marker_size,
                "symbol": marker_symbol,
            },
            name=legend_name,
        )
    )

fig.update_layout(
    xaxis_title="Game Time (Min)",
    yaxis_title="Day",
    xaxis={
        "tickmode": "linear",
        "tick0": 0,
        "dtick": 2,
    },
    yaxis={
        "tickmode": "linear",
        "tick0": 0,
        "dtick": 1,
        "showgrid": False,
    },
    height=500,
    width=1500,
    font={"color": "white", "size": 24},
    paper_bgcolor="rgba(0,0,0,0)",
    plot_bgcolor="rgba(255,255,255,0.1)",
    legend_traceorder="reversed",
)

# pad the day numbers away from the axis
fig.update_yaxes(ticksuffix="  ")

fig.show()

In [15]:
# create a dataframe containing the number of times each member got hit (all days)
# The index/value names are set explicitly so the resulting columns are always
# "Member" and "Targeted Count", whatever names value_counts picks by default.
df_target_count = (
    df_appended["Target"]
    .dropna()
    .value_counts()
    .rename_axis("Member")
    .rename("Targeted Count")
    .reset_index()
)

# merge the dataframes to include team name; the outer merge keeps members who
# were never hit, and fillna(0) gives them a count of 0 (it also puts 0 in
# "Team Name" for any target that is missing from the team sheet)
df_target_count = df_teams_collapsed.merge(df_target_count, how="outer", on="Member").fillna(0)

# add a column for the number of times each team got hit: select the count
# column first, then broadcast the per-team sum back onto every member row
df_target_count["Team Targeted Count"] = (
    df_target_count.groupby("Team Name")["Targeted Count"].transform("sum")
)
df_target_count = df_target_count.set_index(["Team Name", "Member"]).sort_values(["Team Targeted Count", "Team Name", "Targeted Count"], ascending=[True, True, False]).reset_index()

# integrity check to make sure the member names from the 2 joined DataFrames match:
# a target that is not on the team sheet ends up with Team Name == 0 after fillna(0)
unmatched_rows = df_target_count["Team Name"] == 0
print("Rows dropped:")
display(df_target_count[unmatched_rows])

# filter with a mask rather than drop(index=0), which raises KeyError when
# every target is found on the team sheet
df_target_count = df_target_count[~unmatched_rows].reset_index(drop=True)

print("")
print("Remaining data:")
df_target_count.head()

Rows dropped:


Unnamed: 0,Team Name,Member,Targeted Count,Team Targeted Count
6,0,dtto.,2.0,3.0
7,0,Astel Leda,1.0,3.0



Remaining data:


Unnamed: 0,Team Name,Member,Targeted Count,Team Targeted Count
0,KatyouGetsu,Uzuki Kou,0.0,0.0
1,KatyouGetsu,Watarai Hibari,0.0,0.0
2,KatyouGetsu,Tachibana Hinano,0.0,0.0
3,Mayumi-do Shiroan Set,Yukishiro Mahiro,0.0,0.0
4,Mayumi-do Shiroan Set,And Uge,0.0,0.0


In [39]:
# bar chart of the total number of hits per member, restricted to members hit
# at least `target_count_threshold` times and ordered from most to least hit
target_count_threshold = 5
df_target_count_thresholded = (
    df_target_count
    .loc[df_target_count["Targeted Count"] >= target_count_threshold]
    .sort_values("Targeted Count", ascending=False)
)

fig = px.bar(
    df_target_count_thresholded,
    x="Member",
    y="Targeted Count",
    color_discrete_sequence=[colour_palette[5]],
    title="Targeted Count",  # replaced by the layout title set below
)

fig.update_layout(
    title="Total Number of Times Hit (Individual)",
    title_font_size=30,
    xaxis_title="Victim Name",
    yaxis_title="Number of Times Hit",
    xaxis={
        "tickmode": "array",
        # one tick per bar, labelled with the last space-separated word of the name
        "tickvals": list(range(len(df_target_count_thresholded))),
        "ticktext": df_target_count_thresholded["Member"].map(lambda name: name.split(" ")[-1]),
        "showgrid": False,
    },
    yaxis={
        "tickmode": "linear",
        "tick0": 0,
        "dtick": 1,
    },
    height=1000,
    width=1200,
    font={"color": "white", "size": 24},
    paper_bgcolor="rgba(0,0,0,0)",
    plot_bgcolor="rgba(255,255,255,0.1)",
)

# pad the y tick labels away from the axis
fig.update_yaxes(ticksuffix="  ")

fig.show()

In [38]:
# horizontal stacked bar chart over all days: one bar per team, one segment per
# member, each segment labelled with the last word of the member's name
fig = px.bar(
    df_target_count,
    x="Targeted Count",
    y="Team Name",
    color="Member",
    orientation="h",
    color_discrete_sequence=colour_palette[:3],
    text=df_target_count["Member"].map(lambda name: name.split(" ")[-1]),
)

fig.update_layout(
    height=1000,
    width=1800,
    title="Total Number of Times Hit (Team)",
    title_font_size=30,
    xaxis_title="Number of Times Hit",
    showlegend=False,
    font={"size": 24, "color": "white"},
    paper_bgcolor="rgba(0,0,0,0)",
    plot_bgcolor="rgba(255,255,255,0.1)",
)

# pad the team names away from the axis
fig.update_yaxes(ticksuffix="  ")

fig.show()

In [18]:
# how many members share each hit count: one row per distinct "Targeted Count"
# with the number of members that have it
df_count_frequency = (
    df_target_count
    .groupby("Targeted Count")["Member"]
    .count()
    .rename("Frequency")
    .reset_index()
)
df_count_frequency

Unnamed: 0,Targeted Count,Frequency
0,0.0,10
1,1.0,8
2,2.0,13
3,3.0,7
4,4.0,9
5,5.0,3
6,6.0,2
7,7.0,2
8,8.0,1
9,9.0,2


In [43]:
# distribution of hit counts: number of members (y) for each number of hits (x)
fig = px.bar(
    df_count_frequency,
    x="Targeted Count",
    y="Frequency",
    color_discrete_sequence=[colour_palette[5]],
)

fig.update_layout(
    height=1000,
    width=1500,
    bargap=0,  # no gap between adjacent bars
    title="Hit Frequency Distribution (Individual)",
    xaxis_title="Number of Times Hit",
    title_font_size=30,
    showlegend=False,
    font={"size": 24, "color": "white"},
    paper_bgcolor="rgba(0,0,0,0)",
    plot_bgcolor="rgba(255,255,255,0.1)",
)
fig.show()

In [20]:
# pie chart of drill hits vs misses over all days and games
# The colours are pinned per label instead of relying on the order in which the
# slices are drawn, so "Hit" and "Miss" use the same colours as in the scatter
# plot and the tournament-day pie regardless of which slice is larger.
fig = px.pie(
    df_accuracy[["Hit", "Count"]].groupby("Hit").sum().reset_index(),
    values="Count",
    names="Hit",
    color="Hit",
    color_discrete_map={"Hit": colour_palette[5], "Miss": colour_palette[1]},
)

# show the raw counts inside the slices
fig.update_traces(textposition="inside",
                  textinfo="value")

fig.update_layout(height=500,
                  width=500,
                  title="Drill Accuracy",
                  title_font_size=30,
                  font=dict(size=24,
                            color="white"),
                  paper_bgcolor='rgba(0,0,0,0)',
                  legend_traceorder="reversed"
                  )
fig.show()

In [21]:
def calc_percentage(df, column):
    """Return a copy of ``df`` with a ``Percentage`` column added.

    ``Percentage`` is each row's share of the column total, expressed as a
    fraction (value divided by the sum), not multiplied by 100.

    The input frame is left untouched: this function is used with
    ``groupby(...).apply``, where mutating the group that is passed in is not
    supported by pandas.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing ``column``.
    column : str
        Name of the numeric column to normalise.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same rows and a ``Percentage`` column.
    """
    return df.assign(Percentage=df[column] / df[column].sum())

In [22]:
# share of each outcome within its (Day, Game): the row's Count divided by the
# total number of drills in that game.
# transform("sum") broadcasts the per-game total back onto every row, which
# keeps the original row order and columns without running groupby.apply over
# a frame that includes the grouping columns (deprecated in newer pandas).
df_accuracy = df_accuracy.assign(
    Percentage=df_accuracy["Count"] / df_accuracy.groupby(["Day", "Game"])["Count"].transform("sum")
)
df_accuracy.head()

Unnamed: 0,Day,Game,Hit,Count,Percentage
0,1,1,Miss,9,0.818182
1,1,1,Hit,2,0.181818
2,1,2,Miss,6,0.75
3,1,2,Hit,2,0.25
4,1,3,Miss,7,0.7


In [23]:
# hit rate per game, one line per day; day 7 is drawn in black and the other
# days take their colours from the colour-blind palette
fig = px.line(
    df_accuracy[df_accuracy["Hit"] == "Hit"],
    x="Game",
    y="Percentage",
    color="Day",
    labels={"Percentage": "Hit Rate"},
    color_discrete_map={7: "black"},
    color_discrete_sequence=colour_palette,
    markers=True,
)

fig.update_layout(
    height=1000,
    width=1500,
    title="Drill Accuracy",
    title_font_size=30,
    font={"size": 24, "color": "white"},
    paper_bgcolor="rgba(0,0,0,0)",
    plot_bgcolor="rgba(255,255,255,0.1)",
    legend_traceorder="reversed",
)

fig.show()

In [24]:
# total number of drills (hits + misses) fired in every game of every day
df_drill_count = df_accuracy[["Day", "Game", "Count"]].groupby(["Day", "Game"]).sum().reset_index()
# Game as a string so each game gets its own discrete colour
df_drill_count["Game"] = df_drill_count["Game"].astype(str)

fig = px.bar(df_drill_count,
             x="Day",
             y="Count",
             color="Game",
             color_discrete_sequence=px.colors.qualitative.Set2,
             title="Number of Drills Per Day")

# the layout title matches the chart: it previously overwrote the title above
# with "Drill Accuracy", copied from the accuracy charts
fig.update_layout(height=1000,
                  width=1500,
                  title="Number of Drills Per Day",
                  title_font_size=30,
                  font=dict(size=24,
                            color="white"),
                  paper_bgcolor='rgba(0,0,0,0)',
                  plot_bgcolor='rgba(255,255,255,0.1)',
                  legend_traceorder="reversed"
                  )

fig.show()