In [None]:
# You will need the following packages
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

**About Overwatch League.** In the Overwatch League, teams of professional players compete against each other to be the best in the world at playing the team-based first-person shooter game, Overwatch.

Each match consists of two teams of six players each, with each player selecting one of the game's many heroes to play as. The teams are then tasked with completing various objectives, such as escorting a payload across the map or capturing and holding control points.

The team that is able to complete their objectives the quickest or prevent the other team from completing their objectives wins the match.

Matches are typically played in a best-of-three format, meaning that the first team to win two rounds is declared the winner. Each round is called a "map", and the team that wins the most maps wins the match.

The Overwatch League is divided into two seasons per year, with teams from all around the world competing in a variety of different venues. The top teams from each season advance to the playoffs, where they compete for the chance to be crowned the Overwatch League champion. Each map is one of two types: it is either a control map or an escort map.  

In [None]:
# Read the match/map statistics CSV into the notebook's working DataFrame.
df = pd.read_csv(filepath_or_buffer="data/match_map_stats.csv")

**1b) (1 mark)** Print the first 5 rows of the DataFrame

In [None]:
# Show the first five rows as plain text, leaving out the index column.
first_rows = df.head(5)
print(first_rows.to_string(index=False))


In [None]:
# List every column label of the DataFrame, one per line.
for column_label in list(df):
    print(column_label)

We have the following columns

- **round_start_time** Starting time of the round
- **round_end_time** Ending time of the round
- **stage** Stage of the competition
- **match_id** Id for the match
- **game_number** Game number in the given match
- **match_winner** Winner of the match
- **map_winner** Winner of the game/map
- **map_loser** Loser of the game/map
- **map_name** Name of map
- **map_round** Round number (each map has several rounds)
- **winning_team_final_map_score** Final score of map winner
- **losing_team_final_map_score** Final score of map loser
- **control_round_name** Name of the round if it was a control type map
- **attacker** Team that was the attacker
- **defender** Team that was the defender
- **team_one_name** Name of first team in match
- **team_two_name** Name of second team in match
- **attacker_payload_distance** Distance the attacker moved the payload on payload maps
- **defender_payload_distance** 
- **attacker_time_banked** Attacker time left over
- **defender_time_banked** Defender time left over
- **attacker_control_perecent** Percent objective was charged by the attacker team
- **defender_control_perecent** Percent objective was charged by the defender team
- **attacker_round_end_score** Attacker score at the end of this round
- **defender_round_end_score** Defender score at the end of this round

In [None]:
#a
# A row is labelled 'control' when at least one of the two control-percent
# columns holds a value; every other row is labelled 'escort'.
# notna() is used instead of np.isnan so non-float values cannot raise.
control_percent_columns = ['attacker_control_perecent', 'defender_control_perecent']
is_control_row = df[control_percent_columns].notna().any(axis=1)
df['map_type'] = np.where(is_control_row, 'control', 'escort')

#b
# Payload distance and banked time are blanked out on control rows in one
# vectorised assignment instead of a per-row loop.
payload_and_time_columns = [
    'attacker_payload_distance',
    'defender_payload_distance',
    'attacker_time_banked',
    'defender_time_banked',
]
df.loc[is_control_row, payload_and_time_columns] = np.nan
print(df[['map_type', 'attacker_payload_distance', 'defender_payload_distance', 'attacker_time_banked', 'defender_time_banked']].head())

#c
# Parse the round timestamps and report the resulting dtypes.
df['round_start_time'] = pd.to_datetime(df['round_start_time'], format="%m/%d/%y %H:%M")
df['round_end_time'] = pd.to_datetime(df['round_end_time'], format="%m/%d/%y %H:%M")
print(df['round_start_time'].dtype)
print(df['round_end_time'].dtype)

# The year is the text before the first ':' in the stage label. It is kept as
# a string; a missing stage yields a missing year instead of raising.
df['year'] = df['stage'].str.split(":").str[0]


In [None]:
# Collect every team name that appears in either team column.
team_columns = ['team_one_name', 'team_two_name']
unique_teams = set()
for column in team_columns:
    # Missing names are dropped so they neither show up as a "team" nor break sorting.
    unique_teams.update(df[column].dropna().unique())

# Sets of strings iterate in a different order in every kernel session, so the
# names are sorted to keep the printed list reproducible.
unique_teams_df = pd.DataFrame({'Team': sorted(unique_teams)})
print(unique_teams_df.to_string(index=False, header=False))

In [None]:
# Lookup table from team name to the country label used for colouring plots.
# Insertion order is significant: later cells iterate over this dict.
country = {
    'Seoul Dynasty': 'Korea',
    'Atlanta Reign': 'United States',
    'Florida Mayhem': 'United States',
    'Washington Justice': 'United States',
    'Los Angeles Valiant': 'United States',
    'Hangzhou Spark': 'China',
    'London Spitfire': 'England',
    'Shanghai Dragons': 'China',
    'Vancouver Titans': 'Canada',
    'Dallas Fuel': 'United States',
    'Houston Outlaws': 'United States',
    'New York Excelsior': 'United States',
    'Boston Uprising': 'United States',
    'San Francisco Shock': 'United States',
    'Los Angeles Gladiators': 'United States',
    'Paris Eternal': 'France',
    'Guangzhou Charge': 'China',
    'Philadelphia Fusion': 'United States',
    'Chengdu Hunters': 'China',
    'Toronto Defiant': 'Canada',
}

# Echo the mapping, one "team: country" pair per line.
for team_name in country:
    print("{}: {}".format(team_name, country[team_name]))


In [None]:
#c
# Keep only the rows whose map_type is 'control'.
control_maps_df = df.loc[df['map_type'].eq('control')]


In [None]:
# For every team, count the control-map rows it won and the rows it took part
# in, then derive a win fraction (0 when the team has no rows at all).
# NOTE(review): both counts are rows of control_maps_df, not distinct maps. If
# the table holds one row per round, maps with more rounds weigh more -
# confirm this is the intended meaning of "games".
team_wins = {}
team_rows_played = {}
win_percentages = {}

for team in country.keys():
    took_part = (control_maps_df['team_one_name'] == team) | (control_maps_df['team_two_name'] == team)
    wins = int((control_maps_df['map_winner'] == team).sum())
    total_maps_played = int(took_part.sum())
    team_wins[team] = wins
    # Stored once so the report below does not recompute the same mask.
    team_rows_played[team] = total_maps_played
    win_percentages[team] = (wins / total_maps_played) if total_maps_played > 0 else 0

for team, wins in team_wins.items():
    print(f"{team}: {wins} wins out of {team_rows_played[team]} games on control")

# Order teams from highest to lowest win fraction (ties keep dict order).
sorted_teams = sorted(win_percentages.keys(), key=lambda x: win_percentages[x], reverse=True)
sorted_win_percentages = [win_percentages[team] for team in sorted_teams]
sorted_countries = [country[team] for team in sorted_teams]

# Horizontal bar chart, one bar per team, coloured by the team's country.
fig, ax = plt.subplots(figsize=(10, 8))
bar_width = 0.9
y_positions = np.arange(len(sorted_teams))
country_colors = {
    'Korea': 'purple',
    'United States': 'brown',
    'China': 'orange',
    'England': 'green',
    'Canada': 'blue',
    'France': 'red'
}
colors = [country_colors[c] for c in sorted_countries]
ax.barh(y_positions, sorted_win_percentages, bar_width, color=colors)
# Reference line at an even win/loss record.
ax.axvline(x=0.5, color='pink', linestyle='-')
ax.set_xlim(0, 0.7)
ax.set_xticks(np.arange(0, 0.8, 0.1))
ax.set_ylim(-0.5, len(sorted_teams) - 0.5)
ax.set_yticks(y_positions)
ax.set_yticklabels(sorted_teams)
ax.set_xlabel('Win Percentage')
ax.set_ylabel('Team')
ax.set_title("Control Map Wins For Teams")

# One legend patch per country colour.
legend_handles = [
    plt.Rectangle((0, 0), 1, 1, color=color, label=country_name)
    for country_name, color in country_colors.items()
]
ax.legend(handles=legend_handles, loc='lower left')

fig.tight_layout()
plt.show()



The San Francisco Shock, Seoul Dynasty, Los Angeles Gladiators, Atlanta Reign, New York Excelsior, Hangzhou Spark, Dallas Fuel, Shanghai Dragons, Toronto Defiant, and Philadelphia Fusion all have win rates higher than 50%. The USA seems to produce better teams than the other countries.

In [None]:
#a
def _shanghai_record(frame):
    """Return (rows won, rows played) for the Shanghai Dragons within *frame*."""
    dragons = 'Shanghai Dragons'
    won = frame['map_winner'] == dragons
    played = (frame['team_one_name'] == dragons) | (frame['team_two_name'] == dragons)
    return len(frame[won]), len(frame[played])


# Win fraction per season; a season without any rows is recorded as 0.
yearly_win_percentages = {}
for year in range(2018, 2023):
    season_rows = control_maps_df[control_maps_df['year'] == str(year)]
    season_wins, season_played = _shanghai_record(season_rows)
    yearly_win_percentages[year] = (season_wins / season_played) if season_played > 0 else 0

# Same calculation over all seasons together.
shanghai_dragons_wins_overall, total_shanghai_dragons_maps_played_overall = _shanghai_record(control_maps_df)
if total_shanghai_dragons_maps_played_overall > 0:
    shanghai_dragons_win_percentage_overall = shanghai_dragons_wins_overall / total_shanghai_dragons_maps_played_overall
else:
    shanghai_dragons_win_percentage_overall = 0

for season, season_rate in yearly_win_percentages.items():
    print(f"Shanghai Dragons' win percentage on control maps in {season}: {season_rate:.2%}")
print(f"Overall win percentage for Shanghai Dragons on control maps: {shanghai_dragons_win_percentage_overall:.2%}")

# Line chart of the per-season win fraction.
years = [season for season in yearly_win_percentages]
win_percentages = [yearly_win_percentages[season] for season in years]
plt.figure(figsize=(10, 6))
plt.plot(years, win_percentages, marker='o', linestyle='-', color='b')
plt.title("Shanghai Dragons' Win Rate on Control Maps Over the Years")
plt.xlabel("Year")
plt.ylabel("Win Rate")
plt.xticks(list(range(min(years), max(years) + 1)))
plt.grid(True)
plt.show()



- Based on the trends, in 2018 the Shanghai Dragons didn't have much success, as they won only 7.69% of games on control maps.
- In 2019 they saw significant improvement, as they won 53.77% of games on control maps.
- In 2020 they saw another significant improvement, as they won 72.87% of games on control maps.
- In 2021 they saw little improvement, as they won 73.57% of games on control maps.
- Finally, in 2022 they saw a slight drop-off, as they won 51.02% of games on control maps.
- In total, the Shanghai Dragons' winning percentage is 51.66%.

In [None]:
# Control-map rows from the 2022 season only ('year' holds strings).
control_maps_2022 = control_maps_df[control_maps_df['year'] == '2022']

# Rows won per team. value_counts() ignores missing winners, and the .get()
# lookup ignores winner labels that are not known teams, so an unexpected
# label can no longer raise a KeyError the way a direct "+= 1" would.
wins_by_team_2022 = control_maps_2022['map_winner'].value_counts()
team_wins_2022 = {team: int(wins_by_team_2022.get(team, 0)) for team in team_wins.keys()}
win_percentages_2022 = {}

for team, wins in team_wins_2022.items():
    total_maps_played = len(control_maps_2022[(control_maps_2022['team_one_name'] == team) | (control_maps_2022['team_two_name'] == team)])
    win_percentage = (wins / total_maps_played) if total_maps_played > 0 else 0
    win_percentages_2022[team] = win_percentage

print("Winning Percentages in 2022:")
for team, win_percentage in win_percentages_2022.items():
    print(f"{team}: {win_percentage * 100:.2f}%")


# The five teams with the highest 2022 win fraction (ties keep dict order).
top_5_teams_2022 = sorted(win_percentages_2022.items(), key=lambda x: x[1], reverse=True)[:5]

top_5_teams_names = [team[0] for team in top_5_teams_2022]
top_5_teams_data = control_maps_df[
    (control_maps_df['team_one_name'].isin(top_5_teams_names)) |
    (control_maps_df['team_two_name'].isin(top_5_teams_names))
]

fig, ax = plt.subplots(figsize=(12, 6))

for team_name in top_5_teams_names:
    team_data = top_5_teams_data[(top_5_teams_data['team_one_name'] == team_name) | (top_5_teams_data['team_two_name'] == team_name)]
    # Share of the team's rows in each year where it is the map winner.
    win_rates_over_years = team_data.groupby('year')['map_winner'].apply(lambda x: (x == team_name).mean())
    # Markers keep a team visible even if it only has a single season of data.
    ax.plot(win_rates_over_years.index, win_rates_over_years.values, marker='o', label=team_name)

ax.set_xlabel('Year')
ax.set_ylabel('Win Rate on Control Maps')
ax.set_title('Win Rates on Control Maps Over the Years (Top 5 Teams in 2022)')
ax.legend()
ax.grid(True)
plt.show()



Two things I noticed from the graph trends are that 3 of the 5 teams (Houston Outlaws, Seoul Dynasty, and Atlanta Reign) all had their lowest winning percentages in 2020, and that all the teams, including the Los Angeles Gladiators and Dallas Fuel, had their best season in 2022.

In [None]:
#5a
# List the distinct control round names present in the data.
unique_control_round_namesdf = control_maps_df['control_round_name'].unique()
for control_round_name in unique_control_round_namesdf:
    print(control_round_name)

# Every control-map row in which Toronto Defiant took part.
toronto_data = control_maps_df[(control_maps_df['team_one_name'] == 'Toronto Defiant') | (control_maps_df['team_two_name'] == 'Toronto Defiant')]

# NOTE(review): a "win" here means Toronto is the match_winner of the row,
# while the team-level cells above key on map_winner - confirm which is intended.
map_wins = toronto_data[toronto_data['match_winner'] == 'Toronto Defiant'].groupby(['control_round_name']).size().reset_index(name='wins')
map_played = toronto_data.groupby(['control_round_name']).size().reset_index(name='total_games')

# Right-merge onto the rows-played table so a round name with zero wins is
# kept at 0% instead of being silently dropped by an inner join.
map_win_rate = map_wins.merge(map_played, on=['control_round_name'], how='right')
map_win_rate['wins'] = map_win_rate['wins'].fillna(0).astype(int)
map_win_rate['win_rate'] = map_win_rate['wins'] / map_win_rate['total_games']
for row in map_win_rate.itertuples(index=False):
    print(f"Toronto Defiant: {row.wins} wins out of {row.total_games} games on {row.control_round_name}, Win Rate: {row.win_rate:.2%}")

map_win_rate = map_win_rate.sort_values(by='win_rate', ascending=True)


control_round_names = map_win_rate['control_round_name']
win_rates = map_win_rate['win_rate']
control_round_colors = {
    'Lighthouse' : 'red',
    'Ruins' : 'orange',
    'Well' : 'yellow',
    'University' : 'green',
    'Gardens' : 'blue',
    'City Center' : 'indigo',
    'Control Center' : 'violet',
    'Night Market' : 'brown',
    'Garden' : 'gray',
    'Shrine' : 'aqua',
    'Sanctum' : 'purple',
    'Village' : 'lime',
    'MEKA Base' : 'black',
    'Downtown' : 'gold',
    'Sanctuary' : 'beige'
}
# Round names missing from the palette fall back to a neutral colour rather
# than raising a KeyError.
colors = [control_round_colors.get(round_name, 'lightgray') for round_name in control_round_names]
# Create a figure and axis
fig, ax = plt.subplots()
# Plot horizontal bars with control map names on the y-axis
ax.barh(control_round_names, win_rates, color=colors)
# Fix the visible win-rate range of the x-axis
ax.set_xlim(0, 0.8)
# Add a vertical line at x=0.5 for reference
ax.axvline(x=0.5, color='pink', linestyle='-')
# Set x-axis ticks
ax.set_xticks(np.arange(0, 0.8, 0.1))
# Set labels for x and y axes
ax.set_xlabel('Win Rate')
ax.set_ylabel('Control Map Name')

# Invert the y-axis to have the highest win rate at the bottom
ax.invert_yaxis()
ax.set_title("All time Wins on control maps for Toronto Defiant")

# Display the plot
fig.tight_layout()
plt.show()

In [None]:
#5a
# Toronto Defiant's control-map rows from the 2022 season ('year' holds strings).
toronto_data_2022 = toronto_data[toronto_data['year'] == '2022']

# NOTE(review): a "win" here means Toronto is the match_winner of the row,
# while the team-level cells key on map_winner - confirm which is intended.
map_wins = toronto_data_2022[toronto_data_2022['match_winner'] == 'Toronto Defiant'].groupby(['control_round_name']).size().reset_index(name='wins')
map_played = toronto_data_2022.groupby(['control_round_name']).size().reset_index(name='total_games')

# Right-merge onto the rows-played table so a round name with zero wins is
# kept at 0% instead of being silently dropped by an inner join.
map_win_rate = map_wins.merge(map_played, on=['control_round_name'], how='right')
map_win_rate['wins'] = map_win_rate['wins'].fillna(0).astype(int)
map_win_rate['win_rate'] = map_win_rate['wins'] / map_win_rate['total_games']

# Names and win rates must come from the SAME ordering: barh pairs them by
# position, so taking them from differently sorted frames mislabels every bar.
map_win_rate = map_win_rate.sort_values(by='win_rate', ascending=True)
control_round_names = map_win_rate['control_round_name']
win_rates = map_win_rate['win_rate']

control_round_colors = {
    'Lighthouse' : 'red',
    'Ruins' : 'orange',
    'Well' : 'yellow',
    'University' : 'green',
    'Gardens' : 'blue',
    'City Center' : 'indigo',
    'Control Center' : 'violet',
    'Night Market' : 'brown',
    'Garden' : 'gray',
    'Shrine' : 'aqua',
    'Sanctum' : 'purple',
    'Village' : 'lime',
    'MEKA Base' : 'black',
    'Downtown' : 'gold',
    'Sanctuary' : 'beige'
}
# Round names missing from the palette fall back to a neutral colour rather
# than raising a KeyError.
colors = [control_round_colors.get(round_name, 'lightgray') for round_name in control_round_names]
# Create a figure and axis
fig, ax = plt.subplots()
# Plot horizontal bars with control map names on the y-axis
ax.barh(control_round_names, win_rates, color=colors)
# Win rates are fractions, so show the full 0-1 range that the ticks cover
ax.set_xlim(0, 1)
# Add a vertical line at x=0.5 for reference
ax.axvline(x=0.5, color='pink', linestyle='-')
# Set x-axis ticks
ax.set_xticks(np.arange(0, 1.1, 0.1))
# Set labels for x and y axes
ax.set_xlabel('Win Rate')
ax.set_ylabel('Control Map Name')

# Invert the y-axis to have the highest win rate at the bottom
ax.invert_yaxis()
ax.set_title("Wins for Toronto Defiant on Control Maps in 2022")

# Display the plot
fig.tight_layout()
plt.show()




A scatter plot should be used to make recommendations for Toronto in 2023, so they could see how they performed on each map throughout the year and whether their skills on a certain map declined or improved. Based on the plot I created, it can be determined that the map Toronto should practice on is University, because that is where their winning percentage is lowest. The problem with that, however, is that Gardens, Garden, MEKA Base, and other maps are so close to the University win percentage that it is difficult to tell whether they should focus their attention on the University map or somewhere else.

In [None]:
#5c
# Rebuild the 2022 win counts from scratch. team_wins_2022 already holds the
# counts from the earlier 2022 cell, so incrementing it again here would double
# every count (and grow it further each time this cell is re-run).
wins_by_team_2022 = control_maps_2022['map_winner'].value_counts()
team_wins_2022 = {team: int(wins_by_team_2022.get(team, 0)) for team in team_wins_2022}

# Win fraction per team: rows won / rows played in 2022 (0 when none played).
for team, wins in team_wins_2022.items():
    total_maps_played = len(
        control_maps_2022[
            (control_maps_2022['team_one_name'] == team) | (control_maps_2022['team_two_name'] == team)
        ]
    )
    win_percentage = (wins / total_maps_played) if total_maps_played > 0 else 0
    win_percentages_2022[team] = win_percentage

# Rank 1 is the team with the highest 2022 win fraction (ties keep dict order).
sorted_teams_2022 = sorted(win_percentages_2022.keys(), key=lambda team: win_percentages_2022[team], reverse=True)
print("Control Map Win Ranks in 2022:")
for rank, team in enumerate(sorted_teams_2022, start=1):
    print(f"{team}: Rank {rank}")


In [None]:
#5d
# Toronto Defiant's control-map rows from the 2022 season ('year' holds strings).
toronto_data_2022 = toronto_data[toronto_data['year'] == '2022']

# NOTE(review): a "win" here means Toronto is the match_winner of the row,
# while the team-level cells key on map_winner - confirm which is intended.
map_wins = toronto_data_2022[toronto_data_2022['match_winner'] == 'Toronto Defiant'].groupby(['control_round_name']).size().reset_index(name='wins')
map_played = toronto_data_2022.groupby(['control_round_name']).size().reset_index(name='total_games')

# Right-merge onto the rows-played table so a round name with zero wins is
# kept at 0% (and can appear in the bottom 3) instead of being dropped.
map_win_rate = map_wins.merge(map_played, on=['control_round_name'], how='right')
map_win_rate['wins'] = map_win_rate['wins'].fillna(0).astype(int)
map_win_rate['win_rate'] = map_win_rate['wins'] / map_win_rate['total_games']
map_win_rate = map_win_rate.sort_values(by='win_rate', ascending=False)

for row in map_win_rate.itertuples(index=False):
    print(f"{row.control_round_name}: Win Rate - {row.win_rate:.2%}")


top_3_maps = map_win_rate.head(3)
bottom_3_maps = map_win_rate.tail(3)

# "Mean Rank" is the average 2022 control-map rank (position in
# sorted_teams_2022, 1 = best) of the opponents Toronto met on that round name.
# The earlier version averaged the round-end scores, which is not a rank.
# NOTE(review): assumes the intended metric is opponent strength, as the
# written discussion of these numbers suggests - confirm against the task.
rank_by_team_2022 = {team: rank for rank, team in enumerate(sorted_teams_2022, start=1)}
toronto_is_team_one = toronto_data_2022['team_one_name'] == 'Toronto Defiant'
opponent_2022 = toronto_data_2022['team_two_name'].where(toronto_is_team_one, toronto_data_2022['team_one_name'])
opponent_rank_2022 = opponent_2022.map(rank_by_team_2022)

report_sections = (
    ("\nTop 3 Control Map Names (by Win Rate) and Mean Rank:", top_3_maps),
    ("\nBottom 3 Control Map Names (by Win Rate) and Mean Rank:", bottom_3_maps),
)
for heading, selected_maps in report_sections:
    print(heading)
    for row in selected_maps.itertuples(index=False):
        control_round_name = row.control_round_name
        win_rate = row.win_rate
        on_this_round = toronto_data_2022['control_round_name'] == control_round_name
        # Opponents missing from the ranking map to NaN and are skipped by mean().
        mean_rank = opponent_rank_2022[on_this_round].mean()

        print(f"{control_round_name}: Win Rate - {win_rate:.2%}, Mean Rank - {mean_rank:.2f}")


# 5e
* When Toronto plays on MEKA Base, they face weaker opponents and, as a result, play really well
* When Toronto plays on Lighthouse, they face relatively strong opponents and play extremely well
* When Toronto plays on Sanctuary, they also face relatively strong opponents and play extremely well
* When Toronto plays on Shrine, their competition is extremely strong and, as a result, they perform very poorly
* When Toronto plays on Well, they generally faced weaker opponents, and probably overestimated them, resulting in a loss
* When Toronto plays on Sanctum, their competition is relatively strong, and they usually play very poorly

In [None]:
# Rows labelled 'escort', and the subset of them whose map_winner is 'draw'.
escort_maps_df = df[df['map_type'] == 'escort']
is_draw_row = escort_maps_df['map_winner'] == 'draw'
draws_on_escort = int(is_draw_row.sum())
total_games_on_escort = len(escort_maps_df)
# Guard the division so an empty escort table reports 0% instead of raising.
proportion_of_draws = round((draws_on_escort / total_games_on_escort) * 100) if total_games_on_escort > 0 else 0

print(f"Number of draws on escort maps: {draws_on_escort}")
print(f"Total number of games on escort maps: {total_games_on_escort}")
print(f"Proportion of escort maps ending in a draw (rounded to the nearest whole number): {proportion_of_draws}%")


# Group by map_name and count the number of draws and total games for each map
map_draws_counts = escort_maps_df[is_draw_row].groupby('map_name').size().reset_index(name='draws')
map_total_games_counts = escort_maps_df.groupby('map_name').size().reset_index(name='total_games')

# Merge the two DataFrames on map_name (inner join: only maps with at least one draw remain)
map_draws_and_total = pd.merge(map_draws_counts, map_total_games_counts, on='map_name', how='inner')

# Calculate the proportion of draws for each map (a ratio between 0 and 1)
map_draws_and_total['draw_percentage'] = (map_draws_and_total['draws'] / map_draws_and_total['total_games'])

# Print the results
for row in map_draws_and_total.itertuples(index=False):
    print(f"Map: {row.map_name}, Draw Percentage as raito: {row.draw_percentage:.2f}, Total Games: {row.total_games}, Draws: {row.draws}")

map_colours = {
    'Blizzard World': 'red',
    'Eichenwalde': 'orange',
    'Hanamura': 'yellow',
    'Hollywood': 'green',
    'Horizon Lunar Colony': 'blue',
    "King's Row": 'violet',
    'Midtown': 'purple',
    'Numbani': 'gold',
    'Paris': 'silver',
    'Temple of Anubis': 'aqua',
    'Volskaya Industries': 'lime'
}

# Extract map names, draw counts and draw ratios for plotting
escort_round_names = map_draws_and_total['map_name']
draw_counts = map_draws_and_total['draws']
draw_percentages = map_draws_and_total['draw_percentage']

# Get colors based on map names; maps missing from the palette fall back to a
# neutral colour rather than raising a KeyError
colors = [map_colours.get(map_name, 'lightgray') for map_name in escort_round_names]

# Create the bar plot (bar length is the number of draws, not the ratio)
fig, ax = plt.subplots(figsize=(10, 6))
ax.barh(escort_round_names, draw_counts, color=colors)

# Customize the plot
highest_draw_count = int(draw_counts.max()) if len(draw_counts) > 0 else 0
ax.set_xlim(0, highest_draw_count + 1)
ax.set_xlabel('Number of Draws')
ax.set_ylabel('Map Name')
ax.set_title('Number of Draws per Escort Map')
ax.invert_yaxis()
fig.tight_layout()
plt.show()




- According to the graph, Paris (15%), followed by a tie between Volskaya Industries and Horizon Lunar Colony (both 10%), has the highest draw percentage on escort maps.
- To decrease the number of draws in Paris, more games have to be played there, as it is one of the least-played maps at 207 games played
- To decrease the number of draws in Volskaya Industries, fewer games have to be played there, as it has the most total games played at 935
- To decrease the number of draws in Horizon Lunar Colony, either more games or fewer games have to be played, as it is played a moderate amount at 383 games played

In [None]:
# Split the escort rows by whether the attacker's round-end score is above or
# below the defender's (rows with equal scores fall in neither group).
escort_maps_df = df[df['map_type'] == 'escort']
attacker_won_rounds = escort_maps_df[escort_maps_df['attacker_round_end_score'] > escort_maps_df['defender_round_end_score']]
attacker_lost_rounds = escort_maps_df[escort_maps_df['attacker_round_end_score'] < escort_maps_df['defender_round_end_score']]

# NOTE(review): the two summaries describe different columns on different
# groups (time banked for won rounds, payload distance for lost rounds) -
# confirm this pairing is what the task asks for.
summary_stats1 = attacker_won_rounds['attacker_time_banked'].describe()
summary_stats2 = attacker_lost_rounds['attacker_payload_distance'].describe()
print(summary_stats1)
print(summary_stats2)


# One overlaid histogram per metric. Both groups share the same bin edges so
# their bars are directly comparable, and missing values are dropped first
# because they cannot be binned.
histogram_specs = (
    ('attacker_time_banked', 'Attacker Time Banked'),
    ('attacker_payload_distance', 'Attacker Payload Distance'),
)
for metric_column, metric_label in histogram_specs:
    won_values = attacker_won_rounds[metric_column].dropna()
    lost_values = attacker_lost_rounds[metric_column].dropna()
    shared_bins = np.histogram_bin_edges(pd.concat([won_values, lost_values]), bins=20)

    fig, ax = plt.subplots(figsize=(10, 5))
    ax.hist(won_values, bins=shared_bins, color='blue', alpha=0.7, label='Attacker Won Rounds')
    ax.hist(lost_values, bins=shared_bins, color='red', alpha=0.7, label='Attacker Lost Rounds')
    ax.set_xlabel(metric_label)
    ax.set_ylabel('Frequency')
    ax.set_title(f'Histogram of {metric_label}')
    ax.legend()
    plt.show()



Histogram of Attacker Time Banked: The histogram is right-skewed; most of the data points are concentrated on the lower end of the scale, as evidenced by the low mean and the fact that both the median and the 25th percentile are close to 0. The data is spread out, as shown by the relatively high standard deviation.

Histogram of Attacker Payload Distance: The data has a lower mean compared to Attacker Time Banked, indicating that, on average, the values are smaller, and the standard deviation is also lower, meaning that the data points are less spread out from the mean compared to the first dataset.