In [1]:
# Star import stays first so the explicit aliases below always win if `acquire`
# happens to export the same names.
from acquire import *
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [2]:
# Load the robot data stored under the "pickle-files" directory.
# NOTE(review): read_pickles_in_directory comes from the star import of `acquire`;
# presumably it unpickles each file in the directory — confirm, and only point it at
# trusted files, because unpickling can execute arbitrary code.
robot_dict = read_pickles_in_directory("pickle-files")

In [4]:
# Build the long-format stats frame that every later cell works from. The displayed
# outputs in this notebook show its columns: RobotName, Stats, Year and Value.
final_df = create_long_format_stats_df(robot_dict)

In [37]:
# Display every stat row recorded for tombstone-2
final_df.loc[final_df['RobotName'].eq('tombstone-2')]

Unnamed: 0,RobotName,Stats,Year,Value
1690,tombstone-2,Total matches,Career,11.0
1691,tombstone-2,Win percentage,Career,91.0
1692,tombstone-2,Total wins,Career,10.0
1693,tombstone-2,Losses,Career,1.0
1694,tombstone-2,Knockouts,Career,8.0
1695,tombstone-2,KO percentage,Career,73.0
1696,tombstone-2,Average Knockout Time Seconds,Career,66.0
1697,tombstone-2,Knockouts against,Career,0.0
1698,tombstone-2,KO against percentage,Career,0.0
1699,tombstone-2,Judges decision wins,Career,2.0


In [41]:
# Display only the career-level stat rows for riptide-wcvii
final_df.loc[final_df['RobotName'].eq('riptide-wcvii') & final_df['Year'].eq("Career")]

Unnamed: 0,RobotName,Stats,Year,Value
13630,riptide-wcvii,Total matches,Career,11.0
13631,riptide-wcvii,Win percentage,Career,82.0
13632,riptide-wcvii,Total wins,Career,9.0
13633,riptide-wcvii,Losses,Career,2.0
13634,riptide-wcvii,Knockouts,Career,8.0
13635,riptide-wcvii,KO percentage,Career,73.0
13636,riptide-wcvii,Average Knockout Time Seconds,Career,71.0
13637,riptide-wcvii,Knockouts against,Career,1.0
13638,riptide-wcvii,KO against percentage,Career,9.0
13639,riptide-wcvii,Judges decision wins,Career,0.0


# Actions

In [32]:
# Give the knockout-time stat a label that states its unit (exact-match replacement only)
final_df['Stats'] = final_df['Stats'].replace(
    {'Average knockout time': 'Average Knockout Time Seconds'}
)

In [33]:
# Treat missing stat values as 0.
# Assign the result back instead of calling fillna(..., inplace=True) on the column
# selection: that chained in-place form acts on an intermediate object, emits a
# warning in recent pandas, and leaves final_df untouched under Copy-on-Write.
# Reassignment also keeps the cell idempotent on re-run.
final_df['Value'] = final_df['Value'].fillna(0)

In [34]:
# Convert string values to floats by stripping the 's' (seconds) and '%' markers;
# values that are not strings are passed through unchanged.
final_df['Value'] = final_df['Value'].map(
    lambda raw: raw if not isinstance(raw, str) else float(raw.replace('s', '').replace('%', ''))
)

In [35]:
# NOTE(review): this rescale (divide every '...percentage' stat by 100) is disabled, so
# percentage stats stay on a 0-100 scale (e.g. a win percentage of 91.0), not 0-1.
# Either delete this line or re-enable it deliberately and update the plots to match.
#final_df['Value'] = final_df.apply(lambda row: float(row['Value']) / 100 if 'percentage' in row['Stats'] else row['Value'], axis=1)

In [36]:
# Reset the index to a fresh default integer index; drop=True discards the old
# index instead of keeping it as a new column.
final_df = final_df.reset_index(drop=True)

# Visualize

In [None]:
# Keep per-season rows only (drop the 'Career' aggregate), then the KO percentage stat
robot_data = final_df.loc[final_df['Year'].ne('Career')]
ko_pct_df = robot_data.loc[robot_data['Stats'].eq('KO percentage')]

# Mean KO percentage across all robots, one value per year
ko_pct_mean_by_year = ko_pct_df['Value'].groupby(ko_pct_df['Year']).mean()

# Scatter the yearly means (x = year label, y = mean KO percentage)
plt.scatter(x=ko_pct_mean_by_year.index, y=ko_pct_mean_by_year.values)

# Title and axis labels
plt.title('Average KO Percentage by Year')
plt.ylabel('KO Percentage')
plt.xlabel('Year')

# Render the figure
plt.show()


In [None]:
# select the per-season rows for riptide-wcvii (the 'Career' aggregate is excluded)
robot_data = final_df[(final_df['RobotName'] == 'riptide-wcvii') & (final_df['Year'] != 'Career')]

# create a pivot table with one row per year and one column per stat
pivot = robot_data.pivot_table(values='Value', index='Year', columns='Stats', aggfunc='first')

# 'Win percentage' is already on a 0-100 scale (the cleaning step only strips the
# '%' sign), so it must not be multiplied by 100 again.
# 'Losses' and 'Judges decision wins' are counts, so express them as a share of the
# matches fought that year to put all three series on the same 0-100 scale.
# NOTE(review): assumes every season has a 'Total matches' row, as the career stats do.
total_matches = pivot['Total matches'].replace(0, np.nan)  # avoid dividing by zero
pivot['Loss percentage'] = pivot['Losses'] / total_matches * 100
pivot['Judges decision win percentage'] = pivot['Judges decision wins'] / total_matches * 100

# grouped rather than stacked bars: judges-decision wins are presumably already
# counted inside the win percentage, so stacking the three would double count
pivot[['Win percentage', 'Loss percentage', 'Judges decision win percentage']].plot(kind='bar', figsize=(10, 6))

# set the title and axis labels (no draw statistic is plotted, so the title does not claim one)
plt.title("Percentage of Wins, Losses, and Judges Decision Wins for riptide-wcvii")
plt.xlabel("Year")
plt.ylabel("Percentage")

plt.show()


In [None]:
# Per-season rows only: the 'Career' aggregate is not a season
visual_df = final_df.loc[final_df['Year'].ne('Career')]

# Keep just the Win percentage stat
win_perc_df = visual_df.loc[visual_df['Stats'].eq('Win percentage')]

# Mean Win percentage per season, returned as a two-column frame (Year, Value)
avg_win_perc_by_season = win_perc_df.groupby('Year', as_index=False)['Value'].mean()

# Bar chart of the seasonal averages
sns.barplot(data=avg_win_perc_by_season, x='Year', y='Value', color='steelblue')
plt.xlabel('Season')
plt.ylabel('Average Win Percentage')
plt.title('Average Win Percentage by Season')
plt.show()


In [None]:
# select the data for the robot of interest
robot_data = final_df[final_df['RobotName'] == 'riptide-wcvii']

# grouped bar plot (not a box plot): one bar per statistic within each year.
# NOTE(review): the stats mix counts, percentages and seconds on a single y-axis,
# and the 'Career' rows are plotted alongside the individual years.
sns.barplot(x='Year', y='Value', hue='Stats', data=robot_data)

# add a title and axis labels; the title describes what is drawn (every statistic,
# not only win percentages)
plt.title('Value of each statistic for riptide-wcvii by year')
plt.xlabel('Year')
plt.ylabel('Value')

# render the figure so the cell does not end on the Text(...) repr of plt.ylabel
plt.show()

In [None]:
# How many robots to show
N = 10

# Win percentage rows only (career and per-year)
win_pct_df = final_df.loc[final_df['Stats'].eq('Win percentage')]

# Names of the N robots with the highest career Win percentage
top_n_robots = (
    win_pct_df.loc[win_pct_df['Year'].eq('Career')]
    .nlargest(N, 'Value')['RobotName']
    .tolist()
)

# All Win percentage rows belonging to those robots
top_n_win_pct_df = win_pct_df.loc[win_pct_df['RobotName'].isin(top_n_robots)]

# Robots as rows, Year values as columns (default aggregation: mean)
heatmap_data = top_n_win_pct_df.pivot_table(values='Value', index='RobotName', columns='Year')

# Annotated heatmap of the pivoted values
plt.figure(figsize=(12, 6))
sns.heatmap(heatmap_data, cmap="YlGnBu", annot=True, fmt=".2f")
plt.title(f'Top {N} Robots Win Percentages Over Years')
plt.show()

In [None]:
# Per-year rows only (exclude the 'Career' aggregate)
visual_df = final_df.loc[final_df['Year'].ne('Career')]

# Keep just the Win percentage stat
win_pct_df = visual_df.loc[visual_df['Stats'].eq('Win percentage')]

# Average win percentage per year as a (Year, Value) frame
avg_win_pct_by_year = win_pct_df.groupby('Year', as_index=False)['Value'].mean()

# Line plot of the yearly averages with point markers
plt.figure(figsize=(12, 6))
sns.lineplot(data=avg_win_pct_by_year, x='Year', y='Value', marker='o')
plt.xlabel('Year')
plt.ylabel('Average Win Percentage')
plt.title('Average Win Percentage Per Year for All Robots')
plt.xticks(rotation=45)
plt.show()

In [None]:
# Per-year rows only (exclude the 'Career' aggregate)
visual_df = final_df.loc[final_df['Year'].ne('Career')]

# Keep just the KO percentage stat
ko_pct_df = visual_df.loc[visual_df['Stats'].eq('KO percentage')]

# One box per year showing the spread of KO percentages across robots
plt.figure(figsize=(12, 6))
sns.boxplot(data=ko_pct_df, x='Year', y='Value')
plt.xlabel('Year')
plt.ylabel('KO Percentage')
plt.title('Distribution of KO Percentages Per Year for All Robots')
plt.xticks(rotation=45)
plt.show()

In [None]:
# Per-year rows only (exclude the 'Career' aggregate)
visual_df = final_df.loc[final_df['Year'].ne('Career')]

# Keep just the Average Knockout Time Seconds stat
avg_ko_time_df = visual_df.loc[visual_df['Stats'].eq('Average Knockout Time Seconds')]

# One violin per year, with quartile lines drawn inside each violin
plt.figure(figsize=(12, 6))
sns.violinplot(data=avg_ko_time_df, x='Year', y='Value', inner="quartile")
plt.xlabel('Year')
plt.ylabel('Average Knockout Time Seconds')
plt.title('Distribution of Average Knockout Time Seconds Per Year for All Robots')
plt.xticks(rotation=45)
plt.show()

In [None]:
# Per-year rows only (exclude the 'Career' aggregate)
visual_df = final_df.loc[final_df['Year'].ne('Career')]

# Keep just the Total wins stat
total_wins_df = visual_df.loc[visual_df['Stats'].eq('Total wins')]

# Average total wins per year as a (Year, Value) frame
avg_total_wins_by_year = total_wins_df.groupby('Year', as_index=False)['Value'].mean()

# Bar plot of the yearly averages
plt.figure(figsize=(12, 6))
sns.barplot(data=avg_total_wins_by_year, x='Year', y='Value')
plt.xlabel('Year')
plt.ylabel('Average Total Wins')
plt.title('Average Total Wins Per Year for All Robots')
plt.xticks(rotation=45)
plt.show()

In [None]:
# Career-level Total wins rows only
total_wins_df = final_df.loc[final_df['Stats'].eq('Total wins') & final_df['Year'].eq('Career')]

# Ten rows with the most career wins, highest first
top_10_robots = total_wins_df.sort_values('Value', ascending=False).iloc[:10]

# Bar plot: one bar per robot
plt.figure(figsize=(12, 6))
sns.barplot(data=top_10_robots, x='RobotName', y='Value')
plt.xlabel('Robot Name')
plt.ylabel('Total Wins')
plt.title('Top 10 Performing Robots by Total Wins')
plt.xticks(rotation=45)
plt.show()


In [None]:
# Career-level Total wins rows only
total_wins_df = final_df.loc[final_df['Stats'].eq('Total wins') & final_df['Year'].eq('Career')]

# Names of the 20 robots with the most career wins, highest first
top_20_robots = total_wins_df.sort_values('Value', ascending=False)['RobotName'].iloc[:20]

# Career KO percentage and Average Knockout Time Seconds rows for those robots
is_top_robot = final_df['RobotName'].isin(top_20_robots)
is_career = final_df['Year'].eq('Career')
is_ko_stat = final_df['Stats'].isin(['KO percentage', 'Average Knockout Time Seconds'])
filtered_df = final_df.loc[is_top_robot & is_career & is_ko_stat]

# One row per robot, one column per stat
pivot_df = filtered_df.pivot_table(values='Value', index='RobotName', columns='Stats').reset_index()

# Scatter the two stats against each other, one coloured point per robot
plt.figure(figsize=(12, 6))
sns.scatterplot(data=pivot_df, x='KO percentage', y='Average Knockout Time Seconds', hue='RobotName', s=100)
plt.title('Top 20 Performing Robots: KO Percentage vs. Average Knockout Time Seconds')
# Anchor the legend outside the axes, to the right of the plot area
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0)
plt.show()


In [None]:
# List the distinct stat names present in final_df (handy when choosing the
# stat labels to filter on in the plots).
final_df["Stats"].unique()

In [None]:
# Career-level Total wins rows only
total_wins_df = final_df.loc[final_df['Stats'].eq('Total wins') & final_df['Year'].eq('Career')]

# Names of the 20 robots with the most career wins, highest first
top_20_robots = total_wins_df.sort_values('Value', ascending=False)['RobotName'].iloc[:20]

# Stats to compare against each other
selected_stats = [
    'Win percentage',
    'KO percentage',
    'Average Knockout Time Seconds',
    'Knockouts against',
    'KO against percentage',
    'Judges decision wins',
]

# Career rows of the selected stats for the top 20 robots
is_top_robot = final_df['RobotName'].isin(top_20_robots)
is_career = final_df['Year'].eq('Career')
is_selected_stat = final_df['Stats'].isin(selected_stats)
filtered_df = final_df.loc[is_top_robot & is_career & is_selected_stat]

# One row per robot, one column per stat
pivot_df = filtered_df.pivot_table(values='Value', index='RobotName', columns='Stats').reset_index()

# Pairwise scatter plots with histograms on the diagonal; corner=True draws only the lower triangle
sns.pairplot(pivot_df, corner=True, diag_kind='hist')
plt.show()


# Jacob

In [None]:
# Load the wiki table from battlebots_wiki.csv. index_col=False tells pandas not to
# use the first column as the index, so it remains available as a regular column.
# NOTE(review): `pd` must be in scope here; no explicit pandas import is visible in
# this notebook, so it presumably arrives via `from acquire import *` — confirm.
jacob = pd.read_csv('battlebots_wiki.csv', index_col=False)

In [None]:
# Keep only the columns needed from the CSV (all rows, seven named columns)
jacob = jacob.loc[:, ['Unnamed: 0', 'reboot_seasons', 'team', 'weapons',
                      'weight', 'other_events', 'power']]

In [None]:
# Rename the unnamed first column to 'link'; the other six column names are unchanged
jacob = jacob.rename(columns={'Unnamed: 0': 'link'})

In [None]:
# Drop rows where every descriptive column is null ('link' is not part of the check).
# Reassign instead of using inplace=True: `jacob` was produced by a column selection,
# where in-place mutation can raise SettingWithCopyWarning, and reassignment matches
# the style of the season-level dropna in the next cell.
jacob = jacob.dropna(
    subset=['reboot_seasons', 'team', 'weapons', 'weight', 'other_events', 'power'],
    how='all',
)

In [None]:
# Keep only the rows that have a reboot_seasons value
jacob = jacob[jacob['reboot_seasons'].notna()]

In [None]:
# (rows, columns) of the jacob frame at this point in the notebook
jacob.shape

In [None]:
# Preview the first rows instead of dumping the whole frame into the notebook output
jacob.head()

In [None]:
# TODO: try dropping the null-season rows with a negated boolean mask ("~") instead of dropna
# TODO: learn the .where() version of dropping rows in which all columns are null
# TODO: learn to rename columns without reassigning the .columns list