This notebook is designed to automatically output a decent player/team evaluation for each season.

In [36]:
from blaseball_mike import reference as r
from blaseball_mike.models import Team
import json
import pandas as pd
import qgrid

# Get the data

In [37]:
# Season to evaluate; the save cell below also handles the value "current".
season = 13

# Fetch the season's stats in a single call. Later cells read the hitting
# splits from p_stats[0] and the pitching splits from p_stats[1].
p_stats = r.get_stats(
    season=season,
)

`def get_stats(type_='season', group='hitting,pitching', fields=None, season='current', game_type=None, sort_stat=None, order=None, player_id=None, team_id=None, limit=None)`

Get the stats filtered by team/player/season. Defaults to fetching all stats which is extremely slow, be warned.

Args

**type_** : str
The type of stat split (defaults to season).

**group** : str
The stat groups to return (e.g. hitting,pitching or hitting).

**fields** : list
The stat fields to return (e.g. [strikeouts,home_runs] or [home_runs]).

**season**
The (1-indexed) Blaseball season (or current for current season).

**game_type** : str
The type of game (e.g. R for regular season, P for postseason).

**sort_stat** : str
The stat field to sort on.

**order** : str
The order of the sorted stat field.

**player_id** : str
The ID of a player.

**team_id** : str
The ID of a team to retrieve player stats for.

**limit** : int
The number of rows to return for each field (e.g. 5).

# TODO: also fetch the stat leaders?

# Save the data

In [3]:
import os

# When "current" was requested, replace it with the season value reported in
# the first hitting split so the output folder gets a concrete name.
season = p_stats[0]["splits"][0]["season"] if season == "current" else season

# Store the raw download in "<working dir>/Season <n>/player_stats.json".
cwd = os.getcwd()
season_folder = os.path.join(cwd, "Season {}".format(season))
# exist_ok=True replaces the separate exists() check: no check-then-create
# race, and the cell can be re-run without failing on an existing folder.
os.makedirs(season_folder, exist_ok=True)

# Build the file path with os.path.join rather than a hard-coded "/" separator.
with open(os.path.join(season_folder, "player_stats.json"), "w") as f:
    json.dump(p_stats, f)

# Hitting Data

In [38]:
# Flatten every hitting split into one record keyed by column name.
# Keyed records (instead of positional lists labelled with the first split's
# stat keys) keep each value under its own stat name even when a split lists
# its stats in a different order or is missing one; gaps become NaN.
# NOTE(review): assumes no stat is itself called "name", "id", "team_abb" or
# "team_id" -- such a key would overwrite the identity column.
hitting_records = [
    {
        "name": split["player"]["fullName"],
        "id": split["player"]["id"],
        "team_abb": split["team"]["team_abbreviation"],
        "team_id": split["team"]["team_id"],
        **split["stat"],
    }
    for split in p_stats[0]["splits"]
]
hitting_df = pd.DataFrame(hitting_records)

# Interactive grid; fixed narrow columns keep the wide stat table scrollable.
q_widget = qgrid.show_grid(hitting_df, show_toolbar=True, grid_options={"forceFitColumns":False, "defaultColumnWidth":50})
q_widget

QgridWidget(grid_options={'fullWidthRows': True, 'syncColumnCellResize': True, 'forceFitColumns': False, 'defa…

# Pitching Data

In [39]:
# Flatten every pitching split into one record keyed by column name.
# Keyed records (instead of positional lists labelled with the first split's
# stat keys) keep each value under its own stat name even when a split lists
# its stats in a different order or is missing one; gaps become NaN.
# NOTE(review): assumes no stat is itself called "name", "id", "team_abb" or
# "team_id" -- such a key would overwrite the identity column.
pitching_records = [
    {
        "name": split["player"]["fullName"],
        "id": split["player"]["id"],
        "team_abb": split["team"]["team_abbreviation"],
        "team_id": split["team"]["team_id"],
        **split["stat"],
    }
    for split in p_stats[1]["splits"]
]
pitching_df = pd.DataFrame(pitching_records)

# Interactive grid; fixed narrow columns keep the wide stat table scrollable.
q_widget = qgrid.show_grid(pitching_df, show_toolbar=True, grid_options={"forceFitColumns":False, "defaultColumnWidth":50})
q_widget

QgridWidget(grid_options={'fullWidthRows': True, 'syncColumnCellResize': True, 'forceFitColumns': False, 'defa…

### Note
Some players may have more than one row, even though this data covers a single season. This happens when a player
is Feedbacked to another team in the middle of the season. Note that team names are currently bugged when this
happens, so the rows for the team before and the team after will show the same name.

Note that "NaN" is an actual player and not an error.

"Duplicate" hitters:

In [40]:
# Number of hitting rows recorded under each player name.
g1 = hitting_df.groupby("name")[["team_abb"]].count()
# Show only the names that occur on more than one row.
g1[g1["team_abb"].gt(1)]

Unnamed: 0_level_0,team_abb
name,Unnamed: 1_level_1
Augusta Chadwell,2
Lawrence Horne,2
Raúl Leal,2
Sutton Bishop,2
Sutton Picklestein,2
Theodore Holloway,2


"Duplicate" pitchers:

In [41]:
# Number of pitching rows recorded under each player name.
g2 = pitching_df.groupby("name")[["team_abb"]].count()
# Show only the names that occur on more than one row.
g2[g2["team_abb"].gt(1)]

Unnamed: 0_level_0,team_abb
name,Unnamed: 1_level_1
Bright Zimmerman,2
Doc Anice,2
Gabriel Griffith,2
Jaylen Hotdogfingers,3
Lizzy Pasta,2
Mindy Kugel,2
Yosh Carpenter,2


# Season Summary Statistics
## Hitters

In [59]:
# Keep only the stat columns and discard any row that is missing a stat.
stats_frame = hitting_df.drop(columns=["id", "name", "team_abb", "team_id"]).dropna()

# One row per stat, one column per summary measure. describe() supplies its
# own labels (count, mean, std, min, 25%, 50%, 75%, max), so nothing is
# relabelled by position; restricting to numeric columns keeps a non-numeric
# column from being reported under the wrong headings.
p_summary = stats_frame.select_dtypes("number").describe().T

q_widget = qgrid.show_grid(p_summary.round(2), show_toolbar=True, grid_options={"forceFitColumns":False, "defaultColumnWidth":100})
q_widget

QgridWidget(grid_options={'fullWidthRows': True, 'syncColumnCellResize': True, 'forceFitColumns': False, 'defa…

### Mins and Maxes

In [61]:
# Build the hitter stat frame locally instead of reading the shared
# `stats_frame` variable, which the pitcher summary cell also assigns; that
# way this cell cannot pick up pitcher data when cells are run out of order.
hitting_stats = hitting_df.drop(columns=["id", "name", "team_abb", "team_id"]).dropna()

# For every stat, record the extreme values and the names of all rows in
# hitting_df that match them (ties are joined with ", ").
min_max_names = []
for stat in hitting_stats.columns:
    min_ = hitting_stats[stat].min()
    max_ = hitting_stats[stat].max()
    # Select the name column by label, and cast to str so join() cannot fail
    # on a non-string name value.
    min_holders = hitting_df.loc[hitting_df[stat] == min_, "name"].astype(str)
    max_holders = hitting_df.loc[hitting_df[stat] == max_, "name"].astype(str)
    min_max_names.append({"min": min_, "min_name": ", ".join(min_holders),
                          "max": max_, "max_name": ", ".join(max_holders)})
min_max_df = pd.DataFrame(min_max_names, index=hitting_stats.columns)
q_widget = qgrid.show_grid(min_max_df.round(2), show_toolbar=True, grid_options={"forceFitColumns":False, "defaultColumnWidth":100})
q_widget

QgridWidget(grid_options={'fullWidthRows': True, 'syncColumnCellResize': True, 'forceFitColumns': False, 'defa…

## Pitchers

In [42]:
# Keep only the stat columns and discard any row that is missing a stat.
stats_frame = pitching_df.drop(columns=["id", "name", "team_abb", "team_id"]).dropna()

# One row per stat, one column per summary measure. describe() supplies its
# own labels (count, mean, std, min, 25%, 50%, 75%, max), so nothing is
# relabelled by position; restricting to numeric columns keeps a non-numeric
# column from being reported under the wrong headings.
p_summary = stats_frame.select_dtypes("number").describe().T

q_widget = qgrid.show_grid(p_summary.round(2), show_toolbar=True, grid_options={"forceFitColumns":False, "defaultColumnWidth":100})
q_widget

QgridWidget(grid_options={'fullWidthRows': True, 'syncColumnCellResize': True, 'forceFitColumns': False, 'defa…

### Mins and Maxes

In [56]:
# Build the pitcher stat frame locally instead of reading the shared
# `stats_frame` variable, which the hitter summary cell also assigns; that
# way this cell cannot pick up hitter data when cells are run out of order.
pitching_stats = pitching_df.drop(columns=["id", "name", "team_abb", "team_id"]).dropna()

# For every stat, record the extreme values and the names of all rows in
# pitching_df that match them (ties are joined with ", ").
min_max_names = []
for stat in pitching_stats.columns:
    min_ = pitching_stats[stat].min()
    max_ = pitching_stats[stat].max()
    # Select the name column by label, and cast to str so join() cannot fail
    # on a non-string name value.
    min_holders = pitching_df.loc[pitching_df[stat] == min_, "name"].astype(str)
    max_holders = pitching_df.loc[pitching_df[stat] == max_, "name"].astype(str)
    min_max_names.append({"min": min_, "min_name": ", ".join(min_holders),
                          "max": max_, "max_name": ", ".join(max_holders)})
min_max_df = pd.DataFrame(min_max_names, index=pitching_stats.columns)
q_widget = qgrid.show_grid(min_max_df.round(2), show_toolbar=True, grid_options={"forceFitColumns":False, "defaultColumnWidth":100})
q_widget

QgridWidget(grid_options={'fullWidthRows': True, 'syncColumnCellResize': True, 'forceFitColumns': False, 'defa…

# Metrics Comparison
## Hitters

In [63]:
def _runs_created(x):
    """Return the runs_created column computed from the frame's hitting stat columns."""
    times_on_base = x.hits + x.walks - x.caught_stealing + x.hit_by_pitches
    bases_advanced = (x.total_bases
                      + (.26 * (x.walks + x.hit_by_pitches))
                      + (.52 * (x.sacrifice_bunts + x.sacrifice_flies + x.stolen_bases)))
    opportunities = x.at_bats + x.walks + x.hit_by_pitches + x.sacrifice_bunts + x.sacrifice_flies
    return (times_on_base * bases_advanced) / opportunities


def _names_ranked_by(frame, stat):
    """Return the frame's names ordered from highest to lowest `stat`, re-indexed from 0."""
    ordered = frame[["name", stat]].sort_values(by=stat, ascending=False)
    return ordered["name"].reset_index(drop=True)


# Hitting table extended with a runs_created column.
rc_df = hitting_df.assign(runs_created=_runs_created)

# Hitters with more than 5 plate appearances, re-indexed to the full row count
# so this ranking column has the same length as the others (missing rows sort last).
enough_appearances = rc_df[rc_df["plate_appearances"] > 5].reindex(range(len(rc_df)))

# Each column lists player names in descending order of one metric, so the
# same row position can be compared across metrics.
rank_compare = pd.DataFrame({
    "OBS": _names_ranked_by(rc_df, "on_base_slugging"),
    "OBS_drop": _names_ranked_by(enough_appearances, "on_base_slugging"),
    "RC": _names_ranked_by(rc_df, "runs_created"),
    "BA_RISP": _names_ranked_by(rc_df, "batting_average_risp"),
})
q_widget = qgrid.show_grid(rank_compare, show_toolbar=True, grid_options={"forceFitColumns":False, "defaultColumnWidth":150})
q_widget

QgridWidget(grid_options={'fullWidthRows': True, 'syncColumnCellResize': True, 'forceFitColumns': False, 'defa…

## Pitchers