In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Pitcher

In [None]:
## Load the pitching records, keeping only the columns used in this section
pitching = pd.read_csv('baseballdatabank-2022.2/core/Pitching.csv').loc[
    :, ['playerID', 'yearID', 'G', 'IPouts', 'BFP', 'ERA']
]
## Load the people table, keeping the id and the first/last name
players = pd.read_csv('baseballdatabank-2022.2/core/People.csv').loc[
    :, ['playerID', 'nameFirst', 'nameLast']
]
## Attach the names to every pitching record (inner join on playerID)
pitchers = players.merge(pitching, on='playerID')

## ERA of every pitching record against its year, straight from the merged data
sns.scatterplot(x='yearID', y='ERA', data=pitchers, s=5)

In [None]:
## Per-pitcher totals: G, IPouts and BFP are summed, ERA is the plain (unweighted) mean of the rows
pitcherERA = (
    pitchers
    .groupby('playerID')
    .agg({'G': 'sum', 'IPouts': 'sum', 'BFP': 'sum', 'ERA': 'mean'})
    .reset_index()
)
## The line plot is drawn from the un-aggregated `pitchers` frame (pitcherERA has no yearID column)
sns.lineplot(x='yearID', y='ERA', data=pitchers)
## Clip the y-axis to the 1-15 ERA range
plt.ylim(1, 15)

In [None]:
## Keep only rows with more than 1 game and more than 1 out recorded.
## Despite its name, pitchersOutliers holds the rows that REMAIN after the filter.
pitchersOutliers = pitchers.loc[pitchers['G'].gt(1) & pitchers['IPouts'].gt(1)]
## ERA by year for the filtered rows, y-axis clipped to the 1-15 range
sns.lineplot(x='yearID', y='ERA', data=pitchersOutliers)
plt.ylim(1, 15)

In [None]:
## 20 best rows by ERA (lowest first), excluding the low-usage rows filtered out above
## Only rows with more than 25 games are considered
pitchersHighGames = pitchersOutliers.loc[pitchersOutliers['G'].gt(25)]
best20Pitchers = pitchersHighGames.sort_values('ERA').head(20)

## Lowest ERA among rows with more than 25 games.
## Rows are individual pitching records, not career totals, so a playerID may appear more than once.
sns.barplot(x='playerID', y='ERA', data=best20Pitchers)
plt.xticks(rotation=60)

# Manager

In [None]:
## Gathering data
teams = pd.read_csv('baseballdatabank-2022.2/core/Teams.csv')
teams = teams[['teamID','yearID','G','W','L']]
managers = pd.read_csv('baseballdatabank-2022.2/core/Managers.csv')
## yearID is kept so that each manager row can be matched to the season it belongs to
managers = managers[['playerID','teamID', 'rank', 'yearID']]
## Join on team AND season. Joining on teamID alone pairs every manager with every
## season the franchise ever played, crediting them with wins from years they did not manage.
## NOTE: G/W/L come from Teams.csv, so if a team has several manager rows for one season,
## each of them is paired with the same Teams.csv row.
teamManagers = pd.merge(managers, teams, on=['teamID', 'yearID'])

## Team wins per year for every manager/season pairing
sns.scatterplot(data=teamManagers, x='yearID', y='W', s=5)

In [None]:
## Best managers for the seasons after 1960 (the notebook treats these as the 162-game era)
## Sum games and wins per manager over those seasons, then derive wins per game
managerWins1961 = (
    teamManagers.loc[teamManagers['yearID'].gt(1960)]
    .groupby('playerID')
    .agg({'G': 'sum', 'W': 'sum'})
    .reset_index()
    .assign(winAvg=lambda totals: totals['W'] / totals['G'])
)
best20ManagerWins = managerWins1961.sort_values('winAvg', ascending=False).head(20)

## Top 20 managers by win average (total W / total G) for seasons after 1960
sns.barplot(x='playerID', y='winAvg', data=best20ManagerWins)
plt.xticks(rotation=60)

In [None]:
## Determine win average per manager for all MLB
## Row-level win rate, still stored on teamManagers in case later cells read this column
teamManagers['winAvg'] = teamManagers['W'] / teamManagers['G']

## Aggregate per manager before ranking. Ranking the raw rows returns the 20 best
## individual rows, which repeat playerIDs and collapse to fewer than 20 bars.
## The win average is total W / total G, the same definition used for the post-1960 ranking.
managerTotals = teamManagers.groupby(['playerID']).agg({'G':'sum', 'W':'sum'}).reset_index()
managerTotals['winAvg'] = managerTotals['W'] / managerTotals['G']
managerWins = managerTotals.sort_values(by='winAvg', ascending=False).head(20)

## Top 20 managers with best win average for all of MLB
sns.barplot(data=managerWins, x='playerID', y='winAvg')
plt.xticks(rotation=60)