In [2]:
from dash import Dash, dcc, html
from dash.dependencies import Input, Output, State
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import pandas as pd
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from plotly.offline import plot
import numpy as np
from bball_data_handling import data, teams, team_dict, team_color_map, team_names_df

In [3]:

external_stylesheets = [
    {
        "href": (
            "https://fonts.googleapis.com/css2?"
            "family=Lato:wght@400;700&display=swap"
        ),
        "rel": "stylesheet",
    },
]
app = Dash(__name__, external_stylesheets=external_stylesheets)
app.title = "Baseball Dashboard: Learn More About Your Favorite Teams!"

app.layout = html.Div(
    children=[
        html.Div(
            children=[
                html.P(children="âš¾", className="header-emoji"),
                html.H1(
                    children="Baseball Analytics", className="header-title"
                ),
                html.P(
                    children=(
                        "EDA for baseball teams"
                    ),
                    className="header-description",
                ),
            ],
            className="header",
        ),
        html.Div(
            children=[
                html.Div(
                    children=[
                        html.Div(children="All Teams", className="menu-title"),

                        dcc.Checklist(
                          id="my-checklist",
                          options=team_dict,
                          value=["NYA"],
                          className='my_box_container',
                          labelStyle={"display": "inline-block"},
                        )
                    ]
                ),
                html.Div(
                    children=[
                        html.Div(
                            children="Date Range", className="menu-title"
                        ),
                        dcc.DatePickerRange(
                            id="date-range",
                            min_date_allowed=data["Date"].min(),
                            max_date_allowed=data["Date"].max(),
                            start_date=data["Date"].min(),
                            end_date=data["Date"].max(),
                        ),
                    ]
                ),
            ],
            className="menu",
        ),
        html.Div(
            children=[
                html.Div(
                    children=dcc.Graph(
                        id="win-chart",
                        config={"displayModeBar": False},
                    ),
                    className="card",
                ),
                html.Div(
                    children=dcc.Graph(
                        id="diff-chart",
                        config={"displayModeBar": False},
                    ),
                    className="card",
                ),
                html.Div(
                    children=[
                        html.Div(
                            children="Best/Worst Record through the first N Games:", className="menu-title"
                        ),
                        dcc.Input(
                            id='range',
                            type='number',
                            min=1,
                            max=162,
                            value=5,
                            step=1
                        ),
                    ]
                ),
                html.Div(
                    children=dcc.Graph(
                        id="best-start",
                        config={"displayModeBar": False},
                    ),
                    className="card",
                ),
                html.Div(
                    children=dcc.Graph(
                        id="worst-start",
                        config={"displayModeBar": False},
                    ),
                    className="card",
                ),
                html.Div(
                    children=dcc.Graph(
                        id="best-stretch-chart",
                        config={"displayModeBar": False},
                    ),
                    className="card",
                ),
                html.Div(
                    children=dcc.Graph(
                        id="worst-stretch-chart",
                        config={"displayModeBar": False},
                    ),
                    className="card",
                )
            ],
            className="wrapper",
        ),
    ]
)



@app.callback(
    Output("win-chart", "figure"),
    Output("diff-chart", "figure"),
    Output("best-start","figure"),
    Output("worst-start", "figure"),
    Output("best-stretch-chart","figure"),
    Output("worst-stretch-chart", "figure"),
    Input("my-checklist","value"),
    Input("date-range", "start_date"),
    Input("date-range", "end_date"),
    Input("range","value")
)

def update_charts(options, start_date, end_date, num_games):
    
    # Chart for Win Total and Point Differential Total over massive time span
    
    # Determine the Teams and the Relevant Time Range
    filtered_data = data[(data['Team'].isin(options)) & (data['Date'] >= start_date) & (data['Date'] <= end_date)]
    filtered_data = filtered_data.reset_index(drop = True)
    
    # Idea: Instead of using Date for X-Axis, we create "Game No." variable; we must:
    # 1. Create a Dataframe for every game a team played in the time frame, and get the index
    # 2. Stack these dataframes together
    # 3. Get the index of this finalized dataframe
    # 4. Convert it to list so that we can add it as its own category
    filtered_data['Game No.'] = list(pd.concat([pd.DataFrame(filtered_data[filtered_data["Team"] == team][['Team','Differential']].reset_index(drop=True).reset_index()['index']) for team in options])['index'])
    
    # Start at 1, not 0
    filtered_data['Game No.'] = filtered_data['Game No.'] + 1
    filtered_data["Year"] = filtered_data['Date'].dt.year
    
    reg_df = filtered_data.sort_values(['Team Name', 'Game No.'], ascending = True)


    # Determine the number of Wins up to a certain game/date
    reg_df["Wins"] = reg_df[["Team","Result"]].groupby(by=["Team"]).cumsum()
    

    # Incorporate W-L visual for our charts
    reg_df["Losses"] = reg_df["Game No."] - reg_df["Wins"]
    reg_df["Win-Loss"] = reg_df["Wins"].astype(str) + '-' + reg_df["Losses"].astype(str)
    
    # Determine the run differential up to a certain game/date
    reg_df["Run Differential"] = reg_df[["Team","Differential"]].groupby(by=["Team"]).cumsum()
    
    # We can repeat this process for various counting stats (# of hits, rolling averages, etc.)
    
    # Make sure it is ordered

    win_chart_figure = px.line(reg_df, x="Game No.", y="Wins", color="Team Name",
                               title='Total Wins in Time Period', color_discrete_map=team_color_map,
                              hover_data={'Game No.': False,'Date':True, 'Win-Loss':True,'Wins':False})
    
    diff_chart_figure = px.line(reg_df, x="Game No.", y="Run Differential", color="Team Name",
                                title="Team's Run Differential in this Stretch", color_discrete_map=team_color_map,
                               hover_data={'Game No.': False,"Date":True})

    
    # Charts for Best/Worst start to a season for each team
    

    # Game No: Same idea as before, but instead of just team we must look at every team-year combination
    filtered_data['Game No.'] = list(pd.concat([pd.DataFrame(
        filtered_data[(filtered_data["Team"] == team) & (filtered_data["Year"] == year)][
            ['Team Name', 'Year', 'Differential']].reset_index(drop=True).groupby(
            by=["Team Name", "Year"]).cumsum().reset_index()['index']) for team in options for year in range(1990, 2024)])['index'])
    filtered_data['Game No.'] = filtered_data['Game No.'] + 1
   # filtered_data.sort_values(['Team Name', 'Game No.'], ascending = True, inplace = True)

    # Focus on the first N games (N is what you define in the Dashboard)
    first_n_games = filtered_data[filtered_data['Game No.'] <= num_games]
    
    # Create "Wins" columns by cumulatively adding wins across the first N games
    first_n_games["Wins"] = first_n_games[["Team Name", "Year", "Result"]].groupby(by=["Team Name", "Year"]).cumsum()
    
    nth_game = first_n_games[first_n_games["Game No."] == num_games]
    
    # Find the Most Wins through N games 
    # max_wins_df contains a team and the year it had its most wins through N games, nth_game is data for just the nth game
    max_wins = nth_game[["Team Name","Year", "Wins"]].sort_values(["Wins","Year"],ascending=False).drop_duplicates(subset=["Team Name"], keep="first")[["Team Name","Year"]]
    
    # Include Season in Team Name, makes merging easier
    max_wins["Team Name"] = max_wins['Year'].astype(str) + ' ' + max_wins['Team Name'].astype(str)

    # Same Idea but for worst start
    min_wins = nth_game[["Team Name","Year", "Wins"]].sort_values(["Wins","Year"],ascending=[True,False]).drop_duplicates(subset=["Team Name"], keep="first")[["Team Name","Year"]]    
    min_wins["Team Name"] = min_wins['Year'].astype(str) + ' ' + min_wins['Team Name'].astype(str)
    
    # Trick to ensure we have correct color scheme for each team
    first_n_games['color'] = first_n_games['Team Name'].apply(lambda x: team_color_map[x])
    first_n_games["Team Name"] = first_n_games['Year'].astype(str) + ' ' + first_n_games['Team Name'].astype(str)
    color_map = first_n_games[['Team Name','color']].drop_duplicates().set_index("Team Name").to_dict()['color']
    
    # Incorporate W-L visual for our charts
    first_n_games["Losses"] = first_n_games["Game No."] - first_n_games["Wins"]
    first_n_games["Win-Loss"] = first_n_games["Wins"].astype(str) + '-' + first_n_games["Losses"].astype(str)
    
    # Create indicators for the appropriate season
    first_n_games['best_start_bool'] = first_n_games['Team Name'].apply(lambda x: 1 if x in list(max_wins["Team Name"]) else 0)
    first_n_games['worst_start_bool'] = first_n_games['Team Name'].apply(lambda x: 1 if x in list(min_wins["Team Name"]) else 0)

    best_start_df = first_n_games[first_n_games['best_start_bool'] == 1]
    worst_start_df = first_n_games[first_n_games['worst_start_bool'] == 1]

    best_start_figure = px.line(best_start_df, x="Game No.", y="Wins", color="Team Name", color_discrete_map=color_map,
                               hover_data={'Game No.': False, 'Wins': False, 'Win-Loss': True},
                               title="Best Start to Season (through first N games)")

    worst_start_figure = px.line(worst_start_df, x="Game No.", y="Wins", color="Team Name", color_discrete_map=color_map,
                                hover_data={'Game No.': False, 'Wins': False, 'Win-Loss': True},
                                 title="Worst Start to Season (through first N games)")
    
    # Same idea, but now best/worst stretch through n games (doesn't have to be at the start)
    filtered_data['Rolling Wins'] = filtered_data[["Team Name", "Year", "Result"]].groupby(by=["Team Name", "Year"]).rolling(num_games).sum().reset_index()['Result']
    max_wins = filtered_data[["Team Name","Date", "Rolling Wins"]].sort_values(["Rolling Wins","Date"],ascending=False).drop_duplicates(subset=["Team Name"], keep="first")[["Team Name","Date"]]
    max_wins["Team Merge"] = max_wins['Date'].astype(str) + ' ' + max_wins['Team Name'].astype(str)
    
    min_wins = filtered_data[["Team Name","Date", "Rolling Wins"]].sort_values(["Rolling Wins","Date"],ascending=[True,False]).drop_duplicates(subset=["Team Name"], keep="first")[["Team Name","Date"]]
    min_wins["Team Merge"] = min_wins['Date'].astype(str) + ' ' + min_wins['Team Name'].astype(str)
    
    
    max_indices = [list(range(x,x-num_games,-1)) for x in list(filtered_data.iloc[max_wins.index].index)]
    max_indices = [j for i in max_indices for j in i]
    
    min_indices = [list(range(x,x-num_games,-1)) for x in list(filtered_data.iloc[min_wins.index].index)]
    min_indices = [j for i in min_indices for j in i]
    
    win_streak = filtered_data[filtered_data.index.isin(max_indices)]
    win_streak['Game No.'] = list(pd.concat([pd.DataFrame(
            win_streak[(win_streak["Team"] == team) & (win_streak["Year"] == year)][
                ['Team Name', 'Year', 'Differential']].reset_index(drop=True).groupby(
                by=["Team Name", "Year"]).cumsum().reset_index()['index']) for team in options for year in range(1990, 2024)])['index'])
    win_streak['Game No.'] = win_streak['Game No.'] + 1
   # win_streak.sort_values(['Team Name', 'Game No.'], ascending = True, inplace = True)

    win_streak["Wins"] = win_streak[["Team Name", "Year", "Result"]].groupby(by=["Team Name", "Year"]).cumsum()
    win_streak["Losses"] = win_streak["Game No."] - win_streak["Wins"]
    win_streak["Win-Loss"] = win_streak["Wins"].astype(str) + '-' + win_streak["Losses"].astype(str)

    best_stretch_figure = px.line(win_streak, x="Game No.", y="Wins", color="Team Name",color_discrete_map = team_color_map,
                                hover_data={'Game No.': False, 'Wins': False, 'Win-Loss': True, 'Date' : True},
                                title="Most Wins in N game stretch (single season)")
    
    losing_streak = filtered_data[filtered_data.index.isin(min_indices)]
    losing_streak['Game No.'] = list(pd.concat([pd.DataFrame(
            losing_streak[(losing_streak["Team"] == team) & (losing_streak["Year"] == year)][
                ['Team Name', 'Year', 'Differential']].reset_index(drop=True).groupby(
                by=["Team Name", "Year"]).cumsum().reset_index()['index']) for team in options for year in range(1990, 2024)])['index'])
    losing_streak['Game No.'] = losing_streak['Game No.'] + 1
   # losing_streak.sort_values(['Team Name', 'Game No.'], ascending = True, inplace = True)

    losing_streak["Wins"] = losing_streak[["Team Name", "Year", "Result"]].groupby(by=["Team Name", "Year"]).cumsum()
    losing_streak["Losses"] = losing_streak["Game No."] - losing_streak["Wins"]
    losing_streak["Win-Loss"] = losing_streak["Wins"].astype(str) + '-' + losing_streak["Losses"].astype(str)

    worst_stretch_figure = px.line(losing_streak, x="Game No.", y="Wins", color="Team Name",color_discrete_map = team_color_map,
                                hover_data={'Game No.': False, 'Wins': False, 'Win-Loss': True, 'Date' : True},
                                 title="Fewest Wins in N game stretch (single season)")
    
    return win_chart_figure, diff_chart_figure, best_start_figure, worst_start_figure, best_stretch_figure, worst_stretch_figure


if __name__ == "__main__":
    app.run_server(debug=True,use_reloader=False)

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app '__main__'
 * Debug mode: on




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/

In [None]:
%tb

In [None]:
options = ['NYA','CLE']
start_date=data["Date"].min()
end_date=data["Date"].max()
filtered_data = data[(data['Team'].isin(options)) & (data['Date'] >= start_date) & (data['Date'] <= end_date)]
filtered_data = filtered_data.reset_index(drop=True)
filtered_data["Wins"] = filtered_data[["Team Name", "Year", "Result"]].groupby(by=["Team Name", "Year"]).cumsum()

filtered_data['Rolling Wins'] = filtered_data[["Team Name", "Year", "Result"]].groupby(by=["Team Name", "Year"]).rolling(min_periods=22, window=22).sum().reset_index()['Result']

# filtered_data['Game No.'] = list(pd.concat([pd.DataFrame(
#         filtered_data[(filtered_data["Team"] == team) & (filtered_data["Year"] == year)][
#             ['Team Name', 'Year', 'Differential']].reset_index(drop=True).groupby(
#             by=["Team Name", "Year"]).cumsum().reset_index()['index']) for team in options for year in range(1990, 2024)])['index'])
# filtered_data['Game No.'] = filtered_data['Game No.'] + 1

In [None]:
max_wins = filtered_data[["Team Name","Date", "Rolling Wins"]].sort_values(["Rolling Wins","Date"],ascending=False).drop_duplicates(subset=["Team Name"], keep="first")[["Team Name","Date"]]
max_wins["Team Merge"] = max_wins['Date'].astype(str) + ' ' + max_wins['Team Name'].astype(str)
a = [list(range(x,x-22,-1)) for x in list(filtered_data.iloc[max_wins.index].index)]
a = [j for i in a for j in i]
filtered_data = filtered_data[filtered_data.index.isin(a)]

In [None]:
filtered_data
filtered_data['Game No.'] = list(pd.concat([pd.DataFrame(
        filtered_data[(filtered_data["Team"] == team) & (filtered_data["Year"] == year)][
            ['Team Name', 'Year', 'Differential']].reset_index(drop=True).groupby(
            by=["Team Name", "Year"]).cumsum().reset_index()['index']) for team in options for year in range(1990, 2024)])['index'])
filtered_data['Game No.'] = filtered_data['Game No.'] + 1

filtered_data["Wins"] = filtered_data[["Team Name", "Year", "Result"]].groupby(by=["Team Name", "Year"]).cumsum()
filtered_data["Losses"] = filtered_data["Game No."] - filtered_data["Wins"]
filtered_data["Win-Loss"] = filtered_data["Wins"].astype(str) + '-' + filtered_data["Losses"].astype(str)
    

In [None]:
worst_chart_figure = px.line(filtered_data, x="Game No.", y="Wins", color="Team Name",
                                hover_data={'Game No.': False, 'Wins': False, 'Win-Loss': True, 'Date' : True})

In [None]:
worst_chart_figure