In [None]:
"""
MLB Team & Player Analytics – Streamlit (local + PostgreSQL)
===========================================================
Run locally, stores 10 years of hitting & pitching in PostgreSQL, offers
historical analytics + current stats (refreshable), with Team and Player pages.

Quick start (terminal):
  1) Set DATABASE_URL in the environment before launching (this script reads it
     with os.getenv and does not load a .env file by itself), e.g.:
        export DATABASE_URL=postgresql+psycopg2://postgres:postgres@localhost:5432/mlb
  2) Install requirements (see README or pip install -r requirements.txt)
  3) streamlit run app.py
  4) In the running app, open "Setup & Data" in the sidebar once to initialize
     the DB and load data.

Note: Uses pybaseball for historical data (season aggregates) and Statcast for
recent/current windows. You can re-run refresh tasks at any time.
"""

import os
import time
import datetime as dt
from typing import Optional, Tuple

import pandas as pd
import numpy as np
import streamlit as st
from sqlalchemy import (
    create_engine, Column, Integer, Float, String, Date, DateTime, Boolean,
    UniqueConstraint, text
)
from sqlalchemy.orm import declarative_base, sessionmaker
from sqlalchemy.exc import IntegrityError

# Data sources
from pybaseball import batting_stats, pitching_stats
from pybaseball.statcast import statcast_batter, statcast_pitcher
from pybaseball.teamid_lookup import team_ID_map
from pybaseball.playerid_reverse_lookup import playerid_reverse_lookup

# --------------
# Configuration
# --------------
# Page-level Streamlit settings: title, icon and wide layout.
st.set_page_config(
    page_title="MLB Analytics – Teams & Players",
    page_icon="⚾",
    layout="wide",
)

# Connection string is read from the process environment; the fallback points
# at a PostgreSQL database named "mlb" on localhost.
DATABASE_URL = os.getenv("DATABASE_URL", "postgresql+psycopg2://postgres:postgres@localhost:5432/mlb")

# Default season range offered in the UI: the current year and the nine before it.
END_YEAR_DEFAULT = dt.date.today().year
START_YEAR_DEFAULT = dt.date.today().year - 9  # inclusive window for 10 seasons

# --------------
# Database Setup
# --------------
Base = declarative_base()

class Team(Base):
    """ORM mapping for the ``teams`` table, keyed by ``team_id``."""
    __tablename__ = "teams"
    # Primary key is supplied by the writer (autoincrement is disabled).
    team_id = Column(Integer, primary_key=True, autoincrement=False)
    # Descriptive fields; ``team`` and ``team_abbr`` are indexed for lookups.
    team = Column(String(50), index=True)
    team_abbr = Column(String(10), index=True)
    league = Column(String(10))
    division = Column(String(15))

class Player(Base):
    """ORM mapping for the ``players`` table, keyed by ``player_id``."""
    __tablename__ = "players"
    # Primary key is supplied by the writer (autoincrement is disabled).
    # NOTE(review): the season tables are loaded with FanGraphs ids (IDfg) as
    # player_id; presumably this key is meant to be the same id — confirm.
    player_id = Column(Integer, primary_key=True, autoincrement=False)
    name = Column(String(100), index=True)
    bats = Column(String(5))
    throws = Column(String(5))
    primary_pos = Column(String(5))

class BattingSeason(Base):
    """One batting line per (season, player_id, team) in ``batting_seasons``."""

    __tablename__ = "batting_seasons"
    __table_args__ = (
        UniqueConstraint("season", "player_id", "team", name="uq_batting_season"),
    )

    id = Column(Integer, primary_key=True)
    season = Column(Integer, index=True)
    player_id = Column(Integer, index=True)
    team = Column(String(10), index=True)  # team abbreviation

    # Counting stats (pybaseball columns may evolve; only the essentials are kept).
    G = Column(Integer)
    PA = Column(Integer)
    AB = Column(Integer)
    H = Column(Integer)
    HR = Column(Integer)
    R = Column(Integer)
    RBI = Column(Integer)
    BB = Column(Integer)
    SO = Column(Integer)
    HBP = Column(Integer)
    SB = Column(Integer)
    CS = Column(Integer)

    # Rate stats.
    BA = Column(Float)
    OBP = Column(Float)
    SLG = Column(Float)
    OPS = Column(Float)
    WAR = Column(Float)

    # Timestamp filled in with the current UTC time when a row is created
    # through the ORM without an explicit value.
    last_updated = Column(DateTime, default=dt.datetime.utcnow, index=True)

class PitchingSeason(Base):
    """One pitching line per (season, player_id, team) in ``pitching_seasons``."""

    __tablename__ = "pitching_seasons"
    __table_args__ = (
        UniqueConstraint("season", "player_id", "team", name="uq_pitching_season"),
    )

    id = Column(Integer, primary_key=True)
    season = Column(Integer, index=True)
    player_id = Column(Integer, index=True)
    team = Column(String(10), index=True)

    # Counting stats.
    W = Column(Integer)
    L = Column(Integer)
    G = Column(Integer)
    GS = Column(Integer)
    IP = Column(Float)
    H = Column(Integer)
    R = Column(Integer)
    ER = Column(Integer)
    HR = Column(Integer)
    BB = Column(Integer)
    SO = Column(Integer)
    HBP = Column(Integer)

    # Rate stats.
    ERA = Column(Float)
    WHIP = Column(Float)
    FIP = Column(Float)
    WAR = Column(Float)

    # Timestamp filled in with the current UTC time when a row is created
    # through the ORM without an explicit value.
    last_updated = Column(DateTime, default=dt.datetime.utcnow, index=True)

class RecentWindow(Base):
    """Stats for one player over one date window, stored in ``recent_windows``.

    Rows are unique per (player_id, is_pitcher, start_date, end_date).
    """

    __tablename__ = "recent_windows"
    __table_args__ = (
        UniqueConstraint(
            "player_id", "is_pitcher", "start_date", "end_date",
            name="uq_recent_window",
        ),
    )

    id = Column(Integer, primary_key=True)
    player_id = Column(Integer, index=True)
    is_pitcher = Column(Boolean, index=True)
    start_date = Column(Date, index=True)
    end_date = Column(Date, index=True)

    # Hitting window metrics
    PA = Column(Integer)
    AB = Column(Integer)
    H = Column(Integer)
    HR = Column(Integer)
    R = Column(Integer)
    RBI = Column(Integer)
    BB = Column(Integer)
    SO = Column(Integer)
    BA = Column(Float)
    OBP = Column(Float)
    SLG = Column(Float)
    OPS = Column(Float)

    # Pitching window metrics
    IP = Column(Float)
    ER = Column(Integer)
    BF = Column(Integer)
    # placeholder for examples
    AVG_EV = Column(Float)
    K = Column(Integer)
    BBP = Column(Float)

    # Timestamp filled in with the current UTC time when a row is created
    # through the ORM without an explicit value.
    last_updated = Column(DateTime, default=dt.datetime.utcnow, index=True)

# One engine shared by the whole app (pre-ping enabled), plus a session factory
# bound to it.
_engine = create_engine(DATABASE_URL, future=True, pool_pre_ping=True)
SessionLocal = sessionmaker(bind=_engine)


@st.cache_resource(show_spinner=False)
def init_db():
    """Create every table declared on ``Base`` against the shared engine.

    Returns:
        True once ``create_all`` has returned.
    """
    Base.metadata.create_all(bind=_engine)
    return True

# --------------
# Utility
# --------------

def _team_directory() -> pd.DataFrame:
    """Return pybaseball's team table with its index exposed as ``team_abbr``.

    The frame from ``team_ID_map()`` has its index moved into a column that is
    renamed from ``index`` to ``team_abbr``; an all-NaN ``mlbID`` column is
    added when the source does not provide one. All other columns are passed
    through untouched.

    NOTE(review): this assumes the source frame's index is unnamed and holds
    team abbreviations — confirm against the installed pybaseball version,
    including that ``team_ID_map`` is available there.
    """
    teams = team_ID_map().reset_index().rename(columns={"index": "team_abbr"})
    if "mlbID" not in teams.columns:
        teams["mlbID"] = np.nan
    return teams

@st.cache_data(show_spinner=False)
def list_teams_abbr() -> list:
    """Return the distinct, non-null team abbreviations in sorted order."""
    abbr_column = _team_directory()["team_abbr"]
    distinct = abbr_column.dropna().unique().tolist()
    distinct.sort()
    return distinct

@st.cache_data(show_spinner=False)
def reverse_lookup_players(ids: list[int], key_type: str = "mlbam") -> pd.DataFrame:
    """Resolve player ids to names through pybaseball's reverse lookup.

    Args:
        ids: Player ids of the kind named by ``key_type``.
        key_type: Which id system ``ids`` belong to. Defaults to ``"mlbam"``
            (the previous hard-wired behaviour). The season tables in this app
            store FanGraphs ids (``IDfg``), so callers resolving those should
            pass ``"fangraphs"`` and join on ``key_fangraphs``.

    Returns:
        The lookup frame plus a title-cased ``name`` column. For empty input an
        empty frame that still carries the columns callers select
        (``key_mlbam``, ``key_fangraphs``, ``name_first``, ``name_last``,
        ``name``).
    """
    if not ids:
        return pd.DataFrame(
            columns=["key_mlbam", "key_fangraphs", "name_first", "name_last", "name"]
        )
    found = playerid_reverse_lookup(ids, key_type=key_type).copy()
    # Build the display name from whichever parts exist, so a missing first or
    # last name no longer turns the whole name into NaN.
    first = found["name_first"].fillna("").astype(str).str.title()
    last = found["name_last"].fillna("").astype(str).str.title()
    found["name"] = (first + " " + last).str.strip().replace("", np.nan)
    return found

# --------------
# ETL: Historical Seasons
# --------------

def _clean_batting(df: pd.DataFrame) -> pd.DataFrame:
    # Ensure essential columns exist
    for c in ["G","PA","AB","H","HR","R","RBI","BB","SO","HBP","SB","CS","AVG","OBP","SLG","OPS","WAR","IDfg","Name","Team"]:
        if c not in df.columns:
            df[c] = np.nan
    df = df.rename(columns={
        "AVG": "BA",
        "IDfg": "player_id",
        "Name": "name",
        "Team": "team"
    })
    # Datatypes & NA fill
    for c in ["G","PA","AB","H","HR","R","RBI","BB","SO","HBP","SB","CS"]:
        df[c] = pd.to_numeric(df[c], errors="coerce").fillna(0).astype(int)
    for c in ["BA","OBP","SLG","OPS","WAR"]:
        df[c] = pd.to_numeric(df[c], errors="coerce")
    df["team"] = df["team"].fillna("TOT")
    return df

def _clean_pitching(df: pd.DataFrame) -> pd.DataFrame:
    for c in ["W","L","G","GS","IP","H","R","ER","HR","BB","SO","HBP","ERA","WHIP","FIP","WAR","IDfg","Name","Team"]:
        if c not in df.columns:
            df[c] = np.nan
    df = df.rename(columns={
        "IDfg": "player_id",
        "Name": "name",
        "Team": "team"
    })
    # Types
    int_cols = ["W","L","G","GS","H","R","ER","HR","BB","SO","HBP"]
    for c in int_cols:
        df[c] = pd.to_numeric(df[c], errors="coerce").fillna(0).astype(int)
    float_cols = ["IP","ERA","WHIP","FIP","WAR"]
    for c in float_cols:
        df[c] = pd.to_numeric(df[c], errors="coerce")
    df["team"] = df["team"].fillna("TOT")
    return df

@st.cache_data(show_spinner=False)
def fetch_batting_seasons(start_year: int, end_year: int) -> pd.DataFrame:
    """Download and clean one batting table per season in ``[start_year, end_year]``.

    Every hitter is requested (``qual=0``) rather than only qualified hitters,
    because the team pages sum these rows into team totals.

    Args:
        start_year: First season to fetch (inclusive).
        end_year: Last season to fetch (inclusive).

    Returns:
        All cleaned seasons concatenated, with a ``season`` column.

    Raises:
        ValueError: If ``end_year`` is before ``start_year``.
    """
    first, last = int(start_year), int(end_year)
    if last < first:
        raise ValueError(f"end_year ({last}) must not be before start_year ({first})")
    frames = []
    for yr in range(first, last + 1):
        season_df = batting_stats(yr, qual=0)
        season_df["season"] = yr
        frames.append(_clean_batting(season_df))
    return pd.concat(frames, ignore_index=True)

@st.cache_data(show_spinner=False)
def fetch_pitching_seasons(start_year: int, end_year: int) -> pd.DataFrame:
    """Download and clean one pitching table per season in ``[start_year, end_year]``.

    Every pitcher is requested (``qual=0``) rather than only qualified
    pitchers, because the team pages sum these rows into team totals.

    Args:
        start_year: First season to fetch (inclusive).
        end_year: Last season to fetch (inclusive).

    Returns:
        All cleaned seasons concatenated, with a ``season`` column.

    Raises:
        ValueError: If ``end_year`` is before ``start_year``.
    """
    first, last = int(start_year), int(end_year)
    if last < first:
        raise ValueError(f"end_year ({last}) must not be before start_year ({first})")
    frames = []
    for yr in range(first, last + 1):
        season_df = pitching_stats(yr, qual=0)
        season_df["season"] = yr
        frames.append(_clean_pitching(season_df))
    return pd.concat(frames, ignore_index=True)

def upsert_df(df: pd.DataFrame, table: str, engine) -> int:
    """Upsert season rows into ``batting_seasons`` or ``pitching_seasons``.

    The frame is written to a uniquely named staging table, merged into the
    target with ``INSERT ... ON CONFLICT (season, player_id, team) DO UPDATE``
    and the staging table is dropped, all in one transaction.

    Column names are double-quoted in the generated SQL: the ORM declares the
    stat columns in upper case (``G``, ``PA``, ...), and PostgreSQL folds
    unquoted identifiers to lower case, so unquoted references would not
    resolve.

    Args:
        df: Frame holding at least the key columns and the stat columns of the
            target table.
        table: ``"batting_seasons"`` or ``"pitching_seasons"``.
        engine: SQLAlchemy engine to write through.

    Returns:
        Number of rows sent to the database (after dropping rows without a
        season or player id and de-duplicating on the conflict key).

    Raises:
        ValueError: For an unsupported ``table`` or when ``df`` lacks a
            required column.
    """
    import uuid

    key_cols = ["season", "player_id", "team"]
    stat_cols_by_table = {
        "batting_seasons": [
            "G", "PA", "AB", "H", "HR", "R", "RBI", "BB", "SO", "HBP", "SB", "CS",
            "BA", "OBP", "SLG", "OPS", "WAR",
        ],
        "pitching_seasons": [
            "W", "L", "G", "GS", "IP", "H", "R", "ER", "HR", "BB", "SO", "HBP",
            "ERA", "WHIP", "FIP", "WAR",
        ],
    }
    if table not in stat_cols_by_table:
        raise ValueError(f"upsert_df does not support table {table!r}")
    stat_cols = stat_cols_by_table[table]
    all_cols = key_cols + stat_cols

    missing = [c for c in all_cols if c not in df.columns]
    if missing:
        raise ValueError(f"DataFrame is missing columns required by {table}: {missing}")

    # A NULL key never conflicts, so such rows would be re-inserted on every
    # run; duplicate keys inside one statement make ON CONFLICT fail outright.
    rows = (
        df[all_cols]
        .dropna(subset=["season", "player_id"])
        .drop_duplicates(subset=key_cols, keep="last")
    )
    if rows.empty:
        return 0

    def _quote(identifier: str) -> str:
        return '"' + identifier + '"'

    col_list = ", ".join(_quote(c) for c in all_cols)
    set_list = ", ".join(f"{_quote(c)} = EXCLUDED.{_quote(c)}" for c in stat_cols)
    # uuid instead of a timestamp: two upserts within the same second must not
    # share (and clobber) one staging table.
    tmp_table = f"tmp_{table}_{uuid.uuid4().hex}"
    upsert_sql = (
        f"INSERT INTO {table} ({col_list}, last_updated) "
        f"SELECT {col_list}, NOW() FROM {_quote(tmp_table)} "
        f"ON CONFLICT (season, player_id, team) "
        f"DO UPDATE SET {set_list}, last_updated = NOW()"
    )

    with engine.begin() as conn:
        rows.to_sql(tmp_table, conn, index=False, if_exists="replace")
        conn.execute(text(upsert_sql))
        conn.execute(text(f"DROP TABLE {_quote(tmp_table)}"))
    return len(rows)

# --------------
# ETL: Recent / "Current" Windows (Statcast)
# --------------

# ttl bounds how long a window (or an empty result produced by a failed
# request) stays cached, so "current" stats can actually refresh.
@st.cache_data(show_spinner=False, ttl=900)
def fetch_recent_batter(player_id_mlbam: int, start: dt.date, end: dt.date) -> pd.DataFrame:
    """Fetch Statcast rows for one batter between ``start`` and ``end``.

    Best-effort: any exception from the Statcast call is logged and an empty
    frame is returned instead of propagating.

    Args:
        player_id_mlbam: MLBAM id of the batter.
        start: First date of the window.
        end: Last date of the window.

    Returns:
        The Statcast frame, or an empty DataFrame when the request failed or
        returned nothing.
    """
    import logging

    try:
        df = statcast_batter(
            start_dt=start.strftime("%Y-%m-%d"),
            end_dt=end.strftime("%Y-%m-%d"),
            player_id=int(player_id_mlbam),
        )
    except Exception:
        logging.getLogger(__name__).exception(
            "statcast_batter failed for player %s (%s to %s)", player_id_mlbam, start, end
        )
        return pd.DataFrame()
    return df if df is not None else pd.DataFrame()

# ttl bounds how long a window (or an empty result produced by a failed
# request) stays cached, so "current" stats can actually refresh.
@st.cache_data(show_spinner=False, ttl=900)
def fetch_recent_pitcher(player_id_mlbam: int, start: dt.date, end: dt.date) -> pd.DataFrame:
    """Fetch Statcast rows for one pitcher between ``start`` and ``end``.

    Best-effort: any exception from the Statcast call is logged and an empty
    frame is returned instead of propagating.

    Args:
        player_id_mlbam: MLBAM id of the pitcher.
        start: First date of the window.
        end: Last date of the window.

    Returns:
        The Statcast frame, or an empty DataFrame when the request failed or
        returned nothing.
    """
    import logging

    try:
        df = statcast_pitcher(
            start_dt=start.strftime("%Y-%m-%d"),
            end_dt=end.strftime("%Y-%m-%d"),
            player_id=int(player_id_mlbam),
        )
    except Exception:
        logging.getLogger(__name__).exception(
            "statcast_pitcher failed for player %s (%s to %s)", player_id_mlbam, start, end
        )
        return pd.DataFrame()
    return df if df is not None else pd.DataFrame()

# --------------
# UI Components
# --------------

# The engine argument cannot be hashed by Streamlit's default hasher, so it is
# keyed by its URL instead (both class paths a SQLAlchemy engine may report).
@st.cache_data(
    show_spinner=False,
    hash_funcs={
        "sqlalchemy.engine.base.Engine": lambda e: str(e.url),
        "sqlalchemy.future.engine.Engine": lambda e: str(e.url),
    },
)
def load_team_history(engine, team_abbr: str) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Aggregate a team's batting and pitching rows into one line per season.

    Stat columns are double-quoted because they are declared in upper case and
    PostgreSQL folds unquoted identifiers to lower case; the quoted aliases
    also keep the upper-case names the pages index by (``"OPS"``, ``"ERA"``).
    Ratios are cast to ``numeric`` before ``ROUND`` since PostgreSQL has no
    two-argument ``ROUND`` for double precision.

    NOTE(review): ``IP`` is summed as a plain decimal; if the stored values use
    the .1/.2 thirds-of-an-inning notation the totals are slightly off —
    confirm against the data source.

    Args:
        engine: SQLAlchemy engine to query.
        team_abbr: Team abbreviation as stored in the ``team`` column.

    Returns:
        ``(batting, pitching)`` frames ordered by season.
    """
    q1 = """
    SELECT season, team,
           SUM("G") AS "G", SUM("PA") AS "PA", SUM("AB") AS "AB", SUM("H") AS "H", SUM("HR") AS "HR",
           SUM("R") AS "R", SUM("RBI") AS "RBI", SUM("BB") AS "BB", SUM("SO") AS "SO",
           ROUND((SUM("AB" * "BA") / NULLIF(SUM("AB"), 0))::numeric, 3) AS "BA",
           ROUND((SUM("PA" * "OBP") / NULLIF(SUM("PA"), 0))::numeric, 3) AS "OBP",
           ROUND((SUM("AB" * "SLG") / NULLIF(SUM("AB"), 0))::numeric, 3) AS "SLG",
           ROUND((SUM("PA" * "OPS") / NULLIF(SUM("PA"), 0))::numeric, 3) AS "OPS",
           ROUND(SUM("WAR")::numeric, 2) AS "WAR"
    FROM batting_seasons
    WHERE team = :team
    GROUP BY season, team
    ORDER BY season
    """
    q2 = """
    SELECT season, team,
           SUM("G") AS "G", SUM("GS") AS "GS", ROUND(SUM("IP")::numeric, 1) AS "IP",
           SUM("SO") AS "SO", SUM("BB") AS "BB", SUM("HR") AS "HR",
           ROUND(SUM("ER")::numeric, 0) AS "ER",
           ROUND(SUM("WAR")::numeric, 2) AS "WAR",
           ROUND((SUM("ER") * 9 / NULLIF(SUM("IP"), 0))::numeric, 2) AS "ERA",
           ROUND(((SUM("BB") + SUM("H")) / NULLIF(SUM("IP"), 0))::numeric, 2) AS "WHIP"
    FROM pitching_seasons
    WHERE team = :team
    GROUP BY season, team
    ORDER BY season
    """
    with engine.begin() as conn:
        bat = pd.read_sql(text(q1), conn, params={"team": team_abbr})
        pit = pd.read_sql(text(q2), conn, params={"team": team_abbr})
    return bat, pit

# The engine argument cannot be hashed by Streamlit's default hasher, so it is
# keyed by its URL instead (both class paths a SQLAlchemy engine may report).
@st.cache_data(
    show_spinner=False,
    hash_funcs={
        "sqlalchemy.engine.base.Engine": lambda e: str(e.url),
        "sqlalchemy.future.engine.Engine": lambda e: str(e.url),
    },
)
def search_players(engine, query: str, is_pitcher: Optional[bool] = None) -> pd.DataFrame:
    """List (player_id, team, name) rows, optionally filtered by a name substring.

    Names come from the ``players`` table; ids that have no name there are
    resolved through pybaseball's reverse lookup. The season tables store
    FanGraphs ids, so the lookup uses ``key_type="fangraphs"``.

    Args:
        engine: SQLAlchemy engine to query.
        query: Case-insensitive substring to match against the name; matched
            literally, not as a regular expression. Empty means no filtering.
        is_pitcher: True -> pitchers only, False -> hitters only,
            None -> both.

    Returns:
        Frame with ``player_id``, ``team`` and ``name``, sorted by name.
    """
    if is_pitcher is True:
        source = "SELECT DISTINCT player_id, team FROM pitching_seasons"
    elif is_pitcher is None:
        source = (
            "SELECT player_id, team FROM batting_seasons "
            "UNION SELECT player_id, team FROM pitching_seasons"
        )
    else:
        source = "SELECT DISTINCT player_id, team FROM batting_seasons"
    sql = (
        f"SELECT p.player_id, p.team, pl.name FROM ({source}) p "
        "LEFT JOIN players pl ON p.player_id = pl.player_id"
    )
    with engine.begin() as conn:
        df = pd.read_sql(text(sql), conn)

    df = df.dropna(subset=["player_id"]).copy()
    df["player_id"] = df["player_id"].astype(int)

    # Fill in only the names the players table could not provide. Writing into
    # the existing column avoids the name_x/name_y split a merge would create.
    unnamed = df["name"].isna()
    if unnamed.any():
        ids = df.loc[unnamed, "player_id"].unique().tolist()
        register = playerid_reverse_lookup(ids, key_type="fangraphs")
        full_names = register["name_first"].str.title() + " " + register["name_last"].str.title()
        name_by_id = dict(zip(pd.to_numeric(register["key_fangraphs"], errors="coerce"), full_names))
        df["name"] = df["name"].where(~unnamed, df["player_id"].map(name_by_id))

    if query:
        lowered = df["name"].fillna("").astype(str).str.lower()
        df = df[lowered.str.contains(query.lower(), na=False, regex=False)]
    return df.sort_values("name")

# The engine argument cannot be hashed by Streamlit's default hasher, so it is
# keyed by its URL instead (both class paths a SQLAlchemy engine may report).
@st.cache_data(
    show_spinner=False,
    hash_funcs={
        "sqlalchemy.engine.base.Engine": lambda e: str(e.url),
        "sqlalchemy.future.engine.Engine": lambda e: str(e.url),
    },
)
def player_history(engine, player_id: int) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Return every stored batting and pitching season for one player.

    Args:
        engine: SQLAlchemy engine to query.
        player_id: Id as stored in the ``player_id`` column of both tables.

    Returns:
        ``(batting, pitching)`` frames, each ordered by season.
    """
    params = {"pid": int(player_id)}
    with engine.begin() as conn:
        bat = pd.read_sql(
            text("SELECT * FROM batting_seasons WHERE player_id=:pid ORDER BY season"),
            conn,
            params=params,
        )
        pit = pd.read_sql(
            text("SELECT * FROM pitching_seasons WHERE player_id=:pid ORDER BY season"),
            conn,
            params=params,
        )
    return bat, pit

# --------------
# Pages
# --------------

def page_setup_and_data():
    """Render the admin page: schema init, season loaders and the player directory.

    Each loader button fetches the selected season range, upserts it and then
    clears the cached DB queries so the Team and Player pages show the new
    rows instead of a stale (possibly empty) cached result.
    """
    st.header("Setup & Data Admin")
    st.caption("Configure DB, initialize schema, and (re)load data.")

    init_db()
    st.success("Database schema is ready.")

    with st.expander("Database connection"):
        # Show the connection target without printing the password on the page.
        st.code(_engine.url.render_as_string(hide_password=True), language="bash")

    colA, colB = st.columns(2)
    with colA:
        st.subheader("Load Historical Batting")
        sy = st.number_input("Start season", value=START_YEAR_DEFAULT, min_value=1900, max_value=END_YEAR_DEFAULT)
        ey = st.number_input("End season", value=END_YEAR_DEFAULT, min_value=sy, max_value=END_YEAR_DEFAULT)
        if st.button("Fetch + Upsert Batting Seasons", use_container_width=True):
            with st.spinner("Fetching batting seasons..."):
                df = fetch_batting_seasons(int(sy), int(ey))
            st.write(df.head())
            with st.spinner("Upserting into PostgreSQL..."):
                n = upsert_df(
                    df[["season","player_id","team","G","PA","AB","H","HR","R","RBI","BB","SO","HBP","SB","CS","BA","OBP","SLG","OPS","WAR"]],
                    "batting_seasons",
                    _engine,
                )
            load_team_history.clear()
            search_players.clear()
            player_history.clear()
            st.success(f"Inserted/updated {n} batting rows.")

    with colB:
        st.subheader("Load Historical Pitching")
        sy2 = st.number_input("Start season (Pitching)", value=START_YEAR_DEFAULT, min_value=1900, max_value=END_YEAR_DEFAULT, key="sy2")
        ey2 = st.number_input("End season (Pitching)", value=END_YEAR_DEFAULT, min_value=sy2, max_value=END_YEAR_DEFAULT, key="ey2")
        if st.button("Fetch + Upsert Pitching Seasons", use_container_width=True):
            with st.spinner("Fetching pitching seasons..."):
                df = fetch_pitching_seasons(int(sy2), int(ey2))
            st.write(df.head())
            with st.spinner("Upserting into PostgreSQL..."):
                n = upsert_df(
                    df[["season","player_id","team","W","L","G","GS","IP","H","R","ER","HR","BB","SO","HBP","ERA","WHIP","FIP","WAR"]],
                    "pitching_seasons",
                    _engine,
                )
            load_team_history.clear()
            search_players.clear()
            player_history.clear()
            st.success(f"Inserted/updated {n} pitching rows.")

    st.divider()
    st.subheader("Optional: Build Player Directory")
    st.caption("Populates `players` table for nicer names (best-effort).")
    if st.button("Populate Players from current DB", use_container_width=True):
        with _engine.begin() as conn:
            ids = pd.read_sql(text("SELECT DISTINCT player_id FROM (SELECT player_id FROM batting_seasons UNION SELECT player_id FROM pitching_seasons) x WHERE player_id IS NOT NULL"), conn)
        ids_list = ids["player_id"].dropna().astype(int).tolist()
        if not ids_list:
            st.warning("No player ids in the database yet; load batting or pitching seasons first.")
        else:
            # Stored ids are FanGraphs ids (IDfg), so look them up as such.
            register = playerid_reverse_lookup(ids_list, key_type="fangraphs")
            names = pd.DataFrame({
                "player_id": pd.to_numeric(register["key_fangraphs"], errors="coerce"),
                "name": register["name_first"].str.title() + " " + register["name_last"].str.title(),
            })
            # One row per id: two rows for the same id would abort ON CONFLICT.
            keep = names.dropna().drop_duplicates(subset="player_id")
            keep = keep.astype({"player_id": int})
            with _engine.begin() as conn:
                tmp = "tmp_players"
                keep.to_sql(tmp, conn, if_exists="replace", index=False)
                conn.execute(text("""
                    INSERT INTO players (player_id, name)
                    SELECT player_id, name FROM tmp_players
                    ON CONFLICT (player_id) DO UPDATE SET name = EXCLUDED.name
                """))
                conn.execute(text("DROP TABLE tmp_players"))
            search_players.clear()
            st.success(f"Upserted {len(keep)} player names.")

    st.info("If you're starting fresh: run Batting, then Pitching, then Populate Players.")


def page_team_dashboard():
    """Render the team page: per-season charts, a snapshot table and a WAR leaderboard."""
    st.header("Team Perspective")
    teams = list_teams_abbr()
    team = st.selectbox("Select Team", teams, index=teams.index("LAD") if "LAD" in teams else 0)
    bat, pit = load_team_history(_engine, team)

    if bat.empty and pit.empty:
        st.warning("No data yet. Visit Setup & Data to load seasons.")
        return

    c1, c2 = st.columns(2)
    with c1:
        st.subheader("Hitting – OPS & HR by Season")
        st.line_chart(bat.set_index("season")["OPS"], height=240)
        st.bar_chart(bat.set_index("season")["HR"], height=240)
    with c2:
        st.subheader("Pitching – ERA & SO by Season")
        st.line_chart(pit.set_index("season")["ERA"], height=240)
        st.bar_chart(pit.set_index("season")["SO"], height=240)

    st.subheader("Team Snapshot (last 10 seasons)")
    cols = ["BA","OBP","SLG","OPS","WAR","H","HR","R","RBI","BB","SO"]
    st.dataframe(bat.set_index("season")[cols].style.format({k: "{:.3f}" for k in ["BA","OBP","SLG","OPS"]}), use_container_width=True)

    st.subheader("Leaderboards – Top Players by WAR (Team, last 10 years)")
    # "WAR" is quoted: the column is declared in upper case and PostgreSQL
    # folds unquoted identifiers to lower case.
    q = text("""
        SELECT season, player_id, team, "WAR"
        FROM batting_seasons WHERE team=:t AND season >= :minyr
        UNION ALL
        SELECT season, player_id, team, "WAR"
        FROM pitching_seasons WHERE team=:t AND season >= :minyr
    """)
    window_start = END_YEAR_DEFAULT - 9
    minyr = window_start if bat.empty else max(int(bat["season"].min()), window_start)
    with _engine.begin() as conn:
        dfw = pd.read_sql(q, conn, params={"t": team, "minyr": int(minyr)})

    # Stored ids are FanGraphs ids (IDfg), so resolve names with that key type.
    ids = dfw["player_id"].dropna().astype(int).unique().tolist()
    if ids:
        register = playerid_reverse_lookup(ids, key_type="fangraphs")
        names = pd.DataFrame({
            "player_id": pd.to_numeric(register["key_fangraphs"], errors="coerce"),
            "name": register["name_first"].str.title() + " " + register["name_last"].str.title(),
        }).dropna(subset=["player_id"]).drop_duplicates(subset="player_id")
    else:
        names = pd.DataFrame({
            "player_id": pd.Series(dtype="int64"),
            "name": pd.Series(dtype="object"),
        })
    dfw = dfw.merge(names[["player_id","name"]], how="left", on="player_id")
    top = dfw.groupby(["player_id","name"], dropna=False)["WAR"].sum().sort_values(ascending=False).head(15).reset_index()
    st.dataframe(top, use_container_width=True)


def page_player_dashboard():
    """Render the player page: search, season charts, career summaries and a Statcast window."""
    st.header("Player Perspective")
    q = st.text_input("Search player (by name substring)")
    df = search_players(_engine, q)
    if q and df.empty:
        st.warning("No matches yet; load data or try another query.")
        return
    if df.empty:
        st.info("Start typing a name to search across hitters & pitchers.")
        return

    # Label -> player id; a missing name is shown as "Unknown" rather than "nan".
    options = {}
    for r in df.itertuples():
        shown = r.name if isinstance(r.name, str) and r.name else "Unknown"
        options[f"{shown} (id {int(r.player_id)})"] = int(r.player_id)
    label = st.selectbox("Pick player", list(options.keys()))
    pid = options[label]

    bat, pit = player_history(_engine, pid)
    tabs = st.tabs(["Hitting", "Pitching", "Recent Window"])

    with tabs[0]:
        if bat.empty:
            st.info("No hitting seasons found.")
        else:
            st.subheader("Seasonal OPS & HR")
            st.line_chart(bat.set_index("season")["OPS"], height=240)
            st.bar_chart(bat.set_index("season")["HR"], height=240)
            st.subheader("Career Batting Summary")
            agg = bat.agg({
                "G":"sum","PA":"sum","AB":"sum","H":"sum","HR":"sum","R":"sum","RBI":"sum","BB":"sum","SO":"sum","WAR":"sum"
            })
            # Career rates are the season rates weighted by AB (BA, SLG) or PA (OBP, OPS).
            rates = pd.Series({
                "BA": (bat["AB"]*bat["BA"]).sum()/max(1, bat["AB"].sum()),
                "OBP": (bat["PA"]*bat["OBP"]).sum()/max(1, bat["PA"].sum()),
                "SLG": (bat["AB"]*bat["SLG"]).sum()/max(1, bat["AB"].sum()),
                "OPS": (bat["PA"]*bat["OPS"]).sum()/max(1, bat["PA"].sum()),
            })
            tbl = pd.concat([agg, rates]).to_frame(name="value")
            st.dataframe(tbl.T, use_container_width=True)

    with tabs[1]:
        if pit.empty:
            st.info("No pitching seasons found.")
        else:
            st.subheader("Seasonal ERA & SO")
            st.line_chart(pit.set_index("season")["ERA"], height=240)
            st.bar_chart(pit.set_index("season")["SO"], height=240)
            st.subheader("Career Pitching Summary")
            agg = pit.agg({
                "W":"sum","L":"sum","G":"sum","GS":"sum","IP":"sum","SO":"sum","BB":"sum","HR":"sum","ER":"sum","WAR":"sum"
            })
            rates = pd.Series({
                "ERA": (pit["ER"].sum()*9)/max(1e-9, pit["IP"].sum()),
                "WHIP": (pit["BB"].sum()+pit["H"].sum())/max(1e-9, pit["IP"].sum()),
            })
            tbl = pd.concat([agg, rates]).to_frame(name="value")
            st.dataframe(tbl.T, use_container_width=True)

    with tabs[2]:
        st.caption("Pull recent Statcast performance over a date range. Requires MLBAM id (FanGraphs id commonly maps; if missing, try manual).")
        today = dt.date.today()
        start = st.date_input("Start date", today - dt.timedelta(days=14))
        end = st.date_input("End date", today)
        is_pitch = st.toggle("Treat as pitcher (Statcast pitcher endpoint)", value=not pit.empty)

        # pid is a FanGraphs id; Statcast needs the MLBAM id. Try to translate
        # it and fall back to pid (user can still override) when that fails.
        default_mlbam = pid
        try:
            register = playerid_reverse_lookup([pid], key_type="fangraphs")
            mlbam_values = pd.to_numeric(register["key_mlbam"], errors="coerce").dropna()
            if not mlbam_values.empty and int(mlbam_values.iloc[0]) > 0:
                default_mlbam = int(mlbam_values.iloc[0])
        except Exception:
            st.caption("Could not resolve an MLBAM id automatically; enter it manually if needed.")
        mlbam_id = st.number_input("MLBAM Player ID (if different)", value=int(default_mlbam))

        if st.button("Fetch Recent", use_container_width=True):
            if start > end:
                st.error("Start date must be on or before end date.")
            else:
                if is_pitch:
                    dfrec = fetch_recent_pitcher(int(mlbam_id), start, end)
                else:
                    dfrec = fetch_recent_batter(int(mlbam_id), start, end)
                if dfrec.empty:
                    st.warning("No Statcast data found in this window.")
                else:
                    st.success(f"Fetched {len(dfrec)} Statcast rows.")
                    st.dataframe(dfrec.head(100), use_container_width=True)

# --------------
# Sidebar Navigation
# --------------

# Sidebar label -> function that renders the corresponding page.
PAGES = {
    "Setup & Data": page_setup_and_data,
    "Team": page_team_dashboard,
    "Player": page_player_dashboard,
}

with st.sidebar:
    st.title("⚾ MLB Analytics")
    st.caption("Local app • PostgreSQL • 10-year history")
    page = st.radio("Navigate", list(PAGES), index=0)
    st.markdown("---")
    st.caption("Tips:\n- Load data in Setup first.\n- Then explore Teams & Players.")

# Make sure the schema exists before any page queries it.
init_db()

# Dispatch to the page chosen in the sidebar.
render_selected_page = PAGES[page]
render_selected_page()


ModuleNotFoundError: No module named 'langchain'

In [4]:
from pybaseball import batting_stats, pitching_stats
# from pybaseball.statcast import statcast_batter, statcast_pitcher
# from pybaseball.teamid_lookup import team_ID_map
# from pybaseball.playerid_reverse_lookup import playerid_reverse_lookup

In [9]:
# Ad-hoc look at the raw batting table pybaseball returns for the 2025-2026
# range; the displayed value is the cell's output.
batting_stats(2025, 2026)

Unnamed: 0,IDfg,Season,Name,Team,Age,G,AB,PA,H,1B,...,maxEV,HardHit,HardHit%,Events,CStr%,CSW%,xBA,xSLG,xwOBA,L-WAR
0,15640,2025,Aaron Judge,NYY,33,110,398,487,134,70,...,118.1,153,0.554,276,0.146,0.298,,,,7.3
4,21534,2025,Cal Raleigh,SEA,28,117,441,518,108,47,...,115.2,154,0.507,304,0.108,0.253,,,,6.6
1,19755,2025,Shohei Ohtani,LAD,30,118,461,546,131,65,...,117.9,184,0.577,319,0.145,0.288,,,,5.5
33,25764,2025,Bobby Witt Jr.,KCR,25,118,465,515,133,75,...,115.7,176,0.474,371,0.139,0.256,,,,5.3
55,27769,2025,Pete Crow-Armstrong,CHC,23,116,452,485,116,54,...,111.8,141,0.412,342,0.080,0.247,,,,4.8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
134,21523,2025,Jonathan India,KCR,28,110,419,476,99,67,...,110.8,125,0.365,342,0.223,0.291,,,,-0.7
149,16376,2025,Michael Conforto,LAD,32,105,321,374,60,33,...,112.9,99,0.423,234,0.168,0.275,,,,-0.4
123,11737,2025,Nick Castellanos,PHI,33,116,445,475,115,76,...,110.2,121,0.348,348,0.091,0.240,,,,0.0
155,23395,2025,Eric Wagaman,MIA,27,106,374,406,86,57,...,113.5,134,0.450,298,0.192,0.293,,,,-0.7
