TODO: 
- Filter out the last year for every player
- Save a new result csv with batting average for every player by year
- Edit the home page to display those results. I guess? 

In [25]:
import polars as pl
players = (
    pl.scan_parquet("../../parquets/allplayers.parquet")
    .with_columns(
        (pl.col("first") + pl.lit(" ") + pl.col("last")).alias("name"),
        pl.count("season").over("id").alias("count"),
    )
    .filter(pl.col("count") > 5)
    .select(
        pl.col("name"),
        pl.col("id"),
        )
)
players.collect()

name,id
str,str
"""John Anderson""","""andej101"""
"""Jimmy Barrett""","""barrj103"""
"""Shad Barry""","""barrs101"""
"""Shad Barry""","""barrs101"""
"""Harry Bay""","""bay-h101"""
…,…
"""Christian Yelich""","""yelic001"""
"""Alex Young""","""youna002"""
"""Alex Young""","""youna002"""
"""Rob Zastryzny""","""zastr001"""


In [26]:
import polars as pl
batting = (
    pl.scan_parquet("../../parquets/batting.parquet")
    .with_columns(
        pl.col("date").cast(pl.String)
        .str.strptime(pl.Date, "%Y%m%d")
        .dt.year()
        .alias("year")
    )
    .group_by(
        "id", "year"
    )
    .agg(pl.col("b_h").sum(), pl.col("b_ab").sum())
    .with_columns((pl.col("b_h") / pl.col("b_ab")).alias("avg"))
    .filter(pl.col("b_ab") > 100)
    .with_columns(
        pl.count("year").over("id").alias("count")
    )
    .filter(pl.col("id")=="freef001")
    # .select(
    #     pl.col("id").alias("unique_id"),
    #     pl.col("year").alias("ds"),
    #     pl.col("avg").alias("y"),
    # )
).collect()
batting

id,year,b_h,b_ab,avg,count
str,i32,i64,i64,f64,u32
"""freef001""",2018,195,636,0.306604,14
"""freef001""",2021,198,658,0.300912,14
"""freef001""",2014,176,608,0.289474,14
"""freef001""",2017,135,440,0.306818,14
"""freef001""",2012,143,544,0.262868,14
…,…,…,…,…,…
"""freef001""",2022,204,627,0.325359,14
"""freef001""",2015,115,416,0.276442,14
"""freef001""",2020,85,257,0.330739,14
"""freef001""",2011,161,571,0.281961,14


In [5]:
import statsforecast.models
models = [
    statsforecast.models.AutoARIMA(),
    # statsforecast.models.AutoETS(),
    # statsforecast.models.AutoRegressive(10),
    # statsforecast.models.HoltWinters(),
    # statsforecast.models.HistoricAverage(),
]

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
# Instantiate StatsForecast class as sf
sf = statsforecast.StatsForecast( 
    models=models,
    freq=1, 
    n_jobs=-1,
    verbose=True,
)

In [7]:
forecasts_df = sf.forecast(df=batting, h=1, level=[90])
forecasts_df.head()

Forecast: 100%|██████████| 1/1 [00:00<00:00, 15.42it/s]


unique_id,ds,AutoARIMA,AutoARIMA-lo-90,AutoARIMA-hi-90
str,i64,f64,f64,f64
"""freef001""",2025,0.300002,0.265624,0.33438


In [8]:
forecasts_df

unique_id,ds,AutoARIMA,AutoARIMA-lo-90,AutoARIMA-hi-90
str,i64,f64,f64,f64
"""freef001""",2025,0.300002,0.265624,0.33438


In [9]:
results = (
    forecasts_df
    .join(pl.read_parquet("../../parquets/allplayers.parquet").unique("id"), left_on="unique_id", right_on="id", how="left")
    .with_columns((pl.col("first") + pl.lit(" ") + pl.col("last")).alias("name"))
    .select(
        "name",
        "unique_id",
        "ds",
        "AutoARIMA",
        "AutoARIMA-lo-90",
        "AutoARIMA-hi-90",
    )
    )
results.write_parquet("arima_forecasts.parquet")
results

name,unique_id,ds,AutoARIMA,AutoARIMA-lo-90,AutoARIMA-hi-90
str,str,i64,f64,f64,f64
"""Freddie Freeman""","""freef001""",2025,0.300002,0.265624,0.33438
