In [None]:
import random

import manim
import manim_ml
import numpy as np
from manim import *  # noqa: F401,F403 - scene cells use Manim names (Scene, Rectangle, ...) unqualified

## NBA Player Predictions 

This project is centered around generating predictions that can provide an edge when betting on the player prop lines set by DraftKings for categories like points, rebounds, assists, and three pointers made.

This project has many moving parts: gathering data from APIs and web scraping, cleaning that data, and processing it through the pipeline to generate predictions that are then displayed on a user-facing dashboard. This notebook goes through each part of the process, breaking down what is going on behind the scenes throughout the pipeline.


#### Gathering Data

In [None]:

%%manim -ql -v WARNING DataFlowScene


class DataFlowScene(Scene):
    """Animate how the three data sources feed the processing pipeline.

    The scene draws NBA.com, ESPN.com and DraftKings boxes with arrows into a
    "Data Processing" box, fades the ESPN/DraftKings branch to white, removes
    its arrows and shifts those boxes up, morphs the remaining boxes into
    three tall columns (NBA.com, Data Processing, Database) and finally sends
    bursts of dots from left to right across those columns.
    """

    @staticmethod
    def _labeled_box(label, color, position, scale=1.0, **rect_kwargs):
        """Build a coloured rectangle with a centred label, placed at ``position``.

        Returns ``(rectangle, text, group)`` so callers can anchor arrows on
        the rectangle and animate the group as a whole.
        """
        box = Rectangle(**rect_kwargs).set_color(color)
        if scale != 1.0:
            box.scale(scale)
        text = Text(label, font_size=24).move_to(box.get_center())
        group = Group(box, text).move_to(position)
        return box, text, group

    @staticmethod
    def _elbow_arrow(start, end, color):
        """Return two segments going vertically from ``start`` and then
        horizontally into ``end``; only the second segment carries the tip."""
        corner = np.array([start[0], end[1], 0])  # bend point
        first = Line(start, corner).set_color(color)
        second = Line(corner, end).add_tip().set_color(color)
        return first, second

    def construct(self):
        # --- Source boxes along the top ---
        nba, _, nba_group = self._labeled_box(
            "NBA.com", RED, UP * 2 + LEFT * 4, scale=0.5
        )
        espn, _, espn_group = self._labeled_box(
            "ESPN.com", ORANGE, UP * 2, scale=0.5
        )
        dk, _, dk_group = self._labeled_box(
            "DraftKings", GREEN, UP * 2 + RIGHT * 4, scale=0.5
        )

        # --- Processing box and database box ---
        data_processing, _, dp_group = self._labeled_box(
            "Data Processing", YELLOW_B, DOWN * 2, width=3, height=1
        )
        # NOTE(review): DOWN * 8 presumably parks the database below the
        # default frame until the layout morph further down -- confirm.
        database, db_text, database_group = self._labeled_box(
            "Processed Data", BLUE, DOWN * 8, width=4, height=2
        )

        # --- Arrows from each source into the processing box ---
        nba_segment1, nba_segment2 = self._elbow_arrow(
            nba.get_edge_center(DOWN), data_processing.get_edge_center(LEFT), RED
        )
        espn_segment = (
            Line(espn.get_edge_center(DOWN), data_processing.get_edge_center(UP))
            .add_tip()
            .set_color(ORANGE)
        )
        dk_segment1, dk_segment2 = self._elbow_arrow(
            dk.get_edge_center(DOWN), data_processing.get_edge_center(RIGHT), GREEN
        )

        self.play(FadeIn(nba_group, dk_group, espn_group))
        self.play(FadeIn(dp_group))
        self.play(FadeIn(database), FadeIn(db_text))
        self.play(
            Create(nba_segment1),
            Create(nba_segment2),
            Create(dk_segment1),
            Create(dk_segment2),
            Create(espn_segment),
        )

        # De-emphasise the ESPN / DraftKings branch, then take it out.
        self.play(
            FadeToColor(dk_segment1, color=WHITE),
            FadeToColor(dk_segment2, color=WHITE),
            FadeToColor(dk_group, color=WHITE),
            FadeToColor(espn_group, color=WHITE),
            FadeToColor(espn_segment, color=WHITE),
        )
        self.play(FadeOut(dk_segment1), FadeOut(dk_segment2), FadeOut(espn_segment))
        self.wait(.5)
        self.play(dk_group.animate.shift(UP * 4), espn_group.animate.shift(UP * 4))
        self.wait(3)
        self.play(FadeOut(nba_segment1), FadeOut(nba_segment2))

        # --- Morph the remaining boxes into three tall columns ---
        _, _, target_nba_group = self._labeled_box(
            "NBA.com", RED, DOWN * .02 + LEFT * 5, width=2.5, height=5
        )
        _, _, target_dp_group = self._labeled_box(
            "Data\nProcessing", YELLOW_B, DOWN * .02, width=2.5, height=5
        )
        _, _, target_db_group = self._labeled_box(
            "Database", BLUE, RIGHT * 5, width=2.5, height=5
        )
        self.play(
            Transform(dp_group, target_dp_group),
            Transform(database_group, target_db_group),
            Transform(nba_group, target_nba_group),
            run_time=2,
        )

        # --- Bursts of dots: NBA.com -> Data Processing -> Database ---
        bursts = 5
        dots_per_burst = 1
        for _ in range(bursts):
            spent_dots = []
            animations = []

            for _dot_index in range(dots_per_burst):
                y_offset = random.uniform(-1.5, 1.5)

                nba_from = nba.get_edge_center(RIGHT) + RIGHT * 0.5 + UP * y_offset
                # NOTE(review): this leg narrows by 0.03 while the next one
                # narrows by 0.3 -- confirm whether the difference is intended.
                nba_to = data_processing.get_edge_center(LEFT) + UP * y_offset * .03

                dp_from = (
                    data_processing.get_edge_center(RIGHT) + RIGHT * 0.5 + UP * y_offset
                )
                dp_to = database.get_edge_center(LEFT) + UP * y_offset * 0.3

                nba_dot = Dot(radius=0.07, color=WHITE).move_to(nba_from)
                data_dot = Dot(radius=0.07, color=WHITE).move_to(dp_from)
                self.add(nba_dot, data_dot)
                spent_dots += [nba_dot, data_dot]

                # One Succession per dot pair, built inside the loop so every
                # dot is animated when dots_per_burst > 1.
                # ScaleInPlace builds its target when the step begins, so the
                # dot shrinks where it arrived; `.animate.scale(...)` would
                # freeze the target at the dot's starting position.
                animations.append(
                    Succession(
                        MoveAlongPath(nba_dot, Line(nba_from, nba_to), rate_func=smooth),
                        ScaleInPlace(nba_dot, 0.01),
                        MoveAlongPath(data_dot, Line(dp_from, dp_to), rate_func=smooth),
                        ScaleInPlace(data_dot, 0.01),
                    )
                )

            self.play(*animations, run_time=0.8)
            # Drop the shrunken dots so they do not accumulate in the scene.
            self.remove(*spent_dots)
            self.wait(0.3)



Above is an animation showcasing how data is gathered from three main sources: the NBA.com API, DraftKings.com, and ESPN.com.

We will first focus on the NBA data as this is the foundation of this project.

In [None]:

%%manim -ql -v WARNING TableHighlight

class TableHighlight(Scene):
    """Contrast which table rows feed a rolling feature in each dataset.

    A four-row table is drawn. Under the "Modeling Dataset" title the top
    three rows are outlined; under the "Prediction Dataset" title the bottom
    three rows are outlined.
    """

    def construct(self):
        row_count = 4
        width = 6
        height = 4
        band = height / row_count  # height of a single row

        def outline_rows(y_centers):
            """Return yellow outlines around full-width rows centred at each y."""
            frames = VGroup()
            for y_mid in y_centers:
                row_box = Rectangle(width=width, height=band).move_to([0, y_mid, 0])
                frames.add(SurroundingRectangle(row_box, color=YELLOW, buff=0.05))
            return frames

        # Title for the first half of the scene.
        model_title = Text("Modeling Dataset Feature Calculation", font_size=24)
        model_title.to_edge(UP)
        self.play(FadeIn(model_title))

        # Table outline.
        table = Rectangle(width=width, height=height)
        self.play(Create(table))

        # Interior horizontal dividers, counted up from the bottom edge.
        left_x = table.get_left()[0]
        right_x = table.get_right()[0]
        bottom_y = table.get_bottom()[1]
        dividers = VGroup(
            *[
                Line(
                    start=[left_x, bottom_y + k * band, 0],
                    end=[right_x, bottom_y + k * band, 0],
                )
                for k in range(1, row_count)
            ]
        )
        self.play(Create(dividers))

        # Modeling dataset: outline the top three rows.
        top_y = table.get_top()[1]
        highlights = outline_rows(top_y - (k + 0.5) * band for k in range(3))
        self.play(Create(highlights))
        self.wait(3)
        self.play(FadeOut(highlights, model_title))

        # Prediction dataset: outline the bottom three rows.
        predict_title = Text("Prediction Dataset Feature Calculation", font_size=24)
        predict_title.to_edge(UP)
        self.play(FadeIn(predict_title))

        highlights = outline_rows(
            table.get_bottom()[1] + (k + 0.5) * band for k in range(3)
        )
        self.play(Create(highlights))
        self.wait(3)

We first begin by getting the past 10 years of NBA data from the NBA.com API. We chose 10 years because we wanted as much data as we could get while also bearing in mind how much the way the game is played has changed. The 3pt revolution first started in 2015, so this felt like a good place to start.

The data we get from the API is then passed through a script that cleans both the team data and the player data. The prediction data is cleaned differently from the modeling data. Specifically, calculated features like rolling averages or season averages up to a given data point need to be treated differently to prevent data leakage.

Above is a visualization of how these features are calculated in each dataset, since reading it or saying it out loud can be confusing. The highlighted rows are the ones used to calculate the value for the bottom row, which represents the most recent data point.

Example:

Prediction Data Calculation

`data.rolling(3,min_periods=3).mean()`

Modeling Data Calculation

`data.shift(1).rolling(3,min_periods=3).mean()`


In the modeling data we use `shift(1)` so that each row's rolling average is built only from the 3 data points before it, never from the current data point itself.

This data includes the standard box score stats like minutes played, points, shooting efficiency from three, free throws and all around field goal percentage, assists, and rebounds. Additionally, some advanced stats like plus minus are also provided from these data sets. 

The main features are calculated at the time of cleaning the data. These features are:
-   feature_3_game_rolling_avg
-   feature_season_avg
-   feature_momentum (calculated as feature_3_game_rolling_avg - feature_season_avg)


In [None]:

%%manim -ql -v WARNING Espn

class Espn(Scene):
    """Show what is scraped from ESPN.com: the schedule, rosters and logos.

    An "ESPN.com" box at the top is connected by arrows to three boxes below
    it: "Roster" (left), "NBA Schedule" (centre) and "Logos" (right).
    """

    @staticmethod
    def _labeled_box(label, color, position, **rect_kwargs):
        """Return ``(rectangle, group)`` for a half-scale coloured rectangle
        with a centred label, with the group moved to ``position``."""
        box = Rectangle(**rect_kwargs).set_color(color).scale(0.5)
        text = Text(label, font_size=24).move_to(box.get_center())
        return box, Group(box, text).move_to(position)

    @staticmethod
    def _side_elbow(start, end, color):
        """Return two segments going horizontally from ``start`` and then
        vertically into ``end``; only the second segment carries the tip."""
        corner = np.array([end[0], start[1], 0])  # bend point above ``end``
        first = Line(start, corner).set_color(color)
        second = Line(corner, end).add_tip().set_color(color)
        return first, second

    def construct(self):
        espn, espn_group = self._labeled_box("ESPN.com", ORANGE, UP * 2)
        schedule, schedule_group = self._labeled_box(
            "NBA Schedule", RED, DOWN * 2, width=7
        )
        roster, roster_group = self._labeled_box(
            "Roster", BLUE, DOWN * 2 + LEFT * 4, width=4
        )
        logos, logos_group = self._labeled_box(
            "Logos", GREEN, DOWN * 2 + RIGHT * 4, width=4
        )

        # Straight arrow down to the schedule box (no bend needed).
        espn_segment = (
            Line(espn.get_edge_center(DOWN), schedule.get_edge_center(UP))
            .add_tip()
            .set_color(RED)
        )

        # Out of the left side, then down into the roster box.
        roster_segment1, roster_segment2 = self._side_elbow(
            espn.get_edge_center(LEFT), roster.get_edge_center(UP), BLUE
        )
        # Out of the right side, then down into the logos box.
        logos_segment1, logos_segment2 = self._side_elbow(
            espn.get_edge_center(RIGHT), logos.get_edge_center(UP), GREEN
        )

        self.play(FadeIn(espn_group))
        self.play(
            FadeIn(roster_group),
            FadeIn(logos_group),
            FadeIn(schedule_group),
            Create(espn_segment),
            Create(roster_segment1),
            Create(roster_segment2),
            Create(logos_segment1),
            Create(logos_segment2),
        )
        self.wait(10)

Next we will look into what we are gathering from ESPN.com

We use web scraping from ESPN.com in multiple different ways throughout the pipeline. 

First, when running our predictions script for a given day, we go out and retrieve the schedule to see what teams are playing that day. This also lets us see the matchups that day, which is valuable for gathering the opposing team's stats from our dataset.

Second, the NBA, like any sports league, is volatile: players are moved from team to team at a moment's notice. Seeing what teams are playing and then passing this through our roster scraping script allows us to stay up to date about which team's stats should be used for each player.

Lastly, we scraped each NBA team logo and the NBA logo itself from ESPN.com, as it provided an easy source for them. These are only used in our dashboard.


In [None]:

%%manim -ql -v WARNING Espn

class Espn(Scene):
    """Show what is gathered from DraftKings: the line, the over and the under.

    A "DraftKings" box at the top is connected by arrows to three boxes below
    it: "Over" (left), "Line" (centre) and "Under" (right).

    NOTE(review): this class reuses the name ``Espn`` from the ESPN scene, so
    it replaces that class when both definitions run in the same session.
    Consider renaming it together with the ``%%manim`` line that renders it.
    """

    @staticmethod
    def _labeled_box(label, color, position, **rect_kwargs):
        """Return ``(rectangle, group)`` for a half-scale coloured rectangle
        with a centred label, with the group moved to ``position``."""
        box = Rectangle(**rect_kwargs).set_color(color).scale(0.5)
        text = Text(label, font_size=24).move_to(box.get_center())
        return box, Group(box, text).move_to(position)

    @staticmethod
    def _side_elbow(start, end, color):
        """Return two segments going horizontally from ``start`` and then
        vertically into ``end``; only the second segment carries the tip."""
        corner = np.array([end[0], start[1], 0])  # bend point above ``end``
        first = Line(start, corner).set_color(color)
        second = Line(corner, end).add_tip().set_color(color)
        return first, second

    def construct(self):
        dk, dk_group = self._labeled_box("DraftKings", GREEN, UP * 2)
        line, line_group = self._labeled_box("Line", PURPLE, DOWN * 2, width=4)
        over, over_group = self._labeled_box(
            "Over", BLUE, DOWN * 2 + LEFT * 4, width=4
        )
        under, under_group = self._labeled_box(
            "Under", RED, DOWN * 2 + RIGHT * 4, width=4
        )

        # Straight arrow down to the "Line" box. The previous two-segment
        # version had its bend point on top of the start point, so its first
        # segment had zero length.
        line_arrow = (
            Line(dk.get_edge_center(DOWN), line.get_edge_center(UP))
            .add_tip()
            .set_color(PURPLE)
        )

        # Out of the left side, then down into the "Over" box.
        over_segment1, over_segment2 = self._side_elbow(
            dk.get_edge_center(LEFT), over.get_edge_center(UP), BLUE
        )
        # Out of the right side, then down into the "Under" box.
        under_segment1, under_segment2 = self._side_elbow(
            dk.get_edge_center(RIGHT), under.get_edge_center(UP), RED
        )

        self.play(FadeIn(dk_group))
        self.play(
            FadeIn(line_group),
            FadeIn(over_group),
            FadeIn(under_group),
            Create(line_arrow),
            Create(over_segment1),
            Create(over_segment2),
            Create(under_segment1),
            Create(under_segment2),
        )
        self.wait(10)

Our final data source is DraftKings. 

From here, each day we gather the line, the over, and the under for each player in the categories of points, assists, rebounds, and threes made.

The line is set at a number, and we then need to decide whether to bet that a player will finish above or below this line in the next game.

Example: 16.5 points

The over will have our odds if we decide to bet the over. The odds tell us either what we will receive if we bet $100 and are correct, in the case of +, or how much we need to bet to receive $100 if we are correct, in the case of -.

Example:

+120 means we would receive $120 for a $100 bet if we are correct.

-120 means we would need to bet $120 to receive $100 in return if we are correct.

### Modeling

In [None]:

%%manim -ql -v WARNING modeling

class modeling(Scene):
    """Show the stacked-model layout: data -> two base models -> ensemble.

    A "Modeling Data" box on the left feeds a "Linear Regression" box and a
    "LightGBM" box, which both feed an "Ensemble Model" box on the right.
    Dots travel along the connecting lines in two synchronised steps.
    """

    @staticmethod
    def _labeled_box(label, color, position, **rect_kwargs):
        """Return a group of a half-scale coloured rectangle and a centred
        label, moved to ``position``."""
        box = Rectangle(**rect_kwargs).set_color(color).scale(0.5)
        # The label is centred directly on the box; the earlier
        # ``.next_to(model, RIGHT)`` calls were immediately overridden.
        text = Text(label, font_size=24).move_to(box.get_center())
        return Group(box, text).move_to(position)

    def construct(self):
        model_group = self._labeled_box(
            "Modeling Data", GREEN, LEFT * 5, height=11, width=6
        )
        linear_group = self._labeled_box("Linear Regression", RED, UP * 2, width=6)
        lgb_group = self._labeled_box("LightGBM", BLUE, DOWN * 2, width=6)
        ensemble_group = self._labeled_box(
            "Ensemble Model", PURPLE, RIGHT * 5, height=11, width=6
        )

        self.play(
            FadeIn(model_group),
            FadeIn(linear_group),
            FadeIn(lgb_group),
            FadeIn(ensemble_group),
        )

        # 1. Lines from the data box to each base model.
        model_to_linear = Line(model_group.get_right(), linear_group.get_left())
        model_to_lgb = Line(model_group.get_right(), lgb_group.get_left())

        # 2. Lines from each base model to the ensemble box.
        linear_to_ensemble = Line(linear_group.get_right(), ensemble_group.get_left())
        lgb_to_ensemble = Line(lgb_group.get_right(), ensemble_group.get_left())

        self.play(Create(model_to_linear), Create(model_to_lgb))
        self.play(Create(linear_to_ensemble), Create(lgb_to_ensemble))

        # 3. Dots moving along the lines in sync: data -> base models ...
        dot1 = Dot(color=WHITE).move_to(model_to_linear.get_start())
        dot2 = Dot(color=WHITE).move_to(model_to_lgb.get_start())
        self.add(dot1, dot2)

        self.play(
            MoveAlongPath(dot1, model_to_linear),
            MoveAlongPath(dot2, model_to_lgb),
            run_time=1,
        )

        # ... then base models -> ensemble.
        dot3 = Dot(color=WHITE).move_to(linear_to_ensemble.get_start())
        dot4 = Dot(color=WHITE).move_to(lgb_to_ensemble.get_start())
        self.add(dot3, dot4)

        self.play(
            MoveAlongPath(dot3, linear_to_ensemble),
            MoveAlongPath(dot4, lgb_to_ensemble),
            run_time=1,
        )

        self.wait(1)

The modeling approach that was taken involved linear regression and then combining this with a light gradient boosting machine or better known as LightGBM. LightGBM is a fast, tree-based boosting model. 

We then combine these two using a meta-model, in our case ridge regression. Ridge regression is a regularized linear model that learns the optimal weights to combine these two base models.

Using these two models allows us to capture different aspects of the data and the meta-model allows us to learn how much to trust each model. 

Imagine LightGBM as a complex expert who sees all the nonlinear patterns, and linear regression as the calm, simple expert who does not overreact to patterns. The meta-model then acts as a sort of moderator who listens to both sides and then decides.

Example:

linear regression outputs 2

lightgbm outputs 3 

the weights for both are .5
we then do the simple equation below

`2 * .5 + 3 * .5 = 2.5`

So our ensemble output would be 2.5

This allows us to leverage the strengths of both models and hopefully reduces the weaknesses if we were to only use one of these models.

All of this is then saved to a pkl to be used later.

### Generating Predictions

In [None]:

%%manim -ql -v WARNING predictions 

class predictions(Scene):
    """Show the daily prediction flow: three inputs -> saved model -> dashboard.

    "Today's Odds", "Current Schedule/Roster" and "Most Recent Game Data"
    boxes on the left connect to a "Saved Model" box, which connects to a
    "Dashboard" box on the right. Dots travel along the connections.
    """

    def construct(self):
        def boxed(label, color, center, **size):
            """Half-scale coloured rectangle with a centred label, at ``center``."""
            frame = Rectangle(**size).set_color(color).scale(0.5)
            caption = Text(label, font_size=24).move_to(frame.get_center())
            return Group(frame, caption).move_to(center)

        # --- Boxes ---
        model_group = boxed("Saved Model", GREEN, RIGHT, height=11, width=6)
        odds_group = boxed("Today's Odds", RED, LEFT * 4 + UP * 2, width=9)
        espn_group = boxed("Current Schedule/Roster", ORANGE, LEFT * 4, width=9)
        game_data_group = boxed(
            "Most Recent Game Data", BLUE, LEFT * 4 + DOWN * 2, width=9
        )
        dashboard_group = boxed("Dashboard", YELLOW, RIGHT * 5, height=11, width=6)

        # --- Reveal every box, each sliding in along its own direction ---
        entrances = [
            (model_group, UP),
            (odds_group, LEFT),
            (game_data_group, LEFT),
            (espn_group, LEFT),
            (dashboard_group, RIGHT),
        ]
        self.play(*[FadeIn(group, shift=direction) for group, direction in entrances])

        # --- Connections: each input into the model, then model to dashboard ---
        model_inlet = model_group.get_left()
        input_lines = [
            Line(source.get_right(), model_inlet)
            for source in (odds_group, game_data_group, espn_group)
        ]
        output_line = Line(model_group.get_right(), dashboard_group.get_left())

        self.play(*[Create(link) for link in input_lines])
        self.play(Create(output_line))

        # --- Input dots travel to the model together ---
        input_dots = [Dot(color=WHITE).move_to(link.get_start()) for link in input_lines]
        self.add(*input_dots)
        self.play(
            *[MoveAlongPath(dot, link) for dot, link in zip(input_dots, input_lines)],
            run_time=1,
        )

        # --- Output dot travels from the model to the dashboard ---
        output_dot = Dot(color=WHITE).move_to(output_line.get_start())
        self.add(output_dot)
        self.play(MoveAlongPath(output_dot, output_line), run_time=1)

        self.wait(10)


This all culminates in our predictions script, which runs after we collect the odds every day.

We pull in that day's schedule to see the current games for that day. We then pull in the data necessary to run our models.

We then pull in the odds for that day and filter our data down to the players necessary. We then run this data through the model.

The model then uploads these predictions to the dashboard for users to observe and use for their bets that day.