In [1]:
#we want to create the team portion of the predictor 

In [203]:
from sqlalchemy import create_engine, text
import pandas as pd
import math

#function for recent form
#offensive and defensice ratings 
#rolling averages for the last 5, 10, 15 game windows 
#@params : teamName= 'Atlanta Hawks', date = '2025-02-20'
def calculate_recent_form_scores(engine, date, teamName):

    games_df = pd.DataFrame()
    
    with engine.connect() as conn:
        with conn.begin():
            query = text("""
                with recent_games as (
                	select *, 
                	ROW_NUMBER() OVER (
                		PARTITION BY "TEAM_ID"
                		ORDER BY "GAME_DATE" DESC
                	) as game_number
                from "2024-25_historic_game_data"
                )
                select 
                	"TEAM_ID",
                	"TEAM_NAME",
                	"GAME_ID",
                	"GAME_DATE",
                    "TEAM_ABBREVIATION",
                    "FT_PCT"
                from recent_games
                where "TEAM_NAME" = :team_name
                and game_number <= :window
                ORDER BY "GAME_DATE" DESC;
            """)
            query2 = text("""
                select * 
                from "2024-25_team_advanced_game_data"
                where "GAME_ID" = :game_id
            """)
            recent_games = pd.read_sql_query(query, engine, params={'team_name': teamName, 'window': 15})
            gameIds = recent_games['GAME_ID'].tolist()
            abvs = recent_games['TEAM_ABBREVIATION'].tolist()
            abv = abvs[0]

        #query all the games from the games table: 2024-25_team_advanced_game_data   
        for i in range(15):
            gameId = gameIds[i]
            curr_game = pd.read_sql_query(query2, engine, params={'game_id' : gameId})
            individual_team_df = curr_game.loc[curr_game['TEAM_ABBREVIATION'] == abv]
            games_df = pd.concat([games_df, individual_team_df], ignore_index=True)

    #now we want to calculate rolling averages for the most recent games 
    def calculate_net_rating(games_df):
        netRatingFive = 0
        netRatingTen = 0
        netRatingFifteen = 0
        #5 window
        for index, row in games_df.iloc[0:5].iterrows():
            netRatingFive += row["NET_RATING"]
        netRatingFive = netRatingFive/5
        #print(netRatingFive)
            
        #10 window
        for index, row in games_df.iloc[0:10].iterrows():
            netRatingTen += row["NET_RATING"]
        netRatingTen = netRatingTen/10
        #print(netRatingTen)
    
        #15 window 
        # for index, row in games_df.iloc[0:15].iterrows():
        #     netRatingFifteen += row["NET_RATING"]
        # netRatingFifteen = netRatingFifteen/15
        # print(netRatingFifteen)
    
        netRating = (0.6 * netRatingTen) + (0.4 * netRatingFive)
        print(f"Net Rating Score: {netRating}")
        
        return netRating
        
    def calculate_four_factors(games_df, recent_games):
        #calculate the 4 factors in 5 game range 
        ftr5 = 0
        oreb5 = 0
        dreb5 = 0
        tov5 = 0
        efg5  = 0
        for index, row in games_df.iloc[0:5].iterrows():
            oreb5  += row["OREB_PCT"]
            dreb5  += row["DREB_PCT"]
            tov5 += row["TM_TOV_PCT"]
            efg5  += row["EFG_PCT"]

        for index, row in recent_games.iloc[0:5].iterrows():
            ftr5 += row["FT_PCT"]

        ftr5 = ftr5/5
        oreb5 = oreb5/5
        dreb5 = dreb5/5
        tov5 = tov5/5
        efg5  = efg5/5

        #perform same operation for a 10 game frame 
        ftr10 = 0
        oreb10 = 0
        dreb10 = 0
        tov10 = 0
        efg10 = 0
        for index, row in games_df.iloc[0:10].iterrows():
            oreb10  += row["OREB_PCT"]
            dreb10  += row["DREB_PCT"]
            tov10 += row["TM_TOV_PCT"]
            efg10  += row["EFG_PCT"]

        for index, row in recent_games.iloc[0:5].iterrows():
            ftr10 += row["FT_PCT"]
        ftr10 = ftr10/10
        oreb10 = oreb10/10
        dreb10 = dreb10/10
        tov10 = tov10/10
        efg10 = efg10/10
        
        FreeThrowRate = (0.6 * ftr10) + (0.4 * ftr5)
        OREB_PCT = (0.6 * oreb10) + (0.4 * oreb5)
        DREB_PCT = (0.6 * dreb10) + (0.4 * dreb5)
        TOV = ((0.6 * tov10) + (0.4 * tov5))/100
        EFG = (0.6 * efg10) + (0.4 * efg5)
        # print(FreeThrowRate)
        # print(OREB_PCT)
        # print(DREB_PCT)
        # print(TOV)
        # print(EFG)
        fourFactorsScore = (0.6 * EFG) - (0.25*TOV) + ((0.1*OREB_PCT) + (0.1*DREB_PCT)) + (0.15 * FreeThrowRate)
        print(f"Recent Four Factors Score: {fourFactorsScore}")
        return fourFactorsScore
        
    recentNet = calculate_net_rating(games_df)
    recentFour = calculate_four_factors(games_df, recent_games)

    recentFormScore = (0.75 * recentNet) + (0.25 * recentFour)
    
    return recentFormScore
    
engine = create_engine('postgresql://chase:yourpassword@localhost:5433/TeamData')
recentFormScore = calculate_recent_form_scores(engine, '2025-02-20', 'Atlanta Hawks')
print(recentFormScore)

Net Rating Score: 0.7040000000000001
Recent Four Factors Score: 0.476151
0.64703775


In [240]:
#functin for 4 factors 
from sqlalchemy import create_engine, text
import pandas as pd
import math

#function for 4 factors 
#offensive and defensice ratings 
#rolling averages for the last 5, 10, 15 game windows 
#@params : teamName= 'Atlanta Hawks', date = '2025-02-20'
def calculate_net_four(engine, date, teamName):

    games_df = pd.DataFrame()
    
    with engine.connect() as conn:
        with conn.begin():
            query = text("""
                 select 
                	"TEAM_ID",
                	"TEAM_NAME",
                	"GAME_ID",
                	"GAME_DATE",
                	"TEAM_ABBREVIATION",
                	"FT_PCT"
                from "2024-25_historic_game_data"
                where "TEAM_NAME" = :team_name
                and "GAME_DATE" < :date
                ORDER BY "GAME_DATE" DESC;
            """)
            query2 = text("""
                select * 
                from "2024-25_team_advanced_game_data"
                where "GAME_ID" = :game_id
            """)
            recent_games = pd.read_sql_query(query, engine, params={'team_name': teamName, 'date' : date})
            gameIds = recent_games['GAME_ID'].tolist()
            abvs = recent_games['TEAM_ABBREVIATION'].tolist()
            abv = abvs[0]

            for gameId in gameIds:
                #gameId = gameIds[i]
                curr_game = pd.read_sql_query(query2, engine, params={'game_id' : gameId})
                individual_team_df = curr_game.loc[curr_game['TEAM_ABBREVIATION'] == abv]
                games_df = pd.concat([games_df, individual_team_df], ignore_index=True)

    def calculate_net_rating(games_df):
        netRating = 0
        #iterate through all rows to average netRating
        for index, row in games_df.iterrows():
            netRating += row["NET_RATING"]
        netRating = netRating/len(games_df)
        
        print(f"Net Rating Score: {netRating}")
        return netRating

    def calculate_four_factors(games_df, recent_games):
        FreeThrowRate = 0
        OREB_PCT = 0
        DREB_PCT = 0
        TOV = 0
        EFG = 0
        for index, row in games_df.iterrows():
            OREB_PCT += row["OREB_PCT"]
            DREB_PCT += row["DREB_PCT"]
            TOV += row["TM_TOV_PCT"]
            EFG += row["EFG_PCT"]
        
        for index, row in recent_games.iterrows():
            FreeThrowRate += row["FT_PCT"]

        OREB_PCT = OREB_PCT/len(games_df)
        DREB_PCT = DREB_PCT/len(games_df)
        TOV = TOV/len(games_df)
        EFG = EFG/len(games_df)
        FreeThrowRate = FreeThrowRate/len(recent_games)
        
        fourFactorsScore = (0.6 * EFG) - (0.25*TOV) + ((0.1*OREB_PCT) + (0.1*DREB_PCT)) + (0.15 * FreeThrowRate)
        print(f"Recent Four Factors Score: {fourFactorsScore}")
        return fourFactorsScore

    seasonNetRating = calculate_net_rating(games_df)
    seasonFourFactors = calculate_four_factors(games_df, recent_games)
    
    return seasonNetRating, seasonFourFactors
    
engine = create_engine('postgresql://chase:yourpassword@localhost:5433/TeamData') 
teamNetRating, seasonFourFactors = calculate_net_four(engine, '2025-1-20', 'Atlanta Hawks')

Net Rating Score: -2.3285714285714283
Recent Four Factors Score: -3.247013392857143


In [295]:
from sqlalchemy import create_engine, text
import pandas as pd
import math
from nba_api.stats.static import teams
from datetime import datetime

yearsList = ['2024-25', '2023-24', '2022-23', '2021-22', '2020-21', '2019-20']

#historic head to head matchups 
def calculate_historic_matchups(engine, date, goodTeam, badTeam):
    
    yearsList = ['2024-25', '2023-24', '2022-23', '2021-22', '2020-21', '2019-20']

    tableName = f"{yearsList[0]}_team_advanced_game_data"
    
    nba_teams = teams.get_teams()
    team_mapping = {}
    
    for team in nba_teams:
        team_mapping[team['full_name']] = team['abbreviation']
    team_mapping['Brooklyn Nets'] = 'BRK'
    team_mapping['Phoenix Suns'] = 'PHO'
    team_mapping['Charlotte Hornets'] = 'CHO'

    goodAbv = team_mapping[goodTeam]
    badAbv = team_mapping[badTeam]
    print(goodAbv)
    print(badAbv)

    games_df = pd.DataFrame()
    
    with engine.connect() as conn:
        with conn.begin():
            query = text(f"""
                SELECT 
                    a."GAME_ID",
                    
                    -- Team 1 (DET) Columns
                    a."TEAM_ID" AS T1_TEAM_ID,
                    a."TEAM_NAME" AS T1_TEAM_NAME,
                    a."TEAM_ABBREVIATION" AS T1_TEAM_ABBREVIATION,
                    a."TEAM_CITY" AS T1_TEAM_CITY,
                    a."MIN" AS T1_MIN,
                    a."E_OFF_RATING" AS T1_E_OFF_RATING,
                    a."OFF_RATING" AS T1_OFF_RATING,
                    a."E_DEF_RATING" AS T1_E_DEF_RATING,
                    a."DEF_RATING" AS T1_DEF_RATING,
                    a."E_NET_RATING" AS T1_E_NET_RATING,
                    a."NET_RATING" AS T1_NET_RATING,
                    a."AST_PCT" AS T1_AST_PCT,
                    a."AST_TOV" AS T1_AST_TOV,
                    a."AST_RATIO" AS T1_AST_RATIO,
                    a."OREB_PCT" AS T1_OREB_PCT,
                    a."DREB_PCT" AS T1_DREB_PCT,
                    a."REB_PCT" AS T1_REB_PCT,
                    a."E_TM_TOV_PCT" AS T1_E_TM_TOV_PCT,
                    a."TM_TOV_PCT" AS T1_TM_TOV_PCT,
                    a."EFG_PCT" AS T1_EFG_PCT,
                    a."TS_PCT" AS T1_TS_PCT,
                    a."USG_PCT" AS T1_USG_PCT,
                    a."E_USG_PCT" AS T1_E_USG_PCT,
                    a."E_PACE" AS T1_E_PACE,
                    a."PACE" AS T1_PACE,
                    a."PACE_PER40" AS T1_PACE_PER40,
                    a."POSS" AS T1_POSS,
                    a."PIE" AS T1_PIE,
                
                    -- Team 2 (ATL) Columns
                    b."TEAM_ID" AS T2_TEAM_ID,
                    b."TEAM_NAME" AS T2_TEAM_NAME,
                    b."TEAM_ABBREVIATION" AS T2_TEAM_ABBREVIATION,
                    b."TEAM_CITY" AS T2_TEAM_CITY,
                    b."MIN" AS T2_MIN,
                    b."E_OFF_RATING" AS T2_E_OFF_RATING,
                    b."OFF_RATING" AS T2_OFF_RATING,
                    b."E_DEF_RATING" AS T2_E_DEF_RATING,
                    b."DEF_RATING" AS T2_DEF_RATING,
                    b."E_NET_RATING" AS T2_E_NET_RATING,
                    b."NET_RATING" AS T2_NET_RATING,
                    b."AST_PCT" AS T2_AST_PCT,
                    b."AST_TOV" AS T2_AST_TOV,
                    b."AST_RATIO" AS T2_AST_RATIO,
                    b."OREB_PCT" AS T2_OREB_PCT,
                    b."DREB_PCT" AS T2_DREB_PCT,
                    b."REB_PCT" AS T2_REB_PCT,
                    b."E_TM_TOV_PCT" AS T2_E_TM_TOV_PCT,
                    b."TM_TOV_PCT" AS T2_TM_TOV_PCT,
                    b."EFG_PCT" AS T2_EFG_PCT,
                    b."TS_PCT" AS T2_TS_PCT,
                    b."USG_PCT" AS T2_USG_PCT,
                    b."E_USG_PCT" AS T2_E_USG_PCT,
                    b."E_PACE" AS T2_E_PACE,
                    b."PACE" AS T2_PACE,
                    b."PACE_PER40" AS T2_PACE_PER40,
                    b."POSS" AS T2_POSS,
                    b."PIE" AS T2_PIE
                
                FROM "{tableName}" a
                JOIN "{tableName}" b 
                    ON a."GAME_ID" = b."GAME_ID"
                WHERE a."TEAM_ABBREVIATION" = :badAbv 
                  AND b."TEAM_ABBREVIATION" = :goodAbv
              AND a."TEAM_ID" != b."TEAM_ID";
            """)
            query2 = text("""
                select 
                	"TEAM_ID",
                	"TEAM_NAME",
                	"GAME_ID",
                	"GAME_DATE",
                	"TEAM_ABBREVIATION"
                from "2024-25_historic_game_data"
                where "TEAM_NAME" = :team_name
                and "GAME_ID" = :gameId
            """)
            recent_games = pd.read_sql_query(query.format(table_name=f"{yearsList[0]}_team_advanced_game_data"), 
                                            engine, params={'badAbv': badAbv, 'goodAbv': goodAbv})
            gameIds = recent_games['GAME_ID'].tolist()

            #get all the games dates
            for gameId in gameIds:
                #gameId = gameIds[i]
                curr_game = pd.read_sql_query(query2, engine, params={'gameId' : gameId, 'team_name' : goodTeam})
                games_df = pd.concat([games_df, curr_game], ignore_index=True)

            valid_games = []
            checked_valid = []
            #check that the date is within our range 
            for index, row in games_df.iterrows():
                validDate = datetime.strptime(date, "%Y-%m-%d")
                currDate = datetime.strptime(row["GAME_DATE"], "%Y-%m-%d")
                if currDate < validDate:
                    valid_games.append(row["GAME_ID"])

            print(valid_games)

            #check if the number of valid games is less than 3 - in which case we need to go 
            #back one year to find more matchups
            games_df2 = pd.DataFrame()
            if len(valid_games) < 3:

                #create a new tableName, and requery on the previous year 
                #tableName = f"{yearsList[1]}_team_advanced_game_data"
                recent_gamesTwo =  pd.read_sql_query(query.format(table_name=f"{yearsList[1]}_team_advanced_game_data"), 
                                            engine, params={'badAbv': badAbv, 'goodAbv': goodAbv})
                gameIdsTwo = recent_gamesTwo['GAME_ID'].tolist()
                
                #get all the games dates for new table
                for gameId in gameIdsTwo:
                    #gameId = gameIds[i]
                    curr_game = pd.read_sql_query(query2, engine, params={'gameId' : gameId, 'team_name' : goodTeam})
                    games_df2 = pd.concat([games_df2, curr_game], ignore_index=True)
    
                valid_games2 = []
                checked_valid = []
                #check that the date is within our range again
                for index, row in games_df2.iterrows():
                    validDate = datetime.strptime(date, "%Y-%m-%d")
                    currDate = datetime.strptime(row["GAME_DATE"], "%Y-%m-%d")
                    if currDate < validDate:
                        valid_games2.append(row["GAME_ID"])

                #now we have a new valid games table with more games. 
                #first find how many more we need 
                numNeeded = 3 - len(valid_games)
                #add the games we already had
                for i in range(len(valid_games)):
                    checked_valid.append(valid_games[i])
                #add new games 
                i = 1
                for i in range(numNeeded):
                    checked_valid.append(valid_games2[i-1])
                    
            elif len(valid_games) > 3:
                for i in range(3):
                    checked_valid.append(valid_games[i])

            print(checked_valid)
            
                    
    print(len(recent_games))
    return recent_gamesTwo


engine = create_engine('postgresql://chase:yourpassword@localhost:5433/TeamData') 
pd.set_option('display.max_columns', 58)
calculate_historic_matchups(engine, '2025-1-20', 'Atlanta Hawks', 'Detroit Pistons')

ATL
DET


AttributeError: 'TextClause' object has no attribute 'format'

In [None]:
#historic head to head matchups 


In [None]:
#Strength of schedule 
