# NBA PrizePicks Analysis

### Overview
This notebook analyzes NBA PrizePicks data, providing insights and trends to optimize predictions and strategies.

---

**Author:** Elan Hashem
**Date:** 12/15/2024 
**Purpose:** To explore, visualize, and analyze NBA PrizePicks data for actionable insights.

In [43]:
# import pandas & numpy library
import pandas as pd
import numpy as np

# Import seaborn and apply its plotting styles
import seaborn as sns
sns.set(font_scale=2, style="white")

# import matplotlib
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.style as style

In [44]:
url = 'https://raw.githubusercontent.com/NocturneBear/NBA-Data-2010-2024/refs/heads/main/regular_season_box_scores_2010_2024_part_2.csv'
df = pd.read_csv(url)
print(df.columns)
df

Index(['season_year', 'game_date', 'gameId', 'matchup', 'teamId', 'teamCity',
       'teamName', 'teamTricode', 'teamSlug', 'personId', 'personName',
       'position', 'comment', 'jerseyNum', 'minutes', 'fieldGoalsMade',
       'fieldGoalsAttempted', 'fieldGoalsPercentage', 'threePointersMade',
       'threePointersAttempted', 'threePointersPercentage', 'freeThrowsMade',
       'freeThrowsAttempted', 'freeThrowsPercentage', 'reboundsOffensive',
       'reboundsDefensive', 'reboundsTotal', 'assists', 'steals', 'blocks',
       'turnovers', 'foulsPersonal', 'points', 'plusMinusPoints'],
      dtype='object')


Unnamed: 0,season_year,game_date,gameId,matchup,teamId,teamCity,teamName,teamTricode,teamSlug,personId,...,reboundsOffensive,reboundsDefensive,reboundsTotal,assists,steals,blocks,turnovers,foulsPersonal,points,plusMinusPoints
0,2010-11,2010-12-11,21000347,HOU vs. CLE,1610612745,Houston,Rockets,HOU,rockets,2449,...,6,8,14,1,1,0,4,3,14,10
1,2010-11,2010-11-19,21000173,HOU @ TOR,1610612745,Houston,Rockets,HOU,rockets,2449,...,1,4,5,3,1,1,2,3,19,0
2,2011-12,2012-03-18,21100667,HOU @ PHX,1610612745,Houston,Rockets,HOU,rockets,2449,...,3,4,7,3,0,0,0,2,18,-6
3,2010-11,2010-12-22,21000429,HOU @ LAC,1610612745,Houston,Rockets,HOU,rockets,2449,...,1,2,3,2,2,2,4,5,22,-3
4,2010-11,2011-02-22,21000840,HOU @ DET,1610612745,Houston,Rockets,HOU,rockets,2449,...,3,6,9,1,1,2,3,3,8,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
141488,2011-12,2012-01-25,21100263,OKC vs. NOH,1610612760,Oklahoma City,Thunder,OKC,thunder,2555,...,1,2,3,2,0,2,2,3,7,11
141489,2013-14,2014-01-19,21300603,OKC vs. SAC,1610612760,Oklahoma City,Thunder,OKC,thunder,2555,...,3,4,7,2,0,0,1,3,10,3
141490,2015-16,2016-01-24,21500664,OKC @ BKN,1610612760,Oklahoma City,Thunder,OKC,thunder,2555,...,3,1,4,1,0,1,0,4,3,-6
141491,2013-14,2014-03-17,21300999,OKC @ CHI,1610612760,Oklahoma City,Thunder,OKC,thunder,2555,...,1,1,2,1,2,0,1,4,2,-7


In [45]:
type(df['season_year'][0])

str

In [46]:
# Turning the season column into an integer
df['year'] = df['season_year'].apply(lambda x: int(x[:2]+x[5:]))

In [47]:
#Filtering out all the seasons before 2023
df['game_date'] = pd.to_datetime(df['game_date'])
df  = df[df['game_date'].dt.year >= 2023]
df

Unnamed: 0,season_year,game_date,gameId,matchup,teamId,teamCity,teamName,teamTricode,teamSlug,personId,...,reboundsDefensive,reboundsTotal,assists,steals,blocks,turnovers,foulsPersonal,points,plusMinusPoints,year
1965,2023-24,2024-03-12,22300942,HOU @ SAS,1610612745,Houston,Rockets,HOU,rockets,201145,...,1,1,0,1,0,0,0,8,-5,2024
1967,2023-24,2024-04-04,22301113,HOU vs. GSW,1610612745,Houston,Rockets,HOU,rockets,201145,...,0,1,1,1,0,0,0,8,-4,2024
1968,2023-24,2023-12-22,22300385,HOU vs. DAL,1610612745,Houston,Rockets,HOU,rockets,201145,...,1,1,0,0,2,0,0,2,5,2024
1969,2023-24,2024-04-07,22301131,HOU @ DAL,1610612745,Houston,Rockets,HOU,rockets,201145,...,0,1,1,2,0,0,0,7,-20,2024
1970,2023-24,2023-12-27,22300418,HOU vs. PHX,1610612745,Houston,Rockets,HOU,rockets,201145,...,1,1,1,0,1,1,2,2,-9,2024
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
140862,2023-24,2023-12-29,22300429,NYK @ ORL,1610612752,New York,Knicks,NYK,knicks,1631376,...,0,0,0,0,0,0,0,0,0,2024
140863,2023-24,2023-12-25,22300401,NYK vs. MIL,1610612752,New York,Knicks,NYK,knicks,1631376,...,0,0,0,0,0,0,0,0,0,2024
140864,2023-24,2023-12-27,22300419,NYK @ OKC,1610612752,New York,Knicks,NYK,knicks,1631376,...,0,0,0,0,0,0,0,0,0,2024
140865,2023-24,2023-12-23,22300388,NYK vs. MIL,1610612752,New York,Knicks,NYK,knicks,1631376,...,0,0,0,0,0,0,0,0,-3,2024


## Getting data set number 2 of the current season

In [48]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("eduardopalmieri/nba-player-stats-season-2425")

print("Path to dataset files:", path)

Path to dataset files: C:\Users\elan0\.cache\kagglehub\datasets\eduardopalmieri\nba-player-stats-season-2425\versions\32


In [49]:
import os

# Check the contents of the folder
print("Contents of the dataset folder:")
print(os.listdir(path))

Contents of the dataset folder:
['database_24_25.csv']


In [50]:
csv_file = os.path.join(path, "database_24_25.csv")
current_season = pd.read_csv(csv_file)
print(current_season.columns)
current_season

Index(['Player', 'Tm', 'Opp', 'Res', 'MP', 'FG', 'FGA', 'FG%', '3P', '3PA',
       '3P%', 'FT', 'FTA', 'FT%', 'ORB', 'DRB', 'TRB', 'AST', 'STL', 'BLK',
       'TOV', 'PF', 'PTS', 'GmSc', 'Data'],
      dtype='object')


Unnamed: 0,Player,Tm,Opp,Res,MP,FG,FGA,FG%,3P,3PA,...,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,GmSc,Data
0,Jayson Tatum,BOS,NYK,W,30.30,14,18,0.778,8,11,...,4,4,10,1,1,1,1,37,38.1,2024-10-22
1,Anthony Davis,LAL,MIN,W,37.58,11,23,0.478,1,3,...,13,16,4,1,3,1,1,36,34.0,2024-10-22
2,Derrick White,BOS,NYK,W,26.63,8,13,0.615,6,10,...,3,3,4,1,0,0,1,24,22.4,2024-10-22
3,Jrue Holiday,BOS,NYK,W,30.52,7,9,0.778,4,6,...,2,4,4,1,0,0,2,18,19.5,2024-10-22
4,Miles McBride,NYK,BOS,L,25.85,8,10,0.800,4,5,...,0,0,2,0,0,1,1,22,17.8,2024-10-22
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11137,Eric Gordon,PHI,BRK,W,6.95,0,1,0.000,0,1,...,0,0,0,1,0,1,0,0,-0.7,2025-01-04
11138,Dalen Terry,CHI,NYK,W,11.87,1,3,0.333,0,0,...,1,1,1,0,0,1,4,2,-1.3,2025-01-04
11139,Micah Potter,UTA,MIA,W,6.65,0,3,0.000,0,3,...,0,0,1,0,0,0,0,0,-1.4,2025-01-04
11140,Bones Hyland,LAC,ATL,W,5.10,0,2,0.000,0,1,...,0,1,0,0,0,1,0,0,-1.7,2025-01-04


# Making a data frame of all players with height and position 

In [51]:
players = pd.read_csv("C:\\Users\\elan0\\Downloads\\Drafted_Players_Info.csv")
players = players.drop(columns=["School/Club","Status","Country","Age"])
players

Unnamed: 0,Name,Position,Height,Weight
0,Melvin Ajinca,F,6-8,218 lbs
1,Adem Bona,C,6-8,243 lbs
2,Matas Buzelis,F,6-9,197 lbs
3,Carlton Carrington,G,6-4,195 lbs
4,Devin Carter,G,6-2,193 lbs
5,Stephon Castle,G,6-6,210 lbs
6,Ulrich Chomche,F,6-10,232 lbs
7,Cam Christie,G,6-5,190 lbs
8,Donovan Clingan,C,7-2,282 lbs
9,Isaiah Collier,G,6-3,205 lbs


In [52]:
for index, row in current_season.iterrows():
    if row["Player"] not in players["Name"].values:
        players.loc[len(players)] = [row["Player"],df.loc[df["personName"] == row["Player"]]["position"],"NaN","NaN"]


In [53]:
players

Unnamed: 0,Name,Position,Height,Weight
0,Melvin Ajinca,F,6-8,218 lbs
1,Adem Bona,C,6-8,243 lbs
2,Matas Buzelis,F,6-9,197 lbs
3,Carlton Carrington,G,6-4,195 lbs
4,Devin Carter,G,6-2,193 lbs
...,...,...,...,...
549,Nick Smith Jr.,"Series([], Name: position, dtype: object)",,
550,Wendell Moore Jr.,112125 NaN 112126 NaN 112127 NaN 1121...,,
551,Kawhi Leonard,35948 F 35950 F 35956 F 35957 F 35...,,
552,TyTy Washington Jr.,12710 NaN 12711 NaN 12712 NaN 12713 ...,,


In [54]:
sf = pd.read_csv("C:\\Users\\elan0\\Downloads\\small_forwards_full.csv")
sf

Unnamed: 0,NAME,TEAM,COLLEGE
0,"Anderson, Kyle",Golden State Warriors,UCLA
1,"Anunoby, OG",New York Knicks,Indiana
2,"Avdija, Deni",Portland Trail Blazers,
3,"Baldwin Jr., Patrick",Washington Wizards,Milwaukee
4,"Barlow, Dominick",Atlanta Hawks,
...,...,...,...
144,"Williams, Kenrich",Oklahoma City Thunder,TCU
145,"Williams, Ziaire",Brooklyn Nets,Stanford
146,"Wilson, Jalen",Brooklyn Nets,Kansas
147,"Wood, Christian",Los Angeles Lakers,UNLV


In [55]:
sf["NAME"] = sf["NAME"].apply(lambda x: x[x.find(',')+1:]+" "+x[:x.find(',')])
sf

Unnamed: 0,NAME,TEAM,COLLEGE
0,Kyle Anderson,Golden State Warriors,UCLA
1,OG Anunoby,New York Knicks,Indiana
2,Deni Avdija,Portland Trail Blazers,
3,Patrick Baldwin Jr.,Washington Wizards,Milwaukee
4,Dominick Barlow,Atlanta Hawks,
...,...,...,...
144,Kenrich Williams,Oklahoma City Thunder,TCU
145,Ziaire Williams,Brooklyn Nets,Stanford
146,Jalen Wilson,Brooklyn Nets,Kansas
147,Christian Wood,Los Angeles Lakers,UNLV


In [56]:
pg = pd.read_csv("C:\\Users\\elan0\\Downloads\\point_guards.csv")
pg

Unnamed: 0,NAME,TEAM,COLLEGE
0,"Agbaji, Ochai",Toronto Raptors,Kansas
1,"Alexander, Trey",Denver Nuggets,Creighton
2,"Alvarado, Jose",New Orleans Pelicans,Georgia Tech
3,"Anthony, Cole",Orlando Magic,North Carolina
4,"Ball, LaMelo",Charlotte Hornets,
...,...,...,...
155,"Williams, Brandon",Dallas Mavericks,Arizona
156,"Williams, Nate",Houston Rockets,Buffalo
157,"Williams Jr., Vince",Memphis Grizzlies,VCU
158,"Wong, Isaiah",Charlotte Hornets,Miami


In [57]:
pg["NAME"] = pg["NAME"].apply(lambda x: x[x.find(',')+1:]+" "+x[:x.find(',')])
pg

Unnamed: 0,NAME,TEAM,COLLEGE
0,Ochai Agbaji,Toronto Raptors,Kansas
1,Trey Alexander,Denver Nuggets,Creighton
2,Jose Alvarado,New Orleans Pelicans,Georgia Tech
3,Cole Anthony,Orlando Magic,North Carolina
4,LaMelo Ball,Charlotte Hornets,
...,...,...,...
155,Brandon Williams,Dallas Mavericks,Arizona
156,Nate Williams,Houston Rockets,Buffalo
157,Vince Williams Jr.,Memphis Grizzlies,VCU
158,Isaiah Wong,Charlotte Hornets,Miami


In [58]:
sg = pd.read_csv("C:\\Users\\elan0\\Downloads\\shooting_guards.csv")
sg

Unnamed: 0,NAME,TEAM,COLLEGE
0,"Agbaji, Ochai",Toronto Raptors,Kansas
1,"Alexander, Trey",Denver Nuggets,Creighton
2,"Alexander-Walker, Nickeil",Minnesota Timberwolves,Virginia Tech
3,"Allen, Grayson",Phoenix Suns,Duke
4,"Bane, Desmond",Memphis Grizzlies,TCU
...,...,...,...
181,"Williams, Brandon",Dallas Mavericks,Arizona
182,"Williams, Nate",Houston Rockets,Buffalo
183,"Williams Jr., Vince",Memphis Grizzlies,VCU
184,"Wong, Isaiah",Charlotte Hornets,Miami


In [59]:
sg["NAME"] = sg["NAME"].apply(lambda x: x[x.find(',')+1:]+" "+x[:x.find(',')])
sg

Unnamed: 0,NAME,TEAM,COLLEGE
0,Ochai Agbaji,Toronto Raptors,Kansas
1,Trey Alexander,Denver Nuggets,Creighton
2,Nickeil Alexander-Walker,Minnesota Timberwolves,Virginia Tech
3,Grayson Allen,Phoenix Suns,Duke
4,Desmond Bane,Memphis Grizzlies,TCU
...,...,...,...
181,Brandon Williams,Dallas Mavericks,Arizona
182,Nate Williams,Houston Rockets,Buffalo
183,Vince Williams Jr.,Memphis Grizzlies,VCU
184,Isaiah Wong,Charlotte Hornets,Miami


In [60]:
pf = pd.read_csv("C:\\Users\\elan0\\Downloads\\power_forward.csv")
pf

Unnamed: 0,NAME,TEAM,COLLEGE
0,"Achiuwa, Precious",New York Knicks,Memphis
1,"Aldama, Santi",Memphis Grizzlies,Loyola (MD)
2,"Antetokounmpo, Giannis",Milwaukee Bucks,
3,"Bagley III, Marvin",Washington Wizards,Duke
4,"Baldwin Jr., Patrick",Washington Wizards,Milwaukee
...,...,...,...
152,"Williams, Ziaire",Brooklyn Nets,Stanford
153,"Williamson, Zion",New Orleans Pelicans,Duke
154,"Wilson, Jalen",Brooklyn Nets,Kansas
155,"Wood, Christian",Los Angeles Lakers,UNLV


In [61]:
pf["NAME"] = pf["NAME"].apply(lambda x: x[x.find(',')+1:]+" "+x[:x.find(',')])
pf

Unnamed: 0,NAME,TEAM,COLLEGE
0,Precious Achiuwa,New York Knicks,Memphis
1,Santi Aldama,Memphis Grizzlies,Loyola (MD)
2,Giannis Antetokounmpo,Milwaukee Bucks,
3,Marvin Bagley III,Washington Wizards,Duke
4,Patrick Baldwin Jr.,Washington Wizards,Milwaukee
...,...,...,...
152,Ziaire Williams,Brooklyn Nets,Stanford
153,Zion Williamson,New Orleans Pelicans,Duke
154,Jalen Wilson,Brooklyn Nets,Kansas
155,Christian Wood,Los Angeles Lakers,UNLV


In [62]:
c = pd.read_csv("C:\\Users\\elan0\\Downloads\\centers.csv")
c

Unnamed: 0,NAME,TEAM,COLLEGE
0,"Adams, Steven",Houston Rockets,Pittsburgh
1,"Adebayo, Bam",Miami Heat,Kentucky
2,"Allen, Jarrett",Cleveland Cavaliers,Texas
3,"Ayton, Deandre",Portland Trail Blazers,Arizona
4,"Bamba, Mo",LA Clippers,Texas
...,...,...,...
75,"Williams, Mark",Charlotte Hornets,Duke
76,"Williams III, Robert",Portland Trail Blazers,Texas A&M
77,"Wiseman, James",Indiana Pacers,Memphis
78,"Zeller, Cody",Atlanta Hawks,Indiana


In [63]:
c["NAME"] = c["NAME"].apply(lambda x: x[x.find(',')+1:]+" "+x[:x.find(',')])
c

Unnamed: 0,NAME,TEAM,COLLEGE
0,Steven Adams,Houston Rockets,Pittsburgh
1,Bam Adebayo,Miami Heat,Kentucky
2,Jarrett Allen,Cleveland Cavaliers,Texas
3,Deandre Ayton,Portland Trail Blazers,Arizona
4,Mo Bamba,LA Clippers,Texas
...,...,...,...
75,Mark Williams,Charlotte Hornets,Duke
76,Robert Williams III,Portland Trail Blazers,Texas A&M
77,James Wiseman,Indiana Pacers,Memphis
78,Cody Zeller,Atlanta Hawks,Indiana


In [64]:
players.rename(columns={"Position": "General Position"}, inplace=True)
players

Unnamed: 0,Name,General Position,Height,Weight
0,Melvin Ajinca,F,6-8,218 lbs
1,Adem Bona,C,6-8,243 lbs
2,Matas Buzelis,F,6-9,197 lbs
3,Carlton Carrington,G,6-4,195 lbs
4,Devin Carter,G,6-2,193 lbs
...,...,...,...,...
549,Nick Smith Jr.,"Series([], Name: position, dtype: object)",,
550,Wendell Moore Jr.,112125 NaN 112126 NaN 112127 NaN 1121...,,
551,Kawhi Leonard,35948 F 35950 F 35956 F 35957 F 35...,,
552,TyTy Washington Jr.,12710 NaN 12711 NaN 12712 NaN 12713 ...,,


Messing around with how to access specific players in the data frames

In [65]:
type(players["General Position"].iloc[0])

str

In [66]:
c["NAME"] = c["NAME"].astype(str)
type(c.query('NAME == "Steven Adams"'))
sg["NAME"].str.contains("Nick Smith Jr.").any()

True

In [67]:
print(c["NAME"].str.contains("Steven Adms").any())
row = c[c["NAME"].str.contains("Steven Adams")]
row

False


Unnamed: 0,NAME,TEAM,COLLEGE
0,Steven Adams,Houston Rockets,Pittsburgh


In [68]:
players["Position"] = ""
players["Position"] = players["Position"].apply(lambda x: [])
players["General Position"].str.contains("F-C").any()
players

Unnamed: 0,Name,General Position,Height,Weight,Position
0,Melvin Ajinca,F,6-8,218 lbs,[]
1,Adem Bona,C,6-8,243 lbs,[]
2,Matas Buzelis,F,6-9,197 lbs,[]
3,Carlton Carrington,G,6-4,195 lbs,[]
4,Devin Carter,G,6-2,193 lbs,[]
...,...,...,...,...,...
549,Nick Smith Jr.,"Series([], Name: position, dtype: object)",,,[]
550,Wendell Moore Jr.,112125 NaN 112126 NaN 112127 NaN 1121...,,,[]
551,Kawhi Leonard,35948 F 35950 F 35956 F 35957 F 35...,,,[]
552,TyTy Washington Jr.,12710 NaN 12711 NaN 12712 NaN 12713 ...,,,[]


Copying guards into big data frame 

In [69]:
for index, row in players.iterrows():
    player = row["Name"]
    if pg["NAME"].str.contains(player).any():
        row["Position"].append("PG")
    if sg["NAME"].str.contains(player).any():
        row["Position"].append("SG")
    if sf["NAME"].str.contains(player).any():
        row["Position"].append("SF")
    if pf["NAME"].str.contains(player).any():
        row["Position"].append("PF")
    if c["NAME"].str.contains(player).any():
        row["Position"].append("C")

In [70]:
players

Unnamed: 0,Name,General Position,Height,Weight,Position
0,Melvin Ajinca,F,6-8,218 lbs,[]
1,Adem Bona,C,6-8,243 lbs,[C]
2,Matas Buzelis,F,6-9,197 lbs,"[SF, PF]"
3,Carlton Carrington,G,6-4,195 lbs,"[PG, SG]"
4,Devin Carter,G,6-2,193 lbs,[]
...,...,...,...,...,...
549,Nick Smith Jr.,"Series([], Name: position, dtype: object)",,,"[PG, SG]"
550,Wendell Moore Jr.,112125 NaN 112126 NaN 112127 NaN 1121...,,,"[PG, SG]"
551,Kawhi Leonard,35948 F 35950 F 35956 F 35957 F 35...,,,[SF]
552,TyTy Washington Jr.,12710 NaN 12711 NaN 12712 NaN 12713 ...,,,"[PG, SG]"


In [71]:
players = players.drop(columns=["General Position","Weight","Height"])


In [72]:
players.head(200)

Unnamed: 0,Name,Position
0,Melvin Ajinca,[]
1,Adem Bona,[C]
2,Matas Buzelis,"[SF, PF]"
3,Carlton Carrington,"[PG, SG]"
4,Devin Carter,[]
...,...,...
195,Kyle Anderson,[SF]
196,Aaron Nesmith,[SF]
197,Kris Dunn,"[PG, SG]"
198,Jalen Johnson,[SF]


In [73]:
from IPython.display import FileLink


In [74]:
players.to_csv("positionData.csv", index=False)
FileLink("positionData.csv")

In [95]:
players[players["Name"].str.contains("Nikola Djurisic")]

Unnamed: 0,Name,Position
12,Nikola Djurisic,[]
