In [1]:
# Import packages Here:
import math, random, string, time, numpy as np

In [2]:
#  (skip header and use comma delimiter)
# Change path to your specific file location
stats = 'players_stats_by_season_full_details.csv'

# Load the header separately to find column indices
with open(stats, 'r') as f:
    header = f.readline().strip().split(',')

# Identify column indices for necessary stats
# FGM/FGA (Field Goals), 3PM/3PA (3-Pointers), FTM/FTA (Free Throws)
cols = {name: header.index(name) for name in ['Player', 'Season', 'FGM', 'FGA', '3PM', '3PA', 'FTM', 'FTA']}

# Load numeric data for calculation (handling missing values as 0)
data = np.genfromtxt(stats, delimiter=',', skip_header=1, usecols=(cols['FGM'], cols['FGA'], cols['3PM'], cols['3PA'], cols['FTM'], cols['FTA']), filling_values=0)

# Extract individual columns for clarity
fgm, fga = data[:, 0], data[:, 1]
tpm, tpa = data[:, 2], data[:, 3]
ftm, fta = data[:, 4], data[:, 5]

# Vectorized accuracy calculations with NumPy
# Use np.divide to handle division by zero (set resulting NaNs to 0)
fg_acc = np.divide(fgm, fga, out=np.zeros_like(fgm), where=fga!=0)
tp_acc = np.divide(tpm, tpa, out=np.zeros_like(tpm), where=tpa!=0)
ft_acc = np.divide(ftm, fta, out=np.zeros_like(ftm), where=fta!=0)

# Example output: Printing first 5 rows | After testing, later generate a list for 100 players for each calculation.
# Reload names and seasons as strings
metadata = np.genfromtxt(stats, delimiter=',', skip_header=1, usecols=(cols['Player'], cols['Season']), dtype=str)

print(f"{'Player':<20} | {'Season':<10} | {'FG%':<7} | {'3P%':<7} | {'FT%':<7}")
print("-" * 65)
for i in range(5):
    # Using math.round-like formatting via f-strings
    print(f"{metadata[i,0]:<20} | {metadata[i,1]:<10} | {fg_acc[i]:.3f}   | {tp_acc[i]:.3f}   | {ft_acc[i]:.3f}")


Player               | Season     | FG%     | 3P%     | FT%    
-----------------------------------------------------------------
Shaquille O'Neal     | 1999 - 2000 | 0.574   | 0.000   | 0.524
Vince Carter         | 1999 - 2000 | 0.465   | 0.403   | 0.791
Karl Malone          | 1999 - 2000 | 0.509   | 0.250   | 0.797
Allen Iverson        | 1999 - 2000 | 0.421   | 0.341   | 0.713
Gary Payton          | 1999 - 2000 | 0.448   | 0.340   | 0.735


In [4]:
#Calculates for average points per minute (PPM).
# Identify column indices for Points and Minutes
cols.update({name: header.index(name) for name in ['PTS', 'MIN']})

# Load the numeric data for these stats
# Genfromtxt handles missing values by filling with 0
perf_data = np.genfromtxt(stats, delimiter=',', skip_header=1, 
                          usecols=(cols['PTS'], cols['MIN']), filling_values=0)

pts = perf_data[:, 0]
minutes = perf_data[:, 1]

# Calculate Points Per Minute (PPM)
# Use np.divide to safely handle cases where a player has 0 minutes
ppm = np.divide(pts, minutes, out=np.zeros_like(pts), where=minutes!=0)

# Sort to find the Top 100 most efficient scorers (by PPM)
top_20_ppm_indices = np.argsort(ppm)[::-1][:100]

# Display the Results
print(f"{'Rank':<5} | {'Player':<20} | {'Season':<10} | {'PPM'}")
print("-" * 50)
for i in range(100):
    idx = top_20_ppm_indices[i]
    print(f"{i+1:<5} | {metadata[idx,0]:<20} | {metadata[idx,1]:<10} | {ppm[idx]:.3f}")


Rank  | Player               | Season     | PPM
--------------------------------------------------
1     | "Jaren Jackson       | 2019 - 2020 | 1.614
2     | "Jaren Jackson       | 2018 - 2019 | 1.414
3     | "Wendell Carter      | 2018 - 2019 | 1.318
4     | "James Harrison      | 2019 - 2020 | 1.071
5     | Jabari Brown         | 2016 - 2017 | 1.028
6     | Jonathan Gibson      | 2015 - 2016 | 1.026
7     | Erik Borg            | 2019 - 2020 | 1.023
8     | "Robert Carter       | 2016 - 2017 | 1.000
9     | Darius Adams         | 2019 - 2020 | 0.986
10    | James Harden         | 2018 - 2019 | 0.983
11    | Russ Smith           | 2017 - 2018 | 0.977
12    | Errick McCollum      | 2014 - 2015 | 0.974
13    | MarShon Brooks       | 2017 - 2018 | 0.974
14    | Errick McCollum      | 2016 - 2017 | 0.970
15    | Giannis Antetokounmpo | 2019 - 2020 | 0.969
16    | Pierre Jackson       | 2018 - 2019 | 0.967
17    | Donovan Mitchell     | 2019 - 2020 | 0.964
18    | Willie Warren        | 20

In [None]:
#Calculates for overall shooting accuracy of each player in each season (ts_perc).
# 1. Identify indices for necessary columns
cols.update({name: header.index(name) for name in ['PTS', 'FGA', 'FTA']})

# 2. Load the numeric data
# usecols identifies the columns for Points, Field Goal Attempts, and Free Throw Attempts
ts_data = np.genfromtxt(stats, delimiter=',', skip_header=1, 
                        usecols=(cols['PTS'], cols['FGA'], cols['FTA']), filling_values=0)

pts = ts_data[:, 0]
fga = ts_data[:, 1]
fta = ts_data[:, 2]

# 3. Calculate True Shooting Percentage (TS%)
# The 0.44 multiplier is the standard NBA/FIBA coefficient for free throw possessions
denominator = 2 * (fga + 0.44 * fta)
ts_perc = np.divide(pts, denominator, out=np.zeros_like(pts), where=denominator!=0)

# 4. Sort and display the Top 100 Overall Most Accurate Shooters
top_100_ts_indices = np.argsort(ts_perc)[::-1][:100]

print(f"{'Rank':<5} | {'Player':<20} | {'Season':<10} | {'TS%'}")
print("-" * 50)
for i in range(100):
    idx = top_100_ts_indices[i]
    print(f"{i+1:<5} | {metadata[idx,0]:<20} | {metadata[idx,1]:<10} | {ts_perc[idx]:.3f}")


Rank  | Player               | Season     | TS%
--------------------------------------------------
1     | Erik Borg            | 2019 - 2020 | 1.887
2     | Randel-Rainer Lille  | 2019 - 2020 | 1.182
3     | Tomas Pomikalek      | 2015 - 2016 | 1.042
4     | Kohei Takahashi      | 2019 - 2020 | 1.031
5     | Loic Schwartz        | 2016 - 2017 | 1.031
6     | Itay Segev           | 2015 - 2016 | 1.000
7     | Dmytro Gliebov       | 2012 - 2013 | 1.000
8     | Tommi Huolila        | 2016 - 2017 | 1.000
9     | Muhamed Pasalic      | 2016 - 2017 | 0.989
10    | Marcin Malczyk       | 2019 - 2020 | 0.958
11    | Samuel Taiwo         | 2019 - 2020 | 0.935
12    | Giannis Sidiroilias  | 2019 - 2020 | 0.929
13    | Peter Lorant         | 2015 - 2016 | 0.914
14    | Marc Marius Judith   | 2013 - 2014 | 0.901
15    | Mike Miller          | 2014 - 2015 | 0.900
16    | Denis Marconato      | 2010 - 2011 | 0.895
17    | Meyers Leonard       | 2014 - 2015 | 0.891
18    | Davor Kus            | 201

In [6]:
#Calculates for average number of blocks and the average number of steals per game (bpg, spg).

# 1. Update dictionary with new column indices
cols.update({name: header.index(name) for name in ['BLK', 'STL', 'GP']})

# 2. Load the defensive and game-count data
# genfromtxt will fill missing values with 0
defensive_data = np.genfromtxt(stats, delimiter=',', skip_header=1, 
                               usecols=(cols['BLK'], cols['STL'], cols['GP']), 
                               filling_values=0)

blocks_total = defensive_data[:, 0]
steals_total = defensive_data[:, 1]
games_played = defensive_data[:, 2]

# 3. Calculate Averages Per Game (BPG and SPG)
# Safely handle division by zero for players with 0 games played
bpg = np.divide(blocks_total, games_played, out=np.zeros_like(blocks_total), where=games_played!=0)
spg = np.divide(steals_total, games_played, out=np.zeros_like(steals_total), where=games_played!=0)

# 4. Display the Top 100 Leaders for both categories
print(f"{'Player':<20} | {'Season':<10} | {'BPG':<6} | {'SPG':<6}")
print("-" * 55)
for i in range(100):
    # This example sorts by BPG; swap indices as needed for SPG
    idx = np.argsort(bpg)[::-1][i]
    print(f"{metadata[idx,0]:<20} | {metadata[idx,1]:<10} | {bpg[idx]:.2f}   | {spg[idx]:.2f}")


Player               | Season     | BPG    | SPG   
-------------------------------------------------------
Tim Duncan           | 2001 - 2002 | 4.33   | 0.67
Volodymyr Koniev     | 2018 - 2019 | 4.00   | 0.00
Myles Turner         | 2019 - 2020 | 4.00   | 0.50
Justin Williams      | 2015 - 2016 | 3.92   | 0.55
Hamady Ndiaye        | 2012 - 2013 | 3.90   | 0.58
Mouhamadou N'doye    | 2014 - 2015 | 3.80   | 0.60
Ekpe Udoh            | 2019 - 2020 | 3.75   | 1.88
Alonzo Mourning      | 1999 - 2000 | 3.72   | 0.51
Hassan Whiteside     | 2015 - 2016 | 3.68   | 0.60
Serge Ibaka          | 2011 - 2012 | 3.65   | 0.50
Theo Ratliff         | 2003 - 2004 | 3.61   | 0.64
Robert Upshaw        | 2016 - 2017 | 3.61   | 0.44
Marcus Camby         | 2007 - 2008 | 3.61   | 1.08
Anthony Stover       | 2015 - 2016 | 3.51   | 0.36
Dwight Howard        | 2009 - 2010 | 3.50   | 0.79
Ben Wallace          | 2001 - 2002 | 3.46   | 1.73
Dwight Howard        | 2007 - 2008 | 3.40   | 0.80
Andre Roberson       | 20

In [None]:
# This function to generate the Top 100 based on FG%
top_100_FG = np.argsort(fg_acc)[::-1][:100]

# Use these indices to pull the corresponding top data
top_players = metadata[top_100_FG, 0]
top_seasons = metadata[top_100_FG, 1]
top_fg_acc = fg_acc[top_100_FG]
top_tp_acc = tp_acc[top_100_FG]
top_ft_acc = ft_acc[top_100_FG]

# Print the top 100 Leaderboard
print(f"{'Rank':<5} | {'Player':<20} | {'Season':<10} | {'FG%'}")
print("-" * 50)
for i in range(100):
    print(f"{i+1:<5} | {top_players[i]:<20} | {top_seasons[i]:<10} | {top_fg_acc[i]:.3f}")

Rank  | Player               | Season     | FG%
--------------------------------------------------
1     | "Bruce Brown         | 2018 - 2019 | 11.594
2     | "Gary Trent          | 2019 - 2020 | 9.556
3     | "Gary Bell           | 2017 - 2018 | 9.233
4     | "Juan Davis          | 2019 - 2020 | 8.969
5     | "Ruben Boykin        | 2012 - 2013 | 8.950
6     | "Gary Bell           | 2018 - 2019 | 8.762
7     | "Kelly Oubre         | 2016 - 2017 | 8.630
8     | "Dexter Williams     | 2018 - 2019 | 8.457
9     | "Mike Holton         | 2015 - 2016 | 8.441
10    | "Doug Herring        | 2016 - 2017 | 8.281
11    | "Doug Herring        | 2018 - 2019 | 8.255
12    | "Bruce Brown         | 2019 - 2020 | 8.253
13    | "Kelly Oubre         | 2017 - 2018 | 8.233
14    | "Ruben Boykin        | 2011 - 2012 | 7.917
15    | "Lenzelle Smith      | 2019 - 2020 | 7.908
16    | "Alan Wiggins        | 2015 - 2016 | 7.598
17    | "Larry Nance         | 2016 - 2017 | 7.588
18    | "Lamond Murray       | 20

In [None]:
# 1. Calculate the top 100 indices for 3P%
top_100_3p = np.argsort(tp_acc)[::-1][:100]

# Print the top 100 Leaderboard
print(f"{'Rank':<5} | {'Player':<20} | {'Season':<10} | {'3P%'}")
print("-" * 50)

for i in range(100):
    # Get the specific index for the player at this rank
    idx = top_100_3p[i] 
    
    # Use 'idx' to pull from the master arrays, NOT the 'top_' variables from before
    player_name = metadata[idx, 0]
    season = metadata[idx, 1]
    accuracy = tp_acc[idx]
    
    print(f"{i+1:<5} | {player_name:<20} | {season:<10} | {accuracy:.3f}")


Rank  | Player               | Season     | 3P%
--------------------------------------------------
1     | "Larry Nance         | 2017 - 2018 | 206.500
2     | "Wendell Carter      | 2018 - 2019 | 61.833
3     | "James Harrison      | 2019 - 2020 | 52.000
4     | "Cedric Council      | 2019 - 2020 | 44.000
5     | "Dwayne Brown        | 2019 - 2020 | 36.500
6     | "Larry Nance         | 2016 - 2017 | 36.100
7     | "Alan Wiggins        | 2007 - 2008 | 35.500
8     | "Dexter Williams     | 2018 - 2019 | 22.250
9     | "Lamond Murray       | 2018 - 2019 | 20.667
10    | "Lamond Murray       | 2017 - 2018 | 18.250
11    | "Alan Wiggins        | 2018 - 2019 | 16.526
12    | "Dexter Williams     | 2019 - 2020 | 15.727
13    | "Anthony Gaines      | 2018 - 2019 | 15.516
14    | "Reggie Upshaw       | 2018 - 2019 | 15.222
15    | "Larry Nance         | 2018 - 2019 | 14.515
16    | "Alan Wiggins        | 2015 - 2016 | 14.429
17    | "Bonzie Colson       | 2019 - 2020 | 14.143
18    | "Bruce B

In [13]:
# This function to generate the Top 100 based on FT%.
top_100_FT = np.argsort(ft_acc)[::-1][:100]

# Use these indices to pull the corresponding top data
top_players = metadata[top_100_FT, 0]
top_seasons = metadata[top_100_FT, 1]
top_fg_acc = fg_acc[top_100_FT]
top_tp_acc = tp_acc[top_100_FT]
top_ft_acc = ft_acc[top_100_FT]

# Print the top 100 Leaderboard
print(f"{'Rank':<5} | {'Player':<20} | {'Season':<10} | {'FT%'}")
print("-" * 50)
for i in range(100):
    print(f"{i+1:<5} | {top_players[i]:<20} | {top_seasons[i]:<10} | {top_ft_acc[i]:.3f}")

Rank  | Player               | Season     | FT%
--------------------------------------------------
1     | "Earvin Morris       | 2019 - 2020 | 8.500
2     | "Gary Trent          | 2019 - 2020 | 7.243
3     | "Mike Holton         | 2015 - 2016 | 6.607
4     | "Kenny Lawson        | 2014 - 2015 | 6.000
5     | "Reggie Upshaw       | 2019 - 2020 | 5.900
6     | "Michael Evans       | 2011 - 2012 | 5.688
7     | "Kwan Cheatham       | 2017 - 2018 | 5.073
8     | "Reggie Upshaw       | 2019 - 2020 | 4.818
9     | "Earvin Morris       | 2017 - 2018 | 4.333
10    | "Kelly Oubre         | 2016 - 2017 | 4.286
11    | "Gary Trent          | 2019 - 2020 | 4.000
12    | "Demarkus Stuckey    | 2019 - 2020 | 3.880
13    | "Reggie Upshaw       | 2018 - 2019 | 3.778
14    | "Gary Bell           | 2018 - 2019 | 3.769
15    | "Anthony Beane       | 2019 - 2020 | 3.609
16    | "Gary Bell           | 2017 - 2018 | 3.444
17    | "Michael Porter      | 2019 - 2020 | 3.423
18    | "Anthony Beane       | 201