# In [7]:
import utils
import csv

# Read "NCAA_Statistics.csv" into a header row plus a list-of-lists table,
# relying on the utils.py module carried over from the rest of the course.
full_header, full_table = utils.load_from_file("input_data/NCAA_Statistics.csv")
full_table = utils.convert_to_numeric(full_table)

# The derived table is assembled one column at a time: every attribute below
# appends a column to parsed_table and a matching label to parsed_header.
# It is therefore column-major until it is transposed just before saving.

# Preliminary column: the team name seeds both the table and the header.
team_col = utils.get_column(full_table, full_header, "Team")
parsed_table = [team_col]
parsed_header = ["Team"]

# ATTRIBUTE 1: Scoring Margin
# Stored as a ratio (team points / opponent points) rather than a difference.

# Source columns pulled from the full table.
ppg_col = utils.get_column(full_table, full_header, "PTS")
oppg_col = utils.get_column(full_table, full_header, "OPP PTS")

# One ratio per row: each "PTS" value is divided by the "OPP PTS" value
# found at the same index.
scoring_margin_col = [
    team_pts / oppg_col[row] for row, team_pts in enumerate(ppg_col)
]

# Register the new column together with its header label.
parsed_table.append(scoring_margin_col)
parsed_header.append("Scoring Margin")

# ATTRIBUTE 2: Effective Field Goal Percentage
# A made 3-pointer is weighted as 1.5 made 2-pointers.

# Source columns. The weighting is folded into a single expression:
#   eFG% = (FGM + 3PM / 2) / FGA
fg_made_col = utils.get_column(full_table, full_header, "FGM")
tpfg_made_col = utils.get_column(full_table, full_header, "3FG")
fg_att_col = utils.get_column(full_table, full_header, "FGA")

# One value per row; all three columns are read at the same index.
efgp_col = [
    (made + (0.5 * tpfg_made_col[row])) / fg_att_col[row]
    for row, made in enumerate(fg_made_col)
]

# Register the new column together with its header label.
parsed_table.append(efgp_col)
parsed_header.append("eFG%")

# ATTRIBUTE 3: Steals + Blocks per game
# Combined steals and blocks, divided by the number of games.

# Source columns pulled from the full table.
stl_col = utils.get_column(full_table, full_header, "ST")
blk_col = utils.get_column(full_table, full_header, "BLKS")
gms_col = utils.get_column(full_table, full_header, "GM")

# One value per row: ("ST" + "BLKS") / "GM", all read at the same index.
spbpg_col = [
    (steals + blk_col[row]) / gms_col[row]
    for row, steals in enumerate(stl_col)
]

# Register the new column together with its header label.
parsed_table.append(spbpg_col)
parsed_header.append("SPG+BPG")

# ATTRIBUTE 4: Rebound Margin
# Computed the same way as Scoring Margin: a ratio of the team's rebounds
# to its opponents' rebounds rather than a difference.

# Grabbing the important columns...
reb_col = utils.get_column(full_table, full_header, "REB")
opp_reb_col = utils.get_column(full_table, full_header, "OPP REB")

# Parsing the stat...
# Iterate over the rebound column itself. The loop bound used to be the
# length of the steals column, which made this section depend on an
# unrelated variable from the previous attribute.
reb_margin_col = [
    team_reb / opp_reb_col[row] for row, team_reb in enumerate(reb_col)
]

# Adding...
parsed_table.append(reb_margin_col)
parsed_header.append("Rebound Margin")

# Classification: Win Percentage
# We're going to give the actual win percentage here (as a fraction of
# games won), and parse that later.

# Grabbing the important columns...
w_col = utils.get_column(full_table, full_header, "W")
l_col = utils.get_column(full_table, full_header, "L")

# Parsing the stat...
# Win percentage is wins / (wins + losses). Dividing by losses alone gives
# a win-loss ratio instead and raises ZeroDivisionError for an undefeated
# team. A row with no wins and no losses still raises ZeroDivisionError.
wp_col = [
    wins / (wins + l_col[row]) for row, wins in enumerate(w_col)
]

# Adding...
parsed_table.append(wp_col)
parsed_header.append("Win Percentage")

# parsed_table was built as a list of columns; transpose it with the utils
# helper before handing it, with its header, to the save routine.
parsed_table = utils.transpose(parsed_table)
utils.save_to_file(
    parsed_header,
    parsed_table,
    "input_data/NCAA_Statistics_Parsed.csv",
)