In [82]:
import os
import warnings

import numpy as np
import pandas as pd
from numpy import ones,vstack
from numpy.linalg import lstsq

import config
np.warnings.filterwarnings('ignore')


In [83]:
def calculatingFormula(x, x1):
    """Fit the line through ``(x, config.MAX_RATING)`` and ``(x1, config.MIN_RATING)``.

    The two points are solved as a least-squares system ``A @ [m, c] = y``
    where each row of ``A`` is ``[x_i, 1]``.

    Args:
        x: Stat value that should map to ``config.MAX_RATING``.
        x1: Stat value that should map to ``config.MIN_RATING``.

    Returns:
        A two-element list ``[m, c]`` holding the slope and the intercept.
    """
    xs = (x, x1)
    ys = (config.MAX_RATING, config.MIN_RATING)

    # Design matrix: first column the x values, second column all ones.
    design = vstack([xs, ones(len(xs))]).T

    solution = lstsq(design, ys)[0]
    slope, intercept = solution
    return [slope, intercept]

def pointSlopeValue(x, m, b):
    """Evaluate the straight line ``y = m * x + b`` at ``x``.

    Args:
        x: Input value.
        m: Slope of the line.
        b: Intercept of the line.

    Returns:
        The value of the line at ``x``.
    """
    scaled = m * x
    return scaled + b

def statPerGame(columnHeader, currentCol):
    """Add a per-game version of a stat to ``df`` and rate it.

    Works on the module-level ``df``: divides ``df[currentCol]`` by the ``G``
    column, stores the result as ``df[columnHeader]``, then calls
    ``statRating(columnHeader)`` to add the matching rating column.

    Args:
        columnHeader: Name of the per-game column to create.
        currentCol: Name of the existing column to divide by ``G``.
    """
    # One vectorised division instead of walking the frame with iterrows().
    # to_numpy() keeps the assignment positional, like the list it replaces.
    df[columnHeader] = (df[currentCol] / df["G"]).to_numpy()

    # Calculate rating for stat
    statRating(columnHeader)

def _assignLinearRating(sourceColumn, targetColumn):
    """Write a linearly rescaled, integer-truncated copy of a column into ``df``.

    The largest and smallest values of ``df[sourceColumn]`` are passed to
    ``calculatingFormula`` to obtain a slope and an intercept. Every value is
    then run through ``pointSlopeValue`` and truncated by an integer cast.

    Works on the module-level ``df`` and creates or overwrites
    ``df[targetColumn]``.

    Args:
        sourceColumn: Name of the column to rescale.
        targetColumn: Name of the column that receives the ratings.
    """
    values = df[sourceColumn]

    # Nothing to scale; still create the column so later steps find it.
    if values.empty:
        df[targetColumn] = np.array([], dtype=int)
        return

    # max()/min() are the first and last entries of a descending sort,
    # without sorting the whole frame.
    slope, intercept = calculatingFormula(values.max(), values.min())

    # astype(int) truncates toward zero, same as int() on each value.
    ratings = pointSlopeValue(values, slope, intercept).astype(int)

    # Positional assignment: `df` may carry a non-unique index (for example
    # after concatenating several seasons), so avoid index alignment.
    df[targetColumn] = ratings.to_numpy()


def statRating(columnHeader):
    """Add ``<columnHeader>Rating`` to ``df``.

    Args:
        columnHeader: Name of the stat column in the module-level ``df``.
    """
    _assignLinearRating(columnHeader, columnHeader + 'Rating')


def scaledRating(columnHeader):
    """Add ``scaled<columnHeader>`` to ``df``.

    Args:
        columnHeader: Name of the column in the module-level ``df`` to rescale.
    """
    _assignLinearRating(columnHeader, "scaled" + columnHeader)

    

In [84]:
def _weightedRating(frame, weightedColumns, divisor):
    """Return the integer-truncated weighted average of several columns.

    Args:
        frame: DataFrame holding the columns named in ``weightedColumns``.
        weightedColumns: Non-empty sequence of ``(column name, weight)`` pairs.
        divisor: Value the weighted sum is divided by.

    Returns:
        A NumPy integer array with one entry per row of ``frame``.
    """
    # Accumulate strictly left to right so the floating-point result matches
    # a hand-written ``a*w1 + b*w2 + ...`` expression.
    total = None
    for column, weight in weightedColumns:
        term = frame[column] * weight
        total = term if total is None else total + term

    # astype(int) truncates toward zero, same as int() on each value.
    return (total / divisor).astype(int).to_numpy()


def calculateForYears(df, fileName):
    """Compute every rating column for one season and write it to CSV.

    Steps:
      1. Derive per-game stats and their ratings via ``statPerGame``.
      2. Rate the stats listed in ``headers`` via ``statRating``.
      3. Build ``defensiveRating`` and ``offensiveRating`` as weighted
         averages of rating columns, using weights from ``config.defense`` /
         ``config.offense`` and the module-level divisors ``defSum`` /
         ``offSum``.
      4. Add scaled versions of both via ``scaledRating``.
      5. Build ``playerRating`` from the offensive and defensive ratings.
      6. Write the frame, sorted by ``playerRating`` descending, to
         ``ratings/withRatings_<fileName>``.

    ``statPerGame``, ``statRating`` and ``scaledRating`` read and modify the
    module-level ``df`` rather than this function's argument, so the frame
    passed in must be that same object. ``offSum`` and ``defSum`` must already
    be defined at module level.

    Args:
        df: Season DataFrame; must be the same object as the module-level ``df``.
        fileName: File name appended to ``ratings/withRatings_`` for the output.
    """
    # Relative contribution of each side of the ball to the player rating.
    offenseShare = 1.15
    defenseShare = .85

    # (per-game column to create, totals column to derive it from)
    perGameStats = [
        ("DRBPG", "DRB"),
        ("BPG", "BLK"),
        ("SPG", "STL"),
        ("PPG", "PTS"),
        ("APG", "AST"),
        ("ORBPG", "ORB"),
    ]
    headers = ["STLpc", "BLKpc", "DWS", "DBPM", "ASTpc", "OWS", "TSpc", "PER", "OBPM", "PTS", "VORP", "USGpc", "TPAr"]

    for perGameColumn, totalColumn in perGameStats:
        statPerGame(perGameColumn, totalColumn)

    for header in headers:
        statRating(header)

    # Get Defensive Ratings
    # NOTE(review): "stlWeight" is applied to both SPGRating and STLpcRating,
    # while defSum (the sum of all config.defense values) counts it only once.
    # Confirm against config whether a separate STL% weight was intended.
    # NOTE(review): "blkpcWight" is kept exactly as written; it presumably
    # matches the key spelling in config. Verify.
    defense = config.defense
    df['defensiveRating'] = _weightedRating(df, [
        ("BPGRating", defense["blkWeight"]),
        ("SPGRating", defense["stlWeight"]),
        ("DWSRating", defense["dwsWeight"]),
        ("DBPMRating", defense["dbpmWeight"]),
        ("STLpcRating", defense["stlWeight"]),
        ("BLKpcRating", defense["blkpcWight"]),
        ("DRBPGRating", defense["drbWeight"]),
    ], defSum)

    # Get Offensive Ratings
    offense = config.offense
    df['offensiveRating'] = _weightedRating(df, [
        ("PPGRating", offense["ppgWeight"]),
        ("APGRating", offense["astWeight"]),
        ("TSpcRating", offense["tspcWeight"]),
        ("OWSRating", offense["owsWeight"]),
        ("PERRating", offense["perWeight"]),
        ("OBPMRating", offense["obpmWeight"]),
        ("ASTpcRating", offense["astpcWeight"]),
        ("PTSRating", offense["ptsWeight"]),
        ("USGpcRating", offense["usgpcWeight"]),
    ], offSum)

    # Calculate Player Ratings
    scaledRating("defensiveRating")
    scaledRating("offensiveRating")

    # NOTE(review): this combines the unscaled offensive/defensive ratings,
    # not the scaled columns created just above. Confirm that is intended.
    df['playerRating'] = (
        (df['offensiveRating'] * offenseShare) + (df['defensiveRating'] * defenseShare)
    ).astype(int).to_numpy()

    # Export To CSV
    # to_csv does not create missing directories, so make sure it exists.
    os.makedirs("ratings", exist_ok=True)
    sortedBy = df.sort_values(by=["playerRating"], ascending=False)

    sortedBy.to_csv("ratings/withRatings_" + fileName)

In [85]:
# Weighted Average Divisors
# Each divisor is the total of every weight configured for that side of the
# ball. The totals are accumulated one weight at a time, in mapping order.
offSum = 0
for weight in config.offense.values():
    offSum += weight

defSum = 0
for weight in config.defense.values():
    defSum += weight

In [86]:
# Seasons to process, inclusive of both configured endpoints.
years = list(range(config.startingYear, config.finalYear + 1))

# Players with fewer games than this are excluded from the ratings.
MIN_GAMES = 20

# Create dataframe and rate players for each stat
for year in years:
    fileName = "stats_{}.csv".format(year)

    # `df` is intentionally module-level: the rating helpers read and extend
    # the global `df` rather than taking the frame as an argument.
    df = pd.read_csv("csvStats/" + fileName).dropna()  # Drops NaN

    # Make column names attribute-friendly: '%' -> 'pc', '3' -> 'T'.
    # Both are literal replacements, so regex matching is switched off.
    df.columns = (
        df.columns
        .str.replace('%', 'pc', regex=False)
        .str.replace('3', 'T', regex=False)
    )

    # Drop players who played less than 20 games.
    # A boolean mask avoids removing rows from the frame while iterating it.
    tooFewGames = df["G"] < MIN_GAMES
    for playerName in df.loc[tooFewGames, "Player"]:
        print(playerName + " dropped")
    df = df.loc[~tooFewGames].copy()

    # Calculate Rating For Each Year
    calculateForYears(df, fileName)

Justin Anderson dropped
Paris Bass dropped
Bol Bol dropped
Isaac Bonga dropped
Chaundee Brown Jr. dropped
Vlatko Čančar dropped
Devin Cannady dropped
Petr Cornelie dropped
Mamadi Diakite dropped
Devon Dotson dropped
Sekou Doumbouya dropped
Jeff Dowtin dropped
PJ Dozier dropped
Kris Dunn dropped
Carsen Edwards dropped
James Ennis III dropped
Melvin Frazier dropped
Tim Frazier dropped
Markelle Fultz dropped
Marcus Garrett dropped
Freddie Gillespie dropped
Hassani Gravett dropped
Kyle Guy dropped
Jared Harper dropped
Joe Harris dropped
Udonis Haslem dropped
Haywood Highsmith dropped
Malcolm Hill dropped
Jaylen Hoard dropped
Wes Iwundu dropped
Justin Jackson dropped
Carlik Jones dropped
Mason Jones dropped
Georgios Kalaitzakis dropped
Frank Kaminsky dropped
Braxton Key dropped
George King dropped
Louis King dropped
Brandon Knight dropped
Luke Kornet dropped
Isaiah Livers dropped
Brook Lopez dropped
Didi Louzada dropped
Mac McClung dropped
Cameron McGriff dropped
Alfonzo McKinnie dropped
Sa

In [87]:
# Create combined csv with stats over time

# Source column in the yearly ratings file -> column name in the combined file.
combinedColumns = {
    "Player": "PlayerName",
    "G": "Games",
    "Pos": "Position",
    "Age": "Age",
    "Tm": "Team",
    "scaledoffensiveRating": "OffensiveRating",
    "scaleddefensiveRating": "DefensiveRating",
    "playerRating": "RatingSum",
}

# Collect one frame per season and concatenate once at the end; calling
# pd.concat inside the loop re-copies all earlier rows on every iteration.
yearlyFrames = []
for year in years:
    fileName = "ratings/withRatings_stats_{}.csv".format(year)
    df = pd.read_csv(fileName)

    tempDataFrame = df[list(combinedColumns)].rename(columns=combinedColumns)
    # Season goes directly after the player name.
    tempDataFrame.insert(1, "Season", year)
    yearlyFrames.append(tempDataFrame)

finalDataFrame = pd.concat(yearlyFrames) if yearlyFrames else pd.DataFrame()

# scaledRating works on the module-level `df`, so point it at the combined
# frame; the new "scaledRatingSum" column is therefore added to
# finalDataFrame as well (same object).
df = finalDataFrame

scaledRating("RatingSum")

sortedBy = finalDataFrame.sort_values(by=["scaledRatingSum"], ascending=False)

sortedBy.to_csv("ratingsOverTime.csv")

In [88]:
# Create yearly ratings csvs
# to_csv does not create missing directories, so make sure it exists.
os.makedirs("finalRatings", exist_ok=True)

for year in years:
    # Rows of the combined frame that belong to this season.
    dataFrame = df.loc[df['Season'] == year]

    dataFrame.to_csv("finalRatings/fr_" + str(year) + ".csv")
