# Weighted Averages for March Madness


- Read in `scaled` and `pca` data and try weighted average search

In [1]:
# import libraries
import pandas as pd
import numpy as np
from mm_utils import calculate_score
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

In [2]:
# read in data
df_s = pd.read_csv("processed_data/scaled_data.csv", index_col = 0)
df_p = pd.read_csv("processed_data/pca_data.csv", index_col = 0)

In [3]:
# topCorrelations returns the column names of x(int) columns that have the highest R^2 correlation values to column cor
def topCorrelations(x,df,cor):
    top = df.corr()[cor]
    top = top**2
    return list(top.sort_values(ascending = False)[1:x+1].index)

In [4]:
# scaleData scales the data in columns cols of dataframe df based on the kind of scaling selected
def scaleData(df, cols, kind):
    if kind == "Std":
        for c in cols:
            df[c] = StandardScaler().fit_transform(df[[c]])
    elif kind == "MinMax":
        for c in cols:
            df[c] = MinMaxScaler((0,1)).fit_transform(df[[c]])
    return df

In [5]:
# weightTests tests each possible combination of coefficients for the weighted averages based on the coefficients in 
# coef_range, storing and returning the results in Dataframe results.
# Dataframe df contains the columns that are going to be weighted, the total number of tournament wins, 
# Cinderella, and a Rank column for the calculated rank of each team.

def weightTests(df, curr_coefs, coef_range, n_columns, results, scale, year):
    # if final coef has been set
    if len(curr_coefs) == n_columns:

        # calculate new rank based on coefficients
        df.loc[:,'Rank'] = 0
        for cl in range(0, n_columns):
            df.loc[:,'Rank'] += df.iloc[:,cl] * curr_coefs[cl]
        # end for

        # calculate new score
        df = df.sort_values(['Rank'],ascending=False)
        endScore = calculate_score(data = df)

        # create new results entry
        res = {"Score":endScore, 'Year':year, "Scale":scale}
        for cl in range(n_columns):
            res[df.columns[cl]] = curr_coefs[cl]
        #end for

        results = results.append(res, ignore_index = True)
    # end if
    
    # need to set more coefficients
    else:
        for c in coef_range:
            curr_coefs.append(c)
            # call weightTests with new coefficients and same previous values
            results = weightTests(df = df, curr_coefs = curr_coefs,
                        coef_range = coef_range,results = results, 
                        n_columns = n_columns, year = year, scale = scale)
            curr_coefs.pop()
        #end for
    #end else
    
    return results

In [6]:
# runModel takes a number of correlations to look at (topCorr(int)), which column to correlate to (corVar),
# range of weights (weightRange), a Dataframe (data), and the specific year of data (year).
# runModel gets the top correlations, creates 2 copies of the data and scales them through MinMax and Standard scalers.
# After this, weightTests are run on all 3 copies of the data and stored in the results dataframe that is subsequently
# returned.

def runModel(topCorr, weightRange, data, corVar, year):
    # Get top correlation columns
    cols = topCorrelations(x=topCorr,df=data.drop(['Cinderella'],axis=1),cor=corVar)
    cols.append('Rank')
    print(cols)
    
    # Create results dataframe
    results = pd.DataFrame(columns = cols)
    
    
    # Scale data Std
    df_s = data.copy()
    df_s = scaleData(df = df_s,cols = cols[:-1],kind = "Std")
    # Scale data MinMax
    df_m = data.copy()
    df_m = scaleData(df = df_m, cols = cols[:-1],kind = "MinMax")
    
    cols.append(corVar)
    if 'Cinderella' not in cols:
        cols.append('Cinderella')
        
    data = data[cols]
    df_s = df_s[cols]
    df_m = df_m[cols]
    
    # normal tests
    print("Normal Tests", year)
    results = results.append(weightTests(df = data, curr_coefs = [], coef_range = weightRange, 
                                         results = results, n_columns = topCorr, year = year, scale = "None"))
    # std tests
    print("Std Tests", year)
    results = results.append(weightTests(df = df_s, curr_coefs = [], coef_range = weightRange, 
                                        results = results, n_columns = topCorr, year = year, scale = "Std"))
    # minmax tests
    print("MinMax Tests", year)
    results = results.append(weightTests(df = df_m, curr_coefs = [], coef_range = weightRange, 
                                         results = results, n_columns = topCorr, year = year, scale = "MinMax"))
    return results

In [7]:
# Model parameters
# weights range
ranges = [0,0.25,0.5,0.75,1]
# number of top features to use for making weighted average
topCorr = 5
# which column to correlate to
corrTo = "Number of Tournament Wins"

In [10]:
# run tests on each year of data and store in an excel file 
results = pd.DataFrame()
df_s['Rank'] = 0
# loop through years
for y in df_s['Year'].unique():
    yr = df_s[df_s['Year'] == y]
    results = results.append(runModel(topCorr = topCorr, weightRange = ranges, data = yr.copy(), corVar = corrTo, year = y).copy())

# save results to excel
results.to_csv("./processed_data/Weighted_Results.csv")

['Wins vs Top Teams', 'Wins', 'Total Scoring Differential', 'ESPN Strength of Schedule', 'Scoring Differential Per Game', 'Rank']
Normal Tests 2014


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(ilocs[0], value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item_labels[indexer[info_axis]]] = value


Std Tests 2014
MinMax Tests 2014
['Total Scoring Differential', 'Scoring Differential Per Game', 'Wins vs Top Teams', 'Wins', 'ESPN Strength of Schedule', 'Rank']
Normal Tests 2015


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(ilocs[0], value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item_labels[indexer[info_axis]]] = value


Std Tests 2015
MinMax Tests 2015
['Wins vs Top Teams', 'Free Throw Percentage', 'ESPN Strength of Schedule', 'Wins', 'Losses', 'Rank']
Normal Tests 2016


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(ilocs[0], value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item_labels[indexer[info_axis]]] = value


Std Tests 2016
MinMax Tests 2016
['Wins vs Top Teams', 'Total Scoring Differential', 'Scoring Differential Per Game', 'ESPN Strength of Schedule', 'Rebound Differential', 'Rank']
Normal Tests 2017


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(ilocs[0], value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item_labels[indexer[info_axis]]] = value


Std Tests 2017
MinMax Tests 2017
['ESPN Strength of Schedule', 'Total Scoring Differential', 'Scoring Differential Per Game', 'Wins', 'Wins vs Top Teams', 'Rank']
Normal Tests 2018


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(ilocs[0], value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item_labels[indexer[info_axis]]] = value


Std Tests 2018
MinMax Tests 2018
['Wins vs Top Teams', 'Scoring Differential Per Game', 'Total Scoring Differential', 'ESPN Strength of Schedule', 'Losses', 'Rank']
Normal Tests 2019


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(ilocs[0], value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item_labels[indexer[info_axis]]] = value


Std Tests 2019
MinMax Tests 2019
['Average PPG', 'Total Scoring Differential', 'Scoring Differential Per Game', 'Total Points', '3-Point Percentage', 'Rank']
Normal Tests 2021


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(ilocs[0], value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item_labels[indexer[info_axis]]] = value


Std Tests 2021
MinMax Tests 2021


In [11]:
# run tests on each year of data and store in an excel file 
results = pd.DataFrame()
df_p['Rank'] = 0
# loop through years
for y in df_p['Year'].unique():
    yr = df_p[df_p['Year'] == y]
    results = results.append(runModel(topCorr = topCorr, weightRange = ranges, data = yr.copy(), corVar = corrTo, year = y).copy())

# save results to excel
results.to_csv("./processed_data/Weighted_Results_PCA.csv")

['Win-Loss_0', 'Scoring Differential_0', 'Schedule_0', 'PPG_1', 'Free Throw-Rebound_1', 'Rank']
Normal Tests 2014


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(ilocs[0], value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item_labels[indexer[info_axis]]] = value


Std Tests 2014
MinMax Tests 2014
['Scoring Differential_0', 'Win-Loss_0', 'Assist_0', 'Schedule_0', 'Free Throw-Rebound_1', 'Rank']
Normal Tests 2015


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(ilocs[0], value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item_labels[indexer[info_axis]]] = value


Std Tests 2015
MinMax Tests 2015
['Schedule_0', 'Win-Loss_0', 'Scoring Differential_0', 'Free Throw-Rebound_2', 'Assist_0', 'Rank']
Normal Tests 2016


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(ilocs[0], value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item_labels[indexer[info_axis]]] = value


Std Tests 2016
MinMax Tests 2016
['Scoring Differential_0', 'Schedule_0', 'Win-Loss_0', 'Free Throw-Rebound_1', 'Assist_0', 'Rank']
Normal Tests 2017


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(ilocs[0], value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item_labels[indexer[info_axis]]] = value


Std Tests 2017
MinMax Tests 2017
['Scoring Differential_0', 'Win-Loss_0', 'Schedule_0', 'Assist_0', 'PPG_1', 'Rank']
Normal Tests 2018


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(ilocs[0], value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item_labels[indexer[info_axis]]] = value


Std Tests 2018
MinMax Tests 2018
['Scoring Differential_0', 'Schedule_0', 'Win-Loss_0', 'Assist_0', 'Free Throw-Rebound_1', 'Rank']
Normal Tests 2019


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(ilocs[0], value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item_labels[indexer[info_axis]]] = value


Std Tests 2019
MinMax Tests 2019
['Scoring Differential_0', 'PPG_0', 'Win-Loss_0', '3pt_0', 'Assist_0', 'Rank']
Normal Tests 2021


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(ilocs[0], value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item_labels[indexer[info_axis]]] = value


Std Tests 2021
MinMax Tests 2021
