In [47]:
import pandas as pd
import sklearn as sk
from sklearn.cluster import KMeans
from sklearn.ensemble import RandomForestRegressor
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from sklearn import svm
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
from sklearn.model_selection import cross_val_score, train_test_split, cross_val_predict
import xgboost as xgb
from xgboost import XGBClassifier
from sklearn.metrics import roc_auc_score
pd.set_option('display.max_columns', 25)
pd.set_option('display.max_rows', 25)
Df = pd.read_csv("12Buckets.csv")
FA = pd.read_csv("FreeAgents.csv")
CapSpace = pd.read_csv("CapSpace.csv")

In [43]:
global Groups
global GroupToPos
global SalaryCaps
Groups = ['Safeties','Corners','InsideBackers','Edge','InteriorDLine','WRs','TEs','InteriorOLine','Tackles','QB','RB','Special']
GroupToPos = {'Safeties': ['SS','S','FS'], 'Corners': ['CB'], 'InsideBackers': ['LB','ILB'], 'InteriorDLine': ['DT']
             , 'Edge': ['OLB', 'DE'], 'TEs': ['TE'], 'WRs': ['WR'], 'InteriorOLine': ['G', 'C', 'OL'], 'Tackles': ['T','LT','RT'],
             'QB': ['QB'], 'RB': ['RB'], 'Special': ['K','P','KR','PR','LS']}
SalaryCaps = {2009: 123000000, 2011: 120000000, 2012: 120600000, 2013: 123000000, 2014: 133000000, 2015: 143280000, 2016: 155270000, 2017: 167000000
             , 2018: 177200000, 2019: 188200000, 2020: 198200000}

Df.head()

Unnamed: 0,Year,Team,Safeties,Corners,InsideBackers,Edge,InteriorDLine,WRs,TEs,InteriorOLine,Tackles,QB,RB,Special,Dead,IR,Prac,Susp,Wins,Playoff Wins,SuperBowl Win,Dist
0,2005,cardinals,1.402731,-0.207893,-0.704751,0.637538,-0.524513,0.666781,-0.808983,3.703935,0.513275,-0.200323,-1.050627,1.269564,8.08,0.16,0.09,0.0,5,0,0,5.249688
1,2006,cardinals,1.968584,-0.894881,-0.620196,1.020491,-0.200519,0.858693,-0.797375,4.10473,-0.310605,-0.153475,2.053163,0.192998,1.18,0.61,0.15,0.0,5,0,0,4.777084
2,2007,cardinals,1.9272,-1.231017,-0.68046,0.750435,-0.728735,0.483166,-0.943145,1.903609,0.42525,0.211767,1.018352,-0.611292,2.07,13.01,0.08,0.0,8,0,0,4.250393
3,2008,cardinals,1.401572,-1.141278,0.977356,-0.113338,-0.431277,0.562258,-0.953613,1.76909,2.042454,-0.268646,0.86636,-1.143908,4.09,0.23,0.32,0.0,9,3,0,5.175942
4,2009,cardinals,1.503735,0.174713,1.021642,-0.924559,0.278701,1.189041,-1.609632,0.77547,0.26515,0.469486,-0.344347,-0.131045,6.92,5.68,0.21,0.0,10,1,0,4.841595


In [11]:
DfUngrouped = pd.read_csv('SalaryData.csv')

{'Safeties': ['SS', 'S', 'FS'],
 'Corners': 'CB',
 'InsideBackers': ['LB', 'ILB'],
 'InteriorDLine': ['DT'],
 'Edge': ['OLB', 'DE'],
 'TEs': 'TE',
 'WRs': 'WR',
 'InteriorOLine': ['G', 'C', 'OL'],
 'Tackles': ['T', 'LT', 'RT'],
 'QB': 'QB',
 'RB': 'RB',
 'Special': ['K', 'P', 'KR', 'PR', 'LS']}

In [4]:
#Normalize spending percentages to the year each team existed in, allowing for comparison and for reasonable distance calculation
def Normalize(x):
    RetList = []
    for position in Groups:
        dfyear = Df[Df['Year'] == x['Year']]
        val = (x[position] - np.mean(dfyear[position]))/(np.std(dfyear[position]))
        
        RetList.append(val)
    
    return pd.Series(RetList)
    
    
    
Df[Groups] = Df.apply(Normalize, axis = 1)

In [33]:
def DistAllTeams(x, Team, Year):
    TheTeam = Df[(Df['Team'] == Team)&(Df['Year'] == Year)]
    TotD = 0
    if ((x['Team'] == Team)|(x['Year'] >= Year)):
        Dist = 100
    
    else:
        for Position in Groups:
            Di = (x[Position]-TheTeam[Position])**2
            TotD += Di
        Dist = np.sqrt(TotD)
    return Dist

def BasicRecommend(x):
    if (x['Wins'] > 0)&(x['Playoff Wins'] > 0):
        message = "Increase spending in this position"
    
    elif (x['Wins'] < 0)&(x['Playoff Wins'] < 0):
        message = "Decrease spending in this position"
        
    else:
        message = "No clear direction from historical data"
        
    return message
    

def FullComparable(Team, Year):
    ChangeFrame = pd.DataFrame()  
    Groups2 = Groups.copy()
    Groups2.extend(['Dead','IR','Prac','Susp','Wins','Playoff Wins'])
    Df['Dist'] = Df.apply(DistAllTeams, axis = 1, args = (Team, Year))
    
    #Check the 5 teams that are most similar to the team in question
    
    ######
    for l in range(0,11):
        
        TopTeams = Df.sort_values(by = "Dist")[l:l+1]
        NextYear = Df[(Df['Team'] == TopTeams.iloc[0,1])&(Df['Year'] == TopTeams.iloc[0,0]+1)]
        
        Result2 = TopTeams.copy()
        for i in range(0,len(Groups2)):
            Position = Groups2[i]
            NewVal = NextYear[Position].values - TopTeams[Position].values
            Result2.iloc[0,i+2] = NewVal

        Result2.iloc[0,0] = str(TopTeams['Year'].values[0]) + " to " +  str(NextYear['Year'].values[0])

        ChangeFrame = ChangeFrame.append(Result2)

    X = ChangeFrame.copy()
    display(X)
    cor = X.corr(method = "spearman").fillna(0)
    cor['Recommendation'] = cor.apply(BasicRecommend, axis = 1)
    #PlayerRecommendations = SpecificRecs(cor)
    display(cor[['Wins','Playoff Wins','Recommendation']][0:12])
    return cor
    



In [34]:
cor = FullComparable('texans',2019)

Unnamed: 0,Year,Team,Safeties,Corners,InsideBackers,Edge,InteriorDLine,WRs,TEs,InteriorOLine,Tackles,QB,RB,Special,Dead,IR,Prac,Susp,Wins,Playoff Wins,SuperBowl Win,Dist
147,2017 to 2018,broncos,-0.227726,-0.221059,0.485421,-0.923957,0.176749,-1.961223,-0.427315,0.270024,-0.233475,1.210933,-1.335786,0.568758,0.06,3.84,0.0,0.0,1,0,0,2.552742
68,2013 to 2014,panthers,-0.599165,-1.303598,-1.431763,0.92609,0.014885,-1.618021,1.560332,-1.389068,-0.075853,-0.120452,0.354302,-0.732605,-14.19,-5.79,3.28,0.0,-5,0,0,2.892511
372,2017 to 2018,jets,0.479181,1.438337,0.611306,-0.853272,0.164478,1.416239,0.323308,0.168854,0.778616,0.849767,-0.493766,0.208528,13.36,4.68,0.0,1.43,-1,0,0,2.962586
350,2010 to 2011,giants,-0.49056,-0.5784,-0.051792,0.05703,-0.163821,-0.612397,-0.559156,-0.104974,0.096666,1.096531,1.305026,0.208135,2.73,-2.41,-0.07,0.0,-1,4,0,2.978551
8,2013 to 2014,cardinals,0.379439,1.289573,-0.166973,0.785393,0.294426,-0.382292,0.135165,-2.225174,-0.095647,1.371731,-0.909775,-1.207306,-0.25,14.89,0.15,2.25,1,0,0,3.08476
373,2018 to 2019,jets,0.145432,-1.373145,2.545242,-1.117656,0.392088,-0.208207,-0.283069,-0.838383,-0.564164,-0.924091,0.602079,-0.199796,-13.29,11.55,0.0,-1.43,3,0,0,3.088746
36,2011 to 2012,ravens,0.500458,-0.311136,0.253507,-0.906201,0.684663,-0.264289,-0.602289,-1.629572,0.274676,0.374219,1.181989,-0.764452,6.47,6.74,-0.18,0.0,-2,2,0,3.266323
102,2017 to 2018,bengals,-0.776392,-0.090792,-0.294344,0.464682,0.703372,-0.565916,0.528613,-1.118845,2.56263,-0.244166,-0.412581,-0.067922,-0.67,14.9,0.0,0.0,-1,0,0,3.407493
75,2005 to 2006,bears,-0.291316,-0.199126,-0.045566,-0.454203,-0.328375,0.265262,-0.187331,0.109776,0.373157,-0.158845,0.323016,-0.062739,2.73,-0.21,0.0,0.0,2,2,0,3.414698
466,2006 to 2007,redskins,2.651799,0.068489,0.43342,-3.356136,0.273794,-0.116366,0.199372,-0.956799,-0.27422,-0.414782,-0.914128,0.231155,-10.29,7.65,-0.08,0.0,4,0,0,3.416787


Unnamed: 0,Wins,Playoff Wins,Recommendation
Safeties,0.308066,-0.095053,No clear direction from historical data
Corners,-0.110352,-0.49639,Decrease spending in this position
InsideBackers,0.533369,0.06865,Increase spending in this position
Edge,-0.717289,-0.242914,Decrease spending in this position
InteriorDLine,-0.110352,-0.385494,Decrease spending in this position
WRs,0.491987,0.237633,Increase spending in this position
TEs,-0.262086,-0.623127,Decrease spending in this position
InteriorOLine,0.174724,0.036965,Increase spending in this position
Tackles,-0.16093,0.512232,No clear direction from historical data
QB,-0.308066,0.14258,No clear direction from historical data


In [40]:
def SpecificRecs(cor, Team, Year, FreeAgents):
    cor = cor[['Wins','Playoff Wins','Recommendation']][0:12]
    PossibleFA = FA[FA['Year'] == Year]
    RecommendedFA = pd.DataFrame(columns = ['AAV','Age','CapPerc','Dollars','From','Player','Position','To','Year','Years'])
    for i in range(0,len(cor)):
        row = cor.iloc[i]
        if row['Recommendation'] == 'Increase spending in this position':
            group = cor.index[i]
            Positions = GroupToPos[group]
            RecommendedFA = RecommendedFA.append(PossibleFA[PossibleFA['Position'].isin(Positions)])
            SalaryCap = SalaryCaps[Year]
            
            
    return RecommendedFA
            
            
            
    
    
    
    
    
    
    
    
SpecificRecs(cor, 'texans', 2018, FA)

Unnamed: 0,AAV,Age,CapPerc,Dollars,From,Player,Position,To,Year,Years
2515,9000000.0,26.0,5.08,45000000.0,DAL,Anthony Hitchens,ILB,KC,2018.0,5.0
2540,7500000.0,26.0,4.23,22500000.0,TEN,Avery Williamson,ILB,NYJ,2018.0,3.0
2542,7000000.0,28.0,3.95,21000000.0,WAS,Zach Brown,ILB,WAS,2018.0,3.0
2547,6333333.0,28.0,3.57,19000000.0,DET,Tahir Whitehead,LB,OAK,2018.0,3.0
2556,5000000.0,26.0,2.82,15000000.0,DEN,Todd Davis,ILB,DEN,2018.0,3.0
...,...,...,...,...,...,...,...,...,...,...
2841,790000.0,28.0,0.45,790000.0,HOU,Jeff Allen,G,KC,2018.0,1.0
2844,790000.0,28.0,0.45,790000.0,KC,Bryan Witzmann,G,MIN,2018.0,1.0
2861,790000.0,29.0,0.45,790000.0,NO,Josh LeRibeus,C,NO,2018.0,1.0
2878,705000.0,24.0,0.40,705000.0,NYG,Ereck Flowers,G,JAC,2018.0,1.0


In [46]:
Df[(Df['Team'] == 'texans')&(Df['Year'] == 2019)]

Unnamed: 0,Year,Team,Safeties,Corners,InsideBackers,Edge,InteriorDLine,WRs,TEs,InteriorOLine,Tackles,QB,RB,Special,Dead,IR,Prac,Susp,Wins,Playoff Wins,SuperBowl Win,Dist
194,2019,texans,-0.224017,0.94517,-0.068272,1.498085,-1.199815,0.785443,-1.067176,0.937151,-1.407212,-1.379927,1.635236,-0.127661,12.71,11.62,0.0,0.0,10,1,0,100.0


The teams most similar to the 2018 Texans (that aren't the Texans or in the future) are the 2017 Broncos (5 Wins), the 2011 Ravens (12 Wins), and the 2007 Steelers (10 Wins). 

# Quickly break down a few examples one-by-one

### 2017 Broncos (5 Wins)

In [320]:
TopTeams = Df.sort_values(by = "Dist")[:1]

NextYear = Df[(Df['Team'] == TopTeams.iloc[0,1])&(Df['Year'] == TopTeams.iloc[0,0]+1)]

Result = NextYear.copy()

Groups2 = Groups.copy()
Groups2.extend(['Dead','IR','Prac','Susp','Wins','Playoff Wins'])
for i in range(0,len(Groups2)):
    Position = Groups2[i]
    NewVal = NextYear[Position].values - TopTeams[Position].values
    Result.iloc[0,i+2] = NewVal
    
Result.iloc[0,0] = '2017 to 2018'
Result

Unnamed: 0,Year,Team,Safeties,Corners,InsideBackers,Edge,InteriorDLine,WRs,TEs,InteriorOLine,Tackles,QB,RB,Special,Dead,IR,Prac,Susp,Wins,Playoff Wins,SuperBowl Win,Dist
148,2017 to 2018,broncos,-0.227726,-0.221059,0.485421,-0.923957,0.176749,-1.961223,-0.427315,0.270024,-0.233475,1.210933,-1.335786,0.568758,0.06,3.84,0.0,0.0,1,0,0,100.0


#### The 2018 Broncos spent considerably less on WRs, Edge Rushers, and RBs, while spending considerably more on their QB, which ultimately only led to a 1-win improvement

In [321]:
TopTeams = Df.sort_values(by = "Dist")[1:2]

NextYear = Df[(Df['Team'] == TopTeams.iloc[0,1])&(Df['Year'] == TopTeams.iloc[0,0]+1)]

Result2 = NextYear.copy()

for i in range(0,len(Groups2)):
    Position = Groups2[i]
    NewVal = NextYear[Position].values - TopTeams[Position].values
    Result2.iloc[0,i+2] = NewVal
    
Result2.iloc[0,0] = str(TopTeams['Year'].values[0]) + " to " +  str(NextYear['Year'].values[0])
Result2

Unnamed: 0,Year,Team,Safeties,Corners,InsideBackers,Edge,InteriorDLine,WRs,TEs,InteriorOLine,Tackles,QB,RB,Special,Dead,IR,Prac,Susp,Wins,Playoff Wins,SuperBowl Win,Dist
37,2011 to 2012,ravens,0.500458,-0.311136,0.253507,-0.906201,0.684663,-0.264289,-0.602289,-1.629572,0.274676,0.374219,1.181989,-0.764452,6.47,6.74,-0.18,0.0,-2,2,1,3.949323


#### The 2012 Ravens spend much less on Interior Offensive Line and Edge and much more on RB and Interior Defensive Line. The 2012 offseason saw the Ravens lose G Ben Grubbs and DE/NT Cory Redding and drafting future all-pros in Courtney Upshaw and Kelechi Osemele. The Ravens also re-signed RB Ray Rice to a large deal and Interior D-Lineman Haloti Ngata's contract value increased significantly. This ultimately led to fewer regular season wins, but a Super Bowl Win

In [365]:
ChangeFrame = pd.DataFrame()
for j in range(0,11):
    TopTeams = Df.sort_values(by = "Dist")[j:j+1]

    NextYear = Df[(Df['Team'] == TopTeams.iloc[0,1])&(Df['Year'] == TopTeams.iloc[0,0]+1)]

    Result2 = NextYear.copy()

    for i in range(0,len(Groups2)):
        Position = Groups2[i]
        NewVal = NextYear[Position].values - TopTeams[Position].values
        Result2.iloc[0,i+2] = NewVal

    Result2.iloc[0,0] = str(TopTeams['Year'].values[0]) + " to " +  str(NextYear['Year'].values[0])
    ChangeFrame = ChangeFrame.append(Result2)

display(ChangeFrame)
    
X = ChangeFrame.copy()

cor = X.corr()
sns.heatmap(cor, xticklabels=cor.columns,yticklabels=cor.columns)
cor[['Wins','Playoff Wins','SuperBowl Win']]

KeyError: 'Dist'

In [13]:
FA

Unnamed: 0,AAV,Age,CapPerc,Dollars,From,Player,Position,To,Year,Years
0,12666667.0,25.0,10.52,76000000.0,CAR,Charles Johnson,DE,CAR,2011.0,6.0
1,12000000.0,30.0,9.97,60000000.0,OAK,Nnamdi Asomugha,CB,PHI,2011.0,5.0
2,7571429.0,27.0,6.29,53000000.0,TB,Davin Joseph,G,TB,2011.0,7.0
3,9750000.0,27.0,8.10,48750000.0,CIN,nathan Joseph,CB,HOU,2011.0,5.0
4,9000000.0,27.0,7.48,45000000.0,NYJ,Santonio Holmes,WR,NYJ,2011.0,5.0
...,...,...,...,...,...,...,...,...,...,...
3290,645000.0,26.0,0.34,645000.0,ATL,Marvin Hall,WR,CHI,2019.0,1.0
3291,570000.0,26.0,0.30,570000.0,NYG,Jordan Williams,LB,TEN,2019.0,1.0
3292,570000.0,25.0,0.30,570000.0,GB,Nico Siragusa,G,IND,2019.0,1.0
3293,570000.0,26.0,0.30,570000.0,IND,DeShawn Williams,DT,DEN,2019.0,1.0


Unnamed: 0.1,Unnamed: 0,Year,Team,CapRoom
0,0,2005,cardinals,35.43
1,1,2006,cardinals,17.89
2,2,2007,cardinals,28.35
3,3,2008,cardinals,14.77
4,4,2009,cardinals,6.41
...,...,...,...,...
475,475,2015,redskins,5.67
476,476,2016,redskins,20.11
477,477,2017,redskins,8.16
478,478,2018,redskins,11.54
