In [1]:
import numpy as np
import pandas as pd
from scipy.sparse import lil_matrix
import scipy.spatial.distance

## Load in 2022 Team Stats Data

In [2]:
NFL2022_df = pd.read_csv('Full_NFL_Stats_2022.csv')
NFL2022_df.head()

Unnamed: 0.1,Unnamed: 0,teamName,displayName,description,abbreviation,value,perGameValue,rank
0,0,Falcons,Fumbles,The number of times a player/team has fumbled ...,FUM,21.0,1.0,13.0
1,1,Falcons,Fumbles Lost,The number of times a fumble is recovered by t...,LST,12.0,,4.0
2,2,Falcons,Forced Fumbles,The total number of forced fumbles.,FF,8.0,,62.0
3,3,Falcons,Fumbles Recovered,The number of fumbles recovered.,FR,7.0,,23.0
4,4,Falcons,Fumbles Touchdowns,The number of times a fumbles is recovered and...,FTD,1.0,,8.0


In [3]:
NFL2022_df.dropna(subset='rank',inplace=True)

In [4]:
NFL2022_df.drop(columns=['Unnamed: 0', 'perGameValue'], inplace=True)

In [5]:
NFL2022_df.head()

Unnamed: 0,teamName,displayName,description,abbreviation,value,rank
0,Falcons,Fumbles,The number of times a player/team has fumbled ...,FUM,21.0,13.0
1,Falcons,Fumbles Lost,The number of times a fumble is recovered by t...,LST,12.0,4.0
2,Falcons,Forced Fumbles,The total number of forced fumbles.,FF,8.0,62.0
3,Falcons,Fumbles Recovered,The number of fumbles recovered.,FR,7.0,23.0
4,Falcons,Fumbles Touchdowns,The number of times a fumbles is recovered and...,FTD,1.0,8.0


## Ranking of Teams Based on Position

### QB Ranking

In [6]:
interceptions = NFL2022_df.loc[NFL2022_df['displayName'] == 'Interception Percentage']
interceptions = interceptions.sort_values(by='rank')

In [7]:
completion = NFL2022_df.loc[NFL2022_df['displayName'] == 'Completion Percentage']
completion = completion.sort_values(by='rank')

In [8]:
pass_rating = NFL2022_df.loc[NFL2022_df['displayName'] == 'Passer Rating']
pass_rating = pass_rating.sort_values(by='value',ascending=False)

In [9]:
touchdowns = NFL2022_df.loc[NFL2022_df['abbreviation'] == 'TD%']
touchdowns = touchdowns.drop('rank',axis=1)
touchdowns = touchdowns.sort_values(by='value',ascending=False)

In [10]:
pass_yard_per_game = NFL2022_df.loc[NFL2022_df['displayName'] == 'Passing Yards Per Game']
pass_yard_per_game = pass_yard_per_game.sort_values(by='rank',ascending=True)

In [11]:
bottom_qb = []

for df in [interceptions, completion, pass_rating, touchdowns, pass_yard_per_game]:

    bottom_names = list(df['teamName'].tail(16))

    bottom_qb += bottom_names

name_counts = {name: bottom_qb.count(name) for name in set(bottom_qb)}

final_names = [name for name in name_counts.keys() if name_counts[name] == 5]

print(final_names)

['Ravens', 'Steelers', 'Panthers', 'Texans']


### RB Ranking

In [12]:
ypg = NFL2022_df.loc[NFL2022_df['displayName'] == 'Rushing Yards Per Game']
ypg = ypg.sort_values(by='rank')

In [13]:
fumbles = NFL2022_df.loc[NFL2022_df['displayName'] == 'Rushing Fumbles Lost']
fumbles = fumbles.sort_values(by='rank',ascending=False)

In [14]:
downs = NFL2022_df.loc[NFL2022_df['displayName'] == 'Rushing 1st downs']
downs = downs.sort_values(by='rank')
downs = downs.drop_duplicates(subset=['teamName'])

In [15]:
td = NFL2022_df.loc[NFL2022_df['abbreviation'] == 'RUSH']
td = td.sort_values(by='rank')
td = td.drop_duplicates(subset=['teamName'])

In [16]:
espnrbrate = NFL2022_df.loc[NFL2022_df['displayName'] == 'ESPN RB Rating']
espnrbrate = espnrbrate.sort_values(by='rank',ascending=True)

In [17]:
bottom_rb = []

for df in [ypg, fumbles, downs, td, espnrbrate]:

    bottom_names = list(df['teamName'].tail(16))

    bottom_rb += bottom_names

name_counts = {name: bottom_rb.count(name) for name in set(bottom_rb)}

final_names = [name for name in name_counts.keys() if name_counts[name] == 5]

print(final_names)

['Saints', 'Colts', 'Chargers', 'Texans', 'Broncos']


### WR Ranking

In [18]:
rec_rat = NFL2022_df.loc[NFL2022_df['displayName'] == 'ESPN Widereceiver Rating']
rec_rat = rec_rat.sort_values(by='rank')

In [19]:
receptions = NFL2022_df.loc[NFL2022_df['displayName'] == 'Receptions']
receptions = receptions.sort_values(by='rank')

In [20]:
ypg = NFL2022_df.loc[NFL2022_df['displayName'] == 'Receiving Yards Per Game']
ypg = ypg.sort_values(by='rank')

In [21]:
rec_td = NFL2022_df.loc[NFL2022_df['displayName'] == 'Receiving Touchdowns']
rec_td = rec_td.sort_values(by='value',ascending=False)
rec_td = rec_td.drop_duplicates(subset=['teamName'])

In [22]:
rec_big = NFL2022_df.loc[NFL2022_df['abbreviation'] == 'BIG']
rec_big = rec_big.sort_values(by='value',ascending=False)

In [23]:
bottom_wr = []

for df in [rec_rat, receptions, ypg, rec_td, rec_big]:

    bottom_names = list(df['teamName'].tail(16))

    bottom_wr += bottom_names

name_counts = {name: bottom_wr.count(name) for name in set(bottom_wr)}

final_names = [name for name in name_counts.keys() if name_counts[name] == 5]

print(final_names)

['Rams', 'Texans']


### LB Ranking

In [24]:
sacks = NFL2022_df.loc[NFL2022_df['displayName'] == 'Sacks']
sacks = sacks.sort_values(by='rank')

In [25]:
tottackles = NFL2022_df.loc[NFL2022_df['displayName'] == 'Total Tackles']
tottackles = tottackles.sort_values(by='value', ascending=False)

In [26]:
interceptions = NFL2022_df.loc[NFL2022_df['description'] == 'The total number of interceptions.']
interceptions = interceptions.sort_values(by='rank')

In [27]:
tfl = NFL2022_df.loc[NFL2022_df['displayName'] == 'Tackles For Loss']
tfl = tfl.sort_values(by='rank')

In [28]:
bottom_lb = []

for df in [interceptions, tottackles, tfl, sacks]:

    bottom_names = list(df['teamName'].tail(16))

    bottom_lb += bottom_names

name_counts = {name: bottom_lb.count(name) for name in set(bottom_lb)}

final_names = [name for name in name_counts.keys() if name_counts[name] == 4]

print(final_names)

['Browns', 'Bengals', 'Lions']


### DB Ranking

In [29]:
passdef = NFL2022_df.loc[NFL2022_df['displayName'] == 'Passes Defended']
passdef = passdef.sort_values(by='rank')

In [30]:
solotackles = NFL2022_df.loc[NFL2022_df['displayName'] == 'Solo Tackles']
solotackles = solotackles.sort_values(by='value', ascending=False)

In [31]:
interceptions = NFL2022_df.loc[NFL2022_df['description'] == 'The total number of interceptions.']
interceptions = interceptions.sort_values(by='rank')

In [32]:
iy = NFL2022_df.loc[NFL2022_df['displayName'] == 'Interception Yards']
iy = iy.sort_values(by='rank')

In [33]:
bottom_db = []

for df in [interceptions, solotackles, iy, passdef]:

    bottom_names = list(df['teamName'].tail(16))

    bottom_db += bottom_names

name_counts = {name: bottom_db.count(name) for name in set(bottom_db)}

final_names = [name for name in name_counts.keys() if name_counts[name] == 4]

print(final_names)

['Saints', 'Falcons', 'Raiders', 'Commanders']


### D-Line Ranking

In [34]:
stuff = NFL2022_df.loc[NFL2022_df['description'] == 'The number of times that a runner is stuffed at or behind the line of scrimmage.']
stuff = stuff.sort_values(by='rank')

In [35]:
tfl = NFL2022_df.loc[NFL2022_df['displayName'] == 'Tackles For Loss']
tfl = tfl.sort_values(by='rank')

In [36]:
stuffyar = NFL2022_df.loc[NFL2022_df['displayName'] == 'Stuff Yards']
stuffyar = stuffyar.sort_values(by='rank')

In [37]:
sacks = NFL2022_df.loc[NFL2022_df['displayName'] == 'Sacks']
sacks = sacks.sort_values(by='rank')

In [38]:
bottom_dline = []

for df in [stuffyar, tfl, sacks, stuff]:

    bottom_names = list(df['teamName'].tail(16))

    bottom_dline += bottom_names

name_counts = {name: bottom_dline.count(name) for name in set(bottom_dline)}

final_names = [name for name in name_counts.keys() if name_counts[name] == 4]

print(final_names)

['Rams', 'Packers', 'Browns', 'Bengals', 'Broncos']


## Finding Similarity For The Offense

In [39]:
passing = pd.read_csv('QBData.csv')
passing.drop(columns='Unnamed: 0', inplace=True)
passing.head()

Unnamed: 0,playerName,completionPct,completions,ESPNQBRating,interceptionPct,interceptions,longPassing,netPassingYards,netPassingYardsPerGame,netTotalYards,...,totalTouchdowns,totalYards,twoPointPassConvs,twoPtPass,twoPtPassAttempts,yardsPerCompletion,yardsPerGame,yardsPerPassAttempt,netYardsPerPassAttempt,QBR
0,Patrick Mahomes,67.129997,435.0,7760.0,1.852,12.0,67.0,5062.0,297.764709,5420.0,...,45.0,5614.0,-1.0,2.0,3.0,12.069,330.235291,8.102,7.510386,77.57
1,Justin Herbert,68.239998,477.0,6189.0,1.431,10.0,55.0,4533.0,266.647064,4680.0,...,25.0,4876.0,-1.0,1.0,1.0,9.935,286.823517,6.78,6.150611,59.64
2,Tom Brady,66.848999,490.0,6174.0,1.228,9.0,63.0,4534.0,266.705872,4533.0,...,26.0,4693.0,-1.0,2.0,6.0,9.58,276.058838,6.404,6.005298,52.5
3,Kirk Cousins,65.941002,424.0,6157.0,2.177,14.0,66.0,4218.0,248.117645,4315.0,...,31.0,4644.0,-1.0,3.0,4.0,10.724,273.176483,7.072,6.121916,51.76
4,Joe Burrow,68.317001,414.0,6565.0,1.98,12.0,60.0,4216.0,263.5,4473.0,...,40.0,4732.0,-1.0,3.0,5.0,10.809,295.75,7.384,6.516229,56.97


In [40]:
rushing = pd.read_csv('RBData.csv')
rushing.drop(columns='Unnamed: 0', inplace=True)
rushing.head()

Unnamed: 0,PlayerName,ESPNRBRating,LongRushing,NetTotalYards,NetYardsPerGame,RushingAttempts,YardRushingPlays,Rushing1stdowns,RushingFumbles,RushingFumblesLost,...,TotalPointsPerGame,TotalTouchdowns,TotalYards,TotalYardsFromScrimmage,TwoPointRushConversion,TwoPointRush,TwoPointRushAttempts,YardsFromScrimmagePerGame,YardsPerGame,YardsPerRushAttempt
0,Josh Jacobs,2058.0,86.0,1653.0,97.235291,340.0,7.0,93.0,2.0,0.0,...,4.235294,12.0,2053.0,2053.0,-1.0,0.0,1.0,120.764709,120.764709,4.862
1,Derrick Henry,1948.0,56.0,1542.0,96.375,349.0,10.0,65.0,4.0,2.0,...,4.875,14.0,1940.0,1936.0,-1.0,0.0,0.0,121.0,121.25,4.407
2,Nick Chubb,1930.0,41.0,1525.0,89.705879,302.0,13.0,69.0,1.0,1.0,...,4.588235,13.0,1764.0,1764.0,-1.0,1.0,1.0,103.764709,103.764709,5.05
3,Saquon Barkley,1662.0,68.0,1312.0,82.0,295.0,9.0,62.0,1.0,0.0,...,3.75,10.0,1650.0,1650.0,-1.0,0.0,0.0,103.125,103.125,4.447
4,Miles Sanders,1624.0,40.0,1269.0,74.647057,259.0,9.0,62.0,1.0,1.0,...,3.882353,11.0,1347.0,1347.0,-1.0,0.0,1.0,79.235291,79.235291,4.9


In [41]:
receiving = pd.read_csv('WRData.csv')
receiving.drop(columns='Unnamed: 0', inplace=True)
receiving.head()

Unnamed: 0,PlayerName,ESPNWidereceiverRating,LongReception,YardReceivingPlays,ReceivingFirstDowns,ReceivingFumbles,ReceivingFumblesLost,ReceivingTargets,ReceivingTouchdowns,ReceivingYards,...,TotalPointsPerGame,TotalTouchdowns,TotalYards,TotalYardsFromScrimmage,TwoPointReceivingConversion,TwoPointReceptions,TwoPointReceptionAttempts,YardsFromScrimmagePerGame,YardsPerGame,YardsPerReception
0,Justin Jefferson,2729.0,64.0,28.0,80.0,0.0,0.0,184.0,8.0,1809.0,...,3.176471,9.0,1867.0,1833.0,-1.0,1.0,1.0,107.823532,109.823532,14.133
1,Tyreek Hill,2550.0,64.0,25.0,77.0,1.0,0.0,170.0,7.0,1710.0,...,3.176471,9.0,1742.0,1742.0,-1.0,0.0,0.0,102.470589,102.470589,14.37
2,Davante Adams,2506.0,60.0,24.0,65.0,1.0,0.0,180.0,14.0,1516.0,...,4.941176,14.0,1515.0,1515.0,-1.0,0.0,1.0,89.117645,89.117645,15.16
3,A.J. Brown,2321.0,78.0,23.0,59.0,2.0,2.0,145.0,11.0,1496.0,...,3.882353,11.0,1496.0,1496.0,-1.0,0.0,0.0,88.0,88.0,17.0
4,Stefon Diggs,2354.0,53.0,17.0,72.0,1.0,0.0,154.0,11.0,1429.0,...,4.125,11.0,1426.0,1426.0,-1.0,0.0,0.0,89.125,89.125,13.231


### QB Similarity

In [42]:
qbsimdex_df = passing.set_index('playerName')

In [43]:
t_player = 'Patrick Mahomes'
t_player_stat = qbsimdex_df.loc[t_player]
dist = scipy.spatial.distance.cdist(qbsimdex_df, [t_player_stat], metric = "cosine")[:,0]
query_distances = list(zip(passing.index, dist))

In [44]:
for sim_play, sim_stats in sorted(query_distances, key = lambda x: x[1], reverse = False)[:11]:
    print(passing['playerName'].iloc[sim_play], sim_stats)

Patrick Mahomes 0.0
Jared Goff 0.00041775745230643313
Joe Burrow 0.0007407890259990557
Aaron Rodgers 0.000871552931898445
Jimmy Garoppolo 0.0011185239836456473
Trevor Lawrence 0.0013677707864143684
Justin Herbert 0.001585473486567679
Andy Dalton 0.001918646556036907
Tom Brady 0.0020447975863114864
Derek Carr 0.002721963799469762
Dak Prescott 0.002762679520320721


### RB Similarity

In [45]:
rbsimdex_df = rushing.set_index('PlayerName')

In [46]:
t_player = 'Josh Jacobs'
t_player_stat = rbsimdex_df.loc[t_player]
dist = scipy.spatial.distance.cdist(rbsimdex_df, [t_player_stat], metric = "cosine")[:,0]
query_distances = list(zip(rushing.index, dist))

In [47]:
for sim_play, sim_stats in sorted(query_distances, key = lambda x: x[1], reverse = False)[:11]:
    print(rushing['PlayerName'].iloc[sim_play], sim_stats)

Josh Jacobs 0.0
Saquon Barkley 0.00012204952696026616
Derrick Henry 0.0001265037863835694
Jeff Wilson Jr. 0.00028978930158740646
Dalvin Cook 0.0003672104258061193
AJ Dillon 0.00039675909541514454
Isiah Pacheco 0.0005759997930764316
Damien Harris 0.0006293338121381442
Najee Harris 0.0006688505884584472
Latavius Murray 0.0008185457201282942
Nick Chubb 0.0008230645034551509


### WR Similarity

In [48]:
wrsimdex_df = receiving.set_index('PlayerName')

In [49]:
t_player = 'Justin Jefferson'
t_player_stat = wrsimdex_df.loc[t_player]
dist = scipy.spatial.distance.cdist(wrsimdex_df, [t_player_stat], metric = "cosine")[:,0]
query_distances = list(zip(receiving.index, dist))

In [50]:
for sim_play, sim_stats in sorted(query_distances, key = lambda x: x[1], reverse = False)[:11]:
    print(receiving['PlayerName'].iloc[sim_play], sim_stats)

Justin Jefferson 0.0
Terry McLaurin 0.000128843647382193
Garrett Wilson 0.00019177225136413067
Mike Williams 0.00027199323078308524
Jaylen Waddle 0.0003652294342800788
A.J. Brown 0.0004237986059427712
Tyreek Hill 0.000588906425817215
Olamide Zaccheaus 0.0006034702578423534
Darius Slayton 0.0006212144331259051
CeeDee Lamb 0.0006829809378626761
Tyler Boyd 0.0007645157343814102


## Finding Similarity For The Defense

In [51]:
tackles = pd.read_csv('TotalTackles.csv')
tackles.drop(columns='Unnamed: 0', inplace=True)
tackles.head()

Unnamed: 0,PlayerName,Position,assistTackles,avgInterceptionYards,avgSackYards,avgStuffYards,blockedFieldGoalTouchdowns,blockedPuntTouchdowns,hurries,kicksBlocked,...,safeties,soloTackles,stuffs,stuffYards,tacklesForLoss,tacklesYardsLost,teamGamesPlayed,totalTackles,yardsAllowed,pointsAllowed
0,Foyesade Oluokun,LB,0.0,6.0,1.7,0.0,0.0,0.0,0.0,0.0,...,128.0,18.5,17.0,12.0,26.0,17.0,184.0,0.0,0.0,0.0
1,Nick Bolton,LB,7.5,9.5,1.5,0.0,0.0,0.0,0.0,1.0,...,108.0,12.0,12.0,9.0,22.0,17.0,180.0,0.0,0.0,0.0
2,Roquan Smith,LB,13.667,5.0,1.857,0.0,0.0,0.0,0.0,1.0,...,103.0,18.5,13.0,11.0,29.0,17.0,169.0,0.0,0.0,0.0
3,Zaire Franklin,LB,0.0,5.833,1.9,0.0,0.0,0.0,0.0,0.0,...,102.0,14.5,19.0,12.0,24.0,17.0,167.0,0.0,0.0,0.0
4,Alex Singleton,LB,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,...,100.0,7.5,12.0,6.0,10.0,17.0,163.0,0.0,0.0,0.0


In [52]:
sacks = pd.read_csv('SacksData.csv')
sacks.drop(columns='Unnamed: 0', inplace=True)
sacks.head()

Unnamed: 0,PlayerName,Position,assistTackles,avgInterceptionYards,avgSackYards,avgStuffYards,blockedFieldGoalTouchdowns,blockedPuntTouchdowns,hurries,kicksBlocked,...,safeties,soloTackles,stuffs,stuffYards,tacklesForLoss,tacklesYardsLost,teamGamesPlayed,totalTackles,yardsAllowed,pointsAllowed
0,Nick Bosa,DE,0.0,7.432,2.667,0.0,0.0,0.0,0.0,0.0,...,41.0,5.0,8.0,19.0,129.0,17.0,51.0,0.0,0.0,0.0
1,Haason Reddick,LB,0.0,8.281,1.667,0.0,0.0,0.0,0.0,0.0,...,35.0,4.0,5.0,11.0,64.0,17.0,49.0,0.0,0.0,0.0
2,Myles Garrett,DE,0.0,5.688,3.0,0.0,0.0,0.0,0.0,0.0,...,37.0,8.5,12.0,18.0,85.0,17.0,60.0,0.0,0.0,0.0
3,Chris Jones,DT,0.0,6.806,2.4,0.0,0.0,0.0,0.0,0.0,...,30.0,5.5,12.0,17.0,94.0,17.0,44.0,0.0,0.0,0.0
4,Matthew Judon,LB,0.0,6.613,1.333,0.0,0.0,0.0,0.0,0.0,...,36.0,7.0,4.0,14.0,99.0,17.0,60.0,0.0,0.0,0.0


### LB Similarity

In [53]:
lbstt = tackles.loc[tackles['Position'] == 'LB']
lbstt = lbstt.set_index('PlayerName')
lbstt = lbstt.reset_index()
lbstt.head()

Unnamed: 0,PlayerName,Position,assistTackles,avgInterceptionYards,avgSackYards,avgStuffYards,blockedFieldGoalTouchdowns,blockedPuntTouchdowns,hurries,kicksBlocked,...,safeties,soloTackles,stuffs,stuffYards,tacklesForLoss,tacklesYardsLost,teamGamesPlayed,totalTackles,yardsAllowed,pointsAllowed
0,Foyesade Oluokun,LB,0.0,6.0,1.7,0.0,0.0,0.0,0.0,0.0,...,128.0,18.5,17.0,12.0,26.0,17.0,184.0,0.0,0.0,0.0
1,Nick Bolton,LB,7.5,9.5,1.5,0.0,0.0,0.0,0.0,1.0,...,108.0,12.0,12.0,9.0,22.0,17.0,180.0,0.0,0.0,0.0
2,Roquan Smith,LB,13.667,5.0,1.857,0.0,0.0,0.0,0.0,1.0,...,103.0,18.5,13.0,11.0,29.0,17.0,169.0,0.0,0.0,0.0
3,Zaire Franklin,LB,0.0,5.833,1.9,0.0,0.0,0.0,0.0,0.0,...,102.0,14.5,19.0,12.0,24.0,17.0,167.0,0.0,0.0,0.0
4,Alex Singleton,LB,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,...,100.0,7.5,12.0,6.0,10.0,17.0,163.0,0.0,0.0,0.0


In [54]:
lbstt = lbstt.drop(columns='Position')
lbttsimdex_df = lbstt.set_index(['PlayerName'])

In [55]:
t_player = 'Foyesade Oluokun'
t_player_stat = lbttsimdex_df.loc[t_player]
dist = scipy.spatial.distance.cdist(lbttsimdex_df, [t_player_stat], metric = "cosine")[:,0]
query_distances = list(zip(lbstt.index, dist))

In [56]:
for sim_play, sim_stats in sorted(query_distances, key = lambda x: x[1], reverse = False)[:16]:
    print(lbstt['PlayerName'].iloc[sim_play], sim_stats)

Foyesade Oluokun 0.0
Zaire Franklin 0.0027508244068337184
Eric Kendricks 0.0028219192369823842
Bobby Okereke 0.003987542895135809
Nick Bolton 0.004287539420890885
Tremaine Edmunds 0.004495210550057971
T.J. Edwards 0.004573338283919592
Alex Anzalone 0.004702219708321853
Jordyn Brooks 0.004782288089118514
Cody Barton 0.005725631477929705
Malcolm Rodriguez 0.006265427202595375
Roquan Smith 0.00657164961235257
Quay Walker 0.006829748653799594
Alex Singleton 0.006929882841380186
Dre Greenlaw 0.007526802476184313
C.J. Mosley 0.008105301934881237


In [57]:
lbssc = sacks.loc[sacks['Position'] == 'LB']
lbssc = lbssc.set_index('PlayerName')
lbssc = lbssc.reset_index()
lbssc.head()

Unnamed: 0,PlayerName,Position,assistTackles,avgInterceptionYards,avgSackYards,avgStuffYards,blockedFieldGoalTouchdowns,blockedPuntTouchdowns,hurries,kicksBlocked,...,safeties,soloTackles,stuffs,stuffYards,tacklesForLoss,tacklesYardsLost,teamGamesPlayed,totalTackles,yardsAllowed,pointsAllowed
0,Haason Reddick,LB,0.0,8.281,1.667,0.0,0.0,0.0,0.0,0.0,...,35.0,4.0,5.0,11.0,64.0,17.0,49.0,0.0,0.0,0.0
1,Matthew Judon,LB,0.0,6.613,1.333,0.0,0.0,0.0,0.0,0.0,...,36.0,7.0,4.0,14.0,99.0,17.0,60.0,0.0,0.0,0.0
2,Alex Highsmith,LB,0.0,8.138,5.25,0.0,0.0,0.0,0.0,0.0,...,38.0,6.5,21.0,12.0,91.0,17.0,63.0,0.0,0.0,0.0
3,Micah Parsons,LB,0.0,6.556,1.75,0.0,0.0,0.0,0.0,0.0,...,42.0,4.0,7.0,13.0,66.0,17.0,65.0,0.0,0.0,0.0
4,Josh Uche,LB,0.0,4.304,0.0,0.0,0.0,0.0,0.0,0.0,...,23.0,0.0,0.0,9.0,47.0,17.0,27.0,0.0,0.0,0.0


In [58]:
lbssc = lbssc.drop(columns='Position')
lbscsimdex_df = lbssc.set_index(['PlayerName'])

In [59]:
t_player = 'Haason Reddick'
t_player_stat = lbscsimdex_df.loc[t_player]
dist = scipy.spatial.distance.cdist(lbscsimdex_df, [t_player_stat], metric = "cosine")[:,0]
query_distances = list(zip(lbssc.index, dist))

In [60]:
for sim_play, sim_stats in sorted(query_distances, key = lambda x: x[1], reverse = False)[:16]:
    print(lbssc['PlayerName'].iloc[sim_play], sim_stats)

Haason Reddick 1.1102230246251565e-16
Darrell Taylor 0.008981820581654754
Justin Houston 0.017630605964722212
Alex Highsmith 0.025156148188099303
Bradley Chubb 0.025412037578418367
Melvin Ingram 0.0260457070774176
Micah Parsons 0.03735301097551991
Matthew Judon 0.042428924095662124
Von Miller 0.04617582317401292
Josh Uche 0.04902161365874225
Azeez Ojulari 0.05512409923721695
Rashan Gary 0.05761264852219017
Khalil Mack 0.06325189919262242
Preston Smith 0.06774348653438245
James Houston 0.06851533701330437
Samson Ebukam 0.06999042608903638


### DB Similarity

In [61]:
dbs = tackles.loc[tackles['Position'] == 'CB']
dbs = dbs.set_index('PlayerName')
dbs = dbs.reset_index()
dbs.head()

Unnamed: 0,PlayerName,Position,assistTackles,avgInterceptionYards,avgSackYards,avgStuffYards,blockedFieldGoalTouchdowns,blockedPuntTouchdowns,hurries,kicksBlocked,...,safeties,soloTackles,stuffs,stuffYards,tacklesForLoss,tacklesYardsLost,teamGamesPlayed,totalTackles,yardsAllowed,pointsAllowed
0,L'Jarius Sneed,CB,19.0,8.857,2.0,0.0,0.0,0.0,0.0,1.0,...,75.0,4.5,8.0,5.0,11.0,17.0,108.0,0.0,0.0,0.0
1,Taron Johnson,CB,2.0,0.0,1.667,0.0,0.0,0.0,0.0,1.0,...,67.0,6.0,10.0,6.0,4.0,16.0,90.0,0.0,0.0,0.0
2,Desmond King II,CB,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,17.0
3,Jalen Ramsey,CB,13.25,8.5,1.333,0.0,0.0,0.0,0.0,1.0,...,64.0,4.5,4.0,4.0,11.0,17.0,88.0,0.0,0.0,0.0
4,Charvarius Ward,CB,9.0,0.0,1.667,0.0,0.0,0.0,0.0,1.0,...,59.0,4.5,5.0,3.0,1.0,17.0,87.0,0.0,0.0,0.0


In [62]:
dbs = dbs.drop(columns='Position')
dbsimdex_df = dbs.set_index(['PlayerName'])

In [63]:
t_player = "L'Jarius Sneed"
t_player_stat = dbsimdex_df.loc[t_player]
dist = scipy.spatial.distance.cdist(dbsimdex_df, [t_player_stat], metric = "cosine")[:,0]
query_distances = list(zip(dbs.index, dist))

In [64]:
for sim_play, sim_stats in sorted(query_distances, key = lambda x: x[1], reverse = False)[:16]:
    print(dbs['PlayerName'].iloc[sim_play], sim_stats)

L'Jarius Sneed 0.0
Jalen Ramsey 0.007268891014888479
Arthur Maulet 0.01138473835734366
Marlon Humphrey 0.014502468039428007
Deommodore Lenoir 0.021436782957368394
Coby Bryant 0.024322503000657525
Marcus Peters 0.026157460350360484
Eric Rowe 0.027320289805224385
Nate Hobbs 0.027910617235572865
Martin Emerson Jr. 0.03238589416274085
Charvarius Ward 0.033215540271458344
K'Waun Williams 0.03525250270346214
Jeff Okudah 0.03622340200347196
Benjamin St-Juste 0.036969197423821876
Troy Hill 0.03722338871765529
Kenny Moore II 0.03775936757806009


In [65]:
dbs2 = tackles.loc[tackles['Position'] == 'S']
dbs2 = dbs2.set_index('PlayerName')
dbs2 = dbs2.reset_index()
dbs2.head()

Unnamed: 0,PlayerName,Position,assistTackles,avgInterceptionYards,avgSackYards,avgStuffYards,blockedFieldGoalTouchdowns,blockedPuntTouchdowns,hurries,kicksBlocked,...,safeties,soloTackles,stuffs,stuffYards,tacklesForLoss,tacklesYardsLost,teamGamesPlayed,totalTackles,yardsAllowed,pointsAllowed
0,Jalen Pitre,S,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,17.0
1,Jonathan Owens,S,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,84.0,2.5,0.0,0.0,0.0,17.0,125.0,0.0,0.0,0.0
2,Julian Love,S,22.0,9.0,1.8,0.0,0.0,0.0,0.0,1.0,...,79.0,3.0,9.0,6.0,18.0,17.0,124.0,0.0,0.0,0.0
3,Richie Grant,S,11.0,0.0,3.0,0.0,0.0,0.0,1.0,1.0,...,70.0,6.0,9.0,3.0,5.0,17.0,123.0,0.0,0.0,0.0
4,Rayshawn Jenkins,S,21.333,0.0,3.333,0.0,0.0,0.0,0.0,1.0,...,73.0,11.0,10.0,3.0,9.0,17.0,116.0,0.0,0.0,0.0


In [66]:
dbs2 = dbs2.drop(columns='Position')
dbs2imdex_df = dbs2.set_index(['PlayerName'])

In [67]:
t_player = "Julian Love"
t_player_stat = dbs2imdex_df.loc[t_player]
dist = scipy.spatial.distance.cdist(dbs2imdex_df, [t_player_stat], metric = "cosine")[:,0]
query_distances = list(zip(dbs2.index, dist))

In [68]:
for sim_play, sim_stats in sorted(query_distances, key = lambda x: x[1], reverse = False)[:16]:
    print(dbs2['PlayerName'].iloc[sim_play], sim_stats)

Julian Love 1.1102230246251565e-16
Talanoa Hufanga 0.004917977108510829
Vonn Bell 0.006283167151985292
Adrian Amos 0.00715134889614788
Ryan Neal 0.0071727572026331066
Rayshawn Jenkins 0.008452614568841721
Kyle Dugger 0.009040051897808099
Taylor Rapp 0.009169842425857722
Grant Delpit 0.010597465549497365
Rodney McLeod Jr. 0.010934825138829929
Richie Grant 0.011335276202456779
Minkah Fitzpatrick 0.01155233107904996
Tyrann Mathieu 0.011604463685619959
Budda Baker 0.012993625381505614
John Johnson III 0.01336000200584364
Kevin Byard 0.014387962270821508


### DLine Similarity

In [69]:
pos = 'DE', 'DT'
dlinett = tackles.loc[tackles['Position'].isin(pos)]
dlinett = dlinett.set_index('PlayerName')
dlinett = dlinett.reset_index()
dlinett.head()

Unnamed: 0,PlayerName,Position,assistTackles,avgInterceptionYards,avgSackYards,avgStuffYards,blockedFieldGoalTouchdowns,blockedPuntTouchdowns,hurries,kicksBlocked,...,safeties,soloTackles,stuffs,stuffYards,tacklesForLoss,tacklesYardsLost,teamGamesPlayed,totalTackles,yardsAllowed,pointsAllowed
0,Christian Wilkins,DT,0.0,7.0,2.071,0.0,0.0,0.0,0.0,0.0,...,59.0,17.5,29.0,16.0,40.0,17.0,98.0,0.0,0.0,0.0
1,Maxx Crosby,DE,0.0,8.16,2.25,0.0,0.0,0.0,1.0,0.0,...,58.0,20.0,27.0,22.0,104.0,17.0,89.0,0.0,0.0,0.0
2,Cameron Heyward,DT,0.0,6.905,2.167,0.0,0.0,0.0,0.0,0.0,...,39.0,11.5,13.0,14.0,58.0,17.0,74.0,0.0,0.0,0.0
3,DeForest Buckner,DT,0.0,6.063,2.0,0.0,0.0,0.0,0.0,0.0,...,44.0,12.0,12.0,11.0,43.0,17.0,74.0,0.0,0.0,0.0
4,Zach Sieler,DT,0.0,9.0,2.667,0.0,0.0,0.0,0.0,0.0,...,41.0,10.0,8.0,7.0,41.0,17.0,70.0,0.0,0.0,0.0


In [70]:
dlinett = dlinett.drop(columns='Position')
dlinettimdex_df = dlinett.set_index(['PlayerName'])

In [71]:
t_player = "Christian Wilkins"
t_player_stat = dlinettimdex_df.loc[t_player]
dist = scipy.spatial.distance.cdist(dlinettimdex_df, [t_player_stat], metric = "cosine")[:,0]
query_distances = list(zip(dlinett.index, dist))

In [72]:
for sim_play, sim_stats in sorted(query_distances, key = lambda x: x[1], reverse = False)[:16]:
    print(dlinett['PlayerName'].iloc[sim_play], sim_stats)

Christian Wilkins 0.0
Grover Stewart 0.005884971178452769
Sebastian Joseph-Day 0.01597064438103013
Larry Ogunjobi 0.02575481627154974
Justin Jones 0.026536259183034705
Shelby Harris 0.0276314770470244
Al Woods 0.02917655334190017
Roy Lopez 0.031099209826474405
Zach Sieler 0.03135957977279835
Maliek Collins 0.03550912365887282
DeMarcus Lawrence 0.035760475684381454
Rasheem Green 0.03596420867137262
Jordan Elliott 0.036208558063924645
Justin Madubuike 0.0387786315389641
Roy Robertson-Harris 0.042261008731865335
DaVon Hamilton 0.044374596415347356


In [73]:
pos = 'DE', 'DT'
dlinesc = sacks.loc[sacks['Position'].isin(pos)]
dlinesc = dlinesc.set_index('PlayerName')
dlinesc = dlinesc.reset_index()
dlinesc.head()

Unnamed: 0,PlayerName,Position,assistTackles,avgInterceptionYards,avgSackYards,avgStuffYards,blockedFieldGoalTouchdowns,blockedPuntTouchdowns,hurries,kicksBlocked,...,safeties,soloTackles,stuffs,stuffYards,tacklesForLoss,tacklesYardsLost,teamGamesPlayed,totalTackles,yardsAllowed,pointsAllowed
0,Nick Bosa,DE,0.0,7.432,2.667,0.0,0.0,0.0,0.0,0.0,...,41.0,5.0,8.0,19.0,129.0,17.0,51.0,0.0,0.0,0.0
1,Myles Garrett,DE,0.0,5.688,3.0,0.0,0.0,0.0,0.0,0.0,...,37.0,8.5,12.0,18.0,85.0,17.0,60.0,0.0,0.0,0.0
2,Chris Jones,DT,0.0,6.806,2.4,0.0,0.0,0.0,0.0,0.0,...,30.0,5.5,12.0,17.0,94.0,17.0,44.0,0.0,0.0,0.0
3,J.J. Watt,DE,0.0,7.52,2.0,0.0,0.0,0.0,0.0,0.0,...,30.0,11.5,14.0,18.0,96.0,17.0,39.0,0.0,0.0,0.0
4,Maxx Crosby,DE,0.0,8.16,2.25,0.0,0.0,0.0,1.0,0.0,...,58.0,20.0,27.0,22.0,104.0,17.0,89.0,0.0,0.0,0.0


In [74]:
dlinesc = dlinesc.drop(columns='Position')
dlinescimdex_df = dlinesc.set_index(['PlayerName'])

In [75]:
t_player = "Nick Bosa"
t_player_stat = dlinescimdex_df.loc[t_player]
dist = scipy.spatial.distance.cdist(dlinescimdex_df, [t_player_stat], metric = "cosine")[:,0]
query_distances = list(zip(dlinesc.index, dist))

In [76]:
for sim_play, sim_stats in sorted(query_distances, key = lambda x: x[1], reverse = False)[:16]:
    print(dlinesc['PlayerName'].iloc[sim_play], sim_stats)

Nick Bosa 0.0
Chris Jones 0.0035491949951625967
J.J. Watt 0.007774157018547778
Quinnen Williams 0.012834851789954693
Yannick Ngakoue 0.019312429058163882
Myles Garrett 0.020580569716193153
AJ Epenesa 0.020766477990339594
Denico Autry 0.02502041586430359
Quinton Jefferson 0.02789542740046258
Brandon Graham 0.029085305777819426
Vita Vea 0.029977495881471583
Dante Fowler Jr. 0.030670348035748862
DeMarcus Walker 0.030801411948735535
Montez Sweat 0.031139032631660624
Brian Burns 0.03135700228154248
Jerry Hughes 0.03152574683293807
