In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV

In [18]:
# Read data/RBs_2020.csv (path relative to the notebook's working directory) into a DataFrame.
data = pd.read_csv(filepath_or_buffer='data/RBs_2020.csv')

In [34]:
# Snapshot the current column labels as a plain Python list.
column_list = data.columns.tolist()

In [42]:
def add_year(cols, year=2020):
    """Return a new list with ``_<year>`` appended to every name in ``cols``.

    Parameters
    ----------
    cols : iterable
        Column labels to tag. Labels that are not strings are formatted
        with ``str()`` instead of raising ``TypeError``.
    year : int or str, default 2020
        Value placed after the underscore. The default reproduces the
        original hard-coded ``"_2020"`` suffix.

    Returns
    -------
    list of str
        Suffixed labels in the same order as ``cols``. The input is not
        modified.
    """
    return [f"{col}_{year}" for col in cols]

In [44]:
# Build the year-suffixed version of every column label.
new_columns = add_year(cols=column_list)

In [45]:
# Overwrite the frame's column labels in place with the suffixed names.
# From this point on, columns must be addressed by the suffixed label
# (e.g. 'Team_2020'), not the original one.
data.columns = new_columns

In [48]:
# Display the frame with its row labels copied into an ordinary "index" column.
# NOTE: reset_index() returns a new DataFrame and the result is not assigned,
# so `data` itself is left unchanged by this cell.
data.reset_index()

Unnamed: 0,index,Rank_2020,Name_2020,Team_2020,Position_2020,Played_2020,RushingAttempts_2020,RushingYards_2020,RushingYardsPerAttempt_2020,RushingTouchdowns_2020,ReceivingTargets_2020,Receptions_2020,ReceivingYards_2020,ReceivingTouchdowns_2020,Fumbles_2020,FumblesLost_2020,FantasyPointsPerGame_2020,FantasyPoints_2020
0,0,1,Derrick Henry_2020,TEN,RB,16,378,2027,5.4,17,31,19,114,0,3,2,19.6,314.1
1,1,2,Alvin Kamara_2020,NO,RB,15,187,932,5.0,16,107,83,756,5,1,0,19.7,294.8
2,2,3,Dalvin Cook_2020,MIN,RB,14,312,1557,5.0,16,54,44,361,1,5,3,21.0,293.8
3,3,4,Jonathan Taylor_2020,IND,RB,15,232,1169,5.0,11,39,36,299,1,1,1,14.5,216.8
4,4,5,Aaron Jones_2020,GB,RB,14,201,1104,5.5,9,63,47,355,2,2,0,15.1,211.9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
173,173,162,Derek Watt_2020,PIT,FB,12,0,0,0.0,0,0,0,0,0,0,0,0.0,0.0
174,174,162,Sewo Olonilua_2020,DAL,FB,2,0,0,0.0,0,0,0,0,0,0,0,0.0,0.0
175,175,162,Nathan Cottrell_2020,JAX,RB,8,0,0,0.0,0,0,0,0,0,0,0,0.0,0.0
176,176,162,Johnny Stanton_2020,CLE,FB,1,0,0,0.0,0,0,0,0,0,0,0,0.0,0.0


In [20]:
# Tag every player name with the season suffix.
# The columns were renamed above, so the name column is 'Name_2020' here;
# indexing with the old 'Name' label raises KeyError when the notebook is
# run top to bottom.
name_col = 'Name_2020'
# Skip values that already carry the suffix so re-running this cell does not
# produce "..._2020_2020". na=False treats missing names as "not tagged";
# they stay missing after the concatenation.
already_tagged = data[name_col].str.endswith('_2020', na=False)
data[name_col] = data[name_col].where(already_tagged, data[name_col] + '_2020')

In [50]:
# Show the 'Team_2020' column's underlying values, in row order.
data.loc[:, 'Team_2020'].values

array(['TEN', 'NO', 'MIN', 'IND', 'GB', 'CHI', 'JAX', 'LV', 'CLE', 'CLE',
       'DAL', 'ARI', 'DEN', 'WAS', 'TB', 'SEA', 'BAL', 'CAR', 'HOU',
       'DET', 'PHI', 'KC', 'ATL', 'IND', 'PIT', 'SF', 'MIA', 'BAL', 'NYG',
       'ARI', 'LAR', 'NO', 'DET', 'WAS', 'LAC', 'CIN', 'BUF', 'GB', 'TB',
       'SF', 'DAL', 'LAR', 'BUF', 'NE', 'LAR', 'NYJ', 'SF', 'NE', 'CIN',
       'MIN', 'CAR', 'PHI', 'ATL', 'SEA', 'LV', 'NE', 'NE', 'PIT', 'LAC',
       'SF', 'DEN', 'KC', 'LAC', 'MIA', 'HOU', 'CIN', 'DET', 'WAS', 'IND',
       'NYJ', 'BAL', 'LAC', 'CHI', 'NYJ', 'ATL', 'SEA', 'GB', 'NYG',
       'NYG', 'KC', 'MIA', 'LV', 'TEN', 'NYJ', 'PHI', 'NYG', 'CAR', 'KC',
       'DEN', 'SF', 'SEA', 'JAX', 'MIN', 'BUF', 'SEA', 'JAX', 'TB', 'CLE',
       'MIN', 'LV', 'PIT', 'CIN', 'TEN', 'PHI', 'BUF', 'LAC', 'GB', 'CAR',
       'TEN', 'TB', 'MIN', 'NO', 'CHI', 'MIA', 'MIA', 'LAC', 'CHI', 'NE',
       'NE', 'NYG', 'CHI', 'SF', 'BAL', 'BAL', 'HOU', 'PIT', 'KC', 'ARI',
       'HOU', 'ATL', 'LV', 'IND', 'HOU', 'CAR

In [3]:
# Read data/RBs_2019.csv (path relative to the notebook's working directory) into its own DataFrame.
data_2019 = pd.read_csv(filepath_or_buffer='data/RBs_2019.csv')

In [5]:
# Bare last expression: renders the 2019 frame as loaded (column labels are unsuffixed here).
data_2019

Unnamed: 0,Rank,Name,Team,Position,Played,RushingAttempts,RushingYards,RushingYardsPerAttempt,RushingTouchdowns,ReceivingTargets,Receptions,ReceivingYards,ReceivingTouchdowns,Fumbles,FumblesLost,FantasyPointsPerGame,FantasyPoints
0,1,Christian McCaffrey,CAR,RB,16,287,1387,4.8,15,141,116,1005,4,1,0,22.2,355.2
1,2,Derrick Henry,TEN,RB,15,303,1540,5.1,16,24,18,206,2,5,3,18.4,276.6
2,3,Aaron Jones,GB,RB,16,236,1084,4.6,16,68,49,474,3,3,2,16.6,265.8
3,4,Ezekiel Elliott,DAL,RB,16,301,1357,4.5,12,71,54,420,2,3,2,16.1,257.7
4,5,Dalvin Cook,MIN,RB,14,250,1135,4.5,13,63,53,519,0,4,2,17.1,239.4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
162,154,Cullen Gillaspia,HOU,FB,16,0,0,0.0,0,0,0,0,0,0,0,0.0,0.0
163,154,Chandler Cox,MIA,FB,13,0,0,0.0,0,0,0,0,0,0,0,0.0,0.0
164,154,Trayveon Williams,CIN,RB,11,0,0,0.0,0,0,0,0,0,0,0,0.0,0.0
165,154,Craig Reynolds,WAS,RB,2,0,0,0.0,0,0,0,0,0,0,0,0.0,0.0
