In [4]:
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt

In [16]:
# Load the season stats CSV into a dataframe
df = pd.read_csv('2019-2.csv')

# Discard the columns this analysis does not use
df = df.drop(
    columns=[
        'Rk', '2PM', '2PP', 'FantPt', 'DKPt', 'FDPt',
        'VBD', 'PosRank', 'OvRank', 'PPR', 'Fmb', 'GS',
    ]
)

# Clean up the player names: keep only the text before the first '*',
# then only the text before the first backslash
df['Player'] = df['Player'].map(lambda name: name.split('*')[0].split('\\')[0])

# Give the ambiguous stat columns descriptive names.
# A ".1", ".2", ... suffix marks a repeated occurrence of the same header
# in the CSV (e.g. 'TD', 'TD.1', 'TD.2', 'TD.3').
df = df.rename(
    columns={
        'TD': 'PassingTD',
        'TD.1': 'RushingTD',
        'TD.2': 'ReceivingTD',
        'TD.3': 'TotalTD',
        'Yds': 'PassingYDs',
        'Yds.1': 'RushingYDs',
        'Yds.2': 'ReceivingYDs',
        'Att': 'PassingAtt',
        'Att.1': 'RushingAtt',
    }
)

# Separate the data by position: one dataframe per value of 'FantPos'
rb_df, qb_df, wr_df, te_df = (
    df.loc[df['FantPos'] == position]
    for position in ('RB', 'QB', 'WR', 'TE')
)

# Uncomment to preview the first 5 rows of the cleaned dataframe
# df.head()

In [17]:
# Stat column names grouped by category; combined below to choose which
# columns each position-specific dataframe keeps.
rushing_columns = [
    'RushingAtt',
    'RushingYDs',
    'Y/A',
    'RushingTD',
]
receiving_columns = [
    'Tgt',
    'Rec',
    'ReceivingYDs',
    'Y/R',
    'ReceivingTD',
]
passing_columns = [
    'PassingAtt',
    'PassingYDs',
    'PassingTD',
    'Int',
]

def transform_columns(df, new_column_list):
    """Return a copy of ``df`` restricted to the common and requested columns.

    The result always has the columns 'Player', 'Tm', 'Age' and 'G' first,
    then the columns named in ``new_column_list`` (in the given order), and
    finally 'FL'.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing 'Player', 'Tm', 'Age', 'G', 'FL' and every column
        named in ``new_column_list``.
    new_column_list : sequence of str
        Extra column names to keep (a list, tuple or other iterable).

    Returns
    -------
    pandas.DataFrame
        An independent copy, so adding or modifying columns on the result
        neither touches ``df`` nor raises pandas' SettingWithCopyWarning.

    Raises
    ------
    KeyError
        Raised by pandas when any requested column is missing from ``df``.
    """
    selected = ['Player', 'Tm', 'Age', 'G'] + list(new_column_list) + ['FL']
    # .copy() detaches the selection from the source frame; without it, later
    # column assignments on the result are treated as writes to a slice.
    return df[selected].copy()

In [18]:
# Trim every position dataframe down to the shared columns plus the stat
# columns relevant to that position; everything else is dropped.

# Running backs and wide receivers keep both rushing and receiving stats
rb_df, wr_df = (
    transform_columns(frame, rushing_columns + receiving_columns)
    for frame in (rb_df, wr_df)
)
# Tight ends keep receiving stats only
te_df = transform_columns(df=te_df, new_column_list=receiving_columns)
# Quarterbacks keep passing stats only
qb_df = transform_columns(df=qb_df, new_column_list=passing_columns)

# Uncomment to preview the trimmed running back dataframe
# rb_df.head()

Unnamed: 0,Player,Tm,Age,G,RushingAtt,RushingYDs,Y/A,RushingTD,Tgt,Rec,ReceivingYDs,Y/R,ReceivingTD,FL
0,Christian McCaffrey,CAR,23,16,287,1387,4.83,15,142,116,1005,8.66,4,0
2,Derrick Henry,TEN,25,15,303,1540,5.08,16,24,18,206,11.44,2,3
3,Aaron Jones,GNB,25,16,236,1084,4.59,16,68,49,474,9.67,3,2
4,Ezekiel Elliott,DAL,24,16,301,1357,4.51,12,71,54,420,7.78,2,2
5,Dalvin Cook,MIN,24,14,250,1135,4.54,13,63,53,519,9.79,0,2


In [None]:
# This code answers the question: how did usage (carries + targets) per game
# correlate with fantasy points per game for RBs in 2019?

# Work on an independent copy so the column assignments below do not raise
# pandas' SettingWithCopyWarning (rb_df was produced by slicing another frame).
rb_df = rb_df.copy()

# Create new column for fantasy points scored (non-PPR):
# 0.1 per rushing/receiving yard, 6 per rushing/receiving TD, -2 per 'FL'.
# The parentheses are required: without them the statement ends after the
# first line and the receiving/FL terms on the following line are evaluated
# as a separate expression and silently discarded.
rb_df['FantasyPoints'] = (
    rb_df['RushingYDs']*0.1 + rb_df['RushingTD']*6
    + rb_df['ReceivingYDs']*0.1 + rb_df['ReceivingTD']*6
    - rb_df['FL']*2
)

# Create a new column for fantasy points per game (non-PPR), rounded to 2 decimals
rb_df['FantasyPoints/GM'] = (rb_df['FantasyPoints'] / rb_df['G']).round(2)

# Create a new column for usage per game, rounded to 2 decimals.
# Usage = # of carries + targets
rb_df['Usage/GM'] = ((rb_df['RushingAtt'] + rb_df['Tgt']) / rb_df['G']).round(2)

# just for styling using seaborn (sns)
sns.set_style('whitegrid')

# create a canvas with matplotlib
fig, ax = plt.subplots()
fig.set_size_inches(15, 10)
ax.set_title('RB usage per game vs. fantasy points per game (2019, non-PPR)')

# regression scatter plot with trendline, drawn explicitly on the axes created above
plot = sns.regplot(
    x=rb_df['Usage/GM'],
    y=rb_df['FantasyPoints/GM'],
    scatter=True,
    ax=ax)