# Web Scraping NFL data

 This is a project going over some basic web scraping techniques and methods applied to 
 collect, clean and analyze NFL fantasy football data
 
 ### Learning Objectives
     1. Finding a good data source and check if site allows for automatic access to data
     2. Inspect HTML code of the page we want to get our data from and see where it
     is embedded in the webpage
     3. Scrape/collect the data from the website that we want
     4. Explore the unfiltered messy data and get familiar with data
     5. Clean and prepare the messy data and store it into table
     6. Run some basic stats on data
     7. Hypothesis testing
     8. Visualizations
     9. Predictions

### Part 1:
    - Finding data online 
    - Checking site policies on data collection 
    - Basics to extracting data from HTML webpage    

In [56]:
# Lets import the packages we will need to use for this project
# You can use other packages if you'd like


import requests
%matplotlib inline
import numpy as np
from datascience import *
import pandas as pd
import lxml.html as lh


In [57]:
# This is a code that I copied from my DSC96 class at UCSD that will check if the 
# the website we wish to use prohibits webscraping via robot.txt policy so just run it!
# But remeber this does not gurantee you can scrape the website!!!!!!
# So it is best to follow these guide lines:
#
# 1.Don't break anything. Many rapid requests to smaller sites can overload the host server.
# 2.Use a published API if possible - it is more robust and usually much easier!
# 3.Respect the policy published at robots.txt
# 4.Don't spoof your UserAgent (or try to trick the server into thinking you are a person)
# 5.Read the Terms of Service for the site and follow it.
#
########################################################################################## 



from urllib.parse import urlparse
import urllib.robotparser

# This code checks the robots.txt file
def canFetch(url):

    parsed_uri = urlparse(url)
    domain = '{uri.scheme}://{uri.netloc}/'.format(uri=parsed_uri)

    rp = urllib.robotparser.RobotFileParser()
    rp.set_url(domain + "/robots.txt")
    try:
        rp.read()
        canFetchBool = rp.can_fetch("*", url)
    except:
        canFetchBool = None
    
    return canFetchBool

In [58]:
url = "https://www.pro-football-reference.com/years/2018/fantasy.htm"

r = requests.get(url)
    
urlText = r.text

Nchars = 1000
print(urlText[:Nchars]) # Print the first 500 characters
print("\n\n... " + str(len(urlText)-Nchars) + " additional characters")


<!DOCTYPE html>
<html data-version="klecko-" data-root="/home/pfr/build" itemscope itemtype="https://schema.org/WebSite" lang="en" class="no-js" >
<head>
    <meta charset="utf-8">
    <meta http-equiv="x-ua-compatible" content="ie=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=2.0" />
    <link rel="dns-prefetch" href="https://d2p3bygnnzw9w3.cloudfront.net/req/201811161" />

<!-- no:cookie fast load the css.           -->
<link rel="preconnect" href="https://d2p3bygnnzw9w3.cloudfront.net" crossorigin>
<link rel="preconnect" href="https://d395i9ljze9h3x.cloudfront.net" crossorigin>
 <style>html,body{margin:0;padding:0;font:14px/1.25 "Helvetica Neue",helvetica,arial,sans-serif;color:#000}body{position:relative;background:#c9cbcd;z-index:0;-webkit-text-size-adjust:none;-moz-text-size-adjust:none;-ms-text-size-adjust:none}a img{border:0}ul,li,ol{margin:0;padding:0;list-style-type:none}table th,table td{border:0}iframe{max-width:100%}code{ba

In [59]:
# Scraping Data from webstie
# First check to see if it passes robot.txt test

url = "https://www.pro-football-reference.com/years/2018/fantasy.htm"
canFetch(url)

True

In [60]:
from bs4 import BeautifulSoup
soup = BeautifulSoup(urlText, 'lxml')

In [61]:
# Scrape column headers
# Extract the necessary values for the column headers from the table
# and store them as a list
column_headers = [th.getText() for th in 
                  soup.findAll('tr', limit=2)[1].findAll('th')]
#column_headers

In [62]:
# Select data we want to scrape within the table rows of the element with id=fantasy
# We want the elements from the 3rd row and on
# Then store them into a list of tag elements callewd table_rows
table_rows = soup.select("#fantasy tr")[2:] 
type(table_rows)

list

In [63]:
len(table_rows[0])

33

In [64]:
# some_data = table_rows[2]
# type(some_data)

In [65]:
# # Get all ranking categories
# for rank in soup.find_all('th'):
#     print(rank.get('data-stat'))
    


In [66]:
# for _ in soup.find_all('td'):
#     print(_.get('data-stat'))

In [67]:
len(soup.find_all('tr')[2:])

559

In [68]:

# tds = make_array()
# for tr in soup.find_all('tr')[2:]:
#     tds = tr.find_all('td')
#     textlist = [x.text for x in tds]
#     print('\t'.join(textlist))

In [69]:
#type(textlist)

In [70]:
page = requests.get(url)

doc = lh.fromstring(page.content)

tr_elements = doc.xpath('//tr')

In [71]:
[len(T) for T in tr_elements[2:34]]

[33,
 33,
 33,
 33,
 33,
 33,
 33,
 33,
 33,
 33,
 33,
 33,
 33,
 33,
 33,
 33,
 33,
 33,
 33,
 33,
 33,
 33,
 33,
 33,
 33,
 33,
 33,
 33,
 33,
 33,
 33,
 33]

In [72]:
tr_elements = doc.xpath('//tr')

#Create empty list
col=[]
i=0

#For each row, store each first element (header) and an empty list
for t in tr_elements[1]:
    i+=1
    name=t.text_content()
    print(i,name)
    col.append((name,[]))

1 Rk
2 Player
3 Tm
4 FantPos
5 Age
6 G
7 GS
8 Cmp
9 Att
10 Yds
11 TD
12 Int
13 Att
14 Yds
15 Y/A
16 TD
17 Tgt
18 Rec
19 Yds
20 Y/R
21 TD
22 Fmb
23 FL
24 TD
25 2PM
26 2PP
27 FantPt
28 PPR
29 DKPt
30 FDPt
31 VBD
32 PosRank
33 OvRank


In [73]:
#Since out first row is the header, data is stored on the second row onwards
for j in range(1,len(tr_elements)):
    #T is our j'th row
    T=tr_elements[j]
    
    #If row is not of size 33, the //tr data is not from our table 
    if len(T)!=33:
        break
    
    #i is the index of our column
    i=0
    
    #Iterate through each element of the row
    for t in T.iterchildren():
        data=t.text_content() 
        #Append the data to the empty list of the i'th column
        col[i][1].append(data)
        #Increment i for the next column
        i+=1

In [74]:
[len(C) for (title,C) in col]

[560,
 560,
 560,
 560,
 560,
 560,
 560,
 560,
 560,
 560,
 560,
 560,
 560,
 560,
 560,
 560,
 560,
 560,
 560,
 560,
 560,
 560,
 560,
 560,
 560,
 560,
 560,
 560,
 560,
 560,
 560,
 560,
 560]

In [75]:
Dict={title:column for (title,column) in col}
df=pd.DataFrame(Dict)

In [76]:
df.head()

Unnamed: 0,Rk,Player,Tm,FantPos,Age,G,GS,Cmp,Att,Yds,...,FL,2PM,2PP,FantPt,PPR,DKPt,FDPt,VBD,PosRank,OvRank
0,Rk,Player,Tm,FantPos,Age,G,GS,Cmp,Att,Yds,...,FL,2PM,2PP,FantPt,PPR,DKPt,FDPt,VBD,PosRank,OvRank
1,1,Todd Gurley,LAR,RB,24,11,11,0,210,441,...,1,3,,254,297.4,304.4,275.9,165,1,1
2,2,Alvin Kamara,NOR,RB,23,11,10,0,150,519,...,0,2,,217,273.5,279.5,245.0,127,2,2
3,3,Kareem Hunt,KAN,RB,23,11,11,0,181,378,...,0,,,204,230.2,236.2,217.2,114,3,3
4,4,Tyreek Hill,KAN,WR,24,11,11,0,13,1106,...,0,,,191,255.7,258.7,223.2,113,1,4


In [77]:
fantasy_tbl = Table.from_df(df)
fantasy_tbl

Rk,Player,Tm,FantPos,Age,G,GS,Cmp,Att,Yds,TD,Int,Y/A,Tgt,Rec,Y/R,Fmb,FL,2PM,2PP,FantPt,PPR,DKPt,FDPt,VBD,PosRank,OvRank
Rk,Player,Tm,FantPos,Age,G,GS,Cmp,Att,Yds,TD,Int,Y/A,Tgt,Rec,Y/R,Fmb,FL,2PM,2PP,FantPt,PPR,DKPt,FDPt,VBD,PosRank,OvRank
1,Todd Gurley,LAR,RB,24,11,11,0,210,441,17,0,4.97,56,43,10.26,1,1,3,,254,297.4,304.4,275.9,165,1,1
2,Alvin Kamara,NOR,RB,23,11,10,0,150,519,15,0,4.71,73,57,9.11,0,0,2,,217,273.5,279.5,245.0,127,2,2
3,Kareem Hunt,KAN,RB,23,11,11,0,181,378,14,0,4.55,35,26,14.54,0,0,,,204,230.2,236.2,217.2,114,3,3
4,Tyreek Hill,KAN,WR,24,11,11,0,13,1106,12,0,6.23,94,65,17.02,0,0,,,191,255.7,258.7,223.2,113,1,4
5,Patrick Mahomes,KAN,QB,23,11,11,264,42,0,2,10,4.43,0,0,,7,2,,,300,299.7,317.7,309.7,111,1,5
6,Ezekiel Elliott,DAL,RB,23,11,11,0,217,363,8,0,4.95,62,47,7.72,4,1,,,190,236.7,243.7,213.2,100,4,6
7,Melvin Gordon,LAC,RB,25,9,9,0,143,448,11,0,5.18,58,42,10.67,1,0,2,,189,230.9,236.9,209.9,99,5,7
8,Saquon Barkley,NYG,RB,21,10,10,0,158,540,10,0,4.61,79,64,8.44,0,0,1,,189,252.8,258.8,220.8,99,6,8
9,James Conner,PIT,RB,23,10,9,0,173,411,11,0,4.60,60,45,9.13,3,1,2,,189,233.7,240.7,211.2,99,7,9


In [78]:
fantasy_tbl = fantasy_tbl.exclude(0)

In [79]:
fantasy_tbl

Rk,Player,Tm,FantPos,Age,G,GS,Cmp,Att,Yds,TD,Int,Y/A,Tgt,Rec,Y/R,Fmb,FL,2PM,2PP,FantPt,PPR,DKPt,FDPt,VBD,PosRank,OvRank
1,Todd Gurley,LAR,RB,24,11,11,0,210,441,17,0,4.97,56,43,10.26,1,1,3.0,,254,297.4,304.4,275.9,165,1,1
2,Alvin Kamara,NOR,RB,23,11,10,0,150,519,15,0,4.71,73,57,9.11,0,0,2.0,,217,273.5,279.5,245.0,127,2,2
3,Kareem Hunt,KAN,RB,23,11,11,0,181,378,14,0,4.55,35,26,14.54,0,0,,,204,230.2,236.2,217.2,114,3,3
4,Tyreek Hill,KAN,WR,24,11,11,0,13,1106,12,0,6.23,94,65,17.02,0,0,,,191,255.7,258.7,223.2,113,1,4
5,Patrick Mahomes,KAN,QB,23,11,11,264,42,0,2,10,4.43,0,0,,7,2,,,300,299.7,317.7,309.7,111,1,5
6,Ezekiel Elliott,DAL,RB,23,11,11,0,217,363,8,0,4.95,62,47,7.72,4,1,,,190,236.7,243.7,213.2,100,4,6
7,Melvin Gordon,LAC,RB,25,9,9,0,143,448,11,0,5.18,58,42,10.67,1,0,2.0,,189,230.9,236.9,209.9,99,5,7
8,Saquon Barkley,NYG,RB,21,10,10,0,158,540,10,0,4.61,79,64,8.44,0,0,1.0,,189,252.8,258.8,220.8,99,6,8
9,James Conner,PIT,RB,23,10,9,0,173,411,11,0,4.6,60,45,9.13,3,1,2.0,,189,233.7,240.7,211.2,99,7,9
10,Travis Kelce,KAN,TE,29,11,11,0,0,914,7,0,,101,67,13.64,1,0,,,133,200.4,203.4,166.9,83,1,10


In [80]:
rank_pos = fantasy_tbl.where('PosRank', are.equal_to('1'))
rank_pos

Rk,Player,Tm,FantPos,Age,G,GS,Cmp,Att,Yds,TD,Int,Y/A,Tgt,Rec,Y/R,Fmb,FL,2PM,2PP,FantPt,PPR,DKPt,FDPt,VBD,PosRank,OvRank
1,Todd Gurley,LAR,RB,24,11,11,0,210,441,17,0,4.97,56,43,10.26,1,1,3.0,,254.0,297.4,304.4,275.9,165.0,1,1
4,Tyreek Hill,KAN,WR,24,11,11,0,13,1106,12,0,6.23,94,65,17.02,0,0,,,191.0,255.7,258.7,223.2,113.0,1,4
5,Patrick Mahomes,KAN,QB,23,11,11,264,42,0,2,10,4.43,0,0,,7,2,,,300.0,299.7,317.7,309.7,111.0,1,5
10,Travis Kelce,KAN,TE,29,11,11,0,0,914,7,0,,101,67,13.64,1,0,,,133.0,200.4,203.4,166.9,83.0,1,10
75,Shane Smith,NYG,,25,2,0,0,0,0,0,0,,0,0,,0,0,,,,,,,,1,75


In [81]:
rank_pos.group('PPR', max).sort(rank_pos.column('PPR'))

PPR,Rk max,Player max,Tm max,FantPos max,Age max,G max,GS max,Cmp max,Att max,Yds max,TD max,Int max,Y/A max,Tgt max,Rec max,Y/R max,Fmb max,FL max,2PM max,2PP max,FantPt max,DKPt max,FDPt max,VBD max,PosRank max,OvRank max
299.7,5,Patrick Mahomes,KAN,QB,23,11,11,264,42,0,2,10,4.43,0,0,,7,2,,,300.0,317.7,309.7,111.0,1,5
297.4,1,Todd Gurley,LAR,RB,24,11,11,0,210,441,17,0,4.97,56,43,10.26,1,1,3.0,,254.0,304.4,275.9,165.0,1,1
200.4,10,Travis Kelce,KAN,TE,29,11,11,0,0,914,7,0,,101,67,13.64,1,0,,,133.0,203.4,166.9,83.0,1,10
,75,Shane Smith,NYG,,25,2,0,0,0,0,0,0,,0,0,,0,0,,,,,,,1,75
255.7,4,Tyreek Hill,KAN,WR,24,11,11,0,13,1106,12,0,6.23,94,65,17.02,0,0,,,191.0,258.7,223.2,113.0,1,4


In [82]:
# Stats to calculate are: find mean PPR points scored per game, find STD and Variance of every player
# sort the positions with the highest total average ppr points per game. 
# 

In [83]:
# Still need to clean the data a bit, so lets find all 
# the rows that have no stats or data to use in our stats/models 
#
#We search the FantPos column which corresponds to a players position
#if we find no position data we will not use these rows

nothing = fantasy_tbl.where('FantPos', are.equal_to(''))

# now we need to ge the row numbers corresponding to these non-players 
# by selecting the 'Rk' column from the table we can get the rank of each player
# then after store into nothin_array

nothin_array = nothing.column('Rk')

nothin_array


array(['75', '77', '437', '486', '487', '488', '489', '490', '491', '492',
       '493', '494', '495', '496', '498', '499', '502', '505', '508',
       '509', '511', '514', '515', '516', '517', '518', '520', '522',
       '523', '524', '525', '526', '528', '530', '531', '539'],
      dtype='<U3')

In [84]:
# So now we have an array with strings that hold the Rk numbers for 
# players we want to exclude but we need to convert them into a list of type int
# so we can make a list and use a for loop to convert all the strings into a int and
# store them in a list

nothin_list = [int(numeric_string) for numeric_string in nothin_array]
nothin_list

[75,
 77,
 437,
 486,
 487,
 488,
 489,
 490,
 491,
 492,
 493,
 494,
 495,
 496,
 498,
 499,
 502,
 505,
 508,
 509,
 511,
 514,
 515,
 516,
 517,
 518,
 520,
 522,
 523,
 524,
 525,
 526,
 528,
 530,
 531,
 539]

In [85]:
# Now that we have the list of Rk int values we can figure out 
# their corresponding row index by just adding one to every element in our list
# this wil give us the row index for every player we wish to exclude

new_nothin_list = [x+1 for x in nothin_list]

new_nothin_list

[76,
 78,
 438,
 487,
 488,
 489,
 490,
 491,
 492,
 493,
 494,
 495,
 496,
 497,
 499,
 500,
 503,
 506,
 509,
 510,
 512,
 515,
 516,
 517,
 518,
 519,
 521,
 523,
 524,
 525,
 526,
 527,
 529,
 531,
 532,
 540]

In [86]:
# Now we can finally use the tbl.exclude function from the datascience package
# to exclude these rows by simply passing the new list of rows indicies to exclude

fantasy_tbl_clean = fantasy_tbl.exclude(new_nothin_list)

In [87]:
fantasy_tbl_clean

Rk,Player,Tm,FantPos,Age,G,GS,Cmp,Att,Yds,TD,Int,Y/A,Tgt,Rec,Y/R,Fmb,FL,2PM,2PP,FantPt,PPR,DKPt,FDPt,VBD,PosRank,OvRank
1,Todd Gurley,LAR,RB,24,11,11,0,210,441,17,0,4.97,56,43,10.26,1,1,3.0,,254,297.4,304.4,275.9,165,1,1
2,Alvin Kamara,NOR,RB,23,11,10,0,150,519,15,0,4.71,73,57,9.11,0,0,2.0,,217,273.5,279.5,245.0,127,2,2
3,Kareem Hunt,KAN,RB,23,11,11,0,181,378,14,0,4.55,35,26,14.54,0,0,,,204,230.2,236.2,217.2,114,3,3
4,Tyreek Hill,KAN,WR,24,11,11,0,13,1106,12,0,6.23,94,65,17.02,0,0,,,191,255.7,258.7,223.2,113,1,4
5,Patrick Mahomes,KAN,QB,23,11,11,264,42,0,2,10,4.43,0,0,,7,2,,,300,299.7,317.7,309.7,111,1,5
6,Ezekiel Elliott,DAL,RB,23,11,11,0,217,363,8,0,4.95,62,47,7.72,4,1,,,190,236.7,243.7,213.2,100,4,6
7,Melvin Gordon,LAC,RB,25,9,9,0,143,448,11,0,5.18,58,42,10.67,1,0,2.0,,189,230.9,236.9,209.9,99,5,7
8,Saquon Barkley,NYG,RB,21,10,10,0,158,540,10,0,4.61,79,64,8.44,0,0,1.0,,189,252.8,258.8,220.8,99,6,8
9,James Conner,PIT,RB,23,10,9,0,173,411,11,0,4.6,60,45,9.13,3,1,2.0,,189,233.7,240.7,211.2,99,7,9
10,Travis Kelce,KAN,TE,29,11,11,0,0,914,7,0,,101,67,13.64,1,0,,,133,200.4,203.4,166.9,83,1,10


In [88]:
age = fantasy_tbl_clean.column('Age')
#age

In [89]:
age_list = age.tolist()
# age_list

new_age_list = []
for value in age_list:
    try:
        new_age_list.append(int(value))
    except ValueError:
        continue
        

In [90]:
age_clean = np.array(new_age_list)

In [91]:
len(age_clean)

507

In [92]:
targets_raw = fantasy_tbl_clean.column('Tgt')


In [93]:
targets_list = targets_raw.tolist()
targets_list

new_tgt_list = []
for value in targets_list:
    try:
        new_tgt_list.append(int(value))
    except ValueError:
            continue

In [94]:
tgt_clean = np.array(new_tgt_list)
tgt_clean

array([ 56,  73,  35,  94,   0,  62,  58,  79,  60, 101,  97, 106,   1,
       109,  71, 125, 115,   0,   0,  96, 103,  65, 106,  89,  90,  46,
        84,  71,  91,  20,   0,   1,  48,  66,  84,  80,  31,  65,  93,
        37, 103,   0,  81,   1,  62,  76,  49,  53,  87,  33,  63,   0,
        56,  40,  63,  68,  62,  18,  78,  18,  23,  64,   0,  68,  10,
        44,  57,  42,  21,  61,  64,   1,  81,  18,   1,  25,   0,   1,
         1,   0,   0,   0,   0,   0,   0,   1,   0,   0,   0,   1,   1,
         0,  49,  15,  22,  15,  28,   0,  11,  41,  18,  57,  12,   0,
        28,   0,  55,  45, 106,  80,  62,  58,  48,  35,  35,  68,  69,
        10,   8,  67,  56,  34,  58,  39,   0,  52,  12,  62,   7,  49,
        57,  42,  63,  18,   0,  59,   0,   6,  56,  25,  33,  79,  32,
        33,  43,  61,  48,  61,  24,  77,  32,  70,  39,  59,  42,  11,
        34,  59,  33,  15,  19,  23,  43,  58,  16,  18,  42,   0,  50,
        28,  55,  28,  56,  36,  18,  32,  26,  12,  20,  25,   

In [95]:
len(tgt_clean)

507

In [96]:
def clean_col(col_array):
    col_list = col_array.tolist()
    #print(col_list)
    new_col_list = []
    for value in col_list:
        try:
            new_col_list.append(int(value))
        except ValueError:
            continue
    return np.array(new_col_list)
            
    

In [97]:
TD_clean = clean_col(fantasy_tbl_clean.column('TD'))

In [98]:
TD_clean

array([17, 15, 14, 12,  2,  8, 11, 10, 11,  7,  8,  9,  3, 11,  8,  3,  7,
        2,  1,  8,  5, 10,  5, 10,  4,  8,  4,  3,  6,  7,  4,  3,  5,  8,
        5,  4,  6,  4,  3,  7,  5,  3,  5,  0,  5,  6,  7,  5,  3,  6,  6,
        0,  6,  4,  6,  3,  5,  5,  2,  6,  6,  4,  0,  4,  6,  3,  2,  5,
        8,  5,  3,  5,  3,  5,  1,  3,  1,  0,  2,  0,  2,  0,  1,  0,  1,
        0,  1,  0,  1,  0,  2,  1,  5,  5,  5,  5,  3,  1,  3,  2,  3,  4,
        5,  1,  4,  0,  3,  4,  2,  2,  4,  3,  5,  5,  4,  3,  2,  4,  4,
        4,  3,  2,  3,  2,  0,  3,  1,  3,  5,  3,  4,  3,  3,  3,  0,  3,
        3,  5,  2,  3,  4,  2,  4,  4,  5,  2,  2,  1,  4,  1,  4,  1,  3,
        0,  2,  3,  4,  2,  4,  1,  1,  5,  1,  2,  4,  1,  3,  0,  2,  3,
        2,  2,  2,  2,  4,  4,  3,  1,  2,  2,  1,  2,  1,  2,  1,  3,  3,
        2,  1,  1,  1,  2,  1,  1,  2,  1,  0,  2,  2,  3,  2,  4,  4,  1,
        2,  3,  2,  3,  2,  1,  1,  2,  2,  1,  1,  1,  2,  0,  1,  1,  1,
        1,  1,  1,  1,  1

In [99]:
len(TD_clean)

507

In [100]:
Int_clean = clean_col(fantasy_tbl_clean.column('Int'))
Rk_clean = clean_col(fantasy_tbl_clean.column('Rk'))
G_clean = clean_col(fantasy_tbl_clean.column('G'))
GS_clean = clean_col(fantasy_tbl_clean.column('GS'))
Cmp_clean = clean_col(fantasy_tbl_clean.column('Cmp'))
Att_clean = clean_col(fantasy_tbl_clean.column('Att'))
Yds_clean = clean_col(fantasy_tbl_clean.column('Yds'))
Rec_clean = clean_col(fantasy_tbl_clean.column('Rec'))
Fmb_clean = clean_col(fantasy_tbl_clean.column('Fmb'))
FL_clean = clean_col(fantasy_tbl_clean.column('FL'))

In [101]:
print(len(Int_clean))
print(len(Rk_clean))
print(len(G_clean))
print(len(GS_clean))
print(len(Cmp_clean))
print(len(Att_clean))
print(len(Yds_clean))
print(len(Rec_clean))
print(len(Fmb_clean))
print(len(FL_clean))



507
507
507
507
507
507
507
507
507
507


In [102]:
def clean_col_float(col_array):
    col_list = col_array.tolist()
    #print(col_list)
    new_col_list = []
    for value in col_list:
        try:
            new_col_list.append(float(value))
        except ValueError:
            continue
    return np.array(new_col_list)

In [103]:
YR_clean = clean_col_float(fantasy_tbl_clean.column('Y/R'))
YA_clean = clean_col_float(fantasy_tbl_clean.column('Y/A'))
PPR_clean = clean_col_float(fantasy_tbl_clean.column('PPR'))

In [104]:
print(len(YA_clean))
print(len(YR_clean))
print(len(PPR_clean))

269
422
485
