### Breaking Scraping and Writing into two steps 

In [1]:
from requests import get 
import lxml.html as lh
import pandas as pd
from bs4 import BeautifulSoup 

### Getting source urls 

In [2]:
base_url = 'http://www.nfl.com/stats/weeklyleaders'

weeks = [x for x in range(1,18)]

stat_cat = ['Passing', 'Rushing', 'Receiving']

urls = []

names =[]

for i in weeks:
    for j in stat_cat:
        full_url = base_url + '?week=' + str(i) + '&season=2019' + '&showCategory=' + j
        urls.append(full_url)
        name = '2019_' + 'week_' + str(i) + '_' + j
        names.append(name)

In [3]:
def get_url(url):
    #grabbing the HTML and getting text 
    my_header = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97 Safari/537.36'}
    fantasy_page = get(url, headers = my_header)
    
    return(fantasy_page)

In [4]:
def get_headers(html):
    
    doc = lh.fromstring(html.content)
    
    #getting column names 
    title = doc.xpath('//tr//th')
    
    colnames = []

    n = len(title)

    for i in range(0, n):
        name = title[i].text_content()
        colnames.append(name)
    
    return(colnames)     

In [5]:
def get_table_data(html, colnames):
     
    #parsing table elements in the HTML inside the pattern "//tr" --> this is a table element 
    doc = lh.fromstring(html.content)
    table_elements = doc.xpath('//tr')
    
    #creating an empty array to insert the table elements 
    cols = []

    for j in range(0, len(colnames)):
        name = colnames[j] #getting the column name from the HTML table 
        #print('%d:"%s"'% (i, name))
        cols.append((name, []))

   #Since out first row is the header, data is stored on the second row onwards

    for j in range(1,len(table_elements)):
        #T is our j'th row
        T=table_elements[j]

        #If row is not of size 24, the //tr data is not from our table 
        if len(T)!=len(colnames):
            break

        #i is the index of our column
        i=0

        #Iterate through each element of the row
        for t in T.iterchildren():
            data=t.text_content() 

            #Append the data to the empty list of the i'th column
            cols[i][1].append(data)
            #Increment i for the next column
            i+=1
    return(cols)

In [6]:
def create_df(cols):
    #creating a dictionary for the columns in the parsed table 
    Dict={title:column for (title,column) in cols}

    df=pd.DataFrame(Dict)
    return(df)

In [7]:
def clean_df(df, page_url):
    #data cleaning 
    escapes = ''.join([chr(char) for char in range(1, 32)])
    translator = str.maketrans('', '', escapes)
    df.columns = df.columns.str.translate(translator)
    
    #fixing escape charaters
    fixed = ['Name', 'Team', 'Opp', 'Score']
    for i in fixed:
        df.loc[:, i] = df.loc[:, i].astype(str).str.translate(translator)
    
    #inputing the season week 
    week = page_url.split('week=', 1)[1][0]
    
    df.insert(1, 'Week', week)
    
    #returning the df
    return(df)

In [8]:
def writer(df, sheet_name, share_email):
    #Grapping Parameters for looping 
    n_rows = df.shape[0]
    n_cols = df.shape[1]
    
    #writing to google sheets 
    import time 

    #Now will can access our google sheets we call client.open on StartupName
    sheet = client.create(sheet_name) #2019-q4_fantasy-web-scraping/passing

    sheet.share(share_email,  perm_type='user', role='writer') #sharing my email 
    
    #writing data to the worksheet
    ws = sheet.get_worksheet(0)

    shaped_data = df.transpose()

    ws.insert_row(df.columns.tolist(), 1)

    for i in range(1, n_rows+1): 
        row = df.iloc[i-1].tolist()
        index = i+1
        if i%95 == 0: #printing the step in the loop
            print(i)  
            time.sleep(60)
            
        ws.insert_row(row, index) #writing the data 
    
    print('row ', i, ' end of file')

### Looping through URLs to create dataframes

In [9]:
#making empty dataframes
passing = pd.DataFrame()
receiving = pd.DataFrame()
rushing = pd.DataFrame()

In [10]:
for i in range(0, len(urls)): 
    source_html = get_url(urls[i])
    colnames = get_headers(source_html)
    table_data = get_table_data(source_html, colnames)
    raw_df = create_df(table_data)
    df = clean_df(raw_df, urls[i])
    
    if 'Passing' in urls[i]: 
        passing = pd.concat([passing, df])
    
    elif 'Receiving' in urls[i]: 
        receiving = pd.concat([receiving, df])
        
    else: 
        rushing = pd.concat([rushing, df])
    

In [11]:
print('Passing', passing.shape)

print('Receiving', receiving.shape)

print('Rushing', rushing.shape)

Passing (621, 13)
Receiving (4101, 10)
Rushing (2061, 10)


### Writing the data 

In [12]:
#import library
import gspread 
#Service client credential from oauth2client
from oauth2client.service_account import ServiceAccountCredentials
# Print nicely
import pprint
#Create scope
scope = ['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive']
#create some credential using that scope and content of startup_funding.json
creds = ServiceAccountCredentials.from_json_keyfile_name('../quickstart/g_sheet_creds.json',scope)
#create gspread authorize using that credential
client = gspread.authorize(creds)
my_email = 'matthewjchristy66@gmail.com'

In [35]:
writer(passing, 'passing', share_email = my_email)

APIError: {
  "error": {
    "code": 429,
    "message": "Quota exceeded for quota group 'WriteGroup' and limit 'Write requests per user per 100 seconds' of service 'sheets.googleapis.com' for consumer 'project_number:686366856477'.",
    "status": "RESOURCE_EXHAUSTED",
    "details": [
      {
        "@type": "type.googleapis.com/google.rpc.Help",
        "links": [
          {
            "description": "Google developer console API key",
            "url": "https://console.developers.google.com/project/686366856477/apiui/credential"
          }
        ]
      }
    ]
  }
}


In [None]:
writer(receiving, 'receiving', share_email = my_email)

In [None]:
writer(rushing, 'rushing', share_email = my_email)

###

#### Working on Batch Upating - not work

###

In [13]:
sheet = client.create('batch_test') #2019-q4_fantasy-web-scraping/passing

sheet.share('matthewjchristy66@gmail.com',  perm_type='user', role='writer') #sharing my email    

In [14]:
import string
end_col = string.ascii_uppercase[13]

In [15]:
sheet_range = "'" + 'A1:'+ end_col + str(passing.shape[0]) + "'"
print(sheet_range)

'A1:N621'


In [21]:
df_as_list = passing.T.reset_index().T.values.tolist()

In [29]:
df_as_list[2][0]

'Dak Prescott'

In [32]:
test = passing.values.tolist()

In [33]:
test[0]

['Andy Dalton',
 '1',
 'CIN',
 '@ SEA',
 'L\xa020-21 ',
 '35',
 '51',
 '418',
 '2',
 '0',
 '5',
 '2',
 '106.5']

In [25]:
ws = sheet.get_worksheet(0)

cell_list = ws.range('A1:N621')

In [31]:
range(0, len(cell_list)+1)

range(0, 8695)

In [81]:
for i in range(0, len(cell_list)+1):
    cell.value = 

In [34]:
ws.update_cells(cell_list=cells)

AttributeError: 'list' object has no attribute 'row'