In [1]:
import itemQuery as iq
import articleParse as ap
from timeit import default_timer as timer

In [2]:
%run modelInit.py

In [3]:
def getFeeds():
    """Read the NFL team RSS feed list from Google Sheets.

    Source spreadsheet:
    https://docs.google.com/spreadsheets/d/1DUBb9OG1A1Xs6v2PK9Oislz4384DSu2MzEbH8dK0ad4/edit#gid=0

    Fetches the range ``Sheet1!A2:E`` and prints ``'Done'`` when at least one
    row came back, or ``'No data found.'`` otherwise.

    Returns:
        The ``'values'`` entry of the API response, or an empty list when the
        response has no such entry.
    """
    # get_credentials, httplib2 and discovery are not defined in this cell;
    # presumably they come from `%run modelInit.py` -- confirm.
    authorized_http = get_credentials().authorize(httplib2.Http())
    sheets_service = discovery.build(
        'sheets', 'v4',
        http=authorized_http,
        discoveryServiceUrl='https://sheets.googleapis.com/$discovery/rest?version=v4',
    )

    # Sheet id and cell range holding the feed definitions.
    request = sheets_service.spreadsheets().values().get(
        spreadsheetId='1DUBb9OG1A1Xs6v2PK9Oislz4384DSu2MzEbH8dK0ad4',
        range='Sheet1!A2:E',
    )
    rows = request.execute().get('values', [])

    print('Done' if rows else 'No data found.')
    return rows

In [4]:
# Load the feed rows from the Google Sheet; getFeeds() prints 'Done' or
# 'No data found.' as a status line.
feeds = getFeeds()

Done


In [5]:
def feedFrame(feedRow):
    """Build one dict per record returned by ``iq.recordsFromFeed`` for a feed row.

    Args:
        feedRow: Indexable row; index 1 is stored as ``'team'``, index 2 as
            ``'type'``, and index 3 is passed to ``iq.recordsFromFeed``.

    Returns:
        A list of dicts with keys ``pubDate``, ``team``, ``title``, ``type``,
        ``link``, ``discription`` and ``creator``. The record-derived values
        are taken from positions 0-4 of each record.
    """
    team, feedType, feedUrl = feedRow[1], feedRow[2], feedRow[3]

    articles = []
    for record in iq.recordsFromFeed(feedUrl):
        articles.append({
            'pubDate': record[0],
            'team': team,
            'title': record[1],
            'type': feedType,
            'link': record[2],
            # The key spelling is relied on by sheetColumns(); keep as is.
            'discription': record[3],
            'creator': record[4],
        })
    return articles
    

In [6]:
# Collect the records of every feed row into one flat list.
data = []
for feedRow in feeds:
    # The Sheets API omits trailing empty cells, so a row whose feed-URL
    # column is blank can come back with fewer than four entries. Skip those
    # rows, along with blank or 'null' URLs, instead of raising IndexError.
    if len(feedRow) < 4 or feedRow[3] in ('', 'null'):
        continue
    data.extend(feedFrame(feedRow))
    # Progress line: the values in the row's second and first columns.
    print('team: '+ feedRow[1] + ' ' + feedRow[0] )

team: Steelers 0
team: Steelers 1
team: Steelers 2
team: Steelers 3
team: Steelers 4
team: Ravens 5
team: Ravens 6
team: Ravens 7
team: Ravens 8
team: Ravens 9
team: Ravens 10
team: Ravens 11
team: Ravens 12
team: Ravens 13
team: Ravens 14
team: Ravens 15
team: Ravens 16
team: Ravens 17
team: Bengals 18
team: Bengals 19
team: Bengals 20
team: Bengals 21
team: Bengals 22
team: Browns 23
team: Browns 24
team: Browns 25
team: Browns 26
team: Packers 28
team: Packers 29
team: Packers 30
team: Packers 31
team: Packers 32
team: Packers 33
team: Vikings 34
team: Vikings 35
team: Vikings 36
team: Vikings 37
team: Lions 38
team: Lions 39
team: Lions 40
team: Lions 41
team: Lions 42
team: Texans 43
team: Texans 44
team: Titans 45
team: Titans 46
team: Titans 47
team: Titans 48
team: Titans 49
team: Titans 50
team: Titans 51
team: Titans 52
team: Jaguars 53
team: Jaguars 54
team: Jaguars 55
team: Colts 56
team: Colts 57
team: Colts 58
team: Saints 59
team: Saints 60
team: Saints 61
team: Saints 6

In [7]:
# Total number of records collected from all processed feed rows.
len(data)

2106

In [8]:
from datetime import datetime

In [9]:
import pandas as pd

In [10]:
def getTime(date):
    """Parse the leading date/time of an RSS ``pubDate`` string.

    Only the first 25 characters are considered, so anything after the
    seconds field (such as a timezone suffix) is ignored and the result is a
    naive ``datetime`` holding the wall-clock time as written.

    Args:
        date: String such as ``'Wed, 06 Sep 2017 12:34:56 +0000'``.

    Returns:
        A naive ``datetime.datetime``.

    Raises:
        ValueError: If the text does not match ``'%a, %d %b %Y %H:%M:%S'``.
    """
    # With a one-digit day ('Wed, 6 Sep ...') the 25-character slice ends in
    # the separator space, which strptime rejects as unconverted data; strip
    # it so both day widths parse.
    return datetime.strptime(date[:25].strip(), '%a, %d %b %Y %H:%M:%S')

In [11]:
# Order the collected records by parsed publication time, latest first.
dataSorts = sorted(
    data,
    key=lambda article: getTime(article['pubDate']),
    reverse=True,
)

In [12]:
# Tabular view of the sorted records: one row per record dict.
df = pd.DataFrame(dataSorts)

In [13]:
def sheetColumns(record):
    """Flatten a record dict into a list of cell values in sheet column order.

    Args:
        record: Mapping with the keys ``pubDate``, ``team``, ``title``,
            ``type``, ``link``, ``discription`` and ``creator``.

    Returns:
        A list of the seven values in exactly that order. Extra keys are
        ignored.

    Raises:
        KeyError: If any of the seven keys is missing.
    """
    columnOrder = ('pubDate', 'team', 'title', 'type', 'link', 'discription', 'creator')
    return [record[column] for column in columnOrder]

In [14]:
def writeLinkData(dataColumns):
    """Write team news link rows to the NFL Team Articles spreadsheet.

    Target spreadsheet:
    https://docs.google.com/spreadsheets/d/1XiOZWw3S__3l20Fo0LzpMmnro9NYDulJtMko09KsZJQ/edit#gid=0

    The rows are sent with a Google Sheets ``values().update`` call anchored
    at ``Sheet1!A2`` using the ``RAW`` value input option.

    Args:
        dataColumns: The rows to write, sent as the ``'values'`` entry of the
            request body.

    Returns:
        The response returned by executing the update request.
    """
    # NOTE(review): this call only sends the supplied rows; presumably rows
    # left over from an earlier, longer run stay in the sheet -- confirm.
    authorized_http = get_credentials().authorize(httplib2.Http())
    sheets_service = discovery.build(
        'sheets', 'v4',
        http=authorized_http,
        discoveryServiceUrl='https://sheets.googleapis.com/$discovery/rest?version=v4',
    )

    update_request = sheets_service.spreadsheets().values().update(
        spreadsheetId='1XiOZWw3S__3l20Fo0LzpMmnro9NYDulJtMko09KsZJQ',
        range='Sheet1!A2',
        valueInputOption='RAW',
        body={'values': dataColumns},
    )
    return update_request.execute()

In [15]:
# Convert each sorted record to a row of cell values and push them to the sheet.
writeLinkData(list(map(sheetColumns, dataSorts)))

{'spreadsheetId': '1XiOZWw3S__3l20Fo0LzpMmnro9NYDulJtMko09KsZJQ',
 'updatedCells': 13590,
 'updatedColumns': 7,
 'updatedRange': 'Sheet1!A2:G2107',
 'updatedRows': 2106}