In [1]:
from collections import defaultdict
from pathlib import Path

from bs4 import BeautifulSoup
from requests import Session
import pandas as pd

In [2]:
# Page names that get appended to the base section URL.
results, fixtures = 'results', 'fixtures'

In [3]:
# Base URL of the BBC Sport American-football section; a page name such as
# `fixtures` is appended to it to form the address that is requested.
url = 'https://www.bbc.com/sport/american-football/'

In [4]:
# One shared HTTP session so every request carries the same headers.
session = Session()

# Browser-like User-Agent, assembled from its three space-separated parts.
user_agent = ' '.join((
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1)',
    'AppleWebKit/537.36 (KHTML, like Gecko)',
    'Chrome/39.0.2171.95 Safari/537.36',
))

headers = {
    'User-Agent': user_agent,
    'Content-Type': 'application/json',
}

session.headers.update(headers)

In [5]:
# Download the fixtures page and collect one tag per fixture block.
fixtures_url = f'{url}{fixtures}'

# A timeout keeps a stalled connection from hanging the kernel indefinitely.
r = session.get(fixtures_url, timeout=30)
# Stop on an HTTP error instead of parsing an error page as if it held fixtures.
r.raise_for_status()

fixtures_soup = BeautifulSoup(r.content, 'html.parser')

games_soup = fixtures_soup.find('div', {'id': 'sp-c-filter-contents'})
if games_soup is None:
    # find() returns None when nothing matches; fail with a clear message
    # rather than an AttributeError on the find_all call below.
    raise RuntimeError(
        f"No 'sp-c-filter-contents' container found at {fixtures_url}"
    )

games = games_soup.find_all('span', {'class': 'qa-fixture-block'})

In [6]:
# Collect one entry per fixture block. Every column receives exactly one
# append per block, so the lists in `data` stay the same length.
data = defaultdict(list)
for block in games:
    data['game_date'].append(block.h3.get_text())
    data['game_round'].append(block.h5.get_text())

    # Each <li> must hold exactly two <abbr> tags: the first is stored as
    # home, the second as away (anything else fails the unpacking).
    matchups = []
    for item in block.find_all('li'):
        home, away = item.find_all('abbr')
        matchups.append({'home': home['title'], 'away': away['title']})
    data['games'].append(matchups)

    time_spans = block.find_all('span', class_='sp-c-fixture__number--time')
    data['time'].append([{'time': span.get_text()} for span in time_spans])

    venue_spans = block.find_all('span', class_='sp-c-fixture__venue')
    data['venue'].append([{'venue': span.get_text()} for span in venue_spans])

In [7]:
# Turn the collected column lists into a DataFrame: one row per fixture block,
# with list-valued `games`, `time` and `venue` cells.
df = pd.DataFrame(data)

In [8]:
# Preview the first rows of the frame.
df.head()

Unnamed: 0,game_date,game_round,games,time,venue
0,Friday 6th September 2019,Round 1,"[{'home': 'Chicago Bears', 'away': 'Green Bay ...",[{'time': '00:20'}],[{'venue': 'Venue: Soldier Field'}]
1,Sunday 8th September 2019,Round 1,"[{'home': 'Carolina Panthers', 'away': 'Los An...","[{'time': '17:00'}, {'time': '17:00'}, {'time'...","[{'venue': 'Venue: Bank of America Stadium'}, ..."
2,Monday 9th September 2019,Round 1,"[{'home': 'New England Patriots', 'away': 'Pit...",[{'time': '00:20'}],[{'venue': 'Venue: Gillette Stadium'}]
3,Tuesday 10th September 2019,Round 1,"[{'home': 'New Orleans Saints', 'away': 'Houst...","[{'time': '23:10'}, {'time': '02:20'}]","[{'venue': 'Venue: Mercedes-Benz Superdome'}, ..."
4,Friday 13th September 2019,Round 2,"[{'home': 'Carolina Panthers', 'away': 'Tampa ...",[{'time': '00:20'}],[{'venue': 'Venue: Bank of America Stadium'}]


In [11]:
# Page that is downloaded and searched for <figure> elements holding team logos.
logos_url = 'http://loodibee.com/nfl/'

In [9]:
# Directory that receives the downloaded logo files; created if missing.
folder = './logos/'
logos_dir = Path(folder)
logos_dir.mkdir(parents=True, exist_ok=True)

In [12]:
# Fetch the logo index page; the timeout prevents an indefinite hang.
s = session.get(logos_url, timeout=30)
# Raise on an HTTP error so an error page is never parsed as the logo listing.
s.raise_for_status()

In [13]:
# Parse the downloaded page, then keep only its <figure> elements.
logos_page = BeautifulSoup(s.content, 'html.parser')
logos_soup = logos_page.find_all('figure')

In [14]:
# Build (team, image URL) pairs, one per <figure>.
team_name_logo = []
for figure in logos_soup:
    # Drop the last character of the link target
    # (presumably a trailing slash - TODO confirm against the page).
    team_slug = figure.a['href'][:-1]
    image_src = figure.noscript.img['src']
    team_name_logo.append((team_slug, image_src))

# Discard the final three entries
# (presumably figures that are not team logos - TODO confirm).
team_name_logo = team_name_logo[:-3]

In [15]:
# Download every team logo into `folder`, reporting success or failure per team.
for team, image_url in team_name_logo:
    # Human-readable label, e.g. 'arizona-cardinals' -> 'Arizona Cardinals'.
    team_label = team.replace("-", " ").title()
    # stream=True is not used: the whole body is read through .content anyway,
    # and a streamed response that is never read (the failure branch) keeps
    # its connection checked out of the session's pool.
    # The timeout stops one stalled download from hanging the whole loop.
    response = session.get(str(image_url), timeout=30)
    if response.ok:
        print(f'Success Team: {team_label}')
        with open(f'{folder}{team}.png', 'wb') as f:
            f.write(response.content)
    else:
        print(f'Failed Team: {team_label}')

Success Team: Arizona Cardinals
Success Team: Atlanta Falcons
Success Team: Baltimore Ravens
Success Team: Buffalo Bills
Success Team: Carolina Panthers
Success Team: Chicago Bears
Success Team: Cincinnati Bengals
Success Team: Cleveland Browns
Success Team: Dallas Cowboys
Success Team: Denver Broncos
Success Team: Detroit Lions
Success Team: Green Bay Packers
Success Team: Houston Texans
Success Team: Indianapolis Colts
Success Team: Jacksonville Jaguars
Success Team: Kansas City Chiefs
Success Team: Los Angeles Chargers
Success Team: Los Angeles Rams
Success Team: Miami Dolphins
Success Team: Minnesota Vikings
Success Team: New England Patriots
Success Team: New Orleans Saints
Success Team: New York Giants
Success Team: New York Jets
Success Team: Oakland Raiders
Success Team: Philadelphia Eagles
Success Team: Pittsburgh Steelers
Success Team: San Francisco 49Ers
Success Team: Seattle Seahawks
Success Team: Tampa Bay Buccaneers
Success Team: Tennessee Titans
Success Team: Washington 

In [48]:
# Explode each list column on its own: every list element becomes a separate
# row, and the rows keep the index label of the row they came from.
games, time, venue = (
    df.explode(column) for column in ('games', 'time', 'venue')
)

In [51]:
# In each exploded frame, remove the two list columns that were not exploded.
for frame, unexploded in (
    (games, ['time', 'venue']),
    (time, ['games', 'venue']),
    (venue, ['time', 'games']),
):
    frame.drop(columns=unexploded, inplace=True)

In [60]:
# Copy the exploded time and venue columns onto the games frame.
# The assignment aligns on the index, so all three frames must share it.
for column, source in (('time', time), ('venue', venue)):
    games[column] = source[column]

In [65]:
# Rebind df to the exploded games frame (now carrying the time and venue columns).
# This is a new name for the same object, not a copy.
df = games

In [66]:
 # Create the output directory first: to_json does not create missing parent folders.
 Path('data').mkdir(parents=True, exist_ok=True)
 df.to_json('data/fixtures.json', orient='records')

In [67]:
# Read the fixtures back from the JSON file (a list of record objects) into df.
df = pd.read_json('data/fixtures.json', orient='records')

In [68]:
# Display the frame as the cell's output.
df

Unnamed: 0,game_date,game_round,games,time,venue
0,Friday 6th September 2019,Round 1,"{'home': 'Chicago Bears', 'away': 'Green Bay P...",{'time': '00:20'},{'venue': 'Venue: Soldier Field'}
1,Sunday 8th September 2019,Round 1,"{'home': 'Carolina Panthers', 'away': 'Los Ang...",{'time': '17:00'},{'venue': 'Venue: Bank of America Stadium'}
2,Sunday 8th September 2019,Round 1,"{'home': 'New York Jets', 'away': 'Buffalo Bil...",{'time': '17:00'},{'venue': 'Venue: MetLife Stadium'}
3,Sunday 8th September 2019,Round 1,"{'home': 'Minnesota Vikings', 'away': 'Atlanta...",{'time': '17:00'},{'venue': 'Venue: US Bank Stadium'}
4,Sunday 8th September 2019,Round 1,"{'home': 'Miami Dolphins', 'away': 'Baltimore ...",{'time': '17:00'},{'venue': 'Venue: Hard Rock Stadium'}
5,Sunday 8th September 2019,Round 1,"{'home': 'Jacksonville Jaguars', 'away': 'Kans...",{'time': '17:00'},{'venue': 'Venue: TIAA Bank Field'}
6,Sunday 8th September 2019,Round 1,"{'home': 'Cleveland Browns', 'away': 'Tennesse...",{'time': '17:00'},{'venue': 'Venue: FirstEnergy Stadium'}
7,Sunday 8th September 2019,Round 1,"{'home': 'Philadelphia Eagles', 'away': 'Washi...",{'time': '17:00'},{'venue': 'Venue: Lincoln Financial Field'}
8,Sunday 8th September 2019,Round 1,"{'home': 'Seattle Seahawks', 'away': 'Cincinna...",{'time': '20:05'},{'venue': 'Venue: CenturyLink Field'}
9,Sunday 8th September 2019,Round 1,"{'home': 'Los Angeles Chargers', 'away': 'Indi...",{'time': '20:05'},{'venue': 'Venue: StubHub Center'}
