# NBA Logo Scraper

## Import Deps

In [1]:
%config IPCompleter.greedy=True

# https://docs.python.org/3/library/datetime.html
import datetime as dt

# https://docs.python.org/3/howto/regex.html
import re

# https://beautiful-soup-4.readthedocs.io/en/latest/
from bs4 import BeautifulSoup as bs

# https://splinter.readthedocs.io/en/latest/#
from splinter import Browser

## Using Splinter to Download HTML

But first, check when the Splinter code was last run, so the page is only downloaded again if no saved copy exists.

In [2]:
import pathlib
from os import scandir


def convert_date(timestamp):
    """Format a POSIX timestamp as a ``DD Mon YYYY`` date string in UTC.

    Args:
        timestamp: Seconds since the Unix epoch, e.g. the ``st_mtime``
            value of a ``stat()`` result.

    Returns:
        str: The UTC calendar date rendered with ``"%d %b %Y"``
        (for example ``"28 Dec 2020"``).
    """
    # datetime.utcfromtimestamp() is deprecated since Python 3.12; a
    # timezone-aware datetime pinned to UTC formats to the same date.
    d = dt.datetime.fromtimestamp(timestamp, tz=dt.timezone.utc)
    return d.strftime("%d %b %Y")


# Reuse the saved copy of the page when there is one; the browser is only
# launched when nbapage.html is missing from the working directory.
file = pathlib.Path("nbapage.html")
if not file.exists():
    # Drive a headless Chrome session through Splinter to fetch the page.
    with Browser("chrome", executable_path="chromedriver", headless=True) as browser:
        url = "https://cdn.nba.com/"
        browser.visit(url)

        # The visited URL is already the page that holds the logos, so the
        # rendered markup is taken as-is and handed to bs4 in a later cell.
        html = browser.html

        # Persist the markup locally so later runs do not request the site again.
        with file.open("w", encoding="utf8") as f:
            f.write(html)
else:
    info = file.stat()
    print(f"File already exists and last updated on {convert_date(info.st_mtime)}")

# browser = Browser("chrome", executable_path="chromedriver", headless=True)

File already exists and last updated on 28 Dec 2020


In [3]:
# Load the locally saved page from disk.
with open("nbapage.html", "r", encoding="UTF-8", errors="strict") as f:
    opened_html = f.read()

# Build the soup from the text that was read, using the "html.parser" backend.
soup = bs(opened_html, "html.parser")

# Uncomment to inspect the document structure while planning the scrape:
# print(soup.prettify())

# Collect every <img> tag; these are filtered down to the logo images afterwards.
list_of_img_tags = soup.find_all("img")
# print(list_of_img_tags)

In [4]:
# Raw-attribute pattern kept for reference only: it is printed but never
# used for matching.
regex = r"alt\=\"[\w\s]+Logo\""
print(regex)
# bs4 exposes the alt attribute's text directly, so only that text needs to
# be matched (word characters/whitespace followed by "Logo").
alt_text_pattern = re.compile(r"[\w\s]+Logo")
print(alt_text_pattern)

# One {"Team Name": <alt text>, "URL": <src>} record per logo image.
img_url_list = []

for img in list_of_img_tags:
    alt_text = img.get("alt")
    # An <img> without an alt attribute yields None, and fullmatch(None)
    # raises TypeError; such tags cannot match the pattern, so skip them.
    if alt_text is None:
        continue
    if alt_text_pattern.fullmatch(alt_text):
        img_url_list.append({"Team Name": alt_text, "URL": img.get("src")})

# print(img_url_list)

alt\=\"[\w\s]+Logo\"
re.compile('[\\w\\s]+Logo')


## Download Images with Requests

In [5]:
## Importing Necessary Modules
# https://requests.readthedocs.io/en/master/
# https://docs.python.org/3/library/shutil.html#module-shutil
import shutil  # to save it locally

import requests  # to get image from the web

# Upper bound (in seconds) on connecting and on each read, so a single
# stalled server cannot hang the whole cell indefinitely.
REQUEST_TIMEOUT_SECONDS = 10

# Download every logo listed in img_url_list into the working directory.
for i in img_url_list:
    # Using the response as a context manager closes the streamed connection
    # once the body has been copied (or if the copy fails part-way).
    with requests.get(
        i["URL"], stream=True, timeout=REQUEST_TIMEOUT_SECONDS
    ) as response:
        if response.status_code == 200:
            # Ask the raw stream to decode any compressed transfer encoding
            # so the bytes written to disk are the actual image data.
            response.raw.decode_content = True

            # File name derived from the alt text: lower-cased, spaces -> dashes.
            processed_file_name = i["Team Name"].lower().replace(" ", "-")

            # NOTE(review): the .svg extension is assumed for every logo and
            # is not checked against the response's content type - confirm.
            with open(f"{processed_file_name}.svg", "wb") as f:
                shutil.copyfileobj(response.raw, f)

            print(f"Image sucessfully Downloaded: {processed_file_name}")

        else:
            print("Image Couldn't be retreived")

Image sucessfully Downloaded: nba-logo
Image sucessfully Downloaded: boston-celtics-logo
Image sucessfully Downloaded: brooklyn-nets-logo
Image sucessfully Downloaded: new-york-knicks-logo
Image sucessfully Downloaded: philadelphia-76ers-logo
Image sucessfully Downloaded: toronto-raptors-logo
Image sucessfully Downloaded: chicago-bulls-logo
Image sucessfully Downloaded: cleveland-cavaliers-logo
Image sucessfully Downloaded: detroit-pistons-logo
Image sucessfully Downloaded: indiana-pacers-logo
Image sucessfully Downloaded: milwaukee-bucks-logo
Image sucessfully Downloaded: atlanta-hawks-logo
Image sucessfully Downloaded: charlotte-hornets-logo
Image sucessfully Downloaded: miami-heat-logo
Image sucessfully Downloaded: orlando-magic-logo
Image sucessfully Downloaded: washington-wizards-logo
Image sucessfully Downloaded: denver-nuggets-logo
Image sucessfully Downloaded: minnesota-timberwolves-logo
Image sucessfully Downloaded: oklahoma-city-thunder-logo
Image sucessfully Downloaded: port