# Test Case: NBA Statistics

Generate a csv file with some features of the all the active teams from 2014 to 2024
Features included:
 - Win 
 - Lost
 - Win Rate
 - Game Behind (GB) 
 - Points Per Game (PS/G)
 - Opponent Points Per Game (PA/G)
 - Simple Rating System (SRS) (Higher the better, below zero indicates points below average)

Caution:
 - Teams might appear more than once in certain division, and we don't want redundent data records. 
 - NBA doesn't have conference standing for the year 2014 to 2015

In [20]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

1. Request all the websites we need. 

In [21]:
# This Website follows the similar pattern as well.
# It has the following formatting:
# https://www.basketball-reference.com/leagues/NBA_2014.html
# https://www.basketball-reference.com/leagues/NBA_2015.html
# to
# https://www.basketball-reference.com/leagues/NBA_2024.html

# Lets create replacement strings for each year.
year = list(range(2014,2025,1))

# Now lets request all the years. 
def request_all_years(years):
    """
    This function allows the user to request webpage for all the years listed in "year" attribute.
    """
    result = []
    header = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36"
    }
    for year in years:
        url = f'https://www.basketball-reference.com/leagues/NBA_{year}.html'
        r = requests.get(url, headers=header)
        if r.status_code != 200:
            return f'Error found in the following: {url} with the status code of {r.status_code}'
        result.append(r.text)
    return result

In [28]:
htmls = request_all_years(years=year)
for i in htmls:
    print(i)


<!DOCTYPE html>
<html data-version="klecko-" data-root="/home/bbr/build" lang="en" class="no-js" >
<head>
    <meta charset="utf-8">
    <meta http-equiv="x-ua-compatible" content="ie=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=2.0" />
    <link rel="dns-prefetch" href="https://cdn.ssref.net/req/202406111" />



<!-- yes-inmobi-ssi -->  
<!-- InMobi Choice. Consent Manager Tag v3.0 (for TCF 2.2) -->
<script type="text/javascript" async=true>
(function() {
  var host = window.location.hostname;
  var element = document.createElement('script');
  var firstScript = document.getElementsByTagName('script')[0];
  var url = 'https://cmp.inmobi.com'
    .concat('/choice/', 'XwNYEpNeFfhfr', '/', host, '/choice.js?tag_version=V3');
  var uspTries = 0;
  var uspTriesLimit = 3;
  element.async = true;
  element.type = 'text/javascript';
  element.src = url;

  firstScript.parentNode.insertBefore(element, firstScript);

  function makeStub() {
   

2. Html parsing will be our next step.

In [112]:
# After the request stage, all we have to do is try to grab the features from the html webpage.
# Since the html page act differently for certain years, we have to introduce a new parameter. 

def check_redundent(list, element):
    """
    Helper function: check redundency
    """
    for i in list:
        if i == element:
            return True
    return False

def parse_single_html(html, year):
    soup = BeautifulSoup(html, "html.parser")
    data = []
    redundent = []
    table_section = (
        soup.find_all("tr", class_ = "full_table")
    )

    for record in table_section:
        name = record.find("th").get_text()
        if len(redundent) > 0: # Check for redundence team if there's there's records already
            if check_redundent(redundent, name):
                continue

        redundent.append(name) # Make sure to add team record

        # All other statistics can be found under "td" tag
        other_statistic = record.find_all("td")
        win = other_statistic[0].get_text()
        lost = other_statistic[1].get_text()
        wr = other_statistic[2].get_text()
        gb = other_statistic[3].get_text()
        psg = other_statistic[4].get_text()
        pag = other_statistic[5].get_text()
        srs =  other_statistic[6].get_text()

        data.append({
            "Name": name,
            "Win": win,
            "Lost": lost,
            "Win Rate": wr,
            "Game Behind": gb,
            "Points Per Game": psg,
            "Opponent Points Per Game": pag,
            "Simple Rating System(SRS)": srs,
            "Year": year
        })
    
    return data

In [113]:
data = []
temp = 0
for html in htmls:
    data.extend(parse_single_html(html, year=year[temp]))
    temp += 1
data

[{'Name': 'Toronto Raptors*',
  'Win': '48',
  'Lost': '34',
  'Win Rate': '.585',
  'Game Behind': '—',
  'Points Per Game': '101.3',
  'Opponent Points Per Game': '98.0',
  'Simple Rating System(SRS)': '2.55',
  'Year': 2014},
 {'Name': 'Brooklyn Nets*',
  'Win': '44',
  'Lost': '38',
  'Win Rate': '.537',
  'Game Behind': '4.0',
  'Points Per Game': '98.5',
  'Opponent Points Per Game': '99.5',
  'Simple Rating System(SRS)': '-1.58',
  'Year': 2014},
 {'Name': 'New York Knicks',
  'Win': '37',
  'Lost': '45',
  'Win Rate': '.451',
  'Game Behind': '11.0',
  'Points Per Game': '98.6',
  'Opponent Points Per Game': '99.4',
  'Simple Rating System(SRS)': '-1.40',
  'Year': 2014},
 {'Name': 'Boston Celtics',
  'Win': '25',
  'Lost': '57',
  'Win Rate': '.305',
  'Game Behind': '23.0',
  'Points Per Game': '96.2',
  'Opponent Points Per Game': '100.7',
  'Simple Rating System(SRS)': '-4.97',
  'Year': 2014},
 {'Name': 'Philadelphia 76ers',
  'Win': '19',
  'Lost': '63',
  'Win Rate': '.2

3. Export as csv file

In [114]:
df = pd.DataFrame(data)
df.to_csv("NBA.csv")

4. Reload the csv file

In [115]:
csv = pd.read_csv("NBA.csv")
csv.head(30)

Unnamed: 0.1,Unnamed: 0,Name,Win,Lost,Win Rate,Game Behind,Points Per Game,Opponent Points Per Game,Simple Rating System(SRS),Year
0,0,Toronto Raptors*,48,34,0.585,—,101.3,98.0,2.55,2014
1,1,Brooklyn Nets*,44,38,0.537,4.0,98.5,99.5,-1.58,2014
2,2,New York Knicks,37,45,0.451,11.0,98.6,99.4,-1.4,2014
3,3,Boston Celtics,25,57,0.305,23.0,96.2,100.7,-4.97,2014
4,4,Philadelphia 76ers,19,63,0.232,29.0,99.5,109.9,-10.66,2014
5,5,Indiana Pacers*,56,26,0.683,—,96.7,92.3,3.63,2014
6,6,Chicago Bulls*,48,34,0.585,8.0,93.7,91.8,1.2,2014
7,7,Cleveland Cavaliers,33,49,0.402,23.0,98.2,101.5,-3.86,2014
8,8,Detroit Pistons,29,53,0.354,27.0,101.0,104.7,-4.13,2014
9,9,Milwaukee Bucks,15,67,0.183,41.0,95.5,103.7,-8.41,2014


: 