## Extração de Dados
Site Oficial: https://www.nba.com/stats

### Objetivo: Construção de um card para orientar o técnico do time

In [1]:
import pandas as pd
import requests as r
from bs4 import BeautifulSoup

In [2]:
%reload_ext watermark
%watermark -a "Extração de Dados da NBA (2023-2024)"
%watermark --iversions

Author: Extração de Dados da NBA (2023-2024)

bs4     : 0.0.2
requests: 2.31.0
pandas  : 2.2.3



In [3]:
list_years = ['2023-24']

In [4]:
headers_scraping = {
    'Accept':'*/*',
    'Accept-Encoding':'gzip, deflate, br',
    'Accept-Language':'pt-BR,pt;q=0.9,en-US;q=0.8,en;q=0.7',
    'Connection':'keep-alive',
    'Host':'stats.nba.com',
    'Origin':'https://www.nba.com',
    'Referer':'https://www.nba.com/',
    'Sec-Ch-Ua':'"Google Chrome";v="119", "Chromium";v="119", "Not?A_Brand";v="24"',
    'Sec-Ch-Ua-Mobile':'?0',
    'Sec-Ch-Ua-Platform':'"Windows"',
    'Sec-Fetch-Dest':'empty',
    'Sec-Fetch-Mode':'cors',
    'Sec-Fetch-Site':'same-site',
    'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36'
}

# 1) Player Speed

In [5]:
concatenated_df_players_speed = pd.DataFrame()
for i in list_years:
    url_player_speed = f'https://stats.nba.com/stats/leaguedashptstats?College=&Conference=&Country=&DateFrom=&DateTo=&Division=&DraftPick=&DraftYear=&GameScope=&Height=&ISTRound=&LastNGames=0&LeagueID=00&Location=&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PerMode=Totals&PlayerExperience=&PlayerOrTeam=Player&PlayerPosition=&PtMeasureType=SpeedDistance&Season={i}&SeasonSegment=&SeasonType=Regular%20Season&StarterBench=&TeamID=0&VsConference=&VsDivision=&Weight='
    res_player_speed = r.get(url=url_player_speed, headers=headers_scraping).json()
    headers = res_player_speed['resultSets'][0]['headers']
    data = res_player_speed['resultSets'][0]['rowSet']

    df_players_speed = pd.DataFrame(data, columns=headers)
    df_players_speed['date'] = i
    concatenated_df_players_speed = pd.concat([concatenated_df_players_speed, df_players_speed], ignore_index=True)

In [6]:
def mph_to_kph(mph):
    kph = mph * 1.60934
    return kph

In [7]:
concatenated_df_players_speed['AVG_SPEED_KPH'] = concatenated_df_players_speed['AVG_SPEED'].apply(mph_to_kph).round(2)

In [8]:
concatenated_df_players_speed.to_csv('datasets/players_speed_23_24.csv',index=False)

In [37]:
concatenated_df_players_speed.head(3)

Unnamed: 0,PLAYER_ID,PLAYER_NAME,TEAM_ID,TEAM_ABBREVIATION,GP,W,L,MIN,DIST_FEET,DIST_MILES,DIST_MILES_OFF,DIST_MILES_DEF,AVG_SPEED,AVG_SPEED_OFF,AVG_SPEED_DEF,date,AVG_SPEED_KPH
0,1630639,A.J. Lawson,1610612742,DAL,41,26,15,308.0,134578,25.5,13.0,12.4,4.62,4.77,4.44,2023-24,7.44
1,1631260,AJ Green,1610612749,MIL,56,35,21,614.0,253296,48.0,25.4,22.6,4.34,4.72,3.99,2023-24,6.98
2,1631100,AJ Griffin,1610612737,ATL,20,8,12,171.0,74118,14.0,7.4,6.7,4.59,4.77,4.47,2023-24,7.39


# 2) Player Stats

In [9]:
concatenated_df_players_stats = pd.DataFrame()
for i in list_years:
    url_player_stats = f'https://stats.nba.com/stats/leagueLeaders?LeagueID=00&PerMode=Totals&Scope=S&Season={i}&SeasonType=Regular%20Season&StatCategory=PTS'
    res_player_stats = r.get(url=url_player_stats).json()
    headers = res_player_stats['resultSet']['headers']
    data = res_player_stats['resultSet']['rowSet']

    df_players_stats = pd.DataFrame(data, columns=headers)
    df_players_stats['date'] = i
    concatenated_df_players_stats = pd.concat([concatenated_df_players_stats, df_players_stats], ignore_index=True)

In [10]:
# 2023-24
concatenated_df_players_stats_23_24 = pd.DataFrame()
url_player_stats_23_24 = 'https://stats.nba.com/stats/leagueLeaders?LeagueID=00&PerMode=Totals&Scope=S&Season=2023-24&SeasonType=Regular%20Season&StatCategory=PTS'    
res_player_stats_23_24 = r.get(url=url_player_stats_23_24).json()
headers = res_player_stats_23_24['resultSet']['headers']
data = res_player_stats_23_24['resultSet']['rowSet']
df_players_stats_23_24 = pd.DataFrame(data, columns=headers)
concatenated_df_players_stats_23_24 = pd.concat([concatenated_df_players_stats_23_24, df_players_stats_23_24], ignore_index=True)

In [39]:
concatenated_df_players_stats_23_24.head(3)

Unnamed: 0,PLAYER_ID,PLAYER_NAME,MIN,AST,STL,BLK,TOV,PF,FGM,FGA,FG%
0,1629029,Luka Dončić,2624,686,99,38,282,149,804,1652,0.487
1,1628983,Shai Gilgeous-Alexander,2553,465,150,67,162,184,796,1487,0.535
2,203507,Giannis Antetokounmpo,2567,476,87,79,250,210,837,1369,0.611


In [11]:
# res_player_stats_23_24
# res_player_stats_23_24['resultSet']

In [12]:
concatenated_df_players_stats_23_24.to_csv('datasets/player_stats_23_24.csv',index=False)

# 3) Player Shooting

### By Zone

In [14]:
concatenated_df_players_shooting = pd.DataFrame()
for i in list_years:
    url_player_shooting = f'https://stats.nba.com/stats/leaguedashplayershotlocations?College=&Conference=&Country=&DateFrom=&DateTo=&DistanceRange=By%20Zone&Division=&DraftPick=&DraftYear=&GameScope=&GameSegment=&Height=&ISTRound=&LastNGames=0&Location=&MeasureType=Base&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PaceAdjust=N&PerMode=Totals&Period=0&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=N&Season={i}&SeasonSegment=&SeasonType=Regular%20Season&ShotClockRange=&StarterBench=&TeamID=0&VsConference=&VsDivision=&Weight='
    res_player_shooting = r.get(url=url_player_shooting, headers=headers_scraping).json()
    headers = res_player_shooting['resultSets']['headers']
    result = [header['columnNames'] for header in headers if header['name'] in ['SHOT_CATEGORY', 'columns']]
    
    categories = result[0]
    columns = result[1]

    default_columns = [f"{column}_default" for column in columns[:6]]

    new_columns_grouped = []

    for category in categories:
        category_columns = [f"{column}_{category}" for column in columns[6:]]
        new_columns_grouped.extend(category_columns[:3])

    final_columns = default_columns + new_columns_grouped
    
    data_player_shooting = res_player_shooting['resultSets']['rowSet']
    df_player_shooting = pd.DataFrame(data_player_shooting, columns=final_columns)
    df_player_shooting['date'] = i
    concatenated_df_players_shooting = pd.concat([concatenated_df_players_shooting, df_player_shooting], ignore_index=True)    

In [15]:
concatenated_df_players_shooting.to_csv('datasets/player_shooting_23_24.csv',index=False)

In [42]:
concatenated_df_players_shooting.head(3)

Unnamed: 0,PLAYER_ID_default,PLAYER_NAME_default,TEAM_ID_default,TEAM_ABBREVIATION_default,AGE_default,NICKNAME_default,FGM_Restricted Area,FGA_Restricted Area,FG_PCT_Restricted Area,FGM_In The Paint (Non-RA),...,FGM_Above the Break 3,FGA_Above the Break 3,FG_PCT_Above the Break 3,FGM_Backcourt,FGA_Backcourt,FG_PCT_Backcourt,FGM_Corner 3,FGA_Corner 3,FG_PCT_Corner 3,date
0,1630639,A.J. Lawson,1610612742,DAL,23.0,A.J.,38.0,53.0,0.717,1.0,...,5,18,0.278,0.0,0.0,0.0,8.0,32.0,0.25,2023-24
1,1631260,AJ Green,1610612749,MIL,24.0,AJ,6.0,7.0,0.857,0.0,...,51,130,0.392,0.0,0.0,0.0,18.0,39.0,0.462,2023-24
2,1631100,AJ Griffin,1610612737,ATL,20.0,AJ,3.0,5.0,0.6,3.0,...,8,30,0.267,0.0,0.0,0.0,2.0,9.0,0.222,2023-24


# 4) Team Stats

In [16]:
concatenated_df_team_stats = pd.DataFrame()
for i in list_years:
    url_team_stats = f'https://stats.nba.com/stats/leaguedashteamstats?Conference=&DateFrom=&DateTo=&Division=&GameScope=&GameSegment=&Height=&ISTRound=&LastNGames=0&LeagueID=00&Location=&MeasureType=Advanced&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PaceAdjust=N&PerMode=Totals&Period=0&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=N&Season={i}&SeasonSegment=&SeasonType=Regular%20Season&ShotClockRange=&StarterBench=&TeamID=0&TwoWay=0&VsConference=&VsDivision='
    res_team_stats = r.get(url=url_team_stats, headers=headers_scraping).json()
    headers = res_team_stats['resultSets'][0]['headers']
    data_team_stats = res_team_stats['resultSets'][0]['rowSet']

    df_team_stats = pd.DataFrame(data_team_stats, columns=headers)
    df_team_stats['date'] = i
    concatenated_df_team_stats = pd.concat([concatenated_df_team_stats, df_team_stats], ignore_index=True)

In [17]:
concatenated_df_team_stats.to_csv('datasets/team_stats_23_24.csv',index=False)

# 5) Teams Traditional

In [18]:
concatenated_df_teams_traditional = pd.DataFrame()
for i in list_years:
    url_teams_traditional = f'https://stats.nba.com/stats/leaguedashteamstats?Conference=&DateFrom=&DateTo=&Division=&GameScope=&GameSegment=&Height=&ISTRound=&LastNGames=0&LeagueID=00&Location=&MeasureType=Advanced&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PaceAdjust=N&PerMode=Totals&Period=0&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=N&Season={i}&SeasonSegment=&SeasonType=Regular%20Season&ShotClockRange=&StarterBench=&TeamID=0&TwoWay=0&VsConference=&VsDivision='
    res_team_traditional = r.get(url=url_teams_traditional, headers=headers_scraping).json()
    headers = res_team_traditional['resultSets'][0]['headers']
    data_teams_traditional = res_team_traditional['resultSets'][0]['rowSet']

    df_teams_traditional = pd.DataFrame(data_teams_traditional, columns=headers)
    df_teams_traditional['date'] = i
    concatenated_df_teams_traditional = pd.concat([concatenated_df_teams_traditional, df_teams_traditional], ignore_index=True)

In [19]:
concatenated_df_teams_traditional.to_csv('datasets/teams_traditional_23_24.csv',index=False)

In [43]:
concatenated_df_teams_traditional.head(3)

Unnamed: 0,TEAM_ID,TEAM_NAME,GP,W,L,W_PCT,MIN,E_OFF_RATING,OFF_RATING,E_DEF_RATING,...,AST_RATIO_RANK,OREB_PCT_RANK,DREB_PCT_RANK,REB_PCT_RANK,TM_TOV_PCT_RANK,EFG_PCT_RANK,TS_PCT_RANK,PACE_RANK,PIE_RANK,date
0,1610612737,Atlanta Hawks,82,36,46,0.439,3971.0,114.0,116.4,116.7,...,21,5,16,12,13,17,18,6,22,2023-24
1,1610612738,Boston Celtics,82,64,18,0.78,3966.0,120.2,122.2,109.0,...,13,14,7,5,1,2,1,19,1,2023-24
2,1610612751,Brooklyn Nets,82,32,50,0.39,3961.0,110.4,112.4,113.1,...,19,12,19,20,14,25,27,25,21,2023-24


# 6) Team Shooting

In [20]:
concatenated_df_team_shooting = pd.DataFrame()
for i in list_years:
    url_team_shooting = f'https://stats.nba.com/stats/leaguedashteamshotlocations?Conference=&DateFrom=&DateTo=&DistanceRange=By%20Zone&Division=&GameScope=&GameSegment=&ISTRound=&LastNGames=0&Location=&MeasureType=Base&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PaceAdjust=N&PerMode=Totals&Period=0&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=N&Season={i}&SeasonSegment=&SeasonType=Regular%20Season&ShotClockRange=&StarterBench=&TeamID=0&VsConference=&VsDivision='
    res_team_shooting = r.get(url=url_team_shooting, headers=headers_scraping).json()
    headers = res_team_shooting['resultSets']['headers']
    result = [header['columnNames'] for header in headers if header['name'] in ['SHOT_CATEGORY', 'columns']]

    categories = result[0]
    columns = result[1]

    default_columns = [f"{column}_default" for column in columns[:2]]

    new_columns_grouped = []

    for category in categories:
        category_columns = [f"{column}_{category}" for column in columns[5:]]
        new_columns_grouped.extend(category_columns[:3])

    final_columns = default_columns + new_columns_grouped
    
    data_team_shooting = res_team_shooting['resultSets']['rowSet']
    df_team_shooting = pd.DataFrame(data_team_shooting, columns=final_columns)
    df_team_shooting['date'] = i
    concatenated_df_team_shooting = pd.concat([concatenated_df_team_shooting, df_team_shooting], ignore_index=True)    

In [21]:
concatenated_df_team_shooting.to_csv('datasets/team_shooting_23_24.csv',index=False)

# 7) Player Bios

In [22]:
concatenated_df_player_bios = pd.DataFrame()
for i in list_years:
    url_player_bios = f'https://stats.nba.com/stats/leaguedashplayerbiostats?College=&Conference=&Country=&DateFrom=&DateTo=&Division=&DraftPick=&DraftYear=&GameScope=&GameSegment=&Height=&ISTRound=&LastNGames=0&LeagueID=00&Location=&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PerMode=Totals&Period=0&PlayerExperience=&PlayerPosition=&Season={i}&SeasonSegment=&SeasonType=Regular%20Season&ShotClockRange=&StarterBench=&TeamID=0&VsConference=&VsDivision=&Weight='
    res_player_bios = r.get(url=url_player_bios, headers=headers_scraping).json()
    headers = res_player_bios['resultSets'][0]['headers']
    data_player_bios = res_player_bios['resultSets'][0]['rowSet']

    df_player_bios = pd.DataFrame(data_player_bios, columns=headers)
    df_player_bios['date'] = i
    concatenated_df_player_bios = pd.concat([concatenated_df_player_bios, df_player_bios], ignore_index=True)

In [23]:
concatenated_df_player_bios.to_csv('datasets/player_bios_23_24.csv',index=False)

# 8) Regular Season Standings

In [24]:
concatenated_df_regular_season_standings = pd.DataFrame()
for i in list_years:
    url_regular_season_standings = f'https://stats.nba.com/stats/leaguestandingsv3?GroupBy=conf&LeagueID=00&Season={i}&SeasonType=Regular%20Season&Section=overall'
    res_regular_season_standings = r.get(url=url_regular_season_standings, headers=headers_scraping).json()
    headers = res_regular_season_standings['resultSets'][0]['headers']
    data_regular_season_standings = res_regular_season_standings['resultSets'][0]['rowSet']

    df_regular_season_standings = pd.DataFrame(data_regular_season_standings, columns=headers)
    df_regular_season_standings['date'] = i
    concatenated_df_regular_season_standings = pd.concat([concatenated_df_regular_season_standings, df_regular_season_standings], ignore_index=True)

In [25]:
important_cols = ['TeamID',
                  'TeamCity',
                  'TeamName',
                  'Conference',
                  'PlayoffRank',
                  'WINS',
                  'LOSSES',
                  'WinPCT',
                  'HOME', 
                  'ROAD',
                  'date']

In [26]:
concatenated_df_regular_season_standings

concatenated_df_regular_season_standings = concatenated_df_regular_season_standings[concatenated_df_regular_season_standings.columns[concatenated_df_regular_season_standings.columns.isin(important_cols)]]

In [27]:
concatenated_df_regular_season_standings.to_csv('datasets/regular_season_standings_23_24.csv',index=False)

## Players
Junção dos dados e filtro de algumas colunas

### Colunas que serão mantidas
- PLAYER_ID: Unique identifier for the player
- PLAYER_NAME: Full name of the player
- GP: Games Played
- PTS_PER_GP: Points per Game
- IMC: Impact Metric Composite (custom or advanced performance metric)
- NET_RATING: Net Rating (points per 100 possessions difference between offensive and defensive rating)
- OREB_PCT: Offensive Rebounding Percentage
- DREB_PCT: Defensive Rebounding Percentage
- USG_PCT: Usage Percentage
- TS_PCT: True Shooting Percentage
- FGM: Field Goals Made
- FGA: Field Goals Attempted
- FG%: Field Goal Percentage
- AST: Assists
- TOV: Turnovers
- PF: Personal Fouls
- TEAM_ABBREVIATION: Team Abbreviation (short code representing the player's team)
- FG_PCT_Restricted_Area: Field Goal Percentage in the Restricted Area
- FG_PCT_In_The_Paint_(Non-RA): Field Goal Percentage in the Paint (Non-Restricted Area)
- FG_PCT_Left_Corner_3: Field Goal Percentage from the Left Corner Three
- FG_PCT_Right_Corner_3: Field Goal Percentage from the Right Corner Three
- FG_PCT_Mid-Range: Field Goal Percentage from Mid-Range
- FG_PCT_Above_the_Break_3: Field Goal Percentage from Above the Break Three
- AST_PER_GP: Assists per Game
- STL_PER_GP: Steals per Game
- BLK_PER_GP: Blocks per Game
- MIN_PER_GP: Minutes Played per Game

In [28]:
def height_to_meter(american_height):    
    parts = american_height.split('-')
    feet = int(parts[0])
    inches = int(parts[1])
    height_in_meters = (feet * 0.3048) + (inches * 0.0254)
    return round(height_in_meters,2)

def pounds_to_kilos(weight_in_pounds):
    try:
        return round(float(weight_in_pounds) * 0.453592, 2)
    except:
        return None

In [29]:
concatenated_df_player_bios = concatenated_df_player_bios[
    ['PLAYER_ID', 'PLAYER_NAME', 'GP', 'PTS', 'REB', 'AST', 'NET_RATING', 'OREB_PCT', 'DREB_PCT', 'USG_PCT', 'TS_PCT', 'PLAYER_HEIGHT', 'PLAYER_WEIGHT']
]

concatenated_df_player_bios['PLAYER_HEIGHT'] = concatenated_df_player_bios['PLAYER_HEIGHT'].apply(height_to_meter)
concatenated_df_player_bios['PLAYER_WEIGHT'] = concatenated_df_player_bios['PLAYER_WEIGHT'].apply(pounds_to_kilos)
concatenated_df_player_bios['PTS_PER_GP'] = (concatenated_df_player_bios['PTS'] / concatenated_df_player_bios['GP']).round(2)
concatenated_df_player_bios['IMC'] = round(concatenated_df_player_bios['PLAYER_WEIGHT'] / (concatenated_df_player_bios['PLAYER_HEIGHT'] ** 2), 2)

concatenated_df_player_bios = concatenated_df_player_bios.drop(columns=['PTS', 'PLAYER_WEIGHT', 'PLAYER_HEIGHT', 'AST'])

concatenated_df_players_stats_23_24 = concatenated_df_players_stats_23_24[
    ['PLAYER_ID', 'PLAYER', 'MIN', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'FGM', 'FGA', 'FG_PCT']
].rename(columns={
    'PLAYER': 'PLAYER_NAME',
    'FG_PCT': 'FG%'
})

players = pd.merge(concatenated_df_player_bios, concatenated_df_players_stats_23_24, on=['PLAYER_ID', 'PLAYER_NAME'], how='outer')
players = players.loc[:, ['PLAYER_ID', 'PLAYER_NAME', 'GP', 'PTS_PER_GP', 'IMC', 'NET_RATING',
                          'OREB_PCT', 'DREB_PCT', 'USG_PCT', 'TS_PCT',
                          'FGM', 'FGA', 'FG%', 'MIN', 'AST', 'STL', 'BLK', 'TOV', 'PF']]

shooting_cols = [
    'PLAYER_ID_default', 'PLAYER_NAME_default', 'TEAM_ABBREVIATION_default',
    'FG_PCT_Restricted Area', 'FG_PCT_In The Paint (Non-RA)',
    'FG_PCT_Left Corner 3', 'FG_PCT_Right Corner 3',
    'FG_PCT_Mid-Range', 'FG_PCT_Above the Break 3'
]

shooting_df = concatenated_df_players_shooting[shooting_cols].rename(columns={
    'PLAYER_ID_default': 'PLAYER_ID',
    'PLAYER_NAME_default': 'PLAYER_NAME',
    'TEAM_ABBREVIATION_default': 'TEAM_ABBREVIATION',
    'FG_PCT_Restricted Area': 'FG_PCT_Restricted_Area',
    'FG_PCT_In The Paint (Non-RA)': 'FG_PCT_In_The_Paint_(Non-RA)',
    'FG_PCT_Left Corner 3': 'FG_PCT_Left_Corner_3',
    'FG_PCT_Right Corner 3': 'FG_PCT_Right_Corner_3',
    'FG_PCT_Above the Break 3': 'FG_PCT_Above_the_Break_3'
})

players = pd.merge(players, shooting_df, on=['PLAYER_ID', 'PLAYER_NAME'], how='left')

players['AST_PER_GP'] = players['AST'] / players['GP']
players['STL_PER_GP'] = players['STL'] / players['GP']
players['BLK_PER_GP'] = players['BLK'] / players['GP']
players['MIN_PER_GP'] = players['MIN'] / players['GP']


for i in players.columns:
    if 'PCT' in str(i) or '%' in str(i):
        players[i] *= 100

players = players.drop(columns=['STL', 'BLK', 'MIN'])

In [30]:
players.head()

Unnamed: 0,PLAYER_ID,PLAYER_NAME,GP,PTS_PER_GP,IMC,NET_RATING,OREB_PCT,DREB_PCT,USG_PCT,TS_PCT,...,FG_PCT_Restricted_Area,FG_PCT_In_The_Paint_(Non-RA),FG_PCT_Left_Corner_3,FG_PCT_Right_Corner_3,FG_PCT_Mid-Range,FG_PCT_Above_the_Break_3,AST_PER_GP,STL_PER_GP,BLK_PER_GP,MIN_PER_GP
0,2544,LeBron James,71,25.66,26.72,3.8,2.6,17.2,28.5,63.0,...,73.3,41.8,33.3,56.5,32.7,40.6,8.295775,1.253521,0.535211,35.267606
1,101108,Chris Paul,58,9.19,23.7,2.8,1.1,12.6,15.4,54.4,...,85.7,52.9,22.7,26.7,44.7,41.1,6.775862,1.206897,0.103448,26.396552
2,200768,Kyle Lowry,60,8.12,26.55,-0.3,2.0,9.8,12.8,59.4,...,58.8,46.2,45.5,38.1,45.8,38.3,4.216667,0.983333,0.35,28.166667
3,200782,P.J. Tucker,31,1.68,28.93,-1.3,5.9,11.6,5.2,50.7,...,57.1,0.0,33.3,40.0,50.0,0.0,0.516129,0.516129,0.225806,15.677419
4,201142,Kevin Durant,75,27.09,24.45,4.8,1.6,15.5,28.3,62.6,...,77.4,50.2,50.0,44.1,51.8,40.1,5.04,0.92,1.213333,37.213333


In [31]:
players = players.dropna()

In [32]:
players.to_csv('datasets/players.csv',index=False)

## Teams
Junção dos dados e filtro de algumas colunas

### Colunas que serão mantidas
- FG%: Field Goal Percentage
- FG_PCT_Restricted_Area_PER_GP: Field Goal Percentage in the Restricted Area per Game
- FG_PCT_In_The_Paint_(Non-RA)_PER_GP: Field Goal Percentage in the Paint (Non-Restricted Area) per Game
- FG_PCT_Left_Corner_3_PER_GP: Field Goal Percentage from Left Corner 3 per Game
- FG_PCT_Right_Corner_3_PER_GP: Field Goal Percentage from Right Corner 3 per Game
- FG_PCT_Mid-Range_PER_GP: Field Goal Percentage from Mid-Range per Game
- FG_PCT_Above_the_Break_3_PER_GP: Field Goal Percentage from Above the Break 3 per Game
- WinPCT: Winning Percentage in the Season
- HOME: Winning Percentage at Home
- ROAD: Winning Percentage Away
- GP: Games Played
- W: Wins
- L: Losses
- TEAM_ID: Team Identifier
- TEAM_NAME: Team Name

In [33]:
df_stats = concatenated_df_team_stats[
    ['TEAM_ID', 'TEAM_NAME', 'GP', 'W', 'L', 'EFG_PCT']
].rename(columns={
    'EFG_PCT': 'FG%'
})

df_shooting = concatenated_df_team_shooting[
    ['TEAM_ID_default',
     'FG_PCT_Restricted Area',
     'FG_PCT_In The Paint (Non-RA)',
     'FG_PCT_Left Corner 3',
     'FG_PCT_Right Corner 3',
     'FG_PCT_Mid-Range',
     'FG_PCT_Above the Break 3']
].rename(columns={
    'TEAM_ID_default': 'TEAM_ID',
    'FG_PCT_Restricted Area': 'FG_PCT_Restricted_Area_PER_GP',
    'FG_PCT_In The Paint (Non-RA)': 'FG_PCT_In_The_Paint_(Non-RA)_PER_GP',
    'FG_PCT_Left Corner 3': 'FG_PCT_Left_Corner_3_PER_GP',
    'FG_PCT_Right Corner 3': 'FG_PCT_Right_Corner_3_PER_GP',
    'FG_PCT_Mid-Range': 'FG_PCT_Mid-Range_PER_GP',
    'FG_PCT_Above the Break 3': 'FG_PCT_Above_the_Break_3_PER_GP'
})

df_extra = concatenated_df_regular_season_standings[
    ['TeamID', 'WinPCT', 'HOME', 'ROAD']
].rename(columns={
    'TeamID': 'TEAM_ID'
})

df_extra[['HOME_success', 'HOME_attempt']] = df_extra['HOME'].str.split('-', expand=True)
df_extra[['ROAD_success', 'ROAD_attempt']] = df_extra['ROAD'].str.split('-', expand=True)

df_extra['HOME_success'] = pd.to_numeric(df_extra['HOME_success'])
df_extra['HOME_attempt'] = pd.to_numeric(df_extra['HOME_attempt'])
df_extra['ROAD_success'] = pd.to_numeric(df_extra['ROAD_success'])
df_extra['ROAD_attempt'] = pd.to_numeric(df_extra['ROAD_attempt'])

df_extra['HOME_efficiency'] = round((df_extra['HOME_success'] / (df_extra['HOME_success'] + df_extra['HOME_attempt'])) * 100, 2)
df_extra['ROAD_efficiency'] = round((df_extra['ROAD_success'] / (df_extra['ROAD_success'] + df_extra['ROAD_attempt'])) * 100, 2)

df_extra['HOME'] = df_extra['HOME_efficiency']
df_extra['ROAD'] = df_extra['ROAD_efficiency']

df_extra = df_extra[['TEAM_ID', 'WinPCT', 'HOME', 'ROAD']]

teams = pd.merge(df_stats, df_shooting, on='TEAM_ID', how='left')
teams = pd.merge(teams, df_extra, on='TEAM_ID', how='left')

for col in teams.columns:
    if 'PCT' in col or '%' in col:
        teams[col] = round(teams[col] * 100, 2)

In [34]:
teams.head(3)

Unnamed: 0,TEAM_ID,TEAM_NAME,GP,W,L,FG%,FG_PCT_Restricted_Area_PER_GP,FG_PCT_In_The_Paint_(Non-RA)_PER_GP,FG_PCT_Left_Corner_3_PER_GP,FG_PCT_Right_Corner_3_PER_GP,FG_PCT_Mid-Range_PER_GP,FG_PCT_Above_the_Break_3_PER_GP,WinPCT,HOME,ROAD
0,1610612737,Atlanta Hawks,82,36,46,53.9,62.3,44.0,36.0,40.4,46.8,36.0,43.9,51.22,36.59
1,1610612738,Boston Celtics,82,64,18,57.8,70.3,45.4,42.1,43.9,43.1,37.7,78.0,90.24,65.85
2,1610612751,Brooklyn Nets,82,32,50,53.1,64.5,40.7,42.4,39.9,38.6,34.6,39.0,48.78,29.27


In [35]:
teams.to_csv('datasets/teams.csv',index=False)