In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_file_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Preguntas objetivo

- ¿Cómo se han ganado más partidos, estando en casa o de visitante?
- ¿Qué equipos han hecho la mayor cantidad de puntos, tanto en casa como visitante?
- ¿Cuántos partidos han ganado los distintos equipos estando en casa y de visitante?
- ¿Cómo se han comportado los puntos anotados a lo largo de los años?
- ¿Contra cuáles equipos han ganado más estando en casa y visitante?

In [None]:
# Load games.csv from the Kaggle "nba-games" input dataset and preview its first rows.
games = pd.read_csv('/kaggle/input/nba-games/games.csv')
games.head()

In [None]:
# Summarize the columns, dtypes and non-null counts of the games table as loaded.
games.info()

In [None]:
# Keep only the columns used by the analysis. The explicit .copy() makes
# `games` an independent frame, so the column assignments in later cells do
# not raise SettingWithCopyWarning.
games = games[['GAME_DATE_EST', 'GAME_ID', 'GAME_STATUS_TEXT', 'TEAM_ID_home',
               'TEAM_ID_away', 'PTS_home', 'PTS_away', 'HOME_TEAM_WINS']].copy()
games.head()

In [None]:
# Convert the game date column to datetime values. `assign` builds a new frame
# instead of writing into a column subset of the loaded data, which avoids
# SettingWithCopyWarning.
games = games.assign(GAME_DATE_EST=pd.to_datetime(games['GAME_DATE_EST']))

In [None]:
# Check the dtypes again now that GAME_DATE_EST has been converted.
games.info()

In [None]:
# Preview the rows after the date conversion.
games.head()

In [None]:
# Add the calendar year of each game date as a `year` column. `assign` returns
# a new frame, so no value is written into a slice of another DataFrame
# (avoids SettingWithCopyWarning).
games = games.assign(year=games['GAME_DATE_EST'].dt.year)
games.head()

In [None]:
# Load the teams table (teams.csv) from the same Kaggle dataset and preview its first rows.
teams = pd.read_csv('/kaggle/input/nba-games/teams.csv')
teams.head()

In [None]:
# Reduce the teams table to its id, city and nickname columns.
team_columns = ['TEAM_ID', 'CITY', 'NICKNAME']
teams = teams[team_columns]
teams.head()

In [None]:
# Attach the home team's city and nickname by matching TEAM_ID_home to teams.TEAM_ID.
home_games = games.merge(
    teams,
    how='inner',
    left_on='TEAM_ID_home',
    right_on='TEAM_ID',
)
home_games.head()

In [None]:
# Mark the freshly merged team columns as belonging to the home team.
home_column_names = {'CITY': 'city_home', 'NICKNAME': 'nickname_home'}
home_games = home_games.rename(columns=home_column_names)
home_games.head()

In [None]:
# Attach the away team's city and nickname by matching TEAM_ID_away to teams.TEAM_ID.
# Both inputs carry a TEAM_ID column at this point, so the result holds
# TEAM_ID_x and TEAM_ID_y.
both_games = home_games.merge(
    teams,
    how='inner',
    left_on='TEAM_ID_away',
    right_on='TEAM_ID',
)
both_games.head()

In [None]:
# Mark the freshly merged team columns as belonging to the away team.
away_column_names = {'CITY': 'city_away', 'NICKNAME': 'nickname_away'}
both_games = both_games.rename(columns=away_column_names)
both_games.head()

In [None]:
# The team id columns are no longer needed once city and nickname are attached.
team_id_columns = ['TEAM_ID_home', 'TEAM_ID_away', 'TEAM_ID_x', 'TEAM_ID_y']
both_games = both_games.drop(columns=team_id_columns)
both_games.head()

In [None]:
# Rebind `games` to a new frame holding the merged data with all column names lower-cased.
games = both_games.rename(columns=str.lower)
games.head()

In [None]:
# Combined score of each game: home points plus away points.
games['total_points'] = games['pts_home'].add(games['pts_away'])
games.head()

In [None]:
# Show the five games with the highest combined score.
games.nlargest(n=5, columns='total_points')

In [None]:
# Show the five games with the lowest combined score.
games.nsmallest(n=5, columns='total_points')

## Gráficos

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Count games by the home_team_wins flag; value_counts() lists the most
# frequent value first, so the row order depends on the data.
total = games['home_team_wins'].value_counts()
total

In [None]:
# Share of games won by the home team vs. the visiting team.
# `total` comes from value_counts(), which orders its index by frequency, so
# each wedge label is looked up from the index value (1 = home win, 0 = away
# win) instead of assuming that home wins are listed first.
outcome_labels = {1: 'Casa', 0: 'Visitante'}
wedge_labels = [outcome_labels.get(flag, str(flag)) for flag in total.index]
plt.pie(total.values, labels=wedge_labels, autopct='%.0f%%')
plt.show()

In [None]:
# Average points scored by each team in the home games it won, with bars
# ordered from the lowest to the highest average.
plt.figure(figsize=(20,6))
home_wins = games[games['home_team_wins'] == 1]
# Use the built-in groupby mean: passing np.mean to aggregate() emits a
# FutureWarning in pandas 2.1+ and yields the same values.
result = home_wins.groupby('nickname_home')['pts_home'].mean().reset_index().sort_values('pts_home')

ax = sns.barplot(data=home_wins, x='nickname_home', y='pts_home', order=result['nickname_home'], errorbar=None)
# Write each bar's value inside the bar.
ax.bar_label(ax.containers[-1], fmt='%.0f', label_type='center', rotation=45)
plt.xticks(rotation=75)
plt.show()

In [None]:
# Average points scored by each team in the away games it won, with bars
# ordered from the lowest to the highest average.
plt.figure(figsize=(20,6))
away_wins = games[games['home_team_wins'] == 0]
# Use the built-in groupby mean: passing np.mean to aggregate() emits a
# FutureWarning in pandas 2.1+ and yields the same values.
result = away_wins.groupby('nickname_away')['pts_away'].mean().reset_index().sort_values('pts_away')

ax = sns.barplot(data=away_wins, x='nickname_away', y='pts_away', order=result['nickname_away'], errorbar=None)
# Write each bar's value inside the bar.
ax.bar_label(ax.containers[-1], fmt='%.0f', label_type='center', rotation=45)
plt.xticks(rotation=75)
plt.show()

In [None]:
# Histogram of the combined points per game, using 50 bins.
sns.histplot(data=games, x='total_points', bins=50)

In [None]:
# Box plot of the combined points per game.
sns.boxplot(x=games['total_points'])

In [None]:
# Number of home wins per team, with bars ordered from fewest to most wins.
plt.figure(figsize=(20,6))

home_win_counts = home_wins['nickname_home'].value_counts().sort_values()
team_order = home_win_counts.index

ax = sns.countplot(data=home_wins, x='nickname_home', order=team_order)
# Write each bar's count inside the bar.
ax.bar_label(ax.containers[-1], rotation=45, label_type='center')

plt.xticks(rotation=75)
plt.show()

In [None]:
# Number of away wins per team, with bars ordered from fewest to most wins.
plt.figure(figsize=(20,6))

away_win_counts = away_wins['nickname_away'].value_counts().sort_values()
team_order = away_win_counts.index

ax = sns.countplot(data=away_wins, x='nickname_away', order=team_order)
# Write each bar's count inside the bar.
ax.bar_label(ax.containers[-1], rotation=45, label_type='center')

plt.xticks(rotation=75)
plt.show()

In [None]:
# Preview the working games table.
games.head()

In [None]:
# Mean combined points per game for each calendar year.
games.groupby('year')['total_points'].mean()

In [None]:
# Line plot of the combined points per game against the calendar year.
plt.figure(figsize=(20,6))

ax = sns.lineplot(data=games, x='year', y='total_points')

def format_xlabels(value, pos):
    """Return the tick value rounded to the nearest integer, as a string.

    Has the ``(value, pos)`` signature of a tick-formatter callback; ``pos``
    is accepted but not used.
    """
    nearest = int(round(value))
    return f'{nearest}'

# Render the x-axis tick values as whole numbers through format_xlabels.
ax.xaxis.set_major_formatter(plt.FuncFormatter(format_xlabels))

plt.show()

In [None]:
# Ten most frequent (home team, away team) pairings among games won by the home team.
matchup_counts = home_wins[['nickname_home', 'nickname_away']].value_counts()
top_matchups = matchup_counts.head(10)

ax = top_matchups.plot.bar(figsize=(10, 5))
ax.set_title('Casa vs visita')
ax.set_xlabel('Partidos')
ax.set_ylabel('cantidad')
ax.set_xticklabels(top_matchups.index, rotation=90)

# Write each bar's height just above the bar.
for bar in ax.patches:
    bar_top = bar.get_height()
    bar_center = bar.get_x() + bar.get_width()/2
    ax.annotate(f'{bar_top}', (bar_center, bar_top), ha='center', va='bottom')

plt.show()

In [None]:
# Ten most frequent (home team, away team) pairings among games won by the away team.
matchup_counts = away_wins[['nickname_home', 'nickname_away']].value_counts()
top_matchups = matchup_counts.head(10)

ax = top_matchups.plot.bar(figsize=(10, 5))
ax.set_title('Casa vs visita')
ax.set_xlabel('Partidos')
ax.set_ylabel('cantidad')
ax.set_xticklabels(top_matchups.index, rotation=90)

# Write each bar's height just above the bar.
for bar in ax.patches:
    bar_top = bar.get_height()
    bar_center = bar.get_x() + bar.get_width()/2
    ax.annotate(f'{bar_top}', (bar_center, bar_top), ha='center', va='bottom')

plt.show()

In [None]:
# Load games_details.csv from the same Kaggle dataset and preview its first rows.
game_details = pd.read_csv('/kaggle/input/nba-games/games_details.csv')
game_details.head()

In [None]:
# Summarize the columns, dtypes and non-null counts of the game details table.
game_details.info()

In [None]:
# Keep only the game id, player name and points columns, then display the frame.
game_details = game_details[['GAME_ID', 'PLAYER_NAME', 'PTS']]
game_details

In [None]:
# Review the columns of the games table before joining it to the player details.
games.info()

In [None]:
# Join each player row to its game by matching GAME_ID (details) to game_id (games).
games_complete = game_details.merge(
    games,
    how='inner',
    left_on='GAME_ID',
    right_on='game_id',
)
games_complete.head()

In [None]:
# Mean points per game for every (year, player) pair, as a flat table sorted by year.
mean_pts_by_player = games_complete.groupby(['year', 'PLAYER_NAME'])['PTS'].mean()
player_pts = mean_pts_by_player.reset_index().sort_values('year')
player_pts.head(10)

In [None]:
# Ten players with the highest mean points per game among the rows for year 2022.
rows_2022 = player_pts[player_pts['year'] == 2022]
rows_2022.sort_values(by='PTS', ascending=False).head(10)