In [None]:
#Exploracion de datos historicos de las temporadas en la NBA

#Datos encontrados en la plataforma kaggle

import numpy as np 
import pandas as pd 

#Realizado con kaggle
import os
# Walk the Kaggle input directory and print the full path of every file found.
input_file_paths = (
    os.path.join(folder, file_name)
    for folder, _subfolders, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for file_path in input_file_paths:
    print(file_path)



 **PREPARAR LOS DATOS**

In [None]:
# Read the games table from the Kaggle "nba-games" input folder and preview its first rows.
games_csv = '/kaggle/input/nba-games/games.csv'
games = pd.read_csv(games_csv)
games.head()

In [None]:
# Print the summary of the games table (columns, dtypes, non-null counts).
games.info()

In [None]:
# Keep only the columns used in the rest of the analysis.
# .copy() makes the result an independent DataFrame, so the column assignments
# performed on `games` afterwards do not raise SettingWithCopyWarning.
games = games[['GAME_DATE_EST', 'GAME_ID', 'GAME_STATUS_TEXT', 'TEAM_ID_home', 'TEAM_ID_away', 'PTS_home', 'PTS_away', 'HOME_TEAM_WINS']].copy()
games.head()

In [None]:
# Convert the game date column to datetime.
# assign() returns a new DataFrame instead of writing into `games` in place, so no
# SettingWithCopyWarning is raised even when `games` came from a column selection.
games = games.assign(GAME_DATE_EST=pd.to_datetime(games['GAME_DATE_EST']))

In [None]:
# Add a 'year' column holding the calendar year of each game date.
# NOTE(review): this is the calendar year, not the NBA season — confirm that is intended.
game_dates = games['GAME_DATE_EST']
games['year'] = game_dates.dt.year
games.head()

In [None]:
# Read the teams table from the same Kaggle input folder and preview its first rows.
teams_csv = '/kaggle/input/nba-games/teams.csv'
teams = pd.read_csv(teams_csv)
teams.head()

In [None]:
# Keep only the team identifier, city and nickname columns.
team_columns = ['TEAM_ID', 'CITY', 'NICKNAME']
teams = teams[team_columns]
teams.head()

In [None]:
# Attach the matching `teams` row to each game, joining on the home team id (inner join).
home_games = games.merge(
    teams,
    how='inner',
    left_on='TEAM_ID_home',
    right_on='TEAM_ID',
)
home_games.head()

In [None]:
# Mark the city/nickname columns brought in by the merge as belonging to the home team.
home_column_names = {'CITY': 'city_home', 'NICKNAME': 'nickname_home'}
home_games = home_games.rename(columns=home_column_names)

home_games.head()

In [None]:
# Attach the matching `teams` row to each game again, this time joining on the away team id.
both_games = home_games.merge(
    teams,
    how='inner',
    left_on='TEAM_ID_away',
    right_on='TEAM_ID',
)
both_games.head()

In [None]:
# Mark the city/nickname columns brought in by the second merge as belonging to the away team.
# 'CITY' must become 'city_away': mapping it to 'city_home' would create a second
# column with that name, because the home city column already exists in this frame.
both_games = both_games.rename(columns={
    'CITY': 'city_away',
    'NICKNAME': 'nickname_away'
})

both_games.head()

In [None]:
# Drop the four team id columns. 'TEAM_ID_x' / 'TEAM_ID_y' are the suffixed names
# the second merge produced because both of its inputs carried a 'TEAM_ID' column.
team_id_columns = ['TEAM_ID_home', 'TEAM_ID_away', 'TEAM_ID_x', 'TEAM_ID_y']
both_games = both_games.drop(columns=team_id_columns)
both_games.head()

In [None]:
# Build the working table as a new frame derived from the merged data, with
# lower-case column names. Note that this rebinds `games` to the merged table.
games = both_games.rename(columns=str.lower)
games.head()

In [None]:
# Total points scored in each game: home points plus away points.
games['total_points'] = games['pts_home'].add(games['pts_away'])
games.head()

In [None]:
# The five games with the highest combined score.
games.nlargest(5, 'total_points')

In [None]:
# The five games with the lowest combined score.
games.nsmallest(5, 'total_points')

**GRAFICOS**

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Count how many games have each value of the home-win flag.
home_win_flags = games['home_team_wins']
total = home_win_flags.value_counts()
total

In [None]:
# Pie chart of home wins vs. away wins.
# Labels are looked up from the value_counts index (1 = home win, 0 = away win)
# instead of being assumed by position, because value_counts orders its result
# by frequency and the positional order would flip if away wins were more common.
outcome_labels = {1: 'casa', 0: 'visitante'}
plt.pie(
    total.values,
    labels=[outcome_labels.get(flag, str(flag)) for flag in total.index],
    autopct='%.0f%%',
)
plt.show()

In [None]:
# Chart | Points scored by each team in the home games it won
plt.figure(figsize=(20, 6))
# Games won by the home team; this frame is also read by later cells.
home_wins = games[games['home_team_wins'] == 1]
# Mean home points per team in ascending order, used only to order the bars.
# .mean() replaces aggregate(np.mean), which recent pandas versions flag with a
# FutureWarning when a NumPy callable is passed to a groupby aggregation.
result = (
    home_wins.groupby('nickname_home')['pts_home']
    .mean()
    .reset_index()
    .sort_values('pts_home')
)

ax = sns.barplot(data=home_wins, x='nickname_home', y='pts_home', order=result['nickname_home'])
# Write the bar height inside each bar.
ax.bar_label(ax.containers[-1], fmt='%.0f', label_type='center', rotation=45)
plt.xticks(rotation=75)
plt.show()

In [None]:
# Chart | Points scored by each team in the away games it won
plt.figure(figsize=(20, 6))
# Games won by the away team; this frame is also read by later cells.
away_wins = games[games['home_team_wins'] == 0]
# Mean away points per team in ascending order, used only to order the bars.
# .mean() replaces aggregate(np.mean), which recent pandas versions flag with a
# FutureWarning when a NumPy callable is passed to a groupby aggregation.
result = (
    away_wins.groupby('nickname_away')['pts_away']
    .mean()
    .reset_index()
    .sort_values('pts_away')
)

ax = sns.barplot(data=away_wins, x='nickname_away', y='pts_away', order=result['nickname_away'])
# Write the bar height inside each bar.
ax.bar_label(ax.containers[-1], fmt='%.0f', label_type='center', rotation=45)
plt.xticks(rotation=75)
plt.show()

In [None]:
# Chart | Distribution of total points per game (histogram with 50 bins)
sns.histplot(games, x='total_points', bins=50)

In [None]:
# Chart | Box plot of total points per game
sns.boxplot(data=games, x='total_points')

In [None]:
# Chart | Number of games each team won at home
plt.figure(figsize=(20, 6))
# Win counts per home team in ascending order; only the ordering of teams is used.
home_win_counts = home_wins['nickname_home'].value_counts()
result = home_win_counts.sort_values()
team_order = result.index

ax = sns.countplot(data=home_wins, x='nickname_home', order=team_order)
bars = ax.containers[-1]
ax.bar_label(bars, label_type='center', rotation=45)

plt.xticks(rotation=75)
plt.show()

In [None]:
# Chart | Number of games each team won as the away side
plt.figure(figsize=(20, 6))
# Win counts per away team in ascending order; only the ordering of teams is used.
away_win_counts = away_wins['nickname_away'].value_counts()
result = away_win_counts.sort_values()
team_order = result.index

ax = sns.countplot(data=away_wins, x='nickname_away', order=team_order)
bars = ax.containers[-1]
ax.bar_label(bars, label_type='center', rotation=45)

plt.xticks(rotation=75)
plt.show()

In [None]:
# Mean total points per game for each calendar year.
points_by_year = games.groupby('year')['total_points']
points_by_year.mean()

In [None]:
# Chart | Total points per game plotted against the year of the game
plt.figure(figsize=(20, 6))

ax = sns.lineplot(x='year', y='total_points', data=games)

plt.show()

In [None]:
# Ten most frequent (home team, away team) pairings among games won by the home team
x = home_wins[['nickname_home', 'nickname_away']].value_counts()
top_matchups = x.head(10)

ax = top_matchups.plot(kind='bar', figsize=[10, 5])
ax.set(title='Casa vs Visita', xlabel='Partidos', ylabel='Cantidad')
# Each tick label is the (home, away) pair the bar refers to.
ax.set_xticklabels(top_matchups.index, rotation=90)


plt.show()

In [None]:
# Ten most frequent (home team, away team) pairings among games won by the away team
x = away_wins[['nickname_home', 'nickname_away']].value_counts()
top_matchups = x.head(10)

ax = top_matchups.plot(kind='bar', figsize=[10, 5])
ax.set(title='Casa vs Visita', xlabel='Partidos', ylabel='Cantidad')
# Each tick label is the (home, away) pair the bar refers to.
ax.set_xticklabels(top_matchups.index, rotation=90)


plt.show()

In [None]:
# Read the games_details table from the same Kaggle input folder and preview its first rows.
game_details_csv = '/kaggle/input/nba-games/games_details.csv'
game_details = pd.read_csv(game_details_csv)
game_details.head()

In [None]:
# Keep only the game id, player name and points columns.
detail_columns = ['GAME_ID', 'PLAYER_NAME', 'PTS']
game_details = game_details[detail_columns]
game_details

In [None]:
# Join the player rows with the games table on the game id (inner join).
games_complete = game_details.merge(
    games,
    how='inner',
    left_on='GAME_ID',
    right_on='game_id',
)
games_complete.head()

In [None]:
# Mean of PTS for each (year, player) pair, ordered by year.
points_by_player_year = games_complete.groupby(['year', 'PLAYER_NAME'])['PTS']
player_pts = points_by_player_year.mean().reset_index().sort_values('year')
player_pts.head()

In [None]:
# Ten players with the highest mean PTS in 2022.
# NOTE(review): this ranks by mean, with no minimum number of games — confirm that is intended.
is_2022 = player_pts['year'] == 2022
player_pts_2022 = player_pts[is_2022]
player_pts_2022.sort_values(by='PTS', ascending=False).head(10)