Import and read file as df_nba

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Load the dataset from a CSV file (path relative to the working directory)
# into the DataFrame `df_nba`, which every cell below reads from.
df_nba = pd.read_csv('datasets/nba_final.csv')

## Preview dataset

In [None]:
# Preview the first rows of the dataset (head() returns 5 rows by default).
df_nba.head()

## Dataset overview

In [None]:
# Descriptive statistics of the dataset (numeric columns by default).
df_nba.describe()

## Best player per position

In [None]:
def top_player_points(df):
    """Plot the highest-scoring player of each position as a bar chart.

    For every value of the ``Pos`` column, the row with the largest ``PTS`` is
    selected and drawn as one bar (x = ``Player``, height = ``PTS``). The
    legend entry of each bar also lists that player's ``PTS``, ``AST`` and
    ``TRB`` values.

    Args:
        df: DataFrame providing the ``Pos``, ``Player``, ``PTS``, ``AST`` and
            ``TRB`` columns. It is not modified.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # Keep the first row per position after sorting by points. This replaces
    # ``groupby('Pos').apply(...)``: newer pandas releases exclude the grouping
    # column from ``apply``, which drops 'Pos' and breaks any later use of it.
    # Rows without a position are ignored (as groupby did), missing PTS sort
    # last, and the stable sort resolves ties to the row that comes first.
    leaders = (
        df.dropna(subset=['Pos'])
        .sort_values('PTS', ascending=False, kind='stable')
        .drop_duplicates(subset='Pos', keep='first')
        .sort_values('Pos')
    )

    _, ax = plt.subplots(figsize=(10, 6))

    # One bar per position, drawn in sorted position order.
    for position, player, pts, ast, trb in zip(
        leaders['Pos'], leaders['Player'], leaders['PTS'], leaders['AST'], leaders['TRB']
    ):
        ax.bar([player], [pts], label=f"{position} - PTS: {pts}, AST: {ast}, TRB: {trb}")

    ax.set_xlabel('Jogadores')
    ax.set_ylabel('Pontos (PTS)')
    ax.set_title('Principais jogadores por posição em termos de pontos (PTS), assistências (AST) e rebotes (TRB)')
    ax.legend()
    plt.show()

In [None]:
# Plot the top scorer of each position for the full dataset.
top_player_points(df_nba)

In [None]:
def top_player_assits(df):
    """Plot the player with the most assists of each position as a bar chart.

    For every value of the ``Pos`` column, the row with the largest ``AST`` is
    selected and drawn as one bar (x = ``Player``, height = ``AST``). The
    legend entry of each bar also lists that player's ``PTS``, ``AST`` and
    ``TRB`` values.

    Args:
        df: DataFrame providing the ``Pos``, ``Player``, ``PTS``, ``AST`` and
            ``TRB`` columns. It is not modified.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # Keep the first row per position after sorting by assists. This replaces
    # ``groupby('Pos').apply(...)``: newer pandas releases exclude the grouping
    # column from ``apply``, which drops 'Pos' and breaks any later use of it.
    # Rows without a position are ignored (as groupby did), missing AST sort
    # last, and the stable sort resolves ties to the row that comes first.
    leaders = (
        df.dropna(subset=['Pos'])
        .sort_values('AST', ascending=False, kind='stable')
        .drop_duplicates(subset='Pos', keep='first')
        .sort_values('Pos')
    )

    _, ax = plt.subplots(figsize=(10, 6))

    # One bar per position, drawn in sorted position order.
    for position, player, pts, ast, trb in zip(
        leaders['Pos'], leaders['Player'], leaders['PTS'], leaders['AST'], leaders['TRB']
    ):
        ax.bar([player], [ast], label=f"{position} - PTS: {pts}, AST: {ast}, TRB: {trb}")

    ax.set_xlabel('Jogadores')
    ax.set_ylabel('Assistências (AST)')
    ax.set_title('Principais jogadores por posição em termos de assistências (AST)')
    ax.legend()
    plt.show()

In [None]:
# Plot the assists leader of each position for the full dataset.
top_player_assits(df_nba)

In [None]:
def top_player_rebounds(df):
    """Plot the player with the most rebounds of each position as a bar chart.

    For every value of the ``Pos`` column, the row with the largest ``TRB`` is
    selected and drawn as one bar (x = ``Player``, height = ``TRB``). The
    legend entry of each bar also lists that player's ``PTS``, ``AST`` and
    ``TRB`` values.

    Args:
        df: DataFrame providing the ``Pos``, ``Player``, ``PTS``, ``AST`` and
            ``TRB`` columns. It is not modified.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # Keep the first row per position after sorting by rebounds. This replaces
    # ``groupby('Pos').apply(...)``: newer pandas releases exclude the grouping
    # column from ``apply``, which drops 'Pos' and breaks any later use of it.
    # Rows without a position are ignored (as groupby did), missing TRB sort
    # last, and the stable sort resolves ties to the row that comes first.
    leaders = (
        df.dropna(subset=['Pos'])
        .sort_values('TRB', ascending=False, kind='stable')
        .drop_duplicates(subset='Pos', keep='first')
        .sort_values('Pos')
    )

    _, ax = plt.subplots(figsize=(10, 6))

    # One bar per position, drawn in sorted position order.
    for position, player, pts, ast, trb in zip(
        leaders['Pos'], leaders['Player'], leaders['PTS'], leaders['AST'], leaders['TRB']
    ):
        ax.bar([player], [trb], label=f"{position} - PTS: {pts}, AST: {ast}, TRB: {trb}")

    ax.set_xlabel('Jogadores')
    ax.set_ylabel('Rebotes (TRB)')
    ax.set_title('Principais jogadores por posição em termos de rebotes (TRB)')
    ax.legend()
    plt.show()

In [None]:
# Plot the rebounds leader of each position for the full dataset.
top_player_rebounds(df_nba)

## Top 10 salaries

In [None]:
def top10_highest_salary(df):
    """Plot the ten rows with the highest ``Salario_2022/2023`` as a bar chart.

    The rows are sorted by salary in descending order and the first ten are
    drawn as bars (x = ``Player``, height = salary), so the bars appear from
    the highest salary to the lowest.

    Args:
        df: DataFrame providing the ``Player`` and ``Salario_2022/2023``
            columns. It is not modified.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    salary_col = 'Salario_2022/2023'
    top_paid = df.sort_values(by=[salary_col], ascending=False).head(10)

    # Salaries are plotted exactly as stored in the dataset; no rescaling is
    # applied. (A previous no-op self-assignment that claimed to rescale the
    # values was removed: it changed nothing and wrote into a slice of `df`.)
    # NOTE(review): the y-axis label mentions 10^7, presumably matching the
    # offset matplotlib prints for values of that magnitude -- confirm.
    _, ax = plt.subplots(figsize=(10, 6))

    ax.bar(top_paid['Player'], top_paid[salary_col])

    ax.set_xlabel('Jogadores')
    ax.set_ylabel('Salário em 10^7$ (USD)')
    ax.set_title('10 jogadores com o salário mais alto')
    # Vertical tick labels keep long player names from overlapping.
    ax.tick_params(axis='x', labelrotation=90)
    plt.show()

In [None]:
# Plot the ten highest salaries in the full dataset.
top10_highest_salary(df_nba)

## Number of players for each franchise

In [None]:
def count_players_franchise(df):
    """Draw a bar chart with the number of rows (players) per ``Tm`` value.

    Args:
        df: DataFrame providing the ``Tm`` column. It is not modified.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # value_counts() yields one entry per team, ordered from the team with
    # the most players to the one with the fewest.
    roster_sizes = df['Tm'].value_counts()
    franchises = roster_sizes.index
    player_counts = roster_sizes.to_numpy()

    _, ax = plt.subplots(figsize=(10, 6))
    ax.bar(franchises, player_counts)

    ax.set_title('Number of players for each franchise')
    ax.set_xlabel('Franchise')
    ax.set_ylabel('Number of players')

    # Rotate the team labels so they do not overlap.
    plt.xticks(rotation=90)
    plt.show()

In [None]:
# Plot the number of players per franchise for the full dataset.
count_players_franchise(df_nba)

## Percentage of positions in the league

In [None]:
def postions_per(df):
    """Draw a pie chart with the share of players in each ``Pos`` value.

    Args:
        df: DataFrame providing the ``Pos`` column. It is not modified.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # Number of players per position, most common position first.
    position_counts = df['Pos'].value_counts()
    slice_labels = list(position_counts.index)
    slice_sizes = position_counts.to_numpy()

    _, ax = plt.subplots(figsize=(10, 6))

    # autopct prints each slice's percentage with one decimal place.
    ax.pie(
        slice_sizes,
        labels=slice_labels,
        autopct='%1.1f%%',
        shadow=True,
        startangle=90,
    )
    ax.set_title('Percentage of players for each position')

    plt.show()

In [None]:
# Plot the share of each position for the full dataset.
postions_per(df_nba)

## Mean weight and height for each position

In [None]:
def mean_weight(df):
    """Plot and print the mean ``Weight`` for each ``Pos`` value.

    Args:
        df: DataFrame providing the ``Pos`` and ``Weight`` columns. It is not
            modified.

    Returns:
        None. The bar chart is displayed with ``plt.show()`` and the table of
        means is then printed to standard output.
    """
    # One row per position: columns 'Pos' and the averaged 'Weight'.
    weight_by_pos = df.groupby('Pos')['Weight'].mean()
    summary = weight_by_pos.reset_index()

    _, ax = plt.subplots(figsize=(10, 6))
    ax.bar(summary['Pos'], summary['Weight'])

    ax.set_title('Mean weight for each position')
    ax.set_xlabel('Position')
    ax.set_ylabel('Weight')

    plt.show()

    # Also show the exact values behind the bars.
    print(summary)

In [None]:
# Plot and print the mean weight per position for the full dataset.
mean_weight(df_nba)

In [None]:
def mean_height(df):
    """Plot and print the mean ``Height`` for each ``Pos`` value.

    Args:
        df: DataFrame providing the ``Pos`` and ``Height`` columns. It is not
            modified.

    Returns:
        None. The bar chart is displayed with ``plt.show()`` and the table of
        means is then printed to standard output.
    """
    # One row per position: columns 'Pos' and the averaged 'Height'.
    height_by_pos = df.groupby('Pos')['Height'].mean()
    summary = height_by_pos.reset_index()

    _, ax = plt.subplots(figsize=(10, 6))
    ax.bar(summary['Pos'], summary['Height'])

    ax.set_title('Mean height for each position')
    ax.set_xlabel('Position')
    ax.set_ylabel('Height')

    plt.show()

    # Also show the exact values behind the bars.
    print(summary)

In [None]:
# Plot and print the mean height per position for the full dataset.
mean_height(df_nba)

## Information of the dataset

In [None]:
# Print a concise summary of the DataFrame: columns, non-null counts and dtypes.
df_nba.info()