In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os

# Walk the Kaggle input directory and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Hello everyone! In this study, we examine some information about the NBA 16/17 season: player stats, earned salaries, and whether a single player is enough to make a team a champion. We will cover all of them with images and statistical graphics.

<p><img style="max-width:700px" src="https://media0.giphy.com/media/3oKIPkYaXWNQftk3cY/giphy.gif?cid=ecf05e478ny15npfz8cgkbd2b4vy24lf6qzsxm9go1j0tlq9&rid=giphy.gif"></p>

<a id='ldata'></a>
# <h1 style="background-color:skyblue; font-family:newtimeroman; font-size:250%; text-align:center; border-radius: 15px 50px;">Load data 📚</h1>

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
# NOTE(review): neither ProfileReport nor tm is referenced in the cells visible here.
# NOTE(review): pandas.util.testing was deprecated in pandas 1.0 and removed in 2.0, so the
# import below fails on a current pandas — confirm the kernel's pandas version.
from pandas_profiling import ProfileReport
import pandas.util.testing as tm
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff

# Disable pandas' display truncation so frames are shown with every row and column.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

In [None]:
# Load the per-player season stats table (the file name suggests a Basketball-Reference export — TODO confirm).
data = pd.read_csv('/kaggle/input/social-power-nba/nba_2017_br.csv')
data.head()

In [None]:
# Load the 2017 salary table; its columns are renamed in the next cell before merging.
df_2 = pd.read_csv('/kaggle/input/social-power-nba/nba_2017_salary.csv')
df_2.head()

In [None]:
# Overwrite the salary table's headers positionally — assumes exactly four columns in this order (TODO confirm against the CSV).
# 'Pos' and 'Tm' collide with the stats table's column names, so the merge below yields Pos_x/Pos_y and Tm_x/Tm_y.
df_2.columns = ['Player', 'Pos', 'Tm', 'Salary']

In [None]:
# Inner join on player name: only players present in both tables are kept.
# NOTE(review): 'Player' is the sole key, so a name that occurs on several rows in either table multiplies rows — confirm uniqueness.
df = data.merge(df_2, how='inner', on='Player')

In [None]:
# Preview the merged frame; the suffixed Pos_x/Pos_y and Tm_x/Tm_y columns are still present here.
df.head()

In [None]:
# Drop the rank column and the salary table's duplicate position/team columns.
# Rebinding (instead of inplace=True) together with errors='ignore' keeps this cell
# re-runnable: a second execution no longer raises KeyError for already-removed columns.
df = df.drop(columns=['Rk', 'Pos_y', 'Tm_y'], errors='ignore')

In [None]:
def check_df(dataframe):
    """Print a quick structural overview of ``dataframe``.

    Sections, separated by a line of 50 asterisks, are printed in this order:
    ``info()``, ``head()``, ``tail()``, transposed ``describe()`` and the
    per-column null counts.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame to summarise.

    Returns
    -------
    None
        Everything is written to standard output.
    """
    separator = '*' * 50
    # DataFrame.info() writes its report itself and returns None, so it must not be
    # wrapped in print() — doing so emitted a stray "None" line after the report.
    dataframe.info()
    print(separator)
    print(dataframe.head())
    print(separator)
    print(dataframe.tail())
    print(separator)
    print(dataframe.describe().T)
    print(separator)
    print(dataframe.isnull().sum())

In [None]:
# Print the structure, head/tail, summary statistics and null counts of the merged frame.
check_df(df)

In [None]:
# Discard every row that has at least one missing value.
df = df.dropna()

In [None]:
# Preview the frame after rows with missing values were dropped.
df.head()

In [None]:
# Give the remaining merge-suffixed columns readable names.
df = df.rename(columns={'Pos_x': 'POSITIONS', 'Tm_x': 'TEAM'})

In [None]:
# Point guards, highest PS/G first.
point_guards = df.loc[df['POSITIONS'].eq('PG')].sort_values('PS/G', ascending=False)

In [None]:
# Shooting guards, highest PS/G first.
shooting_guards = df.loc[df['POSITIONS'].eq('SG')].sort_values('PS/G', ascending=False)

In [None]:
# Power forwards, highest PS/G first.
power_forwards = df.loc[df['POSITIONS'].eq('PF')].sort_values('PS/G', ascending=False)

In [None]:
# Small forwards, highest PS/G first.
small_forwards = df.loc[df['POSITIONS'].eq('SF')].sort_values('PS/G', ascending=False)

In [None]:
# Centers, highest PS/G first.
centers = df.loc[df['POSITIONS'].eq('C')].sort_values('PS/G', ascending=False)

In [None]:
# For each position subset, print the ten highest PS/G players with their mean
# assists, field-goal percentage and three-point percentage.
cols = [shooting_guards, centers, power_forwards, small_forwards, point_guards]
summary_spec = {'PS/G': 'mean', 'AST': 'mean', 'FG%': 'mean', '3P%': 'mean'}
for position_df in cols:
    top_scorers = (
        position_df.groupby(['Player', 'TEAM'])
        .agg(summary_spec)
        .sort_values(by='PS/G', ascending=False)
        .head(10)
    )
    print(top_scorers)

<a id='salary'></a>
# <h1 style="background-color:skyblue; font-family:newtimeroman; font-size:250%; text-align:center; border-radius: 15px 50px;">Salary Analysis 💵</h1>

In [None]:
# Let's examine salary distributions according to positions.
# Individual salaries are drawn as a strip plot, with a point plot of the same columns overlaid.
sns.set(style="whitegrid")
position_order = ['PG', 'SG', 'SF', 'PF', 'C']
shared_kwargs = dict(x="Salary", y="POSITIONS", order=position_order, data=df)
sns.stripplot(alpha=.50, **shared_kwargs)
sns.pointplot(palette="dark", markers="d", **shared_kwargs)
plt.show()

# Top 5 Shooting Guards with the highest salary

In [None]:
# Five best-paid shooting guards. numeric_only=True skips the string column POSITIONS,
# which would otherwise make mean() raise TypeError on pandas >= 2.0.
shooting_guards.groupby(['Player', 'TEAM']).mean(numeric_only=True).sort_values(by='Salary', ascending=False).head(5)

In [None]:
# Shooting-guard salary per team as a bar chart.
fig, ax = plt.subplots(figsize=(15, 5))
sns.barplot(x='TEAM', y='Salary', data=shooting_guards, ax=ax)
ax.set_title('Shooting Guards Salary');

**The Chicago Bulls and Washington Wizards appear to be the teams with the highest salaries at the SG position. The chart suggests that these teams' star players play this position. On the Chicago Bulls roster, a star like Dwyane Wade stands out; for the Washington Wizards it is Bradley Beal, whose salary is close to Dwyane Wade's.**



<p><img style="float: left;margin:50px 50px 50px 50px; max-width:400px" src="https://www.gannett-cdn.com/-mm-/14e092b8d6566eb720d7e25051d0eb8a245c6d17/c=0-75-2917-1723/local/-/media/2016/12/31/USATODAY/usatsports/wade-2017.jpg?width=2917&height=1648&fit=crop&format=pjpg&auto=webp"></p>


<p><img style="float: right;margin:50px 50px 50px 50px; max-width:400px" src="https://i.ytimg.com/vi/em1EZetSrw4/maxresdefault.jpg"></p>


# Top 5 Point Guards with the highest salary

In [None]:
# Five best-paid point guards. numeric_only=True skips the string column POSITIONS,
# which would otherwise make mean() raise TypeError on pandas >= 2.0.
point_guards.groupby(['Player', 'TEAM']).mean(numeric_only=True).sort_values('Salary', ascending=False).head(5)

In [None]:
# Point-guard salary per team as a bar chart.
fig, ax = plt.subplots(figsize=(15, 5))
sns.barplot(x='TEAM', y='Salary', data=point_guards, ax=ax)
ax.set_title('Point Guards Salary');

**Oklahoma City Thunder = 'Russell Westbrook', Houston Rockets = 'James Harden' and Memphis Grizzlies = 'Mike Conley' are the highest-paid Point Guards. Notably, the two stars of the Finals, 'Kyrie Irving' and
'Stephen Curry', are not in the top 5 of the highest-salary category.**



<p><img style="float: left;margin:50px 50px 50px 50px; max-width:400px" src="https://images.eurohoops.net/2017/11/mike-conley-memphis-grizzlies-e1511011598854-625x375.jpg"></p>

<p><img style="float: right;margin:50px 50px 50px 70px; max-width:400px" src="https://images2.minutemediacdn.com/image/fetch/c_fill,g_auto,f_auto,h_2133,w_3200/https%3A%2F%2Fthunderousintentions.com%2Fwp-content%2Fuploads%2Fgetty-images%2F2018%2F04%2F944012958-oklahoma-city-thunder-v-miami-heat.jpg.jpg"></p>

<p><img style="float: left;margin:50px 50px 50px 50px; max-width:400px" src="https://a4.espncdn.com/combiner/i?img=%2Fphoto%2F2017%2F0331%2Fr195541_1296x729_16%2D9.jpg"></p>

# Top 5 Small Forwards with the highest salary

In [None]:
# Five best-paid small forwards. numeric_only=True skips the string column POSITIONS,
# which would otherwise make mean() raise TypeError on pandas >= 2.0.
small_forwards.groupby(['Player', 'TEAM']).mean(numeric_only=True).sort_values('Salary', ascending=False).head(5)

In [None]:
# Small-forward salary per team as a bar chart.
fig, ax = plt.subplots(figsize=(12, 5))
sns.barplot(x='TEAM', y='Salary', data=small_forwards, ax=ax)
ax.set_title('Small Forwards Salary');

**The Cleveland Cavaliers are at the top by far because they have a superstar like 'LeBron James'.**

<p><img style="float: center;margin:50px 50px 50px 50px; max-width:750px" src="https://api.time.com/wp-content/uploads/2018/10/lebron-james-student-athlete.jpg"></p>

# Top 5 Power Forwards with the highest salary

In [None]:
# Five best-paid power forwards. numeric_only=True skips the string column POSITIONS,
# which would otherwise make mean() raise TypeError on pandas >= 2.0.
power_forwards.groupby(['Player', 'TEAM']).mean(numeric_only=True).sort_values('Salary', ascending=False).head(5)

In [None]:
# Power-forward salary per team as a bar chart.
fig, ax = plt.subplots(figsize=(12, 5))
sns.barplot(x='TEAM', y='Salary', data=power_forwards, ax=ax)
ax.set_title('Power Forwards Salary');

**Dirk Nowitzki, one of the first names that come to mind at the Power Forward position, is the only reason Dallas ranks first in salary.**
<p><img style="float: center;margin:50px 50px 50px 50px; max-width:600px" src="https://cdn.vox-cdn.com/thumbor/YPE7JuHf4H2zceVV5wHfSMXsL8M=/0x0:3504x2332/1200x800/filters:focal(1164x838:1724x1398)/cdn.vox-cdn.com/uploads/chorus_image/image/55436741/usa_today_10006778.0.jpg"></p>

# Top 5 Centers with the highest salary

In [None]:
# Five best-paid centers. numeric_only=True skips the string column POSITIONS,
# which would otherwise make mean() raise TypeError on pandas >= 2.0.
centers.groupby(['Player', 'TEAM']).mean(numeric_only=True).sort_values('Salary', ascending=False).head(5)

In [None]:
# Center salary per team as a bar chart.
fig, ax = plt.subplots(figsize=(12, 5))
sns.barplot(x='TEAM', y='Salary', data=centers, ax=ax)
ax.set_title('Centers Salary');

**We can observe that the top 3 teams with the best centers in the NBA are very close to each other in terms of salaries.**

<p><img style="float: left;margin:50px 50px 50px 50px; max-width:400px" src="https://images2.minutemediacdn.com/image/fetch/w_736,h_485,c_fill,g_auto,f_auto/https%3A%2F%2Fchowderandchampions.com%2Fwp-content%2Fuploads%2Fgetty-images%2F2017%2F08%2F673583652-chicago-bulls-v-boston-celtics-game-five.jpg.jpg"></p>
    
<p><img style="float: right;margin:50px 50px 50px 50px; max-width:400px" src="https://cdn.sporx.com/img/59/2017/howard.jpg"></p>

<p><img style="float: left;margin:50px 50px 50px 50px; max-width:400px" src="https://i.ytimg.com/vi/cGJRcDhGHwI/maxresdefault.jpg"></p>

In [None]:
# Mean PS/G for every distinct salary value, as a two-column frame (Salary, PS/G).
sf = df.groupby('Salary')['PS/G'].mean().reset_index()

In [None]:
# Average points per match by salary
# Salary is also the animation frame, so each frame shows the bar of one salary value.
fig = px.bar(sf, x='Salary', y='PS/G', animation_frame='Salary', range_y=[0, 40])
# Treat salaries as ordered categories instead of a continuous axis.
fig.update_xaxes(type='category', categoryorder='category ascending')
fig.show()

In [None]:
# Thirty highest mean salaries per player, highest first.
best_salry = (
    df.groupby('Player')[['Salary']]
    .mean()
    .sort_values('Salary', ascending=False)
    .head(30)
    .reset_index()
)

In [None]:
# Line chart of the best-paid players.
salary_trace = go.Scatter(
    x=best_salry['Player'],
    y=best_salry['Salary'],
    mode='lines+markers',
)
fig = go.Figure(data=[salary_trace])
fig.update_layout(title="The highest salary players (Millions)")
# No categoryorder is set: the x axis follows the row order of best_salry.
fig.update_xaxes(type='category')
fig.show()

<a id='performance'></a>
# <h1 style="background-color:skyblue; font-family:newtimeroman; font-size:250%; text-align:center; border-radius: 15px 50px;">Performance Analysis 🏀</h1>

In [None]:
# Groupby by the number of matches played
df.groupby(['Player']).agg({'G':'mean'}).sort_values('G', ascending=False).head(20).reset_index()

In [None]:
# Stat columns used in the performance analysis.
perf_list = ['G', 'PS/G', 'FG', '3P', 'AST', 'BLK', 'STL', 'ORB', 'DRB', 'FT']

In [None]:
# Per-player mean of every stat in perf_list, restricted to the 20 highest PS/G players.
# Reusing perf_list (defined in the previous cell) replaces a hand-copied dict of the same
# ten columns, so the column set lives in one place; column order matches the list.
game_perf = (
    df.groupby('Player')[perf_list]
    .mean()
    .sort_values('PS/G', ascending=False)
    .head(20)
    .reset_index()
)
game_perf.head()

In [None]:
# Share of points per game among the 20 highest PS/G players.
# Labels and values must come from the same frame: previously the values were taken
# from the full, differently ordered df['PS/G'], so slices did not belong to their labels.
fig = go.Figure()
fig.add_trace(go.Pie(
    labels=game_perf['Player'],
    values=game_perf['PS/G'],
))
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.update_layout(title="Percent of Players Points/Game")
fig.show();

In [None]:
# Line chart of PS/G for the players in game_perf.
scoring_trace = go.Scatter(
    x=game_perf['Player'],
    y=game_perf['PS/G'],
    mode='lines+markers',
)
fig = go.Figure(data=[scoring_trace])
fig.update_layout(title="Average score per match")
# Categorical x axis, ordered by descending value.
fig.update_xaxes(type='category', categoryorder='total descending')
fig.show()

In [None]:
# Distribution of points and assists per match
sns.scatterplot(x= df['PS/G'], y=df.AST);

In [None]:
# Points per game and points from free throws
sns.scatterplot(x= df['PS/G'], y=df.FT);

In [None]:
# Best Rookies 
df[df['Age'] == 19].sort_values('PS/G', ascending=False)

# The most successful players of the season

<img style="float: left;margin:50px 50px 50px 50px; max-width:500px" src="https://pbs.twimg.com/media/DDS-gzcXYAAqUha.jpg">


In [None]:
# Per-player means, computed once and reused for every leaderboard.
# numeric_only=True skips the string columns (POSITIONS, TEAM), which would otherwise
# make mean() raise TypeError on pandas >= 2.0.
player_means = df.groupby('Player').mean(numeric_only=True)

# Each result is a one-row frame with a 'Player' column holding the leader's name.
# NOTE(review): "mvp" is simply the PS/G leader, i.e. identical to ppg_lead — the data
# contains no award information, so this is a proxy only.
mvp = player_means.sort_values('PS/G', ascending=False).head(1).reset_index()
rpg_lead = player_means.sort_values('TRB', ascending=False).head(1).reset_index()
ppg_lead = player_means.sort_values('PS/G', ascending=False).head(1).reset_index()
apg_lead = player_means.sort_values('AST', ascending=False).head(1).reset_index()

In [None]:
# Print each leader's name followed by a separator line.
# The one-row frames are reduced to a plain string with .iloc[0]; concatenating a str
# with the Series itself printed the Series repr (index, name and dtype) instead of the name.
# The label typo "Seasson" is corrected to "Season".
leader_rows = [
    ('Season Most Valuable Player: ', mvp),
    ('Season RPG Leader: ', rpg_lead),
    ('Season PPG Leader: ', ppg_lead),
    ('Season APG Leader: ', apg_lead),
]
for label, leader in leader_rows:
    print(label + leader['Player'].iloc[0])
    print('*'*50)

# <h1 style="background-color:skyblue; font-family:newtimeroman; font-size:250%; text-align:center; border-radius: 15px 50px;">2017 Finals 🏆</h1>

<p><img style="max-width:700px" src="https://stat.ameba.jp/user_images/20170603/11/1952-nisi/44/5b/j/o0300046313952062999.jpg?caw=800"></p>

In [None]:
# Rows of players whose TEAM is 'CLE'.
cle = df.loc[df['TEAM'].eq('CLE')]
cle

<p><img style="float: left;margin:50px 50px 50px 50px; max-width:750px" src="https://www.spoura.com/wp-content/uploads/2016/11/cleveland.png"></p>

In [None]:
# Rows of players whose TEAM is 'GSW'.
gsw = df.loc[df['TEAM'].eq('GSW')]
gsw

<p><img style="float: left;margin:50px 50px 50px 50px; max-width:750px" src="https://www.spoura.com/wp-content/uploads/2016/11/golden-state.png"></p>

### Let's look at the statistics of the teams

In [None]:
# PS/G of every Cleveland player, highest first.
cle_trace = go.Scatter(x=cle['Player'], y=cle['PS/G'], mode='lines+markers')
fig = go.Figure(data=[cle_trace])
fig.update_layout(title="Cleveland Cavaliers Players PS/G")
fig.update_xaxes(type='category', categoryorder='total descending')
fig.show()

In [None]:
# PS/G of every Golden State player, highest first.
gsw_trace = go.Scatter(x=gsw['Player'], y=gsw['PS/G'], mode='lines+markers')
fig = go.Figure(data=[gsw_trace])
fig.update_layout(title="Golden State Warriors Players PS/G")
fig.update_xaxes(type='category', categoryorder='total descending')
fig.show()

In [None]:
# Total team average
cle['PS/G'].sum()

In [None]:
# Total team average
gsw['PS/G'].sum()

In [None]:
# Each Cleveland player's share of the team's '3P' column.
fig = go.Figure(data=[go.Pie(labels=cle['Player'], values=cle['3P'])])
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.update_layout(title="Cleveland Cavaliers Players 3 Points Per Match")
fig.show();

In [None]:
# Each Golden State player's share of the team's '3P' column.
fig = go.Figure(data=[go.Pie(labels=gsw['Player'], values=gsw['3P'])])
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.update_layout(title="Golden State Warriors Players 3 Points Per Match")
fig.show();

In [None]:
# Each Cleveland player's share of the team's 'FG' column.
fig = go.Figure(data=[go.Pie(labels=cle['Player'], values=cle['FG'])])
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.update_layout(title="Cleveland Cavaliers Players Field Goals")
fig.show();

In [None]:
# Each Golden State player's share of the team's 'FG' column.
fig = go.Figure(data=[go.Pie(labels=gsw['Player'], values=gsw['FG'])])
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.update_layout(title="Golden State Warriors Players Field Goals")
fig.show();

### No matter how good LeBron and Kyrie are at mid-range shooting, three-point shots seem to determine the fate of the matches.

In [None]:
# Cleveland players ordered by FG, with FG% kept as a grouping key.
# numeric_only=True skips the string columns (POSITIONS, TEAM), which would otherwise
# make mean() raise TypeError on pandas >= 2.0.
cle.groupby(['Player','FG%']).mean(numeric_only=True).sort_values('FG', ascending=False).reset_index()

In [None]:
# Golden State players ordered by FG, with FG% kept as a grouping key.
# numeric_only=True skips the string columns (POSITIONS, TEAM), which would otherwise
# make mean() raise TypeError on pandas >= 2.0.
gsw.groupby(['Player','FG%']).mean(numeric_only=True).sort_values('FG', ascending=False).reset_index()

In [None]:
# Cleveland players ordered by 3P, with 3P% kept as a grouping key.
# numeric_only=True skips the string columns (POSITIONS, TEAM), which would otherwise
# make mean() raise TypeError on pandas >= 2.0.
cle.groupby(['Player','3P%']).mean(numeric_only=True).sort_values('3P', ascending=False).reset_index()

In [None]:
# Golden State players ordered by 3P, with 3P% kept as a grouping key.
# numeric_only=True skips the string columns (POSITIONS, TEAM), which would otherwise
# make mean() raise TypeError on pandas >= 2.0.
gsw.groupby(['Player','3P%']).mean(numeric_only=True).sort_values('3P', ascending=False).reset_index()

## Given Stephen Curry and Klay Thompson's spectacular three-point performance and Kevin Durant's strong mid-range performance, LeBron and Kyrie's efforts do not seem to have been enough to prevent this outcome.


## 2017 NBA Champion - Golden State Warriors



<p><img style="float: left;margin:50px 50px 50px 50px; max-width:750px" src="https://i.gifer.com/27tL.gif"></p>