### Exploratory Visualization

In [None]:
import os
import glob
import time
import pickle
import numpy as np
import pandas as pd
import pandas_profiling
import seaborn as sns
import matplotlib.pyplot as plt
import colorlover as cl
import plotly
import plotly.graph_objs as go
import plotly.plotly as py
from tqdm import tqdm_notebook as tqdm
from datetime        import datetime
from IPython.display import HTML

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Enable plotly's offline notebook rendering, used by the iplot calls in later cells.
plotly.offline.init_notebook_mode(connected=False)
# Show up to 30 columns when a DataFrame is displayed.
pd.set_option("display.max_columns",30)

In [None]:
# Season label for the data set explored in this notebook.
season = '2017-18'

# Working directory with every '/notebooks' path fragment removed, and the
# 'data' folder underneath it.
cwd = os.getcwd().replace('/notebooks', '')
data_dir = os.path.join(cwd, 'data')

In [None]:
def _position_bucket(pos):
    """Map a raw position string to one of 'PG', 'SG', 'C' or 'F'.

    The checks are substring tests applied in priority order PG > SG > C, so
    a multi-position string such as 'PG/SG' is bucketed as 'PG'. Anything
    matching none of them falls into the forward bucket 'F'.
    """
    if 'PG' in pos:
        return 'PG'
    if 'SG' in pos:
        return 'SG'
    if 'C' in pos:
        return 'C'
    return 'F'


def add_positions(df):
    """Add one-hot position columns 'PG', 'SG', 'F' and 'C' to ``df`` in place.

    Each row gets exactly one 1 across the four new columns, chosen from the
    row's 'Pos' string (see ``_position_bucket`` for the priority rules).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'Pos' column of strings. Any index is accepted: rows
        are read by value rather than by integer label, so the frame does not
        need a fresh ``reset_index`` beforehand.

    Returns
    -------
    None
        ``df`` is modified in place.
    """
    buckets = [_position_bucket(pos) for pos in df['Pos']]

    # Column order matches the original PG, SG, F, C layout.
    for column in ('PG', 'SG', 'F', 'C'):
        df[column] = [int(bucket == column) for bucket in buckets]

In [None]:
# Load the cleaned 2017-18 table and keep only rows with a non-zero salary
# and non-zero minutes played, renumbering the index afterwards.
csv_path = os.path.join(data_dir, 'Dataframes', 'clean', 'df_2017-18.csv')
df = pd.read_csv(csv_path)
has_salary_and_minutes = (df['Salary'] != 0) & (df['MP'] != 0)
df = df[has_salary_and_minutes].reset_index(drop=True)

### Exploring Risk-Return Relationship

In [None]:
# Build, for every player, the date / salary / position of their most recent
# row. `names` and the date-sorted `df` are reused by later cells.
names = list(set(df['Name']))
df = df.sort_values(by='Date', ascending=False).reset_index(drop=True)

# With rows sorted newest-first, the first occurrence of each name is that
# player's latest entry. drop_duplicates finds it in one pass instead of
# rescanning the whole frame once per player; .loc[names] then puts the rows
# in the same order as `names`.
latest = df.drop_duplicates(subset='Name', keep='first').set_index('Name').loc[names]

salary_latest = {
    # 'Date' holds YYYYMMDD values; convert them to datetime objects.
    'Date': [datetime.strptime(str(date), '%Y%m%d') for date in latest['Date']],
    'Name': list(names),
    'Salary': latest['Salary'].tolist(),
    'Pos': latest['Pos'].tolist(),
}

In [None]:
# Number of previous games used for the expected-return / risk estimates.
# The charts built from this table are labelled "10-game".
n_games = 10

returns = []
risks = []
actual = []

for name in tqdm(names):
    # One player's games, newest first, renumbered 0..k-1.
    df_tmp = df.loc[df['Name']==name].sort_values(by='Date', ascending=False).reset_index(drop=True)

    # Row 0 is the latest game: the outcome the history is compared against.
    actual.append(df_tmp.loc[0,'FPTS'])

    # Use positional slicing for the preceding n_games games. Label-based
    # .loc[1:11] includes both end points and therefore covered 11 games.
    history = df_tmp['FPTS'].iloc[1:1 + n_games]
    returns.append(history.mean())
    risks.append(history.std())

df_rr = pd.DataFrame({'Name': names, 'Actual': actual, 'Expected Return': returns, 'Risk': risks})
df_rr = pd.merge(df_rr, pd.DataFrame(salary_latest), on='Name')
add_positions(df_rr)
columns = ['Date','Name','Salary', 'Actual','Expected Return', 'Risk', 'Pos', 'PG', 'SG', 'F','C']
# dropna removes players with too little history for a mean / standard deviation.
df_rr = df_rr.loc[:, columns].dropna().reset_index(drop=True)

In [None]:
# Risk vs. expected-return scatter, one trace per position flag.
# The layout does not depend on the position, so it is built once up front.
layout = go.Layout(
    title='10-game Risk-Return Relationship',
    legend={'x': 0.075, 'y': 1, 'borderwidth': 1},
    hovermode='closest',
    xaxis={'title': 'Risk (Standard Deviation)'},
    yaxis={'title': 'Expected Return (10-game Average)'},
)

data = []
for pos in ('PG', 'SG', 'F', 'C'):
    # Players whose one-hot flag for this position is set.
    df_pos = df_rr.loc[df_rr[pos] == 1].reset_index(drop=True)
    trace = go.Scatter(
        x=df_pos['Risk'],
        y=df_pos['Expected Return'],
        mode='markers',
        text=df_pos['Name'],
        name=pos,
    )
    data.append(trace)

fig = go.Figure(data=data, layout=layout)

# Publish through plotly.plotly under the name 'risk_return', then render inline.
plot_url = py.plot(fig, filename='risk_return')
plotly.offline.iplot(fig)

### Exploring Salary-Return Relationship

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

In [None]:
# Restrict the table to players whose latest recorded date is 2018-04-11.
target_date = pd.to_datetime('20180411', format='%Y%m%d')
df_rr = df_rr.loc[df_rr['Date'] == target_date]

In [None]:
# Salary vs. actual-return scatter per position, plus a fitted regression line.
data = []
df_rr = df_rr[df_rr['Salary']>=3000]

# The layout is the same for every position, so build it once.
layout = go.Layout(
    title = 'Salary-Return Relationship',
    legend = {"x":0.9, 'y':0.2, 'borderwidth': 1},
    hovermode = 'closest',
    xaxis = {"title":"Salary"},
    yaxis = {"title":"Actual Return"},
)

for pos in (['PG', 'SG', 'F', 'C']):
    df_pos = df_rr[df_rr[pos]==1].reset_index(drop=True)

    trace = go.Scatter(x=df_pos['Salary'],
                       y=df_pos['Actual'],
                       mode='markers',
                       text=df_pos['Name'],
                       name=pos)
    data.append(trace)


# Ordinary least squares of actual return on salary.
reg = LinearRegression()
reg.fit(df_rr['Salary'].values.reshape(-1,1), df_rr['Actual'].values.reshape(-1,1))

# A straight line is fully determined by two points, so draw it between the
# two ends of the salary range. Sampling 1000 random, unsorted x values made
# the trace non-deterministic and zig-zagged along the same line.
x = [2800, 14300]
y = reg.intercept_+reg.coef_*x
reg_line = go.Scatter(x=x,
                      y=y.flatten().tolist(),
                      mode='lines',
                      line={'color':'navy','width':1},
                      name='Reg')

data.append(reg_line)

fig = go.Figure(data=data, layout=layout)

# Publish through plotly.plotly under the name 'salary-return', then render inline.
plot_url = py.plot(fig, filename='salary-return')
plotly.offline.iplot(fig)

### Visualize the path of Stars and Rookies

In [None]:
def _short_name(name):
    """Abbreviate 'First Last ...' to 'F. Last'; return other names unchanged.

    Names without a space, or with an empty first token, are returned as-is
    instead of raising IndexError.
    """
    parts = name.split(' ')
    if len(parts) < 2 or not parts[0]:
        return name
    return '. '.join([parts[0][0], parts[1]])


def visualize_salary(df, names, window=5):
    """Plot the rolling-mean salary over time for groups of players.

    One figure is rendered per key of ``names``; each figure has one line per
    player in that group.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows with at least 'Name', 'Date' (YYYYMMDD) and 'Salary' columns.
    names : dict
        Maps a group label (used in the figure title) to a list of player
        names.
    window : int, optional
        Number of consecutive rows averaged by the rolling mean. Defaults to
        5, the value that was previously hard-coded.

    Returns
    -------
    None
        Figures are rendered with ``plotly.offline.iplot``.
    """
    for key in names.keys():

        data = []

        for name in names[key]:

            df_focus = df[df['Name']==name].sort_values(by='Date').reset_index(drop=True)
            dates = pd.to_datetime(df_focus['Date'].astype(str), format='%Y%m%d')

            rolling_salary = df_focus['Salary'].rolling(window).mean()

            # The rolling mean is NaN until a full window is available. Keep
            # only rows with a value and apply the same mask to the dates and
            # hover text so every point is plotted at the date of the last
            # game in its window. Slicing the dates with [window:] while
            # leaving y at full length shifted each value `window` games late.
            valid = rolling_salary.notna()

            scatter = go.Scatter(x=dates[valid].tolist(),
                                 y=rolling_salary[valid].tolist(),
                                 mode='lines',
                                 text=df_focus.loc[valid, 'Name'].tolist(),
                                 name=_short_name(name))

            data.append(scatter)


        layout = go.Layout(
            title = 'Evolution of {} ({}-day rolling mean)'.format(key, window),
            legend = {'x':1.02, 'y':0.5, 'borderwidth': 1},
            hovermode = 'closest',
            xaxis = {'title':'Date'},
            yaxis = {'title':'Salary'},
        )


        fig = go.Figure(data=data, layout=layout)
        plotly.offline.iplot(fig)

In [None]:
# Two hand-picked groups of players; one salary figure is drawn per group.
players = dict(
    Stars=['LeBron James', 'Russell Westbrook', 'James Harden', 'Anthony Davis', 'Kevin Durant'],
    Rookies=['Donovan Mitchell', 'Jayson Tatum', 'Ben Simmons', 'Kyle Kuzma', 'Lonzo Ball', 'Markelle Fultz'],
)
visualize_salary(df, players)

### Visualize Top 10 Performances

In [None]:
def visualize_top_n(df, stats, n):
    """Draw one bar chart per stat showing the ``n`` best single-row values.

    For each stat the rows are sorted in descending order and only the best
    row per player is kept, so every player appears at most once per chart.
    Bars are coloured by the player's 'Pos' value.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows with 'Name', 'Pos' and one column per entry in ``stats``.
    stats : iterable of str
        Column names to rank by.
    n : int
        Number of players shown per chart.

    Returns
    -------
    None
        Figures are rendered with ``plotly.offline.iplot``.
    """
    # The position -> colour map is the same for every stat, so build it once.
    colors = cl.scales['11']['qual']['Paired']

    # Sort so a position always gets the same colour across runs; key=str
    # keeps the sort well-defined if a non-string value is present.
    positions = sorted(set(df['Pos']), key=str)

    # The palette has a fixed length; wrap around instead of raising
    # IndexError when there are more distinct positions than colours.
    pos_color = {pos: colors[i % len(colors)] for i, pos in enumerate(positions)}

    for stat in stats:

        # Best row per player for this stat, then the top n of those.
        df_stat = df.sort_values(by=stat, ascending=False).drop_duplicates(subset='Name').reset_index(drop=True).head(n)

        bar = go.Bar(x=df_stat['Name'],
                     y=df_stat[stat],
                     marker={'color': [pos_color[pos] for pos in df_stat['Pos']]},
                     text=list(df_stat['Pos'])
                    )

        layout = go.Layout(
            # Report the requested n rather than a hard-coded "10".
            title = 'Top {} in {}'.format(n, stat),
            legend = {'x':1, 'y':1, 'borderwidth': 1},
            hovermode = 'closest',
            yaxis = {'title':'{}'.format(stat)},
        )

        fig = go.Figure(data=[bar], layout=layout)
        plotly.offline.iplot(fig)

In [None]:
# One chart per listed stat column, each showing the 10 highest values (one row per player).
visualize_top_n(df, ['PTS','3P','AST','TRB','STL','BLK'], 10)