### Exploratory Data Analysis
Looking at the relationship between fantasy salary and player performance as well as its consistency.  
Sign up for a free plotly account and set your credentials in your local folder to use plotly (https://plot.ly/python/getting-started/)

In [None]:
import os
import glob
import time

import numpy as np
import pandas as pd
import pandas_profiling

import seaborn as sns
import matplotlib.pyplot as plt

import plotly
import plotly.graph_objs as go
import plotly.plotly as py
import colorlover as cl

from tqdm import tqdm_notebook as tqdm
from datetime        import datetime
from IPython.display import HTML
from sklearn.linear_model import LinearRegression

from constants import DATA_DIR, PLOTLY_USERNAME, PLOTLY_API_KEY

In [None]:
# Render plotly figures inline; connected=False avoids loading plotly.js from a CDN
plotly.offline.init_notebook_mode(connected=False)

# Show up to 30 columns when a dataframe is displayed
pd.options.display.max_columns = 30

In [None]:
# Specify your own username and API key in constants.py; the credentials file only needs to be written once
# plotly.tools.set_credentials_file(username=PLOTLY_USERNAME, api_key=PLOTLY_API_KEY)

In [None]:
# Load the merged dataframe for the latest season
season = '2018-19'
csv_path = os.path.join(DATA_DIR, 'Dataframes', 'Merged', 'df_{}.csv'.format(season))
df = pd.read_csv(csv_path)

# Keep only rows that have a non-zero salary and non-zero minutes played
has_salary = df['Salary'] != 0
has_minutes = df['MP'] != 0
df = df.loc[has_salary & has_minutes].reset_index(drop=True)

### Preparing Dataframes for Latest Data

In [None]:
# Build a one-row-per-player dataframe holding each player's most recent
# salary and position flags (i.e. as of the end of the 2018-19 season).
# unique() keeps first-appearance order, so `names` is stable between runs.
names = df['Name'].unique().tolist()

# Newest games first, so the first row found for a player is the latest one.
df = df.sort_values(by='Date', ascending=False).reset_index(drop=True)

# drop_duplicates(keep='first') picks the most recent row per player in one
# pass instead of scanning every row of the dataframe once per player.
salary_pos_columns = ['Date', 'Name', 'Salary', 'PG', 'SG', 'F', 'C']
df_salary_pos = (df.drop_duplicates(subset='Name', keep='first')
                   .loc[:, salary_pos_columns]
                   .reset_index(drop=True))

# Dates are stored as YYYYMMDD values; convert them to real timestamps.
df_salary_pos['Date'] = pd.to_datetime(df_salary_pos['Date'].astype(str), format='%Y%m%d')

In [None]:
# Per-player "expected return" (mean FPTS) and "risk" (FPTS standard deviation)
# over the 10 games preceding the latest one, merged with the latest salary info.
n_prior_games = 10
returns, risks, actual = [], [], []

for name in tqdm(names):
    # This player's fantasy points, most recent game first
    fpts = (df.loc[df['Name'] == name]
              .sort_values(by='Date', ascending=False)['FPTS']
              .reset_index(drop=True))

    # The latest game is the outcome we compare the expectation against
    actual.append(fpts.iloc[0])

    # Positional slice = exactly the n_prior_games games before the latest one.
    # (A label slice such as .loc[1:11] is end-inclusive and would cover 11 games.)
    prior = fpts.iloc[1:1 + n_prior_games]
    returns.append(prior.mean())
    risks.append(prior.std())

df_rr = pd.DataFrame({'Name': names, 'Actual': actual, 'Expected Return': returns, 'Risk': risks})
df_rr = pd.merge(df_rr, df_salary_pos, on='Name')

# Players without enough history have NaN return/risk and are dropped here
columns = ['Date', 'Name', 'Salary', 'Actual', 'Expected Return', 'Risk', 'PG', 'SG', 'F', 'C']
df_rr = df_rr.loc[:, columns].dropna().reset_index(drop=True)

### Exploring Risk-Return Relationship

In [None]:
# Scatter of risk vs. expected return, one marker series per position flag
layout = go.Layout(
    title='Risk-Return Relationship (2018-19)',
    legend=dict(x=0.075, y=1, borderwidth=1),
    hovermode='closest',
    xaxis=dict(title='Risk (Standard Deviation)'),
    yaxis=dict(title='Expected Return (10-game Average)'),
)

data = []
for pos in ('PG', 'SG', 'F', 'C'):
    # Rows whose flag for this position is set
    df_pos = df_rr.loc[df_rr[pos] == 1].reset_index(drop=True)
    data.append(
        go.Scatter(
            x=df_pos['Risk'],
            y=df_pos['Expected Return'],
            mode='markers',
            text=df_pos['Name'],
            name=pos,
        )
    )

fig = go.Figure(data=data, layout=layout)

# Send the figure to the plotly account, then also render it inline
plot_url = py.plot(fig, filename='risk_return')
plotly.offline.iplot(fig)

### Exploring Expected vs Actual with 10-game average

In [None]:
# Scatter of expected return vs. actual FPTS per position, plus a fitted
# linear-regression line over all players.
layout = go.Layout(
    title='Expected-Actual Relationship (2018-19)',
    legend={"x": 0.075, 'y': 1, 'borderwidth': 1},
    hovermode='closest',
    xaxis={"title": "Expected Return (10-game Average)"},
    yaxis={"title": "Actual FPTS"},
)

data = []
for pos in ['PG', 'SG', 'F', 'C']:
    df_pos = df_rr[df_rr[pos] == 1].reset_index(drop=True)

    trace = go.Scatter(x=df_pos['Expected Return'],
                       y=df_pos['Actual'],
                       mode='markers',
                       text=df_pos['Name'],
                       name=pos)
    data.append(trace)

reg = LinearRegression()
reg.fit(df_rr['Expected Return'].values.reshape(-1, 1), df_rr['Actual'].values)

# A straight line only needs its two endpoints; span the observed x-range so
# the line is deterministic and always covers the plotted data.
x = np.array([df_rr['Expected Return'].min(), df_rr['Expected Return'].max()])
y = reg.predict(x.reshape(-1, 1))
reg_line = go.Scatter(x=x.tolist(),
                      y=y.tolist(),
                      mode='lines',
                      line={'color': 'navy', 'width': 1},
                      name='Reg')

data.append(reg_line)

fig = go.Figure(data=data, layout=layout)

# Send the figure to the plotly account, then also render it inline
plot_url = py.plot(fig, filename='expected_actual')
plotly.offline.iplot(fig)

### Exploring Expected vs Actual with Fantasy Salary Info

In [None]:
# Scatter of fantasy salary vs. actual FPTS per position (salary >= 3000 only),
# plus a fitted linear-regression line.
def _name_date_label(name, date):
    """Return a hover label such as 'S.Curry 04/10' for a player name and date."""
    parts = name.split(' ')
    # Single-word names have no second token to abbreviate to; use them as-is
    short = name[0] + "." + parts[1] if len(parts) > 1 else name
    return short + " " + date.strftime("%m/%d")


layout = go.Layout(
    title='Salary-Return Relationship',
    legend={"x": 0.9, 'y': 0.2, 'borderwidth': 1},
    hovermode='closest',
    xaxis={"title": "Salary"},
    yaxis={"title": "Actual Return"},
)

data = []
df_sr = df_rr[df_rr['Salary'] >= 3000]

for pos in ['PG', 'SG', 'F', 'C']:
    df_pos = df_sr[df_sr[pos] == 1].reset_index(drop=True)
    # New column combining the short name and the date of the latest game
    df_pos['Name_Date'] = [_name_date_label(name, date)
                           for name, date in zip(df_pos['Name'], df_pos['Date'])]

    trace = go.Scatter(x=df_pos['Salary'],
                       y=df_pos['Actual'],
                       mode='markers',
                       text=df_pos['Name_Date'],
                       name=pos)
    data.append(trace)

reg = LinearRegression()
reg.fit(df_sr['Salary'].values.reshape(-1, 1), df_sr['Actual'].values)

# A straight line only needs its two endpoints; span the observed salary range
# so the line is deterministic and always covers the plotted data.
x = np.array([df_sr['Salary'].min(), df_sr['Salary'].max()])
y = reg.predict(x.reshape(-1, 1))
reg_line = go.Scatter(x=x.tolist(),
                      y=y.tolist(),
                      mode='lines',
                      line={'color': 'navy', 'width': 1},
                      name='Reg')

data.append(reg_line)

fig = go.Figure(data=data, layout=layout)

# Send the figure to the plotly account, then also render it inline
plot_url = py.plot(fig, filename='salary-return')
plotly.offline.iplot(fig)

### Visualize the Progression of Stars and Rookies

In [None]:
def visualize_salary(df, names, window=5):
    """Plot the rolling-mean salary over time for groups of players.

    One figure is rendered inline per key of ``names``, with one line per
    player in that group.

    Args:
        df: Dataframe with 'Name', 'Date' (YYYYMMDD) and 'Salary' columns,
            one row per player appearance.
        names: Dict mapping a group label (used in the figure title) to a
            list of player names.
        window: Number of consecutive rows (games) averaged for each point.
    """
    for key, group in names.items():
        data = []

        for name in group:
            df_focus = df[df['Name'] == name].sort_values(by='Date').reset_index(drop=True)
            dates = pd.to_datetime(df_focus['Date'].astype(str), format='%Y%m%d')
            rolling_mean = df_focus['Salary'].rolling(window).mean()

            # The first window-1 rolling values are NaN. Drop them together
            # with their dates so every mean is plotted at the date of the
            # last game in its window.
            start = window - 1
            x = dates.iloc[start:]
            y = rolling_mean.iloc[start:]

            # "LeBron James" -> "L. James"; single-word names are kept as-is
            parts = name.split(' ')
            name_short = '. '.join([parts[0][0], parts[1]]) if len(parts) > 1 else name

            scatter = go.Scatter(x=x,
                                 y=y,
                                 mode='lines',
                                 text=df_focus['Name'].iloc[start:],
                                 name=name_short)

            data.append(scatter)

        layout = go.Layout(
            # The window counts rows (games played), not calendar days
            title='Evolution of {} ({}-game rolling mean)'.format(key, window),
            legend={'x': 1.02, 'y': 0.5, 'borderwidth': 1},
            hovermode='closest',
            xaxis={'title': 'Date'},
            yaxis={'title': 'Salary'},
        )

        fig = go.Figure(data=data, layout=layout)
        plotly.offline.iplot(fig)

In [None]:
# Player groups to chart: one figure for the stars, one for the rookies
stars = ['LeBron James', 'Russell Westbrook', 'James Harden', 'Anthony Davis', 'Kevin Durant']
rookies = [
    'Deandre Ayton',
    'Luka Doncic',
    'Trae Young',
    'Jaren Jackson',
    'Collin Sexton',
    'Marvin Bagley',
    'Mohamed Bamba',
]
players = {'Stars': stars, 'Rookies': rookies}

visualize_salary(df, players)

### Visualize Top 10 Performances

In [None]:
def visualize_top_n(df, stats, n):
    """Render one bar chart per stat showing the top ``n`` single-row values.

    For each stat the rows are sorted in descending order and only each
    player's best row is kept, so a player appears at most once per chart.
    Bars are coloured by the player's 'Pos' value.

    Args:
        df: Dataframe with 'Name', 'Pos' and one column per entry of ``stats``.
        stats: Iterable of column names to rank by.
        n: Number of players shown in each chart.
    """
    # Build the position -> colour map once. Sorting makes the assignment
    # stable between runs, and the modulo wraps around the palette instead of
    # raising IndexError when there are more positions than colours.
    colors = cl.scales['11']['qual']['Paired']
    positions = sorted(df['Pos'].unique(), key=str)
    pos_color = {pos: colors[i % len(colors)] for i, pos in enumerate(positions)}

    for stat in stats:
        df_stat = (df.sort_values(by=stat, ascending=False)
                     .drop_duplicates(subset='Name')
                     .reset_index(drop=True)
                     .head(n))

        bar = go.Bar(x=df_stat['Name'],
                     y=df_stat[stat],
                     marker={'color': [pos_color[pos] for pos in df_stat['Pos']]},
                     text=df_stat['Pos'].tolist())

        layout = go.Layout(
            # Reflect the requested n rather than a fixed "Top 10"
            title='Top {} in {}'.format(n, stat),
            legend={'x': 1, 'y': 1, 'borderwidth': 1},
            hovermode='closest',
            yaxis={'title': '{}'.format(stat)},
        )

        fig = go.Figure(data=[bar], layout=layout)
        plotly.offline.iplot(fig)

In [None]:
# Top-10 charts for the main box-score categories
visualize_top_n(df, stats=['PTS', '3P', 'AST', 'TRB', 'STL', 'BLK'], n=10)