# Player Correlations
Find the correlation in FPPG between pairs of positions, so that players at highly correlated positions can be favored when creating lineups.

In [None]:
import itertools
import json
import os
import re
from io import StringIO

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from lxml import html
from pandas.io.json import json_normalize
from scipy.stats import pearsonr

#NOTE: change this section to use demo data
player_data_url = "https://rotogrinders.com/game-stats/nba-player"
lineup_data_url = "https://rotogrinders.com/lineups/nba?site=draftkings&date=2016-11-25"
login_post_url = "https://rotogrinders.com/sign-in"

#login and get player data
#credentials are read from the environment so they never have to be typed into
#the notebook; both fall back to '' (the previous placeholder) when unset
form_data = {'username': os.environ.get('ROTOGRINDERS_USERNAME', ''),
             'password': os.environ.get('ROTOGRINDERS_PASSWORD', '')}
with requests.Session() as sesh:
    sesh.post(login_post_url, data=form_data)
    response = sesh.get(player_data_url)
    player_data = response.text
#---------------------------------------------------

#store json data from player data page html
#the code looks for a block of the form:  data = [{" ... }];
json_open_marker = "data = [{\""
json_close_marker = "}];"

start = player_data.find(json_open_marker)
if start == -1:
    #str.find signals "not found" with -1; slicing with it would yield garbage
    raise ValueError("player data page has no embedded 'data = [{...}];' block")
#skip the 7 characters of 'data = ' so the slice begins at the opening '['
start = start + 7

#look for the terminator only after the opening marker so an earlier '}];'
#elsewhere in the page cannot be picked up by mistake
end = player_data.find(json_close_marker, start)
if end == -1:
    raise ValueError("embedded player data block is not terminated by '}];'")
#keep the closing '}]' and drop the trailing ';'
end = end + 2

player_data = player_data[start:end]
#file-like wrapper around the JSON text, consumed later by pd.read_json
StringData = StringIO(player_data)

#login and get lineup data
#credentials come from the environment instead of being hard-coded in the source
form_data = {'username': os.environ.get('ROTOGRINDERS_USERNAME', ''),
             'password': os.environ.get('ROTOGRINDERS_PASSWORD', '')}
with requests.Session() as sesh2:
    #log in on sesh2 itself: the login cookies must live on the same session
    #that requests the lineup page
    sesh2.post(login_post_url, data=form_data)
    response = sesh2.get(lineup_data_url)
    lineup_data = response.text

#TODO: get starting lineup data from html page
#keep only the html between the first lineup list and the "Edit Schedule" link
#NOTE(review): str.find returns -1 when a marker is missing, which silently
#mis-slices the page; the end marker also hard-codes schedule id 35652, which
#presumably belongs to the date in lineup_data_url -- confirm before changing the date
start = lineup_data.find("<ul class=\"lst lineup\">")
end = lineup_data.find("<a class=\"edit-schedule\" href=\"https://rotogrinders.com/schedules/35652/edit\">Edit Schedule</a>")
lineup_data = lineup_data[start:end]

#pull the per-team sections out of the lineup html with regexes, then print
#the player names found inside each section
#find name between markers then find the name in data frame and mark the player as a starter or bench player
#TODO: flag lineups that do not match the ones present in dataframe, "try-catch"

#text between one bold list header and the next one
starters_pattern = re.compile(
    r'<li style="font-size:.9em; font-weight:bold; margin-top:8px;">(.*?)<li style="font-size:.9em; font-weight:bold; margin-top:8px;">',
    re.DOTALL)
#text between a "Bench" header and the next bold list header
bench_pattern = re.compile(
    r'<li style="font-size:.9em; font-weight:bold; margin-top:8px;">Bench</li>(.*?)<li style="font-size:.9em; font-weight:bold; margin-top:8px;">',
    re.DOTALL)
#a player link; the capture runs from the data-url value to just before </a>
player_link_pattern = re.compile(r'<a class="player-popup" data-url="(.*?)</a>')

starters_list = starters_pattern.findall(lineup_data)
bench_list = bench_pattern.findall(lineup_data)

for heading, team_sections in (('STARTERS', starters_list), ('BENCH', bench_list)):
    print(heading)
    for section_html in team_sections:
        print('team')
        for link_text in player_link_pattern.findall(section_html):
            #the link text follows the '">' that closes the opening tag
            name_start = link_text.find('">') + 2
            print(link_text[name_start:])

#now there is enough data to compare correlations of starters to bench players on avg
#another data set may be needed to compare correlations for each game
#TODO: find player data from date equivalent to game lineup projectionComparisons.ipynb may hold the answer

#create json file from data
#player_data is already JSON text, so it is written as-is; passing the string
#through json.dump would store it as one quoted, escaped JSON string instead
#of the array of player records
with open('test_1.json', 'w') as json_file:
    json_file.write(player_data)

#create text file
#the with-block closes the file even if the write fails
with open("test_1.txt", "w") as text_file:
    text_file.write(lineup_data)

#columns needed for the correlation analysis
columns = ['player','team','pos','pts']

#convert json to dataframe
df = pd.read_json(StringData)
#convert dataframe to csv
df.to_csv(r'test_1.csv', index=False)

df_optimizer_form = df[columns]

#create list of teams
#group by the plain column name rather than a one-element list: the keys are
#then scalar team values, matching later groupby('team').get_group(team) calls
#(a one-element list key can produce tuple keys in newer pandas versions)
teams = list(df_optimizer_form.groupby('team').groups.keys())

In [None]:
#!!**--.._Turn some of these operations into functions (especially loops), run unit tests, post to GitHub_..--**!!
#For every pair of position types, correlate (across teams) the best 'pts' at
#one position with the best 'pts' at the other, and likewise for the worst 'pts'.
#Results are appended to correlations_hi / correlations_lo in the same order as
#position_pairs so they can be plotted side by side.
position_types = ['DIS','CG','SHW','SCW','3DW','VF','VB','PB']
position_pairs = []
correlations_hi = []
correlations_lo = []

#create list of pairs to use when creating ordered pairs
for first_pos, second_pos in itertools.combinations(position_types, 2):
    position_pairs.append("{}-{}".format(first_pos, second_pos))

#per team: position -> (highest pts, lowest pts)
#computed once up front instead of re-grouping the dataframe for every pair
players_by_team = df_optimizer_form.groupby('team')
team_extremes = []
for team in teams:
    pts_by_pos = players_by_team.get_group(team).groupby('pos')['pts']
    team_extremes.append({pos: (pts.max(), pts.min()) for pos, pts in pts_by_pos})

#create ordered pairs lists to be used in plots
for pos_pair in position_pairs:
    #unpack in label order so the printed label and pair order match position_pairs
    pos1, pos2 = pos_pair.split('-')
    ord_pairs_max = []
    ord_pairs_min = []
    for extremes in team_extremes:
        #only teams that field both positions contribute a data point
        if pos1 in extremes and pos2 in extremes:
            pos1_max, pos1_min = extremes[pos1]
            pos2_max, pos2_min = extremes[pos2]
            ord_pairs_max.append((pos1_max, pos2_max))
            ord_pairs_min.append((pos1_min, pos2_min))

    print(pos_pair)
    print(["{},{}".format(lhs, rhs) for lhs, rhs in ord_pairs_max])

    #split the ordered pairs into x / y series
    #max and min are kept in separate series (and separate plots)
    x = [float(lhs) for lhs, _ in ord_pairs_max]
    y = [float(rhs) for _, rhs in ord_pairs_max]
    x_min = [float(lhs) for lhs, _ in ord_pairs_min]
    y_min = [float(rhs) for _, rhs in ord_pairs_min]

    if len(x) < 2:
        #pearsonr raises ValueError for fewer than two points; record NaN so
        #the results stay aligned with position_pairs instead of aborting
        corr = corr_min = np.nan
    else:
        corr, _ = pearsonr(x, y)
        corr_min, _min = pearsonr(x_min, y_min)
    print('Pearsons correlation: %.3f' % corr)

    correlations_hi.append(corr)
    correlations_lo.append(corr_min)

#one bar per position pair, at integer x positions 0..len(position_pairs)-1
y_pos = np.arange(len(position_pairs))

fig, (ax1,ax2) = plt.subplots(nrows=1, ncols=2, figsize=(8, 4))

#draw both charts the same way: left = correlations of the highest pts,
#right = correlations of the lowest pts
for ax, values, title in ((ax1, correlations_hi, 'correlations by position(high)'),
                          (ax2, correlations_lo, 'correlations by position(low)')):
    ax.bar(y_pos, values, align='center', alpha=0.5)
    #fix the tick positions first, then label them, so every label sits under
    #its own bar; this two-step form does not depend on set_xticks accepting labels
    ax.set_xticks(y_pos)
    ax.set_xticklabels(position_pairs)
    ax.set_ylabel('Correlations')
    ax.set_title(title)

#check restrictions for how many players on the same team can be picked for draftkings
#also check combination of starting lineup w/ position pairs

#compare starting lineup, mixture, and full of bench players
