# Data Scraping from ESPN

tournements: https://www.espn.com/golf/schedule/_/tour/pga

In [343]:
from bs4 import BeautifulSoup
import urllib.request
import pandas as pd
import numpy as np

### Load the inital page

In [416]:
main_links = [
    'https://www.espn.com/golf/schedule/_/season/2018', 
    'https://www.espn.com/golf/schedule/_/season/2017',
    'https://www.espn.com/golf/schedule/_/season/2016',
    'https://www.espn.com/golf/schedule/_/season/2019'
]

In [384]:
# source = urllib.request.urlopen('https://www.espn.com/golf/schedule/_/tour/pga/').read()
for link in main_links:
    source = urllib.request.urlopen('https://www.espn.com/golf/schedule/_/season/2019').read()
    soup = BeautifulSoup(source,'lxml')

### Get the table with completed tournements

In [417]:
tournement_links = []
for link in main_links:
    source = urllib.request.urlopen('https://www.espn.com/golf/schedule/_/season/2019').read()
    soup = BeautifulSoup(source,'lxml')
    table_titles = soup.findAll("section", {"class" : "ResponsiveTable"})
    for table in table_titles:
        title = table.find("div", {"class" : "Table__Title"})
        if title.text == "Completed Tournaments":
            # This is the one that we want
            # Still saved in table
            break
    links = table.findAll('a', {'class' : "AnchorLink"})
    for link in links: 
        href = link.attrs['href']
        if "player" not in href.split('/'):
            tournement_links.append(href)

### Extract all tournement links from the table

There are tournement links and player links. Tournement links do not have "player" in the path.

In [386]:
links = table.findAll('a', {'class' : "AnchorLink"})
tournement_links = []
for link in links: 
    href = link.attrs['href']
    if "player" not in href.split('/'):
        tournement_links.append(href)

In [418]:
def get_tournement_results(link):
    source = urllib.request.urlopen(link).read()
    soup = BeautifulSoup(source,'lxml')
    
    compet_table = soup.find("div", {"class" : "competitors"})
    tables = compet_table.find_all("section", {"class" : "ResponsiveTable"})
    for table in tables:
        #Get headings
        headings = []
        headings_tag = table.find('thead')
        head_cells = headings_tag.findAll("th")
        if len(head_cells) < 8: 
            continue

        for heading in headings_tag.findAll("th"):
                headings.append(heading.find('a').text)
        
        body = table.find("tbody")
        rows = body.findAll('tr')
        player_data = []
        for row in rows:
            current_row = []
            for text in row.findAll("td"):
                current_row.append(text.text)

            player_data.append(current_row)
        return [headings] + player_data 

In [419]:
def parse_results(results):
    '''Gets nodes and edges of victory graph'''
    data = []
    for p1 in range(len(results)):
        for p2 in range(p1, len(results)):
            try:
                player1 = results[p1]
                player2 = results[p2]
                # need to handle DQ
                
                # Change even to 0
                if player1[2] == "E":
                    player1[2] = 0
                if player2[2] == "E":
                    player2[2] = 0

                if player1[2] == "WD" or player2[2] == "WD":
                    # Not sure how to handle these
                    continue
                    
                if player2[2] == "CUT":
                    # player 2 was cut

                    if player1[2] == "CUT":
                        # player 1 was also cut, so tie
                        continue

                    else: 
                        # player1 was not cut
                        data.append([player1[1], player2[1]])
                        continue

                difference = int(player1[2]) - int(player2[2])
                if difference < 0:
                    data.append([player1[1], player2[1]])
                elif difference > 0:
                    data.append([player2[1], player1[1]])
            except Exception as err: 
                print(err)
                print(player1)
                print(player2)
    return data 
                

In [420]:
tournement_links[0]

'http://www.espn.com/golf/leaderboard?tournamentId=401056252'

In [421]:
data = []
for link in tournement_links:
    try:
        results = get_tournement_results(link)
#         data.append(parse_results(results))
        
#         df = pd.DataFrame(results, columns=['positon', 'name','score', 'r1', 'r2', 'r3','r4', 'total', 'winnings', 'x']).set_index('name')
        df = pd.DataFrame(results[1:], columns=results[0]).set_index("PLAYER") 
        df[['R1', 'R2', 'R3', 'R4']] = df[['R1', 'R2', 'R3', 'R4']].replace("--", np.nan).astype(float)
        # Set types
        df.to_csv('data/' + link.split('=')[-1])
        data.append(df)
        
    except Exception as err: 
        print(link)
        print(err)
    

http://www.espn.com/golf/leaderboard?tournamentId=401077168
'NoneType' object has no attribute 'find_all'
http://www.espn.com/golf/leaderboard?tournamentId=401056524
'NoneType' object has no attribute 'find_all'
http://www.espn.com/golf/leaderboard?tournamentId=401056529
'NoneType' object has no attribute 'find_all'
http://www.espn.com/golf/leaderboard?tournamentId=401056560
'NoneType' object has no attribute 'find_all'
http://www.espn.com/golf/leaderboard?tournamentId=401077168
'NoneType' object has no attribute 'find_all'
http://www.espn.com/golf/leaderboard?tournamentId=401056524
'NoneType' object has no attribute 'find_all'
http://www.espn.com/golf/leaderboard?tournamentId=401056529
'NoneType' object has no attribute 'find_all'
http://www.espn.com/golf/leaderboard?tournamentId=401056560
'NoneType' object has no attribute 'find_all'
http://www.espn.com/golf/leaderboard?tournamentId=401077168
'NoneType' object has no attribute 'find_all'
http://www.espn.com/golf/leaderboard?tournamen

In [422]:
d.loc['Tiger Woods']

Kevin Tway        -19.0
Brandt Snedeker   -23.0
Ryan Moore        -21.0
Sam Ryder         -15.0
Sungjae Im        -21.0
                   ... 
Philip Eriksson     0.0
Poom Saksansin      0.0
Paul Peterson       0.0
Carter Page         0.0
John Lyras          0.0
Name: Tiger Woods, Length: 628, dtype: float64

In [423]:
data[0]


Unnamed: 0_level_0,POS,TO PAR,R1,R2,R3,R4,TOT,EARNINGS,FEDEX PTS
PLAYER,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Kevin Tway,1,-14,68.0,67.0,68.0,71.0,274,"$1,152,000",500
Brandt Snedeker,T2,-14,66.0,65.0,69.0,74.0,274,"$563,200",245
Ryan Moore,T2,-14,67.0,67.0,73.0,67.0,274,"$563,200",245
Sam Ryder,T4,-13,69.0,70.0,67.0,69.0,275,"$241,280",104
Sungjae Im,T4,-13,66.0,69.0,69.0,71.0,275,"$241,280",104
...,...,...,...,...,...,...,...,...,...
Shawn Stefani,-,CUT,75.0,73.0,,,148,--,0
Mark Mulder,-,CUT,75.0,74.0,,,149,--,0
Joey Garber,-,CUT,72.0,77.0,,,149,--,0
Ollie Schniederjans,-,CUT,73.0,78.0,,,151,--,0


In [424]:
def add_series(s1, s2):
    result = {}
    for name in s1.index:
        if name in s2: 
            result[name] = s1[name] + s2[name]
        else: 
            result[name] = s1[name]
    for name in s2.index: 
        if name not in s1:
            result[name] = s2[name]
    return pd.Series(result)

In [425]:
tounement_dfs = []
player_dict = {} # key : name, value : np array where true = win, false = loss, rows are rounds, index is other player
player_wins = {}
player_losses = {}
for df in data:
    all_rounds = df[['R1', 'R2', 'R3', 'R4']].values.T

    rows = []
    for i, name in enumerate(df.index): 
        player_rounds = df.loc[name, ['R1', 'R2', 'R3', 'R4']].values
        player_rounds = player_rounds.reshape(4, 1)
        diff = all_rounds - player_rounds #difference between scores per round
        wins = diff > 0 & (pd.notnull(diff))
        total_matches_arr = pd.notnull(diff).sum(axis=0) 
        total_matches_arr[i] = 0
        total_matches = total_matches_arr.sum()
        total_wins = wins.sum()
        total_losses = total_matches - total_wins


        # add wins and losses to the player's total
        if name in player_wins:
            player_wins[name] += total_wins
            player_losses[name] += total_losses
        else: 
            player_wins[name] = total_wins
            player_losses[name] = total_losses

        player_series = pd.Series(total_matches_arr, index=df.index)
        
        if name in player_dict: 
            player_dict[name] = add_series(player_series, player_dict[name])
        else:
            player_dict[name] = player_series
            
d = pd.DataFrame(player_dict)
# tournement_dfs.append(d)
d = d.reindex(d.columns)

Player dict has each player as the key and a list with the player's index and a boolean matrix as the value. The rows of the boolean matrix are each round, and the columns are the player that they played against (by index)

### Combine all of the dataframes

In [426]:
d = d*-1

In [427]:
d = d.fillna(0)

In [428]:
v = []
for name in d.index:
    score = 1 + (1/2)*(player_wins[name] - player_losses[name])
    v.append(score)
    # set the diagonal
    d.loc[name, name] = player_wins[name] + player_losses[name] + 2
v = np.array(v)

In [429]:
d.index

Index(['Kevin Tway', 'Brandt Snedeker', 'Ryan Moore', 'Sam Ryder',
       'Sungjae Im', 'Troy Merritt', 'Aaron Baddeley', 'Luke List',
       'J.B. Holmes', 'Chase Wright',
       ...
       'Steve Flesch', 'Callum Bruce', 'Hunter Richardson',
       'Marcos Montenegro', 'Hunter Stewart', 'Philip Eriksson',
       'Poom Saksansin', 'Paul Peterson', 'Carter Page', 'John Lyras'],
      dtype='object', length=628)

In [430]:
x =  np.linalg.solve(d.values, v)

In [431]:
x

array([-303.80750999, -303.73846876, -303.79997175, -303.78858514,
       -303.73939094, -303.80455808, -303.78077266, -303.82732693,
       -303.79987892, -303.89675277, -303.8286964 , -303.83412036,
       -303.87637161, -303.86843423, -303.7811215 , -303.81485514,
       -303.79207238, -303.59535463, -303.93433654, -303.7727875 ,
       -303.72076399, -303.77961606, -303.89736022, -303.86393998,
       -303.81805784, -303.78996688, -303.85976016, -303.44562589,
       -303.71734168, -303.79162204, -303.81617372, -303.85604759,
       -303.87595517, -303.85499348, -303.83203026, -303.83171221,
       -303.70741664, -303.85214296, -303.96509801, -303.79435808,
       -303.88031934, -303.7468359 , -303.87877309, -303.74615129,
       -303.82899348, -303.81046914, -303.79977175, -303.78690646,
       -303.90084484, -303.80209411, -303.80032013, -303.82323914,
       -303.79981211, -303.77969798, -303.83341299, -303.83250434,
       -303.87528003, -303.89205105, -303.83711421, -303.82232

In [435]:
def get_rankings(rankings, names):
    rank_indexes = np.argsort(rankings)
    ranked_names = [0]*len(names)
    for i in range(len(names)):
        ranked_names[i] = names[rank_indexes[i]]
    ranked_names.reverse()
    return ranked_names

In [436]:
ranks = pd.DataFrame({'player' : d.index.values, 'rank' : x})

In [437]:
ranks.sort_values('rank')

Unnamed: 0,player,rank
478,Will Claxton,-304.067433
143,Wesley Bryan,-304.053822
72,Michael Kim,-304.048020
352,Michael Block,-304.044065
448,Tony Romo (a),-304.038342
...,...,...
580,Sang-hyun Park,-303.187216
579,Robert MacIntyre,-303.167586
223,Carl Yuan,-303.164230
459,Sandy Lyle,-303.114655


In [404]:
get_rankings(x, d.index)


['Tom Purtzer',
 'Sandy Lyle',
 'Carl Yuan',
 'Robert MacIntyre',
 'Sang-hyun Park',
 'James Sugrue (a)',
 'Matthew Baldwin',
 'Andrew Johnston',
 'Connor Syme',
 'Larry Mize',
 'Darren Clarke',
 'Hayden Shieh',
 'Dong-Kyu Jang',
 'Spencer Tibbits (a)',
 'Neal Lancaster',
 'Joseph Bramlett',
 'Lee Slattery',
 'Callum Shinkwin',
 'Christiaan Bezuidenhout',
 'Chris Wood',
 'Benjamin Hebert',
 'Romain Langasque',
 'Philip Eriksson',
 'Armando Favela',
 'Zander Lombard',
 'WC Liang',
 'Andrew Wilson',
 'Inn-choon Hwang',
 'Ashton Turner',
 'Ian Woosnam',
 'Jose Maria Olazabal',
 'Jack Senior',
 'Yoshinori Fujimoto',
 'Paul Dunne',
 'Dean Burmester',
 'Tyler Hall',
 'Marcus Kinhult',
 'Xin-Jun Zhang',
 'A-Shun Wu',
 'Jason Norris',
 'Adam Bland',
 'Paul Waring',
 'Gunn Charoenkul',
 'Martin Flores',
 'Derek Lamely',
 'Dong Seop Maeng',
 'Chad Ramey',
 'Robert Rock',
 'Matthias Schmid (a)',
 'Luis Gagne',
 'Jason Caron',
 'Curtis Knipes (a)',
 'Austin Eckroat (a)',
 'Frank Lickliter II',
 'Y

In [170]:
sum = 0
for i in data: 
    sum += len(i)
print(sum)

7861


### Make the list one dimensional

In [172]:
data_list = [] # one dimensional list
for row in data:
     data_list += row

In [173]:
import pandas as pd

In [191]:
df = pd.DataFrame(data_list, columns=['winner', "loser"])
df.head()

Unnamed: 0,winner,loser
0,Joaquin Niemann,Tom Hoge
1,Joaquin Niemann,Brian Harman
2,Joaquin Niemann,Harris English
3,Joaquin Niemann,Nate Lashley
4,Joaquin Niemann,Richy Werenski


In [190]:
pd.get_dummies(df['winner']).sum()

Adam Long         136
Andrew Novak      108
Austin Cook       136
Beau Hossler       87
Brendan Steele    108
                 ... 
Tom Hoge          152
Tyler McCumber     98
Viktor Hovland    142
Vince Covello      98
Zack Sucher       124
Length: 67, dtype: int64

In [196]:
players = {}
for match in data_list:
    if match[0] not in players:
        players[match[0]] = {'win_against' : [], 'lose_against' : []}
        
    if match[1] not in players:
        players[match[1]] = {'win_against' : [], 'lose_against' : []}
    
    players[match[0]]['win_against'].append(match[1])
    players[match[1]]['lose_against'].append(match[1])
    

In [197]:
player_df = pd.DataFrame(players)
player_df.head()

Unnamed: 0,Joaquin Niemann,Tom Hoge,Brian Harman,Harris English,Nate Lashley,Richy Werenski,Sebastian Munoz,Scottie Scheffler,Robby Shelton,Viktor Hovland,...,Kristoffer Ventura,Ryan Blaum,Brendon de Jonge,Michael Gellerman,Conrad Shindler,Patton Kizzire,Mason Williams,Freddie Jacobson,Shawn Stefani,Joe Boros
win_against,"[Tom Hoge, Brian Harman, Harris English, Nate ...","[Brian Harman, Harris English, Nate Lashley, R...","[Sebastian Munoz, Scottie Scheffler, Robby She...","[Sebastian Munoz, Scottie Scheffler, Robby She...","[Sebastian Munoz, Scottie Scheffler, Robby She...","[Sebastian Munoz, Scottie Scheffler, Robby She...","[Viktor Hovland, Matt Jones, Mark Hubbard, Lan...","[Viktor Hovland, Matt Jones, Mark Hubbard, Lan...","[Viktor Hovland, Matt Jones, Mark Hubbard, Lan...","[Lanto Griffin, Bud Cauley, Austin Cook, Kevin...",...,[],[],[],[],[],[],[],[],[],[]
lose_against,[],[Tom Hoge],"[Brian Harman, Brian Harman]","[Harris English, Harris English]","[Nate Lashley, Nate Lashley]","[Richy Werenski, Richy Werenski]","[Sebastian Munoz, Sebastian Munoz, Sebastian M...","[Scottie Scheffler, Scottie Scheffler, Scottie...","[Robby Shelton, Robby Shelton, Robby Shelton, ...","[Viktor Hovland, Viktor Hovland, Viktor Hovlan...",...,"[Kristoffer Ventura, Kristoffer Ventura, Krist...","[Ryan Blaum, Ryan Blaum, Ryan Blaum, Ryan Blau...","[Brendon de Jonge, Brendon de Jonge, Brendon d...","[Michael Gellerman, Michael Gellerman, Michael...","[Conrad Shindler, Conrad Shindler, Conrad Shin...","[Patton Kizzire, Patton Kizzire, Patton Kizzir...","[Mason Williams, Mason Williams, Mason William...","[Freddie Jacobson, Freddie Jacobson, Freddie J...","[Shawn Stefani, Shawn Stefani, Shawn Stefani, ...","[Joe Boros, Joe Boros, Joe Boros, Joe Boros, J..."


In [207]:
winner_dict = {} # Number of each times a player beat the other (winner is row, loser is col)
total_match_dict = {} # Sum of total number of times each player played the other
for winner in df['winner'].unique():
    winner_dict[winner] = {winner : 0}
    for loser in df[df['winner'] == winner]['loser']:
        if loser not in winner_dict[winner]:
            # They havent played yet
            winner_dict[winner][loser] = 0
            
        winner_dict[winner][loser] += 1

In [215]:
winner_dict

{'Joaquin Niemann': {'Tom Hoge': 1,
  'Brian Harman': 1,
  'Harris English': 1,
  'Nate Lashley': 1,
  'Richy Werenski': 1,
  'Sebastian Munoz': 1,
  'Scottie Scheffler': 1,
  'Robby Shelton': 1,
  'Viktor Hovland': 1,
  'Matt Jones': 1,
  'Mark Hubbard': 1,
  'Lanto Griffin': 1,
  'Bud Cauley': 1,
  'Austin Cook': 1,
  'Kevin Na': 1,
  'Joseph Bramlett': 1,
  'Adam Long': 1,
  'Harold Varner III': 1,
  'Scott Piercy': 1,
  'Bronson Burgoon': 1,
  'Harry Higgs': 1,
  'Sungjae Im': 1,
  'Nick Taylor': 1,
  'Rob Oppenheim': 1,
  'Doc Redman': 1,
  'Cameron Smith': 1,
  'Zack Sucher': 1,
  'Scott Harrington': 1,
  'Keegan Bradley': 1,
  'Denny McCarthy': 1,
  'Doug Ghim': 1,
  'Morgan Hoffmann': 1,
  'Sam Ryder': 1,
  'Scott Brown': 1,
  'Danny Lee': 1,
  'Brice Garnett': 1,
  'Peter Uihlein': 1,
  'Hank Lebioda': 1,
  'Cameron Tringale': 1,
  'Brendan Steele': 1,
  'Joel Dahmen': 1,
  'D.J. Trahan': 1,
  'Andrew Novak': 1,
  'Grayson Murray': 1,
  'Mark D. Anderson': 1,
  'Jonathan Byrd'

### Match Dataframe
Winner is the row index, column index is the loser. 

The value is the number of times that the winner beat the loser.

In [208]:
match_df = pd.DataFrame(winner_dict).fillna(0).astype(int)

In [209]:
wins = match_df.sum(axis=1)
losses = match_df.sum(axis=0)
total = wins + losses

### Make the vector v

In [212]:
match_df.head()

Unnamed: 0,Joaquin Niemann,Tom Hoge,Brian Harman,Harris English,Nate Lashley,Richy Werenski,Sebastian Munoz,Scottie Scheffler,Robby Shelton,Viktor Hovland,...,Roberto Castro,J.J. Spaun,Rhein Gibson,Russell Henley,Jason Dufner,Cameron Percy,Johnson Wagner,Sebastian Cappelen,Robert Streb,Beau Hossler
Tom Hoge,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Brian Harman,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Harris English,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Nate Lashley,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Richy Werenski,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [210]:
match_df.describe()

Unnamed: 0,Joaquin Niemann,Tom Hoge,Brian Harman,Harris English,Nate Lashley,Richy Werenski,Sebastian Munoz,Scottie Scheffler,Robby Shelton,Viktor Hovland,...,Roberto Castro,J.J. Spaun,Rhein Gibson,Russell Henley,Jason Dufner,Cameron Percy,Johnson Wagner,Sebastian Cappelen,Robert Streb,Beau Hossler
count,153.0,153.0,153.0,153.0,153.0,153.0,153.0,153.0,153.0,153.0,...,153.0,153.0,153.0,153.0,153.0,153.0,153.0,153.0,153.0,153.0
mean,1.0,0.993464,0.96732,0.96732,0.96732,0.96732,0.947712,0.947712,0.947712,0.928105,...,0.601307,0.601307,0.601307,0.601307,0.601307,0.594771,0.581699,0.581699,0.575163,0.568627
std,0.0,0.080845,0.178381,0.178381,0.178381,0.178381,0.223337,0.223337,0.223337,0.259163,...,0.491237,0.491237,0.491237,0.491237,0.491237,0.492549,0.4949,0.4949,0.495942,0.496894
min,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
75%,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [179]:
v_series = 1 + (1/2)*(wins - losses)


Validate v with Tiger Woods

In [180]:
(1 + (1/2)*(wins['Tiger Woods'] - losses['Tiger Woods'])) == v_series['Tiger Woods']

KeyError: 'Tiger Woods'

### Make the matrix A

We need total matches against each person

In [213]:
total_matches = {}
for player in match_df: # iterates over rows
    # For every other player
    # add the wins and losses
    if player not in total_matches:
        total_matches[player] = {}
        
    for other in match_df: # for every other player
        if other not in total_matches:
            total_matches[other] = {}
            
        if player == other:
            # Same player make it 2 + total matches
#             total_matches[player][other] = 2 + total[player]
            total_matches[player][other] = 0
            
        else:
            total_matches[player][other] = -1*(match_df.loc[player, other] + match_df.loc[other, player])
            total_matches[other][player] = total_matches[player][other]
    

KeyError: 'Joaquin Niemann'

The values of this dataframe form the matrix A

In [157]:
total_matches_df = pd.DataFrame(total_matches).astype(int)
total_matches_df.head()

Unnamed: 0,Joaquin Niemann,Tom Hoge,Brian Harman,Harris English,Nate Lashley,Richy Werenski,Sebastian Munoz,Scottie Scheffler,Robby Shelton,Viktor Hovland,...,David Lingmerth,MJ Daffue,Brendon de Jonge,Jay McLuen,Daniel Chopra,Tom Lewis,Hayden Buckley,Sam Saunders,Rod Perry,Davis Love III
Joaquin Niemann,0,-3,-4,-5,-2,-3,-3,-5,-5,-4,...,-1,0,-1,0,0,-2,-1,-1,-1,-1
Tom Hoge,-3,0,-5,-8,-2,-5,-6,-5,-7,-3,...,-2,0,-1,0,0,-2,-1,-3,-1,-1
Brian Harman,-4,-5,0,-5,-1,-2,-4,-6,-5,-4,...,-1,0,-1,0,0,-1,-1,-2,-1,-3
Harris English,-5,-8,-5,0,-2,-3,-6,-6,-8,-4,...,-2,0,-1,0,-1,-3,-1,-2,-1,-3
Nate Lashley,-2,-2,-1,-2,0,0,-3,-3,-3,-3,...,-1,0,-1,0,0,-1,0,-1,-1,-1


Now we need to make sure that v is in the correct order

In [161]:
sum_df = total_matches_df.sum()*-1

In [166]:
sum_df['Tom Hoge']

969

In [165]:
total['Tom Hoge']

1017.0

In [155]:
total_matches_df['Tom Hoge'].sum() - 1019 + total['Tom Hoge']

48.0

In [144]:
import numpy as np

In [145]:
v_lst = []
for player in total_matches_df: # goes in order of the rows
    v_lst.append(v_series[player])
    
v = np.array(v_lst)

In [146]:
v[0]

13.5

In [147]:
A = total_matches_df.values

In [148]:
rankings_arr = np.linalg.solve(A, v)

In [149]:
np.amax(rankings)

0.25

In [150]:
rankings = {}
for i in range(len(rankings_arr)):
    rankings[total_matches_df.index[i]] = rankings_arr[i]
rankings = pd.Series(rankings)

In [151]:
rankings

Joaquin Niemann   -0.438461
Tom Hoge          -0.553864
Brian Harman      -0.520519
Harris English    -0.735778
Nate Lashley      -0.424828
                     ...   
Tom Lewis         -0.169049
Hayden Buckley    -0.417729
Sam Saunders      -0.125463
Rod Perry         -0.443530
Davis Love III    -0.124328
Length: 303, dtype: float64

In [152]:
rankings.sort_values()

Rory McIlroy        -0.927810
Tyrrell Hatton      -0.919660
Jon Rahm            -0.895888
Patrick Reed        -0.801863
Bryson DeChambeau   -0.771709
                       ...   
Yongjun Bae (a)     -0.016577
Marcus Kinhult      -0.012190
Whee Kim             0.001258
Michael Kim          0.002154
Yi Keun Chang        0.021820
Length: 303, dtype: float64

In [130]:
new_df = pd.DataFrame(pd.concat([rankings, wins, losses, total], axis=1)) 

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  """Entry point for launching an IPython kernel.


In [131]:
new_df.head()

Unnamed: 0,0,1,2,3
3,0.25,1,1.0,2.0
Aaron Baddeley,-0.457617,364,413.0,777.0
Aaron Wise,-0.395688,321,309.0,630.0
Abraham Ancer,-0.603094,279,510.0,789.0
Adam Hadwin,-0.622431,134,264.0,398.0


In [136]:
df.sort_values(by=0, axis=0)

KeyError: 0