# Data Scraping from ESPN

tournements: https://www.espn.com/golf/schedule/_/tour/pga

In [1]:
from bs4 import BeautifulSoup
import urllib.request
import pandas as pd
import numpy as np
import requests
import selenium

### Load the roster for the 2020 Masters from masters.com

In [21]:
roster = pd.read_excel('players.xlsx')

In [22]:
roster.head()

Unnamed: 0,Name,Country,Qualification
0,"An, Byeong Hun",Korea,1819
1,"# Ancer, Abraham",Mexico,171819
2,"#* Augenstein, John",United States,7-B
3,"# Bezuidenhout, Christiaan",South Africa,19
4,"Cabrera, Angel",Argentina,1


In [23]:
def reverse_name(name):
    '''Goes from `last, first` to `first last`
        Also removes "#" from the name
    '''
    name = name.replace('#', "")
    name_lst = name.split(',')
    name_lst = [name.strip() for name in name_lst[::-1]]
    return " ".join(name_lst)

In [24]:
players_2020 = []
for row in roster.itertuples(): 
    if "*" not in row.Name:
        players_2020.append(reverse_name(row.Name))
        

In [25]:
s = pd.Series(players_2020)

In [2]:
roster = pd.read_csv('2020_players.csv', header=None)
roster.head()

Unnamed: 0,0,1
0,0,Byeong-Hun An
1,1,Abraham Ancer
2,2,Christiaan Bezuidenhout
3,3,Angel Cabrera
4,4,Rafael Cabrera Bello


In [52]:
s.to_csv('2020_players.csv')

  """Entry point for launching an IPython kernel.


## Scraping previous tournement data from 

In [7]:
base_link = "https://www.espn.com/golf/schedule/_/season/"
main_links = [
    'https://www.espn.com/golf/schedule/_/season/2010',
    'https://www.espn.com/golf/schedule/_/season/2011',
    'https://www.espn.com/golf/schedule/_/season/2012',
    'https://www.espn.com/golf/schedule/_/season/2013',
    'https://www.espn.com/golf/schedule/_/season/2015',
    'https://www.espn.com/golf/schedule/_/season/2016',
    'https://www.espn.com/golf/schedule/_/season/2017',
    'https://www.espn.com/golf/schedule/_/season/2018', 
    'https://www.espn.com/golf/schedule/_/season/2019',
    'https://www.espn.com/golf/schedule/_/season/2020'
]

### Get links to all of the tournements in the past 10 years

In [9]:
tournement_links = {}
for link in main_links:
    year = link.split('/')[-1]
    tournement_links[year] = []
    source = urllib.request.urlopen(link).read()
    soup = BeautifulSoup(source,'lxml')
    table_titles = soup.findAll("section", {"class" : "ResponsiveTable"})
    for table in table_titles:
        title = table.find("div", {"class" : "Table__Title"})
        if title.text == "Completed Tournaments":
            # This is the one that we want
            # Still saved in table
            break
    links = table.findAll('a', {'class' : "AnchorLink"})
    for link in links: 
        href = link.attrs['href']
        if "player" not in href.split('/'):
            tournement_links[year].append(href)

### Extract all tournement links from the table

There are tournement links and player links. Tournement links do not have "player" in the path.

In [11]:
def get_tournement_results(link):
    source = urllib.request.urlopen(link).read()
    soup = BeautifulSoup(source,'lxml')
    
    compet_table = soup.find("div", {"class" : "competitors"})
    tables = compet_table.find_all("section", {"class" : "ResponsiveTable"})
    for table in tables:
        #Get headings
        headings = []
        headings_tag = table.find('thead')
        head_cells = headings_tag.findAll("th")
        if len(head_cells) < 8: 
            continue

        for heading in headings_tag.findAll("th"):
                headings.append(heading.find('a').text)
        
        body = table.find("tbody")
        rows = body.findAll('tr')
        player_data = []
        for row in rows:
            current_row = []
            for text in row.findAll("td"):
                current_row.append(text.text)

            player_data.append(current_row)
        return [headings] + player_data 

In [None]:
data = {}
for year in tournement_links:
    for link in tournement_links[year]:
        data[year] = []
        try:
            results = get_tournement_results(link)
    #         data.append(parse_results(results))

    #         df = pd.DataFrame(results, columns=['positon', 'name','score', 'r1', 'r2', 'r3','r4', 'total', 'winnings', 'x']).set_index('name')
            df = pd.DataFrame(results[1:], columns=results[0]).set_index("PLAYER") 
            df[['R1', 'R2', 'R3', 'R4']] = df[['R1', 'R2', 'R3', 'R4']].replace("--", np.nan).astype(float)
            # Set types
            df.to_csv('data/' + str(year) + '/' + link.split('=')[-1])
            data[year].append(df)

        except Exception as err: 
            print(link)
            print(err)
    

http://www.espn.com/golf/leaderboard?tournamentId=769
[Errno 54] Connection reset by peer
http://www.espn.com/golf/leaderboard?tournamentId=802
'NoneType' object has no attribute 'find_all'
http://www.espn.com/golf/leaderboard?tournamentId=401024025
'NoneType' object has no attribute 'find_all'
http://www.espn.com/golf/leaderboard?tournamentId=838
'NoneType' object has no attribute 'find_all'
http://www.espn.com/golf/leaderboard?tournamentId=919
'NoneType' object has no attribute 'find_all'
http://www.espn.com/golf/leaderboard?tournamentId=984
"['R4'] not in index"
http://www.espn.com/golf/leaderboard?tournamentId=993
'NoneType' object has no attribute 'find_all'
http://www.espn.com/golf/leaderboard?tournamentId=1025
'NoneType' object has no attribute 'find_all'
http://www.espn.com/golf/leaderboard?tournamentId=1026
'NoneType' object has no attribute 'find_all'
http://www.espn.com/golf/leaderboard?tournamentId=1062
'NoneType' object has no attribute 'find_all'
http://www.espn.com/golf/

Remove all of the amateurs and people who withdrew or were disqualified from the dataset.

When a player withdraws, they get a score of whatever they had when they withdrew—42 through 9 holes, for example— so it looks like the got a 42 on the round and beat everybody.

# Data Preprocessing
### Parse results from each tournement

In [3]:
import os

dfs = {}
for folder in os.listdir('data'):
    if folder[0] == '2':
        dfs[folder] = []
        for file in os.listdir('data/' + folder):
            dfs[folder].append(pd.read_csv('data/' + folder + '/' +file).set_index("PLAYER"))


In [4]:
pro_data = {}
for year in dfs: 
    pro_data[year] = []
    for df in dfs[year]:
        split_names = [] 
        for player in df.index: 
            split_names.append(player.split(' '))
        df['name_arr'] = split_names 
        df['pro'] = df['name_arr'].apply(lambda arr : arr[-1] != '(a)')
        df = df[df.pro]
        df['finished'] = df['TO PAR'].apply(lambda s : s != "WD" and s != "DQ")
        df = df[df.finished]
        pro_data[year].append(df)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()


Reduce the dataframe to only the players that are in the masters in 2020

Pro data is a list of dataframes where each dataframe is a single tournement. The indexes are the player names and the columns are the scores in each round


In [18]:
pd.isnull([np.nan, 0])

array([ True, False])

In [5]:
def add_dicts(s1, s2):
    '''Function that adds the values of two pandas series'''
    result = {}
    for name in s1:
        if name in s2: 
            result[name] = s1[name] + s2[name]
        else: 
            result[name] = s1[name]
            
    for name in s2: 
        if name not in s1:
            result[name] = s2[name]
    return result

def create_dict()

### Build the Massey Matrix

In [47]:
tounement_dfs = []
player_dict = {} # key : name, value : np array where true = win, false = loss, rows are rounds, index is other player
player_wins = {}
player_losses = {}
massey_dict = {}
columns = []
rows = []
for year in dfs:
    for df in dfs[year]:
        # 4 X num players matrix (numpy)
        all_rounds = df[['R1', 'R2', 'R3', 'R4']].values.T
    #     print(all_rounds)
        rows = []
        for i, name in enumerate(df.index): 
                player_rounds = df.loc[name, ['R1', 'R2', 'R3', 'R4']].values
                try:
                    player_rounds = player_rounds.reshape(4, 1)
                except: 
                    player_rounds = player_rounds[0].reshape(4, 1)

                #difference between scores per round
                diff = all_rounds - player_rounds                
                where_are_NaNs = pd.isnull(diff)
                diff[where_are_NaNs] = 0
        #         print(diff)
                wins = diff > 0 & (pd.notnull(diff))
        #         print(wins)
                total_matches_arr = pd.notnull(diff).sum(axis=0) 
        #         print(total_matches_arr)
                total_matches_arr[i] = 0
                total_matches = total_matches_arr.sum()
                total_wins = wins.sum()
                total_losses = total_matches - total_wins

                    #massey method 
#                 massey_diff = massey_diff.fillna()
#                 massey_sum =  massey_diff.values.sum()
#                 massey_sum =  massey_diff.sum()
                massey_sum = diff.sum()
                if name in massey_dict: 
                    massey_dict[name] += massey_sum 
                else:
                    massey_dict[name] = massey_sum

                # add wins and losses to the player's total
                if name in player_wins:
                    player_wins[name] += total_wins
                    player_losses[name] += total_losses
                else: 
                    player_wins[name] = total_wins
                    player_losses[name] = total_losses

#                 player_series = pd.DataFrame({"player" : df.index, "games" : total_matches_arr}).fillna(0).reset_index()
                player_series = dict(zip(df.index, total_matches_arr))
                
                if name in player_dict:
                    player_dict[name] = player_series.groupby('player').sum().add(player_dict[name].groupby('player').sum(), fill_value=0).reset_index()
                else: 
                    player_dict[name] = player_series
                    
                if name in player_dict: 
                    player_dict[name] = add_series(player_series, player_dict[name])
                else:
                    player_dict[name] = player_series

# massey_A = pd.DataFrame(player_dict)
# tournement_dfs.append(d)
# massey_A = massey_A.reindex(massey_A.columns)

Combine all of the players

#### Ensure that the columns are in the same order as the rows

In [None]:
def combine_dfs(df, )

In [48]:
player_dict['Tiger Woods']

Unnamed: 0,player,index,games
0,A-Shun Wu,108.0,4.0
1,A.J. McInerney,78.0,4.0
2,Aaron Baddeley,3519.0,208.0
3,Aaron Rai,51.0,4.0
4,Aaron Townsend,90.0,4.0
...,...,...,...
1149,Zack Miller,355.0,16.0
1150,Zack Sucher,401.0,20.0
1151,Zander Lombard,190.0,8.0
1152,Zane Scotland,55.0,4.0


In [150]:
massey_A = massey_A.reindex(massey_A.columns)

In [124]:
massey_A.head()

Unnamed: 0,Kevin Tway,Brandt Snedeker,Ryan Moore,Sam Ryder,Sungjae Im,Troy Merritt,Aaron Baddeley,Luke List,J.B. Holmes,Chase Wright,...,Steve Flesch,Callum Bruce,Hunter Richardson,Marcos Montenegro,Hunter Stewart,Philip Eriksson,Poom Saksansin,Paul Peterson,Carter Page,John Lyras
Kevin Tway,8342.0,-62.0,-50.0,-40.0,-59.0,-43.0,-18.0,-30.0,-46.0,-33.0,...,0.0,0.0,0.0,0.0,0.0,-4.0,-4.0,-2.0,-2.0,-2.0
Brandt Snedeker,-62.0,9480.0,-46.0,-38.0,-69.0,-37.0,-25.0,-38.0,-53.0,-27.0,...,0.0,0.0,0.0,0.0,0.0,-4.0,-4.0,-4.0,-2.0,-2.0
Ryan Moore,-50.0,-46.0,7971.0,-52.0,-63.0,-45.0,-28.0,-40.0,-45.0,-29.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Sam Ryder,-40.0,-38.0,-52.0,8098.0,-64.0,-37.0,-32.0,-39.0,-37.0,-39.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-4.0,-2.0,-2.0
Sungjae Im,-59.0,-69.0,-63.0,-64.0,12865.0,-48.0,-45.0,-53.0,-50.0,-63.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-4.0,-2.0,-2.0


In [None]:
massey_A = massey_A*-1
massey_A = massey_A.fillna(0)

In [None]:
massey = pd.Series(massey_dict)
massey

## Build the right hand vector and the diagonal in the Massey matrix

In [None]:
massey = pd.Series(massey_dict)
v = []
for name in d.index:
    score = massey.loc[name] 
#     v.append(massey[name])
    v.append(score)
    # set the diagonal
    massey_A.loc[name, name] = player_wins[name] + player_losses[name]
v = np.array(v)

## Save the Massey matrix as a csv

In [None]:
massey.sort_values()

In [None]:
massey_A.to_csv('massey.csv')

In [None]:
len(v), len(massey_A)

In [None]:
massey_A.index

In [None]:
r =  np.linalg.solve(massey_A.values, v)

In [None]:
rating = pd.DataFrame({'player' : massey_A.index.values, 'rating' : r}).set_index('player')

In [None]:
def get_rankings(rankings, names):
    rank_indexes = np.argsort(rankings)
    ranked_names = [0]*len(names)
    for i in range(len(names)):
        ranked_names[i] = names[rank_indexes[i]]
    ranked_names.reverse()
    return ranked_names

In [None]:
rating

In [None]:
ranks = rating.loc[players_2020].sort_values('rating')

In [None]:
ranks.tail(50)

In [None]:
ranks.sort_values('rank').tail(10)

### Make the list one dimensional

In [172]:
data_list = [] # one dimensional list
for row in data:
     data_list += row

In [173]:
import pandas as pd

In [191]:
df = pd.DataFrame(data_list, columns=['winner', "loser"])
df.head()

Unnamed: 0,winner,loser
0,Joaquin Niemann,Tom Hoge
1,Joaquin Niemann,Brian Harman
2,Joaquin Niemann,Harris English
3,Joaquin Niemann,Nate Lashley
4,Joaquin Niemann,Richy Werenski


In [190]:
pd.get_dummies(df['winner']).sum()

Adam Long         136
Andrew Novak      108
Austin Cook       136
Beau Hossler       87
Brendan Steele    108
                 ... 
Tom Hoge          152
Tyler McCumber     98
Viktor Hovland    142
Vince Covello      98
Zack Sucher       124
Length: 67, dtype: int64

In [196]:
players = {}
for match in data_list:
    if match[0] not in players:
        players[match[0]] = {'win_against' : [], 'lose_against' : []}
        
    if match[1] not in players:
        players[match[1]] = {'win_against' : [], 'lose_against' : []}
    
    players[match[0]]['win_against'].append(match[1])
    players[match[1]]['lose_against'].append(match[1])
    

In [197]:
player_df = pd.DataFrame(players)
player_df.head()

Unnamed: 0,Joaquin Niemann,Tom Hoge,Brian Harman,Harris English,Nate Lashley,Richy Werenski,Sebastian Munoz,Scottie Scheffler,Robby Shelton,Viktor Hovland,...,Kristoffer Ventura,Ryan Blaum,Brendon de Jonge,Michael Gellerman,Conrad Shindler,Patton Kizzire,Mason Williams,Freddie Jacobson,Shawn Stefani,Joe Boros
win_against,"[Tom Hoge, Brian Harman, Harris English, Nate ...","[Brian Harman, Harris English, Nate Lashley, R...","[Sebastian Munoz, Scottie Scheffler, Robby She...","[Sebastian Munoz, Scottie Scheffler, Robby She...","[Sebastian Munoz, Scottie Scheffler, Robby She...","[Sebastian Munoz, Scottie Scheffler, Robby She...","[Viktor Hovland, Matt Jones, Mark Hubbard, Lan...","[Viktor Hovland, Matt Jones, Mark Hubbard, Lan...","[Viktor Hovland, Matt Jones, Mark Hubbard, Lan...","[Lanto Griffin, Bud Cauley, Austin Cook, Kevin...",...,[],[],[],[],[],[],[],[],[],[]
lose_against,[],[Tom Hoge],"[Brian Harman, Brian Harman]","[Harris English, Harris English]","[Nate Lashley, Nate Lashley]","[Richy Werenski, Richy Werenski]","[Sebastian Munoz, Sebastian Munoz, Sebastian M...","[Scottie Scheffler, Scottie Scheffler, Scottie...","[Robby Shelton, Robby Shelton, Robby Shelton, ...","[Viktor Hovland, Viktor Hovland, Viktor Hovlan...",...,"[Kristoffer Ventura, Kristoffer Ventura, Krist...","[Ryan Blaum, Ryan Blaum, Ryan Blaum, Ryan Blau...","[Brendon de Jonge, Brendon de Jonge, Brendon d...","[Michael Gellerman, Michael Gellerman, Michael...","[Conrad Shindler, Conrad Shindler, Conrad Shin...","[Patton Kizzire, Patton Kizzire, Patton Kizzir...","[Mason Williams, Mason Williams, Mason William...","[Freddie Jacobson, Freddie Jacobson, Freddie J...","[Shawn Stefani, Shawn Stefani, Shawn Stefani, ...","[Joe Boros, Joe Boros, Joe Boros, Joe Boros, J..."


In [207]:
winner_dict = {} # Number of each times a player beat the other (winner is row, loser is col)
total_match_dict = {} # Sum of total number of times each player played the other
for winner in df['winner'].unique():
    winner_dict[winner] = {winner : 0}
    for loser in df[df['winner'] == winner]['loser']:
        if loser not in winner_dict[winner]:
            # They havent played yet
            winner_dict[winner][loser] = 0
            
        winner_dict[winner][loser] += 1

In [215]:
winner_dict

{'Joaquin Niemann': {'Tom Hoge': 1,
  'Brian Harman': 1,
  'Harris English': 1,
  'Nate Lashley': 1,
  'Richy Werenski': 1,
  'Sebastian Munoz': 1,
  'Scottie Scheffler': 1,
  'Robby Shelton': 1,
  'Viktor Hovland': 1,
  'Matt Jones': 1,
  'Mark Hubbard': 1,
  'Lanto Griffin': 1,
  'Bud Cauley': 1,
  'Austin Cook': 1,
  'Kevin Na': 1,
  'Joseph Bramlett': 1,
  'Adam Long': 1,
  'Harold Varner III': 1,
  'Scott Piercy': 1,
  'Bronson Burgoon': 1,
  'Harry Higgs': 1,
  'Sungjae Im': 1,
  'Nick Taylor': 1,
  'Rob Oppenheim': 1,
  'Doc Redman': 1,
  'Cameron Smith': 1,
  'Zack Sucher': 1,
  'Scott Harrington': 1,
  'Keegan Bradley': 1,
  'Denny McCarthy': 1,
  'Doug Ghim': 1,
  'Morgan Hoffmann': 1,
  'Sam Ryder': 1,
  'Scott Brown': 1,
  'Danny Lee': 1,
  'Brice Garnett': 1,
  'Peter Uihlein': 1,
  'Hank Lebioda': 1,
  'Cameron Tringale': 1,
  'Brendan Steele': 1,
  'Joel Dahmen': 1,
  'D.J. Trahan': 1,
  'Andrew Novak': 1,
  'Grayson Murray': 1,
  'Mark D. Anderson': 1,
  'Jonathan Byrd'

### Match Dataframe
Winner is the row index, column index is the loser. 

The value is the number of times that the winner beat the loser.

In [208]:
match_df = pd.DataFrame(winner_dict).fillna(0).astype(int)

In [209]:
wins = match_df.sum(axis=1)
losses = match_df.sum(axis=0)
total = wins + losses

### Make the vector v

In [212]:
match_df.head()

Unnamed: 0,Joaquin Niemann,Tom Hoge,Brian Harman,Harris English,Nate Lashley,Richy Werenski,Sebastian Munoz,Scottie Scheffler,Robby Shelton,Viktor Hovland,...,Roberto Castro,J.J. Spaun,Rhein Gibson,Russell Henley,Jason Dufner,Cameron Percy,Johnson Wagner,Sebastian Cappelen,Robert Streb,Beau Hossler
Tom Hoge,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Brian Harman,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Harris English,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Nate Lashley,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Richy Werenski,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [179]:
v_series = 1 + (1/2)*(wins - losses)


Validate v with Tiger Woods

In [69]:
(1 + (1/2)*(wins['Tiger Woods'] - losses['Tiger Woods'])) == v_series['Tiger Woods']

IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices

### Make the matrix A

We need total matches against each person

In [213]:
total_matches = {}
for player in match_df: # iterates over rows
    # For every other player
    # add the wins and losses
    if player not in total_matches:
        total_matches[player] = {}
        
    for other in match_df: # for every other player
        if other not in total_matches:
            total_matches[other] = {}
            
        if player == other:
            # Same player make it 2 + total matches
#             total_matches[player][other] = 2 + total[player]
            total_matches[player][other] = 0
            
        else:
            total_matches[player][other] = -1*(match_df.loc[player, other] + match_df.loc[other, player])
            total_matches[other][player] = total_matches[player][other]
    

KeyError: 'Joaquin Niemann'

The values of this dataframe form the matrix A

In [157]:
total_matches_df = pd.DataFrame(total_matches).astype(int)
total_matches_df.head()

Unnamed: 0,Joaquin Niemann,Tom Hoge,Brian Harman,Harris English,Nate Lashley,Richy Werenski,Sebastian Munoz,Scottie Scheffler,Robby Shelton,Viktor Hovland,...,David Lingmerth,MJ Daffue,Brendon de Jonge,Jay McLuen,Daniel Chopra,Tom Lewis,Hayden Buckley,Sam Saunders,Rod Perry,Davis Love III
Joaquin Niemann,0,-3,-4,-5,-2,-3,-3,-5,-5,-4,...,-1,0,-1,0,0,-2,-1,-1,-1,-1
Tom Hoge,-3,0,-5,-8,-2,-5,-6,-5,-7,-3,...,-2,0,-1,0,0,-2,-1,-3,-1,-1
Brian Harman,-4,-5,0,-5,-1,-2,-4,-6,-5,-4,...,-1,0,-1,0,0,-1,-1,-2,-1,-3
Harris English,-5,-8,-5,0,-2,-3,-6,-6,-8,-4,...,-2,0,-1,0,-1,-3,-1,-2,-1,-3
Nate Lashley,-2,-2,-1,-2,0,0,-3,-3,-3,-3,...,-1,0,-1,0,0,-1,0,-1,-1,-1


Now we need to make sure that v is in the correct order

In [161]:
sum_df = total_matches_df.sum()*-1

In [166]:
sum_df['Tom Hoge']

969

In [165]:
total['Tom Hoge']

1017.0

In [155]:
total_matches_df['Tom Hoge'].sum() - 1019 + total['Tom Hoge']

48.0

In [144]:
import numpy as np

In [145]:
v_lst = []
for player in total_matches_df: # goes in order of the rows
    v_lst.append(v_series[player])
    
v = np.array(v_lst)

In [146]:
v[0]

13.5

In [147]:
A = total_matches_df.values

In [148]:
rankings_arr = np.linalg.solve(A, v)

In [149]:
np.amax(rankings)

0.25

In [150]:
rankings = {}
for i in range(len(rankings_arr)):
    rankings[total_matches_df.index[i]] = rankings_arr[i]
rankings = pd.Series(rankings)

In [151]:
rankings

Joaquin Niemann   -0.438461
Tom Hoge          -0.553864
Brian Harman      -0.520519
Harris English    -0.735778
Nate Lashley      -0.424828
                     ...   
Tom Lewis         -0.169049
Hayden Buckley    -0.417729
Sam Saunders      -0.125463
Rod Perry         -0.443530
Davis Love III    -0.124328
Length: 303, dtype: float64

In [152]:
rankings.sort_values()

Rory McIlroy        -0.927810
Tyrrell Hatton      -0.919660
Jon Rahm            -0.895888
Patrick Reed        -0.801863
Bryson DeChambeau   -0.771709
                       ...   
Yongjun Bae (a)     -0.016577
Marcus Kinhult      -0.012190
Whee Kim             0.001258
Michael Kim          0.002154
Yi Keun Chang        0.021820
Length: 303, dtype: float64

In [130]:
new_df = pd.DataFrame(pd.concat([rankings, wins, losses, total], axis=1)) 

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  """Entry point for launching an IPython kernel.


In [131]:
new_df.head()

Unnamed: 0,0,1,2,3
3,0.25,1,1.0,2.0
Aaron Baddeley,-0.457617,364,413.0,777.0
Aaron Wise,-0.395688,321,309.0,630.0
Abraham Ancer,-0.603094,279,510.0,789.0
Adam Hadwin,-0.622431,134,264.0,398.0


In [136]:
df.sort_values(by=0, axis=0)

KeyError: 0