In [1]:
import pandas as pd

# Section 3; Part 43
Intro to DataFrames

In [3]:
nba = pd.read_csv('datasets/nba.csv')

# NBA Dataframe we'll use in this section
# Notes: 
#   - Has NaN values (for blanks in the CSVs)
#   - Has blank row at the end
#   - Index is generated automatically because we didn't specify
#   - Numeric values are shown as floats (not the ints as in the CSV)
#       This is done because there are NaN values in the column
nba

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0
5,Amir Johnson,Boston Celtics,90.0,PF,29.0,6-9,240.0,,12000000.0
6,Jordan Mickey,Boston Celtics,55.0,PF,21.0,6-8,235.0,LSU,1170960.0
7,Kelly Olynyk,Boston Celtics,41.0,C,25.0,7-0,238.0,Gonzaga,2165160.0
8,Terry Rozier,Boston Celtics,12.0,PG,22.0,6-2,190.0,Louisville,1824360.0
9,Marcus Smart,Boston Celtics,36.0,PG,22.0,6-4,220.0,Oklahoma State,3431040.0


# Section 3; Part 44
Shared methods and attributes between Series and DataFrame objects

In [4]:
# head - Returns first rows similar to series
nba.head()

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0


In [5]:
# tail - Returns last rows
nba.tail()

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
453,Shelvin Mack,Utah Jazz,8.0,PG,26.0,6-3,203.0,Butler,2433333.0
454,Raul Neto,Utah Jazz,25.0,PG,24.0,6-1,179.0,,900000.0
455,Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0
456,Jeff Withey,Utah Jazz,24.0,C,26.0,7-0,231.0,Kansas,947276.0
457,,,,,,,,,


In [6]:
# index - Describes the index of the dataframe
nba.index

RangeIndex(start=0, stop=458, step=1)

In [7]:
# values - Returns multidimensional array of all values in dataframe
nba.values

array([['Avery Bradley', 'Boston Celtics', 0.0, ..., 180.0, 'Texas',
        7730337.0],
       ['Jae Crowder', 'Boston Celtics', 99.0, ..., 235.0, 'Marquette',
        6796117.0],
       ['John Holland', 'Boston Celtics', 30.0, ..., 205.0,
        'Boston University', nan],
       ..., 
       ['Tibor Pleiss', 'Utah Jazz', 21.0, ..., 256.0, nan, 2900000.0],
       ['Jeff Withey', 'Utah Jazz', 24.0, ..., 231.0, 'Kansas', 947276.0],
       [nan, nan, nan, ..., nan, nan, nan]], dtype=object)

In [8]:
# shape - Python tuple of the number of rows and columns, in that order: (rows, columns)
nba.shape

(458, 9)

In [10]:
# dtypes - Describes each column (series object is returned)
nba.dtypes

Name         object
Team         object
Number      float64
Position     object
Age         float64
Height       object
Weight      float64
College      object
Salary      float64
dtype: object

In [11]:
# columns - Dataframe specific - Returns an Index object holding the column names
nba.columns

Index([u'Name', u'Team', u'Number', u'Position', u'Age', u'Height', u'Weight',
       u'College', u'Salary'],
      dtype='object')

In [12]:
# axes - Dataframe specific - Returns index/columns results
nba.axes

[RangeIndex(start=0, stop=458, step=1),
 Index([u'Name', u'Team', u'Number', u'Position', u'Age', u'Height', u'Weight',
        u'College', u'Salary'],
       dtype='object')]

In [14]:
# info - Summary of dataframe
nba.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 458 entries, 0 to 457
Data columns (total 9 columns):
Name        457 non-null object
Team        457 non-null object
Number      457 non-null float64
Position    457 non-null object
Age         457 non-null float64
Height      457 non-null object
Weight      457 non-null float64
College     373 non-null object
Salary      446 non-null float64
dtypes: float64(4), object(5)
memory usage: 32.3+ KB


In [15]:
# dtypes.value_counts() - Returns the `dtypes` portion of info() above
#   (how many columns there are of each data type).
# DataFrame.get_dtype_counts() was deprecated in pandas 0.25 and removed in 1.0;
# counting the values of `dtypes` is the documented replacement.
# Note: value_counts() orders the result by count (largest first), not by dtype name.
nba.dtypes.value_counts()

float64    4
object     5
dtype: int64

# Section 3; Part 45
Differences between shared methods

In [19]:
rev = pd.read_csv('datasets/revenue.csv', index_col = "Date")
rev.head(3)

Unnamed: 0_level_0,New York,Los Angeles,Miami
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1/1/16,985,122,499
1/2/16,738,788,534
1/3/16,14,20,933


In [20]:
s = pd.Series([1,2,3])
s

0    1
1    2
2    3
dtype: int64

In [21]:
s.sum()

6L

In [22]:
# Sum on a dataframe results in a new Series, index = Columns and value = sum of everything in that column
rev.sum()

New York       5475
Los Angeles    5134
Miami          5641
dtype: int64

In [24]:
# Same result
rev.sum(axis = "index") # or rev.sum(axis = 0)

New York       5475
Los Angeles    5134
Miami          5641
dtype: int64

In [26]:
# To sum horizontally (in this case, by date instead of by city)
rev.sum(axis = "columns") # or rev.sum(axis = 1)

Date
1/1/16     1606
1/2/16     2060
1/3/16      967
1/4/16     2519
1/5/16      438
1/6/16     1935
1/7/16     1234
1/8/16     2313
1/9/16     2623
1/10/16     555
dtype: int64

Choosing an `axis` does not apply to a Series since it is 1D (there is only the index axis to sum along)

# Section 3; Part 46
Select one column from a dataframe

In [27]:
nba = pd.read_csv('datasets/nba.csv')

In [28]:
# "simpler" syntax: attribute (dot) access
# For columns with single-word names (no spaces)
# This is case sensitive
# use `.NAMEOFCOLUMN`
# Returns a series

# This method only works when the column name does not have spaces
# NOTE(review): presumably the name must also not clash with an existing
# DataFrame attribute/method - confirm against the pandas indexing docs
nba.Name

0                Avery Bradley
1                  Jae Crowder
2                 John Holland
3                  R.J. Hunter
4                Jonas Jerebko
5                 Amir Johnson
6                Jordan Mickey
7                 Kelly Olynyk
8                 Terry Rozier
9                 Marcus Smart
10             Jared Sullinger
11               Isaiah Thomas
12                 Evan Turner
13                 James Young
14                Tyler Zeller
15            Bojan Bogdanovic
16                Markel Brown
17             Wayne Ellington
18     Rondae Hollis-Jefferson
19                Jarrett Jack
20              Sergey Karasev
21             Sean Kilpatrick
22                Shane Larkin
23                 Brook Lopez
24            Chris McCullough
25                 Willie Reed
26             Thomas Robinson
27                  Henry Sims
28                Donald Sloan
29              Thaddeus Young
                ...           
428            Al-Farouq Aminu
429     

In [29]:
nba.Number

0       0.0
1      99.0
2      30.0
3      28.0
4       8.0
5      90.0
6      55.0
7      41.0
8      12.0
9      36.0
10      7.0
11      4.0
12     11.0
13     13.0
14     44.0
15     44.0
16     22.0
17     21.0
18     24.0
19      2.0
20     10.0
21      6.0
22      0.0
23     11.0
24      1.0
25     33.0
26     41.0
27     14.0
28     15.0
29     30.0
       ... 
428     8.0
429     5.0
430    23.0
431    17.0
432     4.0
433     9.0
434    35.0
435    11.0
436     0.0
437     3.0
438    44.0
439    24.0
440     2.0
441    21.0
442    33.0
443     3.0
444    10.0
445    11.0
446    15.0
447    27.0
448    20.0
449     5.0
450     2.0
451    23.0
452    41.0
453     8.0
454    25.0
455    21.0
456    24.0
457     NaN
Name: Number, dtype: float64

In [30]:
# Second method (works when columns have spaces...and in any other case)
# Bracket syntax
nba["Name"]

0                Avery Bradley
1                  Jae Crowder
2                 John Holland
3                  R.J. Hunter
4                Jonas Jerebko
5                 Amir Johnson
6                Jordan Mickey
7                 Kelly Olynyk
8                 Terry Rozier
9                 Marcus Smart
10             Jared Sullinger
11               Isaiah Thomas
12                 Evan Turner
13                 James Young
14                Tyler Zeller
15            Bojan Bogdanovic
16                Markel Brown
17             Wayne Ellington
18     Rondae Hollis-Jefferson
19                Jarrett Jack
20              Sergey Karasev
21             Sean Kilpatrick
22                Shane Larkin
23                 Brook Lopez
24            Chris McCullough
25                 Willie Reed
26             Thomas Robinson
27                  Henry Sims
28                Donald Sloan
29              Thaddeus Young
                ...           
428            Al-Farouq Aminu
429     

In [31]:
# Can method chain on returned objects (series)
nba['Name'].head(3)

0    Avery Bradley
1      Jae Crowder
2     John Holland
Name: Name, dtype: object

# Section 3; Part 47
Selecting two or more columns from a dataframe

In [33]:
nba = pd.read_csv('datasets/nba.csv')
nba.head(3)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,


In [36]:
# Returns a new dataframe, not series
nba[["Name", "Team"]].head(3)

Unnamed: 0,Name,Team
0,Avery Bradley,Boston Celtics
1,Jae Crowder,Boston Celtics
2,John Holland,Boston Celtics


In [37]:
# Can change order of columns in returned frame
nba[["Team", "Name"]].head(3)

Unnamed: 0,Team,Name
0,Boston Celtics,Avery Bradley
1,Boston Celtics,Jae Crowder
2,Boston Celtics,John Holland


# Section 3; Part 48
Add new column to Dataframe

In [38]:
nba = pd.read_csv('datasets/nba.csv')
nba.head(3)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,


In [39]:
# If we assign a value to a column name that does not exist yet, it will create a new column with that name and those values
# If the column DOES exist, the assignment will OVERWRITE its existing values
# A new column is always added after the existing columns

# Add a scalar value - Single value assigned to all rows
nba['Sport'] = "Basketball"
nba.head(3)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary,Sport
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0,Basketball
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0,Basketball
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,,Basketball


In [41]:
nba['League'] = "National Basketball Association"
nba.head(3)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary,Sport,League
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0,Basketball,National Basketball Association
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0,Basketball,National Basketball Association
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,,Basketball,National Basketball Association


In [42]:
# Reset for next example
nba = pd.read_csv('datasets/nba.csv')
nba.head(3)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,


In [43]:
# Use `insert` method
# loc - Zero-based index location within the columns (Name = 0, Team = 1, Number = 2, Position = 3, etc)
# insert changes `nba` itself; nothing is reassigned here

# Insert at index 3 (the 4th column, between Number and Position)
nba.insert(3, column="Sport", value="Basketball")
nba.head(3)

Unnamed: 0,Name,Team,Number,Sport,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,Basketball,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,Basketball,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,Basketball,SG,27.0,6-5,205.0,Boston University,


In [44]:
# Insert between Height and Weight
nba.insert(7, column="League", value="National Basketball Association")
nba.head(3)

Unnamed: 0,Name,Team,Number,Sport,Position,Age,Height,League,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,Basketball,PG,25.0,6-2,National Basketball Association,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,Basketball,SF,25.0,6-6,National Basketball Association,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,Basketball,SG,27.0,6-5,National Basketball Association,205.0,Boston University,


# Section 3; Part 49
Broadcasting operations

 - Do something to ALL items in series

In [45]:
nba = pd.read_csv('datasets/nba.csv')
nba.head(3)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,


In [46]:
# add 5 to all ages
nba["Age"].add(5)

0      30.0
1      30.0
2      32.0
3      27.0
4      34.0
5      34.0
6      26.0
7      30.0
8      27.0
9      27.0
10     29.0
11     32.0
12     32.0
13     25.0
14     31.0
15     32.0
16     29.0
17     33.0
18     26.0
19     37.0
20     27.0
21     31.0
22     28.0
23     33.0
24     26.0
25     31.0
26     30.0
27     31.0
28     33.0
29     32.0
       ... 
428    30.0
429    28.0
430    29.0
431    32.0
432    28.0
433    33.0
434    39.0
435    29.0
436    30.0
437    29.0
438    28.0
439    31.0
440    35.0
441    25.0
442    33.0
443    28.0
444    29.0
445    25.0
446    29.0
447    28.0
448    31.0
449    28.0
450    33.0
451    31.0
452    25.0
453    31.0
454    29.0
455    31.0
456    31.0
457     NaN
Name: Age, dtype: float64

In [47]:
# Short hand for above
nba["Age"] + 5

0      30.0
1      30.0
2      32.0
3      27.0
4      34.0
5      34.0
6      26.0
7      30.0
8      27.0
9      27.0
10     29.0
11     32.0
12     32.0
13     25.0
14     31.0
15     32.0
16     29.0
17     33.0
18     26.0
19     37.0
20     27.0
21     31.0
22     28.0
23     33.0
24     26.0
25     31.0
26     30.0
27     31.0
28     33.0
29     32.0
       ... 
428    30.0
429    28.0
430    29.0
431    32.0
432    28.0
433    33.0
434    39.0
435    29.0
436    30.0
437    29.0
438    28.0
439    31.0
440    35.0
441    25.0
442    33.0
443    28.0
444    29.0
445    25.0
446    29.0
447    28.0
448    31.0
449    28.0
450    33.0
451    31.0
452    25.0
453    31.0
454    29.0
455    31.0
456    31.0
457     NaN
Name: Age, dtype: float64

In [48]:
# Subtract 5M from all in Salary
nba["Salary"] - 5000000  # or .sub()

0       2730337.0
1       1796117.0
2             NaN
3      -3851360.0
4             0.0
5       7000000.0
6      -3829040.0
7      -2834840.0
8      -3175640.0
9      -1568960.0
10     -2430740.0
11      1912869.0
12     -1574490.0
13     -3250160.0
14     -2383025.0
15     -1574490.0
16     -4154941.0
17     -3500000.0
18     -3664520.0
19      1300000.0
20     -3400160.0
21     -4865785.0
22     -3500000.0
23     14689000.0
24     -3859760.0
25     -4052724.0
26     -4018652.0
27     -4052724.0
28     -4052724.0
29      6235955.0
          ...    
428     3042895.0
429    -4374907.0
430    -4052724.0
431     1980802.0
432    -2105941.0
433     1000000.0
434       16000.0
435    -1924120.0
436     -763713.0
437    -2474840.0
438    -4474907.0
439    -3584480.0
440    -2145060.0
441    -2362280.0
442     -225000.0
443    -2341760.0
444     4463484.0
445    -1222280.0
446     7000000.0
447    -3824120.0
448    10409570.0
449    -3651560.0
450    -2950000.0
451    -4018652.0
452    -27

In [49]:
# Convert weight from pounds to kilograms
nba["Weight"].mul(0.453592)

0       81.646560
1      106.594120
2       92.986360
3       83.914520
4      104.779752
5      108.862080
6      106.594120
7      107.954896
8       86.182480
9       99.790240
10     117.933920
11      83.914520
12      99.790240
13      97.522280
14     114.758776
15      97.975872
16      86.182480
17      90.718400
18      99.790240
19      90.718400
20      94.347136
21      99.336648
22      79.378600
23     124.737800
24      90.718400
25      99.790240
26     107.501304
27     112.490816
28      92.986360
29     100.243832
          ...    
428     97.522280
429     93.439952
430     95.254320
431    108.862080
432     97.522280
433     97.522280
434    120.201880
435    111.130040
436     88.450440
437     90.718400
438     83.914520
439    106.594120
440     78.471416
441    108.862080
442    103.418976
443     86.636072
444     97.068688
445     86.182480
446    120.201880
447    111.130040
448    102.511792
449     93.439952
450    102.511792
451     93.439952
452    106

In [50]:
# Save new series to dataframe
nba['Weight in Kilograms'] = nba['Weight'] * 0.453592
nba.head(3)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary,Weight in Kilograms
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0,81.64656
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0,106.59412
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,,92.98636


In [52]:
# Salary in millions
nba['Salary in Millions'] = nba['Salary'].div(1000000)
nba.head(3)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary,Weight in Kilograms,Salary in Millions
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0,81.64656,7.730337
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0,106.59412,6.796117
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,,92.98636,


# Section 3; Part 50
A review of the `.value_counts()` method

 - This is only available on a Series - not a dataframe

In [53]:
nba = pd.read_csv('datasets/nba.csv')

In [54]:
nba['Team'].value_counts()

New Orleans Pelicans      19
Memphis Grizzlies         18
Milwaukee Bucks           16
New York Knicks           16
Denver Nuggets            15
Charlotte Hornets         15
Los Angeles Lakers        15
Chicago Bulls             15
San Antonio Spurs         15
Philadelphia 76ers        15
Toronto Raptors           15
Detroit Pistons           15
Boston Celtics            15
Miami Heat                15
Utah Jazz                 15
Portland Trail Blazers    15
Los Angeles Clippers      15
Phoenix Suns              15
Golden State Warriors     15
Cleveland Cavaliers       15
Washington Wizards        15
Atlanta Hawks             15
Brooklyn Nets             15
Houston Rockets           15
Dallas Mavericks          15
Sacramento Kings          15
Indiana Pacers            15
Oklahoma City Thunder     15
Orlando Magic             14
Minnesota Timberwolves    14
Name: Team, dtype: int64

In [55]:
nba["Position"].value_counts()

SG    102
PF    100
PG     92
SF     85
C      78
Name: Position, dtype: int64

# Section 3; Part 51
Dropping rows with null values using `.dropna()`

In [56]:
nba = pd.read_csv('datasets/nba.csv')

In [57]:
nba.head(3)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,


In [58]:
nba.tail(3)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
455,Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0
456,Jeff Withey,Utah Jazz,24.0,C,26.0,7-0,231.0,Kansas,947276.0
457,,,,,,,,,


In [59]:
# dropna - Removes any rows from dataframe where any columns have NaN (by default)
#   determined by the `how` parameter; Default = any
nba.dropna()

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
6,Jordan Mickey,Boston Celtics,55.0,PF,21.0,6-8,235.0,LSU,1170960.0
7,Kelly Olynyk,Boston Celtics,41.0,C,25.0,7-0,238.0,Gonzaga,2165160.0
8,Terry Rozier,Boston Celtics,12.0,PG,22.0,6-2,190.0,Louisville,1824360.0
9,Marcus Smart,Boston Celtics,36.0,PG,22.0,6-4,220.0,Oklahoma State,3431040.0
10,Jared Sullinger,Boston Celtics,7.0,C,24.0,6-9,260.0,Ohio State,2569260.0
11,Isaiah Thomas,Boston Celtics,4.0,PG,27.0,5-9,185.0,Washington,6912869.0
12,Evan Turner,Boston Celtics,11.0,SG,27.0,6-7,220.0,Ohio State,3425510.0


In [61]:
# how = all
# Remove only where ALL columns are NaN
nba.dropna(how='all')

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0
5,Amir Johnson,Boston Celtics,90.0,PF,29.0,6-9,240.0,,12000000.0
6,Jordan Mickey,Boston Celtics,55.0,PF,21.0,6-8,235.0,LSU,1170960.0
7,Kelly Olynyk,Boston Celtics,41.0,C,25.0,7-0,238.0,Gonzaga,2165160.0
8,Terry Rozier,Boston Celtics,12.0,PG,22.0,6-2,190.0,Louisville,1824360.0
9,Marcus Smart,Boston Celtics,36.0,PG,22.0,6-4,220.0,Oklahoma State,3431040.0


In [62]:
# Also uses `inplace`
# With inplace=True the rows are removed from `nba` itself and the cell displays nothing,
# so every later cell that uses `nba` (until it is re-read from the CSV) sees the frame
# without its all-NaN row.
nba.dropna(how='all', inplace=True)

In [63]:
# Drop COLUMN that has NaN
# Change `axis` value from default of 0/"index" to 1/"columns"
nba.dropna(axis=1)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0
5,Amir Johnson,Boston Celtics,90.0,PF,29.0,6-9,240.0
6,Jordan Mickey,Boston Celtics,55.0,PF,21.0,6-8,235.0
7,Kelly Olynyk,Boston Celtics,41.0,C,25.0,7-0,238.0
8,Terry Rozier,Boston Celtics,12.0,PG,22.0,6-2,190.0
9,Marcus Smart,Boston Celtics,36.0,PG,22.0,6-4,220.0


In [64]:
# Only remove a row if it has a null in specific column(s)
# Use `subset` and provide a list of column names
nba.dropna(subset = ["Salary"])

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0
5,Amir Johnson,Boston Celtics,90.0,PF,29.0,6-9,240.0,,12000000.0
6,Jordan Mickey,Boston Celtics,55.0,PF,21.0,6-8,235.0,LSU,1170960.0
7,Kelly Olynyk,Boston Celtics,41.0,C,25.0,7-0,238.0,Gonzaga,2165160.0
8,Terry Rozier,Boston Celtics,12.0,PG,22.0,6-2,190.0,Louisville,1824360.0
9,Marcus Smart,Boston Celtics,36.0,PG,22.0,6-4,220.0,Oklahoma State,3431040.0
10,Jared Sullinger,Boston Celtics,7.0,C,24.0,6-9,260.0,Ohio State,2569260.0


# Section 3; Part 52
Fill in null values with the `.fillna()` method

In [65]:
nba = pd.read_csv('datasets/nba.csv')

In [67]:
# Default isn't always intuitive; This example replaces ALL NaN with 0 which doesn't make sense on "College" column
nba.fillna(0).head(10)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,0.0
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,0,5000000.0
5,Amir Johnson,Boston Celtics,90.0,PF,29.0,6-9,240.0,0,12000000.0
6,Jordan Mickey,Boston Celtics,55.0,PF,21.0,6-8,235.0,LSU,1170960.0
7,Kelly Olynyk,Boston Celtics,41.0,C,25.0,7-0,238.0,Gonzaga,2165160.0
8,Terry Rozier,Boston Celtics,12.0,PG,22.0,6-2,190.0,Louisville,1824360.0
9,Marcus Smart,Boston Celtics,36.0,PG,22.0,6-4,220.0,Oklahoma State,3431040.0


In [68]:
# Fill value on a specific column
# Assign the filled series back to the column instead of calling
# fillna(..., inplace=True) on nba["Salary"]: that form is chained assignment,
# which raises a warning in pandas 2.x and leaves `nba` unchanged once
# Copy-on-Write is active (the default from pandas 3.0).
nba["Salary"] = nba["Salary"].fillna(0)
nba.head(10)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,0.0
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0
5,Amir Johnson,Boston Celtics,90.0,PF,29.0,6-9,240.0,,12000000.0
6,Jordan Mickey,Boston Celtics,55.0,PF,21.0,6-8,235.0,LSU,1170960.0
7,Kelly Olynyk,Boston Celtics,41.0,C,25.0,7-0,238.0,Gonzaga,2165160.0
8,Terry Rozier,Boston Celtics,12.0,PG,22.0,6-2,190.0,Louisville,1824360.0
9,Marcus Smart,Boston Celtics,36.0,PG,22.0,6-4,220.0,Oklahoma State,3431040.0


In [70]:
# Fill missing colleges with a placeholder string.
# Assigned back to the column rather than using inplace=True on the selected
# series, which is chained assignment and does not update `nba` under
# pandas Copy-on-Write.
nba["College"] = nba["College"].fillna("No College")
nba.head(10)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,0.0
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,No College,5000000.0
5,Amir Johnson,Boston Celtics,90.0,PF,29.0,6-9,240.0,No College,12000000.0
6,Jordan Mickey,Boston Celtics,55.0,PF,21.0,6-8,235.0,LSU,1170960.0
7,Kelly Olynyk,Boston Celtics,41.0,C,25.0,7-0,238.0,Gonzaga,2165160.0
8,Terry Rozier,Boston Celtics,12.0,PG,22.0,6-2,190.0,Louisville,1824360.0
9,Marcus Smart,Boston Celtics,36.0,PG,22.0,6-4,220.0,Oklahoma State,3431040.0


# Section 3; Part 53
The `astype()` method

 - Convert datatype of a series from one type to another
 - Can only convert to `int` if there are no NaN in the series

In [72]:
# Import nba and remove the last row which has all NaN
nba = pd.read_csv('datasets/nba.csv').dropna(how="all")

In [74]:
# First we need to deal with the NaN values
# Each filled series is assigned back to its column; fillna(..., inplace=True)
# on a selected column is chained assignment and does not reliably update `nba`
# (warning in pandas 2.x, no effect under Copy-on-Write).
nba['Salary'] = nba['Salary'].fillna(0)
nba['College'] = nba['College'].fillna("None")
nba.head(6)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,0.0
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0
5,Amir Johnson,Boston Celtics,90.0,PF,29.0,6-9,240.0,,12000000.0


In [75]:
# In our original CSV, all of the numeric values are integers, but because of the NaN values they were imported as floats
# Show our types
nba.dtypes

Name         object
Team         object
Number      float64
Position     object
Age         float64
Height       object
Weight      float64
College      object
Salary      float64
dtype: object

In [76]:
# Show our types (alternative)
nba.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 457 entries, 0 to 456
Data columns (total 9 columns):
Name        457 non-null object
Team        457 non-null object
Number      457 non-null float64
Position    457 non-null object
Age         457 non-null float64
Height      457 non-null object
Weight      457 non-null float64
College     457 non-null object
Salary      457 non-null float64
dtypes: float64(4), object(5)
memory usage: 35.7+ KB


In [79]:
# Convert Salary to an int
# astype does not have an inplace parameter, so it needs to be reassigned to salary series
# "int32" is requested explicitly: plain "int" resolves to a platform-dependent
# width (32-bit or 64-bit), and the info() output and memory savings shown in
# this section rely on the 32-bit result.
nba['Salary'] = nba['Salary'].astype("int32")

In [80]:
nba.head(3)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,0


In [83]:
# Convert the remaining whole-number columns as well.
# "int32" is spelled out (instead of the platform-dependent "int") so the
# dtypes and memory usage reported by info() below are the same on every OS.
nba['Age'] = nba['Age'].astype("int32")
nba['Number'] = nba['Number'].astype("int32")
nba['Weight'] = nba['Weight'].astype("int32")
nba.head(3)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0,PG,25,6-2,180,Texas,7730337
1,Jae Crowder,Boston Celtics,99,SF,25,6-6,235,Marquette,6796117
2,John Holland,Boston Celtics,30,SG,27,6-5,205,Boston University,0


In [84]:
# Certain datatypes take up different amounts of space. Notice that we've reduced the memory footprint compared to above
nba.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 457 entries, 0 to 456
Data columns (total 9 columns):
Name        457 non-null object
Team        457 non-null object
Number      457 non-null int32
Position    457 non-null object
Age         457 non-null int32
Height      457 non-null object
Weight      457 non-null int32
College     457 non-null object
Salary      457 non-null int32
dtypes: int32(4), object(5)
memory usage: 28.6+ KB


In [85]:
# Category datatype
# Ideal when we have a small number of unique values in a column
#    In this example - Position column or Team column
nba['Position'].nunique()

5

In [87]:
nba['Position'] = nba['Position'].astype("category")
nba['Team'] = nba['Team'].astype("category")
nba.head(3)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0,PG,25,6-2,180,Texas,7730337
1,Jae Crowder,Boston Celtics,99,SF,25,6-6,235,Marquette,6796117
2,John Holland,Boston Celtics,30,SG,27,6-5,205,Boston University,0


In [88]:
# Check dtypes and notice we have two categories and we've reduced memory usage
nba.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 457 entries, 0 to 456
Data columns (total 9 columns):
Name        457 non-null object
Team        457 non-null category
Number      457 non-null int32
Position    457 non-null category
Age         457 non-null int32
Height      457 non-null object
Weight      457 non-null int32
College     457 non-null object
Salary      457 non-null int32
dtypes: category(2), int32(4), object(3)
memory usage: 22.6+ KB


# Section 3; Part 54
Sort a dataframe with the `sort_values()` method

In [89]:
nba = pd.read_csv('datasets/nba.csv')

In [90]:
# Sort entire DF only by the Name column
nba.sort_values("Name")

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
152,Aaron Brooks,Chicago Bulls,0.0,PG,31.0,6-0,161.0,Oregon,2250000.0
356,Aaron Gordon,Orlando Magic,0.0,PF,20.0,6-9,220.0,Arizona,4171680.0
328,Aaron Harrison,Charlotte Hornets,9.0,SG,21.0,6-6,210.0,Kentucky,525093.0
404,Adreian Payne,Minnesota Timberwolves,33.0,PF,25.0,6-10,237.0,Michigan State,1938840.0
312,Al Horford,Atlanta Hawks,15.0,C,30.0,6-10,245.0,Florida,12000000.0
330,Al Jefferson,Charlotte Hornets,25.0,C,31.0,6-10,289.0,,13500000.0
428,Al-Farouq Aminu,Portland Trail Blazers,8.0,SF,25.0,6-9,215.0,Wake Forest,8042895.0
368,Alan Anderson,Washington Wizards,6.0,SG,33.0,6-6,220.0,Michigan State,4000000.0
135,Alan Williams,Phoenix Suns,15.0,C,23.0,6-8,260.0,UC Santa Barbara,83397.0
444,Alec Burks,Utah Jazz,10.0,SG,24.0,6-6,214.0,Colorado,9463484.0


In [92]:
# By default NaN values are placed at the end 
# Can be changed from default of na_position = "last" to na_position = "first"
nba.sort_values("Salary")

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
32,Thanasis Antetokounmpo,New York Knicks,43.0,SF,23.0,6-7,205.0,,30888.0
291,Orlando Johnson,New Orleans Pelicans,0.0,SG,27.0,6-5,220.0,UC Santa Barbara,55722.0
130,Phil Pressey,Phoenix Suns,25.0,PG,25.0,5-11,175.0,Missouri,55722.0
135,Alan Williams,Phoenix Suns,15.0,C,23.0,6-8,260.0,UC Santa Barbara,83397.0
175,Jordan McRae,Cleveland Cavaliers,12.0,SG,25.0,6-5,179.0,Tennessee,111196.0
92,Jeff Ayres,Los Angeles Clippers,19.0,PF,29.0,6-9,250.0,Arizona State,111444.0
184,Lorenzo Brown,Detroit Pistons,17.0,PG,25.0,6-5,189.0,North Carolina State,111444.0
21,Sean Kilpatrick,Brooklyn Nets,6.0,SG,26.0,6-4,219.0,Cincinnati,134215.0
45,Tony Wroten,New York Knicks,5.0,SG,23.0,6-6,205.0,Washington,167406.0
282,Bryce Dejean-Jones,New Orleans Pelicans,31.0,SG,23.0,6-6,203.0,Iowa State,169883.0


# Section 3; Part 55
Sort dataframe with `sort_values()` across multiple columns

In [93]:
# Reload the raw CSV so this section starts from a freshly parsed DataFrame
nba = pd.read_csv(filepath_or_buffer='datasets/nba.csv')

In [94]:
# Order rows by Team first, breaking ties within a team by Name (both A-Z)
nba.sort_values(by=["Team", "Name"], ascending=True)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
312,Al Horford,Atlanta Hawks,15.0,C,30.0,6-10,245.0,Florida,12000000.0
318,Dennis Schroder,Atlanta Hawks,17.0,PG,22.0,6-1,172.0,,1763400.0
323,Jeff Teague,Atlanta Hawks,0.0,PG,27.0,6-2,186.0,Wake Forest,8000000.0
309,Kent Bazemore,Atlanta Hawks,24.0,SF,26.0,6-5,201.0,Old Dominion,2000000.0
311,Kirk Hinrich,Atlanta Hawks,12.0,SG,35.0,6-4,190.0,Kansas,2854940.0
313,Kris Humphries,Atlanta Hawks,43.0,PF,31.0,6-9,235.0,Minnesota,1000000.0
314,Kyle Korver,Atlanta Hawks,26.0,SG,35.0,6-7,212.0,Creighton,5746479.0
317,Lamar Patterson,Atlanta Hawks,13.0,SG,24.0,6-5,225.0,Pittsburgh,525093.0
316,Mike Muscala,Atlanta Hawks,31.0,PF,24.0,6-11,240.0,Bucknell,947276.0
319,Mike Scott,Atlanta Hawks,32.0,PF,27.0,6-8,237.0,Virginia,3333333.0


In [95]:
# A single ascending=False applies to every sort key, so Team and Name are both Z-A
nba.sort_values(by=["Team", "Name"], ascending=False)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
379,Ramon Sessions,Washington Wizards,7.0,PG,30.0,6-3,190.0,Nevada,2170465.0
378,Otto Porter Jr.,Washington Wizards,22.0,SF,23.0,6-8,198.0,Georgetown,4662960.0
375,Nene Hilario,Washington Wizards,42.0,C,33.0,6-11,250.0,,13000000.0
376,Markieff Morris,Washington Wizards,5.0,PF,26.0,6-10,245.0,Kansas,8000000.0
381,Marcus Thornton,Washington Wizards,15.0,SF,29.0,6-4,205.0,LSU,200600.0
373,Marcin Gortat,Washington Wizards,13.0,C,32.0,6-11,240.0,,11217391.0
377,Kelly Oubre Jr.,Washington Wizards,12.0,SF,20.0,6-7,205.0,Kansas,1920240.0
382,John Wall,Washington Wizards,2.0,PG,25.0,6-4,195.0,Kentucky,15851950.0
371,Jarell Eddie,Washington Wizards,8.0,SG,24.0,6-7,218.0,Virginia Tech,561716.0
370,Jared Dudley,Washington Wizards,1.0,SF,30.0,6-7,225.0,Boston College,4375000.0


In [96]:
# One flag per sort key: Team A-Z, then Name Z-A within each team
nba.sort_values(by=["Team", "Name"], ascending=[True, False])

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
322,Walter Tavares,Atlanta Hawks,22.0,C,24.0,7-3,260.0,,1000000.0
310,Tim Hardaway Jr.,Atlanta Hawks,10.0,SG,24.0,6-6,205.0,Michigan,1304520.0
321,Tiago Splitter,Atlanta Hawks,11.0,C,31.0,6-11,245.0,,9756250.0
320,Thabo Sefolosha,Atlanta Hawks,25.0,SF,32.0,6-7,220.0,,4000000.0
315,Paul Millsap,Atlanta Hawks,4.0,PF,31.0,6-8,246.0,Louisiana Tech,18671659.0
319,Mike Scott,Atlanta Hawks,32.0,PF,27.0,6-8,237.0,Virginia,3333333.0
316,Mike Muscala,Atlanta Hawks,31.0,PF,24.0,6-11,240.0,Bucknell,947276.0
317,Lamar Patterson,Atlanta Hawks,13.0,SG,24.0,6-5,225.0,Pittsburgh,525093.0
314,Kyle Korver,Atlanta Hawks,26.0,SG,35.0,6-7,212.0,Creighton,5746479.0
313,Kris Humphries,Atlanta Hawks,43.0,PF,31.0,6-9,235.0,Minnesota,1000000.0


# Section 3; Part 56
Sort dataframe by `sort_index()`

 - Works very similarly to `sort_index()` on a Series, since a DataFrame also has only a single row index

In [97]:
# Reload the raw CSV so this section starts from a freshly parsed DataFrame
nba = pd.read_csv(filepath_or_buffer='datasets/nba.csv')

In [98]:
# Reorder the rows by Number, then Salary, then Name so the index labels are
# no longer in sequence (sets up the sort_index() demo that follows).
# Reassign the sorted result instead of using inplace=True: the resulting
# `nba` is the same, but the frame is not mutated in place and the call
# matches the other sort_values() cells.
nba = nba.sort_values(["Number", "Salary", "Name"])
nba.tail(10)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
75,Delon Wright,Toronto Raptors,55.0,PG,24.0,6-5,190.0,Utah,1509360.0
359,Brandon Jennings,Orlando Magic,55.0,PG,26.0,6-1,169.0,,8344497.0
222,Johnny O'Bryant III,Milwaukee Bucks,77.0,PF,23.0,6-9,257.0,LSU,845059.0
391,Joffrey Lauvergne,Denver Nuggets,77.0,C,24.0,6-11,220.0,,1709719.0
398,Nemanja Bjelica,Minnesota Timberwolves,88.0,PF,28.0,6-10,240.0,,3950001.0
372,Drew Gooden,Washington Wizards,90.0,PF,34.0,6-10,250.0,Kansas,3300000.0
5,Amir Johnson,Boston Celtics,90.0,PF,29.0,6-9,240.0,,12000000.0
68,Lucas Nogueira,Toronto Raptors,92.0,C,23.0,7-0,220.0,,1842000.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
457,,,,,,,,,


In [99]:
# Put the rows back in order of their index labels, lowest first
nba.sort_index(ascending=True)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0
5,Amir Johnson,Boston Celtics,90.0,PF,29.0,6-9,240.0,,12000000.0
6,Jordan Mickey,Boston Celtics,55.0,PF,21.0,6-8,235.0,LSU,1170960.0
7,Kelly Olynyk,Boston Celtics,41.0,C,25.0,7-0,238.0,Gonzaga,2165160.0
8,Terry Rozier,Boston Celtics,12.0,PG,22.0,6-2,190.0,Louisville,1824360.0
9,Marcus Smart,Boston Celtics,36.0,PG,22.0,6-4,220.0,Oklahoma State,3431040.0


# Section 3; Part 57
Rank values with the `rank()` method

 - Called on a series to generate a new series of ranks

In [101]:
# Prepare Salary for ranking as whole numbers:
#   - drop the row where every column is blank
#   - replace missing salaries with 0, since the integer cast below
#     cannot represent NaN
nba = pd.read_csv('datasets/nba.csv')
nba = nba.dropna(how="all")
nba['Salary'] = (
    nba['Salary']
    .fillna(0)
    .astype("int")
)
nba.head(n=3)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,0


In [106]:
# Rank salaries so the highest-paid player gets rank 1, the next gets 2, etc.
#   rank() counts upward from the smallest value by default, so ascending=False
#   is needed to put the largest salary first.
#   Tied salaries share the average of their positions (rank()'s default
#   method); astype("int") then drops any fractional part of that average.
nba['Salary Rank'] = (
    nba['Salary']
    .rank(ascending=False)
    .astype("int")
)
nba.head(n=5)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary,Salary Rank
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337,97
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117,110
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,0,452
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640,322
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000,147


In [108]:
# List players from highest to lowest salary to compare against the Salary Rank column
nba.sort_values("Salary", ascending=False)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary,Salary Rank
109,Kobe Bryant,Los Angeles Lakers,24.0,SF,37.0,6-6,212.0,,25000000,1
169,LeBron James,Cleveland Cavaliers,23.0,SF,31.0,6-8,250.0,,22970500,2
33,Carmelo Anthony,New York Knicks,7.0,SF,32.0,6-8,240.0,Syracuse,22875000,3
251,Dwight Howard,Houston Rockets,12.0,C,30.0,6-11,265.0,,22359364,4
339,Chris Bosh,Miami Heat,1.0,PF,32.0,6-11,235.0,Georgia Tech,22192730,5
100,Chris Paul,Los Angeles Clippers,3.0,PG,31.0,6-0,175.0,Wake Forest,21468695,6
414,Kevin Durant,Oklahoma City Thunder,35.0,SF,27.0,6-9,240.0,Texas,20158622,7
164,Derrick Rose,Chicago Bulls,1.0,PG,27.0,6-3,190.0,Memphis,20093064,8
349,Dwyane Wade,Miami Heat,3.0,SG,34.0,6-4,220.0,Marquette,20000000,9
174,Kevin Love,Cleveland Cavaliers,0.0,PF,27.0,6-10,251.0,UCLA,19689000,11
