# DataFrames I

- A 2d table consisting of rows and columns
- `NaN` are for missing values
- The `hasnans` attribute exists only on a Series; the `columns` attribute exists only on a DataFrame
- `info` method returns a summary of the pandas object

In [2]:
import pandas as pd

In [3]:
# Load datasets
nba_df = pd.read_csv("nba.csv")

In [7]:
s = pd.Series([1,2,3,4,5])

In [10]:
print(s.index, nba_df.index)

RangeIndex(start=0, stop=5, step=1) RangeIndex(start=0, stop=592, step=1)


In [14]:
# .values exposes the DataFrame's data as a 2-dimensional NumPy array (one inner array per row)
nba_df.values

array([['Saddiq Bey', 'Atlanta Hawks', 'F', ..., 215.0, 'Villanova',
        4556983.0],
       ['Bogdan Bogdanovic', 'Atlanta Hawks', 'G', ..., 225.0,
        'Fenerbahce', 18700000.0],
       ['Kobe Bufkin', 'Atlanta Hawks', 'G', ..., 195.0, 'Michigan',
        4094244.0],
       ...,
       ['Tristan Vukcevic', 'Washington Wizards', 'F', ..., 220.0,
        'Real Madrid', nan],
       ['Delon Wright', 'Washington Wizards', 'G', ..., 185.0, 'Utah',
        8195122.0],
       [nan, nan, nan, ..., nan, nan, nan]], shape=(592, 7), dtype=object)

In [15]:
s.shape

(5,)

In [16]:
nba_df.shape

(592, 7)

In [17]:
s.dtypes

dtype('int64')

In [19]:
nba_df.dtypes

Name         object
Team         object
Position     object
Height       object
Weight      float64
College      object
Salary      float64
dtype: object

In [20]:
s.hasnans

False

In [33]:
nba_df['Salary'].hasnans

True

In [24]:
nba_df.columns

Index(['Name', 'Team', 'Position', 'Height', 'Weight', 'College', 'Salary'], dtype='object')

In [34]:
nba_df.axes

[RangeIndex(start=0, stop=592, step=1),
 Index(['Name', 'Team', 'Position', 'Height', 'Weight', 'College', 'Salary'], dtype='object')]

In [27]:
nba_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 592 entries, 0 to 591
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Name      591 non-null    object 
 1   Team      591 non-null    object 
 2   Position  584 non-null    object 
 3   Height    585 non-null    object 
 4   Weight    584 non-null    float64
 5   College   578 non-null    object 
 6   Salary    488 non-null    float64
dtypes: float64(2), object(5)
memory usage: 32.5+ KB


## Differences between Shared Methods

In [39]:
revenue_df = pd.read_csv("revenue.csv", index_col="Date")
revenue_df

Unnamed: 0_level_0,New York,Los Angeles,Miami
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1/1/26,985,122,499
1/2/26,738,788,534
1/3/26,14,20,933
1/4/26,730,904,885
1/5/26,114,71,253
1/6/26,936,502,497
1/7/26,123,996,115
1/8/26,935,492,886
1/9/26,846,954,823
1/10/26,54,285,216


In [41]:
revenue_df.sum() # gives sums per columns

New York       5475
Los Angeles    5134
Miami          5641
dtype: int64

In [42]:
revenue_df.sum(axis="index") # add down the index (rows): one total per column, same result as the default sum()

New York       5475
Los Angeles    5134
Miami          5641
dtype: int64

In [43]:
revenue_df.sum(axis="columns") # add across the columns: one total per index label (per Date)

Date
1/1/26     1606
1/2/26     2060
1/3/26      967
1/4/26     2519
1/5/26      438
1/6/26     1935
1/7/26     1234
1/8/26     2313
1/9/26     2623
1/10/26     555
dtype: int64

In [45]:
# Sum the per-date totals to get the grand total over every date and city
# (equal to adding up the three per-column totals).
revenue_df.sum(axis="columns").sum()

np.int64(16250)

## Select One Column from a DataFrame
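- A column pulled out of a DataFrame is a Series
- The extracted Series may be a view of the df's data rather than a copy; with Copy-on-Write enabled (the default from pandas 3.0) it always behaves like a copy, so call `.copy()` when an independent Series is needed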

In [46]:
nba_df.Team

0           Atlanta Hawks
1           Atlanta Hawks
2           Atlanta Hawks
3           Atlanta Hawks
4           Atlanta Hawks
              ...        
587    Washington Wizards
588    Washington Wizards
589    Washington Wizards
590    Washington Wizards
591                   NaN
Name: Team, Length: 592, dtype: object

In [47]:
nba_df.Salary

0       4556983.0
1      18700000.0
2       4094244.0
3      20616000.0
4       2581522.0
          ...    
587     1719864.0
588    10250000.0
589           NaN
590     8195122.0
591           NaN
Name: Salary, Length: 592, dtype: float64

In [48]:
nba_df["Name"]

0             Saddiq Bey
1      Bogdan Bogdanovic
2            Kobe Bufkin
3           Clint Capela
4         Bruno Fernando
             ...        
587         Ryan Rollins
588        Landry Shamet
589     Tristan Vukcevic
590         Delon Wright
591                  NaN
Name: Name, Length: 592, dtype: object

## Select Multiple Columns from a DataFrame
- When a list of column names is passed to the accessor (`[]`) of a DataFrame, a DataFrame is returned
- This DataFrame is a copy, not a view, which differs from the single-column (Series) extraction

In [49]:
nba_df[["Name", "Team"]] # Returns back dataframe and this is a copy and not a view

Unnamed: 0,Name,Team
0,Saddiq Bey,Atlanta Hawks
1,Bogdan Bogdanovic,Atlanta Hawks
2,Kobe Bufkin,Atlanta Hawks
3,Clint Capela,Atlanta Hawks
4,Bruno Fernando,Atlanta Hawks
...,...,...
587,Ryan Rollins,Washington Wizards
588,Landry Shamet,Washington Wizards
589,Tristan Vukcevic,Washington Wizards
590,Delon Wright,Washington Wizards


## Add New Column to DataFrame


In [4]:
nba = nba_df.copy()

In [5]:
nba["Sport"] = "Basketball" # Insert value in every row of column. Can also be series same length and indexes

In [6]:
nba.head()

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary,Sport
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0,Basketball
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0,Basketball
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244.0,Basketball
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0,Basketball
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0,Basketball


In [12]:
nba = nba_df.copy()

In [8]:
nba.insert(loc=3, column="Sport", value="Basketball") # Insert column name sport instantiate with scalar Basketball at column index 3

In [9]:
nba.head()

Unnamed: 0,Name,Team,Position,Sport,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,Basketball,6-7,215.0,Villanova,4556983.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,Basketball,6-5,225.0,Fenerbahce,18700000.0
2,Kobe Bufkin,Atlanta Hawks,G,Basketball,6-5,195.0,Michigan,4094244.0
3,Clint Capela,Atlanta Hawks,C,Basketball,6-10,256.0,Elan Chalon,20616000.0
4,Bruno Fernando,Atlanta Hawks,F-C,Basketball,6-10,240.0,Maryland,2581522.0


In [13]:
nba["Salary Doubled"] = nba["Salary"] * 2

In [14]:
nba.head()

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary,Salary Doubled
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0,9113966.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0,37400000.0
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244.0,8188488.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0,41232000.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0,5163044.0


In [26]:
# Split each name once, at the first space, using the vectorized .str accessor.
# - Missing names (NaN) stay NaN without a per-row type check.
# - n=1 keeps everything after the first name together, so suffixes stay with
#   the surname ("Gary Trent Jr." -> "Trent Jr.") instead of being cut off.
# - A single-word name yields NaN for "Last Name" rather than raising IndexError.
name_parts = nba["Name"].str.split(" ", n=1)
nba["First Name"] = name_parts.str[0]
nba["Last Name"] = name_parts.str[1]

In [27]:
nba.head()

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary,Salary Doubled,First Name,Last Name
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0,9113966.0,Saddiq,Bey
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0,37400000.0,Bogdan,Bogdanovic
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244.0,8188488.0,Kobe,Bufkin
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0,41232000.0,Clint,Capela
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0,5163044.0,Bruno,Fernando


## Drop Rows with Missing Values
- `dropna` method deletes rows with `NaN` aka missing values.
- pass the `how` param an arg of `"all"` to delete rows where all columns have `NaN`
- `subset` param customizes/limits cols that pandas will use to drop rows with `NaN`

In [45]:
nba = nba_df.copy()

In [46]:
nba = nba.dropna()

In [47]:
nba["Salary"].hasnans

False

In [48]:
nba = nba_df.copy()

In [49]:
# Drop only the rows in which every column is NaN; rows with some values are kept.
nba = nba.dropna(how="all")

In [50]:
nba["Salary"].hasnans

True

In [51]:
nba = nba_df.copy()

In [52]:
# Drop rows whose College is NaN; NaN in any other column (e.g. Salary) is ignored.
nba = nba.dropna(subset=["College"])

In [53]:
nba

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0
...,...,...,...,...,...,...,...
586,Jordan Poole,Washington Wizards,G,6-4,194.0,Michigan,27955357.0
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719864.0
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250000.0
589,Tristan Vukcevic,Washington Wizards,F,6-10,220.0,Real Madrid,


In [54]:
nba_df

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0
...,...,...,...,...,...,...,...
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719864.0
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250000.0
589,Tristan Vukcevic,Washington Wizards,F,6-10,220.0,Real Madrid,
590,Delon Wright,Washington Wizards,G,6-5,185.0,Utah,8195122.0


In [55]:
nba = nba_df.copy()

In [56]:
# Drop a row if it has NaN in College or in Salary (either one is enough).
nba = nba.dropna(subset=["College", "Salary"])

In [57]:
nba

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0
...,...,...,...,...,...,...,...
585,Eugene Omoruyi,Washington Wizards,F,6-6,235.0,Oregon,559782.0
586,Jordan Poole,Washington Wizards,G,6-4,194.0,Michigan,27955357.0
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719864.0
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250000.0


In [58]:
nba_df

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0
...,...,...,...,...,...,...,...
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719864.0
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250000.0
589,Tristan Vukcevic,Washington Wizards,F,6-10,220.0,Real Madrid,
590,Delon Wright,Washington Wizards,G,6-5,185.0,Utah,8195122.0


## Fill in Missing Values with the fillna Method

In [66]:
nba = nba_df.copy().dropna(how="all")

In [62]:
nba

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0
...,...,...,...,...,...,...,...
586,Jordan Poole,Washington Wizards,G,6-4,194.0,Michigan,27955357.0
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719864.0
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250000.0
589,Tristan Vukcevic,Washington Wizards,F,6-10,220.0,Real Madrid,


In [63]:
nba.fillna(0) # Replace all missing with zero

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0
...,...,...,...,...,...,...,...
586,Jordan Poole,Washington Wizards,G,6-4,194.0,Michigan,27955357.0
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719864.0
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250000.0
589,Tristan Vukcevic,Washington Wizards,F,6-10,220.0,Real Madrid,0.0


In [67]:
nba["Salary"] = nba["Salary"].fillna(0) # Fill only the Salary Series so that text columns (e.g. College) don't receive a numeric 0

In [69]:
nba.iloc[589]

Name          Tristan Vukcevic
Team        Washington Wizards
Position                     F
Height                    6-10
Weight                   220.0
College            Real Madrid
Salary                     0.0
Name: 589, dtype: object

In [70]:
nba["College"] = nba["College"].fillna(value="Unknown")

In [71]:
nba["College"].value_counts()

College
Kentucky               29
Duke                   25
UCLA                   15
Unknown                13
Kansas                 12
                       ..
Wisconsin-Milwaukee     1
Winthrop                1
Olimpia Milano          1
Bucknell                1
Toledo                  1
Name: count, Length: 183, dtype: int64

## The astype Method 1

In [76]:
nba = nba_df.copy().dropna(how="all")
nba["Salary"] = nba["Salary"].fillna(0)
nba

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0
...,...,...,...,...,...,...,...
586,Jordan Poole,Washington Wizards,G,6-4,194.0,Michigan,27955357.0
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719864.0
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250000.0
589,Tristan Vukcevic,Washington Wizards,F,6-10,220.0,Real Madrid,0.0


In [77]:
nba.dtypes

Name         object
Team         object
Position     object
Height       object
Weight      float64
College      object
Salary      float64
dtype: object

In [79]:
nba["Salary"] = nba["Salary"].astype(int)

In [80]:
nba

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522
...,...,...,...,...,...,...,...
586,Jordan Poole,Washington Wizards,G,6-4,194.0,Michigan,27955357
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719864
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250000
589,Tristan Vukcevic,Washington Wizards,F,6-10,220.0,Real Madrid,0


In [81]:
nba.dtypes

Name         object
Team         object
Position     object
Height       object
Weight      float64
College      object
Salary        int64
dtype: object

In [83]:
nba["Weight"] = nba["Weight"].fillna(0).astype("int")

In [84]:
nba.dtypes

Name        object
Team        object
Position    object
Height      object
Weight       int64
College     object
Salary       int64
dtype: object

In [85]:
nba.head()

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215,Villanova,4556983
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225,Fenerbahce,18700000
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195,Michigan,4094244
3,Clint Capela,Atlanta Hawks,C,6-10,256,Elan Chalon,20616000
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240,Maryland,2581522


## The astype Method 2
- The `category` type is ideal for columns with a limited number of unique values
- Called on a DataFrame, the `nunique` method returns a **Series** with the number of unique values in every column; called on a Series, it returns a single integer
- With categories, pandas does not create a separate value in memory for each "cell".

In [89]:
nba = nba_df.copy().dropna(how="all")

In [90]:
nba["Team"] = nba["Team"].astype("category")

In [94]:
nba["Team"]

0           Atlanta Hawks
1           Atlanta Hawks
2           Atlanta Hawks
3           Atlanta Hawks
4           Atlanta Hawks
              ...        
586    Washington Wizards
587    Washington Wizards
588    Washington Wizards
589    Washington Wizards
590    Washington Wizards
Name: Team, Length: 591, dtype: category
Categories (30, object): ['Atlanta Hawks', 'Boston Celtics', 'Brooklyn Nets', 'Charlotte Hornets', ..., 'San Antonio Spurs', 'Toronto Raptors', 'Utah Jazz', 'Washington Wizards']

In [93]:
nba["Team"].nunique()

30

In [95]:
nba["College"] = nba["College"].astype("category")

In [99]:
nba.dtypes

Name          object
Team        category
Position      object
Height        object
Weight       float64
College     category
Salary       float64
dtype: object

In [102]:
nba.nunique()

Name        591
Team         30
Position      7
Height       20
Weight       93
College     182
Salary      298
dtype: int64

In [103]:
nba.info()

<class 'pandas.core.frame.DataFrame'>
Index: 591 entries, 0 to 590
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype   
---  ------    --------------  -----   
 0   Name      591 non-null    object  
 1   Team      591 non-null    category
 2   Position  584 non-null    object  
 3   Height    585 non-null    object  
 4   Weight    584 non-null    float64 
 5   College   578 non-null    category
 6   Salary    488 non-null    float64 
dtypes: category(2), float64(2), object(3)
memory usage: 36.2+ KB


In [104]:
nba["Position"] = nba["Position"].astype("category")

In [105]:
nba.info()

<class 'pandas.core.frame.DataFrame'>
Index: 591 entries, 0 to 590
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype   
---  ------    --------------  -----   
 0   Name      591 non-null    object  
 1   Team      591 non-null    category
 2   Position  584 non-null    category
 3   Height    585 non-null    object  
 4   Weight    584 non-null    float64 
 5   College   578 non-null    category
 6   Salary    488 non-null    float64 
dtypes: category(3), float64(2), object(2)
memory usage: 32.5+ KB


## Sort Values

In [106]:
nba = nba_df.copy()
nba.tail()

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719864.0
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250000.0
589,Tristan Vukcevic,Washington Wizards,F,6-10,220.0,Real Madrid,
590,Delon Wright,Washington Wizards,G,6-5,185.0,Utah,8195122.0
591,,,,,,,


In [110]:
nba.sort_values(by="Salary", na_position='last') # na_position is either "first" or "last"; "last" (used here, and the default) puts NaN rows at the end

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
5,Trent Forrest,Atlanta Hawks,G,6-4,210.0,Florida State,508891.0
22,JD Davison,Boston Celtics,G,6-1,195.0,Alabama,559782.0
55,Leaky Black,Charlotte Hornets,F,6-9,209.0,North Carolina,559782.0
52,Jalen Wilson,Brooklyn Nets,F,6-8,220.0,Kansas,559782.0
585,Eugene Omoruyi,Washington Wizards,F,6-6,235.0,Oregon,559782.0
...,...,...,...,...,...,...,...
547,Gary Trent Jr.,Toronto Raptors,G-F,6-5,209.0,Duke,
578,Taj Gibson,Washington Wizards,F,6-9,232.0,Southern California,
584,Kendrick Nunn,Washington Wizards,G,6-3,190.0,Oakland,
589,Tristan Vukcevic,Washington Wizards,F,6-10,220.0,Real Madrid,


In [112]:
nba.sort_values(by=["College", "Name"]) # Can sort by multiple

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
63,Brandon Miller,Charlotte Hornets,F,6-9,200.0,Alabama,10880364.0
568,Collin Sexton,Utah Jazz,G,6-2,190.0,Alabama,17325000.0
361,Herbert Jones,New Orleans Pelicans,F,6-7,206.0,Alabama,
22,JD Davison,Boston Celtics,G,6-1,195.0,Alabama,559782.0
177,JaMychal Green,Golden State Warriors,F-C,6-9,227.0,Alabama,
...,...,...,...,...,...,...,...
564,Nick Ongenda,Utah Jazz,,,,,1119563.0
74,Onuralp Bitim,Chicago Bulls,F,6-6,215.0,,559782.0
134,Souley Boum,Denver Nuggets,,,,,1119563.0
274,Tarik Biberovic,Memphis Grizzlies,F,6-7,218.0,,


In [115]:
nba.sort_values(by=["Team", "Name"], ascending=[True, False]) # `ascending` takes one boolean per `by` column:
# teams in alphabetical order, names in reverse alphabetical order within each team

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
12,Wesley Matthews,Atlanta Hawks,G,6-5,220.0,Marquette,3196448.0
5,Trent Forrest,Atlanta Hawks,G,6-4,210.0,Florida State,508891.0
17,Trae Young,Atlanta Hawks,G,6-1,164.0,Oklahoma,40064220.0
10,Seth Lundy,Atlanta Hawks,G,6-6,220.0,Penn State,559782.0
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0
...,...,...,...,...,...,...,...
576,Daniel Gafford,Washington Wizards,F-C,6-10,234.0,Arkansas,12402000.0
581,Corey Kispert,Washington Wizards,F,6-6,224.0,Gonzaga,3722040.0
574,Bilal Coulibaly,Washington Wizards,G,6-6,195.0,Metropolitans 92,6614256.0
579,Anthony Gill,Washington Wizards,F,6-8,230.0,Virginia,1997238.0


## Rank Values with the rank Method
- The `rank` method assigns a numeric rank to each **Series** value
- Pandas assigns the same rank to equal values (by default the average of the positions they occupy, hence ranks such as `540.5`), which leaves a "gap" in the sequence of ranks.

In [118]:
nba = nba_df.copy()
nba

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0
...,...,...,...,...,...,...,...
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719864.0
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250000.0
589,Tristan Vukcevic,Washington Wizards,F,6-10,220.0,Real Madrid,
590,Delon Wright,Washington Wizards,G,6-5,185.0,Utah,8195122.0


In [120]:
nba["Salary"] = nba["Salary"].fillna(0).astype(int)

In [122]:
nba["Salary"].rank(ascending=False) # Highest salary gets rank 1 (ranks start at 1, not 0); tied salaries share the average of their ranks, hence values like 540.5

0      231.0
1       80.0
2      243.0
3       69.0
4      308.0
       ...  
587    394.5
588    140.0
589    540.5
590    163.0
591    540.5
Name: Salary, Length: 592, dtype: float64

In [124]:
# NOTE(review): rank() returns floats, and tied salaries share an averaged rank (e.g. 540.5),
# so astype(int) truncates those to 540. Consider rank(method="min") if whole-number ranks are wanted.
nba["Salary Rank"] = nba["Salary"].rank(ascending=False).astype(int)
nba

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary,Salary Rank
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983,231
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000,80
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244,243
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000,69
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522,308
...,...,...,...,...,...,...,...,...
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719864,394
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250000,140
589,Tristan Vukcevic,Washington Wizards,F,6-10,220.0,Real Madrid,0,540
590,Delon Wright,Washington Wizards,G,6-5,185.0,Utah,8195122,163


In [126]:
nba.sort_values(by="Salary", ascending=False).tail()

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary,Salary Rank
35,Robert Williams III,Boston Celtics,C-F,6-9,237.0,Texas A&M,0,540
47,Dennis Smith Jr.,Brooklyn Nets,G,6-2,205.0,North Carolina State,0,540
28,Svi Mykhailiuk,Boston Celtics,G-F,6-7,205.0,Kansas,0,540
23,Blake Griffin,Boston Celtics,F,6-9,250.0,Oklahoma,0,540
26,Mfiondu Kabengele,Boston Celtics,C,6-10,250.0,Florida State,0,540


In [127]:
nba.sort_values(by="Salary", ascending=False).head()

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary,Salary Rank
175,Stephen Curry,Golden State Warriors,G,6-2,185.0,Davidson,51915615,1
461,Kevin Durant,Phoenix Suns,F,6-10,240.0,Texas,47649433,2
436,Joel Embiid,Philadelphia 76ers,C-F,7-0,280.0,Kansas,47607350,4
261,LeBron James,Los Angeles Lakers,F,6-9,250.0,St. Vincent-St. Mary HS (OH),47607350,4
145,Nikola Jokic,Denver Nuggets,C,6-11,284.0,Mega Basket,47607350,4
