In [1]:
import pandas as pd

## Methods and Attributes between Series and DataFrames
- A **DataFrame** is a 2-dimensional table consisting of rows and columns.
- Pandas uses a `NaN` designation for cells that have a missing value. It is short for "not a number". Most operations on `NaN` values will produce `NaN` values.
- Like with a **Series**, Pandas assigns an index position/label to each **DataFrame** row.
- The **DataFrame** and **Series** have common and exclusive methods/attributes.
- The `hasnans` attribute exists only on a **Series**. The `columns` attribute exists only on a **DataFrame**.
- Some methods/attributes will return different types of data.
- The `info` method returns a summary of the pandas object.

In [2]:
nba=pd.read_csv("nba.csv")

In [3]:
nba.head()

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0


In [4]:
nba

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0
...,...,...,...,...,...,...,...
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719864.0
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250000.0
589,Tristan Vukcevic,Washington Wizards,F,6-10,220.0,Real Madrid,
590,Delon Wright,Washington Wizards,G,6-5,185.0,Utah,8195122.0


In [5]:
nba.tail()

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719864.0
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250000.0
589,Tristan Vukcevic,Washington Wizards,F,6-10,220.0,Real Madrid,
590,Delon Wright,Washington Wizards,G,6-5,185.0,Utah,8195122.0
591,,,,,,,


In [6]:
s = pd.Series([1, 2, 3, 4, 5])
s

0    1
1    2
2    3
3    4
4    5
dtype: int64

In [7]:
m=nba["Name"]

In [8]:
m

0             Saddiq Bey
1      Bogdan Bogdanovic
2            Kobe Bufkin
3           Clint Capela
4         Bruno Fernando
             ...        
587         Ryan Rollins
588        Landry Shamet
589     Tristan Vukcevic
590         Delon Wright
591                  NaN
Name: Name, Length: 592, dtype: object

In [9]:
s.index

RangeIndex(start=0, stop=5, step=1)

In [10]:
nba.index

RangeIndex(start=0, stop=592, step=1)

In [11]:
s.values

array([1, 2, 3, 4, 5])

In [12]:
nba.values

array([['Saddiq Bey', 'Atlanta Hawks', 'F', ..., 215.0, 'Villanova',
        4556983.0],
       ['Bogdan Bogdanovic', 'Atlanta Hawks', 'G', ..., 225.0,
        'Fenerbahce', 18700000.0],
       ['Kobe Bufkin', 'Atlanta Hawks', 'G', ..., 195.0, 'Michigan',
        4094244.0],
       ...,
       ['Tristan Vukcevic', 'Washington Wizards', 'F', ..., 220.0,
        'Real Madrid', nan],
       ['Delon Wright', 'Washington Wizards', 'G', ..., 185.0, 'Utah',
        8195122.0],
       [nan, nan, nan, ..., nan, nan, nan]], shape=(592, 7), dtype=object)

In [13]:
s.shape

(5,)

In [14]:
nba.shape

(592, 7)

In [15]:
s.dtypes

dtype('int64')

In [16]:
nba.dtypes

Name         object
Team         object
Position     object
Height       object
Weight      float64
College      object
Salary      float64
dtype: object

In [17]:
s.hasnans

False

In [18]:
nba.columns

Index(['Name', 'Team', 'Position', 'Height', 'Weight', 'College', 'Salary'], dtype='object')

In [19]:
s.axes

[RangeIndex(start=0, stop=5, step=1)]

In [20]:
nba.axes

[RangeIndex(start=0, stop=592, step=1),
 Index(['Name', 'Team', 'Position', 'Height', 'Weight', 'College', 'Salary'], dtype='object')]

In [21]:
s.info()

<class 'pandas.core.series.Series'>
RangeIndex: 5 entries, 0 to 4
Series name: None
Non-Null Count  Dtype
--------------  -----
5 non-null      int64
dtypes: int64(1)
memory usage: 172.0 bytes


In [22]:
nba.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 592 entries, 0 to 591
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Name      591 non-null    object 
 1   Team      591 non-null    object 
 2   Position  584 non-null    object 
 3   Height    585 non-null    object 
 4   Weight    584 non-null    float64
 5   College   578 non-null    object 
 6   Salary    488 non-null    float64
dtypes: float64(2), object(5)
memory usage: 32.5+ KB


## Differences between Shared Methods
- The `sum` method adds a **Series's** values.
- On a **DataFrame**, the `sum` method defaults to adding the values by traversing the index (row values).
- The `axis` parameter customizes the direction that we add across. Pass `"columns"` or `1` to add "across" the columns.

In [23]:
revenue=pd.read_csv("revenue.csv",index_col="Date")
revenue

Unnamed: 0_level_0,New York,Los Angeles,Miami
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1/1/26,985,122,499
1/2/26,738,788,534
1/3/26,14,20,933
1/4/26,730,904,885
1/5/26,114,71,253
1/6/26,936,502,497
1/7/26,123,996,115
1/8/26,935,492,886
1/9/26,846,954,823
1/10/26,54,285,216


In [24]:
# By default (axis="index"), sum traverses the row index, adding the values
# down each column, so the result holds one total per column (city).
revenue.sum()

New York       5475
Los Angeles    5134
Miami          5641
dtype: int64

In [25]:
# axis="index" is the default, so this gives the same result as revenue.sum().
revenue.sum(axis="index")

New York       5475
Los Angeles    5134
Miami          5641
dtype: int64

In [26]:
# axis="columns" adds across the columns within each row, so the result
# holds one total per date (row).
revenue.sum(axis="columns")

Date
1/1/26     1606
1/2/26     2060
1/3/26      967
1/4/26     2519
1/5/26      438
1/6/26     1935
1/7/26     1234
1/8/26     2313
1/9/26     2623
1/10/26     555
dtype: int64

## Select One Column from a DataFrame
- We can use attribute syntax (`df.column_name`) to select a column from a **DataFrame**. The syntax will not work if the column name has spaces.
- We can also use square bracket syntax (`df["column name"]`) which will work for any column name.
- Pandas extracts a column from a **DataFrame** as a **Series**.
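- The **Series** is a view, so changes to the **Series** *will* affect the **DataFrame**. (This applies to pandas 2.x and earlier; with Copy-on-Write, the default from pandas 3.0, the extracted **Series** behaves like a copy.)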
- Pandas will display a warning if you mutate the **Series**. Use the `copy` method to create a duplicate.

In [27]:
nba = pd.read_csv("nba.csv")
nba.head()

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0


In [28]:
nba["Team"]

0           Atlanta Hawks
1           Atlanta Hawks
2           Atlanta Hawks
3           Atlanta Hawks
4           Atlanta Hawks
              ...        
587    Washington Wizards
588    Washington Wizards
589    Washington Wizards
590    Washington Wizards
591                   NaN
Name: Team, Length: 592, dtype: object

In [29]:
names = nba["Name"].copy()
names

0             Saddiq Bey
1      Bogdan Bogdanovic
2            Kobe Bufkin
3           Clint Capela
4         Bruno Fernando
             ...        
587         Ryan Rollins
588        Landry Shamet
589     Tristan Vukcevic
590         Delon Wright
591                  NaN
Name: Name, Length: 592, dtype: object

In [30]:
names.iloc[0] = "Whatever"
names.head()

0             Whatever
1    Bogdan Bogdanovic
2          Kobe Bufkin
3         Clint Capela
4       Bruno Fernando
Name: Name, dtype: object

In [31]:
nba.head()

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0


## Select Multiple Columns from a DataFrame
- Use square brackets with a list of names to extract multiple **DataFrame** columns.
- Pandas stores the result in a new **DataFrame** (a copy).

In [32]:
nba=pd.read_csv("nba.csv")
nba.head()

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0


In [33]:
nba[["Name","Team"]]

Unnamed: 0,Name,Team
0,Saddiq Bey,Atlanta Hawks
1,Bogdan Bogdanovic,Atlanta Hawks
2,Kobe Bufkin,Atlanta Hawks
3,Clint Capela,Atlanta Hawks
4,Bruno Fernando,Atlanta Hawks
...,...,...
587,Ryan Rollins,Washington Wizards
588,Landry Shamet,Washington Wizards
589,Tristan Vukcevic,Washington Wizards
590,Delon Wright,Washington Wizards


In [34]:
# Instead of writing a list literal inside the square brackets, the list of
# column names can be stored in a variable first and passed to the brackets.
columns_to_select=["Position","Height","Weight"]
nba[columns_to_select]

Unnamed: 0,Position,Height,Weight
0,F,6-7,215.0
1,G,6-5,225.0
2,G,6-5,195.0
3,C,6-10,256.0
4,F-C,6-10,240.0
...,...,...,...
587,G,6-3,180.0
588,G,6-4,190.0
589,F,6-10,220.0
590,G,6-5,185.0


## Add New Column to DataFrame
- Use square bracket extraction syntax with an equal sign to add a new **Series** to a **DataFrame**.
- The `insert` method allows us to insert a new column at a specific column position.
- On the right-hand side, we can reference an existing **DataFrame** column and perform a broadcasting operation on it to create the new **Series**.

In [35]:
nba=pd.read_csv("nba.csv")
nba

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0
...,...,...,...,...,...,...,...
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719864.0
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250000.0
589,Tristan Vukcevic,Washington Wizards,F,6-10,220.0,Real Madrid,
590,Delon Wright,Washington Wizards,G,6-5,185.0,Utah,8195122.0


In [36]:
# Assigning to a new column label appends the column at the end of the
# DataFrame; the scalar value is repeated for every row.
nba["sport"]="Basketball"

In [37]:
nba

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary,sport
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0,Basketball
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0,Basketball
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244.0,Basketball
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0,Basketball
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0,Basketball
...,...,...,...,...,...,...,...,...
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719864.0,Basketball
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250000.0,Basketball
589,Tristan Vukcevic,Washington Wizards,F,6-10,220.0,Real Madrid,,Basketball
590,Delon Wright,Washington Wizards,G,6-5,185.0,Utah,8195122.0,Basketball


In [38]:
# insert places the new column at the given position (loc=2, i.e. between
# Team and Position) instead of at the end. It modifies nba in place.
nba.insert(loc=2,column="Sport",value="Basketball")

In [39]:
nba

Unnamed: 0,Name,Team,Sport,Position,Height,Weight,College,Salary,sport
0,Saddiq Bey,Atlanta Hawks,Basketball,F,6-7,215.0,Villanova,4556983.0,Basketball
1,Bogdan Bogdanovic,Atlanta Hawks,Basketball,G,6-5,225.0,Fenerbahce,18700000.0,Basketball
2,Kobe Bufkin,Atlanta Hawks,Basketball,G,6-5,195.0,Michigan,4094244.0,Basketball
3,Clint Capela,Atlanta Hawks,Basketball,C,6-10,256.0,Elan Chalon,20616000.0,Basketball
4,Bruno Fernando,Atlanta Hawks,Basketball,F-C,6-10,240.0,Maryland,2581522.0,Basketball
...,...,...,...,...,...,...,...,...,...
587,Ryan Rollins,Washington Wizards,Basketball,G,6-3,180.0,Toledo,1719864.0,Basketball
588,Landry Shamet,Washington Wizards,Basketball,G,6-4,190.0,Wichita State,10250000.0,Basketball
589,Tristan Vukcevic,Washington Wizards,Basketball,F,6-10,220.0,Real Madrid,,Basketball
590,Delon Wright,Washington Wizards,Basketball,G,6-5,185.0,Utah,8195122.0,Basketball


In [40]:
nba["Salary Doubled"]=nba["Salary"] * 2

In [41]:
nba

Unnamed: 0,Name,Team,Sport,Position,Height,Weight,College,Salary,sport,Salary Doubled
0,Saddiq Bey,Atlanta Hawks,Basketball,F,6-7,215.0,Villanova,4556983.0,Basketball,9113966.0
1,Bogdan Bogdanovic,Atlanta Hawks,Basketball,G,6-5,225.0,Fenerbahce,18700000.0,Basketball,37400000.0
2,Kobe Bufkin,Atlanta Hawks,Basketball,G,6-5,195.0,Michigan,4094244.0,Basketball,8188488.0
3,Clint Capela,Atlanta Hawks,Basketball,C,6-10,256.0,Elan Chalon,20616000.0,Basketball,41232000.0
4,Bruno Fernando,Atlanta Hawks,Basketball,F-C,6-10,240.0,Maryland,2581522.0,Basketball,5163044.0
...,...,...,...,...,...,...,...,...,...,...
587,Ryan Rollins,Washington Wizards,Basketball,G,6-3,180.0,Toledo,1719864.0,Basketball,3439728.0
588,Landry Shamet,Washington Wizards,Basketball,G,6-4,190.0,Wichita State,10250000.0,Basketball,20500000.0
589,Tristan Vukcevic,Washington Wizards,Basketball,F,6-10,220.0,Real Madrid,,Basketball,
590,Delon Wright,Washington Wizards,Basketball,G,6-5,185.0,Utah,8195122.0,Basketball,16390244.0


## A Review of the value_counts Method
- The `value_counts` method counts the number of times that each unique value occurs in a **Series**.

In [42]:
nba=pd.read_csv("nba.csv")
nba

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0
...,...,...,...,...,...,...,...
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719864.0
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250000.0
589,Tristan Vukcevic,Washington Wizards,F,6-10,220.0,Real Madrid,
590,Delon Wright,Washington Wizards,G,6-5,185.0,Utah,8195122.0


In [43]:
nba["Team"].value_counts(ascending=True)

Team
New Orleans Pelicans      16
Brooklyn Nets             17
Portland Trail Blazers    17
Golden State Warriors     17
Orlando Magic             18
Sacramento Kings          18
Boston Celtics            18
Atlanta Hawks             18
Los Angeles Clippers      19
Los Angeles Lakers        19
Detroit Pistons           19
Minnesota Timberwolves    19
Cleveland Cavaliers       19
Chicago Bulls             19
Charlotte Hornets         20
Houston Rockets           20
Phoenix Suns              20
San Antonio Spurs         20
Utah Jazz                 21
Toronto Raptors           21
Indiana Pacers            21
Philadelphia 76ers        21
New York Knicks           21
Oklahoma City Thunder     21
Washington Wizards        21
Miami Heat                22
Denver Nuggets            22
Memphis Grizzlies         22
Milwaukee Bucks           22
Dallas Mavericks          23
Name: count, dtype: int64

## Drop Rows with Missing Values
- Pandas uses a `NaN` designation for cells that have a missing value.
- The `dropna` method deletes rows with missing values. Its default behavior is to remove a row if it has *any* missing values.
- Pass the `how` parameter an argument of "all" to delete rows where all the values are `NaN`.
- The `subset` parameter customizes/limits the columns that pandas will use to drop rows with missing values.

In [44]:
nba=pd.read_csv("nba.csv")
nba

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0
...,...,...,...,...,...,...,...
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719864.0
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250000.0
589,Tristan Vukcevic,Washington Wizards,F,6-10,220.0,Real Madrid,
590,Delon Wright,Washington Wizards,G,6-5,185.0,Utah,8195122.0


In [45]:
# With no arguments, dropna removes every row that has at least one missing value.
nba.dropna()

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0
...,...,...,...,...,...,...,...
585,Eugene Omoruyi,Washington Wizards,F,6-6,235.0,Oregon,559782.0
586,Jordan Poole,Washington Wizards,G,6-4,194.0,Michigan,27955357.0
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719864.0
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250000.0


In [46]:
# how="any" is the default, so this matches nba.dropna(): a row is removed
# if any of its values is missing.
nba.dropna(how="any")

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0
...,...,...,...,...,...,...,...
585,Eugene Omoruyi,Washington Wizards,F,6-6,235.0,Oregon,559782.0
586,Jordan Poole,Washington Wizards,G,6-4,194.0,Michigan,27955357.0
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719864.0
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250000.0


In [47]:
# how="all" removes a row only when every value in it is missing
# (here, the trailing all-NaN row).
nba.dropna(how="all")

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0
...,...,...,...,...,...,...,...
586,Jordan Poole,Washington Wizards,G,6-4,194.0,Michigan,27955357.0
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719864.0
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250000.0
589,Tristan Vukcevic,Washington Wizards,F,6-10,220.0,Real Madrid,


In [48]:
# subset limits the check to the listed columns: a row is removed only if it
# has a missing value in Team or Salary; NaNs in other columns are ignored.
nba.dropna(subset=["Team","Salary"])

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0
...,...,...,...,...,...,...,...
585,Eugene Omoruyi,Washington Wizards,F,6-6,235.0,Oregon,559782.0
586,Jordan Poole,Washington Wizards,G,6-4,194.0,Michigan,27955357.0
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719864.0
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250000.0


## Fill in Missing Values with the fillna Method
- The `fillna` method replaces missing `NaN` values with its argument.
- The `fillna` method is available on both **DataFrames** and **Series**.
- An extracted **Series** is a view on the original **DataFrame**, but the `fillna` method returns a copy.

In [49]:
nba=pd.read_csv("nba.csv").dropna(how="all")
nba

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0
...,...,...,...,...,...,...,...
586,Jordan Poole,Washington Wizards,G,6-4,194.0,Michigan,27955357.0
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719864.0
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250000.0
589,Tristan Vukcevic,Washington Wizards,F,6-10,220.0,Real Madrid,


In [50]:
nba.dropna(how="all")


Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0
...,...,...,...,...,...,...,...
586,Jordan Poole,Washington Wizards,G,6-4,194.0,Michigan,27955357.0
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719864.0
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250000.0
589,Tristan Vukcevic,Washington Wizards,F,6-10,220.0,Real Madrid,


In [51]:
# fillna returns a new Series instead of modifying the column in place,
# so the result is assigned back to the Salary column.
nba["Salary"]=nba["Salary"].fillna(value=0.0)

In [52]:
nba

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0
...,...,...,...,...,...,...,...
586,Jordan Poole,Washington Wizards,G,6-4,194.0,Michigan,27955357.0
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719864.0
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250000.0
589,Tristan Vukcevic,Washington Wizards,F,6-10,220.0,Real Madrid,0.0


In [53]:
# Replace missing Team values with "Unknown" and assign the result back to the
# same column. The target must be "Team": assigning this Series to "Salary"
# would overwrite every salary with team names.
nba["Team"]=nba["Team"].fillna(value="Unknown")

## The astype Method I
- The `astype` method converts a **Series's** values to a specified type.
- Pass in the specified type as either a string or the core Python data type.
- Pandas cannot convert `NaN` values to numeric types, so we need to eliminate/replace them before we perform the conversion.
- The `dtypes` attribute returns a **Series** with the **DataFrame's** columns and their types.

In [54]:
nba=pd.read_csv("nba.csv").dropna(how="all")
nba

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0
...,...,...,...,...,...,...,...
586,Jordan Poole,Washington Wizards,G,6-4,194.0,Michigan,27955357.0
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719864.0
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250000.0
589,Tristan Vukcevic,Washington Wizards,F,6-10,220.0,Real Madrid,


In [55]:
nba.dtypes

Name         object
Team         object
Position     object
Height       object
Weight      float64
College      object
Salary      float64
dtype: object

In [56]:
nba["Salary"]=nba["Salary"].fillna(0)

In [57]:
nba

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0
...,...,...,...,...,...,...,...
586,Jordan Poole,Washington Wizards,G,6-4,194.0,Michigan,27955357.0
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719864.0
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250000.0
589,Tristan Vukcevic,Washington Wizards,F,6-10,220.0,Real Madrid,0.0


In [58]:
nba["Salary"]=nba["Salary"].astype(int)

In [59]:
nba.dtypes

Name         object
Team         object
Position     object
Height       object
Weight      float64
College      object
Salary        int64
dtype: object

## The astype Method II
- The `category` type is ideal for columns with a limited number of unique values.
- The `nunique` method will return a **Series** with the number of unique values in each column.
- With categories, pandas does not create a separate value in memory for each "cell". Rather, the cells point to a single copy for each unique value.

In [60]:
nba=pd.read_csv("nba.csv").dropna(how="all")
nba

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0
...,...,...,...,...,...,...,...
586,Jordan Poole,Washington Wizards,G,6-4,194.0,Michigan,27955357.0
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719864.0
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250000.0
589,Tristan Vukcevic,Washington Wizards,F,6-10,220.0,Real Madrid,


In [61]:
nba.nunique()

Name        591
Team         30
Position      7
Height       20
Weight       93
College     182
Salary      298
dtype: int64

In [62]:
nba.info()

<class 'pandas.core.frame.DataFrame'>
Index: 591 entries, 0 to 590
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Name      591 non-null    object 
 1   Team      591 non-null    object 
 2   Position  584 non-null    object 
 3   Height    585 non-null    object 
 4   Weight    584 non-null    float64
 5   College   578 non-null    object 
 6   Salary    488 non-null    float64
dtypes: float64(2), object(5)
memory usage: 36.9+ KB


In [63]:
nba["Position"]=nba["Position"].astype(dtype="category")

In [64]:
nba

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0
...,...,...,...,...,...,...,...
586,Jordan Poole,Washington Wizards,G,6-4,194.0,Michigan,27955357.0
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719864.0
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250000.0
589,Tristan Vukcevic,Washington Wizards,F,6-10,220.0,Real Madrid,


## Sort a DataFrame with the sort_values Method I
- The `sort_values` method sorts a **DataFrame** by the values in one or more columns. The default sort is an ascending one (alphabetical for strings).
- The first parameter (`by`) expects the column(s) to sort by.
- If sorting by a single column, pass a string with its name.
- The `ascending` parameter customizes the sort order.
- The `na_position` parameter customizes where pandas places `NaN` values.

In [65]:
nba=pd.read_csv("nba.csv").dropna(how="all")
nba

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0
...,...,...,...,...,...,...,...
586,Jordan Poole,Washington Wizards,G,6-4,194.0,Michigan,27955357.0
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719864.0
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250000.0
589,Tristan Vukcevic,Washington Wizards,F,6-10,220.0,Real Madrid,


In [66]:
nba.sort_values(by="Name")

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
122,A.J. Lawson,Dallas Mavericks,G,6-6,179.0,South Carolina,
324,AJ Green,Milwaukee Bucks,G,6-5,190.0,Northern Iowa,1901769.0
6,AJ Griffin,Atlanta Hawks,F,6-6,220.0,Duke,3712920.0
141,Aaron Gordon,Denver Nuggets,F,6-8,235.0,Arizona,22266182.0
198,Aaron Holiday,Houston Rockets,G,6-0,185.0,UCLA,2346614.0
...,...,...,...,...,...,...,...
515,Zach Collins,San Antonio Spurs,F-C,6-11,250.0,Gonzaga,7700000.0
83,Zach LaVine,Chicago Bulls,G,6-5,200.0,UCLA,40064220.0
149,Zeke Nnaji,Denver Nuggets,F-C,6-9,240.0,Arizona,4306281.0
291,Ziaire Williams,Memphis Grizzlies,F,6-9,185.0,Stanford,4810200.0


In [67]:
nba.sort_values(by="Name",ascending=False)

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
370,Zion Williamson,New Orleans Pelicans,F,6-6,284.0,Duke,34005250.0
291,Ziaire Williams,Memphis Grizzlies,F,6-9,185.0,Stanford,4810200.0
149,Zeke Nnaji,Denver Nuggets,F-C,6-9,240.0,Arizona,4306281.0
83,Zach LaVine,Chicago Bulls,G,6-5,200.0,UCLA,40064220.0
515,Zach Collins,San Antonio Spurs,F-C,6-11,250.0,Gonzaga,7700000.0
...,...,...,...,...,...,...,...
198,Aaron Holiday,Houston Rockets,G,6-0,185.0,UCLA,2346614.0
141,Aaron Gordon,Denver Nuggets,F,6-8,235.0,Arizona,22266182.0
6,AJ Griffin,Atlanta Hawks,F,6-6,220.0,Duke,3712920.0
324,AJ Green,Milwaukee Bucks,G,6-5,190.0,Northern Iowa,1901769.0


In [68]:
nba.sort_values(by="Salary",na_position="first")

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
23,Blake Griffin,Boston Celtics,F,6-9,250.0,Oklahoma,
26,Mfiondu Kabengele,Boston Celtics,C,6-10,250.0,Florida State,
28,Svi Mykhailiuk,Boston Celtics,G-F,6-7,205.0,Kansas,
35,Robert Williams III,Boston Celtics,C-F,6-9,237.0,Texas A&M,
39,Nic Claxton,Brooklyn Nets,C,6-11,215.0,Georgia,
...,...,...,...,...,...,...,...
261,LeBron James,Los Angeles Lakers,F,6-9,250.0,St. Vincent-St. Mary HS (OH),47607350.0
145,Nikola Jokic,Denver Nuggets,C,6-11,284.0,Mega Basket,47607350.0
436,Joel Embiid,Philadelphia 76ers,C-F,7-0,280.0,Kansas,47607350.0
461,Kevin Durant,Phoenix Suns,F,6-10,240.0,Texas,47649433.0


## Sort a DataFrame with the sort_values Method II
- To sort by multiple columns, pass the `by` parameter a list of column names. Pandas will sort in the specified column order (first to last).
- Pass the `ascending` parameter a Boolean to sort all columns in a consistent order (all ascending or all descending).
- Pass `ascending` a list to customize the sort order *per* column. The `ascending` list length must match the `by` list.

In [69]:
nba=pd.read_csv("nba.csv")
nba

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0
...,...,...,...,...,...,...,...
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719864.0
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250000.0
589,Tristan Vukcevic,Washington Wizards,F,6-10,220.0,Real Madrid,
590,Delon Wright,Washington Wizards,G,6-5,185.0,Utah,8195122.0


In [70]:
# Sorts by Team first, then by Name within each team (ascending by default).
nba.sort_values(by=["Team","Name"])

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
6,AJ Griffin,Atlanta Hawks,F,6-6,220.0,Duke,3712920.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0
8,De'Andre Hunter,Atlanta Hawks,F-G,6-8,221.0,Virginia,20089286.0
...,...,...,...,...,...,...,...
578,Taj Gibson,Washington Wizards,F,6-9,232.0,Southern California,
589,Tristan Vukcevic,Washington Wizards,F,6-10,220.0,Real Madrid,
580,Tyus Jones,Washington Wizards,G,6-2,196.0,Duke,14000000.0
573,Xavier Cooks,Washington Wizards,F,6-8,183.0,Winthrop,1719864.0


In [71]:
# Each entry in `ascending` applies to the column at the same position in `by`:
# teams are sorted ascending, and names inside each team are sorted descending.
nba.sort_values(
    by=["Team", "Name"],
    ascending=[True, False],
)

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
12,Wesley Matthews,Atlanta Hawks,G,6-5,220.0,Marquette,3196448.0
5,Trent Forrest,Atlanta Hawks,G,6-4,210.0,Florida State,508891.0
17,Trae Young,Atlanta Hawks,G,6-1,164.0,Oklahoma,40064220.0
10,Seth Lundy,Atlanta Hawks,G,6-6,220.0,Penn State,559782.0
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0
...,...,...,...,...,...,...,...
576,Daniel Gafford,Washington Wizards,F-C,6-10,234.0,Arkansas,12402000.0
581,Corey Kispert,Washington Wizards,F,6-6,224.0,Gonzaga,3722040.0
574,Bilal Coulibaly,Washington Wizards,G,6-6,195.0,Metropolitans 92,6614256.0
579,Anthony Gill,Washington Wizards,F,6-8,230.0,Virginia,1997238.0


## Sort a DataFrame by its Index
- The `sort_index` method sorts the **DataFrame** by its index positions/labels.

In [72]:
# Load and sort in one chained expression. Sorting by Team/Name leaves the
# index labels out of numeric order, which sets up the sort_index demo below.
nba = pd.read_csv("nba.csv").sort_values(by=["Team", "Name"])
nba

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
6,AJ Griffin,Atlanta Hawks,F,6-6,220.0,Duke,3712920.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0
8,De'Andre Hunter,Atlanta Hawks,F-G,6-8,221.0,Virginia,20089286.0
...,...,...,...,...,...,...,...
578,Taj Gibson,Washington Wizards,F,6-9,232.0,Southern California,
589,Tristan Vukcevic,Washington Wizards,F,6-10,220.0,Real Madrid,
580,Tyus Jones,Washington Wizards,G,6-2,196.0,Duke,14000000.0
573,Xavier Cooks,Washington Wizards,F,6-8,183.0,Winthrop,1719864.0


In [73]:
# sort_index orders rows by their index labels; ascending=True is the default,
# so the original 0, 1, 2, ... row order is restored.
nba.sort_index()

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000.0
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522.0
...,...,...,...,...,...,...,...
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719864.0
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250000.0
589,Tristan Vukcevic,Washington Wizards,F,6-10,220.0,Real Madrid,
590,Delon Wright,Washington Wizards,G,6-5,185.0,Utah,8195122.0


## Rank Values with the rank Method
- The `rank` method assigns a numeric ranking to each **Series** value.
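- Pandas will assign the same rank to equal values. By default, tied values share the average of the positions they occupy (which is why fractional ranks such as `197.5` appear), and the positions they use up create a "gap" before the next rank.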

In [74]:
# Build the working frame in a single chain:
#   1. drop rows in which every column is missing,
#   2. replace missing salaries with 0 so the column can be cast to integers.
nba = (
    pd.read_csv("nba.csv")
    .dropna(how="all")
    .assign(Salary=lambda frame: frame["Salary"].fillna(0).astype(int))
)
nba

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522
...,...,...,...,...,...,...,...
586,Jordan Poole,Washington Wizards,G,6-4,194.0,Michigan,27955357
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719864
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250000
589,Tristan Vukcevic,Washington Wizards,F,6-10,220.0,Real Madrid,0


In [75]:
# ascending=True is the default for rank(): lower salaries receive lower rank numbers.
nba["Salary"].rank(ascending=True)

0      361.0
1      512.0
2      349.0
3      523.0
4      284.0
       ...  
586    544.0
587    197.5
588    452.0
589     52.0
590    429.0
Name: Salary, Length: 591, dtype: float64

In [76]:
# Store the descending rank (highest salary ranked first) as a new column.
nba["Salary Rank"] = nba["Salary"].rank(ascending=False)

In [77]:
# Display the frame to confirm the new "Salary Rank" column was added.
nba

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary,Salary Rank
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983,231.0
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000,80.0
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244,243.0
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000,69.0
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522,308.0
...,...,...,...,...,...,...,...,...
586,Jordan Poole,Washington Wizards,G,6-4,194.0,Michigan,27955357,48.0
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719864,394.5
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250000,140.0
589,Tristan Vukcevic,Washington Wizards,F,6-10,220.0,Real Madrid,0,540.0


In [78]:
# Highest salaries first, so the "Salary Rank" column can be checked against
# the sorted order (tied salaries share the same rank).
nba.sort_values(by="Salary", ascending=False)

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary,Salary Rank
175,Stephen Curry,Golden State Warriors,G,6-2,185.0,Davidson,51915615,1.0
461,Kevin Durant,Phoenix Suns,F,6-10,240.0,Texas,47649433,2.0
145,Nikola Jokic,Denver Nuggets,C,6-11,284.0,Mega Basket,47607350,4.0
436,Joel Embiid,Philadelphia 76ers,C-F,7-0,280.0,Kansas,47607350,4.0
261,LeBron James,Los Angeles Lakers,F,6-9,250.0,St. Vincent-St. Mary HS (OH),47607350,4.0
...,...,...,...,...,...,...,...,...
547,Gary Trent Jr.,Toronto Raptors,G-F,6-5,209.0,Duke,0,540.0
23,Blake Griffin,Boston Celtics,F,6-9,250.0,Oklahoma,0,540.0
26,Mfiondu Kabengele,Boston Celtics,C,6-10,250.0,Florida State,0,540.0
28,Svi Mykhailiuk,Boston Celtics,G-F,6-7,205.0,Kansas,0,540.0


## Rank Values with the rank Method

- The `rank` method assigns a numeric ranking to each **Series** value.
- Pandas will assign the same rank to equal values. By default, tied values share the average of the positions they occupy, and the positions they use up create a "gap" before the next rank.

In [79]:
# Start again from the CSV, dropping rows in which every column is missing.
nba = pd.read_csv("nba.csv").dropna(how="all")
# Missing salaries become 0 so the column can be stored as integers.
nba["Salary"] = nba["Salary"].fillna(0).astype(int)
nba

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522
...,...,...,...,...,...,...,...
586,Jordan Poole,Washington Wizards,G,6-4,194.0,Michigan,27955357
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719864
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250000
589,Tristan Vukcevic,Washington Wizards,F,6-10,220.0,Real Madrid,0


In [80]:
# Both arguments are the defaults, spelled out for clarity: ranks increase with
# salary, and tied salaries share the average of their positions.
nba["Salary"].rank(method="average", ascending=True)


0      361.0
1      512.0
2      349.0
3      523.0
4      284.0
       ...  
586    544.0
587    197.5
588    452.0
589     52.0
590    429.0
Name: Salary, Length: 591, dtype: float64

In [81]:
# With ascending=False the ordering flips: the highest salary is ranked first.
nba["Salary"].rank(ascending=False)

0      231.0
1       80.0
2      243.0
3       69.0
4      308.0
       ...  
586     48.0
587    394.5
588    140.0
589    540.0
590    163.0
Name: Salary, Length: 591, dtype: float64

In [82]:
# Store the descending rank as whole numbers.
# NOTE: rank() averages tied positions, so casting to int drops the fractional
# part of those ranks (e.g. 394.5 is stored as 394).
nba["Salary Rank"] = (
    nba["Salary"]
    .rank(ascending=False)
    .astype(int)
)

In [83]:
# Display the frame to inspect the integer "Salary Rank" column.
nba

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary,Salary Rank
0,Saddiq Bey,Atlanta Hawks,F,6-7,215.0,Villanova,4556983,231
1,Bogdan Bogdanovic,Atlanta Hawks,G,6-5,225.0,Fenerbahce,18700000,80
2,Kobe Bufkin,Atlanta Hawks,G,6-5,195.0,Michigan,4094244,243
3,Clint Capela,Atlanta Hawks,C,6-10,256.0,Elan Chalon,20616000,69
4,Bruno Fernando,Atlanta Hawks,F-C,6-10,240.0,Maryland,2581522,308
...,...,...,...,...,...,...,...,...
586,Jordan Poole,Washington Wizards,G,6-4,194.0,Michigan,27955357,48
587,Ryan Rollins,Washington Wizards,G,6-3,180.0,Toledo,1719864,394
588,Landry Shamet,Washington Wizards,G,6-4,190.0,Wichita State,10250000,140
589,Tristan Vukcevic,Washington Wizards,F,6-10,220.0,Real Madrid,0,540


In [84]:
# List players from highest to lowest salary to check the ranks against the order.
nba.sort_values("Salary", ascending=False)

Unnamed: 0,Name,Team,Position,Height,Weight,College,Salary,Salary Rank
175,Stephen Curry,Golden State Warriors,G,6-2,185.0,Davidson,51915615,1
461,Kevin Durant,Phoenix Suns,F,6-10,240.0,Texas,47649433,2
145,Nikola Jokic,Denver Nuggets,C,6-11,284.0,Mega Basket,47607350,4
436,Joel Embiid,Philadelphia 76ers,C-F,7-0,280.0,Kansas,47607350,4
261,LeBron James,Los Angeles Lakers,F,6-9,250.0,St. Vincent-St. Mary HS (OH),47607350,4
...,...,...,...,...,...,...,...,...
547,Gary Trent Jr.,Toronto Raptors,G-F,6-5,209.0,Duke,0,540
23,Blake Griffin,Boston Celtics,F,6-9,250.0,Oklahoma,0,540
26,Mfiondu Kabengele,Boston Celtics,C,6-10,250.0,Florida State,0,540
28,Svi Mykhailiuk,Boston Celtics,G-F,6-7,205.0,Kansas,0,540
