In [2]:
import pandas as pd

In [4]:
# Load the NBA roster CSV into a DataFrame; the file contains missing values (NaN).
nba = pd.read_csv('nba.csv')
nba
# NaN is a float, so a numeric column that contains NaN is stored as float64
# (e.g. Number and Age); text columns that contain NaN keep the object dtype.

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0
...,...,...,...,...,...,...,...,...,...
453,Shelvin Mack,Utah Jazz,8.0,PG,26.0,6-3,203.0,Butler,2433333.0
454,Raul Neto,Utah Jazz,25.0,PG,24.0,6-1,179.0,,900000.0
455,Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0
456,Jeff Withey,Utah Jazz,24.0,C,26.0,7-0,231.0,Kansas,947276.0


### Methods and Attributes for DataFrames

DataFrames and Series share many methods and attributes, but each also has some that are exclusive to it.

In [5]:
nba.head(n = 4)  # first 4 rows; not displayed because only the cell's last expression is shown
nba.tail(3)      # last 3 rows (this is the output rendered below)

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
455,Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0
456,Jeff Withey,Utah Jazz,24.0,C,26.0,7-0,231.0,Kansas,947276.0
457,,,,,,,,,


In [6]:
nba.index  # row labels; a RangeIndex here because read_csv was given no index column

RangeIndex(start=0, stop=458, step=1)

In [9]:
nba.values  # underlying data as a 2-D NumPy array (dtype=object here, since the columns mix text and numbers)

array([['Avery Bradley', 'Boston Celtics', 0.0, ..., 180.0, 'Texas',
        7730337.0],
       ['Jae Crowder', 'Boston Celtics', 99.0, ..., 235.0, 'Marquette',
        6796117.0],
       ['John Holland', 'Boston Celtics', 30.0, ..., 205.0,
        'Boston University', nan],
       ...,
       ['Tibor Pleiss', 'Utah Jazz', 21.0, ..., 256.0, nan, 2900000.0],
       ['Jeff Withey', 'Utah Jazz', 24.0, ..., 231.0, 'Kansas', 947276.0],
       [nan, nan, nan, ..., nan, nan, nan]], dtype=object)

In [10]:
nba.shape # tuple of (number of rows, number of columns)

(458, 9)

In [12]:
nba.dtypes # Series mapping each column name to its dtype

Name         object
Team         object
Number      float64
Position     object
Age         float64
Height       object
Weight      float64
College      object
Salary      float64
dtype: object

In [13]:
nba.columns  # Index object holding the column labels

Index(['Name', 'Team', 'Number', 'Position', 'Age', 'Height', 'Weight',
       'College', 'Salary'],
      dtype='object')

In [14]:
nba.axes  # list of [row index, column index]

[RangeIndex(start=0, stop=458, step=1),
 Index(['Name', 'Team', 'Number', 'Position', 'Age', 'Height', 'Weight',
        'College', 'Salary'],
       dtype='object')]

In [15]:
nba.info()  # prints the index range, per-column non-null counts and dtypes, and memory usage

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 458 entries, 0 to 457
Data columns (total 9 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Name      457 non-null    object 
 1   Team      457 non-null    object 
 2   Number    457 non-null    float64
 3   Position  457 non-null    object 
 4   Age       457 non-null    float64
 5   Height    457 non-null    object 
 6   Weight    457 non-null    float64
 7   College   373 non-null    object 
 8   Salary    446 non-null    float64
dtypes: float64(4), object(5)
memory usage: 32.3+ KB


### Differences between Shared Methods

In [16]:
# Load daily revenue per city, using the 'Date' column as the row index.
revenue = pd.read_csv('revenue.csv', index_col = 'Date')
revenue

Unnamed: 0_level_0,New York,Los Angeles,Miami
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1/1/16,985,122,499
1/2/16,738,788,534
1/3/16,14,20,933
1/4/16,730,904,885
1/5/16,114,71,253
1/6/16,936,502,497
1/7/16,123,996,115
1/8/16,935,492,886
1/9/16,846,954,823
1/10/16,54,285,216


In [22]:
revenue.sum() # same as revenue.sum(axis = 'index'): adds down the rows, giving one total per column (city)

New York       5475
Los Angeles    5134
Miami          5641
dtype: int64

In [23]:
revenue.sum(axis = 'columns') # adds across the columns, giving one total per row (date); a Series needs no axis argument because it is 1-D

Date
1/1/16     1606
1/2/16     2060
1/3/16      967
1/4/16     2519
1/5/16      438
1/6/16     1935
1/7/16     1234
1/8/16     2313
1/9/16     2623
1/10/16     555
dtype: int64

### Select columns from a DataFrame

In [15]:
nba = pd.read_csv('nba.csv')  # reload the original data so this section starts from an unmodified frame

In [26]:
nba.Name # attribute access returns the column as a Series; a DataFrame is made up of Series

0      Avery Bradley
1        Jae Crowder
2       John Holland
3        R.J. Hunter
4      Jonas Jerebko
           ...      
453     Shelvin Mack
454        Raul Neto
455     Tibor Pleiss
456      Jeff Withey
457              NaN
Name: Name, Length: 458, dtype: object

In [27]:
type(nba.Name)  # confirms that a single column is a pandas Series

pandas.core.series.Series

In [30]:
nba['Name'].head() # prefer bracket syntax: it also works when a column name contains a space

0    Avery Bradley
1      Jae Crowder
2     John Holland
3      R.J. Hunter
4    Jonas Jerebko
Name: Name, dtype: object

In [31]:
nba[['Name', 'Team']] # passing a list of names selects several columns and returns a DataFrame

Unnamed: 0,Name,Team
0,Avery Bradley,Boston Celtics
1,Jae Crowder,Boston Celtics
2,John Holland,Boston Celtics
3,R.J. Hunter,Boston Celtics
4,Jonas Jerebko,Boston Celtics
...,...,...
453,Shelvin Mack,Utah Jazz
454,Raul Neto,Utah Jazz
455,Tibor Pleiss,Utah Jazz
456,Jeff Withey,Utah Jazz


In [32]:
columns_to_select = ['Salary', 'Team', 'Name'] # keep the wanted columns in a named list for readability
nba[columns_to_select]  # columns come back in the order given in the list

Unnamed: 0,Salary,Team,Name
0,7730337.0,Boston Celtics,Avery Bradley
1,6796117.0,Boston Celtics,Jae Crowder
2,,Boston Celtics,John Holland
3,1148640.0,Boston Celtics,R.J. Hunter
4,5000000.0,Boston Celtics,Jonas Jerebko
...,...,...,...
453,2433333.0,Utah Jazz,Shelvin Mack
454,900000.0,Utah Jazz,Raul Neto
455,2900000.0,Utah Jazz,Tibor Pleiss
456,947276.0,Utah Jazz,Jeff Withey


### Add new column to a DF

In [16]:
nba['Sport'] = 'Basketball' # assigning a scalar fills every row; the new column is appended at the end of the df
nba.head()

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary,Sport
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0,Basketball
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0,Basketball
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,,Basketball
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0,Basketball
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0,Basketball


In [17]:
# Insert 'League' at column position 3 (0-based), filling every row with the same value.
# DataFrame.insert mutates nba in place and raises ValueError if the column already
# exists, so the guard keeps this cell safe to re-run.
if 'League' not in nba.columns:
    nba.insert(loc = 3, column = 'League', value = 'National Basketball Assoc')
nba.head()

Unnamed: 0,Name,Team,Number,League,Position,Age,Height,Weight,College,Salary,Sport
0,Avery Bradley,Boston Celtics,0.0,National Basketball Assoc,PG,25.0,6-2,180.0,Texas,7730337.0,Basketball
1,Jae Crowder,Boston Celtics,99.0,National Basketball Assoc,SF,25.0,6-6,235.0,Marquette,6796117.0,Basketball
2,John Holland,Boston Celtics,30.0,National Basketball Assoc,SG,27.0,6-5,205.0,Boston University,,Basketball
3,R.J. Hunter,Boston Celtics,28.0,National Basketball Assoc,SG,22.0,6-5,185.0,Georgia State,1148640.0,Basketball
4,Jonas Jerebko,Boston Celtics,8.0,National Basketball Assoc,PF,29.0,6-10,231.0,,5000000.0,Basketball


### Create New Column from Existing Column

In [19]:
nba['Age in a Decade'] = nba['Age'] + 10  # element-wise arithmetic on an existing column creates the new column

# nba['Age in a Decade'] = nba['Age'].add(10) # equivalent method syntax; .sub(), .mul() etc. also exist
nba.head()

Unnamed: 0,Name,Team,Number,League,Position,Age,Height,Weight,College,Salary,Sport,Age in a Decade
0,Avery Bradley,Boston Celtics,0.0,National Basketball Assoc,PG,25.0,6-2,180.0,Texas,7730337.0,Basketball,35.0
1,Jae Crowder,Boston Celtics,99.0,National Basketball Assoc,SF,25.0,6-6,235.0,Marquette,6796117.0,Basketball,35.0
2,John Holland,Boston Celtics,30.0,National Basketball Assoc,SG,27.0,6-5,205.0,Boston University,,Basketball,37.0
3,R.J. Hunter,Boston Celtics,28.0,National Basketball Assoc,SG,22.0,6-5,185.0,Georgia State,1148640.0,Basketball,32.0
4,Jonas Jerebko,Boston Celtics,8.0,National Basketball Assoc,PF,29.0,6-10,231.0,,5000000.0,Basketball,39.0


In [20]:
# Overwrites the existing column's values with new values.
# NOTE: not idempotent - every re-run of this cell adds another 10 to Age.
nba['Age'] = nba['Age'] + 10

### Review of value_counts()

In [22]:
nba.value_counts().head(n=3) # on a whole DataFrame this counts unique rows, so it is rarely useful

Name           Team           Number  League                     Position  Age   Height  Weight  College   Salary     Sport       Age in a Decade
Aaron Brooks   Chicago Bulls  0.0     National Basketball Assoc  PG        41.0  6-0     161.0   Oregon    2250000.0  Basketball  41.0               1
Mike Muscala   Atlanta Hawks  31.0    National Basketball Assoc  PF        34.0  6-11    240.0   Bucknell  947276.0   Basketball  34.0               1
Mike Dunleavy  Chicago Bulls  34.0    National Basketball Assoc  SG        45.0  6-9     230.0   Duke      4500000.0  Basketball  45.0               1
dtype: int64

In [23]:
nba['Position'].value_counts() # on a Series it returns how many times each distinct value occurs

SG    102
PF    100
PG     92
SF     85
C      78
Name: Position, dtype: int64

### Drop Rows with Null Values

In [3]:
nba = pd.read_csv('nba.csv')  # reload the original data
nba.tail()  # the final row (index 457) is NaN in every column

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
453,Shelvin Mack,Utah Jazz,8.0,PG,26.0,6-3,203.0,Butler,2433333.0
454,Raul Neto,Utah Jazz,25.0,PG,24.0,6-1,179.0,,900000.0
455,Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0
456,Jeff Withey,Utah Jazz,24.0,C,26.0,7-0,231.0,Kansas,947276.0
457,,,,,,,,,


In [26]:
nba.dropna()            # how = 'any' is the default; this result is not displayed because it is not the last expression
nba.dropna(how = 'any') # returns a new DataFrame without the rows that have NaN in any column; nba itself is unchanged

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
6,Jordan Mickey,Boston Celtics,55.0,PF,21.0,6-8,235.0,LSU,1170960.0
7,Kelly Olynyk,Boston Celtics,41.0,C,25.0,7-0,238.0,Gonzaga,2165160.0
...,...,...,...,...,...,...,...,...,...
449,Rodney Hood,Utah Jazz,5.0,SG,23.0,6-8,206.0,Duke,1348440.0
451,Chris Johnson,Utah Jazz,23.0,SF,26.0,6-6,206.0,Dayton,981348.0
452,Trey Lyles,Utah Jazz,41.0,PF,20.0,6-10,234.0,Kentucky,2239800.0
453,Shelvin Mack,Utah Jazz,8.0,PG,26.0,6-3,203.0,Butler,2433333.0


In [28]:
nba.dropna(how = 'all') # drops only the rows in which every column is NaN

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0
...,...,...,...,...,...,...,...,...,...
452,Trey Lyles,Utah Jazz,41.0,PF,20.0,6-10,234.0,Kentucky,2239800.0
453,Shelvin Mack,Utah Jazz,8.0,PG,26.0,6-3,203.0,Butler,2433333.0
454,Raul Neto,Utah Jazz,25.0,PG,24.0,6-1,179.0,,900000.0
455,Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0


In [29]:
nba.dropna(subset = ['College']) # drops rows that have NaN in College (result not displayed)
nba.dropna(subset = ['College', 'Salary']) # drops rows that have NaN in College or Salary

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
6,Jordan Mickey,Boston Celtics,55.0,PF,21.0,6-8,235.0,LSU,1170960.0
7,Kelly Olynyk,Boston Celtics,41.0,C,25.0,7-0,238.0,Gonzaga,2165160.0
...,...,...,...,...,...,...,...,...,...
449,Rodney Hood,Utah Jazz,5.0,SG,23.0,6-8,206.0,Duke,1348440.0
451,Chris Johnson,Utah Jazz,23.0,SF,26.0,6-6,206.0,Dayton,981348.0
452,Trey Lyles,Utah Jazz,41.0,PF,20.0,6-10,234.0,Kentucky,2239800.0
453,Shelvin Mack,Utah Jazz,8.0,PG,26.0,6-3,203.0,Butler,2433333.0


#### Fill in Missing DataFrame values with fillna()

In [12]:
# nba.fillna(0) # replaces all missing values with 0 across every column; rarely meaningful
nba['College'] = nba['College'].fillna(value = 'Unknown') # fillna on the Series returns a new Series
nba                                                       # nba changes only because that result is assigned back to the column

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,Unknown,5000000.0
...,...,...,...,...,...,...,...,...,...
453,Shelvin Mack,Utah Jazz,8.0,PG,26.0,6-3,203.0,Butler,2433333.0
454,Raul Neto,Utah Jazz,25.0,PG,24.0,6-1,179.0,Unknown,900000.0
455,Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,Unknown,2900000.0
456,Jeff Withey,Utah Jazz,24.0,C,26.0,7-0,231.0,Kansas,947276.0


In [14]:
# fillna(inplace = True) modifies only the temporary copy, which is then discarded,
# so the original df is unaffected (Salary still shows NaN in the next cell).
nba['Salary'].copy().fillna(0.0, inplace = True)

In [15]:
nba  # Salary still contains NaN; College shows 'Unknown' from the earlier assignment

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,Unknown,5000000.0
...,...,...,...,...,...,...,...,...,...
453,Shelvin Mack,Utah Jazz,8.0,PG,26.0,6-3,203.0,Butler,2433333.0
454,Raul Neto,Utah Jazz,25.0,PG,24.0,6-1,179.0,Unknown,900000.0
455,Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,Unknown,2900000.0
456,Jeff Withey,Utah Jazz,24.0,C,26.0,7-0,231.0,Kansas,947276.0


### The astype Method

The `astype` method converts a column's values from one type to another. Choosing a smaller or more suitable type can also help conserve memory.

In [21]:
nba = pd.read_csv('nba.csv').dropna(how = 'all')  # reload and drop the fully empty last row
nba.tail()

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
452,Trey Lyles,Utah Jazz,41.0,PF,20.0,6-10,234.0,Kentucky,2239800.0
453,Shelvin Mack,Utah Jazz,8.0,PG,26.0,6-3,203.0,Butler,2433333.0
454,Raul Neto,Utah Jazz,25.0,PG,24.0,6-1,179.0,,900000.0
455,Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0
456,Jeff Withey,Utah Jazz,24.0,C,26.0,7-0,231.0,Kansas,947276.0


In [20]:
nba['Age'].hasnans # True if the Series contains any NaN; astype('int') fails on a column that still holds NaN

# Either drop the affected rows or fill in the missing values before converting.

False

In [19]:
nba['Age'] = nba['Age'].astype('int') # convert Age values from float to int and assign the result back
# NOTE(review): execution counts in this section are out of order (this cell is In [19],
# the reload above is In [21]) and the later nba.info() output lists Age as float64 -
# re-run the notebook top to bottom to confirm the conversion persists.
nba.tail()

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
452,Trey Lyles,Utah Jazz,41.0,PF,20,6-10,234.0,Kentucky,2239800.0
453,Shelvin Mack,Utah Jazz,8.0,PG,26,6-3,203.0,Butler,2433333.0
454,Raul Neto,Utah Jazz,25.0,PG,24,6-1,179.0,,900000.0
455,Tibor Pleiss,Utah Jazz,21.0,C,26,7-3,256.0,,2900000.0
456,Jeff Withey,Utah Jazz,24.0,C,26,7-0,231.0,Kansas,947276.0


#### Category Columns

Convert a column to the category dtype to save memory. This is worthwhile when the column holds a small, fixed set of distinct values relative to the total number of rows.

In [22]:
len(nba)  # number of rows

457

In [24]:
nba['Position'].nunique()  # number of distinct non-null values in Position

5

In [25]:
nba.dtypes  # not displayed; only the cell's last expression is rendered
nba.info()  # baseline memory usage before converting Position to category

<class 'pandas.core.frame.DataFrame'>
Int64Index: 457 entries, 0 to 456
Data columns (total 9 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Name      457 non-null    object 
 1   Team      457 non-null    object 
 2   Number    457 non-null    float64
 3   Position  457 non-null    object 
 4   Age       457 non-null    float64
 5   Height    457 non-null    object 
 6   Weight    457 non-null    float64
 7   College   373 non-null    object 
 8   Salary    446 non-null    float64
dtypes: float64(4), object(5)
memory usage: 35.7+ KB


In [27]:
nba['Position'] = nba['Position'].astype('category') # store Position as a category column: only 5 distinct values across 457 rows

In [29]:
nba['Position'] # the dtype is now category

0      PG
1      SF
2      SG
3      SG
4      PF
       ..
452    PF
453    PG
454    PG
455     C
456     C
Name: Position, Length: 457, dtype: category
Categories (5, object): ['C', 'PF', 'PG', 'SF', 'SG']

In [28]:
nba.info() # reported memory usage of the nba df is lower than before the category conversion

<class 'pandas.core.frame.DataFrame'>
Int64Index: 457 entries, 0 to 456
Data columns (total 9 columns):
 #   Column    Non-Null Count  Dtype   
---  ------    --------------  -----   
 0   Name      457 non-null    object  
 1   Team      457 non-null    object  
 2   Number    457 non-null    float64 
 3   Position  457 non-null    category
 4   Age       457 non-null    float64 
 5   Height    457 non-null    object  
 6   Weight    457 non-null    float64 
 7   College   373 non-null    object  
 8   Salary    446 non-null    float64 
dtypes: category(1), float64(4), object(4)
memory usage: 32.8+ KB


### Sort a DataFrame with sort_values()

In [30]:
nba = pd.read_csv('nba.csv')  # reload the original data (including the all-NaN last row)
nba.tail()

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
453,Shelvin Mack,Utah Jazz,8.0,PG,26.0,6-3,203.0,Butler,2433333.0
454,Raul Neto,Utah Jazz,25.0,PG,24.0,6-1,179.0,,900000.0
455,Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0
456,Jeff Withey,Utah Jazz,24.0,C,26.0,7-0,231.0,Kansas,947276.0
457,,,,,,,,,


In [32]:
nba.sort_values(by = 'Name', ascending = True) # sort rows by Name; ascending order is the default

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
152,Aaron Brooks,Chicago Bulls,0.0,PG,31.0,6-0,161.0,Oregon,2250000.0
356,Aaron Gordon,Orlando Magic,0.0,PF,20.0,6-9,220.0,Arizona,4171680.0
328,Aaron Harrison,Charlotte Hornets,9.0,SG,21.0,6-6,210.0,Kentucky,525093.0
404,Adreian Payne,Minnesota Timberwolves,33.0,PF,25.0,6-10,237.0,Michigan State,1938840.0
312,Al Horford,Atlanta Hawks,15.0,C,30.0,6-10,245.0,Florida,12000000.0
...,...,...,...,...,...,...,...,...,...
270,Xavier Munford,Memphis Grizzlies,14.0,PG,24.0,6-3,180.0,Rhode Island,
402,Zach LaVine,Minnesota Timberwolves,8.0,PG,21.0,6-5,189.0,UCLA,2148360.0
271,Zach Randolph,Memphis Grizzlies,50.0,PF,34.0,6-9,260.0,Michigan State,9638555.0
237,Zaza Pachulia,Dallas Mavericks,27.0,C,32.0,6-11,275.0,,5200000.0


In [36]:
nba.sort_values('Salary', na_position = 'last') # default: NaN salaries are placed at the end (result not displayed)
nba = nba.sort_values('Salary', na_position = 'first') # NaN salaries are placed first; the result is assigned back, overwriting nba
nba

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
46,Elton Brand,Philadelphia 76ers,42.0,PF,37.0,6-9,254.0,Duke,
171,Dahntay Jones,Cleveland Cavaliers,30.0,SG,35.0,6-6,225.0,Duke,
264,Jordan Farmar,Memphis Grizzlies,4.0,PG,29.0,6-2,180.0,UCLA,
269,Ray McCallum,Memphis Grizzlies,5.0,PG,24.0,6-3,190.0,Detroit,
...,...,...,...,...,...,...,...,...,...
339,Chris Bosh,Miami Heat,1.0,PF,32.0,6-11,235.0,Georgia Tech,22192730.0
251,Dwight Howard,Houston Rockets,12.0,C,30.0,6-11,265.0,,22359364.0
33,Carmelo Anthony,New York Knicks,7.0,SF,32.0,6-8,240.0,Syracuse,22875000.0
169,LeBron James,Cleveland Cavaliers,23.0,SF,31.0,6-8,250.0,,22970500.0


In [37]:
nba = pd.read_csv('nba.csv').dropna(how = 'all')  # reload and drop the fully empty last row
nba.tail()

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
452,Trey Lyles,Utah Jazz,41.0,PF,20.0,6-10,234.0,Kentucky,2239800.0
453,Shelvin Mack,Utah Jazz,8.0,PG,26.0,6-3,203.0,Butler,2433333.0
454,Raul Neto,Utah Jazz,25.0,PG,24.0,6-1,179.0,,900000.0
455,Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0
456,Jeff Withey,Utah Jazz,24.0,C,26.0,7-0,231.0,Kansas,947276.0


In [38]:
# Sorting a DataFrame by multiple columns

nba.sort_values(by = ['Team', 'Name'], ascending = True) # sorts by Team, then by Name within each Team, both in ascending order

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
312,Al Horford,Atlanta Hawks,15.0,C,30.0,6-10,245.0,Florida,12000000.0
318,Dennis Schroder,Atlanta Hawks,17.0,PG,22.0,6-1,172.0,,1763400.0
323,Jeff Teague,Atlanta Hawks,0.0,PG,27.0,6-2,186.0,Wake Forest,8000000.0
309,Kent Bazemore,Atlanta Hawks,24.0,SF,26.0,6-5,201.0,Old Dominion,2000000.0
311,Kirk Hinrich,Atlanta Hawks,12.0,SG,35.0,6-4,190.0,Kansas,2854940.0
...,...,...,...,...,...,...,...,...,...
381,Marcus Thornton,Washington Wizards,15.0,SF,29.0,6-4,205.0,LSU,200600.0
376,Markieff Morris,Washington Wizards,5.0,PF,26.0,6-10,245.0,Kansas,8000000.0
375,Nene Hilario,Washington Wizards,42.0,C,33.0,6-11,250.0,,13000000.0
378,Otto Porter Jr.,Washington Wizards,22.0,SF,23.0,6-8,198.0,Georgetown,4662960.0


In [39]:
nba.sort_values(['Team', 'Name'], ascending = [True, False]) # Team ascending, then Name descending within each Team

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
322,Walter Tavares,Atlanta Hawks,22.0,C,24.0,7-3,260.0,,1000000.0
310,Tim Hardaway Jr.,Atlanta Hawks,10.0,SG,24.0,6-6,205.0,Michigan,1304520.0
321,Tiago Splitter,Atlanta Hawks,11.0,C,31.0,6-11,245.0,,9756250.0
320,Thabo Sefolosha,Atlanta Hawks,25.0,SF,32.0,6-7,220.0,,4000000.0
315,Paul Millsap,Atlanta Hawks,4.0,PF,31.0,6-8,246.0,Louisiana Tech,18671659.0
...,...,...,...,...,...,...,...,...,...
374,JJ Hickson,Washington Wizards,21.0,C,27.0,6-9,242.0,North Carolina State,273038.0
380,Garrett Temple,Washington Wizards,17.0,SG,30.0,6-6,195.0,LSU,1100602.0
372,Drew Gooden,Washington Wizards,90.0,PF,34.0,6-10,250.0,Kansas,3300000.0
369,Bradley Beal,Washington Wizards,3.0,SG,22.0,6-5,207.0,Florida,5694674.0


### Sort DataFrame by its Index

In [41]:
nba = nba.sort_values(['Team', 'Name'])  # reorder the rows so the index labels are no longer in numeric order
nba.head()

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
312,Al Horford,Atlanta Hawks,15.0,C,30.0,6-10,245.0,Florida,12000000.0
318,Dennis Schroder,Atlanta Hawks,17.0,PG,22.0,6-1,172.0,,1763400.0
323,Jeff Teague,Atlanta Hawks,0.0,PG,27.0,6-2,186.0,Wake Forest,8000000.0
309,Kent Bazemore,Atlanta Hawks,24.0,SF,26.0,6-5,201.0,Old Dominion,2000000.0
311,Kirk Hinrich,Atlanta Hawks,12.0,SG,35.0,6-4,190.0,Kansas,2854940.0


In [43]:
nba = nba.sort_index(ascending = True) # sort rows by index label (numeric here; an index can also hold strings etc.)
nba.head()

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0


### Rank Values with rank Method

In [46]:
nba = pd.read_csv('nba.csv').dropna(how = 'all')  # reload and drop the fully empty last row
nba['Salary'] = nba['Salary'].fillna(0).astype('int')  # replace missing salaries with 0 so the column can be cast to int
nba.head()

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,0
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000


In [47]:
nba['Salary'].rank() # ascending = True by default: rank 1 goes to the smallest salary; tied values share the average of their ranks

0      361.0
1      348.0
2        6.0
3      136.0
4      311.0
       ...  
452    208.0
453    217.0
454     75.0
455    243.5
456     91.0
Name: Salary, Length: 457, dtype: float64

In [48]:
nba['Salary'].rank(ascending = False).astype('int') # rank 1 goes to the largest salary; astype('int') truncates averaged tie ranks such as 214.5

0       97
1      110
2      452
3      322
4      147
      ... 
452    250
453    241
454    383
455    214
456    367
Name: Salary, Length: 457, dtype: int32

In [49]:
nba['Salary Rank'] = nba['Salary'].rank(ascending = False).astype('int') # store the descending salary rank as a new column
nba.sort_values('Salary', ascending = False)

# Rows are shown by Salary in descending order next to their rank; equal salaries get the same rank.
# NOTE(review): ties use the average rank truncated to int; rank(method = 'min') would give whole-number competition ranks - confirm which is intended.

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary,Salary Rank
109,Kobe Bryant,Los Angeles Lakers,24.0,SF,37.0,6-6,212.0,,25000000,1
169,LeBron James,Cleveland Cavaliers,23.0,SF,31.0,6-8,250.0,,22970500,2
33,Carmelo Anthony,New York Knicks,7.0,SF,32.0,6-8,240.0,Syracuse,22875000,3
251,Dwight Howard,Houston Rockets,12.0,C,30.0,6-11,265.0,,22359364,4
339,Chris Bosh,Miami Heat,1.0,PF,32.0,6-11,235.0,Georgia Tech,22192730,5
...,...,...,...,...,...,...,...,...,...,...
353,Dorell Wright,Miami Heat,11.0,SF,30.0,6-9,205.0,,0,452
264,Jordan Farmar,Memphis Grizzlies,4.0,PG,29.0,6-2,180.0,UCLA,0,452
409,Greg Smith,Minnesota Timberwolves,4.0,PF,25.0,6-10,250.0,Fresno State,0,452
273,Alex Stepheson,Memphis Grizzlies,35.0,PF,28.0,6-10,270.0,USC,0,452
