# Pandas sort_values()

This notebook accompanies the Medium article [11 Tricks to Master Values Sorting in Pandas](https://bindichen.medium.com/11-tricks-to-master-values-sorting-in-pandas-7f2cfbf19730).

Please check out the article for instructions.

**License**: [BSD 2-Clause](https://opensource.org/licenses/BSD-2-Clause)


In [1]:
import pandas as pd
import numpy as np

In [2]:
# Sample product table, written one record per row.
# 'cost' and 'promotion_time' are deliberately stored as strings.
df = pd.DataFrame(
    [
        ('keyboard', 'C', 2002, '$52', '20hr'),
        ('mouse', 'C', 2002, '$24', '30hr'),
        ('desk', 'O', 2005, '$250', '20hr'),
        ('monitor', 'C', 2001, '$500', '20hr'),
        ('chair', 'O', 2003, '$150', '2hr'),
    ],
    columns=['product', 'category', 'year', 'cost', 'promotion_time'],
)

df

Unnamed: 0,product,category,year,cost,promotion_time
0,keyboard,C,2002,$52,20hr
1,mouse,C,2002,$24,30hr
2,desk,O,2005,$250,20hr
3,monitor,C,2001,$500,20hr
4,chair,O,2003,$150,2hr


## 1. Sorting a Series

In [3]:
# sort_values() on a Series orders ascending unless told otherwise
df['year'].sort_values(ascending=True)

3    2001
0    2002
1    2002
4    2003
2    2005
Name: year, dtype: int64

In [4]:
# Pass ascending=False to get descending order
(
    df['year']
    .sort_values(ascending=False)
)

2    2005
4    2003
1    2002
0    2002
3    2001
Name: year, dtype: int64

## 2. Sorting a DataFrame by a single column

In [5]:
# Order the rows by the 'year' column (ascending)
df.sort_values(by='year')

Unnamed: 0,product,category,year,cost,promotion_time
3,monitor,C,2001,$500,20hr
0,keyboard,C,2002,$52,20hr
1,mouse,C,2002,$24,30hr
4,chair,O,2003,$150,2hr
2,desk,O,2005,$250,20hr


In [6]:
# Same column, newest year first
df.sort_values(by='year', ascending=False)

Unnamed: 0,product,category,year,cost,promotion_time
2,desk,O,2005,$250,20hr
4,chair,O,2003,$150,2hr
0,keyboard,C,2002,$52,20hr
1,mouse,C,2002,$24,30hr
3,monitor,C,2001,$500,20hr


## 3. Sorting a DataFrame by multiple columns

In [7]:
# Sort by 'category' first; rows with equal category are ordered by 'year'
df.sort_values(by=['category', 'year'])

Unnamed: 0,product,category,year,cost,promotion_time
3,monitor,C,2001,$500,20hr
0,keyboard,C,2002,$52,20hr
1,mouse,C,2002,$24,30hr
4,chair,O,2003,$150,2hr
2,desk,O,2005,$250,20hr


In [8]:
# One direction per sort column: 'category' ascending, 'year' descending
df.sort_values(
    by=['category', 'year'],
    ascending=[True, False],
)

Unnamed: 0,product,category,year,cost,promotion_time
0,keyboard,C,2002,$52,20hr
1,mouse,C,2002,$24,30hr
3,monitor,C,2001,$500,20hr
2,desk,O,2005,$250,20hr
4,chair,O,2003,$150,2hr


## 4. Applying transformation before sorting

In [9]:
# 'cost' holds strings such as '$52', so the rows are ordered
# lexicographically (e.g. '$150' before '$24'), not by numeric amount
df.sort_values(by='cost')

Unnamed: 0,product,category,year,cost,promotion_time
4,chair,O,2003,$150,2hr
1,mouse,C,2002,$24,30hr
2,desk,O,2005,$250,20hr
3,monitor,C,2001,$500,20hr
0,keyboard,C,2002,$52,20hr


In [10]:
# Strip the currency symbol and compare the remaining digits as floats.
# regex=False makes '$' a literal character instead of relying on the
# pandas-version-dependent default of `regex` ('$' is a regex metacharacter).
df.sort_values(
    'cost',
    key=lambda val: val.str.replace('$', '', regex=False).astype('float64')
)

Unnamed: 0,product,category,year,cost,promotion_time
1,mouse,C,2002,$24,30hr
0,keyboard,C,2002,$52,20hr
4,chair,O,2003,$150,2hr
2,desk,O,2005,$250,20hr
3,monitor,C,2001,$500,20hr


## 5. Applying transformations to multiple columns

In [11]:
def sort_by_cost_time(x):
    """Sort key for ``sort_values``: convert string columns to numbers.

    The function is called once per sort column and receives that column
    as a Series.

    Parameters
    ----------
    x : pandas.Series
        The column being sorted; dispatched on ``x.name``.

    Returns
    -------
    pandas.Series
        * ``'cost'``: the values with ``'$'`` removed, as ``float64``.
        * ``'promotion_time'``: the values with ``'hr'`` removed, as ``int``.
        * any other column: ``x`` itself, unchanged.
    """
    # regex=False: treat the patterns as literal text. '$' is a regex
    # metacharacter, and the default of `regex` differs across pandas versions.
    if x.name == 'cost':
        return x.str.replace('$', '', regex=False).astype('float64')
    if x.name == 'promotion_time':
        return x.str.replace('hr', '', regex=False).astype('int')
    return x

In [12]:
# The key function is applied to each of the three sort columns in turn
df.sort_values(
    by=['year', 'promotion_time', 'cost'],
    key=sort_by_cost_time,
)

Unnamed: 0,product,category,year,cost,promotion_time
3,monitor,C,2001,$500,20hr
0,keyboard,C,2002,$52,20hr
1,mouse,C,2002,$24,30hr
4,chair,O,2003,$150,2hr
2,desk,O,2005,$250,20hr


## 6. Dealing with Custom Sort

In [13]:
# Clothing items, one (cloth_id, size) record per row
df = pd.DataFrame(
    [
        (1001, 'S'),
        (1002, 'XL'),
        (1003, 'M'),
        (1004, 'XS'),
        (1005, 'L'),
        (1006, 'S'),
    ],
    columns=['cloth_id', 'size'],
)
df

Unnamed: 0,cloth_id,size
0,1001,S
1,1002,XL
2,1003,M
3,1004,XS
4,1005,L
5,1006,S


In [14]:
from pandas.api.types import CategoricalDtype

# Ordered categorical dtype whose categories run from smallest to largest size
cat_size_order = CategoricalDtype(
    categories=['XS', 'S', 'M', 'L', 'XL'],
    ordered=True,
)

In [15]:
# Cast 'size' to the ordered categorical dtype so sorting follows the category order
df['size'] = df['size'].astype(dtype=cat_size_order)
df['size']

0     S
1    XL
2     M
3    XS
4     L
5     S
Name: size, dtype: category
Categories (5, object): ['XS' < 'S' < 'M' < 'L' < 'XL']

In [16]:
# Rows now follow the categorical order XS < S < M < L < XL
df.sort_values(by='size')

Unnamed: 0,cloth_id,size
3,1004,XS
0,1001,S
5,1006,S
2,1003,M
4,1005,L
1,1002,XL


## 7. Handling missing values

In [17]:
# Small frame whose 'id' column contains one missing value
df = pd.DataFrame(
    data={
        'id': [4, 2, 3, np.nan, 6, 5],
        'name': list('ABCDEF'),
    }
)

# With default settings the NaN row is placed last
df.sort_values(by='id')

Unnamed: 0,id,name
1,2.0,B
2,3.0,C
0,4.0,A
5,5.0,F
4,6.0,E
3,,D


In [18]:
# na_position='first' moves the NaN row to the top of the result
df.sort_values(by='id', na_position='first')

Unnamed: 0,id,name
3,,D
1,2.0,B
2,3.0,C
0,4.0,A
5,5.0,F
4,6.0,E


## 8. Sorting values in place

In [19]:
# Fresh copy of the id/name frame for the in-place example
df = pd.DataFrame(
    data={
        'id': [4, 2, 3, np.nan, 6, 5],
        'name': list('ABCDEF'),
    }
)
df

Unnamed: 0,id,name
0,4.0,A
1,2.0,B
2,3.0,C
3,,D
4,6.0,E
5,5.0,F


In [20]:
# inplace=True reorders df itself, so the cell displays no result
df.sort_values(by='id', inplace=True)

In [21]:
# Display df to confirm it was reordered by the in-place sort
df

Unnamed: 0,id,name
1,2.0,B
2,3.0,C
0,4.0,A
5,5.0,F
4,6.0,E
3,,D


## 9. Ignoring the index

In [22]:
# Rebuild the id/name frame in its original row order
df = pd.DataFrame(
    data={
        'id': [4, 2, 3, np.nan, 6, 5],
        'name': list('ABCDEF'),
    }
)
df

Unnamed: 0,id,name
0,4.0,A
1,2.0,B
2,3.0,C
3,,D
4,6.0,E
5,5.0,F


In [23]:
# By default each row keeps its original index label after sorting
df.sort_values(by='id')

Unnamed: 0,id,name
1,2.0,B
2,3.0,C
0,4.0,A
5,5.0,F
4,6.0,E
3,,D


In [24]:
# ignore_index=True relabels the sorted rows 0, 1, ..., n-1
df.sort_values(by='id', ignore_index=True)

Unnamed: 0,id,name
0,2.0,B
1,3.0,C
2,4.0,A
3,5.0,F
4,6.0,E
5,,D


## 10. Sorting a DataFrame by row(s)

In [25]:
# Score table written one subject per row; columns are the students
df = pd.DataFrame.from_dict(
    {
        'Math': [70, 90, 56],
        'Chemistry': [67, 85, 60],
        'Physics': [90, 76, 75],
        'Art': [50, 70, 77],
    },
    orient='index',
    columns=['Tom', 'Jacky', 'Lucy'],
)
df

Unnamed: 0,Tom,Jacky,Lucy
Math,70,90,56
Chemistry,67,85,60
Physics,90,76,75
Art,50,70,77


In [26]:
# Reorder the columns by their values in the 'Chemistry' row
df.sort_values(by='Chemistry', axis='columns')

Unnamed: 0,Lucy,Tom,Jacky
Math,56,70,90
Chemistry,60,67,85
Physics,75,90,76
Art,77,50,70


## 11. Choosing different sorting algorithms

In [29]:
# Same column reordering, with the sorting algorithm chosen via `kind`
df.sort_values(
    by='Chemistry',
    axis='columns',
    kind='heapsort',
)

Unnamed: 0,Lucy,Tom,Jacky
Math,56,70,90
Chemistry,60,67,85
Physics,75,90,76
Art,77,50,70


### Thanks for reading

This notebook accompanies the Medium article [11 Tricks to Master Values Sorting in Pandas](https://bindichen.medium.com/11-tricks-to-master-values-sorting-in-pandas-7f2cfbf19730).

Please check out the article for instructions.

**License**: [BSD 2-Clause](https://opensource.org/licenses/BSD-2-Clause)