## **[MySQL ORDER BY](https://www.mysqltutorial.org/mysql-order-by/)**

Sort the rows in a result set using the MySQL ORDER BY clause.

In [1]:
import os
import re

import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymysql
from IPython.display import Image, SVG
from sqlalchemy import create_engine, inspect, MetaData, text
from sqlalchemy_schemadisplay import create_schema_graph
from sqlalchemy_utils import database_exists, create_database

# Notebook-wide pandas display settings: never truncate columns or cell
# contents, but show at most 10 rows and wrap output at 65 characters.
pd.set_option('display.max_columns', None)
pd.set_option('display.expand_frame_repr', True)
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_rows', 10)
pd.set_option('display.width', 65)

In [2]:
# Extra keyword arguments forwarded to the PyMySQL driver.
# NOTE(review): the key name suggests a placeholder whose only job is to make
# the ``ssl`` dict non-empty so TLS is switched on — confirm against PyMySQL.
connect_args = {'ssl': {'fake_flag_to_enable_tls': True}}

# Take the connection URL from the environment when it is set, so real
# credentials do not have to live in the notebook. The literal is only the
# fallback used when CLASSICMODELS_DB_URL is undefined.
db_url = os.environ.get(
    'CLASSICMODELS_DB_URL',
    'mysql+pymysql://namlq:abc123@localhost/classicmodels',
)

# Shared engine used by every query cell below, plus an inspector bound to it.
engine = create_engine(db_url, connect_args=connect_args, echo=False)
inspector = inspect(engine)

### Introduction to the MySQL ORDER BY clause

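Syntax:

```sql
SELECT
    select_list
FROM
    table_name
ORDER BY
    column1 [ASC|DESC],
    column2 [ASC|DESC],
    ...;
```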

### MySQL ORDER BY examples

#### A) Sort the result set by one column

In [3]:
# Customer contact names sorted by last name (ascending is the default).
# The ORDER BY column is spelled exactly like the selected column.
string = '''
SELECT
    contactLastName,
    contactFirstName
FROM
    customers
ORDER BY
    contactLastName;
'''

# Run the query through the shared engine and keep the result for comparison.
df1 = pd.read_sql(string, engine)

In [4]:
# pandas equivalent of the query above: read the two name columns from the
# `customers` table and sort by last name.
df2 = (
    pd.read_sql_table(
        table_name='customers',
        con=engine,
        columns=['contactLastName', 'contactFirstName'],
    )
    .sort_values(
        by='contactLastName',
        # pandas compares strings case-sensitively while the SQL ordering is
        # case-insensitive, so sort on an upper-cased key instead.
        key=lambda names: names.str.upper(),
        # Stable algorithm; other choices: 'quicksort', 'heapsort', 'stable'.
        kind='mergesort',
        ignore_index=True,
    )
)

In [5]:
# Check that the pandas result is identical to the SQL result.
df1.equals(df2)

True

In [6]:
# Preview the first rows of the SQL result.
df1.head()

Unnamed: 0,contactLastName,contactFirstName
0,Accorti,Paolo
1,"Altagar,G M",Raanan
2,Andersen,Mel
3,Anton,Carmen
4,Ashworth,Rachel


In [7]:
# Customer contact names sorted by last name in descending order.
string = '''
SELECT
    contactLastName,
    contactFirstName
FROM
    customers
ORDER BY
    contactLastName DESC; 
'''

# Execute against the shared engine; df1 holds the SQL-side result.
df1 = pd.read_sql(sql=string, con=engine)

In [8]:
# pandas equivalent: same two columns, last name descending.
df2 = (
    pd.read_sql_table(
        table_name='customers',
        con=engine,
        columns=['contactLastName', 'contactFirstName'],
    )
    .sort_values(
        by='contactLastName',
        ascending=False,
        # Upper-case the key so the comparison ignores letter case.
        key=lambda names: names.str.upper(),
        kind='mergesort',
        ignore_index=True,
    )
)

In [9]:
# Check that the pandas result is identical to the SQL result.
df1.equals(df2)

True

In [10]:
# Preview the first rows of the SQL result (last names in descending order).
df1.head()

Unnamed: 0,contactLastName,contactFirstName
0,Young,Jeff
1,Young,Julie
2,Young,Mary
3,Young,Dorothy
4,Yoshido,Juri


#### B) Sort the result set by multiple columns

In [11]:
# Sort by last name descending, then by first name ascending within each
# last name.
string = '''
SELECT
    contactLastName,
    contactFirstName
FROM
    customers
ORDER BY
    contactLastName DESC,
    contactFirstName ASC;
'''

# Execute against the shared engine; df1 holds the SQL-side result.
df1 = pd.read_sql(sql=string, con=engine)

In [12]:
# pandas equivalent: one sort direction per column, matched by position.
# No `kind` is given: per the pandas docs it is only applied when sorting on
# a single column.
df2 = (
    pd.read_sql_table(
        table_name='customers',
        con=engine,
        columns=['contactLastName', 'contactFirstName'],
    )
    .sort_values(
        by=['contactLastName', 'contactFirstName'],
        ascending=[False, True],
        # The key is applied to each sort column; upper-casing makes the
        # comparison ignore letter case.
        key=lambda names: names.str.upper(),
        ignore_index=True,
    )
)

In [13]:
# Check that the pandas result is identical to the SQL result.
df1.equals(df2)

True

In [14]:
# Preview the first rows of the SQL result.
df1.head()

Unnamed: 0,contactLastName,contactFirstName
0,Young,Dorothy
1,Young,Jeff
2,Young,Julie
3,Young,Mary
4,Yoshido,Juri


#### C) Sort the result set by an expression

In [15]:
# Order-detail lines with a computed `subtotal`, sorted by order number and
# then by subtotal, largest first.
string = '''
SELECT
    orderNumber,
    orderLineNumber,
    quantityOrdered * priceEach AS subtotal
FROM
    orderdetails
ORDER BY
    orderNumber, subtotal DESC;
'''

# Execute against the shared engine; df1 holds the SQL-side result.
df1 = pd.read_sql(sql=string, con=engine)

In [16]:
# pandas equivalent: compute the subtotal column, keep the three columns the
# SQL query selects, then sort by order number (ascending) and subtotal
# (descending).
# `kind` is intentionally not passed: per the pandas docs it is only applied
# when sorting on a single column, so it would have no effect here.
df2 = (
    pd.read_sql_table('orderdetails', engine)
    .assign(subtotal=lambda lines: lines.quantityOrdered * lines.priceEach)
    [['orderNumber', 'orderLineNumber', 'subtotal']]
    .sort_values(
        by=['orderNumber', 'subtotal'],
        ascending=[True, False],
        ignore_index=True,
    )
)

In [17]:
# Exact comparison of the two frames; the cells below look at where they differ.
df1.equals(df2)

False

In [18]:
# Count, per column, how many positions differ between the two frames.
(df1 != df2).sum()

orderNumber          0
orderLineNumber      0
subtotal           745
dtype: int64

In [19]:
# Total absolute difference between the two subtotal columns.
# NOTE(review): the value shown below is on the order of 1e-10, which looks like
# floating-point rounding rather than different data — confirm.
(df1.subtotal - df2.subtotal).abs().sum()

3.6004621506435797e-10

In [20]:
# Preview the first rows of the SQL result.
df1.head()

Unnamed: 0,orderNumber,orderLineNumber,subtotal
0,10100,3,4080.0
1,10100,2,2754.5
2,10100,1,1729.21
3,10100,4,1660.12
4,10101,1,4343.56


#### D) Sort data using a custom list

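Syntax of the `FIELD()` function:

```sql
FIELD(str, str1, str2, ...)
```

It returns the 1-based position of `str` in the list `str1, str2, ...`, or `0` if `str` is not found.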

In [21]:
# FIELD() example: the first argument 'A' is looked up in the list that
# follows it ('A', 'B', 'C'); the output below shows position 1.
pd.read_sql("SELECT FIELD ('A', 'A', 'B', 'C') as position", engine)

Unnamed: 0,position
0,1


In [22]:
# FIELD() example: 'B' is looked up in ('A', 'B', 'C'); the output below
# shows position 2.
pd.read_sql("SELECT FIELD ('B', 'A', 'B', 'C') as position", engine)

Unnamed: 0,position
0,2


In [23]:
# List the distinct order statuses and how many orders have each one.
pd.read_sql('SELECT status, COUNT(*) count FROM orders GROUP BY status', engine)

Unnamed: 0,status,count
0,Shipped,303
1,Resolved,4
2,Cancelled,6
3,On Hold,4
4,Disputed,3
5,In Process,6


In [24]:
# Orders sorted by a custom status sequence: FIELD() turns each status into
# its position in the list, and ORDER BY sorts on that position.
string = '''
SELECT
    orderNumber, status
FROM
    orders
ORDER BY FIELD(status,
        'In Process',
        'On Hold',
        'Cancelled',
        'Resolved',
        'Disputed',
        'Shipped');
'''

# Execute against the shared engine; df1 holds the SQL-side result.
df1 = pd.read_sql(sql=string, con=engine)

In [25]:
# Custom status order, in the same sequence as the FIELD() arguments of the
# SQL query.
categories = ['In Process', 'On Hold', 'Cancelled', 'Resolved', 'Disputed', 'Shipped']

# pandas equivalent: sort `status` through an ordered Categorical so rows
# follow the custom sequence instead of alphabetical order.
df2 = (pd
 .read_sql_table('orders', engine)
 [['orderNumber', 'status']]
 .sort_values(
     by='status',
     # 'mergesort' is a stable algorithm, so rows sharing a status keep the
     # order in which they were read from the table.
     kind='mergesort',
     key=lambda col: pd.Categorical(col, categories=categories, ordered=True),
     ignore_index=True)
)

In [26]:
# Check that the pandas result is identical to the SQL result.
df1.equals(df2)

True

In [27]:
# Preview the first rows of the SQL result ('In Process' orders come first).
df1.head()

Unnamed: 0,orderNumber,status
0,10420,In Process
1,10421,In Process
2,10422,In Process
3,10423,In Process
4,10424,In Process


### ORDER BY and NULL

In [28]:
# Employees ordered by `reportsTo`, then last name, then first name, all
# ascending. In the output shown below, the row without a `reportsTo` value
# comes first.
string = '''
SELECT 
    firstName, lastName, reportsTo
FROM employees
ORDER BY reportsTo, lastName, firstName
;'''

# Execute against the shared engine; df1 holds the SQL-side result.
df1 = pd.read_sql(sql=string, con=engine)

In [29]:
# pandas equivalent: same three sort keys, all ascending, with missing
# `reportsTo` values placed first to match the SQL output.
df2 = (
    pd.read_sql_table(table_name='employees', con=engine)
    [['firstName', 'lastName', 'reportsTo']]
    .sort_values(
        by=['reportsTo', 'lastName', 'firstName'],
        ascending=[True, True, True],
        na_position='first',
        ignore_index=True,
    )
)

In [30]:
# Check that the pandas result is identical to the SQL result.
df1.equals(df2)

True

In [31]:
# Preview the first rows; the employee with no `reportsTo` value is at the top.
df1.head()

Unnamed: 0,firstName,lastName,reportsTo
0,Diane,Murphy,
1,Jeff,Firrelli,1002.0
2,Mary,Patterson,1002.0
3,Gerard,Bondur,1056.0
4,Anthony,Bow,1056.0


In [32]:
# Same columns, but `reportsTo` descending (names still ascending). In the
# output shown below, the row without a `reportsTo` value comes last.
string = '''
SELECT
    firstName, lastName, reportsTo
FROM
    employees
ORDER BY reportsTo DESC, lastName, firstName;
'''

# Execute against the shared engine; df1 holds the SQL-side result.
df1 = pd.read_sql(sql=string, con=engine)

In [33]:
# pandas equivalent: `reportsTo` descending, names ascending, with missing
# `reportsTo` values placed last to match the SQL output.
df2 = (
    pd.read_sql_table(table_name='employees', con=engine)
    [['firstName', 'lastName', 'reportsTo']]
    .sort_values(
        by=['reportsTo', 'lastName', 'firstName'],
        ascending=[False, True, True],
        na_position='last',
        ignore_index=True,
    )
)

In [34]:
# Check that the pandas result is identical to the SQL result.
df1.equals(df2)

True

In [35]:
# Show the last rows; the employee with no `reportsTo` value is at the bottom.
df1.tail()

Unnamed: 0,firstName,lastName,reportsTo
18,Mami,Nishi,1056.0
19,William,Patterson,1056.0
20,Jeff,Firrelli,1002.0
21,Mary,Patterson,1002.0
22,Diane,Murphy,
