In [52]:
# Sorting and Ranking

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Integer Series whose index labels are deliberately out of alphabetical
# order, so that sorting by index has a visible effect.
obj = pd.Series(np.arange(4), index=list("dabc"))

obj

d    0
a    1
b    2
c    3
dtype: int64

In [53]:
# Sort the Series by its index labels (ascending is the default order).
obj.sort_index(ascending=True)


a    1
b    2
c    3
d    0
dtype: int64

In [54]:
# 2x4 frame of consecutive integers with unsorted row and column labels.
frame = pd.DataFrame(
    np.arange(8).reshape(2, 4),
    index=["mondstadt", "inazuma"],
    columns=list("dabc"),
)

frame

Unnamed: 0,d,a,b,c
mondstadt,0,1,2,3
inazuma,4,5,6,7


In [55]:
# A DataFrame can be sorted by index on either axis; axis 0 (the default)
# orders the rows by their labels.
frame.sort_index(axis=0)

Unnamed: 0,d,a,b,c
inazuma,4,5,6,7
mondstadt,0,1,2,3


In [56]:
# Order the columns by their labels instead of the rows.
frame.sort_index(axis=1)


Unnamed: 0,a,b,c,d
mondstadt,1,2,3,0
inazuma,5,6,7,4


In [57]:
# Same column sort, but with the labels in descending order.
frame.sort_index(axis=1, ascending=False)

Unnamed: 0,d,c,b,a
mondstadt,0,3,2,1
inazuma,4,7,6,5


In [58]:
# Sort a Series by its values rather than by its index labels.
obj = pd.Series([4, 7, -3, 2])
obj.sort_values(ascending=True)

2   -3
3    2
0    4
1    7
dtype: int64

In [59]:
# By default, missing values (NaN) are placed at the end of the sorted result.
obj = pd.Series([4, np.nan, 7, np.nan, -3, 2])
obj.sort_values(na_position="last")

4   -3.0
5    2.0
0    4.0
2    7.0
1    NaN
3    NaN
dtype: float64

In [60]:
# Passing na_position="first" moves the missing values to the start
# of the result instead of the end.
obj.sort_values(ascending=True, na_position="first")

1    NaN
3    NaN
4   -3.0
5    2.0
0    4.0
2    7.0
dtype: float64

In [61]:
# Small frame whose columns are used as sort keys: when sorting a
# DataFrame, the data in one or more columns can serve as the keys.
frame = pd.DataFrame(dict(b=[4, 7, -3, 2], a=[0, 1, 0, 1]))

frame

Unnamed: 0,b,a
0,4,0
1,7,1
2,-3,0
3,2,1


In [62]:
# Order the rows by the values in column "b".
frame.sort_values(by="b")

Unnamed: 0,b,a
2,-3,0
3,2,1
0,4,0
1,7,1


In [63]:
# To sort by multiple columns, pass a list of names: rows are ordered by
# "a" first, then by "b" among rows that share the same "a".
frame.sort_values(by=["a", "b"])

Unnamed: 0,b,a
2,-3,0
0,4,0
3,2,1
1,7,1


In [64]:
# Ranking assigns ranks from one through the number of valid data points.
# With the default settings, tied values share the mean of the ranks they
# span (the two 7s both receive 6.5).
obj = pd.Series([7, -5, 7, 4, 2, 0, 4])

obj.rank(ascending=True)

0    6.5
1    1.0
2    6.5
3    4.5
4    3.0
5    2.0
6    4.5
dtype: float64

In [65]:
# method="first" breaks ties by order of appearance, so the earlier of two
# equal values receives the lower rank.
obj.rank(method="first", ascending=True)

0    6.0
1    1.0
2    7.0
3    4.0
4    3.0
5    2.0
6    5.0
dtype: float64

In [66]:
# Rank in descending order: the largest values receive the lowest ranks.
obj.rank(method="average", ascending=False)

0    1.5
1    7.0
2    1.5
3    3.5
4    5.0
5    6.0
6    3.5
dtype: float64

In [67]:
# Naming the "average" method explicitly yields the same ranks as a bare
# rank() call.
obj.rank(method="average", ascending=True)

0    6.5
1    1.0
2    6.5
3    4.5
4    3.0
5    2.0
6    4.5
dtype: float64

In [68]:
# A DataFrame can compute ranks over the rows or over the columns; this
# frame mixes float and integer columns under string row labels.
frame = pd.DataFrame(
    {
        "b": [4.3, 7, -3, 1],
        "a": [0, 1, 0, 1],
        "c": [-2, 5, 8, -2.5],
    },
    index=("mondstadt-0", "liyue-1", "inazuma-2", "sumeru-3"),
)

frame

Unnamed: 0,b,a,c
mondstadt-0,4.3,0,-2.0
liyue-1,7.0,1,5.0
inazuma-2,-3.0,0,8.0
sumeru-3,1.0,1,-2.5


In [69]:
# Rank across the columns: each row's values are ranked against one another.
frame.rank(axis=1)

Unnamed: 0,b,a,c
mondstadt-0,3.0,2.0,1.0
liyue-1,3.0,1.0,2.0
inazuma-2,1.0,2.0,3.0
sumeru-3,2.5,2.5,1.0


In [None]:
# Tie-breaking methods available for rank(method=...)
# "average" - Default: assign the average rank to each entry in
#  the group of equal values

# "min" - Use the minimum rank for the whole group
# "max" - Use the maximum rank for the whole group
# "first" - Assign ranks in the order in which the values
# appear in the data

# "dense" - Like method="min", but ranks always increase by 1
# between groups rather than by the number of equal
# elements in a group


In [70]:
# Axis Indexes with Duplicate Labels

# Series whose index repeats the labels "a" and "b".
obj = pd.Series(np.arange(5), index=list("aabbc"))

obj

a    0
a    1
b    2
b    3
c    4
dtype: int64

In [71]:
# Reports whether every index label is distinct; it is False here because
# the labels "a" and "b" each appear twice.
obj.index.is_unique

False

In [72]:
# A repeated label selects every matching entry (a Series), whereas a
# label that occurs once selects a single scalar value.
(obj["a"], obj["c"])


(a    0
 a    1
 dtype: int64,
 np.int64(4))

In [73]:
# 5x3 frame of standard-normal draws with repeated row labels.
# The draws come from a seeded Generator so that the values are identical on
# every Restart & Run All; an unseeded call yields different numbers each run.
# NOTE: any output saved before the seed was added must be refreshed by
# re-running this cell and the cells that display slices of `df`.
df = pd.DataFrame(np.random.default_rng(12345).standard_normal((5, 3)),
                  index=["a", "a", "b", "b", "c"])

df

Unnamed: 0,0,1,2
a,1.33236,-0.030931,-0.654003
a,-0.088436,-0.087337,-0.587778
b,-0.865072,0.06297,-0.287504
b,1.736459,0.279659,-0.604346
c,0.016955,0.873647,-0.632535


In [74]:
# "b" labels two rows, so .loc returns both of them as a DataFrame.
df.loc["b"]

Unnamed: 0,0,1,2
b,-0.865072,0.06297,-0.287504
b,1.736459,0.279659,-0.604346


In [75]:
# "c" labels a single row, so .loc returns that row as a Series.
df.loc["c"]

0    0.016955
1    0.873647
2   -0.632535
Name: c, dtype: float64