In [1]:
import pandas as pd
import numpy as np

In [2]:
np.random.seed(42)  # For reproducibility
df=pd.DataFrame(data=np.random.normal(loc=100,scale=50,size=(8,2)),
                columns=['parks','schools'],
                index=['San Francisco','Los Angeles','San Diego',
                        'Sacramento','San Jose','Fresno','Long Beach','Oakland']
                )

df=df.astype(int)
df

Unnamed: 0,parks,schools
San Francisco,124,93
Los Angeles,132,176
San Diego,88,88
Sacramento,178,138
San Jose,76,127
Fresno,76,76
Long Beach,112,4
Oakland,13,71


In [5]:
df_1=df.copy()
df_1['rank_parks']=df_1['parks'].rank(ascending=False)
df_1

Unnamed: 0,parks,schools,rank_parks
San Francisco,124,93,3.0
Los Angeles,132,176,2.0
San Diego,88,88,5.0
Sacramento,178,138,1.0
San Jose,76,127,6.5
Fresno,76,76,6.5
Long Beach,112,4,4.0
Oakland,13,71,8.0


In [4]:
df_2=df.copy()
df_2.rank()

Unnamed: 0,parks,schools
San Francisco,6.0,5.0
Los Angeles,7.0,8.0
San Diego,4.0,4.0
Sacramento,8.0,7.0
San Jose,2.5,6.0
Fresno,2.5,3.0
Long Beach,5.0,1.0
Oakland,1.0,2.0


In [7]:
df_3=pd.DataFrame([1,2,3,4,5,3,5,6,7,7,9],columns=['sample']).sort_values(by='sample')
df_3

Unnamed: 0,sample
0,1
1,2
2,3
5,3
3,4
4,5
6,5
7,6
8,7
9,7


In [8]:
df_3['average_rank']=df_3['sample'].rank(method='average')
df_3['min_rank']=df_3['sample'].rank(method='min')
df_3['max_rank']=df_3['sample'].rank(method='max')
df_3['dense_rank']=df_3['sample'].rank(method='dense')
df_3['first_rank']=df_3['sample'].rank(method='first')
df_3

Unnamed: 0,sample,average_rank,min_rank,max_rank,dense_rank,first_rank
0,1,1.0,1.0,1.0,1.0,1.0
1,2,2.0,2.0,2.0,2.0,2.0
2,3,3.5,3.0,4.0,3.0,3.0
5,3,3.5,3.0,4.0,3.0,4.0
3,4,5.0,5.0,5.0,4.0,5.0
4,5,6.5,6.0,7.0,5.0,6.0
6,5,6.5,6.0,7.0,5.0,7.0
7,6,8.0,8.0,8.0,6.0,8.0
8,7,9.5,9.0,10.0,7.0,9.0
9,7,9.5,9.0,10.0,7.0,10.0


In [9]:
df_4=df.copy()
df_4['park_rank']=df_4['parks'].rank(ascending=False, pct=True)
df_4

Unnamed: 0,parks,schools,park_rank
San Francisco,124,93,0.375
Los Angeles,132,176,0.25
San Diego,88,88,0.625
Sacramento,178,138,0.125
San Jose,76,127,0.8125
Fresno,76,76,0.8125
Long Beach,112,4,0.5
Oakland,13,71,1.0


In [11]:
df_5=pd.DataFrame([('Foreign Cinema', 'Restaurant', 228),
('Tacko', 'Restaurant', 87.0),
('The Mission', 'Neighborhood', 223.8),
('Dolores Park', 'Park', 45.9),
('Chambers', 'Bar', 67.0),
('The Castro', 'Neighborhood', 150.0),
('The Painted Ladies', 'Bar', 100.0),
('Golden Gate Park', 'Park', 200.0)],
columns=('name', 'type', 'avg bill'))
df_5

Unnamed: 0,name,type,avg bill
0,Foreign Cinema,Restaurant,228.0
1,Tacko,Restaurant,87.0
2,The Mission,Neighborhood,223.8
3,Dolores Park,Park,45.9
4,Chambers,Bar,67.0
5,The Castro,Neighborhood,150.0
6,The Painted Ladies,Bar,100.0
7,Golden Gate Park,Park,200.0


In [14]:
df_5['sub_group_rank']=df_5.groupby('type')['avg bill'].rank(ascending=False)
df_5

Unnamed: 0,name,type,avg bill,sub_group_rank
0,Foreign Cinema,Restaurant,228.0,1.0
1,Tacko,Restaurant,87.0,2.0
2,The Mission,Neighborhood,223.8,1.0
3,Dolores Park,Park,45.9,2.0
4,Chambers,Bar,67.0,2.0
5,The Castro,Neighborhood,150.0,2.0
6,The Painted Ladies,Bar,100.0,1.0
7,Golden Gate Park,Park,200.0,1.0


In [26]:
df6=pd.DataFrame({'key': ['A', 'B', 'C', 'A', 'B', 'C'],
                  'data1': [0, 1, 2, 3, 4, 5],
                  'data2': [5, 0, 3, 3, 7, 9]},columns=['key', 'data1', 'data2'])
df6.shape

(6, 3)

In [27]:
df6.head(6)

Unnamed: 0,key,data1,data2
0,A,0,5
1,B,1,0
2,C,2,3
3,A,3,3
4,B,4,7
5,C,5,9


In [28]:
l=[0,1,0,1,2,0]
df6.groupby(l).sum()

Unnamed: 0,key,data1,data2
0,ACC,7,17
1,BA,4,3
2,B,4,7


In [29]:
df6.groupby('key').transform(lambda x: x-x.mean())

Unnamed: 0,data1,data2
0,-1.5,1.0
1,-1.5,-3.5
2,-1.5,-3.0
3,1.5,-1.0
4,1.5,3.5
5,1.5,3.0


In [30]:
def norm_by_data2(x):
    x['data1'] = x['data1'] / x['data2'].sum()
    return x
df6.groupby('key').apply(norm_by_data2)

  df6.groupby('key').apply(norm_by_data2)


Unnamed: 0_level_0,Unnamed: 1_level_0,key,data1,data2
key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
A,0,A,0.0,5
A,3,A,0.375,3
B,1,B,0.142857,0
B,4,B,0.571429,7
C,2,C,0.166667,3
C,5,C,0.416667,9
