# Topics

# Notes

# Mistakes and Improvements

In [1]:
import pandas as pd
import numpy as np

In [2]:
# 4x4 sample matrix; np.nan marks the missing entries.
null_data = np.array(
    [
        [1, np.nan, 5, 6],
        [2, np.nan, np.nan, 7],
        [2, np.nan, np.nan, np.nan],
        [2, 5, 6, 9],
    ]
)
null_data

array([[ 1., nan,  5.,  6.],
       [ 2., nan, nan,  7.],
       [ 2., nan, nan, nan],
       [ 2.,  5.,  6.,  9.]])

In [3]:
# Wrap the array in a DataFrame with column labels A-D (default 0..3 row index).
null_df = pd.DataFrame(data=null_data, columns=list('ABCD'))
null_df

Unnamed: 0,A,B,C,D
0,1.0,,5.0,6.0
1,2.0,,,7.0
2,2.0,,,
3,2.0,5.0,6.0,9.0


# Filling NaN values
fillna() is used here for learning purposes. At a later stage we will use more standard (machine-learning-based) approaches and imputers.

In [4]:
# Return a copy in which every NaN is replaced by the string 'hello'.
# Mixing text into numeric columns is for demonstration only; it presumably
# turns the affected columns into object dtype.
null_df.fillna(value='hello')

Unnamed: 0,A,B,C,D
0,1.0,hello,5,6
1,2.0,hello,hello,7
2,2.0,hello,hello,hello
3,2.0,5,6,9


In [5]:
# Return a copy in which every NaN is replaced by the constant 100.
null_df.fillna(100)

Unnamed: 0,A,B,C,D
0,1.0,100.0,5.0,6.0
1,2.0,100.0,100.0,7.0
2,2.0,100.0,100.0,100.0
3,2.0,5.0,6.0,9.0


In [6]:
# null_df still contains its NaNs: the fillna calls so far returned new frames
# and were never assigned back.
null_df

Unnamed: 0,A,B,C,D
0,1.0,,5.0,6.0
1,2.0,,,7.0
2,2.0,,,
3,2.0,5.0,6.0,9.0


In [7]:
# Fill every NaN, in all columns, with one scalar: the mean of column 'A' (1.75 here).
# NOTE(review): to fill each column with its own mean, null_df.fillna(null_df.mean())
# is the usual form — confirm which one is intended.
null_df.fillna(value=null_df['A'].mean())

Unnamed: 0,A,B,C,D
0,1.0,1.75,5.0,6.0
1,2.0,1.75,1.75,7.0
2,2.0,1.75,1.75,1.75
3,2.0,5.0,6.0,9.0


## Performing .groupby()
.groupby() on its own does not produce a result table. An aggregation such as mean(), sum() or describe() has to be applied to the grouped object.

In [8]:
# Toy sales table: two sales people for each of three companies.
data = dict(
    Company=['GOOG', 'GOOG', 'MSFT', 'MSFT', 'FB', 'FB'],
    Person=['Sam', 'Charlie', 'Amy', 'Vanessa', 'Carl', 'Sarah'],
    Sales=[200, 120, 340, 124, 243, 350],
)
df = pd.DataFrame(data)
df

Unnamed: 0,Company,Person,Sales
0,GOOG,Sam,200
1,GOOG,Charlie,120
2,MSFT,Amy,340
3,MSFT,Vanessa,124
4,FB,Carl,243
5,FB,Sarah,350


In [9]:
# groupby alone returns a DataFrameGroupBy object (see the repr below), not a table;
# an aggregation has to be called on it to get values.
df.groupby('Company')

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x000000000812DB08>

In [10]:
# Keep the grouped object in `x` so the following cells can reuse it.
x = df.groupby(by='Company')
print(x)

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x000000000818F248>


In [11]:
# Average the numeric columns per company. numeric_only=True keeps the text column
# 'Person' out of the aggregation; pandas >= 2.0 no longer drops it silently and
# raises TypeError without this flag.
# Equivalent one-liner: df.groupby('Company').mean(numeric_only=True)
x.mean(numeric_only=True)

Unnamed: 0_level_0,Sales
Company,Unnamed: 1_level_1
FB,296.5
GOOG,160.0
MSFT,232.0


In [12]:
# Total the numeric columns per company. With numeric_only=True the text column
# 'Person' is excluded; pandas >= 2.0 would otherwise concatenate its strings
# into an extra column.
x.sum(numeric_only=True)

Unnamed: 0_level_0,Sales
Company,Unnamed: 1_level_1
FB,593
GOOG,320
MSFT,464


In [13]:
# Summary statistics per company. The result has two column levels:
# the source column ('Sales') on top and the statistic name underneath.
x.describe()

Unnamed: 0_level_0,Sales,Sales,Sales,Sales,Sales,Sales,Sales,Sales
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
Company,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
FB,2.0,296.5,75.660426,243.0,269.75,296.5,323.25,350.0
GOOG,2.0,160.0,56.568542,120.0,140.0,160.0,180.0,200.0
MSFT,2.0,232.0,152.735065,124.0,178.0,232.0,286.0,340.0


In [14]:
# Pick the row labelled 'FB'; a single row comes back as a Series named 'FB'.
x.describe().loc['FB']

Sales  count      2.000000
       mean     296.500000
       std       75.660426
       min      243.000000
       25%      269.750000
       50%      296.500000
       75%      323.250000
       max      350.000000
Name: FB, dtype: float64

In [15]:
# Same selection as in the previous cell, kept in `y` for reuse.
y = x.describe().loc['FB']
y

Sales  count      2.000000
       mean     296.500000
       std       75.660426
       min      243.000000
       25%      269.750000
       50%      296.500000
       75%      323.250000
       max      350.000000
Name: FB, dtype: float64

In [16]:
# Turn the Series into a one-column frame and flip it, giving a single 'FB' row.
print(pd.DataFrame(y).T)

   Sales                                                       
   count   mean        std    min     25%    50%     75%    max
FB   2.0  296.5  75.660426  243.0  269.75  296.5  323.25  350.0


In [17]:
# The same one-row view, built in a single expression without the helper names x and y.
print(pd.DataFrame(df.groupby('Company').describe().loc['FB']).transpose())


   Sales                                                       
   count   mean        std    min     25%    50%     75%    max
FB   2.0  296.5  75.660426  243.0  269.75  296.5  323.25  350.0


In [18]:
# Full summary again, as a reference for the positional (.iloc) selections that follow.
x.describe()

Unnamed: 0_level_0,Sales,Sales,Sales,Sales,Sales,Sales,Sales,Sales
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
Company,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
FB,2.0,296.5,75.660426,243.0,269.75,296.5,323.25,350.0
GOOG,2.0,160.0,56.568542,120.0,140.0,160.0,180.0,200.0
MSFT,2.0,232.0,152.735065,124.0,178.0,232.0,286.0,340.0


In [19]:
# Positional selection with lists: rows 1 and 2 (GOOG, MSFT), columns 1 and 2 (mean, std).
x.describe().iloc[[1,2],[1,2]]

Unnamed: 0_level_0,Sales,Sales
Unnamed: 0_level_1,mean,std
Company,Unnamed: 1_level_2,Unnamed: 2_level_2
GOOG,160.0,56.568542
MSFT,232.0,152.735065


In [20]:
# Slices are written directly inside .iloc[...]; wrapping them in an extra pair of
# brackets, as in x.describe().iloc[[1:3],[1:3]], is not valid Python syntax.
# 1:3 selects positions 1 and 2 (the end position is excluded).
x.describe().iloc[1:3,1:3]

Unnamed: 0_level_0,Sales,Sales
Unnamed: 0_level_1,mean,std
Company,Unnamed: 1_level_2,Unnamed: 2_level_2
GOOG,160.0,56.568542
MSFT,232.0,152.735065


In [21]:
# Why does this return no columns? The columns of describe() have two levels
# ('Sales' on top, the statistic names below). .loc looks the labels up in the top
# level, where 'mean' and 'std' do not exist, so nothing is selected.
# NOTE(review): newer pandas releases may raise KeyError here instead of returning
# an empty frame — confirm on the target version.
x.describe().loc[['GOOG','MSFT'],['mean','std']]

Company
GOOG
MSFT


In [22]:
# The statistics are nested under the 'Sales' column level, which is why the previous
# cell failed. Selecting ['Sales'] first drops that level, after which
# 'mean' and 'std' are ordinary column labels.
x.describe()['Sales'].loc[['GOOG','MSFT'],['mean','std']]

Unnamed: 0_level_0,mean,std
Company,Unnamed: 1_level_1,Unnamed: 2_level_1
GOOG,160.0,56.568542
MSFT,232.0,152.735065


In [23]:
# For reference: the frame obtained after selecting the 'Sales' level,
# with the statistic names as plain (single-level) columns.
x.describe()['Sales']

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
Company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
FB,2.0,296.5,75.660426,243.0,269.75,296.5,323.25,350.0
GOOG,2.0,160.0,56.568542,120.0,140.0,160.0,180.0,200.0
MSFT,2.0,232.0,152.735065,124.0,178.0,232.0,286.0,340.0


In [24]:
# The same positional selection without the helper variable x; reusing x is more convenient.
df.groupby('Company').describe().iloc[[1,2],[1,2]]

Unnamed: 0_level_0,Sales,Sales
Unnamed: 0_level_1,mean,std
Company,Unnamed: 1_level_2,Unnamed: 2_level_2
GOOG,160.0,56.568542
MSFT,232.0,152.735065


## Transposing DataFrame

In [25]:
# Keep the per-company summary in `d` for the transposing examples below.
d = df.groupby('Company').describe()

In [26]:
# describe() on a grouped object gives back an ordinary DataFrame.
type(d)

pandas.core.frame.DataFrame

In [27]:
# transpose() swaps rows and columns: the (Sales, statistic) pairs become the row
# index and the companies become the columns. A new frame is returned.
d.transpose()

Unnamed: 0,Company,FB,GOOG,MSFT
Sales,count,2.0,2.0,2.0
Sales,mean,296.5,160.0,232.0
Sales,std,75.660426,56.568542,152.735065
Sales,min,243.0,120.0,124.0
Sales,25%,269.75,140.0,178.0
Sales,50%,296.5,160.0,232.0
Sales,75%,323.25,180.0,286.0
Sales,max,350.0,200.0,340.0


In [28]:
# Store the flipped summary (statistics as rows, companies as columns).
d1 = d.T
d1

Unnamed: 0,Company,FB,GOOG,MSFT
Sales,count,2.0,2.0,2.0
Sales,mean,296.5,160.0,232.0
Sales,std,75.660426,56.568542,152.735065
Sales,min,243.0,120.0,124.0
Sales,25%,269.75,140.0,178.0
Sales,50%,296.5,160.0,232.0
Sales,75%,323.25,180.0,286.0
Sales,max,350.0,200.0,340.0


In [29]:
# After transposing, 'Sales' is the outer row label; selecting it leaves the
# statistic names as the row index.
d1.loc['Sales']

Company,FB,GOOG,MSFT
count,2.0,2.0,2.0
mean,296.5,160.0,232.0
std,75.660426,56.568542,152.735065
min,243.0,120.0,124.0
25%,269.75,140.0,178.0
50%,296.5,160.0,232.0
75%,323.25,180.0,286.0
max,350.0,200.0,340.0


In [30]:
# d.T is shorthand for d.transpose() and gives the same result.
d.T

Unnamed: 0,Company,FB,GOOG,MSFT
Sales,count,2.0,2.0,2.0
Sales,mean,296.5,160.0,232.0
Sales,std,75.660426,56.568542,152.735065
Sales,min,243.0,120.0,124.0
Sales,25%,269.75,140.0,178.0
Sales,50%,296.5,160.0,232.0
Sales,75%,323.25,180.0,286.0
Sales,max,350.0,200.0,340.0


In [31]:
# d itself is unchanged by the transposes above (they returned new frames).
d

Unnamed: 0_level_0,Sales,Sales,Sales,Sales,Sales,Sales,Sales,Sales
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
Company,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
FB,2.0,296.5,75.660426,243.0,269.75,296.5,323.25,350.0
GOOG,2.0,160.0,56.568542,120.0,140.0,160.0,180.0,200.0
MSFT,2.0,232.0,152.735065,124.0,178.0,232.0,286.0,340.0


In [32]:
# Single .loc lookup: row label 'GOOG', column tuple ('Sales', 'std') in the
# two-level column index. This replaces three chained [] lookups, each of which
# built an intermediate object, and returns the same scalar.
d.loc['GOOG', ('Sales', 'std')]

56.568542494923804

# Creating three DataFrames from dictionaries (a really fast way to create DataFrames) and concatenating them

In [33]:
# Rows labelled 0..3; every cell is its column letter followed by its row label.
df1 = pd.DataFrame(
    {col: [col + str(i) for i in range(4)] for col in 'ABCD'},
    index=list(range(4)),
)
df1

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1
2,A2,B2,C2,D2
3,A3,B3,C3,D3


In [34]:
# Rows labelled 4..7; every cell is its column letter followed by its row label.
df2 = pd.DataFrame(
    {col: [col + str(i) for i in range(4, 8)] for col in 'ABCD'},
    index=list(range(4, 8)),
)
df2

Unnamed: 0,A,B,C,D
4,A4,B4,C4,D4
5,A5,B5,C5,D5
6,A6,B6,C6,D6
7,A7,B7,C7,D7


In [35]:
# Rows labelled 8..11; every cell is its column letter followed by its row label.
df3 = pd.DataFrame(
    {col: [col + str(i) for i in range(8, 12)] for col in 'ABCD'},
    index=list(range(8, 12)),
)
df3

Unnamed: 0,A,B,C,D
8,A8,B8,C8,D8
9,A9,B9,C9,D9
10,A10,B10,C10,D10
11,A11,B11,C11,D11


In [36]:
# Stack the three frames on top of each other (row-wise concatenation).
df_cnt = pd.concat(objs=[df1, df2, df3], axis='index')
df_cnt

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1
2,A2,B2,C2,D2
3,A3,B3,C3,D3
4,A4,B4,C4,D4
5,A5,B5,C5,D5
6,A6,B6,C6,D6
7,A7,B7,C7,D7
8,A8,B8,C8,D8
9,A9,B9,C9,D9


In [37]:
# Place the frames side by side. Rows are aligned on their index labels; the three
# frames share no labels, so each row is filled from one frame only and is NaN elsewhere.
df_cnt2 = pd.concat(objs=[df1, df2, df3], axis='columns')
df_cnt2

Unnamed: 0,A,B,C,D,A.1,B.1,C.1,D.1,A.2,B.2,C.2,D.2
0,A0,B0,C0,D0,,,,,,,,
1,A1,B1,C1,D1,,,,,,,,
2,A2,B2,C2,D2,,,,,,,,
3,A3,B3,C3,D3,,,,,,,,
4,,,,,A4,B4,C4,D4,,,,
5,,,,,A5,B5,C5,D5,,,,
6,,,,,A6,B6,C6,D6,,,,
7,,,,,A7,B7,C7,D7,,,,
8,,,,,,,,,A8,B8,C8,D8
9,,,,,,,,,A9,B9,C9,D9


In [38]:
# Stack the three frames vertically; their index labels 0..11 are kept as they are.
df_cnt3 = pd.concat( [df1,df2,df3] , axis=0 )
print('The concatinated DataFrame is below','\n' + '-'*35 )
print(df_cnt3)
print('\n' + '-'*35)
# A list of labels picks exactly those rows.
print(df_cnt3.loc[[8,9,10]])
print('\n' + '-'*35)
# A label slice with .loc includes the end label, so row 11 is part of the result.
print(df_cnt3.loc[8:11])

The concatinated DataFrame is below 
-----------------------------------
      A    B    C    D
0    A0   B0   C0   D0
1    A1   B1   C1   D1
2    A2   B2   C2   D2
3    A3   B3   C3   D3
4    A4   B4   C4   D4
5    A5   B5   C5   D5
6    A6   B6   C6   D6
7    A7   B7   C7   D7
8    A8   B8   C8   D8
9    A9   B9   C9   D9
10  A10  B10  C10  D10
11  A11  B11  C11  D11

-----------------------------------
      A    B    C    D
8    A8   B8   C8   D8
9    A9   B9   C9   D9
10  A10  B10  C10  D10

-----------------------------------
      A    B    C    D
8    A8   B8   C8   D8
9    A9   B9   C9   D9
10  A10  B10  C10  D10
11  A11  B11  C11  D11


In [39]:
# Concatenating df1 with itself three times repeats the index labels 0,1,2,3.
# .loc selects by those (now duplicated) labels, while .iloc selects by position 0..11.
df_cnt4 = pd.concat( [df1,df1,df1] , axis=0 )
df_cnt4

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1
2,A2,B2,C2,D2
3,A3,B3,C3,D3
0,A0,B0,C0,D0
1,A1,B1,C1,D1
2,A2,B2,C2,D2
3,A3,B3,C3,D3
0,A0,B0,C0,D0
1,A1,B1,C1,D1


In [40]:
# Label-based: the label 3 occurs three times, so three rows come back.
df_cnt4.loc[3]

Unnamed: 0,A,B,C,D
3,A3,B3,C3,D3
3,A3,B3,C3,D3
3,A3,B3,C3,D3


In [41]:
# Position-based: the single row at position 3 (which carries the label 3).
df_cnt4.iloc[3]

A    A3
B    B3
C    C3
D    D3
Name: 3, dtype: object

In [42]:
# Position 5 is the second row of the second copy of df1, so its label is 1.
df_cnt4.iloc[5]

A    A1
B    B1
C    C1
D    D1
Name: 1, dtype: object

## Performing .merge based on keys

In [43]:
# Two frames sharing a 'key' column. left has K8 where right has K1,
# so only K0, K2 and K3 occur in both.
left = pd.DataFrame(dict(
    key=['K0', 'K8', 'K2', 'K3'],
    A=['A0', 'A1', 'A2', 'A3'],
    B=['B0', 'B1', 'B2', 'B3'],
))

right = pd.DataFrame(dict(
    key=['K0', 'K1', 'K2', 'K3'],
    C=['C0', 'C1', 'C2', 'C3'],
    D=['D0', 'D1', 'D2', 'D3'],
))

In [44]:
# Left-hand frame; note the key K8, which does not occur in `right`.
left

Unnamed: 0,key,A,B
0,K0,A0,B0
1,K8,A1,B1
2,K2,A2,B2
3,K3,A3,B3


In [45]:
# Right-hand frame; note the key K1, which does not occur in `left`.
right

Unnamed: 0,key,C,D
0,K0,C0,D0
1,K1,C1,D1
2,K2,C2,D2
3,K3,C3,D3


In [46]:
# Inner merge: rows are combined wherever the 'key' value is the same in both frames.
# Both frames name the key column identically, so `on` is enough.
merge1 = left.merge(right, on='key', how='inner')
merge1

Unnamed: 0,key,A,B,C,D
0,K0,A0,B0,C0,D0
1,K2,A2,B2,C2,D2
2,K3,A3,B3,C3,D3


In [47]:
# Same data as before, but the key column has a different name in each frame
# ('key1' in m1, 'key2' in m2).
m1 = pd.DataFrame(dict(
    key1=['K0', 'K8', 'K2', 'K3'],
    A=['A0', 'A1', 'A2', 'A3'],
    B=['B0', 'B1', 'B2', 'B3'],
))

m2 = pd.DataFrame(dict(
    key2=['K0', 'K1', 'K2', 'K3'],
    C=['C0', 'C1', 'C2', 'C3'],
    D=['D0', 'D1', 'D2', 'D3'],
))

In [48]:
# m1 keeps its keys in the column 'key1'.
m1

Unnamed: 0,key1,A,B
0,K0,A0,B0
1,K8,A1,B1
2,K2,A2,B2
3,K3,A3,B3


In [49]:
# m2 keeps its keys in the column 'key2'.
m2

Unnamed: 0,key2,C,D
0,K0,C0,D0
1,K1,C1,D1
2,K2,C2,D2
3,K3,C3,D3


In [50]:
# Differently named key columns: name each side's column explicitly.
# Both key columns are kept in the result.
merge2 = m1.merge(m2, left_on='key1', right_on='key2', how='inner')
merge2

Unnamed: 0,key1,A,B,key2,C,D
0,K0,A0,B0,K0,C0,D0
1,K2,A2,B2,K2,C2,D2
2,K3,A3,B3,K3,C3,D3


In [51]:
# The frames and their key names are passed in swapped order: m1 has no 'key2'
# and m2 has no 'key1', so the merge fails. The KeyError is the point of this cell;
# it is caught and printed so the notebook still survives "Restart & Run All".
try:
    merge3 = pd.merge(m1,m2, how='inner' , left_on = 'key2', right_on='key1')
except KeyError as err:
    print('KeyError:', err)

KeyError: 'key1'

In [52]:
# With m2 on the left, left_on must name m2's column and right_on m1's column.
merge4 = m2.merge(m1, left_on='key2', right_on='key1', how='inner')
merge4

Unnamed: 0,key2,C,D,key1,A,B
0,K0,C0,D0,K0,A0,B0
1,K2,C2,D2,K2,A2,B2
2,K3,C3,D3,K3,A3,B3


In [53]:
# Outer merge: keep the keys of both frames; the side without a match is filled with NaN.
merge5 = m2.merge(m1, left_on='key2', right_on='key1', how='outer')
merge5

Unnamed: 0,key2,C,D,key1,A,B
0,K0,C0,D0,K0,A0,B0
1,K1,C1,D1,,,
2,K2,C2,D2,K2,A2,B2
3,K3,C3,D3,K3,A3,B3
4,,,,K8,A1,B1


In [54]:
# Left merge: keep every row of m2 (the left frame); unmatched m1 columns become NaN.
merge6 = m2.merge(m1, left_on='key2', right_on='key1', how='left')
merge6

Unnamed: 0,key2,C,D,key1,A,B
0,K0,C0,D0,K0,A0,B0
1,K1,C1,D1,,,
2,K2,C2,D2,K2,A2,B2
3,K3,C3,D3,K3,A3,B3


In [55]:
# Right merge: keep every row of m1 (the right frame); unmatched m2 columns become NaN.
merge7 = m2.merge(m1, left_on='key2', right_on='key1', how='right')
merge7

Unnamed: 0,key2,C,D,key1,A,B
0,K0,C0,D0,K0,A0,B0
1,K2,C2,D2,K2,A2,B2
2,K3,C3,D3,K3,A3,B3
3,,,,K8,A1,B1


## .join is close to .merge operation
.join() merges on the index rather than on key columns. It also defaults to a left join (how='left'), whereas .merge() defaults to an inner join; otherwise the two behave alike.

In [56]:
# Two frames whose keys live in the index instead of a column.
# K1 exists only in left and K3 only in right.
left = pd.DataFrame(
    dict(A=['A0', 'A1', 'A2'], B=['B0', 'B1', 'B2']),
    index=['K0', 'K1', 'K2'],
)

right = pd.DataFrame(
    dict(C=['C0', 'C2', 'C3'], D=['D0', 'D2', 'D3']),
    index=['K0', 'K2', 'K3'],
)

In [57]:
# Left frame, indexed by K0, K1, K2.
left

Unnamed: 0,A,B
K0,A0,B0
K1,A1,B1
K2,A2,B2


In [58]:
# Right frame, indexed by K0, K2, K3.
right

Unnamed: 0,C,D
K0,C0,D0
K2,C2,D2
K3,C3,D3


In [59]:
# Join on the index, keeping every row of `left`: K1 has no partner in `right`
# and gets NaN, while K3 (only in `right`) is dropped.
left.join(right)

Unnamed: 0,A,B,C,D
K0,A0,B0,C0,D0
K1,A1,B1,,
K2,A2,B2,C2,D2


In [60]:
# The other way round: every row of `right` is kept, so K3 gets NaN and K1 is dropped.
right.join(left)

Unnamed: 0,C,D,A,B
K0,C0,D0,A0,B0
K2,C2,D2,A2,B2
K3,C3,D3,,


## Perform an operation on a column and store the result in another column (also using lambda)
For example, currency conversion.

In [61]:
# Demo frame: a 1..10 counter, a numeric column with repeated values, and short strings.
df = pd.DataFrame(
    dict(
        col1=list(range(1, 11)),
        col2=[444, 555, 666, 444, 333, 222, 666, 777, 666, 555],
        col3='aaa bb c dd eeee fff gg h iii j'.split(),
    )
)
df

Unnamed: 0,col1,col2,col3
0,1,444,aaa
1,2,555,bb
2,3,666,c
3,4,444,dd
4,5,333,eeee
5,6,222,fff
6,7,666,gg
7,8,777,h
8,9,666,iii
9,10,555,j


In [62]:
# Create a new column 'col10' holding the values of 'col1' multiplied by 10.
# The arithmetic is applied to the whole column at once.
df['col10'] = df['col1']*10
df

Unnamed: 0,col1,col2,col3,col10
0,1,444,aaa,10
1,2,555,bb,20
2,3,666,c,30
3,4,444,dd,40
4,5,333,eeee,50
5,6,222,fff,60
6,7,666,gg,70
7,8,777,h,80
8,9,666,iii,90
9,10,555,j,100


### Use .apply() to apply some function

In [63]:
def test(x):
    if(x>500):
        return (10 * np.log10(x))
    else :
        return (x)

In [64]:
# Apply test() to each value of col2: values greater than 500 become 10 * log10(value),
# all others are kept as they are. The result is a new Series; df is not modified.
df['col2'].apply(test)

0    444.000000
1     27.442930
2     28.234742
3    444.000000
4    333.000000
5    222.000000
6     28.234742
7     28.904210
8     28.234742
9     27.442930
Name: col2, dtype: float64

In [65]:
# Store the transformed values permanently in a new column 'col20'.
df['col20'] = df['col2'].apply(test)
df

Unnamed: 0,col1,col2,col3,col10,col20
0,1,444,aaa,10,444.0
1,2,555,bb,20,27.44293
2,3,666,c,30,28.234742
3,4,444,dd,40,444.0
4,5,333,eeee,50,333.0
5,6,222,fff,60,222.0
6,7,666,gg,70,28.234742
7,8,777,h,80,28.90421
8,9,666,iii,90,28.234742
9,10,555,j,100,27.44293


### Using apply and lambda to perform operation on column data

In [66]:
# A lambda is handy for one-off functions: take the square root of each col1 value.
# (np.sqrt(df['col1']) would compute the same values without apply; the lambda is
# used here to demonstrate the technique.)
df['col30'] = df['col1'].apply(lambda x : np.sqrt(x))
df

Unnamed: 0,col1,col2,col3,col10,col20,col30
0,1,444,aaa,10,444.0,1.0
1,2,555,bb,20,27.44293,1.414214
2,3,666,c,30,28.234742,1.732051
3,4,444,dd,40,444.0,2.0
4,5,333,eeee,50,333.0,2.236068
5,6,222,fff,60,222.0,2.44949
6,7,666,gg,70,28.234742,2.645751
7,8,777,h,80,28.90421,2.828427
8,9,666,iii,90,28.234742,3.0
9,10,555,j,100,27.44293,3.162278


In [67]:
# Built-in functions can be passed to apply as well: len gives the length of each string.
# NOTE(review): the column name is spelled 'lenght'; it is left as is because the
# recorded outputs below use that spelling.
df['col3 lenght'] = df['col3'].apply(len)
df

Unnamed: 0,col1,col2,col3,col10,col20,col30,col3 lenght
0,1,444,aaa,10,444.0,1.0,3
1,2,555,bb,20,27.44293,1.414214,2
2,3,666,c,30,28.234742,1.732051,1
3,4,444,dd,40,444.0,2.0,2
4,5,333,eeee,50,333.0,2.236068,4
5,6,222,fff,60,222.0,2.44949,3
6,7,666,gg,70,28.234742,2.645751,2
7,8,777,h,80,28.90421,2.828427,1
8,9,666,iii,90,28.234742,3.0,3
9,10,555,j,100,27.44293,3.162278,1


## Some operations available by default on a column

In [68]:
# Total of all values in col2.
print("Sum is :" , df['col2'].sum())

Sum is : 5328


In [69]:
# Arithmetic mean of col2.
print("Mean is :" , df['col2'].mean())

Mean is : 532.8


In [70]:
# Standard deviation of col2. The printed value corresponds to the sample
# standard deviation (n - 1 in the denominator).
print("Standard Deviation is :" , df['col2'].std())

Standard Deviation is : 171.96046057160933


In [71]:
# Largest value in col2.
print("Max is :" , df['col2'].max())

Max is : 777


## Sorting column

In [72]:
# Sort the rows by col2. inplace=False is the default, so a sorted copy is returned
# and df keeps its original order; each row keeps its original index label.
df.sort_values(by='col2')

Unnamed: 0,col1,col2,col3,col10,col20,col30,col3 lenght
5,6,222,fff,60,222.0,2.44949,3
4,5,333,eeee,50,333.0,2.236068,4
0,1,444,aaa,10,444.0,1.0,3
3,4,444,dd,40,444.0,2.0,2
1,2,555,bb,20,27.44293,1.414214,2
9,10,555,j,100,27.44293,3.162278,1
2,3,666,c,30,28.234742,1.732051,1
6,7,666,gg,70,28.234742,2.645751,2
8,9,666,iii,90,28.234742,3.0,3
7,8,777,h,80,28.90421,2.828427,1


In [73]:
# Ascending order is the default. col3 holds text, so the order is alphabetical;
# the data already happens to be in that order, hence the unchanged output.
df.sort_values(by='col3')

Unnamed: 0,col1,col2,col3,col10,col20,col30,col3 lenght
0,1,444,aaa,10,444.0,1.0,3
1,2,555,bb,20,27.44293,1.414214,2
2,3,666,c,30,28.234742,1.732051,1
3,4,444,dd,40,444.0,2.0,2
4,5,333,eeee,50,333.0,2.236068,4
5,6,222,fff,60,222.0,2.44949,3
6,7,666,gg,70,28.234742,2.645751,2
7,8,777,h,80,28.90421,2.828427,1
8,9,666,iii,90,28.234742,3.0,3
9,10,555,j,100,27.44293,3.162278,1


In [74]:
# For descending order, pass ascending=False.
df.sort_values(by='col3' , ascending=False)

Unnamed: 0,col1,col2,col3,col10,col20,col30,col3 lenght
9,10,555,j,100,27.44293,3.162278,1
8,9,666,iii,90,28.234742,3.0,3
7,8,777,h,80,28.90421,2.828427,1
6,7,666,gg,70,28.234742,2.645751,2
5,6,222,fff,60,222.0,2.44949,3
4,5,333,eeee,50,333.0,2.236068,4
3,4,444,dd,40,444.0,2.0,2
2,3,666,c,30,28.234742,1.732051,1
1,2,555,bb,20,27.44293,1.414214,2
0,1,444,aaa,10,444.0,1.0,3
