In [1]:
import numpy as np
import pandas as pd
from numpy.random import rand

In [2]:
# Fix the state of NumPy's legacy global RNG so the random frames are reproducible.
np.random.seed(seed=101)

In [3]:
# 5x4 frame of random values: rows labelled A-E, columns labelled W-Z.
df = pd.DataFrame(
    data=rand(5, 4),
    index=list('ABCDE'),
    columns=list('WXYZ'),
)

In [4]:
# Display the full frame (5 rows x 4 columns).
df

Unnamed: 0,W,X,Y,Z
A,0.516399,0.570668,0.028474,0.171522
B,0.685277,0.833897,0.306966,0.893613
C,0.721544,0.189939,0.554228,0.352132
D,0.181892,0.785602,0.965483,0.232354
E,0.083561,0.603548,0.728993,0.276239


In [5]:
# Select a single column by label; the result is a Series named 'W'.
df.loc[:, 'W']

A    0.516399
B    0.685277
C    0.721544
D    0.181892
E    0.083561
Name: W, dtype: float64

In [6]:
# Select several columns by passing a list of labels; the result is a DataFrame.
df.loc[:, ['W', 'Y']]

Unnamed: 0,W,Y
A,0.516399,0.028474
B,0.685277,0.306966
C,0.721544,0.554228
D,0.181892,0.965483
E,0.083561,0.728993


In [7]:
# Derive a column as the element-wise sum of W and Z, then display the frame.
df['new'] = df['W'].add(df['Z'])
df

Unnamed: 0,W,X,Y,Z,new
A,0.516399,0.570668,0.028474,0.171522,0.68792
B,0.685277,0.833897,0.306966,0.893613,1.57889
C,0.721544,0.189939,0.554228,0.352132,1.073676
D,0.181892,0.785602,0.965483,0.232354,0.414246
E,0.083561,0.603548,0.728993,0.276239,0.3598


In [8]:
# Remove the temporary 'new' column. Rebinding the returned frame (instead of
# inplace=True) keeps the step explicit and chainable, and columns= states the
# intent more clearly than axis=1.
df = df.drop(columns='new')

In [9]:
# Display the frame again to confirm the 'new' column has been removed.
df

Unnamed: 0,W,X,Y,Z
A,0.516399,0.570668,0.028474,0.171522
B,0.685277,0.833897,0.306966,0.893613
C,0.721544,0.189939,0.554228,0.352132
D,0.181892,0.785602,0.965483,0.232354
E,0.083561,0.603548,0.728993,0.276239


In [10]:
# Label-based lookup: column W restricted to rows E and B, in that order.
df['W'].loc[['E', 'B']]

E    0.083561
B    0.685277
Name: W, dtype: float64

In [11]:
# Position-based lookup: rows at positions 1 and 2, columns at positions 1 and 3.
df.iloc[1:3, [1, 3]]

Unnamed: 0,X,Z
B,0.833897,0.893613
C,0.189939,0.352132


In [12]:
# Element-wise comparison: a boolean frame that is True where the value exceeds 0.5.
df.gt(0.5)

Unnamed: 0,W,X,Y,Z
A,True,True,False,False
B,True,True,False,True
C,True,False,True,False
D,False,True,True,False
E,False,True,True,False


# Short and long way to filter rows with a boolean column condition

In [13]:
# Short way: keep rows where W > 0.5 and only columns W and Y, in a single
# .loc call. This avoids the intermediate frame that chained [] indexing creates.
df.loc[df['W'] > 0.5, ['W', 'Y']]

Unnamed: 0,W,Y
A,0.516399,0.028474
B,0.685277,0.306966
C,0.721544,0.554228


In [14]:
# Long way: build the row filter from separate, named steps.
dfcol = ['W', 'Y']           # columns to keep
boolser = df['W'].gt(0.5)    # boolean mask, True where W > 0.5
result = df.loc[boolser]     # rows for which the mask is True
result[dfcol]

Unnamed: 0,W,Y
A,0.516399,0.028474
B,0.685277,0.306966
C,0.721544,0.554228


For multiple conditions, combine them with "|" (or) and "&" (and), wrapping each condition in parentheses.

In [15]:
# "And" filter: keep rows where W > 0.5 and Z < 0.8 both hold.
df.loc[df['W'].gt(0.5) & df['Z'].lt(0.8)]

Unnamed: 0,W,X,Y,Z
A,0.516399,0.570668,0.028474,0.171522
C,0.721544,0.189939,0.554228,0.352132


In [16]:
# "Or" filter: keep rows where at least one of the two conditions holds.
# NOTE(review): W is filled by rand() and every W value shown above is below 1,
# so W > 6 never matches and only Z < 0.8 selects rows. Was 0.6 intended? Confirm.
df[(df['W']>6) | (df['Z']<0.8)]

Unnamed: 0,W,X,Y,Z
A,0.516399,0.570668,0.028474,0.171522
C,0.721544,0.189939,0.554228,0.352132
D,0.181892,0.785602,0.965483,0.232354
E,0.083561,0.603548,0.728993,0.276239


Index: reset_index vs set_index

In [17]:
# Return a copy with a fresh 0..n-1 index; the old labels become an 'index' column.
# Nothing is assigned, so df itself keeps its A-E index.
df.reset_index()

Unnamed: 0,index,W,X,Y,Z
0,A,0.516399,0.570668,0.028474,0.171522
1,B,0.685277,0.833897,0.306966,0.893613
2,C,0.721544,0.189939,0.554228,0.352132
3,D,0.181892,0.785602,0.965483,0.232354
4,E,0.083561,0.603548,0.728993,0.276239


In [18]:
# Five two-letter labels, one per row of df.
newcol = ['WR', 'CO', 'AN', 'DF', 'TX']
newcol

['WR', 'CO', 'AN', 'DF', 'TX']

In [19]:
# Add the labels in newcol as a new 'State' column (one label per row),
# then display the updated frame.
df['State'] = newcol

df

Unnamed: 0,W,X,Y,Z,State
A,0.516399,0.570668,0.028474,0.171522,WR
B,0.685277,0.833897,0.306966,0.893613,CO
C,0.721544,0.189939,0.554228,0.352132,AN
D,0.181892,0.785602,0.965483,0.232354,DF
E,0.083561,0.603548,0.728993,0.276239,TX


In [20]:
# Return a copy indexed by the 'State' column. The result is not assigned,
# so df itself is left unchanged.
df.set_index('State')

Unnamed: 0_level_0,W,X,Y,Z
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
WR,0.516399,0.570668,0.028474,0.171522
CO,0.685277,0.833897,0.306966,0.893613
AN,0.721544,0.189939,0.554228,0.352132
DF,0.181892,0.785602,0.965483,0.232354
TX,0.083561,0.603548,0.728993,0.276239


In [21]:
# Display df to confirm it still has its A-E index and the 'State' column:
# set_index was called with inplace=False and its result was not assigned.
df

Unnamed: 0,W,X,Y,Z,State
A,0.516399,0.570668,0.028474,0.171522,WR
B,0.685277,0.833897,0.306966,0.893613,CO
C,0.721544,0.189939,0.554228,0.352132,AN
D,0.181892,0.785602,0.965483,0.232354,DF
E,0.083561,0.603548,0.728993,0.276239,TX


In [22]:
# Two-level index: outer group label (G1/G2) paired with an inner number (1-3).
outside = ['G1'] * 3 + ['G2'] * 3
inside = [1, 2, 3] * 2
hier_index = pd.MultiIndex.from_tuples(list(zip(outside, inside)))

In [23]:
# 6x2 frame of random values on the hierarchical index, with columns A and B.
new_df = pd.DataFrame(
    data=rand(6, 2),
    index=hier_index,
    columns=list('AB'),
)

In [24]:
# Display the multi-indexed frame; the two index levels are still unnamed here.
new_df

Unnamed: 0,Unnamed: 1,A,B
G1,1,0.685306,0.517867
G1,2,0.048485,0.137869
G1,3,0.186967,0.994318
G2,1,0.520665,0.57879
G2,2,0.734819,0.541962
G2,3,0.913154,0.80792


In [25]:
# Name the two index levels so they can be referred to by name (e.g. level='Nums').
# NOTE(review): 'Grups' looks like a typo for 'Groups'. It is left unchanged
# because it is a runtime label that also appears in the recorded outputs.
new_df.index.names = ['Grups', 'Nums']

In [26]:
# Display the frame again; the index levels now carry the names 'Grups' and 'Nums'.
new_df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Grups,Nums,Unnamed: 2_level_1,Unnamed: 3_level_1
G1,1,0.685306,0.517867
G1,2,0.048485,0.137869
G1,3,0.186967,0.994318
G2,1,0.520665,0.57879
G2,2,0.734819,0.541962
G2,3,0.913154,0.80792


In [27]:
# Look up one cell by its full (outer, inner) index key and column label in a
# single .loc call, rather than chaining three separate indexing steps that
# each build an intermediate object.
new_df.loc[('G2', 3), 'B']

0.8079201509879171

In [28]:
# Cross-section: rows whose 'Nums' level equals 1, taken from every outer group.
new_df.xs(key=1, level='Nums')

Unnamed: 0_level_0,A,B
Grups,Unnamed: 1_level_1,Unnamed: 2_level_1
G1,0.685306,0.517867
G2,0.520665,0.57879


In [30]:
# With a single-level row index, unstack() moves the column labels to the outer
# index level and returns a Series keyed by (column, row label).
# The result has dtype object because the string 'State' column is included.
df.unstack()

W      A     0.516399
       B     0.685277
       C     0.721544
       D     0.181892
       E    0.0835614
X      A     0.570668
       B     0.833897
       C     0.189939
       D     0.785602
       E     0.603548
Y      A    0.0284742
       B     0.306966
       C     0.554228
       D     0.965483
       E     0.728993
Z      A     0.171522
       B     0.893613
       C     0.352132
       D     0.232354
       E     0.276239
State  A           WR
       B           CO
       C           AN
       D           DF
       E           TX
dtype: object