In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import datetime
from io import StringIO

In [4]:
header = {'User-Agent': 'pandas'}
df = pd.read_csv('https://download.bls.gov/pub/time.series/cu/cu.item', sep='\t', storage_options=header)

In [5]:
df

Unnamed: 0,item_code,item_name,display_level,selectable,sort_sequence
0,AA0,All items - old base,0,T,2
1,AA0R,Purchasing power of the consumer dollar - old ...,0,T,399
2,SA0,All items,0,T,1
3,SA0E,Energy,1,T,374
4,SA0L1,All items less food,1,T,358
...,...,...,...,...,...
394,SS68023,Tax return preparation and other accounting fees,4,T,352
395,SSEA011,College textbooks,3,T,314
396,SSFV031A,Food at elementary and secondary schools,3,T,122
397,SSGE013,Infants' equipment,3,T,355


In [12]:
dates = pd.date_range('1/1/2000', periods=8)
df = pd.DataFrame(np.random.randn(8, 4), index=dates, columns=list('ABCD'))
df

Unnamed: 0,A,B,C,D
2000-01-01,0.123576,0.367228,-2.604112,0.984284
2000-01-02,0.300501,-0.525221,-0.327388,0.538184
2000-01-03,-0.075298,-0.929728,-0.425729,-1.209097
2000-01-04,-0.908324,-0.677347,-0.029724,0.877145
2000-01-05,-0.01178,-1.051432,-0.222252,0.511529
2000-01-06,-0.506942,0.254,0.751907,-0.333903
2000-01-07,0.676016,0.580514,-0.332537,-0.643218
2000-01-08,-1.068924,0.352037,-2.533448,0.919841


In [13]:
df.loc[:, ['A', 'B']] = df[['B', 'A']]
df

Unnamed: 0,A,B,C,D
2000-01-01,0.123576,0.367228,-2.604112,0.984284
2000-01-02,0.300501,-0.525221,-0.327388,0.538184
2000-01-03,-0.075298,-0.929728,-0.425729,-1.209097
2000-01-04,-0.908324,-0.677347,-0.029724,0.877145
2000-01-05,-0.01178,-1.051432,-0.222252,0.511529
2000-01-06,-0.506942,0.254,0.751907,-0.333903
2000-01-07,0.676016,0.580514,-0.332537,-0.643218
2000-01-08,-1.068924,0.352037,-2.533448,0.919841


In [16]:
df.loc[:, ['B', 'A']] = df[['A', 'B']].to_numpy()
df

Unnamed: 0,A,B,C,D
2000-01-01,0.367228,0.123576,-2.604112,0.984284
2000-01-02,-0.525221,0.300501,-0.327388,0.538184
2000-01-03,-0.929728,-0.075298,-0.425729,-1.209097
2000-01-04,-0.677347,-0.908324,-0.029724,0.877145
2000-01-05,-1.051432,-0.01178,-0.222252,0.511529
2000-01-06,0.254,-0.506942,0.751907,-0.333903
2000-01-07,0.580514,0.676016,-0.332537,-0.643218
2000-01-08,0.352037,-1.068924,-2.533448,0.919841


In [49]:
x = pd.DataFrame({'x': [1,2,3], 'y': [4,5,6]})
x.iloc[1] = {'x': 5, 'y': 9}

In [56]:
x.iloc[:2, :2]

Unnamed: 0,x,y
0,1,4
1,5,9


In [36]:
df = pd.DataFrame({'x': [1,2,3], 'y': [4,5,6]}, index=np.arange(len(x)))
df

Unnamed: 0,x,y
0,1,4
1,2,5
2,3,6


In [40]:
df.index = pd.date_range('20010101', periods=len(x), name='order')

In [41]:
df

Unnamed: 0_level_0,x,y
order,Unnamed: 1_level_1,Unnamed: 2_level_1
2001-01-01,1,4
2001-01-02,2,5
2001-01-03,3,6


In [48]:
df.reset_index().loc[1]

order    2001-01-02 00:00:00
x                          2
y                          5
Name: 1, dtype: object

In [57]:
s1 = pd.Series(np.random.randn(6), index=list('abcdef'))
s1

a    0.156576
b   -1.229621
c   -1.247279
d   -1.347905
e   -1.334659
f   -0.973025
dtype: float64

In [59]:
s1.loc['c'] = 0
s1

a    0.156576
b   -1.229621
c    0.000000
d   -1.347905
e   -1.334659
f   -0.973025
dtype: float64

In [60]:
s1 = pd.Series(np.random.randn(10))
s1.loc[1]

-1.237066112602434

In [62]:
df1 = pd.DataFrame(np.random.randn(6, 4), index=list('abcdef'), columns=list('ABCD'))

In [64]:
df1.loc['d', 'A':'C']

A   -0.457139
B    0.540068
C    0.177437
Name: d, dtype: float64

In [72]:
df1[['B','A']]

Unnamed: 0,B,A
a,-0.472267,0.003782
b,1.539631,-0.354375
c,-0.23442,0.462662
d,0.540068,-0.457139
e,0.012027,0.48973
f,-0.779911,0.867359


In [73]:
s = pd.Series(list('abcde'), index=[0,3,2,5,4])
s.loc[:3]

0    a
3    b
dtype: object

In [74]:
s.loc[:5]

0    a
3    b
2    c
5    d
dtype: object

In [2]:
df1 = pd.DataFrame(np.random.randn(6,4), index=list('abcdef'), columns=list('ABCD'))
df1

Unnamed: 0,A,B,C,D
a,0.233969,-2.184527,0.936335,-0.742973
b,-0.669184,-0.347522,-0.049198,-0.948185
c,1.717791,1.110352,0.143623,1.733148
d,-0.526939,-1.12407,-0.900471,-0.135986
e,1.595212,0.51558,0.303817,-0.338451
f,-1.48757,0.516394,1.997309,0.009887


In [7]:
df1.columns[0:3]

Index(['A', 'B', 'C'], dtype='object')

In [8]:
df1['A'].loc[lambda x: x>0]

a    0.233969
c    1.717791
e    1.595212
Name: A, dtype: float64

In [22]:
# df = pd.read_csv('pd_methods.csv', index_col=0)
df.index.rename('Stt', inplace=True)

In [30]:
df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, index=list('abc'))
# df.loc[df.iloc[[0, 2]], 'A']
df.index[[0, 2]]

Index(['a', 'c'], dtype='object')

In [32]:
df.loc[df.index[[0,2]], 'A']

a    1
c    3
Name: A, dtype: int64

In [46]:
df

Unnamed: 0,A,B
a,1,4
b,2,5
c,3,6


In [41]:
df.index[[0, 2]]

Index(['a', 'c'], dtype='object')

In [47]:
df.columns[[0, 1]]

Index(['A', 'B'], dtype='object')

In [59]:
df.columns.get_loc('A')

0

In [60]:
df.index.get_loc('a')

0

In [61]:
df.index.get_indexer(['a', 'b', 'c'])

array([0, 1, 2], dtype=int64)

In [62]:
df.columns.get_indexer(['A', 'B'])

array([0, 1], dtype=int64)

In [63]:
s = pd.Series(np.arange(6))
s.sample()

1    1
dtype: int32

In [64]:
s.sample(n=3)

2    2
0    0
3    3
dtype: int32

In [65]:
s.sample(frac=0.5)

0    0
4    4
2    2
dtype: int32

In [71]:
s.sample(n=3, weights=[0.4, 0.4, 0.4, 0.4, 0.4, 0.4])

0    0
3    3
4    4
dtype: int32

In [73]:
df2 = pd.DataFrame({'col1': [9, 8, 7, 6], 'weight_col': [0.5, 0.4, 0.1, 0]})
df2.sample(n=3, weights='weight_col')

Unnamed: 0,col1,weight_col
0,9,0.5
2,7,0.1
1,8,0.4


In [84]:
df2.sample(n=3, random_state=3)

Unnamed: 0,col1,weight_col
3,6,0.0
1,8,0.4
0,9,0.5


In [90]:
se = pd.Series([1, 2, 3])

In [101]:
se = se.reset_index()

In [104]:
df = pd.DataFrame(np.arange(6).reshape(3,2), columns=['A', 'B'])
df

Unnamed: 0,A,B
0,0,1
1,2,3
2,4,5


In [106]:
dates = pd.Series(pd.date_range('20010101', periods=10))
s = pd.Series(np.arange(10), index=dates)
df = pd.DataFrame(np.random.randn(10,4), index=dates, columns=list('ABCD'))

In [107]:
s.iat[5]

5

In [108]:
df.at[dates[5], 'A']

0.06143719498133536

In [109]:
df.iat[0, 3]

-1.1460152111000526

In [136]:
df2 = pd.DataFrame({'a': ['one', 'one', 'two', 'three', 'two', 'one', 'six'],
                   'b': ['x', 'y', 'y', 'x', 'y', 'x', 'x'],
                   'c': np.random.randn(7)})
criterion = df2['a'].map(lambda x: x.startswith('t'))
# df2[criterion]
# df2[[x.startswith('t') for x in df2['a']]]
# df2[criterion & (df2['b'] == 'x')]
df2.loc[criterion & (df2['b'] == 'x'), 'b':'c']

Unnamed: 0,b,c
3,x,0.038341


In [154]:
df = pd.DataFrame(np.random.randn(5, 2),
                  index=list('abcde'),
                  columns=['A', 'B'])
df

Unnamed: 0,A,B
a,0.806984,-0.150727
b,1.429061,-0.042105
c,0.181825,-1.285301
d,1.26009,1.197514
e,0.29879,-0.754171


In [155]:
df['A'] > 0.5

a     True
b     True
c    False
d     True
e    False
Name: A, dtype: bool

In [157]:
s = (df['A'] > 0.5)
s.values

array([ True,  True, False,  True, False])

In [162]:
print(s)

a     True
b     True
c    False
d     True
e    False
Name: A, dtype: bool


In [164]:
df.iloc[[True, False, True, True, False], 1]

a   -0.150727
c   -1.285301
d    1.197514
Name: B, dtype: float64

In [168]:
s = pd.Series(np.arange(6), index=pd.MultiIndex.from_product([[0,1], ['a', 'b', 'c']]))
s

0  a    0
   b    1
   c    2
1  a    3
   b    4
   c    5
dtype: int32

In [173]:
s1 = s.index.isin(['a', 'c'], level=1)

In [174]:
s.iloc[s1]

0  a    0
   c    2
1  a    3
   c    5
dtype: int32

In [175]:
df = pd.DataFrame({'vals': [1,2,3,4], 'ids': ['a', 'b', 'f', 'n'], 'ids2': ['a', 'n', 'c', 'n']})
df

Unnamed: 0,vals,ids,ids2
0,1,a,a
1,2,b,n
2,3,f,c
3,4,n,n


In [188]:
values = {'ids': ['a', 'c'], 'ids2': ['a', 'b'], 'vals': [1, 3]}
row_mask = df.isin(values).all()
row_mask

vals    False
ids     False
ids2    False
dtype: bool

In [183]:
df[row_mask]

Unnamed: 0,vals,ids,ids2
0,1,a,a


In [189]:
df = pd.DataFrame(np.random.randn(8, 4), index= pd.date_range('2000-01-01', periods=8), columns=list('ABCD'))
df

Unnamed: 0,A,B,C,D
2000-01-01,-0.329708,0.706123,-2.713327,1.022615
2000-01-02,-0.09642,-0.481043,-1.034451,1.42925
2000-01-03,-0.181702,-0.335237,1.069624,-0.655576
2000-01-04,0.79095,-0.323941,-0.403812,-1.060557
2000-01-05,0.490567,0.407525,-1.077142,-0.138711
2000-01-06,0.880371,0.414202,-0.699189,-2.416261
2000-01-07,1.177994,0.572624,0.381155,-0.387474
2000-01-08,-0.91177,0.066977,-0.047273,1.33787


In [199]:
df[1:4]

Unnamed: 0,A,B,C,D
2000-01-02,-0.09642,-0.481043,-1.034451,1.42925
2000-01-03,-0.181702,-0.335237,1.069624,-0.655576
2000-01-04,0.79095,-0.323941,-0.403812,-1.060557


In [202]:
df

Unnamed: 0,A,B,C,D
2000-01-01,-0.329708,0.706123,-2.713327,1.022615
2000-01-02,-0.09642,-0.481043,-1.034451,1.42925
2000-01-03,-0.181702,-0.335237,1.069624,-0.655576
2000-01-04,0.79095,-0.323941,-0.403812,-1.060557
2000-01-05,0.490567,0.407525,-1.077142,-0.138711
2000-01-06,0.880371,0.414202,-0.699189,-2.416261
2000-01-07,1.177994,0.572624,0.381155,-0.387474
2000-01-08,-0.91177,0.066977,-0.047273,1.33787


In [207]:
df.where(df>0, df['A'], axis=1)

Unnamed: 0,A,B,C,D
2000-01-01,,0.706123,,1.022615
2000-01-02,,,,1.42925
2000-01-03,,,1.069624,
2000-01-04,0.79095,,,
2000-01-05,0.490567,0.407525,,
2000-01-06,0.880371,0.414202,,
2000-01-07,1.177994,0.572624,0.381155,
2000-01-08,,0.066977,,1.33787


In [7]:
df = pd.DataFrame({'col1': list('ABBAD'), 'col2': list('ZZDFF')})
df

Unnamed: 0,col1,col2
0,A,Z
1,B,Z
2,B,D
3,A,F
4,D,F


In [8]:
conditions = [(df['col1'] == 'A') & (df['col2'] == 'Z'),
             (df['col1'] == 'A') & (df['col2'] == 'F'),
             (df['col1'] == 'B')]
choices = ['green', 'yellow', 'red']
df['color'] = np.select(conditions, choices, default='black')
df

Unnamed: 0,col1,col2,color
0,A,Z,green
1,B,Z,red
2,B,D,red
3,A,F,yellow
4,D,F,black


In [12]:
df = pd.DataFrame(np.random.randn(10, 3), columns=list('abc'))

In [16]:
df.query('a<b and b<c')

Unnamed: 0,a,b,c
2,0.217156,0.279549,1.208464
3,-1.153873,-0.409144,1.031866
4,-0.291906,0.679745,1.140516
5,-0.243717,0.722663,2.008115
8,-1.93272,-0.826348,0.883411


In [24]:
n = 10
df = pd.DataFrame(np.random.randint(50, size=(n, 2)), columns=list('bc'))
df

Unnamed: 0,b,c
0,48,34
1,23,24
2,20,7
3,31,14
4,44,42
5,6,24
6,38,2
7,34,34
8,35,7
9,1,47


In [25]:
df.index.name = 'a'

In [29]:
df.query('a<b<c')

Unnamed: 0_level_0,b,c
a,Unnamed: 1_level_1,Unnamed: 2_level_1
1,23,24
5,6,24


In [30]:
df.query('index < b < c')

Unnamed: 0_level_0,b,c
a,Unnamed: 1_level_1,Unnamed: 2_level_1
1,23,24
5,6,24


In [34]:
n = 10
colors = np.random.choice(['red', 'gree'], size=n)
foods = np.random.choice(['eggs', 'vegetables'], size=n)
index = pd.MultiIndex.from_arrays([colors, foods], names=['color', 'food'])

In [35]:
df = pd.DataFrame(np.random.randn(n, 2), index=index)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,0,1
color,food,Unnamed: 2_level_1,Unnamed: 3_level_1
red,vegetables,0.295226,-1.503227
gree,vegetables,0.33716,0.755573
gree,vegetables,0.832794,1.617346
gree,vegetables,-0.635746,0.784084
gree,vegetables,-0.773783,1.947809
gree,vegetables,0.028072,-1.569358
gree,eggs,-0.514814,-1.531944
red,eggs,-1.564366,0.442072
red,vegetables,-0.498764,-0.324104
gree,eggs,-1.275325,-0.802454


In [36]:
df.query('color == "red"')

Unnamed: 0_level_0,Unnamed: 1_level_0,0,1
color,food,Unnamed: 2_level_1,Unnamed: 3_level_1
red,vegetables,0.295226,-1.503227
red,eggs,-1.564366,0.442072
red,vegetables,-0.498764,-0.324104


In [38]:
df.index.names = [None, None]

In [39]:
df

Unnamed: 0,Unnamed: 1,0,1
red,vegetables,0.295226,-1.503227
gree,vegetables,0.33716,0.755573
gree,vegetables,0.832794,1.617346
gree,vegetables,-0.635746,0.784084
gree,vegetables,-0.773783,1.947809
gree,vegetables,0.028072,-1.569358
gree,eggs,-0.514814,-1.531944
red,eggs,-1.564366,0.442072
red,vegetables,-0.498764,-0.324104
gree,eggs,-1.275325,-0.802454


In [40]:
df.query('ilevel_0 == "red"' )

Unnamed: 0,Unnamed: 1,0,1
red,vegetables,0.295226,-1.503227
red,eggs,-1.564366,0.442072
red,vegetables,-0.498764,-0.324104


In [41]:
df.query('ilevel_1 == "eggs"')

Unnamed: 0,Unnamed: 1,0,1
gree,eggs,-0.514814,-1.531944
red,eggs,-1.564366,0.442072
gree,eggs,-1.275325,-0.802454


In [42]:
df = pd.DataFrame(np.random.randn(n, 3), columns=list('abc'))
df2 = pd.DataFrame(np.random.randn(n + 2, 3), columns=df.columns)
expr = '0.0 <= a <= c <= 0.5'

In [52]:
t = map(lambda x: x.query(expr), [df, df2])
for i in t:
    print(i)


          a         b         c
7  0.012147 -0.489134  0.032601
Empty DataFrame
Columns: [a, b, c]
Index: []


In [55]:
df = pd.DataFrame(np.random.randint(n, size=(n, 3)), columns=list('abc'))
df

Unnamed: 0,a,b,c
0,0,6,0
1,1,2,0
2,2,1,3
3,8,4,5
4,5,4,3
5,1,2,2
6,3,6,9
7,0,7,7
8,2,8,2
9,1,3,7


In [57]:
df[(df['a']< df['b']) & (df['b']<df['c'])]

Unnamed: 0,a,b,c
6,3,6,9
9,1,3,7


In [58]:
df = pd.DataFrame({'a': list('aabbccddeeff'),
                  'b': list('aaaabbbbcccc'),
                  'c': np.random.randint(5, size=12),
                  'd': np.random.randint(9, size=12)})
df

Unnamed: 0,a,b,c,d
0,a,a,2,4
1,a,a,1,8
2,b,a,0,3
3,b,a,1,2
4,c,b,4,4
5,c,b,4,4
6,d,b,1,1
7,d,b,3,4
8,e,c,4,4
9,e,c,2,8


In [66]:
df = pd.DataFrame(np.random.rand(n, 3), columns=list('abc'))
df

Unnamed: 0,a,b,c
0,0.722397,0.735807,0.975757
1,0.697363,0.708568,0.451638
2,0.862415,0.05815,0.404155
3,0.694196,0.354007,0.202363
4,0.150612,0.883081,0.235424
5,0.607,0.414331,0.125902
6,0.97245,0.829942,0.541191
7,0.681587,0.093245,0.757279
8,0.532483,0.250978,0.761034
9,0.843037,0.262381,0.167784


In [67]:
df = pd.DataFrame(np.random.rand(n, 3), columns=list('abc'))

In [82]:
df['bools'] = np.random.rand(len(df)) > 0.5
df

Unnamed: 0,a,b,c,bools
0,0.732163,0.195053,0.754678,True
1,0.37335,0.503601,0.267575,True
2,0.358014,0.321618,0.171853,False
3,0.114148,0.427127,0.279537,False
4,0.672874,0.079309,0.032628,True
5,0.388451,0.010855,0.546847,True
6,0.083075,0.75536,0.843533,False
7,0.497489,0.589482,0.000942,False
8,0.055651,0.880992,0.587656,False
9,0.237414,0.293404,0.86194,False


In [83]:
df.query('not bools')

Unnamed: 0,a,b,c,bools
2,0.358014,0.321618,0.171853,False
3,0.114148,0.427127,0.279537,False
6,0.083075,0.75536,0.843533,False
7,0.497489,0.589482,0.000942,False
8,0.055651,0.880992,0.587656,False
9,0.237414,0.293404,0.86194,False


In [86]:
df.query('not bools') == df[~df['bools']]

Unnamed: 0,a,b,c,bools
2,True,True,True,True
3,True,True,True,True
6,True,True,True,True
7,True,True,True,True
8,True,True,True,True
9,True,True,True,True


In [87]:
shorter = df.query('a < b < c and not bools or bools > 2')
shorter

Unnamed: 0,a,b,c,bools
6,0.083075,0.75536,0.843533,False
9,0.237414,0.293404,0.86194,False


In [92]:
longer = df[(df['a'] < df['b']) & (df['b'] < df['c']) & (~ df['bools']) | (df['bools'] > 2)]
longer

Unnamed: 0,a,b,c,bools
6,0.083075,0.75536,0.843533,False
9,0.237414,0.293404,0.86194,False


In [93]:
shorter == longer

Unnamed: 0,a,b,c,bools
6,True,True,True,True
9,True,True,True,True


In [108]:
df3 = pd.DataFrame({'a': np.random.randn(6), 'b': np.random.randn(6)}, index=list('aabcba'))
df3

Unnamed: 0,a,b
a,1.195817,1.710454
a,-1.70845,-0.60106
b,0.433693,-0.521991
c,-0.505928,-0.345267
b,-0.01978,-1.855012
a,-0.752848,1.831791


In [95]:
df3.index.duplicated()

array([False,  True, False, False,  True,  True])

In [96]:
df3[~ df3.index.duplicated()]

Unnamed: 0,a,b
a,0.501623,1.609102
b,-1.892547,0.110899
c,0.786636,1.275488


In [97]:
df3[~ df3.index.duplicated(keep='last')]

Unnamed: 0,a,b
c,0.786636,1.275488
b,-2.532966,1.742885
a,1.365332,2.920223


In [98]:
df3[~ df3.index.duplicated(keep=False)]

Unnamed: 0,a,b
c,0.786636,1.275488


In [118]:
df3.index.drop_duplicates()

Index(['a', 'b', 'c'], dtype='object')

In [119]:
df3.loc[df3.index.drop_duplicates()]

Unnamed: 0,a,b
a,1.195817,1.710454
a,-1.70845,-0.60106
a,-0.752848,1.831791
b,0.433693,-0.521991
b,-0.01978,-1.855012
c,-0.505928,-0.345267


In [120]:
df3[0:3]

Unnamed: 0,a,b
a,1.195817,1.710454
a,-1.70845,-0.60106
b,0.433693,-0.521991


In [140]:
df = pd.DataFrame({'col': list('AABB'), 'A': [80, 23, np.nan, 22], 'B': [80, 55, 76, 67]})
df

Unnamed: 0,col,A,B
0,A,80.0,80
1,A,23.0,55
2,B,,76
3,B,22.0,67


In [125]:
pd.factorize(df['col'])

(array([0, 0, 1, 1], dtype=int64), Index(['A', 'B'], dtype='object'))

In [126]:
idx, cols = pd.factorize(df['col'])

In [131]:
df.reindex(cols, axis=1).to_numpy()[np.arange(len(df)), idx]

array([80., 23., 76., 67.])

In [128]:
idx

array([0, 0, 1, 1], dtype=int64)

In [129]:
cols

Index(['A', 'B'], dtype='object')

In [141]:
index = pd.Index(list('eab'), name='object')
index

Index(['e', 'a', 'b'], dtype='object', name='object')

In [142]:
index.name

'object'

In [143]:
df

Unnamed: 0,col,A,B
0,A,80.0,80
1,A,23.0,55
2,B,,76
3,B,22.0,67


In [144]:
index = pd.Index(list(range(5)), name='rows')
columns = pd.Index(list('ABC'), name='cols')
df = pd.DataFrame(np.random.randn(5, 3), index=index, columns=columns)
df

cols,A,B,C
rows,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.261255,-0.185855,-0.555897
1,-0.519151,0.164914,-0.532782
2,0.578346,0.667791,0.910614
3,-1.175787,1.143781,-0.42914
4,0.971085,-1.4813,0.875252


In [145]:
index = pd.MultiIndex.from_product([range(3), ['a', 'b']], names=['first', 'second'])
index

MultiIndex([(0, 'a'),
            (0, 'b'),
            (1, 'a'),
            (1, 'b'),
            (2, 'a'),
            (2, 'b')],
           names=['first', 'second'])

In [150]:
index.levels[1]

Index(['a', 'b'], dtype='object', name='second')

In [153]:
index = index.set_names(['first', 'last'])

In [154]:
index

MultiIndex([(0, 'a'),
            (0, 'b'),
            (1, 'a'),
            (1, 'b'),
            (2, 'a'),
            (2, 'b')],
           names=['first', 'last'])

In [157]:
index = index.set_levels(['one', 'two'], level=1)

In [158]:
index

MultiIndex([(0, 'one'),
            (0, 'two'),
            (1, 'one'),
            (1, 'two'),
            (2, 'one'),
            (2, 'two')],
           names=['first', 'last'])

In [159]:
a = pd.Index(['a', 'b', 'c'])
b = pd.Index(['a', 'd', 'e'])
a.difference(b)

Index(['b', 'c'], dtype='object')

In [162]:
a.symmetric_difference(b)

Index(['b', 'c', 'd', 'e'], dtype='object')

In [164]:
np.arange(1,5).astype('float64')

array([1., 2., 3., 4.])

In [165]:
data = pd.DataFrame({'a': ['bar', 'bar', 'foo', 'foo'], 'b': ['one', 'two', 'one', 'two'],
                    'c': list('zyxw'), 'd': np.arange(1,5).astype('float64')})
data

Unnamed: 0,a,b,c,d
0,bar,one,z,1.0
1,bar,two,y,2.0
2,foo,one,x,3.0
3,foo,two,w,4.0


In [167]:
indexed1 = data.set_index('c')
indexed1

Unnamed: 0_level_0,a,b,d
c,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
z,bar,one,1.0
y,bar,two,2.0
x,foo,one,3.0
w,foo,two,4.0


In [168]:
data.set_index(['a', 'b'])

Unnamed: 0_level_0,Unnamed: 1_level_0,c,d
a,b,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,z,1.0
bar,two,y,2.0
foo,one,x,3.0
foo,two,w,4.0


In [169]:
data

Unnamed: 0,a,b,c,d
0,bar,one,z,1.0
1,bar,two,y,2.0
2,foo,one,x,3.0
3,foo,two,w,4.0


In [170]:
data.set_index('c', drop=False)

Unnamed: 0_level_0,a,b,c,d
c,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
z,bar,one,z,1.0
y,bar,two,y,2.0
x,foo,one,x,3.0
w,foo,two,w,4.0


In [175]:
data.set_index(['a', 'b'], inplace=True)

In [177]:
data.reset_index(level = 1)

Unnamed: 0_level_0,b,c,d
a,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,z,1.0
bar,two,y,2.0
foo,one,x,3.0
foo,two,w,4.0


In [236]:
df = pd.DataFrame(np.random.rand(10, 4), index=np.arange(10)[::-1], columns=list('ABCD'))
df

Unnamed: 0,A,B,C,D
9,0.249768,0.587926,0.787779,0.692166
8,0.415833,0.47703,0.294153,0.808217
7,0.207671,0.398327,0.143084,0.173922
6,0.732162,0.211605,0.128542,0.950932
5,0.599208,0.426189,0.225376,0.082938
4,0.279166,0.800622,0.006861,0.045711
3,0.629184,0.68276,0.686305,0.631343
2,0.135509,0.64999,0.358425,0.286946
1,0.40662,0.601093,0.023405,0.937039
0,0.362735,0.683116,0.739503,0.708147


In [237]:
df.rename({'A': 'col1', 'B': 'col2', 'C': 'col3', 'D': 'col4'}, axis=1)
# df.rename({'one': 'foo', 'two': 'bar'}, axis=1)

Unnamed: 0,col1,col2,col3,col4
9,0.249768,0.587926,0.787779,0.692166
8,0.415833,0.47703,0.294153,0.808217
7,0.207671,0.398327,0.143084,0.173922
6,0.732162,0.211605,0.128542,0.950932
5,0.599208,0.426189,0.225376,0.082938
4,0.279166,0.800622,0.006861,0.045711
3,0.629184,0.68276,0.686305,0.631343
2,0.135509,0.64999,0.358425,0.286946
1,0.40662,0.601093,0.023405,0.937039
0,0.362735,0.683116,0.739503,0.708147


In [238]:
df = df.reset_index()

In [239]:
df

Unnamed: 0,index,A,B,C,D
0,9,0.249768,0.587926,0.787779,0.692166
1,8,0.415833,0.47703,0.294153,0.808217
2,7,0.207671,0.398327,0.143084,0.173922
3,6,0.732162,0.211605,0.128542,0.950932
4,5,0.599208,0.426189,0.225376,0.082938
5,4,0.279166,0.800622,0.006861,0.045711
6,3,0.629184,0.68276,0.686305,0.631343
7,2,0.135509,0.64999,0.358425,0.286946
8,1,0.40662,0.601093,0.023405,0.937039
9,0,0.362735,0.683116,0.739503,0.708147


In [241]:
df.rename({'index' : 'Col1'}, axis=1)

Unnamed: 0,Col1,A,B,C,D
0,9,0.249768,0.587926,0.787779,0.692166
1,8,0.415833,0.47703,0.294153,0.808217
2,7,0.207671,0.398327,0.143084,0.173922
3,6,0.732162,0.211605,0.128542,0.950932
4,5,0.599208,0.426189,0.225376,0.082938
5,4,0.279166,0.800622,0.006861,0.045711
6,3,0.629184,0.68276,0.686305,0.631343
7,2,0.135509,0.64999,0.358425,0.286946
8,1,0.40662,0.601093,0.023405,0.937039
9,0,0.362735,0.683116,0.739503,0.708147


In [243]:
df.columns.name = 'Columns'

In [244]:
df.index.name = 'Index'

In [245]:
df

Columns,index,A,B,C,D
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,9,0.249768,0.587926,0.787779,0.692166
1,8,0.415833,0.47703,0.294153,0.808217
2,7,0.207671,0.398327,0.143084,0.173922
3,6,0.732162,0.211605,0.128542,0.950932
4,5,0.599208,0.426189,0.225376,0.082938
5,4,0.279166,0.800622,0.006861,0.045711
6,3,0.629184,0.68276,0.686305,0.631343
7,2,0.135509,0.64999,0.358425,0.286946
8,1,0.40662,0.601093,0.023405,0.937039
9,0,0.362735,0.683116,0.739503,0.708147


In [247]:
df = df.rename({'index': 'Col1'}, axis=1)

In [248]:
df

Columns,Col1,A,B,C,D
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,9,0.249768,0.587926,0.787779,0.692166
1,8,0.415833,0.47703,0.294153,0.808217
2,7,0.207671,0.398327,0.143084,0.173922
3,6,0.732162,0.211605,0.128542,0.950932
4,5,0.599208,0.426189,0.225376,0.082938
5,4,0.279166,0.800622,0.006861,0.045711
6,3,0.629184,0.68276,0.686305,0.631343
7,2,0.135509,0.64999,0.358425,0.286946
8,1,0.40662,0.601093,0.023405,0.937039
9,0,0.362735,0.683116,0.739503,0.708147


In [255]:
dfb = pd.DataFrame({'a': ['one', 'one', 'two',
                          'three', 'two', 'one', 'six'],
                    'c': np.arange(7)})

In [256]:
dfb

Unnamed: 0,a,c
0,one,0
1,one,1
2,two,2
3,three,3
4,two,4
5,one,5
6,six,6


In [258]:
dfb.loc[dfb['a'].str.startswith('o'), 'c'] = 42

In [259]:
dfb

Unnamed: 0,a,c
0,one,42
1,one,42
2,two,2
3,three,3
4,two,4
5,one,42
6,six,6
