In [1]:
import pandas as pd
obj = pd.Series([4, 7, -5, 3])
obj

0    4
1    7
2   -5
3    3
dtype: int64

In [2]:
obj.values

array([ 4,  7, -5,  3])

In [3]:
obj.index

RangeIndex(start=0, stop=4, step=1)

In [4]:
obj2 = pd.Series([4, 7, -5, 3], index=['b', 'd', 'a', 'c'])
obj2

b    4
d    7
a   -5
c    3
dtype: int64

In [5]:
obj2 = pd.Series({'b': 4, 'd': 7, 'a': -5, 'c': 3})
obj2

b    4
d    7
a   -5
c    3
dtype: int64

In [6]:
obj2.index

Index(['b', 'd', 'a', 'c'], dtype='object')

In [7]:
obj2['a']

-5

In [8]:
obj2['d'] = 99    # label-based assignment (not slicing): the Series itself is modified in place
obj2

b     4
d    99
a    -5
c     3
dtype: int64

In [9]:
obj2[['c', 'a', 'd']]

c     3
a    -5
d    99
dtype: int64

In [10]:
obj2 > 0

b     True
d     True
a    False
c     True
dtype: bool

In [11]:
obj2[obj2 > 0]

b     4
d    99
c     3
dtype: int64

In [12]:
import numpy as np

In [13]:
np.exp(obj2)

b    5.459815e+01
d    9.889030e+42
a    6.737947e-03
c    2.008554e+01
dtype: float64

In [14]:
'b' in obj2     # like in a dict - search over keys

True

In [15]:
4 in obj2

False

In [16]:
sdata = {'Ohio': 35000, 'Texas': 71000, 'Oregon': 16000, 'Utah': 5000}
obj3 = pd.Series(sdata)
obj3

Ohio      35000
Texas     71000
Oregon    16000
Utah       5000
dtype: int64

In [17]:
states = ['California', 'Ohio', 'Oregon', 'Texas']
obj4 = pd.Series(sdata, index=states)
obj4

California        NaN
Ohio          35000.0
Oregon        16000.0
Texas         71000.0
dtype: float64

In [18]:
pd.isnull(obj4)

California     True
Ohio          False
Oregon        False
Texas         False
dtype: bool

In [19]:
pd.isna(obj4)

California     True
Ohio          False
Oregon        False
Texas         False
dtype: bool

In [20]:
obj4.isnull()

California     True
Ohio          False
Oregon        False
Texas         False
dtype: bool

In [21]:
obj3 + obj4

California         NaN
Ohio           70000.0
Oregon         32000.0
Texas         142000.0
Utah               NaN
dtype: float64

In [22]:
obj

0    4
1    7
2   -5
3    3
dtype: int64

In [23]:
obj.index = ['Bob', 'Steve', 'Jeff', 'Ryan']     # rename index
obj

Bob      4
Steve    7
Jeff    -5
Ryan     3
dtype: int64

In [24]:
# Column-oriented input: every key becomes a column, every list its values.
data = {
    'state': ['Ohio'] * 3 + ['Nevada'] * 3,
    'year': [2000, 2001, 2002, 2001, 2002, 2003],
    'pop': [1.5, 1.7, 3.6, 2.4, 2.9, 3.2],
}
frame = pd.DataFrame(data=data)
frame

Unnamed: 0,state,year,pop
0,Ohio,2000,1.5
1,Ohio,2001,1.7
2,Ohio,2002,3.6
3,Nevada,2001,2.4
4,Nevada,2002,2.9
5,Nevada,2003,3.2


In [25]:
pd.DataFrame(data, columns=['year', 'state', 'governor'])

Unnamed: 0,year,state,governor
0,2000,Ohio,
1,2001,Ohio,
2,2002,Ohio,
3,2001,Nevada,
4,2002,Nevada,
5,2003,Nevada,


In [26]:
frame2 = pd.DataFrame(data, index=['one', 'two', 'three', 'four', 'five', 'six'])
frame2

Unnamed: 0,state,year,pop
one,Ohio,2000,1.5
two,Ohio,2001,1.7
three,Ohio,2002,3.6
four,Nevada,2001,2.4
five,Nevada,2002,2.9
six,Nevada,2003,3.2


In [27]:
frame2['state']

one        Ohio
two        Ohio
three      Ohio
four     Nevada
five     Nevada
six      Nevada
Name: state, dtype: object

In [28]:
frame2.state

one        Ohio
two        Ohio
three      Ohio
four     Nevada
five     Nevada
six      Nevada
Name: state, dtype: object

In [29]:
frame2.loc['three']

state    Ohio
year     2002
pop       3.6
Name: three, dtype: object

In [30]:
frame2['debt'] = 16.5
frame2

Unnamed: 0,state,year,pop,debt
one,Ohio,2000,1.5,16.5
two,Ohio,2001,1.7,16.5
three,Ohio,2002,3.6,16.5
four,Nevada,2001,2.4,16.5
five,Nevada,2002,2.9,16.5
six,Nevada,2003,3.2,16.5


In [31]:
frame2['debt'] = np.arange(6.)
frame2

Unnamed: 0,state,year,pop,debt
one,Ohio,2000,1.5,0.0
two,Ohio,2001,1.7,1.0
three,Ohio,2002,3.6,2.0
four,Nevada,2001,2.4,3.0
five,Nevada,2002,2.9,4.0
six,Nevada,2003,3.2,5.0


In [32]:
val = pd.Series([1, 2, 3, 4, 5, 6])
frame2['debt'] = val       # wrong assignment - indexes don't match!
frame2

Unnamed: 0,state,year,pop,debt
one,Ohio,2000,1.5,
two,Ohio,2001,1.7,
three,Ohio,2002,3.6,
four,Nevada,2001,2.4,
five,Nevada,2002,2.9,
six,Nevada,2003,3.2,


In [33]:
val

0    1
1    2
2    3
3    4
4    5
5    6
dtype: int64

In [34]:
val = pd.Series([1, 2, 3, 4, 5, 6], index=['one', 'two', 'three', 'four', 'five', 'six'])
frame2['debt'] = val
frame2

Unnamed: 0,state,year,pop,debt
one,Ohio,2000,1.5,1
two,Ohio,2001,1.7,2
three,Ohio,2002,3.6,3
four,Nevada,2001,2.4,4
five,Nevada,2002,2.9,5
six,Nevada,2003,3.2,6


In [35]:
val = pd.Series([-1.2, -1.5, -1.7], index=['four', 'five', 'two'])
frame2['debt'] = val          # no index match, no value
frame2

Unnamed: 0,state,year,pop,debt
one,Ohio,2000,1.5,
two,Ohio,2001,1.7,-1.7
three,Ohio,2002,3.6,
four,Nevada,2001,2.4,-1.2
five,Nevada,2002,2.9,-1.5
six,Nevada,2003,3.2,


In [36]:
frame2['eastern'] = (frame2['state'] == 'Ohio')
frame2

Unnamed: 0,state,year,pop,debt,eastern
one,Ohio,2000,1.5,,True
two,Ohio,2001,1.7,-1.7,True
three,Ohio,2002,3.6,,True
four,Nevada,2001,2.4,-1.2,False
five,Nevada,2002,2.9,-1.5,False
six,Nevada,2003,3.2,,False


In [37]:
del frame2['eastern']
frame2.columns

Index(['state', 'year', 'pop', 'debt'], dtype='object')

In [38]:
# Nested dict: outer keys become columns, inner keys become row labels.
nevada_by_year = {2001: 2.4, 2002: 2.9}
ohio_by_year = {2000: 1.5, 2001: 1.7, 2002: 3.6}
pop = {'Nevada': nevada_by_year, 'Ohio': ohio_by_year}
frame3 = pd.DataFrame(data=pop)
frame3

Unnamed: 0,Nevada,Ohio
2001,2.4,1.7
2002,2.9,3.6
2000,,1.5


In [39]:
frame3.T

Unnamed: 0,2001,2002,2000
Nevada,2.4,2.9,
Ohio,1.7,3.6,1.5


In [40]:
pd.DataFrame(pop, index=[2000, 2001, 2002, 2003])

Unnamed: 0,Nevada,Ohio
2000,,1.5
2001,2.4,1.7
2002,2.9,3.6
2003,,


In [41]:
pdata = {'Ohio': frame3['Ohio'][:-1],
         'Nevada': frame3['Nevada'][:2]
        }
pd.DataFrame(pdata)

Unnamed: 0,Ohio,Nevada
2001,1.7,2.4
2002,3.6,2.9


In [42]:
# Plain [] with an integer slice is positional, not label-based: positions
# 2001:2002 do not exist in this 3-row Series, so the result is empty.
# Label-based slicing needs .loc.
frame3['Ohio'][2001:2002]

Series([], Name: Ohio, dtype: float64)

In [43]:
frame3['Ohio'].loc[2001:2002]

2001    1.7
2002    3.6
Name: Ohio, dtype: float64

In [44]:
frame3['Ohio'][:-1]

2001    1.7
2002    3.6
Name: Ohio, dtype: float64

In [45]:
frame3.values

array([[2.4, 1.7],
       [2.9, 3.6],
       [nan, 1.5]])

In [46]:
pdata = {'Nevada': frame3['Nevada'][:],
         'Ohio': frame3['Ohio'][:-1]    # what's not selected will return NaN
        }
pd.DataFrame(pdata)      # index is sorted implicitly!

Unnamed: 0,Nevada,Ohio
2000,,
2001,2.4,1.7
2002,2.9,3.6


In [47]:
frame2

Unnamed: 0,state,year,pop,debt
one,Ohio,2000,1.5,
two,Ohio,2001,1.7,-1.7
three,Ohio,2002,3.6,
four,Nevada,2001,2.4,-1.2
five,Nevada,2002,2.9,-1.5
six,Nevada,2003,3.2,


In [48]:
pdata2 = {'State': frame2['state'][:3],
          'Pop': frame2['pop'][:]
         }
pd.DataFrame(pdata2)      # index is sorted implicitly!

Unnamed: 0,State,Pop
five,,2.9
four,,2.4
one,Ohio,1.5
six,,3.2
three,Ohio,3.6
two,Ohio,1.7


In [49]:
pd.Index(frame2)

Index([   ('Ohio', 2000, 1.5, nan),   ('Ohio', 2001, 1.7, -1.7),
          ('Ohio', 2002, 3.6, nan), ('Nevada', 2001, 2.4, -1.2),
       ('Nevada', 2002, 2.9, -1.5),  ('Nevada', 2003, 3.2, nan)],
      dtype='object')

In [50]:
frame2.index

Index(['one', 'two', 'three', 'four', 'five', 'six'], dtype='object')

In [51]:
pd.Index(np.arange(3,6))

Int64Index([3, 4, 5], dtype='int64')

In [52]:
pd.Series(np.arange(3,6)).index

RangeIndex(start=0, stop=3, step=1)

In [53]:
obj2 = pd.Series([1.5, -2.5, 0], index=[3, 4, 5])
obj2

3    1.5
4   -2.5
5    0.0
dtype: float64

In [54]:
obj2 = pd.Series([1.5, -2.5, 0], index=np.arange(3,6))
obj2

3    1.5
4   -2.5
5    0.0
dtype: float64

In [55]:
obj2 = pd.Series([1.5, -2.5, 0], index=pd.Index(np.arange(3,6)))
obj2

3    1.5
4   -2.5
5    0.0
dtype: float64

In [56]:
obj2 = pd.Series([1.5, -2.5, 0], index=pd.Series(np.arange(3,6)).index)
obj2

0    1.5
1   -2.5
2    0.0
dtype: float64

In [57]:
obj2 = obj2.reindex([2, 0, 1])
obj2

2    0.0
0    1.5
1   -2.5
dtype: float64

In [58]:
obj2.index

Int64Index([2, 0, 1], dtype='int64')

In [59]:
# can pass in numpy ndarrays to construct pandas dataframes
# then also pass in index=[] and columns=[] to complete construction

In [60]:
frame = pd.DataFrame(np.arange(9).reshape(3, 3),
                     index=['a', 'c', 'd'],
                     columns=['Ohio', 'Texas', 'California']
                    )
frame

Unnamed: 0,Ohio,Texas,California
a,0,1,2
c,3,4,5
d,6,7,8


In [61]:
frame2 = frame.reindex(['a', 'b', 'c', 'd'])
frame2

Unnamed: 0,Ohio,Texas,California
a,0.0,1.0,2.0
b,,,
c,3.0,4.0,5.0
d,6.0,7.0,8.0


In [62]:
frame

Unnamed: 0,Ohio,Texas,California
a,0,1,2
c,3,4,5
d,6,7,8


In [63]:
frame3 = frame.reindex(columns=['Texas', 'Utah', 'California'])
frame3

Unnamed: 0,Texas,Utah,California
a,1,,2
c,4,,5
d,7,,8


In [64]:
obj = pd.Series(np.arange(5.), index=['a', 'b', 'c', 'd', 'e'])
obj

a    0.0
b    1.0
c    2.0
d    3.0
e    4.0
dtype: float64

In [65]:
obj.drop(['d', 'c'], inplace=True)
obj

a    0.0
b    1.0
e    4.0
dtype: float64

In [66]:
# Series.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat adds
# the new labelled value the same way and also works on older versions.
obj = pd.concat([obj, pd.Series({'d': 4})])

In [67]:
obj2 = pd.DataFrame(obj)
obj2

Unnamed: 0,0
a,0.0
b,1.0
e,4.0
d,4.0


In [68]:
obj2.iloc[2] = 3
obj2

Unnamed: 0,0
a,0.0
b,1.0
e,3.0
d,4.0


In [69]:
obj2.rename(index={'e': 'c'}, inplace=True)
obj2

Unnamed: 0,0
a,0.0
b,1.0
c,3.0
d,4.0


In [70]:
obj[obj < 2]

a    0.0
b    1.0
dtype: float64

In [71]:
# Two Series whose labels only partly overlap.
s1 = pd.Series({'a': 7.3, 'c': -2.5, 'd': 3.4, 'e': 1.5})
s2 = pd.Series({'a': -2.1, 'c': 3.6, 'e': -1.5, 'f': 4, 'g': 3.1})
# Addition aligns on labels; a label missing from either operand yields NaN.
s1.add(s2)

a    5.2
c    1.1
d    NaN
e    0.0
f    NaN
g    NaN
dtype: float64

In [72]:
# Two frames that share only some row labels and some column labels.
df1 = pd.DataFrame(
    np.arange(9, dtype=float).reshape(3, -1),
    columns=['b', 'c', 'd'],
    index=['Ohio', 'Texas', 'Colorado'],
)
df2 = pd.DataFrame(
    np.arange(12, dtype=float).reshape(-1, 3),
    columns=['b', 'd', 'e'],
    index=['Utah', 'Ohio', 'Texas', 'Oregon'],
)
# Alignment happens on both axes; a cell without a partner in both frames is NaN.
df1.add(df2)

Unnamed: 0,b,c,d,e
Colorado,,,,
Ohio,3.0,,6.0,
Oregon,,,,
Texas,9.0,,12.0,
Utah,,,,


In [73]:
df1 = pd.DataFrame({'A': [1, 2]})
df1

Unnamed: 0,A
0,1
1,2


In [74]:
df2 = pd.DataFrame({'B': [1, 2]})
df2

Unnamed: 0,B
0,1
1,2


In [75]:
df1 - df2    # no common columns exist so no value

Unnamed: 0,A,B
0,,
1,,


In [76]:
df1.sub(df2)

Unnamed: 0,A,B
0,,
1,,


In [77]:
df1.sub(df2, fill_value=0)    # fill_value stands in for a value missing on one side only (here: the column the other frame lacks) before subtracting

Unnamed: 0,A,B
0,1.0,-1.0
1,2.0,-2.0


In [78]:
df = pd.DataFrame(np.arange(12.).reshape(3, 4), columns=list('abcd'))
df

Unnamed: 0,a,b,c,d
0,0.0,1.0,2.0,3.0
1,4.0,5.0,6.0,7.0
2,8.0,9.0,10.0,11.0


In [79]:
df / 10

Unnamed: 0,a,b,c,d
0,0.0,0.1,0.2,0.3
1,0.4,0.5,0.6,0.7
2,0.8,0.9,1.0,1.1


In [80]:
df.div(10)

Unnamed: 0,a,b,c,d
0,0.0,0.1,0.2,0.3
1,0.4,0.5,0.6,0.7
2,0.8,0.9,1.0,1.1


In [81]:
10 / df

Unnamed: 0,a,b,c,d
0,inf,10.0,5.0,3.333333
1,2.5,2.0,1.666667,1.428571
2,1.25,1.111111,1.0,0.909091


In [82]:
df.rdiv(10)

Unnamed: 0,a,b,c,d
0,inf,10.0,5.0,3.333333
1,2.5,2.0,1.666667,1.428571
2,1.25,1.111111,1.0,0.909091


In [83]:
arr = np.arange(12.).reshape(3, 4)
arr

array([[ 0.,  1.,  2.,  3.],
       [ 4.,  5.,  6.,  7.],
       [ 8.,  9., 10., 11.]])

In [84]:
arr[-1]

array([ 8.,  9., 10., 11.])

In [85]:
s = pd.Series(arr[-1])
s

0     8.0
1     9.0
2    10.0
3    11.0
dtype: float64

In [86]:
# s[-1] raises a KeyError here: with an integer index, -1 is looked up as a label, unlike NumPy's positional indexing. Use .iloc for positions instead:

In [87]:
s.iloc[-1]

11.0

In [88]:
df = pd.DataFrame(arr)
df

Unnamed: 0,0,1,2,3
0,0.0,1.0,2.0,3.0
1,4.0,5.0,6.0,7.0
2,8.0,9.0,10.0,11.0


In [89]:
# df[-1] raises a KeyError: [] on a DataFrame looks up a column label, and there is no column named -1 (unlike NumPy's positional indexing). Use .iloc for positions instead:

In [90]:
df.iloc[-1]

0     8.0
1     9.0
2    10.0
3    11.0
Name: 2, dtype: float64

In [91]:
df[1]            # indexing calls column first in a dataframe

0    1.0
1    5.0
2    9.0
Name: 1, dtype: float64

In [92]:
df[1][2]         # column name 1, row name 2

9.0

In [93]:
df.loc[2, 1]     # row name 2, column name 1 - different orientation for loc than indexing

9.0

In [94]:
# A 4x3 frame and a Series labelled with the frame's column names.
column_labels = ['b', 'd', 'e']
frame = pd.DataFrame(
    np.arange(12, dtype=float).reshape(4, 3),
    columns=column_labels,
    index=['Utah', 'Ohio', 'Texas', 'Oregon'],
)
series = pd.Series(np.arange(3, dtype=float), index=column_labels)

In [95]:
frame

Unnamed: 0,b,d,e
Utah,0.0,1.0,2.0
Ohio,3.0,4.0,5.0
Texas,6.0,7.0,8.0
Oregon,9.0,10.0,11.0


In [96]:
series

b    0.0
d    1.0
e    2.0
dtype: float64

In [97]:
frame - series        # broadcasting here

Unnamed: 0,b,d,e
Utah,0.0,0.0,0.0
Ohio,3.0,3.0,3.0
Texas,6.0,6.0,6.0
Oregon,9.0,9.0,9.0


In [98]:
frame

Unnamed: 0,b,d,e
Utah,0.0,1.0,2.0
Ohio,3.0,4.0,5.0
Texas,6.0,7.0,8.0
Oregon,9.0,10.0,11.0


In [99]:
series2 = pd.Series(np.arange(4.), index=['Utah', 'Ohio', 'Texas', 'Oregon'])
series2

Utah      0.0
Ohio      1.0
Texas     2.0
Oregon    3.0
dtype: float64

In [100]:
frame - series2      # nope not working - use .sub() and set operation axis to rows instead

Unnamed: 0,Ohio,Oregon,Texas,Utah,b,d,e
Utah,,,,,,,
Ohio,,,,,,,
Texas,,,,,,,
Oregon,,,,,,,


In [101]:
frame.sub(series2, axis=0)

Unnamed: 0,b,d,e
Utah,0.0,1.0,2.0
Ohio,2.0,3.0,4.0
Texas,4.0,5.0,6.0
Oregon,6.0,7.0,8.0


In [102]:
frame.sub(series2, axis=1)

Unnamed: 0,Ohio,Oregon,Texas,Utah,b,d,e
Utah,,,,,,,
Ohio,,,,,,,
Texas,,,,,,,
Oregon,,,,,,,


In [103]:
# NOTE: np.random.randn is called without a seed, so the numbers shown in the
# recorded output (and in the cells that follow) change on every run.
frame = pd.DataFrame(np.random.randn(4, 3), 
                     columns=list('bde'),
                     index=['Utah', 'Ohio', 'Texas', 'Oregon']
                    )
frame

Unnamed: 0,b,d,e
Utah,-0.596219,0.618827,-0.8371
Ohio,-0.410019,-0.514041,1.127391
Texas,0.866688,0.708506,1.046592
Oregon,0.507091,0.654268,0.713384


In [104]:
frame.apply(lambda x: x.max() - x.min())      # each column is passed to the function as a Series; the result is indexed by column label

b    1.462908
d    1.222548
e    1.964490
dtype: float64

In [105]:
frame.apply(lambda x: np.max(x) - np.min(x), axis=1)

Utah      1.455927
Ohio      1.641432
Texas     0.338086
Oregon    0.206293
dtype: float64

In [106]:
frame.apply(lambda x: pd.Series([x.max(), x.min()], index=['max', 'min']))

Unnamed: 0,b,d,e
max,0.866688,0.708506,1.127391
min,-0.596219,-0.514041,-0.8371


In [107]:
# format string methods - all 3 do the same:

In [108]:
# NOTE(review): newer pandas releases deprecate DataFrame.applymap in favour of
# DataFrame.map - confirm against the installed pandas version.
frame.applymap(lambda x: f'{x: .2f}')       # .applymap() is element-wise

Unnamed: 0,b,d,e
Utah,-0.6,0.62,-0.84
Ohio,-0.41,-0.51,1.13
Texas,0.87,0.71,1.05
Oregon,0.51,0.65,0.71


In [109]:
frame.applymap(lambda x: '{: .2f}'.format(x)) 

Unnamed: 0,b,d,e
Utah,-0.6,0.62,-0.84
Ohio,-0.41,-0.51,1.13
Texas,0.87,0.71,1.05
Oregon,0.51,0.65,0.71


In [110]:
frame.applymap(lambda x: '% .2f' % x) 

Unnamed: 0,b,d,e
Utah,-0.6,0.62,-0.84
Ohio,-0.41,-0.51,1.13
Texas,0.87,0.71,1.05
Oregon,0.51,0.65,0.71


In [111]:
frame['e'].map(lambda x: f'{x: .2f}')

Utah      -0.84
Ohio       1.13
Texas      1.05
Oregon     0.71
Name: e, dtype: object

In [112]:
# Small frame with deliberately unsorted row and column labels.
frame = pd.DataFrame(
    np.arange(8).reshape(2, -1),
    columns=list('dabc'),
    index=['three', 'one'],
)
frame

Unnamed: 0,d,a,b,c
three,0,1,2,3
one,4,5,6,7


In [113]:
frame.sort_index()

Unnamed: 0,d,a,b,c
one,4,5,6,7
three,0,1,2,3


In [114]:
frame.sort_index(axis=1)

Unnamed: 0,a,b,c,d
three,1,2,3,0
one,5,6,7,4


In [115]:
frame.sort_index(axis=1, ascending=False, inplace=True)
frame

Unnamed: 0,d,c,b,a
three,0,3,2,1
one,4,7,6,5


In [116]:
frame.sort_values(by='three', axis=1)

Unnamed: 0,d,a,b,c
three,0,1,2,3
one,4,5,6,7


In [117]:
obj = pd.Series([7, -5, 7, 4, 2, 0, 4])
obj.rank(method='first')     # ties are broken by order of appearance: the first 7 gets rank 6.0 and the second 7.0, rather than sharing an averaged 6.5

0    6.0
1    1.0
2    7.0
3    4.0
4    3.0
5    2.0
6    5.0
dtype: float64

In [118]:
frame = pd.DataFrame(obj, columns=['numbers'])
frame

Unnamed: 0,numbers
0,7
1,-5
2,7
3,4
4,2
5,0
6,4


In [119]:
# Rank the 'numbers' column (a Series) rather than the whole frame: assigning
# the DataFrame returned by frame.rank() to a single column only works while
# the frame has exactly one column, so the original failed when re-run.
frame['rank_by_first_seen'] = frame['numbers'].rank(method='first')
frame

Unnamed: 0,numbers,rank_by_first_seen
0,7,6.0
1,-5,1.0
2,7,7.0
3,4,4.0
4,2,3.0
5,0,2.0
6,4,5.0


In [120]:
frame.set_index('rank_by_first_seen', drop=True, inplace=True)
frame

Unnamed: 0_level_0,numbers
rank_by_first_seen,Unnamed: 1_level_1
6.0,7
1.0,-5
7.0,7
4.0,4
3.0,2
2.0,0
5.0,4


In [121]:
frame.sort_index(inplace=True)
frame

Unnamed: 0_level_0,numbers
rank_by_first_seen,Unnamed: 1_level_1
1.0,-5
2.0,0
3.0,2
4.0,4
5.0,4
6.0,7
7.0,7


In [122]:
frame.idxmax()

numbers    6.0
dtype: float64

In [123]:
frame.numbers.argmax()

5

In [124]:
frame.numbers.idxmax()

6.0

In [125]:
obj = pd.Series(list('cadaabbcc'))
uniques = obj.unique()
uniques

array(['c', 'a', 'd', 'b'], dtype=object)

In [126]:
obj.value_counts()

c    3
a    3
b    2
d    1
dtype: int64

In [127]:
# The top-level pd.value_counts() helper is deprecated (pandas 2.1); wrapping
# the raw ndarray in a Series and calling the method gives the same counts.
pd.Series(obj.values).value_counts()

c    3
a    3
b    2
d    1
dtype: int64

In [128]:
obj

0    c
1    a
2    d
3    a
4    a
5    b
6    b
7    c
8    c
dtype: object

In [129]:
mask = obj.isin(['b', 'c'])
mask

0     True
1    False
2    False
3    False
4    False
5     True
6     True
7     True
8     True
dtype: bool

In [130]:
obj[mask]

0    c
5    b
6    b
7    c
8    c
dtype: object

In [131]:
obj[(obj == 'b') | (obj == 'c')]

0    c
5    b
6    b
7    c
8    c
dtype: object

In [132]:
# Values to look up, and the reference values whose positions we want.
to_match = pd.Series([*'cabbcaegsfkric'])
unique_vals = pd.Series(['c', 'b', 'a'])
# Position of each to_match value within unique_vals; -1 marks "not found".
lookup = pd.Index(unique_vals)
lookup.get_indexer(to_match)

array([ 0,  2,  1,  1,  0,  2, -1, -1, -1, -1, -1, -1, -1,  0])

In [133]:
unique_indexes = pd.Index(unique_vals)   # .Index makes values to indexes so now 'c', 'b', 'a' are all indexes
unique_indexes

Index(['c', 'b', 'a'], dtype='object')

In [134]:
unique_indexes.get_indexer(to_match)     # .get_indexer gets the integer indexes and maps onto the target

array([ 0,  2,  1,  1,  0,  2, -1, -1, -1, -1, -1, -1, -1,  0])

In [140]:
df = pd.DataFrame({'A': [1, 2, 3], 'B': list('abc')}, index=[1, 3, 5])
df

Unnamed: 0,A,B
1,1,a
3,2,b
5,3,c


In [141]:
df = pd.DataFrame([[1, 'a'], [2, 'b'], [3, 'c']], index=[1, 3, 5], columns=['A', 'B'])
df

Unnamed: 0,A,B
1,1,a
3,2,b
5,3,c


In [135]:
# .reindex vs .rename
# .rename only relabels the existing index / column labels; the data stays attached to the same rows
# .reindex conforms the data to a new set of labels: rows are looked up by label, and labels absent from the old index get NaN

In [143]:
df.rename(mapper={1: 0, 3: 1, 5: 2})

Unnamed: 0,A,B
0,1,a
1,2,b
2,3,c


In [146]:
df.reindex(index=[0, 1, 2])

Unnamed: 0,A,B
0,,
1,1.0,a
2,,


In [148]:
df.rename(index=lambda x: (x - 1) // 2)

Unnamed: 0,A,B
0,1,a
1,2,b
2,3,c
