# Transpose Frame

In [15]:
import numpy as np
import pandas as pd
pop = {'Nevada': {2001: 2.4, 2002: 2.9},'Ohio': {2000: 1.5, 2001: 1.7, 2002: 3.6}}

In [16]:
pop = pd.DataFrame(pop)

In [17]:
pop

Unnamed: 0,Nevada,Ohio
2000,,1.5
2001,2.4,1.7
2002,2.9,3.6


In [18]:
pop.T

Unnamed: 0,2000,2001,2002
Nevada,,2.4,2.9
Ohio,1.5,1.7,3.6


In [8]:
 pd.DataFrame(pop, index=[2001, 2002, 2003]) 

Unnamed: 0,Nevada,Ohio
2001,2.4,1.7
2002,2.9,3.6
2003,,


In [19]:
data1 = {'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada', 'Nevada'],
        'year':  [2000, 2001, 2002, 2001, 2002, 2003],
        'pop':   [1.5, 1.7, 3.6, 2.4, 2.9, 3.2]
       }
frame3 = pd.DataFrame(pop)

In [20]:
frame3

Unnamed: 0,Nevada,Ohio
2000,,1.5
2001,2.4,1.7
2002,2.9,3.6


In [21]:
pdata = {'Ohio': frame3['Ohio'][:-1],  'Nevada': frame3['Nevada'][:2]}


In [23]:
pdata=pd.DataFrame(pdata)

In [24]:
pdata

Unnamed: 0,Ohio,Nevada
2000,1.5,
2001,1.7,2.4


In [25]:
frame3['Ohio'][:-1]

2000    1.5
2001    1.7
Name: Ohio, dtype: float64

In [26]:
frame3['Nevada'][:2]

2000    NaN
2001    2.4
Name: Nevada, dtype: float64

In [27]:
 frame3.index.name = 'year'; frame3.columns.name = 'state'

In [28]:
frame3

state,Nevada,Ohio
year,Unnamed: 1_level_1,Unnamed: 2_level_1
2000,,1.5
2001,2.4,1.7
2002,2.9,3.6


In [29]:
frame3.values

array([[nan, 1.5],
       [2.4, 1.7],
       [2.9, 3.6]])

# Index Objects

In [31]:
obj = pd.Series(range(3), index=['a', 'b', 'c'])

In [32]:
obj.index

Index(['a', 'b', 'c'], dtype='object')

In [33]:
index = obj.index

In [34]:
index[1:]

Index(['b', 'c'], dtype='object')

In [35]:
index[:1]

Index(['a'], dtype='object')

In [36]:
obj[1:]

b    1
c    2
dtype: int64

In [37]:
index[1]

'b'

In [38]:
labels = pd.Index(np.arange(3))


In [39]:
labels

Int64Index([0, 1, 2], dtype='int64')

In [40]:
 obj2 = pd.Series([1.5, -2.5, 0], index=labels)


In [41]:
obj2

0    1.5
1   -2.5
2    0.0
dtype: float64

# Reindexing

In [42]:
obj = pd.Series([4.5, 7.2, -5.3, 3.6], index=['d', 'b', 'a', 'c'])

In [43]:
obj

d    4.5
b    7.2
a   -5.3
c    3.6
dtype: float64

In [44]:
obj2 = obj.reindex(['a', 'b', 'c', 'd', 'e'])

In [45]:
obj2

a   -5.3
b    7.2
c    3.6
d    4.5
e    NaN
dtype: float64

In [46]:
 obj3 = pd.Series(['blue', 'purple', 'yellow'], index=[0, 2, 4])
obj3

0      blue
2    purple
4    yellow
dtype: object

In [47]:
obj3.reindex(range(6), method='ffill') #forward fill method by which it will fill next index if its not available

0      blue
1      blue
2    purple
3    purple
4    yellow
5    yellow
dtype: object

In [48]:
 obj3 = pd.Series(['blue', 'purple', 'yellow'], index=[1, 2, 4])
obj3

1      blue
2    purple
4    yellow
dtype: object

In [49]:
obj3.reindex(range(15), method='ffill') 

0        NaN
1       blue
2     purple
3     purple
4     yellow
5     yellow
6     yellow
7     yellow
8     yellow
9     yellow
10    yellow
11    yellow
12    yellow
13    yellow
14    yellow
dtype: object

In [50]:
 frame = pd.DataFrame(np.arange(9).reshape((3, 3)), index=['a', 'c', 'd'], columns=['Ohio', 'Texas', 'California'])

In [51]:
frame

Unnamed: 0,Ohio,Texas,California
a,0,1,2
c,3,4,5
d,6,7,8


In [52]:
frame.reindex(['a','b','c','d'])

Unnamed: 0,Ohio,Texas,California
a,0.0,1.0,2.0
b,,,
c,3.0,4.0,5.0
d,6.0,7.0,8.0


In [53]:
 states = ['Texas', 'Utah', 'California']


In [54]:
 frame.reindex(columns=states) 

Unnamed: 0,Texas,Utah,California
a,1,,2
c,4,,5
d,7,,8


In [55]:
 frame.loc[['a', 'b', 'c', 'd'], states] 

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  return self._getitem_tuple(key)


Unnamed: 0,Texas,Utah,California
a,1.0,,2.0
b,,,
c,4.0,,5.0
d,7.0,,8.0


# Dropping Entries from an Axis

In [65]:
obj = pd.Series(np.arange(5.), index=['a', 'b', 'c', 'd', 'e'])
obj

a    0.0
b    1.0
c    2.0
d    3.0
e    4.0
dtype: float64

In [66]:
 new_obj = obj.drop('c')
new_obj

a    0.0
b    1.0
d    3.0
e    4.0
dtype: float64

In [61]:
obj.drop(['d', 'c']) 


a    0.0
b    1.0
e    4.0
dtype: float64

In [62]:
obj

a    0.0
b    1.0
c    2.0
d    3.0
e    4.0
dtype: float64

In [63]:
obj.drop(['d', 'c'],inplace=True)

In [64]:
obj

a    0.0
b    1.0
e    4.0
dtype: float64

In [67]:
 data = pd.DataFrame(np.arange(16).reshape((4, 4)),
index=['Ohio', 'Colorado', 'Utah', 'New York'],  columns=['one', 'two', 'three', 'four'])
data

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [68]:
data.drop(['Colorado', 'Ohio'])

Unnamed: 0,one,two,three,four
Utah,8,9,10,11
New York,12,13,14,15


In [69]:
data.drop('two',axis=1)

Unnamed: 0,one,three,four
Ohio,0,2,3
Colorado,4,6,7
Utah,8,10,11
New York,12,14,15


In [70]:
 data.drop(['two', 'four'], axis='columns')

Unnamed: 0,one,three
Ohio,0,2
Colorado,4,6
Utah,8,10
New York,12,14


In [71]:
data

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [73]:
data.drop('one',axis=1,inplace=True)

In [74]:
data

Unnamed: 0,two,three,four
Ohio,1,2,3
Colorado,5,6,7
Utah,9,10,11
New York,13,14,15


# Indexing, Selection, and Filtering 

In [75]:
 obj = pd.Series(np.arange(4.), index=['a', 'b', 'c', 'd'])
obj

a    0.0
b    1.0
c    2.0
d    3.0
dtype: float64

In [76]:
obj['b']

1.0

In [78]:
obj[1]

1.0

In [79]:
 obj[2:4] 

c    2.0
d    3.0
dtype: float64

In [80]:
 obj[['b', 'a', 'd']] 

b    1.0
a    0.0
d    3.0
dtype: float64

In [81]:
 obj[[1, 3]] 

b    1.0
d    3.0
dtype: float64

In [82]:
obj[obj < 2] 

a    0.0
b    1.0
dtype: float64

In [83]:
 obj['b':'c'] 

b    1.0
c    2.0
dtype: float64

In [84]:
 obj['b':'c'] = 5


In [85]:
obj

a    0.0
b    5.0
c    5.0
d    3.0
dtype: float64

In [86]:
 data = pd.DataFrame(np.arange(16).reshape((4, 4)),index=['Ohio', 'Colorado', 'Utah', 'New York'],columns=['one', 'two', 'three', 'four'])


In [89]:
data

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [90]:
data['two']

Ohio         1
Colorado     5
Utah         9
New York    13
Name: two, dtype: int32

In [91]:
 data[['three', 'one']] 

Unnamed: 0,three,one
Ohio,2,0
Colorado,6,4
Utah,10,8
New York,14,12


In [92]:
 data[:2] 

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7


In [93]:
 data[data['three'] > 5] 

Unnamed: 0,one,two,three,four
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [94]:
 data < 5 

Unnamed: 0,one,two,three,four
Ohio,True,True,True,True
Colorado,True,False,False,False
Utah,False,False,False,False
New York,False,False,False,False


In [95]:
data

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [96]:
 data[data < 5] = 0


In [97]:
data

Unnamed: 0,one,two,three,four
Ohio,0,0,0,0
Colorado,0,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


# Selection with loc and iloc

In [98]:
data.loc['Colorado', ['two', 'three']] 

two      5
three    6
Name: Colorado, dtype: int32

In [99]:
 data.iloc[2, [3, 0, 1]] 

four    11
one      8
two      9
Name: Utah, dtype: int32

In [100]:
data.iloc[2] 

one       8
two       9
three    10
four     11
Name: Utah, dtype: int32

In [101]:
 data.iloc[[1, 2], [3, 0, 1]] 

Unnamed: 0,four,one,two
Colorado,7,0,5
Utah,11,8,9


In [102]:
 data.loc[:'Utah', 'two'] 

Ohio        0
Colorado    5
Utah        9
Name: two, dtype: int32

In [103]:
data

Unnamed: 0,one,two,three,four
Ohio,0,0,0,0
Colorado,0,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [104]:
 data.iloc[:, :3][data.three > 5] 

Unnamed: 0,one,two,three
Colorado,0,5,6
Utah,8,9,10
New York,12,13,14


# Arithmetic and Data Alignment 

In [106]:
 df1 = pd.DataFrame(np.arange(9.).reshape((3, 3)), columns=list('bcd'),index=['Ohio', 'Texas', 'Colorado'])
df1

Unnamed: 0,b,c,d
Ohio,0.0,1.0,2.0
Texas,3.0,4.0,5.0
Colorado,6.0,7.0,8.0


In [109]:
 df2 = pd.DataFrame(np.arange(12.).reshape((4, 3)), columns=list('bde'),index=['Utah', 'Ohio', 'Texas', 'Oregon'])
df2


Unnamed: 0,b,d,e
Utah,0.0,1.0,2.0
Ohio,3.0,4.0,5.0
Texas,6.0,7.0,8.0
Oregon,9.0,10.0,11.0


In [110]:
 df1 + df2 

Unnamed: 0,b,c,d,e
Colorado,,,,
Ohio,3.0,,6.0,
Oregon,,,,
Texas,9.0,,12.0,
Utah,,,,


In [111]:
 df1 = pd.DataFrame({'A': [1, 2]})

In [112]:
 df2 = pd.DataFrame({'B': [3, 4]})


In [113]:
df1

Unnamed: 0,A
0,1
1,2


In [114]:
df2

Unnamed: 0,B
0,3
1,4


In [115]:
df1-df2

Unnamed: 0,A,B
0,,
1,,


# Arithmetic methods with fill values 

In [119]:
df1 = pd.DataFrame(np.arange(12.).reshape((3, 4)), columns=list('abcd'))
df1

Unnamed: 0,a,b,c,d
0,0.0,1.0,2.0,3.0
1,4.0,5.0,6.0,7.0
2,8.0,9.0,10.0,11.0


In [121]:
 df2 = pd.DataFrame(np.arange(20.).reshape((4, 5)),columns=list('abcde'))
df2

Unnamed: 0,a,b,c,d,e
0,0.0,1.0,2.0,3.0,4.0
1,5.0,6.0,7.0,8.0,9.0
2,10.0,11.0,12.0,13.0,14.0
3,15.0,16.0,17.0,18.0,19.0


In [122]:
 df2.loc[1, 'b'] = np.nan

In [123]:
df2

Unnamed: 0,a,b,c,d,e
0,0.0,1.0,2.0,3.0,4.0
1,5.0,,7.0,8.0,9.0
2,10.0,11.0,12.0,13.0,14.0
3,15.0,16.0,17.0,18.0,19.0


In [124]:
df1+df2

Unnamed: 0,a,b,c,d,e
0,0.0,2.0,4.0,6.0,
1,9.0,,13.0,15.0,
2,18.0,20.0,22.0,24.0,
3,,,,,


In [125]:
 df1.add(df2, fill_value=0)

Unnamed: 0,a,b,c,d,e
0,0.0,2.0,4.0,6.0,4.0
1,9.0,5.0,13.0,15.0,9.0
2,18.0,20.0,22.0,24.0,14.0
3,15.0,16.0,17.0,18.0,19.0


In [126]:
 df1.add(df2, fill_value=1)

Unnamed: 0,a,b,c,d,e
0,0.0,2.0,4.0,6.0,5.0
1,9.0,6.0,13.0,15.0,10.0
2,18.0,20.0,22.0,24.0,15.0
3,16.0,17.0,18.0,19.0,20.0


In [127]:
 1 / df1 

Unnamed: 0,a,b,c,d
0,inf,1.0,0.5,0.333333
1,0.25,0.2,0.166667,0.142857
2,0.125,0.111111,0.1,0.090909


In [128]:
 df1.rdiv(1) 

Unnamed: 0,a,b,c,d
0,inf,1.0,0.5,0.333333
1,0.25,0.2,0.166667,0.142857
2,0.125,0.111111,0.1,0.090909


In [132]:
df1.reindex(columns=df2.columns,index=df2.index ,fill_value=1) 

Unnamed: 0,a,b,c,d,e
0,0.0,1.0,2.0,3.0,1.0
1,4.0,5.0,6.0,7.0,1.0
2,8.0,9.0,10.0,11.0,1.0
3,1.0,1.0,1.0,1.0,1.0


# Operations between DataFrame and Series

In [134]:
arr = np.arange(12.).reshape((3, 4))
arr

array([[ 0.,  1.,  2.,  3.],
       [ 4.,  5.,  6.,  7.],
       [ 8.,  9., 10., 11.]])

In [135]:
 arr[0] 

array([0., 1., 2., 3.])

In [136]:
arr-arr[0]

array([[0., 0., 0., 0.],
       [4., 4., 4., 4.],
       [8., 8., 8., 8.]])

In [137]:
x = np.array([[1,2,3]])
y=np.array([[1],[2],[3]])
print(x.shape,x)

(1, 3) [[1 2 3]]


In [138]:
y.shape,y

((3, 1), array([[1],
        [2],
        [3]]))

In [139]:
x,y=np.broadcast_arrays(x,y)

In [140]:
x

array([[1, 2, 3],
       [1, 2, 3],
       [1, 2, 3]])

In [141]:
y

array([[1, 1, 1],
       [2, 2, 2],
       [3, 3, 3]])

# Function Application and Mapping

In [150]:
 frame = pd.DataFrame(np.random.randn(4, 3), columns=list('bde'), index=['Utah', 'Ohio', 'Texas', 'Oregon'])
frame

Unnamed: 0,b,d,e
Utah,1.05032,-0.606479,1.060157
Ohio,-0.709852,-0.645488,1.788494
Texas,-0.851136,1.799537,0.66934
Oregon,-0.558044,-0.510973,-0.831702


In [151]:
np.abs(frame)

Unnamed: 0,b,d,e
Utah,1.05032,0.606479,1.060157
Ohio,0.709852,0.645488,1.788494
Texas,0.851136,1.799537,0.66934
Oregon,0.558044,0.510973,0.831702


In [154]:
 f = lambda x: x.max() - x.min()

In [155]:
 frame.apply(f) 

b    1.901456
d    2.445024
e    2.620196
dtype: float64

# Lambda Function

In [148]:
f = lambda x:x*100
f(2)

200

In [156]:
frame.apply(f, axis='columns') 

Utah      1.666636
Ohio      2.498346
Texas     2.650672
Oregon    0.320730
dtype: float64

In [157]:
def f(x): 
     return pd.Series([x.min(), x.max()], index=['min', 'max'])

In [158]:
frame.apply(f)

Unnamed: 0,b,d,e
min,-0.851136,-0.645488,-0.831702
max,1.05032,1.799537,1.788494
