[Reference](https://medium.com/analytics-vidhya/fluent-pandas-22473fa3c30d)

In [1]:
import numpy as np
import pandas as pd
import matplotlib as mpl
np.__version__
pd.__version__
mpl.__version__

import plotnine.data as ggdata

# Series

In [2]:
s = pd.Series([1,3,5, np.nan, 6, 8])
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [3]:
s1 = pd.Series(np.arange(4)*1, index=['a','b','c1','d'])
s1

a     0
b     1
c1    2
d     3
dtype: int64

In [4]:
print(len(s1))
print(type(s1))
print(s1.index)

4
<class 'pandas.core.series.Series'>
Index(['a', 'b', 'c1', 'd'], dtype='object')


In [5]:
s2 = pd.Series(np.arange(4)*2, index=['a','b','c2','b'])
s2

a     0
b     2
c2    4
b     6
dtype: int64

In [6]:
print(len(s2))
print(type(s2))
print(s2.index)

4
<class 'pandas.core.series.Series'>
Index(['a', 'b', 'c2', 'b'], dtype='object')


In [7]:
s = s2 - s1

In [8]:
print(len(s))
print(type(s))
print(s.index)

6
<class 'pandas.core.series.Series'>
Index(['a', 'b', 'b', 'c1', 'c2', 'd'], dtype='object')


In [9]:
s['b']

b    1.0
b    5.0
dtype: float64

In [10]:
print(len(s['b']))
print(type(s['b']))
print(type(s['a']))
print(s['c1'])
print(type(s['c1']))

2
<class 'pandas.core.series.Series'>
<class 'numpy.float64'>
nan
<class 'numpy.float64'>


In [11]:
d = {'one' : [1., 2., 3., 4.], 'two' : [4., 3., 2., 1.]}
d = pd.DataFrame(d)
d

Unnamed: 0,one,two
0,1.0,4.0
1,2.0,3.0
2,3.0,2.0
3,4.0,1.0


In [12]:
print(type(d))
print(d.columns)
print(d.index) 

<class 'pandas.core.frame.DataFrame'>
Index(['one', 'two'], dtype='object')
RangeIndex(start=0, stop=4, step=1)


In [13]:
s = d['one']
s

0    1.0
1    2.0
2    3.0
3    4.0
Name: one, dtype: float64

In [14]:
print(type(s))
print(s.name)
print(s.index) 

<class 'pandas.core.series.Series'>
one
RangeIndex(start=0, stop=4, step=1)


In [15]:
s = pd.Series(np.random.randn(5), name='something')

In [16]:
id(s)

139974555257040

In [17]:
s.name

'something'

In [18]:
s.name = 'new_name'

In [19]:
id(s)

139974555257040

In [20]:
s.name

'new_name'

In [21]:
s.rename("yet_another_name")

0    2.106261
1   -0.506023
2   -0.814289
3    0.694557
4   -0.048007
Name: yet_another_name, dtype: float64

In [22]:
id(s) 

139974555257040

In [23]:
s.name

'new_name'

In [24]:
d = {'one': [1,2,3,4], 'two': [4.,3.,2.,1.]}

In [25]:
pd.DataFrame(d)

Unnamed: 0,one,two
0,1,4.0
1,2,3.0
2,3,2.0
3,4,1.0


In [26]:
pd.DataFrame(d, index=['a','b','c','d'])

Unnamed: 0,one,two
a,1,4.0
b,2,3.0
c,3,2.0
d,4,1.0


In [27]:
pd.DataFrame(d, index=['a','b','a','a'])

Unnamed: 0,one,two
a,1,4.0
b,2,3.0
a,3,2.0
a,4,1.0


In [28]:
pd.DataFrame(d, columns=['one'])

Unnamed: 0,one
0,1
1,2
2,3
3,4


In [29]:
pd.DataFrame(d, columns=['one','three'])

Unnamed: 0,one,three
0,1,
1,2,
2,3,
3,4,


In [30]:
# Error: ValueError: Shape of passed values is (4, 1), indices imply (3, 1)
# pd.DataFrame(d, columns=['one'],index=[0,1,2])

In [31]:
pd.DataFrame(d, columns=['one'],index=[0,1,2,5])

Unnamed: 0,one
0,1
1,2
2,3
5,4


In [32]:
p = pd.DataFrame(d, columns=['one'])
pd.DataFrame(p, index=[0,1,2])

Unnamed: 0,one
0,1
1,2
2,3


In [33]:
d = {
    'one': pd.Series([1.,2.,3.],index=['a','b','c']),
    'two': pd.Series([1.,2.,3.,4.],index=['a','b','c','d']),
    'three': pd.Series(['A','B','C','D'])
}

In [34]:
d

{'one': a    1.0
 b    2.0
 c    3.0
 dtype: float64, 'three': 0    A
 1    B
 2    C
 3    D
 dtype: object, 'two': a    1.0
 b    2.0
 c    3.0
 d    4.0
 dtype: float64}

In [35]:
type(d)

dict

In [36]:
df = pd.DataFrame(d)

In [37]:
df

Unnamed: 0,one,two,three
a,1.0,1.0,
b,2.0,2.0,
c,3.0,3.0,
d,,4.0,
0,,,A
1,,,B
2,,,C
3,,,D


In [38]:
df = pd.DataFrame(d, index=['d','b','a'])
df

Unnamed: 0,one,two,three
d,,4.0,
b,2.0,2.0,
a,1.0,1.0,


In [39]:
df = pd.DataFrame(d, index=['d','b','a','a'])
df

Unnamed: 0,one,two,three
d,,4.0,
b,2.0,2.0,
a,1.0,1.0,
a,1.0,1.0,


In [40]:
df = pd.DataFrame(d, index=['d','b','a'],columns=['two','three'])
df

Unnamed: 0,two,three
d,4.0,
b,2.0,
a,1.0,


In [41]:
df = pd.DataFrame(d, index=['d','b','a'],columns=['two','two'])
df

Unnamed: 0,two,two.1
d,4.0,4.0
b,2.0,2.0
a,1.0,1.0


In [42]:
data2 = [{'a': 1, 'b': 2}, {'a': 5, 'b': 10, 'c': 20}]
pd.DataFrame(data2)

Unnamed: 0,a,b,c
0,1,2,
1,5,10,20.0


In [43]:
pd.DataFrame(data2, index=['first', 'second'])

Unnamed: 0,a,b,c
first,1,2,
second,5,10,20.0


In [44]:
pd.DataFrame(data2, columns=['a', 'b'])

Unnamed: 0,a,b
0,1,2
1,5,10


In [45]:
s = pd.Series([1., 2., 3.], index=['a', 'b', 'c'])
type(s)

pandas.core.series.Series

In [46]:
df2 = pd.DataFrame(s)
df2

Unnamed: 0,0
a,1.0
b,2.0
c,3.0


In [47]:
type(df2)

pandas.core.frame.DataFrame

In [48]:
df2.columns

RangeIndex(start=0, stop=1, step=1)

In [49]:
df2.index

Index(['a', 'b', 'c'], dtype='object')

In [50]:
# df1 = pd.read_csv("https://storage.googleapis.com/mledu-datatsets/california_housing_traing.csv", sep=",")

In [51]:
# len(df1)

In [52]:
pd.set_option('display.width', 40)     # default is 80
pd.set_option('display.max_colwidth', 30)

In [53]:
d = {
  'one' : pd.Series([1., 2., 3., 4.], index=['a', 'b', 'c', 'a']),
  'two' : pd.Series(['A', 'B', 'C', 'D'], index=['a', 'b', 'c', 'a'])
}

In [54]:
df = pd.DataFrame(d)
df

Unnamed: 0,one,two
a,1.0,A
b,2.0,B
c,3.0,C
a,4.0,D


In [55]:
type(df['one'])
df['one']

a    1.0
b    2.0
c    3.0
a    4.0
Name: one, dtype: float64

In [56]:
type(df[['one']])
df[['one']]

Unnamed: 0,one
a,1.0
b,2.0
c,3.0
a,4.0


In [57]:
type(df[['one', 'two']])

pandas.core.frame.DataFrame

In [58]:
df[['one', 'two']]

Unnamed: 0,one,two
a,1.0,A
b,2.0,B
c,3.0,C
a,4.0,D


In [59]:
type(df.loc['a']) 

pandas.core.frame.DataFrame

In [60]:
df.loc['a']

Unnamed: 0,one,two
a,1.0,A
a,4.0,D


In [61]:
type(df.loc['b'])   

pandas.core.series.Series

In [62]:
df.loc['b']

one    2
two    B
Name: b, dtype: object

In [63]:
type(df.loc[['a', 'c']]) 

pandas.core.frame.DataFrame

In [64]:
df.loc[['a', 'c']]

Unnamed: 0,one,two
a,1.0,A
a,4.0,D
c,3.0,C


In [65]:
type(df.iloc[0])

pandas.core.series.Series

In [66]:
df.iloc[0]

one    1
two    A
Name: a, dtype: object

In [67]:
df.iloc[1:3]

Unnamed: 0,one,two
b,2.0,B
c,3.0,C


In [68]:
df.iloc[[1, 2]]

Unnamed: 0,one,two
b,2.0,B
c,3.0,C


In [69]:
df.iloc[[1, 0, 1, 0]]

Unnamed: 0,one,two
b,2.0,B
a,1.0,A
b,2.0,B
a,1.0,A


In [70]:
df.iloc[[True, False, True, False]]

Unnamed: 0,one,two
a,1.0,A
c,3.0,C


In [71]:
d = {
    'one': pd.Series([1.,2.,3.],index=['a','b','c']),
    'two': pd.Series([1.,2.,3.,4.],index=['a','b','c','d'])
}

In [72]:
id(d)

139974130393448

In [73]:
df = pd.DataFrame(d)

In [74]:
id(df)

139974130505040

In [75]:
type(df['one'])

pandas.core.series.Series

In [76]:
df['one']['a']

1.0

In [77]:
df['one'][0]

1.0

In [78]:
type(df['one'][0])

numpy.float64

In [79]:
df['one'][:2]

a    1.0
b    2.0
Name: one, dtype: float64

In [80]:
type(df['one'][0:2])

pandas.core.series.Series

In [81]:
id(df)

139974130505040

In [82]:
df['one_trunc'] = df['one'][:2]
df['out_of'] = 100
df

Unnamed: 0,one,two,one_trunc,out_of
a,1.0,1.0,1.0,100
b,2.0,2.0,2.0,100
c,3.0,3.0,,100
d,,4.0,,100


In [83]:
id(df)

139974130505040

In [84]:
df['test_1'] = pd.Series([1.,2.,3.])
df['test_2'] = pd.Series([1.,2.,3.], index=['a','b', 5])
df

Unnamed: 0,one,two,one_trunc,out_of,test_1,test_2
a,1.0,1.0,1.0,100,,1.0
b,2.0,2.0,2.0,100,,2.0
c,3.0,3.0,,100,,
d,,4.0,,100,,


In [85]:
id(df)

139974130505040

In [86]:
df.insert(4, 'test_3', pd.Series([1.,2.,3.]))
df.insert(7, 'test_4', pd.Series([1.,2.,3.], index=['a','b',5]))

In [87]:
id(df)

139974130505040

In [88]:
df['const'] = 1
df

Unnamed: 0,one,two,one_trunc,out_of,test_3,test_1,test_2,test_4,const
a,1.0,1.0,1.0,100,,,1.0,1.0,1
b,2.0,2.0,2.0,100,,,2.0,2.0,1
c,3.0,3.0,,100,,,,,1
d,,4.0,,100,,,,,1


In [89]:
id(df)

139974130505040

In [90]:
del df['test_1']
del df['test_2']
del df['test_3']
del df['test_4']
df

Unnamed: 0,one,two,one_trunc,out_of,const
a,1.0,1.0,1.0,100,1
b,2.0,2.0,2.0,100,1
c,3.0,3.0,,100,1
d,,4.0,,100,1


In [91]:
id(df)

139974130505040

In [92]:
df = pd.DataFrame(np.random.randn(10, 4), columns=['A','B','C','D'], index = range(2,12))
id(df)

139974130062056

In [93]:
df2 = pd.DataFrame(np.random.randn(7, 3), columns=['A','B','C'])
id(df2)

139974130061664

In [94]:
df3 = df + df2

In [95]:
id(df3)

139974130065024

In [96]:
df4 = df - df.iloc[0]

In [97]:
id(df4)

139974130064912

In [98]:
index = pd.date_range('1/1/2000', periods = 8)
df = pd.DataFrame(np.random.randn(8,3), index = index, columns =list('ABC'))
df
type(df['A'])
df.sub(df['A'], axis=0)

Unnamed: 0,A,B,C
2000-01-01,0.0,0.127312,1.156624
2000-01-02,0.0,-0.606287,-2.817334
2000-01-03,0.0,3.833396,2.510825
2000-01-04,0.0,0.797346,1.447197
2000-01-05,0.0,0.235591,-1.007808
2000-01-06,0.0,-1.875114,-2.678529
2000-01-07,0.0,1.693971,0.22107
2000-01-08,0.0,-1.369209,1.160267


In [99]:
df = pd.DataFrame({'A': range(1, 6), 'B': range(10, 0, -2)})
df

Unnamed: 0,A,B
0,1,10
1,2,8
2,3,6
3,4,4
4,5,2


In [100]:
df.eval('2*A + B')

0    12
1    12
2    12
3    12
4    12
dtype: int64

In [101]:
df.eval('C = A + 2*B', inplace=True)

In [102]:
df

Unnamed: 0,A,B,C
0,1,10,21
1,2,8,18
2,3,6,15
3,4,4,12
4,5,2,9


In [103]:
d = {
    'one': pd.Series([1.,2.,3.],index=['a','b','c']),
    'two': pd.Series([1.,2.,3.,4.],index=['a','b','c','d'])
}
df = pd.DataFrame(d)

df['one_trunc'] = df['one'][:2]
df['out_of'] = 100

df['test_1'] = pd.Series([1.,2.,3.])
df['test_2'] = pd.Series([1.,2.,3.], index=['a','b', 5])

df.insert(4, 'test_3', pd.Series([1.,2.,3.]))
df.insert(7, 'test_4', pd.Series([1.,2.,3.], index=['a','b',5]))

df['const'] = 1

del df['test_1']
del df['test_2']
del df['test_3']
del df['test_4']

In [104]:
df2 = df.assign(one_ratio = df['one']/df['out_of'])

In [105]:
df2

Unnamed: 0,one,two,one_trunc,out_of,const,one_ratio
a,1.0,1.0,1.0,100,1,0.01
b,2.0,2.0,2.0,100,1,0.02
c,3.0,3.0,,100,1,0.03
d,,4.0,,100,1,


In [106]:
df3 = df.assign(one_ratio = lambda x: (x['one']/x['out_of']))
df3

Unnamed: 0,one,two,one_trunc,out_of,const,one_ratio
a,1.0,1.0,1.0,100,1,0.01
b,2.0,2.0,2.0,100,1,0.02
c,3.0,3.0,,100,1,0.03
d,,4.0,,100,1,


In [107]:
id(df)

139974130507504

In [108]:
id(df3)

139974130265056

In [109]:
df = pd.DataFrame([[1, 2], [3, 4]], columns=list('AB'))
df

Unnamed: 0,A,B
0,1,2
1,3,4


In [110]:
# 1  3  4
df2 = pd.DataFrame([[5, 6], [7, 8]], columns=list('AB'))
df2

Unnamed: 0,A,B
0,5,6
1,7,8


In [111]:
df.append(df2)

Unnamed: 0,A,B
0,1,2
1,3,4
0,5,6
1,7,8


In [112]:
df.append(df2, ignore_index=True)

Unnamed: 0,A,B
0,1,2
1,3,4
2,5,6
3,7,8


In [113]:
df.append(df2, ignore_index=True)

Unnamed: 0,A,B
0,1,2
1,3,4
2,5,6
3,7,8


In [114]:
d = {
    'one': pd.Series([1.,2.,3.,4.],index=['a','b','c','d']),
    'two': pd.Series([4,3,2,1],index=['a','b','c','d']),
    'three': pd.Series(['A','B','C','D'],index=['a','b','c','d']), 
}

In [115]:
df = pd.DataFrame(d, columns=['one','two'])
df

Unnamed: 0,one,two
a,1.0,4
b,2.0,3
c,3.0,2
d,4.0,1


In [116]:
df_t = df.T

In [117]:
df_t

Unnamed: 0,a,b,c,d
one,1.0,2.0,3.0,4.0
two,4.0,3.0,2.0,1.0


In [118]:
df_t.dtypes

a    float64
b    float64
c    float64
d    float64
dtype: object

In [119]:
df_t.values

array([[1., 2., 3., 4.],
       [4., 3., 2., 1.]])

In [120]:
df = pd.DataFrame(d, columns=['one','three'])
df

Unnamed: 0,one,three
a,1.0,A
b,2.0,B
c,3.0,C
d,4.0,D


In [121]:
df_t = df.T
df_t

Unnamed: 0,a,b,c,d
one,1,2,3,4
three,A,B,C,D


In [122]:
df_t.dtypes

a    object
b    object
c    object
d    object
dtype: object

In [123]:
df_t.values

array([[1.0, 2.0, 3.0, 4.0],
       ['A', 'B', 'C', 'D']], dtype=object)

In [124]:
df = pd.DataFrame({
    'col1':['A','A','B',np.nan,'D','C'],
    'col2': [2, 1, 9, 8, 7, 4],
    'col3': [0, 1, 9, 4, 2, 3],
})

In [125]:
df

Unnamed: 0,col1,col2,col3
0,A,2,0
1,A,1,1
2,B,9,9
3,,8,4
4,D,7,2
5,C,4,3


In [126]:
df.sort_values(by=['col1'])

Unnamed: 0,col1,col2,col3
0,A,2,0
1,A,1,1
2,B,9,9
5,C,4,3
4,D,7,2
3,,8,4


In [127]:
df.sort_values(by=['col1','col2'])

Unnamed: 0,col1,col2,col3
1,A,1,1
0,A,2,0
2,B,9,9
5,C,4,3
4,D,7,2
3,,8,4


In [128]:
df.sort_values(by=['col1','col2'],ascending=False)

Unnamed: 0,col1,col2,col3
4,D,7,2
5,C,4,3
2,B,9,9
0,A,2,0
1,A,1,1
3,,8,4


In [129]:
df.sort_values(by=['col1','col2'],ascending=False, na_position='first')

Unnamed: 0,col1,col2,col3
3,,8,4
4,D,7,2
5,C,4,3
2,B,9,9
0,A,2,0
1,A,1,1


In [130]:
df.sort_values(by=['col1','col2'],ascending=False, na_position='first', inplace=True)

In [131]:
df = pd.DataFrame({
    'col1':['A','A','B',np.nan,'D','C'],
    'col2': [2, 1, 9, 8, 7, 4],
    'col3': [0, 1, 9, 4, 2, 3],
})

In [132]:
df.sort_values(by=['col1','col2'],ascending=False, na_position='last', inplace=True)

In [133]:
df = pd.DataFrame({
    'col1':['A','A','B',np.nan,'D','C'],
    'col2': [2, 1, 9, 8, 7, 4],
    'col3': [0, 1, 9, 4, 2, 3],
})

In [134]:
df

Unnamed: 0,col1,col2,col3
0,A,2,0
1,A,1,1
2,B,9,9
3,,8,4
4,D,7,2
5,C,4,3


In [135]:
df.groupby(['col1']).groups

{'A': Int64Index([0, 1], dtype='int64'),
 'B': Int64Index([2], dtype='int64'),
 'C': Int64Index([5], dtype='int64'),
 'D': Int64Index([4], dtype='int64')}

In [136]:
grouped = df.groupby(['col1'])

In [137]:
dir(grouped)

['__annotations__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_accessors',
 '_add_numeric_operations',
 '_agg_examples_doc',
 '_agg_see_also_doc',
 '_aggregate',
 '_aggregate_frame',
 '_aggregate_item_by_item',
 '_aggregate_multiple_funcs',
 '_apply_filter',
 '_apply_to_column_groupbys',
 '_apply_whitelist',
 '_assure_grouper',
 '_bool_agg',
 '_builtin_table',
 '_choose_path',
 '_concat_objects',
 '_constructor',
 '_cumcount_array',
 '_cython_agg_blocks',
 '_cython_agg_general',
 '_cython_table',
 '_cython_transform',
 '_define_paths',
 '_deprecations',
 '_dir_additions',
 '_dir_deletions',
 '_fill',
 '_ge

In [138]:
grouped.agg([np.sum, np.mean])

Unnamed: 0_level_0,col2,col2,col3,col3
Unnamed: 0_level_1,sum,mean,sum,mean
col1,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
A,3,1.5,1,0.5
B,9,9.0,9,9.0
C,4,4.0,3,3.0
D,7,7.0,2,2.0


In [139]:
grouped.mean()

Unnamed: 0_level_0,col2,col3
col1,Unnamed: 1_level_1,Unnamed: 2_level_1
A,1.5,0.5
B,9.0,9.0
C,4.0,3.0
D,7.0,2.0


In [140]:
df = pd.DataFrame({
    'one': [1,4],
    'two': [2, 5],
    'three': [3, 6]
}, index=['mouse','rabbit'])
df

Unnamed: 0,one,two,three
mouse,1,2,3
rabbit,4,5,6


In [141]:
df.filter(items=['one','three'])

Unnamed: 0,one,three
mouse,1,3
rabbit,4,6


In [142]:
df.filter(regex='e$', axis=1)

Unnamed: 0,one,three
mouse,1,3
rabbit,4,6


In [143]:
df.filter(like='bbi',axis=0)

Unnamed: 0,one,two,three
rabbit,4,5,6


In [145]:
mtcars = ggdata.mtcars

In [146]:
mtcars.head()

Unnamed: 0,name,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
0,Mazda RX4,21.0,6,160.0,110,3.9,2.62,16.46,0,1,4,4
1,Mazda RX4 Wag,21.0,6,160.0,110,3.9,2.875,17.02,0,1,4,4
2,Datsun 710,22.8,4,108.0,93,3.85,2.32,18.61,1,1,4,1
3,Hornet 4 Drive,21.4,6,258.0,110,3.08,3.215,19.44,1,0,3,1
4,Hornet Sportabout,18.7,8,360.0,175,3.15,3.44,17.02,0,0,3,2


In [147]:
mtcars2 = mtcars[['name','mpg','wt']]

In [148]:
mtcars2.head()

Unnamed: 0,name,mpg,wt
0,Mazda RX4,21.0,2.62
1,Mazda RX4 Wag,21.0,2.875
2,Datsun 710,22.8,2.32
3,Hornet 4 Drive,21.4,3.215
4,Hornet Sportabout,18.7,3.44


In [149]:
mtcars3 = mtcars2.melt(id_vars=['name'],value_vars=['mpg','wt'],var_name='variable',value_name='value')
mtcars3.head()

Unnamed: 0,name,variable,value
0,Mazda RX4,mpg,21.0
1,Mazda RX4 Wag,mpg,21.0
2,Datsun 710,mpg,22.8
3,Hornet 4 Drive,mpg,21.4
4,Hornet Sportabout,mpg,18.7


In [150]:
mtcars3.tail()

Unnamed: 0,name,variable,value
59,Lotus Europa,wt,1.513
60,Ford Pantera L,wt,3.17
61,Ferrari Dino,wt,2.77
62,Maserati Bora,wt,3.57
63,Volvo 142E,wt,2.78


In [152]:
mtcars4 = mtcars3.pivot(index='name',columns='variable',values='value')
mtcars4.head()

variable,mpg,wt
name,Unnamed: 1_level_1,Unnamed: 2_level_1
AMC Javelin,15.2,3.435
Cadillac Fleetwood,10.4,5.25
Camaro Z28,13.3,3.84
Chrysler Imperial,14.7,5.345
Datsun 710,22.8,2.32


In [155]:
# df = foo3(foo2(foo1(df, arg1=1), arg2=2), arg3=3)

In [156]:
# df.pipe(foo1, arg1=1).
#     pipe(foo2, arg2=2).
#     pipe(foo3, arg3=3)

In [158]:
df = pd.DataFrame([[4,9],]*3, columns=['A','B'])
df

Unnamed: 0,A,B
0,4,9
1,4,9
2,4,9


In [159]:
df.apply(np.sqrt)

Unnamed: 0,A,B
0,2.0,3.0
1,2.0,3.0
2,2.0,3.0


In [160]:
df.apply(np.sqrt, axis=1)

Unnamed: 0,A,B
0,2.0,3.0
1,2.0,3.0
2,2.0,3.0


In [162]:
df1 = df.apply(np.sum)

In [163]:
type(df1)

pandas.core.series.Series

In [164]:
df2 = df.apply(np.sum, axis=1)
type(df2)
df2

0    13
1    13
2    13
dtype: int64

In [165]:
df.apply(lambda x: np.sum(x))

A    12
B    27
dtype: int64

In [166]:
df.apply(lambda x: np.sum(x), axis = 1)

0    13
1    13
2    13
dtype: int64

In [167]:
df = pd.DataFrame([[1,2.12],[3.356,4.567]])
df

Unnamed: 0,0,1
0,1.0,2.12
1,3.356,4.567


In [168]:
df.applymap(str)

Unnamed: 0,0,1
0,1.0,2.12
1,3.356,4.567


In [169]:
df.applymap(lambda x: len(str(x)))

Unnamed: 0,0,1
0,3,4
1,5,5


In [170]:
df.applymap(lambda x: x**2)

Unnamed: 0,0,1
0,1.0,4.4944
1,11.262736,20.857489


In [171]:
df.applymap(lambda x: x**2)

Unnamed: 0,0,1
0,1.0,4.4944
1,11.262736,20.857489


In [172]:
df**2

Unnamed: 0,0,1
0,1.0,4.4944
1,11.262736,20.857489


In [174]:
df1 = pd.DataFrame(
    {
        'A': ['A0', 'A1', 'A2', 'A3'],
        'B': ['B0', 'B1', 'B2', 'B3'],
        'C': ['C0', 'C1', 'C2', 'C3'],
        'D': ['D0', 'D1', 'D2', 'D3']
    }, index=[0, 1, 2, 3]
)

df2 = pd.DataFrame(
    {
        'A': ['A4', 'A5', 'A6', 'A7'],
        'B': ['B4', 'B5', 'B6', 'B7'],
        'C': ['C4', 'C5', 'C6', 'C7'],
        'D': ['D4', 'D5', 'D6', 'D7']
    }, index=[4, 5, 6, 7]
)

df3 = pd.DataFrame(
    {
        'A': ['A8', 'A9', 'A10', 'A11'],
        'B': ['B8', 'B9', 'B10', 'B11'],
        'C': ['C8', 'C9', 'C10', 'C11'],
        'D': ['D8', 'D9', 'D10', 'D11']
    }, index=[1, 2, 3, 4]
)
frames = [df1, df2, df3]

df4 = pd.concat(frames)

df4

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1
2,A2,B2,C2,D2
3,A3,B3,C3,D3
4,A4,B4,C4,D4
5,A5,B5,C5,D5
6,A6,B6,C6,D6
7,A7,B7,C7,D7
1,A8,B8,C8,D8
2,A9,B9,C9,D9


In [176]:
df5 = pd.concat(frames, ignore_index=True)
df5

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1
2,A2,B2,C2,D2
3,A3,B3,C3,D3
4,A4,B4,C4,D4
5,A5,B5,C5,D5
6,A6,B6,C6,D6
7,A7,B7,C7,D7
8,A8,B8,C8,D8
9,A9,B9,C9,D9


In [177]:
df5 = pd.concat(frames, keys=['s1', 's2', 's3'])
df5

Unnamed: 0,Unnamed: 1,A,B,C,D
s1,0,A0,B0,C0,D0
s1,1,A1,B1,C1,D1
s1,2,A2,B2,C2,D2
s1,3,A3,B3,C3,D3
s2,4,A4,B4,C4,D4
s2,5,A5,B5,C5,D5
s2,6,A6,B6,C6,D6
s2,7,A7,B7,C7,D7
s3,1,A8,B8,C8,D8
s3,2,A9,B9,C9,D9


In [178]:
df5.index

MultiIndex([('s1', 0),
            ('s1', 1),
            ('s1', 2),
            ('s1', 3),
            ('s2', 4),
            ('s2', 5),
            ('s2', 6),
            ('s2', 7),
            ('s3', 1),
            ('s3', 2),
            ('s3', 3),
            ('s3', 4)],
           )

In [179]:
df = pd.DataFrame({'char': list('bbacab'), 'data1': range(6)})
df

Unnamed: 0,char,data1
0,b,0
1,b,1
2,a,2
3,c,3
4,a,4
5,b,5


In [180]:
dummies = pd.get_dummies(df['char'], prefix='key')
dummies

Unnamed: 0,key_a,key_b,key_c
0,0,1,0
1,0,1,0
2,1,0,0
3,0,0,1
4,1,0,0
5,0,1,0
