In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Worked examples of pandas DataFrame/Series methods, from `eq` through `pct_change`.

In [2]:
df = pd.DataFrame({'cost': [250, 150, 100],
                   'revenue': [100, 250, 300]},
                  index=['A', 'B', 'C'])

In [3]:
df

Unnamed: 0,cost,revenue
A,250,100
B,150,250
C,100,300


In [4]:
df == 100

Unnamed: 0,cost,revenue
A,False,True
B,False,False
C,True,False


In [5]:
df.eq(100)

Unnamed: 0,cost,revenue
A,False,True
B,False,False
C,True,False


In [6]:
df != pd.Series([100, 250], index=["cost", "revenue"])

Unnamed: 0,cost,revenue
A,True,True
B,True,False
C,False,True


In [7]:
df.ne(pd.Series([100, 300], index=["A", "D"]), axis='index')

Unnamed: 0,cost,revenue
A,True,False
B,True,True
C,True,True
D,True,True


In [8]:
df == [250, 100]

Unnamed: 0,cost,revenue
A,True,True
B,False,False
C,False,False


In [9]:
df.eq([250, 250, 100], axis='index')

Unnamed: 0,cost,revenue
A,True,False
B,False,True
C,True,False


In [10]:
other = pd.DataFrame({'revenue': [300, 250, 100, 150]},
                     index=['A', 'B', 'C', 'D'])

In [11]:
other

Unnamed: 0,revenue
A,300
B,250
C,100
D,150


In [12]:
df.gt(other)

Unnamed: 0,cost,revenue
A,False,False
B,False,False
C,False,True
D,False,False


In [13]:
df_multindex = pd.DataFrame({'cost': [250, 150, 100, 150, 300, 220],
                             'revenue': [100, 250, 300, 200, 175, 225]},
                            index=[['Q1', 'Q1', 'Q1', 'Q2', 'Q2', 'Q2'],
                                   ['A', 'B', 'C', 'A', 'B', 'C']])

In [14]:
df_multindex

Unnamed: 0,Unnamed: 1,cost,revenue
Q1,A,250,100
Q1,B,150,250
Q1,C,100,300
Q2,A,150,200
Q2,B,300,175
Q2,C,220,225


In [15]:
df.le(df_multindex, level=1)

Unnamed: 0,Unnamed: 1,cost,revenue
Q1,A,True,True
Q1,B,True,True
Q1,C,True,True
Q2,A,False,True
Q2,B,True,False
Q2,C,True,False


In [16]:
df = pd.DataFrame({'cost': [250, 150, 100],
                   'revenue': [100, 250, 300]},
                  index=['A', 'B', 'C'])
df

Unnamed: 0,cost,revenue
A,250,100
B,150,250
C,100,300


In [17]:
df == 100

Unnamed: 0,cost,revenue
A,False,True
B,False,False
C,True,False


In [18]:
df.eq(100)

Unnamed: 0,cost,revenue
A,False,True
B,False,False
C,True,False


In [19]:
df != pd.Series([100, 250], index=["cost", "revenue"])

Unnamed: 0,cost,revenue
A,True,True
B,True,False
C,False,True


In [20]:
df.ne(pd.Series([100, 300], index=["A", "D"]), axis='index')

Unnamed: 0,cost,revenue
A,True,False
B,True,True
C,True,True
D,True,True


In [21]:
df == [250, 100]


Unnamed: 0,cost,revenue
A,True,True
B,False,False
C,False,False


In [22]:
df.eq([250, 250, 100], axis='index')

Unnamed: 0,cost,revenue
A,True,False
B,False,True
C,True,False


In [23]:
other = pd.DataFrame({'revenue': [300, 250, 100, 150]},
                     index=['A', 'B', 'C', 'D'])

In [24]:
other

Unnamed: 0,revenue
A,300
B,250
C,100
D,150


In [25]:
df.gt(other)

Unnamed: 0,cost,revenue
A,False,False
B,False,False
C,False,True
D,False,False


In [26]:
df.le(df_multindex, level=1)

Unnamed: 0,Unnamed: 1,cost,revenue
Q1,A,True,True
Q1,B,True,True
Q1,C,True,True
Q2,A,False,True
Q2,B,True,False
Q2,C,True,False


In [27]:
df.ne(pd.Series([100, 300], index=["A", "D"]), axis='index')

Unnamed: 0,cost,revenue
A,True,False
B,True,True
C,True,True
D,True,True


In [28]:
df1 = pd.DataFrame({'A': [0, 0], 'B': [4, 4]})

In [29]:
df1

Unnamed: 0,A,B
0,0,4
1,0,4


In [30]:
df2 = pd.DataFrame({'A': [1, 1], 'B': [3, 3]})

In [31]:
df2

Unnamed: 0,A,B
0,1,3
1,1,3


In [32]:
def take_smaller(s1, s2):
    """Return whichever of the two inputs has the smaller total.

    Written to be passed as the ``func`` argument of ``DataFrame.combine``
    in the cells that follow.

    Parameters
    ----------
    s1, s2 : objects exposing ``.sum()`` (pandas Series in this notebook)
        The two candidates to choose between.

    Returns
    -------
    ``s1`` when ``s1.sum()`` is strictly less than ``s2.sum()``; otherwise
    ``s2`` (so ties resolve to ``s2``).
    """
    return s1 if s1.sum() < s2.sum() else s2

In [33]:
take_smaller

<function __main__.<lambda>(s1, s2)>

In [34]:
df1.combine(df2, take_smaller)

Unnamed: 0,A,B
0,0,3
1,0,3


In [35]:
df1 = pd.DataFrame({'A': [5, 0], 'B': [2, 4]})

In [36]:
df2 = pd.DataFrame({'A': [1, 1], 'B': [3, 3]})

In [37]:
df1.combine(df2, np.minimum)

Unnamed: 0,A,B
0,1,2
1,0,3


In [38]:
df1

Unnamed: 0,A,B
0,5,2
1,0,4


In [39]:
df2

Unnamed: 0,A,B
0,1,3
1,1,3


In [40]:
df1 = pd.DataFrame({'A': [0, 0], 'B': [None, 4]})

In [41]:
df2 = pd.DataFrame({'A': [1, 1], 'B': [3, 3]})

In [42]:
df1.combine(df2, take_smaller, fill_value=-5)

Unnamed: 0,A,B
0,0,-5.0
1,0,4.0


In [43]:
df1

Unnamed: 0,A,B
0,0,
1,0,4.0


In [44]:
df2

Unnamed: 0,A,B
0,1,3
1,1,3


In [45]:
df1 = pd.DataFrame({'A': [0, 0], 'B': [None, 4]})

In [46]:
df2 = pd.DataFrame({'A': [1, 1], 'B': [None, 3]})

In [47]:
df1.combine(df2, take_smaller, fill_value=-5)

Unnamed: 0,A,B
0,0,-5.0
1,0,3.0


In [48]:
df1

Unnamed: 0,A,B
0,0,
1,0,4.0


In [49]:
df2

Unnamed: 0,A,B
0,1,
1,1,3.0


In [50]:
df1 = pd.DataFrame({'A': [0, 0], 'B': [4, 4]})

In [51]:
df2 = pd.DataFrame({'B': [3, 3], 'C': [-10, 1], }, index=[1, 2])

In [52]:
df1.combine(df2, take_smaller)

Unnamed: 0,A,B,C
0,,,
1,,3.0,-10.0
2,,3.0,1.0


In [53]:
df1

Unnamed: 0,A,B
0,0,4
1,0,4


In [54]:
df2

Unnamed: 0,B,C
1,3,-10
2,3,1


In [55]:
df1.combine(df2, take_smaller, overwrite=False)

Unnamed: 0,A,B,C
0,0.0,,
1,0.0,3.0,-10.0
2,,3.0,1.0


In [56]:
df2 = pd.DataFrame({'B': [3, 3], 'C': [1, 1], }, index=[1, 2])

In [57]:
df2

Unnamed: 0,B,C
1,3,1
2,3,1


In [58]:
df2.combine(df1, take_smaller)

Unnamed: 0,A,B,C
0,0.0,,
1,0.0,3.0,
2,,3.0,


In [59]:
df2.combine(df1, take_smaller, overwrite=False)

Unnamed: 0,A,B,C
0,0.0,,
1,0.0,3.0,1.0
2,,3.0,1.0


In [60]:
df1 = pd.DataFrame({'A': [None, 0], 'B': [None, 4]})

In [61]:
df1

Unnamed: 0,A,B
0,,
1,0.0,4.0


In [62]:
df1.head(3)

Unnamed: 0,A,B
0,,
1,0.0,4.0


In [66]:
df2 = pd.DataFrame({'A': [1, 1], 'B': [3, 3]})

In [67]:
df2

Unnamed: 0,A,B
0,1,3
1,1,3


In [68]:
df1.combine_first(df2)

Unnamed: 0,A,B
0,1.0,3.0
1,0.0,4.0


In [69]:
df1 = pd.DataFrame({'A': [None, 0], 'B': [4, None]})

In [70]:
df1

Unnamed: 0,A,B
0,,4.0
1,0.0,


In [71]:
df2 = pd.DataFrame({'B': [3, 3], 'C': [1, 1]}, index=[1, 2])

In [72]:
df1.combine_first(df2)

Unnamed: 0,A,B,C
0,,4.0,
1,0.0,3.0,1.0
2,,3.0,1.0


In [73]:
df = pd.DataFrame([[4, 9]] * 3, columns=['A', 'B'])

In [74]:
df

Unnamed: 0,A,B
0,4,9
1,4,9
2,4,9


In [75]:
df.apply(np.sqrt)

Unnamed: 0,A,B
0,2.0,3.0
1,2.0,3.0
2,2.0,3.0


In [76]:
df.apply(np.sum ,axis=0)

A    12
B    27
dtype: int64

In [77]:
df.apply(np.sum,axis=1)

0    13
1    13
2    13
dtype: int64

In [78]:
df.apply(lambda x: [1, 2], axis=1)

0    [1, 2]
1    [1, 2]
2    [1, 2]
dtype: object

In [79]:
df.apply(lambda x: [1, 2], axis=1, result_type='expand')

Unnamed: 0,0,1
0,1,2
1,1,2
2,1,2


In [80]:
df.apply(lambda x: pd.Series([1, 2], index=['foo', 'bar']), axis=1)

Unnamed: 0,foo,bar
0,1,2
1,1,2
2,1,2


In [81]:
df.apply(lambda x: [1, 2], axis=1, result_type='broadcast')

Unnamed: 0,A,B
0,1,2
1,1,2
2,1,2


In [82]:
df = pd.DataFrame([[1, 2.12], [3.356, 4.567]])

In [83]:
df

Unnamed: 0,0,1
0,1.0,2.12
1,3.356,4.567


In [85]:
df.map(lambda x: len(str(x)))

Unnamed: 0,0,1
0,3,4
1,5,5


In [86]:
df_copy = df.copy()

In [87]:
df_copy

Unnamed: 0,0,1
0,1.0,2.12
1,3.356,4.567


In [88]:
# Put a missing value in the top-left cell so the next cell's
# na_action='ignore' has something to skip.
df_copy.iloc[0, 0] = pd.NA

In [89]:
df_copy.map(lambda x: len(str(x)), na_action='ignore')

Unnamed: 0,0,1
0,,4
1,5.0,5


In [90]:
df.map(round, ndigits=1)

Unnamed: 0,0,1
0,1.0,2.1
1,3.4,4.6


In [91]:
df.map(lambda x: x**2)

Unnamed: 0,0,1
0,1.0,4.4944
1,11.262736,20.857489


In [92]:
df ** 2

Unnamed: 0,0,1
0,1.0,4.4944
1,11.262736,20.857489


In [93]:
df ** 3

Unnamed: 0,0,1
0,1.0,9.528128
1,37.797742,95.256152


In [94]:
df = pd.DataFrame([[1, 2.12], [3.356, 4.567]])

In [95]:
df

Unnamed: 0,0,1
0,1.0,2.12
1,3.356,4.567


In [96]:
df.map(lambda x: len(str(x)))

Unnamed: 0,0,1
0,3,4
1,5,5


In [97]:
df

Unnamed: 0,0,1
0,1.0,2.12
1,3.356,4.567


In [98]:
data = [[8000, 1000], [9500, np.nan], [5000, 2000]]

In [99]:
data

[[8000, 1000], [9500, nan], [5000, 2000]]

In [100]:
df = pd.DataFrame(data, columns=['Salary', 'Others'])

In [101]:
df

Unnamed: 0,Salary,Others
0,8000,1000.0
1,9500,
2,5000,2000.0


In [104]:
def subtract_federal_tax(df):
    """Return ``df`` scaled by 0.9, i.e. with a flat 10% deducted.

    ``df`` may be anything that supports ``* float`` (a DataFrame in this
    notebook); the input is not modified.
    """
    kept_share = 0.9
    return df * kept_share
def subtract_state_tax(df, rate):
    """Return ``df`` with a proportional deduction of ``rate`` applied.

    Parameters
    ----------
    df : object supporting ``* float`` (a DataFrame in this notebook)
    rate : float
        Fraction to remove; the result is ``df * (1 - rate)``.
    """
    kept_share = 1 - rate
    return df * kept_share
def subtract_national_insurance(df, rate, rate_increase):
    """Return ``df`` with national insurance deducted.

    The effective rate is ``rate + rate_increase``; the result is
    ``df * (1 - effective_rate)``. The input is not modified.
    """
    return df * (1 - (rate + rate_increase))

In [105]:
subtract_national_insurance(
    subtract_state_tax(subtract_federal_tax(df), rate=0.12),
    rate=0.05,
    rate_increase=0.02)

Unnamed: 0,Salary,Others
0,5892.48,736.56
1,6997.32,
2,3682.8,1473.12


In [106]:
(
    df.pipe(subtract_federal_tax)
    .pipe(subtract_state_tax, rate=0.12)
    .pipe(subtract_national_insurance, rate=0.05, rate_increase=0.02)
)

Unnamed: 0,Salary,Others
0,5892.48,736.56
1,6997.32,
2,3682.8,1473.12


In [107]:
def subtract_national_insurance(rate, df, rate_increase):
    """Return ``df`` with national insurance deducted at ``rate + rate_increase``.

    ``df`` is deliberately the second parameter here, so the function
    cannot be handed to ``.pipe`` directly; the following cell passes it as
    the tuple ``(subtract_national_insurance, 'df')`` instead.
    """
    kept_share = 1 - (rate + rate_increase)
    return df * kept_share

In [110]:
(
    df.pipe(subtract_federal_tax)
    .pipe(subtract_state_tax, rate=0.12)
    .pipe(
        (subtract_national_insurance, 'df'),
        rate=0.05,
        rate_increase=0.02
    )
)

Unnamed: 0,Salary,Others
0,5892.48,736.56
1,6997.32,
2,3682.8,1473.12


In [111]:
df = pd.DataFrame([[1, 2, 3],
                   [4, 5, 6],
                   [7, 8, 9],
                   [np.nan, np.nan, np.nan]],
                  columns=['A', 'B', 'C'])

In [112]:
df

Unnamed: 0,A,B,C
0,1.0,2.0,3.0
1,4.0,5.0,6.0
2,7.0,8.0,9.0
3,,,


In [113]:
df.agg(['sum', 'min'])

Unnamed: 0,A,B,C
sum,12.0,15.0,18.0
min,1.0,2.0,3.0


In [114]:
df.agg({'A' : ['sum', 'min'], 'B' : ['min', 'max']})

Unnamed: 0,A,B
sum,12.0,
min,1.0,2.0
max,,8.0


In [115]:
df.agg(x=('A', 'max'), y=('B', 'min'), z=('C', 'mean'))

Unnamed: 0,A,B,C
x,7.0,,
y,,2.0,
z,,,6.0


In [116]:
df.agg("mean", axis="columns")

0    2.0
1    5.0
2    8.0
3    NaN
dtype: float64

In [117]:
df = pd.DataFrame([[1, 2, 3],
                   [4, 5, 6],
                   [7, 8, 9],
                   [np.nan, np.nan, np.nan]],
                  columns=['A', 'B', 'C'])

In [118]:
df

Unnamed: 0,A,B,C
0,1.0,2.0,3.0
1,4.0,5.0,6.0
2,7.0,8.0,9.0
3,,,


In [119]:
df.agg(['sum', 'min'])

Unnamed: 0,A,B,C
sum,12.0,15.0,18.0
min,1.0,2.0,3.0


In [120]:
df.agg({'A' : ['sum', 'min'], 'B' : ['min', 'max']})

Unnamed: 0,A,B
sum,12.0,
min,1.0,2.0
max,,8.0


In [121]:
df.agg(x=('A', 'max'), y=('B', 'min'), z=('C', 'mean'))

Unnamed: 0,A,B,C
x,7.0,,
y,,2.0,
z,,,6.0


In [122]:
df.agg("mean", axis="columns")

0    2.0
1    5.0
2    8.0
3    NaN
dtype: float64

In [123]:
df = pd.DataFrame({'A': range(3), 'B': range(1, 4)})
df

Unnamed: 0,A,B
0,0,1
1,1,2
2,2,3


In [124]:
df.transform(lambda x: x + 1)

Unnamed: 0,A,B
0,1,2
1,2,3
2,3,4


In [125]:
s = pd.Series(range(3))

In [126]:
s

0    0
1    1
2    2
dtype: int64

In [127]:
s.transform([np.sqrt, np.exp])

Unnamed: 0,sqrt,exp
0,0.0,1.0
1,1.0,2.718282
2,1.414214,7.389056


In [128]:
df = pd.DataFrame({
    "Date": [
        "2015-05-08", "2015-05-07", "2015-05-06", "2015-05-05",
        "2015-05-08", "2015-05-07", "2015-05-06", "2015-05-05"],
    "Data": [5, 8, 6, 1, 50, 100, 60, 120],
})

In [129]:
df

Unnamed: 0,Date,Data
0,2015-05-08,5
1,2015-05-07,8
2,2015-05-06,6
3,2015-05-05,1
4,2015-05-08,50
5,2015-05-07,100
6,2015-05-06,60
7,2015-05-05,120


In [130]:
df.groupby('Date')['Data'].transform('sum')

0     55
1    108
2     66
3    121
4     55
5    108
6     66
7    121
Name: Data, dtype: int64

In [131]:
df = pd.DataFrame({
    "c": [1, 1, 1, 2, 2, 2, 2],
    "type": ["m", "n", "o", "m", "m", "n", "n"]
})

In [132]:
df

Unnamed: 0,c,type
0,1,m
1,1,n
2,1,o
3,2,m
4,2,m
5,2,n
6,2,n


In [133]:
df['size'] = df.groupby('c')['type'].transform(len)

In [134]:
df

Unnamed: 0,c,type,size
0,1,m,3
1,1,n,3
2,1,o,3
3,2,m,4
4,2,m,4
5,2,n,4
6,2,n,4


In [135]:
df = pd.DataFrame({'Animal': ['Falcon', 'Falcon',
                              'Parrot', 'Parrot'],
                   'Max Speed': [380., 370., 24., 26.]})

In [136]:
df

Unnamed: 0,Animal,Max Speed
0,Falcon,380.0
1,Falcon,370.0
2,Parrot,24.0
3,Parrot,26.0


In [137]:
df.groupby(['Animal']).mean()

Unnamed: 0_level_0,Max Speed
Animal,Unnamed: 1_level_1
Falcon,375.0
Parrot,25.0


In [138]:
arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
          ['Captive', 'Wild', 'Captive', 'Wild']]

In [139]:
arrays

[['Falcon', 'Falcon', 'Parrot', 'Parrot'],
 ['Captive', 'Wild', 'Captive', 'Wild']]

In [140]:
index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))

In [141]:
index

MultiIndex([('Falcon', 'Captive'),
            ('Falcon',    'Wild'),
            ('Parrot', 'Captive'),
            ('Parrot',    'Wild')],
           names=['Animal', 'Type'])

In [142]:
df = pd.DataFrame({'Max Speed': [390., 350., 30., 20.]},
                  index=index)

In [143]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Max Speed
Animal,Type,Unnamed: 2_level_1
Falcon,Captive,390.0
Falcon,Wild,350.0
Parrot,Captive,30.0
Parrot,Wild,20.0


In [148]:
df.groupby(level=0)['Max Speed'].mean()

Animal
Falcon    370.0
Parrot     25.0
Name: Max Speed, dtype: float64

In [149]:
df.groupby(level="Type").mean()

Unnamed: 0_level_0,Max Speed
Type,Unnamed: 1_level_1
Captive,210.0
Wild,185.0


In [150]:
df.groupby(level=0).mean()

Unnamed: 0_level_0,Max Speed
Animal,Unnamed: 1_level_1
Falcon,370.0
Parrot,25.0


In [151]:
l = [[1, 2, 3], [1, None, 4], [2, 1, 3], [1, 2, 2]]
df = pd.DataFrame(l, columns=["a", "b", "c"])

In [152]:
l

[[1, 2, 3], [1, None, 4], [2, 1, 3], [1, 2, 2]]

In [153]:
df

Unnamed: 0,a,b,c
0,1,2.0,3
1,1,,4
2,2,1.0,3
3,1,2.0,2


In [154]:
df.groupby(by=["b"]).sum()

Unnamed: 0_level_0,a,c
b,Unnamed: 1_level_1,Unnamed: 2_level_1
1.0,2,3
2.0,2,5


In [155]:
df.groupby(by=["b"], dropna=False).sum()

Unnamed: 0_level_0,a,c
b,Unnamed: 1_level_1,Unnamed: 2_level_1
1.0,2,3
2.0,2,5
,1,4


In [156]:
l = [["a", 12, 12], [None, 12.3, 33.], ["b", 12.3, 123], ["a", 1, 1]]
df = pd.DataFrame(l, columns=["a", "b", "c"])

In [157]:
l

[['a', 12, 12], [None, 12.3, 33.0], ['b', 12.3, 123], ['a', 1, 1]]

In [158]:
df

Unnamed: 0,a,b,c
0,a,12.0,12.0
1,,12.3,33.0
2,b,12.3,123.0
3,a,1.0,1.0


In [159]:
df.groupby(by="a").sum()

Unnamed: 0_level_0,b,c
a,Unnamed: 1_level_1,Unnamed: 2_level_1
a,13.0,13.0
b,12.3,123.0


In [160]:
df.groupby(by="a", dropna=False).sum()

Unnamed: 0_level_0,b,c
a,Unnamed: 1_level_1,Unnamed: 2_level_1
a,13.0,13.0
b,12.3,123.0
,12.3,33.0


In [161]:
df = pd.DataFrame({'Animal': ['Falcon', 'Falcon',
                              'Parrot', 'Parrot'],
                   'Max Speed': [380., 370., 24., 26.]})

In [162]:
df

Unnamed: 0,Animal,Max Speed
0,Falcon,380.0
1,Falcon,370.0
2,Parrot,24.0
3,Parrot,26.0


In [163]:
df.groupby("Animal", group_keys=True)[['Max Speed']].apply(lambda x: x)

Unnamed: 0_level_0,Unnamed: 1_level_0,Max Speed
Animal,Unnamed: 1_level_1,Unnamed: 2_level_1
Falcon,0,380.0
Falcon,1,370.0
Parrot,2,24.0
Parrot,3,26.0


In [164]:
df

Unnamed: 0,Animal,Max Speed
0,Falcon,380.0
1,Falcon,370.0
2,Parrot,24.0
3,Parrot,26.0


In [165]:
df.groupby("Animal", group_keys=False)[['Max Speed']].apply(lambda x: x)

Unnamed: 0,Max Speed
0,380.0
1,370.0
2,24.0
3,26.0


In [166]:
df

Unnamed: 0,Animal,Max Speed
0,Falcon,380.0
1,Falcon,370.0
2,Parrot,24.0
3,Parrot,26.0


In [167]:
df = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]})

In [168]:
df

Unnamed: 0,B
0,0.0
1,1.0
2,2.0
3,
4,4.0


In [169]:
df.rolling(2).sum()

Unnamed: 0,B
0,
1,1.0
2,3.0
3,
4,


In [170]:
df_time = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]},
                       index=[pd.Timestamp('20130101 09:00:00'),
                              pd.Timestamp('20130101 09:00:02'),
                              pd.Timestamp('20130101 09:00:03'),
                              pd.Timestamp('20130101 09:00:05'),
                              pd.Timestamp('20130101 09:00:06')])

In [171]:
df_time

Unnamed: 0,B
2013-01-01 09:00:00,0.0
2013-01-01 09:00:02,1.0
2013-01-01 09:00:03,2.0
2013-01-01 09:00:05,
2013-01-01 09:00:06,4.0


In [172]:
df_time.rolling('2s').sum()

Unnamed: 0,B
2013-01-01 09:00:00,0.0
2013-01-01 09:00:02,1.0
2013-01-01 09:00:03,3.0
2013-01-01 09:00:05,
2013-01-01 09:00:06,4.0


In [173]:
# FixedForwardWindowIndexer makes the window look ahead instead of back:
# with window_size=2 each row is summed with the row that follows it
# (min_periods=1 lets a window with a single non-NaN value still produce a sum).
indexer = pd.api.indexers.FixedForwardWindowIndexer(window_size=2)
df.rolling(window=indexer, min_periods=1).sum()

Unnamed: 0,B
0,1.0
1,3.0
2,2.0
3,4.0
4,4.0


In [174]:
indexer

<pandas.core.indexers.objects.FixedForwardWindowIndexer at 0x246e41882f0>

In [175]:
df.rolling(2, min_periods=1).sum()

Unnamed: 0,B
0,0.0
1,1.0
2,3.0
3,2.0
4,4.0


In [176]:
df.rolling(3, min_periods=1, center=True).sum()

Unnamed: 0,B
0,1.0
1,3.0
2,3.0
3,6.0
4,4.0


In [177]:
df.rolling(3, min_periods=1, center=False).sum()

Unnamed: 0,B
0,0.0
1,1.0
2,3.0
3,3.0
4,6.0


In [178]:
df.rolling(2, min_periods=1, step=2).sum()

Unnamed: 0,B
0,0.0
2,3.0
4,4.0


In [179]:
df.rolling(2, win_type='gaussian').sum(std=3)

Unnamed: 0,B
0,
1,0.986207
2,2.958621
3,
4,


In [180]:
df = pd.DataFrame({
    'A': [pd.to_datetime('2020-01-01'),
          pd.to_datetime('2020-01-01'),
          pd.to_datetime('2020-01-02'),],
    'B': [1, 2, 3], },
    index=pd.date_range('2020', periods=3))

In [181]:
df

Unnamed: 0,A,B
2020-01-01,2020-01-01,1
2020-01-02,2020-01-01,2
2020-01-03,2020-01-02,3


In [182]:
df.rolling('2D', on='A').sum()

Unnamed: 0,A,B
2020-01-01,2020-01-01,1.0
2020-01-02,2020-01-01,3.0
2020-01-03,2020-01-02,6.0


In [183]:
df

Unnamed: 0,A,B
2020-01-01,2020-01-01,1
2020-01-02,2020-01-01,2
2020-01-03,2020-01-02,3


In [184]:
df = pd.DataFrame({"B": [0, 1, 2, np.nan, 4]})

In [185]:
df

Unnamed: 0,B
0,0.0
1,1.0
2,2.0
3,
4,4.0


In [186]:
df.expanding(1).sum()

Unnamed: 0,B
0,0.0
1,1.0
2,3.0
3,3.0
4,7.0


In [187]:
df.expanding(3).sum()

Unnamed: 0,B
0,
1,
2,3.0
3,3.0
4,7.0


In [188]:
df = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]})

In [189]:
df

Unnamed: 0,B
0,0.0
1,1.0
2,2.0
3,
4,4.0


In [190]:
df.ewm(com=0.5).mean()

Unnamed: 0,B
0,0.0
1,0.75
2,1.615385
3,1.615385
4,3.670213


In [191]:
df.ewm(alpha=2 / 3).mean()

Unnamed: 0,B
0,0.0
1,0.75
2,1.615385
3,1.615385
4,3.670213


In [192]:
df.ewm(com=0.5, adjust=True).mean()

Unnamed: 0,B
0,0.0
1,0.75
2,1.615385
3,1.615385
4,3.670213


In [193]:
df.ewm(com=0.5, adjust=False).mean()

Unnamed: 0,B
0,0.0
1,0.666667
2,1.555556
3,1.555556
4,3.650794


In [194]:
df.ewm(com=0.5, ignore_na=True).mean()

Unnamed: 0,B
0,0.0
1,0.75
2,1.615385
3,1.615385
4,3.225


In [195]:
df.ewm(com=0.5, ignore_na=False).mean()

Unnamed: 0,B
0,0.0
1,0.75
2,1.615385
3,1.615385
4,3.670213


In [196]:
times = ['2020-01-01', '2020-01-03', '2020-01-10', '2020-01-15', '2020-01-17']
df.ewm(halflife='4 days', times=pd.DatetimeIndex(times)).mean()

Unnamed: 0,B
0,0.0
1,0.585786
2,1.523889
3,1.523889
4,3.233686


In [197]:
times

['2020-01-01', '2020-01-03', '2020-01-10', '2020-01-15', '2020-01-17']

In [198]:
s = pd.Series([-1.10, 2, -3.33, 4])

In [199]:
s

0   -1.10
1    2.00
2   -3.33
3    4.00
dtype: float64

In [200]:
s.abs()

0    1.10
1    2.00
2    3.33
3    4.00
dtype: float64

In [201]:
s = pd.Series([1.2 + 1j])

In [202]:
s.abs()

0    1.56205
dtype: float64

In [203]:
s = pd.Series([pd.Timedelta('1 days')])

In [204]:
s.abs()

0   1 days
dtype: timedelta64[ns]

In [205]:
df = pd.DataFrame({
    'a': [4, 5, 6, 7],
    'b': [10, 20, 30, 40],
    'c': [100, 50, -30, -50]
})

In [206]:
df

Unnamed: 0,a,b,c
0,4,10,100
1,5,20,50
2,6,30,-30
3,7,40,-50


In [207]:
df.loc[(df.c - 43).abs().argsort()]

Unnamed: 0,a,b,c
1,5,20,50
0,4,10,100
2,6,30,-30
3,7,40,-50


In [208]:
pd.Series([True, True]).all()

np.True_

In [209]:
pd.Series([True, False]).all()

np.False_

In [210]:
pd.Series([], dtype="float64").all()

np.True_

In [211]:
pd.Series([np.nan]).all()

np.True_

In [212]:
pd.Series([np.nan]).all(skipna=False)

np.True_

In [213]:
df = pd.DataFrame({'col1': [True, True], 'col2': [True, False]})

In [214]:
df

Unnamed: 0,col1,col2
0,True,True
1,True,False


In [215]:
df.all()

col1     True
col2    False
dtype: bool

In [216]:
df.all(axis='columns')

0     True
1    False
dtype: bool

In [217]:
df.all(axis=None)

np.False_

In [218]:
pd.Series([False, False]).any()

np.False_

In [219]:
pd.Series([True, False]).any()

np.True_

In [220]:
pd.Series([], dtype="float64").any()

np.False_

In [221]:
pd.Series([np.nan]).any()

np.False_

In [222]:
pd.Series([np.nan]).any(skipna=False)

np.True_

In [223]:
df = pd.DataFrame({"A": [1, 2], "B": [0, 2], "C": [0, 0]})

In [224]:
df

Unnamed: 0,A,B,C
0,1,0,0
1,2,2,0


In [225]:
df.any()

A     True
B     True
C    False
dtype: bool

In [226]:
df = pd.DataFrame({"A": [True, False], "B": [1, 2]})

In [227]:
df

Unnamed: 0,A,B
0,True,1
1,False,2


In [228]:
df.any()

A    True
B    True
dtype: bool

In [231]:
df.any(axis='columns')

0    True
1    True
dtype: bool

In [232]:
df = pd.DataFrame({"A": [True, False], "B": [1, 0]})

In [233]:
df

Unnamed: 0,A,B
0,True,1
1,False,0


In [234]:
df.any(axis='columns')

0     True
1    False
dtype: bool

In [237]:
df.any(axis=None)

np.True_

In [238]:
pd.DataFrame([]).any()

Series([], dtype: bool)

In [239]:
data = {'col_0': [9, -3, 0, -1, 5], 'col_1': [-2, -7, 6, 8, -5]}

In [240]:
data

{'col_0': [9, -3, 0, -1, 5], 'col_1': [-2, -7, 6, 8, -5]}

In [242]:
df=pd.DataFrame(data)

In [243]:
df

Unnamed: 0,col_0,col_1
0,9,-2
1,-3,-7
2,0,6
3,-1,8
4,5,-5


In [244]:
df.clip(-4, 6)

Unnamed: 0,col_0,col_1
0,6,-2
1,-3,-4
2,0,6
3,-1,6
4,5,-4


In [245]:
df.clip([-2, -1], [4, 5])

Unnamed: 0,col_0,col_1
0,4,-1
1,-2,-1
2,0,5
3,-1,5
4,4,-1


In [246]:
t = pd.Series([2, -4, -1, 6, 3])

In [247]:
t

0    2
1   -4
2   -1
3    6
4    3
dtype: int64

In [248]:
df.clip(t, t + 4, axis=0)

Unnamed: 0,col_0,col_1
0,6,2
1,-3,-4
2,0,3
3,6,8
4,5,3


In [249]:
t = pd.Series([2, -4, np.nan, 6, 3])

In [250]:
t

0    2.0
1   -4.0
2    NaN
3    6.0
4    3.0
dtype: float64

In [251]:
# Only a lower bound is given, one per row. Where the threshold is NaN
# (row 2) nothing is clipped, so that row keeps its original values.
df.clip(t, axis=0)

  df.clip(t, axis=0)


Unnamed: 0,col_0,col_1
0,9,2
1,-3,-4
2,0,6
3,6,8
4,5,3


In [252]:
def histogram_intersection(a, b):
    """Sum of the element-wise minima of ``a`` and ``b``, rounded to 1 decimal.

    Passed as the ``method`` callable of ``DataFrame.corr`` in a later cell.
    """
    overlap = np.minimum(a, b)
    return overlap.sum().round(decimals=1)

In [253]:
df = pd.DataFrame([(.2, .3), (.0, .6), (.6, .0), (.2, .1)],
                  columns=['dogs', 'cats'])

In [254]:
df

Unnamed: 0,dogs,cats
0,0.2,0.3
1,0.0,0.6
2,0.6,0.0
3,0.2,0.1


In [255]:
df.corr(method=histogram_intersection)

Unnamed: 0,dogs,cats
dogs,1.0,0.3
cats,0.3,1.0


In [256]:
df = pd.DataFrame([(1, 1), (2, np.nan), (np.nan, 3), (4, 4)],
                  columns=['dogs', 'cats'])

In [257]:
df

Unnamed: 0,dogs,cats
0,1.0,1.0
1,2.0,
2,,3.0
3,4.0,4.0


In [258]:
df.corr(min_periods=3)

Unnamed: 0,dogs,cats
dogs,1.0,
cats,,1.0


In [259]:
df

Unnamed: 0,dogs,cats
0,1.0,1.0
1,2.0,
2,,3.0
3,4.0,4.0


In [260]:
index = ["a", "b", "c", "d", "e"]

In [261]:
index

['a', 'b', 'c', 'd', 'e']

In [262]:
columns = ["one", "two", "three", "four"]


In [263]:
columns

['one', 'two', 'three', 'four']

In [264]:
df1 = pd.DataFrame(np.arange(20).reshape(5, 4), index=index, columns=columns)
df2 = pd.DataFrame(np.arange(16).reshape(4, 4), index=index[:4], columns=columns)

one      1.0
two      1.0
three    1.0
four     1.0
dtype: float64

In [265]:
df1

Unnamed: 0,one,two,three,four
a,0,1,2,3
b,4,5,6,7
c,8,9,10,11
d,12,13,14,15
e,16,17,18,19


In [266]:
df2

Unnamed: 0,one,two,three,four
a,0,1,2,3
b,4,5,6,7
c,8,9,10,11
d,12,13,14,15


In [267]:
df1.corrwith(df2)

one      1.0
two      1.0
three    1.0
four     1.0
dtype: float64

In [268]:
df2.corrwith(df1, axis=1)

a    1.0
b    1.0
c    1.0
d    1.0
e    NaN
dtype: float64

In [269]:
df = pd.DataFrame({"Person":
                   ["John", "Myla", "Lewis", "John", "Myla"],
                   "Age": [24., np.nan, 21., 33, 26],
                   "Single": [False, True, True, True, False]})

In [270]:
df

Unnamed: 0,Person,Age,Single
0,John,24.0,False
1,Myla,,True
2,Lewis,21.0,True
3,John,33.0,True
4,Myla,26.0,False


In [271]:
df.count()

Person    5
Age       4
Single    5
dtype: int64

In [272]:
df.count(axis='columns')

0    3
1    2
2    3
3    3
4    3
dtype: int64

In [273]:
df = pd.DataFrame([(1, 2), (0, 3), (2, 0), (1, 1)],
                  columns=['dogs', 'cats'])

In [274]:
df

Unnamed: 0,dogs,cats
0,1,2
1,0,3
2,2,0
3,1,1


In [275]:
df.cov()

Unnamed: 0,dogs,cats
dogs,0.666667,-1.0
cats,-1.0,1.666667


In [277]:
# Seed NumPy's global RNG before drawing, so this 1000x5 sample -- and the
# covariance matrix computed from it -- is the same on every
# Restart & Run All. (Previously the draw happened before any seed was set.)
np.random.seed(42)
df = pd.DataFrame(np.random.randn(1000, 5),
                  columns=['a', 'b', 'c', 'd', 'e'])
df.cov()

Unnamed: 0,a,b,c,d,e
a,0.998438,-0.020161,0.059277,-0.008943,0.014144
b,-0.020161,1.059352,-0.008543,-0.024738,0.009826
c,0.059277,-0.008543,1.01067,-0.001486,-0.000271
d,-0.008943,-0.024738,-0.001486,0.921297,-0.013692
e,0.014144,0.009826,-0.000271,-0.013692,0.977795


In [278]:
np.random.seed(42)

In [281]:
df = pd.DataFrame(np.random.randn(20, 3),
                  columns=['a', 'b', 'c'])

In [282]:
df

Unnamed: 0,a,b,c
0,0.496714,-0.138264,0.647689
1,1.52303,-0.234153,-0.234137
2,1.579213,0.767435,-0.469474
3,0.54256,-0.463418,-0.46573
4,0.241962,-1.91328,-1.724918
5,-0.562288,-1.012831,0.314247
6,-0.908024,-1.412304,1.465649
7,-0.225776,0.067528,-1.424748
8,-0.544383,0.110923,-1.150994
9,0.375698,-0.600639,-0.291694


In [283]:
df.loc[df.index[:5], 'a'] = np.nan

In [284]:
df.loc[df.index[5:10], 'b'] = np.nan

In [285]:
df.cov(min_periods=12)

Unnamed: 0,a,b,c
a,0.316741,,-0.150812
b,,1.248003,0.191417
c,-0.150812,0.191417,0.895202


In [286]:
s = pd.Series([2, np.nan, 5, -1, 0])

In [287]:
s

0    2.0
1    NaN
2    5.0
3   -1.0
4    0.0
dtype: float64

In [288]:
s.cummax()

0    2.0
1    NaN
2    5.0
3    5.0
4    5.0
dtype: float64

In [289]:
s.cummax(skipna=False)

0    2.0
1    NaN
2    NaN
3    NaN
4    NaN
dtype: float64

In [290]:
df = pd.DataFrame([[2.0, 1.0],
                   [3.0, np.nan],
                   [1.0, 0.0]],
                  columns=list('AB'))

In [291]:
df

Unnamed: 0,A,B
0,2.0,1.0
1,3.0,
2,1.0,0.0


In [292]:
df.cummax()

Unnamed: 0,A,B
0,2.0,1.0
1,3.0,
2,3.0,1.0


In [293]:
df.cummax(axis=1)

Unnamed: 0,A,B
0,2.0,2.0
1,3.0,
2,1.0,1.0


In [294]:
s = pd.Series([2, np.nan, 5, -1, 0])


In [296]:
s

0    2.0
1    NaN
2    5.0
3   -1.0
4    0.0
dtype: float64

In [297]:
s.cummin()

0    2.0
1    NaN
2    2.0
3   -1.0
4   -1.0
dtype: float64

In [298]:
s.cummin(skipna=False)

0    2.0
1    NaN
2    NaN
3    NaN
4    NaN
dtype: float64

In [299]:
df = pd.DataFrame([[2.0, 1.0],
                   [3.0, np.nan],
                   [1.0, 0.0]],
                  columns=list('AB'))

In [300]:
df

Unnamed: 0,A,B
0,2.0,1.0
1,3.0,
2,1.0,0.0


In [301]:
df.cummin()

Unnamed: 0,A,B
0,2.0,1.0
1,2.0,
2,1.0,0.0


In [302]:
df.cummin(axis=1)

Unnamed: 0,A,B
0,2.0,1.0
1,3.0,
2,1.0,0.0


In [303]:
s = pd.Series([2, np.nan, 5, -1, 0])

In [304]:
s

0    2.0
1    NaN
2    5.0
3   -1.0
4    0.0
dtype: float64

In [305]:
s.cumprod()

0     2.0
1     NaN
2    10.0
3   -10.0
4    -0.0
dtype: float64

In [306]:
s.cumprod(skipna=False)

0    2.0
1    NaN
2    NaN
3    NaN
4    NaN
dtype: float64

In [307]:
df = pd.DataFrame([[2.0, 1.0],
                   [3.0, np.nan],
                   [1.0, 0.0]],
                  columns=list('AB'))
df

Unnamed: 0,A,B
0,2.0,1.0
1,3.0,
2,1.0,0.0


In [308]:
df.cumprod()

Unnamed: 0,A,B
0,2.0,1.0
1,6.0,
2,6.0,0.0


In [310]:
df.cumprod(axis=1)

Unnamed: 0,A,B
0,2.0,2.0
1,3.0,
2,1.0,0.0


In [311]:
s = pd.Series([2, np.nan, 5, -1, 0])

In [313]:
s

0    2.0
1    NaN
2    5.0
3   -1.0
4    0.0
dtype: float64

In [314]:
s.cumsum(skipna=False)

0    2.0
1    NaN
2    NaN
3    NaN
4    NaN
dtype: float64

In [315]:
s.cumsum()

0    2.0
1    NaN
2    7.0
3    6.0
4    6.0
dtype: float64

In [316]:
df = pd.DataFrame([[2.0, 1.0],
                   [3.0, np.nan],
                   [1.0, 0.0]],
                  columns=list('AB'))
df

Unnamed: 0,A,B
0,2.0,1.0
1,3.0,
2,1.0,0.0


In [317]:
df.cumsum()

Unnamed: 0,A,B
0,2.0,1.0
1,5.0,
2,6.0,1.0


In [318]:
df.cumsum(axis=1)

Unnamed: 0,A,B
0,2.0,3.0
1,3.0,
2,1.0,1.0


In [319]:
s = pd.Series([1, 2, 3])

In [320]:
s.describe()

count    3.0
mean     2.0
std      1.0
min      1.0
25%      1.5
50%      2.0
75%      2.5
max      3.0
dtype: float64

In [321]:
s = pd.Series([
    np.datetime64("2000-01-01"),
    np.datetime64("2010-01-01"),
    np.datetime64("2010-01-01")
])

In [322]:
s.describe()

count                      3
mean     2006-09-01 08:00:00
min      2000-01-01 00:00:00
25%      2004-12-31 12:00:00
50%      2010-01-01 00:00:00
75%      2010-01-01 00:00:00
max      2010-01-01 00:00:00
dtype: object

In [323]:
df = pd.DataFrame({'categorical': pd.Categorical(['d', 'e', 'f']),
                   'numeric': [1, 2, 3],
                   'object': ['a', 'b', 'c']
                   })

In [324]:
df

Unnamed: 0,categorical,numeric,object
0,d,1,a
1,e,2,b
2,f,3,c


In [325]:
df.describe()

Unnamed: 0,numeric
count,3.0
mean,2.0
std,1.0
min,1.0
25%,1.5
50%,2.0
75%,2.5
max,3.0


In [326]:
df.describe(include='all')

Unnamed: 0,categorical,numeric,object
count,3,3.0,3
unique,3,,3
top,d,,a
freq,1,,1
mean,,2.0,
std,,1.0,
min,,1.0,
25%,,1.5,
50%,,2.0,
75%,,2.5,


In [327]:
df.numeric.describe()

count    3.0
mean     2.0
std      1.0
min      1.0
25%      1.5
50%      2.0
75%      2.5
max      3.0
Name: numeric, dtype: float64

In [328]:
df.describe(include=[np.number])

Unnamed: 0,numeric
count,3.0
mean,2.0
std,1.0
min,1.0
25%,1.5
50%,2.0
75%,2.5
max,3.0


In [329]:
df.describe(include=[object])  

Unnamed: 0,object
count,3
unique,3
top,a
freq,1


In [330]:
df.describe(include=['category'])

Unnamed: 0,categorical
count,3
unique,3
top,d
freq,1


In [331]:
df.describe(exclude=[np.number])  

Unnamed: 0,categorical,object
count,3,3
unique,3,3
top,d,a
freq,1,1


In [332]:
df.describe(exclude=[object])  

Unnamed: 0,categorical,numeric
count,3,3.0
unique,3,
top,d,
freq,1,
mean,,2.0
std,,1.0
min,,1.0
25%,,1.5
50%,,2.0
75%,,2.5


In [333]:
df = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6],
                   'b': [1, 1, 2, 3, 5, 8],
                   'c': [1, 4, 9, 16, 25, 36]})

In [334]:
df

Unnamed: 0,a,b,c
0,1,1,1
1,2,1,4
2,3,2,9
3,4,3,16
4,5,5,25
5,6,8,36


In [335]:
df.diff()

Unnamed: 0,a,b,c
0,,,
1,1.0,0.0,3.0
2,1.0,1.0,5.0
3,1.0,1.0,7.0
4,1.0,2.0,9.0
5,1.0,3.0,11.0


In [337]:
df.diff(axis=1)

Unnamed: 0,a,b,c
0,,0,0
1,,-1,3
2,,-1,7
3,,-1,13
4,,0,20
5,,2,28


## `diff` with other `periods` values

In [338]:
df.diff(periods=2)

Unnamed: 0,a,b,c
0,,,
1,,,
2,2.0,1.0,8.0
3,2.0,2.0,12.0
4,2.0,3.0,16.0
5,2.0,5.0,20.0


In [339]:
df.diff(periods=3)

Unnamed: 0,a,b,c
0,,,
1,,,
2,,,
3,3.0,2.0,15.0
4,3.0,4.0,21.0
5,3.0,6.0,27.0


In [340]:
df.diff(periods=-1)

Unnamed: 0,a,b,c
0,-1.0,0.0,-3.0
1,-1.0,-1.0,-5.0
2,-1.0,-1.0,-7.0
3,-1.0,-2.0,-9.0
4,-1.0,-3.0,-11.0
5,,,


In [341]:
# uint8 is unsigned and cannot represent -1, so the 0 - 1 difference that
# diff() computes in the next cell wraps around and shows up as 255.
df = pd.DataFrame({'a': [1, 0]}, dtype=np.uint8)

In [342]:
df.diff()

Unnamed: 0,a
0,
1,255.0


In [343]:
df = pd.DataFrame({'A': range(1, 6), 'B': range(10, 0, -2)})

In [344]:
df

Unnamed: 0,A,B
0,1,10
1,2,8
2,3,6
3,4,4
4,5,2


In [345]:
df.eval('C = A + B')

Unnamed: 0,A,B,C
0,1,10,11
1,2,8,10
2,3,6,9
3,4,4,8
4,5,2,7


In [346]:
df

Unnamed: 0,A,B
0,1,10
1,2,8
2,3,6
3,4,4
4,5,2


In [348]:
df.eval(
    '''
C = A + B
D = A - B
'''
)

Unnamed: 0,A,B,C,D
0,1,10,11,-9
1,2,8,10,-6
2,3,6,9,-3
3,4,4,8,0
4,5,2,7,3


In [349]:
s = pd.Series([1, 2, 2, 3], index=['cat', 'dog', 'dog', 'mouse'])

In [350]:
s

cat      1
dog      2
dog      2
mouse    3
dtype: int64

In [351]:
s.kurt()

np.float64(1.5)

In [352]:
df = pd.DataFrame({'a': [1, 2, 2, 3], 'b': [3, 4, 4, 4]},
                  index=['cat', 'dog', 'dog', 'mouse'])

In [353]:
df

Unnamed: 0,a,b
cat,1,3
dog,2,4
dog,2,4
mouse,3,4


In [354]:
df.kurt(axis=None).round(6)

np.float64(-0.988693)

In [355]:
df = pd.DataFrame({'a': [1, 2], 'b': [3, 4], 'c': [3, 4], 'd': [1, 2]},
                  index=['cat', 'dog'])

In [356]:
df.kurt(axis=1)

cat   -6.0
dog   -6.0
dtype: float64

In [357]:
s = pd.Series([1, 2, 2, 3], index=['cat', 'dog', 'dog', 'mouse'])

In [358]:
s

cat      1
dog      2
dog      2
mouse    3
dtype: int64

In [359]:
df = pd.DataFrame({'a': [1, 2, 2, 3], 'b': [3, 4, 4, 4]},
                  index=['cat', 'dog', 'dog', 'mouse'])

In [360]:
df

Unnamed: 0,a,b
cat,1,3
dog,2,4
dog,2,4
mouse,3,4


In [361]:
df.kurt(axis=None).round(6)

np.float64(-0.988693)

In [362]:
df = pd.DataFrame({'a': [1, 2], 'b': [3, 4], 'c': [3, 4], 'd': [1, 2]},
                  index=['cat', 'dog'])

In [363]:
df.kurt(axis=1)

cat   -6.0
dog   -6.0
dtype: float64

In [364]:
idx = pd.MultiIndex.from_arrays([
    ['warm', 'warm', 'cold', 'cold'],
    ['dog', 'falcon', 'fish', 'spider']],
    names=['blooded', 'animal'])
s = pd.Series([4, 2, 0, 8], name='legs', index=idx)

In [365]:
s

blooded  animal
warm     dog       4
         falcon    2
cold     fish      0
         spider    8
Name: legs, dtype: int64

In [366]:
s.max()

np.int64(8)

In [367]:
s = pd.Series([1, 2, 3])

In [368]:
s.mean()

np.float64(2.0)

In [369]:
df = pd.DataFrame({'a': [1, 2], 'b': [2, 3]}, index=['tiger', 'zebra'])

In [370]:
df

Unnamed: 0,a,b
tiger,1,2
zebra,2,3


In [371]:
df.mean(axis=1)

tiger    1.5
zebra    2.5
dtype: float64

In [372]:
df = pd.DataFrame({'a': [1, 2], 'b': ['T', 'Z']},
                  index=['tiger', 'zebra'])

In [377]:
df.mean(numeric_only=True)

a    1.5
dtype: float64

In [378]:
s = pd.Series([1, 2, 3])

In [379]:
s.median()

np.float64(2.0)

In [380]:
df = pd.DataFrame({'a': [1, 2], 'b': [2, 3]}, index=['tiger', 'zebra'])

In [381]:
df

Unnamed: 0,a,b
tiger,1,2
zebra,2,3


In [382]:
df.median()

a    1.5
b    2.5
dtype: float64

In [383]:
df.median(axis=1)

tiger    1.5
zebra    2.5
dtype: float64

In [384]:
df = pd.DataFrame({'a': [1, 2], 'b': ['T', 'Z']},
                  index=['tiger', 'zebra'])

In [385]:
df.median(numeric_only=True)

a    1.5
dtype: float64

In [386]:
idx = pd.MultiIndex.from_arrays([
    ['warm', 'warm', 'cold', 'cold'],
    ['dog', 'falcon', 'fish', 'spider']],
    names=['blooded', 'animal'])
s = pd.Series([4, 2, 0, 8], name='legs', index=idx)


In [387]:
s

blooded  animal
warm     dog       4
         falcon    2
cold     fish      0
         spider    8
Name: legs, dtype: int64

In [388]:
s.min()

np.int64(0)

In [389]:
df = pd.DataFrame([('bird', 2, 2),
                   ('mammal', 4, np.nan),
                   ('arthropod', 8, 0),
                   ('bird', 2, np.nan)],
                  index=('falcon', 'horse', 'spider', 'ostrich'),
                  columns=('species', 'legs', 'wings'))

In [390]:
df

Unnamed: 0,species,legs,wings
falcon,bird,2,2.0
horse,mammal,4,
spider,arthropod,8,0.0
ostrich,bird,2,


In [391]:
df.mode()

Unnamed: 0,species,legs,wings
0,bird,2.0,0.0
1,,,2.0


In [392]:
df.mode(dropna=False)

Unnamed: 0,species,legs,wings
0,bird,2,


In [394]:
df.mode(numeric_only=True)

Unnamed: 0,legs,wings
0,2.0,0.0
1,,2.0


In [395]:
df.mode(axis='columns',numeric_only=True)

Unnamed: 0,0,1
falcon,2.0,
horse,4.0,
spider,0.0,8.0
ostrich,2.0,


In [396]:
s = pd.Series([90, 91, 85])

In [397]:
s

0    90
1    91
2    85
dtype: int64

In [398]:
s.pct_change()

0         NaN
1    0.011111
2   -0.065934
dtype: float64

In [399]:
s.pct_change(periods=2)

0         NaN
1         NaN
2   -0.055556
dtype: float64

In [400]:
s = pd.Series([90, 91, None, 85])

In [401]:
s

0    90.0
1    91.0
2     NaN
3    85.0
dtype: float64

In [402]:
s.ffill().pct_change()

0         NaN
1    0.011111
2    0.000000
3   -0.065934
dtype: float64

In [403]:
df = pd.DataFrame({
    'FR': [4.0405, 4.0963, 4.3149],
    'GR': [1.7246, 1.7482, 1.8519],
    'IT': [804.74, 810.01, 860.13]},
    index=['1980-01-01', '1980-02-01', '1980-03-01'])
df

Unnamed: 0,FR,GR,IT
1980-01-01,4.0405,1.7246,804.74
1980-02-01,4.0963,1.7482,810.01
1980-03-01,4.3149,1.8519,860.13


In [404]:
df.pct_change()

Unnamed: 0,FR,GR,IT
1980-01-01,,,
1980-02-01,0.01381,0.013684,0.006549
1980-03-01,0.053365,0.059318,0.061876


In [405]:
df = pd.DataFrame({
    '2016': [1769950, 30586265],
    '2015': [1500923, 40912316],
    '2014': [1371819, 41403351]},
    index=['GOOG', 'APPL'])
df


Unnamed: 0,2016,2015,2014
GOOG,1769950,1500923,1371819
APPL,30586265,40912316,41403351


In [407]:
df.pct_change(axis='columns',periods=-1)

Unnamed: 0,2016,2015,2014
GOOG,0.179241,0.094112,
APPL,-0.252395,-0.01186,
