# Function Application and Mapping

In [2]:
import pandas as pd 
import numpy as np

In [3]:
# Build a 4x3 frame of standard-normal draws. The draws come from a
# fixed-seed generator so that re-running the notebook from a fresh kernel
# yields the same values every time (and the global NumPy RNG is untouched).
frame = pd.DataFrame(
    np.random.RandomState(12345).randn(4, 3),
    columns=list('bde'),
    index=['Utah', 'Ohio', 'Texas', 'Oregon'],
)

In [4]:
frame  # display the 4x3 frame of random values built above

Unnamed: 0,b,d,e
Utah,0.42421,0.544389,-1.263363
Ohio,-0.663471,0.117128,-0.30764
Texas,0.31333,0.756065,0.261384
Oregon,-0.802206,0.090649,-1.007473


In [7]:
# NumPy ufuncs such as np.abs operate element-wise on a DataFrame.
# Note: this rebinds `frame`, so the signed values are no longer available.
frame=np.abs(frame)

In [8]:
frame  # every entry is now its absolute value

Unnamed: 0,b,d,e
Utah,0.42421,0.544389,1.263363
Ohio,0.663471,0.117128,0.30764
Texas,0.31333,0.756065,0.261384
Oregon,0.802206,0.090649,1.007473


In [12]:
def f(x):
    """Return the spread of ``x``: its maximum minus its minimum.

    ``x`` is any object exposing ``max()`` and ``min()``; when used with
    ``DataFrame.apply`` it receives one column (or row) at a time as a Series.
    """
    return x.max() - x.min()

In [13]:
frame.apply(f)  # f is called once per column (the default axis): one max-min value per column

b    0.488876
d    0.665416
e    1.001980
dtype: float64

In [14]:
 frame.apply(f, axis='columns')  # f is called once per row: one max-min value per row label

Utah      0.839153
Ohio      0.546342
Texas     0.494681
Oregon    0.916824
dtype: float64

In [15]:
def format(x):
    """Render a number as a string with exactly two decimal places.

    NOTE: the name deliberately stays ``format`` (which shadows the builtin)
    because a later cell in this notebook refers to it by that name.
    """
    return '%.2f' % x


# Apply `format` to every element of the frame. DataFrame.applymap was renamed
# to DataFrame.map in pandas 2.1, so call whichever one this pandas provides.
(frame.map if hasattr(frame, 'map') else frame.applymap)(format)

Unnamed: 0,b,d,e
Utah,0.42,0.54,1.26
Ohio,0.66,0.12,0.31
Texas,0.31,0.76,0.26
Oregon,0.8,0.09,1.01


In [16]:
frame['e'].map(format)  # Series.map applies `format` to each element; the result holds strings (object dtype)

Utah      1.26
Ohio      0.31
Texas     0.26
Oregon    1.01
Name: e, dtype: object

# Sorting and Ranking

In [20]:
# Four consecutive integers labelled with index values that are deliberately
# not in alphabetical order, so that sorting by index has a visible effect.
obj = pd.Series(range(4), index=list('dabc'))
obj

d    0
a    1
b    2
c    3
dtype: int64

In [18]:
obj.sort_index()  # Series ordered by index label; each value travels with its label

a    1
b    2
c    3
d    0
dtype: int64

In [21]:
# 2x4 integer frame whose row and column labels are both out of order.
# `frame` is rebound here: it no longer refers to the random-valued frame.
frame = pd.DataFrame(
    np.arange(8).reshape(2, 4),
    index=['three', 'one'],
    columns=list('dabc'),
)

In [23]:
frame  # display the 2x4 frame with unsorted row and column labels

Unnamed: 0,d,a,b,c
three,0,1,2,3
one,4,5,6,7


In [24]:
frame.sort_index()  # default axis: rows are ordered by row label

Unnamed: 0,d,a,b,c
one,4,5,6,7
three,0,1,2,3


In [25]:
frame.sort_index(axis=1)  # axis=1: columns are ordered by column label

Unnamed: 0,a,b,c,d
three,1,2,3,0
one,5,6,7,4


In [26]:
frame.sort_index(axis=1, ascending=False)  # columns ordered by label, descending

Unnamed: 0,d,c,b,a
three,0,3,2,1
one,4,7,6,5


In [27]:
obj = pd.Series([4, 7, -3, 2])  # unsorted values; no index given, so labels are 0..3


In [28]:
obj.sort_values()  # order by value, ascending; each value keeps its original index label

2   -3
3    2
0    4
1    7
dtype: int64

In [29]:
# Same kind of Series, now with missing values mixed in.
obj = pd.Series([4, np.nan, 7, np.nan, -3, 2])

In [30]:
obj.sort_values()  # NaN entries are placed at the end of the sorted result

4   -3.0
5    2.0
0    4.0
2    7.0
1    NaN
3    NaN
dtype: float64

In [31]:
# Two-column frame used to demonstrate sorting by one or several columns.
# `frame` is rebound again; column 'b' comes first, then 'a'.
frame = pd.DataFrame(
    {
        'b': [4, 7, -3, 2],
        'a': [0, 1, 0, 1],
    }
)
frame

Unnamed: 0,b,a
0,4,0
1,7,1
2,-3,0
3,2,1


In [32]:
frame.sort_values(by='b')  # order rows by the values in column 'b'

Unnamed: 0,b,a
2,-3,0
3,2,1
0,4,0
1,7,1


In [33]:
 frame.sort_values(by=['a', 'b'])  # 'a' is the primary sort key; rows with equal 'a' are ordered by 'b'

Unnamed: 0,b,a
2,-3,0
0,4,0
3,2,1
1,7,1


In [34]:
# 'b' is the primary sort key; 'a' would only break ties in 'b'.
# All 'b' values are distinct here, so the result equals sorting by 'b' alone.
frame.sort_values(by=['b', 'a'])

Unnamed: 0,b,a
2,-3,0
3,2,1
0,4,0
1,7,1


In [52]:
# rank() assigns ranks starting at 1 for the smallest value. With the default
# method ('average'), tied values each receive the mean of the ranks they span.
obj = pd.Series([7, -5, 7, 4, 2, 0, 4])
print(obj)
obj.rank()

0    7
1   -5
2    7
3    4
4    2
5    0
6    4
dtype: int64


0    6.5
1    1.0
2    6.5
3    4.5
4    3.0
5    2.0
6    4.5
dtype: float64

In [36]:
obj.mean()  # arithmetic mean of the Series values

2.7142857142857144

In [37]:
 # Small 4x2 frame containing missing values (NaN), including one all-NaN row ('c');
 # the summary-statistic cells that follow operate on it.
 df = pd.DataFrame([[1.4, np.nan], [7.1, -4.5],
                    [np.nan, np.nan], [0.75, -1.3]],
                    index=['a', 'b', 'c', 'd'],
                    columns=['one', 'two'])

In [38]:
df  # display the frame; missing entries are NaN

Unnamed: 0,one,two
a,1.4,
b,7.1,-4.5
c,,
d,0.75,-1.3


In [39]:
df.sum()  # one sum per column; NaN entries are skipped

one    9.25
two   -5.80
dtype: float64

In [40]:
df.sum(axis='columns')  # one sum per row; the all-NaN row 'c' sums to 0

a    1.40
b    2.60
c    0.00
d   -0.55
dtype: float64

In [41]:
df.mean(axis='columns', skipna=False)  # skipna=False: NaN is NOT skipped, so any row containing a NaN gets a NaN mean

a      NaN
b    1.300
c      NaN
d   -0.275
dtype: float64

In [42]:
df.mean(axis='columns', skipna=True)  # skipna=True: NaN entries are ignored; an all-NaN row still yields NaN

a    1.400
b    1.300
c      NaN
d   -0.275
dtype: float64

In [44]:
df.idxmax()  # for each column, the index label at which the maximum value occurs

one    b
two    d
dtype: object

In [45]:
df.cumsum()  # running total down each column; NaN positions stay NaN and do not reset the total

Unnamed: 0,one,two
a,1.4,
b,8.5,-4.5
c,,
d,9.25,-5.8


# Correlation and Covariance

In [60]:
frame.mean()  # column means; `frame` is the two-column b/a frame from the sorting section

b    2.5
a    0.5
dtype: float64

In [56]:
frame.corr()  # pairwise correlation matrix of the columns: symmetric, with 1.0 on the diagonal

Unnamed: 0,b,a
b,1.0,0.549442
a,0.549442,1.0


In [57]:
frame.cov()  # pairwise covariance matrix of the columns; the diagonal holds each column's variance

Unnamed: 0,b,a
b,17.666667,1.333333
a,1.333333,0.333333


In [62]:
import pandas_datareader as web

In [63]:
# Download one quote table per ticker symbol (requires network access).
# NOTE(review): no start/end date is passed, so the returned date range
# presumably depends on the library default and the day the cell is run —
# consider pinning explicit dates for reproducibility.
all_data = {
    symbol: web.get_data_yahoo(symbol)
    for symbol in ('AAPL', 'IBM', 'MSFT', 'GOOG')
}

In [79]:
# Collect the 'Adj Close' column of every downloaded table into a single
# frame with one column per ticker.
price = pd.DataFrame({
    symbol: quotes['Adj Close']
    for symbol, quotes in all_data.items()
})
price

Unnamed: 0_level_0,AAPL,IBM,MSFT,GOOG
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2014-10-20,91.539482,136.936264,39.458977,519.413940
2014-10-21,94.026176,132.182739,40.175110,525.098328
2014-10-22,94.503311,131.016617,39.727531,531.251465
2014-10-23,96.191689,131.332458,40.300430,542.490601
2014-10-24,96.549545,131.251480,41.294075,538.302063
2014-10-27,96.448616,131.081451,41.097130,539.289368
2014-10-28,97.944298,132.482391,41.616329,547.397095
2014-10-29,98.494850,132.369003,41.732697,547.825928
2014-10-30,98.164513,133.089706,41.222458,548.803284
2014-10-31,99.100471,133.130219,42.028111,557.549255


In [81]:
# Collect the 'Volume' column of every downloaded table into a single
# frame with one column per ticker.
volume = pd.DataFrame({
    symbol: quotes['Volume']
    for symbol, quotes in all_data.items()
})
volume

Unnamed: 0_level_0,AAPL,IBM,MSFT,GOOG
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2014-10-20,77517300.0,23416500.0,34527900.0,2607500.0
2014-10-21,94623900.0,20949800.0,36433800.0,2336200.0
2014-10-22,68263100.0,11084800.0,33570900.0,2919200.0
2014-10-23,71074700.0,7599400.0,45451900.0,2348800.0
2014-10-24,47053900.0,6652100.0,61076700.0,1973100.0
2014-10-27,34187700.0,4989100.0,30371300.0,1185300.0
2014-10-28,48060900.0,7895300.0,29049800.0,1270900.0
2014-10-29,52687900.0,4739300.0,30276100.0,1770500.0
2014-10-30,40654800.0,3896000.0,30073900.0,1455600.0
2014-10-31,44639300.0,5818000.0,35849700.0,2035000.0


In [70]:
returns = price.pct_change()  # fractional change of each price relative to the previous row

In [82]:
returns.tail()  # show only the last five rows

Unnamed: 0_level_0,AAPL,IBM,MSFT,GOOG
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-10-14,-0.001439,-0.005043,-0.000931,0.00139
2019-10-15,-0.002332,0.006759,0.014475,0.021255
2019-10-16,-0.004037,-0.006224,-0.008194,0.000507
2019-10-17,0.003883,-0.055239,-0.005128,0.007583
2019-10-18,0.004803,-0.001266,-0.016322,-0.006049


In [72]:
returns['MSFT'].corr(returns['IBM'])  # correlation between two return Series (a single number)

0.48337544081890604

In [73]:
 returns['MSFT'].cov(returns['IBM'])  # covariance between the same two return Series

9.348353117225172e-05

In [74]:
returns.MSFT.corr(returns.IBM)  # attribute-style column access; same result as the bracket form

0.48337544081890604

In [75]:
returns.corr()  # full pairwise correlation matrix across all ticker columns

Unnamed: 0,AAPL,IBM,MSFT,GOOG
AAPL,1.0,0.401226,0.573099,0.521443
IBM,0.401226,1.0,0.483375,0.40992
MSFT,0.573099,0.483375,1.0,0.653896
GOOG,0.521443,0.40992,0.653896,1.0


In [76]:
returns.cov()  # full pairwise covariance matrix across all ticker columns

Unnamed: 0,AAPL,IBM,MSFT,GOOG
AAPL,0.000247,8.2e-05,0.000133,0.000124
IBM,8.2e-05,0.000171,9.3e-05,8.1e-05
MSFT,0.000133,9.3e-05,0.000219,0.000147
GOOG,0.000124,8.1e-05,0.000147,0.000231


In [77]:
returns.corrwith(returns.IBM)  # Series argument: correlation of every column with that one Series

AAPL    0.401226
IBM     1.000000
MSFT    0.483375
GOOG    0.409920
dtype: float64

In [78]:
 returns.corrwith(volume)  # DataFrame argument: each column is correlated with the same-named column of `volume`

AAPL   -0.109907
IBM    -0.142421
MSFT   -0.081835
GOOG   -0.006146
dtype: float64