In [2]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

In [4]:
pd.set_option('display.notebook_repr_html', False)
pd.set_option('display.max_columns', 10)
pd.set_option('display.max_rows',10)

%matplotlib inline
import matplotlib.pyplot as plt

In [6]:
# A Series built from a plain list gets the default integer index 0..n-1.
s = Series(data=[1, 2, 3, 4])
s

0    1
1    2
2    3
3    4
dtype: int64

In [9]:
# Passing a list of keys selects several entries at once; with the default
# integer index, 1 and 3 are the index labels of the entries returned.
s[[1,3]]

1    2
3    4
dtype: int64

In [15]:
# Rebuild s with explicit string labels in place of the default integer index.
s = Series(
    data=[1, 2, 3, 4],
    index=['a', 'b', 'c', 'd'],
)
s

a    1
b    2
c    3
d    4
dtype: int64

In [16]:
# Select the 3rd and 4th entries by position. s is labelled 'a'..'d', so the
# integers 2 and 3 are positions, not labels. Use .iloc to say so explicitly:
# plain s[[2, 3]] relies on a positional fallback for integer keys on a
# non-integer index, which pandas has deprecated.
s.iloc[[2, 3]]

c    3
d    4
dtype: int64

In [19]:
# `in` on a Series tests the index labels, not the stored values:
# 3 is a value of s but not one of its labels ('a'..'d'), hence False.
3 in s

False

In [20]:
# "c" is one of the index labels of s, so the membership test is True.
"c" in s

True

In [22]:
# A Series with mixed integer/string labels and string values (object dtype).
s2 = Series(["AA", "cc", "6"], index=[1, 2, 'f'])
s2

1    AA
2    cc
f     6
dtype: object

In [23]:
# Element-wise missing-value check; s2 has no missing entries, so every
# position is False.
pd.isnull(s2)

1    False
2    False
f    False
dtype: bool

In [30]:
# Arithmetic aligns on index labels. The right-hand Series shares no labels
# with s2, so the result covers the union of both indexes and is NaN throughout.
s3 = s2 + Series(["bA", "tc", "a6"], index=[4, 5, 'fs'])
s3

1     NaN
2     NaN
f     NaN
4     NaN
5     NaN
fs    NaN
dtype: object

In [31]:
# Same addition, but now the labels match s2 exactly, so each pair of
# strings is concatenated label by label.
s3 = s2 + Series(["bA", "tc", "a6"], index=[1, 2, 'f'])
s3

1    AAbA
2    cctc
f     6a6
dtype: object

In [33]:
# Replace the index in place; the new labels are assigned by position.
s3.index = ["q", "w", "e"]
s3

q    AAbA
w    cctc
e     6a6
dtype: object

In [35]:
# Column-oriented source data: each key becomes a DataFrame column.
data = {
    'state': ['Ohio'] * 3 + ['Nevada'] * 3,
    'year': [2000, 2001, 2002, 2001, 2002, 2003],
    'pop': [1.5, 1.7, 3.6, 2.4, 2.9, 3.2],
}

# Rows get the default integer index 0..5.
frame = pd.DataFrame.from_dict(data)
frame

    state  year  pop
0    Ohio  2000  1.5
1    Ohio  2001  1.7
2    Ohio  2002  3.6
3  Nevada  2001  2.4
4  Nevada  2002  2.9
5  Nevada  2003  3.2

In [37]:
# Same data, with explicit string row labels instead of the default 0..5.
frame = pd.DataFrame(
    data,
    index=["first", "second", "third", "fourth", "fifth", "sixth"],
)
frame

         state  year  pop
first     Ohio  2000  1.5
second    Ohio  2001  1.7
third     Ohio  2002  3.6
fourth  Nevada  2001  2.4
fifth   Nevada  2002  2.9
sixth   Nevada  2003  3.2

In [38]:
# `columns` fixes the column order; 'dept' is not a key of `data`, so that
# column is created and filled with NaN.
frame = pd.DataFrame(
    data,
    columns=['year', 'state', 'pop', 'dept'],
)
frame

   year   state  pop dept
0  2000    Ohio  1.5  NaN
1  2001    Ohio  1.7  NaN
2  2002    Ohio  3.6  NaN
3  2001  Nevada  2.4  NaN
4  2002  Nevada  2.9  NaN
5  2003  Nevada  3.2  NaN

In [39]:
# Dict-style column access returns the column as a Series that carries the
# column name and the frame's row index.
frame['year']

0    2000
1    2001
2    2002
3    2001
4    2002
5    2003
Name: year, dtype: int64

In [44]:
# Attribute-style access to the 'state' column; returns it as a Series.
frame.state

0      Ohio
1      Ohio
2      Ohio
3    Nevada
4    Nevada
5    Nevada
Name: state, dtype: object

In [45]:
# Distinct values of the 'state' column, printed as a plain array.
print(frame.state.unique())

['Ohio' 'Nevada']


In [46]:
# Show the current frame; 'dept' is still entirely NaN at this point.
frame

   year   state  pop dept
0  2000    Ohio  1.5  NaN
1  2001    Ohio  1.7  NaN
2  2002    Ohio  3.6  NaN
3  2001  Nevada  2.4  NaN
4  2002  Nevada  2.9  NaN
5  2003  Nevada  3.2  NaN

In [50]:
# Overwrite the existing 'dept' column with one value per row.
# Bracket assignment is used instead of `frame.dept = ...`: attribute
# assignment only updates a column that already exists, and otherwise sets a
# plain Python attribute without adding a column. Bracket assignment has
# neither pitfall and produces the same result here.
frame['dept'] = [1, 2, 3, 4, 5, 6]
frame

   year   state  pop  dept
0  2000    Ohio  1.5     1
1  2001    Ohio  1.7     2
2  2002    Ohio  3.6     3
3  2001  Nevada  2.4     4
4  2002  Nevada  2.9     5
5  2003  Nevada  3.2     6

In [52]:
# Bracket assignment with an unknown key appends a new column; the list must
# supply one value per row (six here, alternating 2 and 3).
frame["new"] = [2, 3] * 3
frame

   year   state  pop  dept  new
0  2000    Ohio  1.5     1    2
1  2001    Ohio  1.7     2    3
2  2002    Ohio  3.6     3    2
3  2001  Nevada  2.4     4    3
4  2002  Nevada  2.9     5    2
5  2003  Nevada  3.2     6    3

In [56]:
# Assigning a Series aligns it on the row index: only rows 0, 3 and 5 get a
# value, and every other row of the new column is NaN.
frame['new2'] = Series({0: 3, 3: 2, 5: 1})
frame

   year   state  pop  dept  new  new2
0  2000    Ohio  1.5     1    2   3.0
1  2001    Ohio  1.7     2    3   NaN
2  2002    Ohio  3.6     3    2   NaN
3  2001  Nevada  2.4     4    3   2.0
4  2002  Nevada  2.9     5    2   NaN
5  2003  Nevada  3.2     6    3   1.0

In [57]:
# Transposed view of the frame: the columns become rows and the row labels
# become column headers. `frame` itself is not reassigned.
frame.T

          0     1     2       3       4       5
year   2000  2001  2002    2001    2002    2003
state  Ohio  Ohio  Ohio  Nevada  Nevada  Nevada
pop     1.5   1.7   3.6     2.4     2.9     3.2
dept      1     2     3       4       5       6
new       2     3     2       3       2       3
new2      3   NaN   NaN       2     NaN       1

In [58]:
# Give the row axis and the column axis a display name.
# NOTE(review): both names duplicate existing column names ('year' and
# 'state'), which makes the printed header ambiguous. Confirm this is intended.
frame.index.name = 'year'
frame.columns.name = 'state'
frame

state  year   state  pop  dept  new  new2
year                                     
0      2000    Ohio  1.5     1    2   3.0
1      2001    Ohio  1.7     2    3   NaN
2      2002    Ohio  3.6     3    2   NaN
3      2001  Nevada  2.4     4    3   2.0
4      2002  Nevada  2.9     5    2   NaN
5      2003  Nevada  3.2     6    3   1.0

In [61]:
# Conform the frame to a new set of row labels. Labels not present in the
# original index (2.5 and 6) yield all-NaN rows, and the original row 2 is
# dropped because it is not requested. The result is displayed only; `frame`
# is not reassigned.
frame.reindex([0,1,2.5,3,4,5,6])

state    year   state  pop  dept  new  new2
year                                       
0.0    2000.0    Ohio  1.5   1.0  2.0   3.0
1.0    2001.0    Ohio  1.7   2.0  3.0   NaN
2.5       NaN     NaN  NaN   NaN  NaN   NaN
3.0    2001.0  Nevada  2.4   4.0  3.0   2.0
4.0    2002.0  Nevada  2.9   5.0  2.0   NaN
5.0    2003.0  Nevada  3.2   6.0  3.0   1.0
6.0       NaN     NaN  NaN   NaN  NaN   NaN

In [64]:
# 3x3 frame holding the floats 0.0..8.0 in row-major order, with state names
# as row labels and 'b', 'c', 'd' as column labels.
df1 = pd.DataFrame(
    np.arange(9, dtype=float).reshape(3, 3),
    index=['Ohio', 'Texas', 'Colorado'],
    columns=list('bcd'),
)
df1

            b    c    d
Ohio      0.0  1.0  2.0
Texas     3.0  4.0  5.0
Colorado  6.0  7.0  8.0

In [65]:
# Integer Series containing repeated values (7 and 4 each appear twice).
obj = pd.Series(data=[7, -5, 7, 4, 2, 0, 4])
obj

0    7
1   -5
2    7
3    4
4    2
5    0
6    4
dtype: int64

In [66]:
# Rank the values from smallest (1.0) upward. Tied values share the mean of
# the ranks they span, e.g. the two 7s both get 6.5 and the two 4s get 4.5.
obj.rank()

0    6.5
1    1.0
2    6.5
3    4.5
4    3.0
5    2.0
6    4.5
dtype: float64

In [69]:
# Small float frame with missing values and a repeated row label ('f').
df4 = DataFrame(
    {
        'one': [1, 7.1, np.nan, 9],
        'two': [np.nan, -9, 0, 8],
    },
    index=['f', 's', 't', 'f'],
)
df4

   one  two
f  1.0  NaN
s  7.1 -9.0
t  NaN  0.0
f  9.0  8.0

In [71]:
# Rows ordered by index label; the two rows labelled 'f' end up adjacent.
# The sorted frame is displayed only; df4 is not reassigned.
df4.sort_index()

   one  two
f  1.0  NaN
f  9.0  8.0
s  7.1 -9.0
t  NaN  0.0

In [72]:
# Column-wise totals. NaN entries are skipped rather than propagated
# (one: 1 + 7.1 + 9 = 17.1, two: -9 + 0 + 8 = -1).
df4.sum()

one    17.1
two    -1.0
dtype: float64

In [77]:
import pandas_datareader.data as web

# One price-history frame per ticker, keyed by ticker symbol.
# NOTE(review): get_data_yahoo presumably fetches over the network, so this
# cell needs connectivity and its results depend on when it is run. Confirm.
all_data = {
    symbol: web.get_data_yahoo(symbol)
    for symbol in ('AAPL', 'IBM', 'MSFT', 'GOOG')
}

# Reshape into one frame per measure, with one column per ticker.
price = pd.DataFrame(
    {symbol: quotes['Adj Close'] for symbol, quotes in all_data.items()}
)
volume = pd.DataFrame(
    {symbol: quotes['Volume'] for symbol, quotes in all_data.items()}
)

In [78]:
# Display the raw dict of per-ticker frames.
# NOTE(review): this prints every frame in full and produces very long output;
# consider inspecting a single entry instead, e.g. all_data['AAPL'].head().
all_data

{'AAPL':                   High         Low        Open       Close       Volume  \
 Date                                                                      
 2009-12-31   30.478571   30.080000   30.447144   30.104286   88102700.0   
 2010-01-04   30.642857   30.340000   30.490000   30.572857  123432400.0   
 2010-01-05   30.798571   30.464285   30.657143   30.625713  150476200.0   
 2010-01-06   30.747143   30.107143   30.625713   30.138571  138040000.0   
 2010-01-07   30.285715   29.864286   30.250000   30.082857  119282800.0   
 ...                ...         ...         ...         ...          ...   
 2018-12-10  170.089996  163.330002  165.000000  169.600006   62026000.0   
 2018-12-11  171.789993  167.000000  171.660004  168.630005   47281700.0   
 2018-12-12  171.919998  169.020004  170.399994  169.100006   35627700.0   
 2018-12-13  172.570007  169.550003  170.490005  170.949997   31898600.0   
 2018-12-14  169.080002  165.279999  169.000000  165.479996   40634300.0   
 
  

In [80]:
# Fractional change of each ticker's price relative to the previous row.
# The first row has no predecessor, so it is NaN for every ticker.
returns = price.pct_change()
returns

                AAPL       IBM      MSFT      GOOG
Date                                              
2009-12-31       NaN       NaN       NaN       NaN
2010-01-04  0.015565  0.011841  0.015420  0.010920
2010-01-05  0.001729 -0.012080  0.000323 -0.004404
2010-01-06 -0.015906 -0.006496 -0.006137 -0.025209
2010-01-07 -0.001849 -0.003462 -0.010400 -0.023279
...              ...       ...       ...       ...
2018-12-10  0.006588  0.014999  0.026426  0.002865
2018-12-11 -0.005719 -0.001981  0.009295  0.011736
2018-12-12  0.002787  0.002233  0.004512  0.011343
2018-12-13  0.010940 -0.003549  0.003392 -0.001673
2018-12-14 -0.031998 -0.006875 -0.031247 -0.018646

[2255 rows x 4 columns]

In [81]:
# Last five rows of the returns frame.
returns.tail()

                AAPL       IBM      MSFT      GOOG
Date                                              
2018-12-10  0.006588  0.014999  0.026426  0.002865
2018-12-11 -0.005719 -0.001981  0.009295  0.011736
2018-12-12  0.002787  0.002233  0.004512  0.011343
2018-12-13  0.010940 -0.003549  0.003392 -0.001673
2018-12-14 -0.031998 -0.006875 -0.031247 -0.018646

# Correlation and Covariance

To be continued!