In [1]:
import pandas as pd
import numpy as np

In [2]:
# Plain-text (non-HTML) reprs, truncated to at most 8 columns and 8 rows,
# keep the printed output below compact.
pd.set_option(
    'display.notebook_repr_html', False,
    'display.max_columns', 8,
    'display.max_rows', 8,
)

---------
# Series

In [3]:
# Ten uniform samples from [0, 1). No seed is set, so the values differ
# from run to run.
s = pd.Series(data=np.random.rand(10))

In [4]:
# Display the Series. Because display.max_rows is 8, the 10 entries are
# printed in truncated form.
s

0    0.913613
1    0.427257
2    0.166348
3    0.539684
       ...   
6    0.112187
7    0.384593
8    0.246150
9    0.118775
Length: 10, dtype: float64

In [5]:
# No index was supplied when s was built, so it carries a default RangeIndex.
s.index

RangeIndex(start=0, stop=10, step=1)

In [6]:
# .values returns the underlying data as a NumPy array.
s.values

array([0.91361313, 0.42725709, 0.16634834, 0.53968405, 0.08984242,
       0.03297377, 0.1121867 , 0.38459344, 0.24615022, 0.11877545])

In [7]:
# Rebind s to a small integer Series with explicit string labels.
s = pd.Series([1, 2, 3, 4], index=list('abcd'))

In [8]:
# Display the Series: the string labels a-d now appear in place of the
# default integer index.
s

a    1
b    2
c    3
d    4
dtype: int64

In [9]:
# Series.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat
# is the supported way to join Series end to end. The original index labels
# are kept, so the new element carries its own default label 0.
k = pd.concat([s, pd.Series(5)])
k

a    1
b    2
c    3
d    4
0    5
dtype: int64

In [10]:
# A Series is one-dimensional, so shape is a 1-tuple holding its length.
k.shape

(5,)

In [11]:
# count() returns the number of non-null entries (5 here, nothing is missing).
k.count()

5

In [12]:
# unique() returns the distinct values as a NumPy array.
k.unique()

array([1, 2, 3, 4, 5])

In [13]:
# value_counts() tallies how often each distinct value occurs; every value
# appears exactly once here.
k.value_counts()

1    1
2    1
3    1
4    1
5    1
dtype: int64

In [14]:
# Labelled Series built from a mapping: keys become the index (in insertion
# order) and values become the data.
s3 = pd.Series({'a': 1, 'b': 2, 'c': 3, 'd': 4})
s3

a    1
b    2
c    3
d    4
dtype: int64

In [15]:
# Same label/value pairs as s3, but stored in reverse order.
s4 = pd.Series(data=[4, 3, 2, 1], index=list('dcba'))
s4

d    4
c    3
b    2
a    1
dtype: int64

In [16]:
# Addition aligns on index labels, not position, so the reversed ordering
# of s4 does not affect the result.
s3 + s4

a    2
b    4
c    6
d    8
dtype: int64

---------
# DataFrames

In [17]:
# Build a 2x2 DataFrame from a NumPy array. No labels are supplied, so rows
# and columns both get default integer labels.
df = pd.DataFrame(
    data=np.array([[10, 11],
                   [20, 21]])
)
df


    0   1
0  10  11
1  20  21

In [18]:
# Replace the default integer row labels with 'a' and 'b' (modifies df in place).
df.index = list('ab')
df

    0   1
a  10  11
b  20  21

In [19]:
# Replace the default integer column labels with 'x' and 'y' (modifies df in place).
df.columns = list('xy')
df

    x   y
a  10  11
b  20  21

In [20]:
# Each Series in the list becomes one row of the resulting DataFrame.
df1 = pd.DataFrame([
    pd.Series(np.arange(10, 15)),
    pd.Series(np.arange(15, 20)),
])
df1

    0   1   2   3   4
0  10  11  12  13  14
1  15  16  17  18  19

In [21]:
# shape is (rows, columns): each input Series became one row of five values.
df1.shape

(2, 5)

In [22]:
# Rename the columns of df once more, this time to 'c1' and 'c2' (in place).
df.columns = pd.Index(['c1', 'c2'])
df

   c1  c2
a  10  11
b  20  21

In [23]:
# The row labels are still 'a' and 'b'; renaming the columns left them untouched.
df.index

Index(['a', 'b'], dtype='object')

In [24]:
# .values returns the frame's data as a 2-D NumPy array, without the row or
# column labels.
df.values

array([[10, 11],
       [20, 21]])

-----------

In [25]:
# s1 and s2 use the default labels 0..4; s3 only has labels 1 and 2.
s1 = pd.Series(np.arange(1, 6))
s2 = pd.Series(np.arange(6, 11))
s3 = pd.Series(np.arange(12, 14), index=[1, 2])

# The mapping keys become column names and rows are aligned on index label;
# labels that s3 lacks show up as NaN in column c3.
pd.DataFrame(dict(c1=s1, c2=s2, c3=s3))

   c1  c2    c3
0   1   6   NaN
1   2   7  12.0
2   3   8  13.0
3   4   9   NaN
4   5  10   NaN

-----------
# Example: loading the S&P 500 data from a CSV file

In [29]:
# Read the S&P 500 CSV (path is relative to the notebook's working
# directory) and preview the first five rows.
sp500 = pd.read_csv(filepath_or_buffer="../data/sp500.csv")
sp500.head()

  Symbol                 Name                  Sector   Price  ...  EBITDA  \
0    MMM               3M Co.             Industrials  141.14  ...   8.121   
1    ABT  Abbott Laboratories             Health Care   39.60  ...   4.359   
2   ABBV          AbbVie Inc.             Health Care   53.95  ...   7.190   
3    ACN            Accenture  Information Technology   79.79  ...   4.423   
4    ACE          ACE Limited              Financials  102.91  ...   4.275   

   Price/Sales  Price/Book                                        SEC Filings  
0         2.95        5.26  http://www.sec.gov/cgi-bin/browse-edgar?action...  
1         2.74        2.55  http://www.sec.gov/cgi-bin/browse-edgar?action...  
2         4.48       18.16  http://www.sec.gov/cgi-bin/browse-edgar?action...  
3         1.75        9.54  http://www.sec.gov/cgi-bin/browse-edgar?action...  
4         1.79        1.18  http://www.sec.gov/cgi-bin/browse-edgar?action...  

[5 rows x 15 columns]
