In [4]:
import pandas as pd
import numpy as np

In [5]:
df = pd.DataFrame(np.random.randint(10,size=(3,4)),columns = ['A','B','C','D'], index = 
                 ['a','b','c'])
df

Unnamed: 0,A,B,C,D
a,0,7,6,0
b,4,1,1,2
c,7,4,4,9


In [6]:
np.exp(df)

Unnamed: 0,A,B,C,D
a,1.0,1096.633158,403.428793,1.0
b,54.59815,2.718282,2.718282,7.389056
c,1096.633158,54.59815,54.59815,8103.083928


In [7]:
area = pd.Series({'Alaska': 1723337, 'Texas': 695662,
                  'California': 423967}, name='area')
population = pd.Series({'California': 38332521, 'Texas': 26448193,
                        'New York': 19651127}, name='population')

In [8]:
area

Alaska        1723337
California     423967
Texas          695662
Name: area, dtype: int64

In [9]:
population

California    38332521
New York      19651127
Texas         26448193
Name: population, dtype: int64

In [10]:
population/area

Alaska              NaN
California    90.413926
New York            NaN
Texas         38.018740
dtype: float64

In [11]:
area.index

Index([u'Alaska', u'California', u'Texas'], dtype='object')

In [12]:
population.index

Index([u'California', u'New York', u'Texas'], dtype='object')

In [13]:
#Brings out the common intersection similar to sets
area.index & population.index

Index([u'California', u'Texas'], dtype='object')

In [14]:
#Brings out whats not common when comparing two sets of data. 
area.index.difference(area.index & population.index)

Index([u'Alaska'], dtype='object')

In [15]:
A = pd.Series([2, 4, 6], index=[0, 1, 2])
B = pd.Series([1, 3, 5], index=[1, 2, 3])
A

0    2
1    4
2    6
dtype: int64

In [16]:
B

1    1
2    3
3    5
dtype: int64

In [17]:
# Notice how indexes are different and only common indexes get added up
A+B

0    NaN
1    5.0
2    9.0
3    NaN
dtype: float64

In [18]:
# Rather than using a Nan and no opertions happening, we replace null with 0 using fill 
A.add(B, fill_value=0.0)

0    2.0
1    5.0
2    9.0
3    5.0
dtype: float64

In [19]:
# Random State 
#- using differenet probabilities it generates random numbers
#- THROUGH RANGES THAT ARE DEFINIABLE IN VARIOUS SIZES
rng = np.random.RandomState(10)

In [20]:
rng.randint(0,20,(2,2))

array([[ 9,  4],
       [15,  0]])

In [21]:
rng

<mtrand.RandomState at 0x10e672be0>

In [22]:
rng.randint(0,20,(2,2))

array([[17, 16],
       [17,  8]])

In [23]:
A = pd.DataFrame(rng.randint(0,20,(2,2)),columns=list('AB'))

In [24]:
A

Unnamed: 0,A,B
0,9,0
1,10,8


In [25]:
A.stack().mean()

6.75

In [26]:
A.mean()

A    9.5
B    4.0
dtype: float64

In [27]:
A

Unnamed: 0,A,B
0,9,0
1,10,8


In [28]:
B = pd.DataFrame(rng.randint(0,10, (3,3)),columns = list('BAC'))

In [29]:
B

Unnamed: 0,B,A,C
0,6,4,3
1,0,4,6
2,8,1,8


In [30]:
C = A + B

In [31]:
C

Unnamed: 0,A,B,C
0,13.0,6.0,
1,14.0,8.0,
2,,,


In [32]:
C.mean()

A    13.5
B     7.0
C     NaN
dtype: float64

In [33]:
C

Unnamed: 0,A,B,C
0,13.0,6.0,
1,14.0,8.0,
2,,,


In [34]:
C.stack().mean()

10.25

In [35]:
C.add(A,fill_value=17.5)

Unnamed: 0,A,B,C
0,22.0,6.0,
1,24.0,16.0,
2,,,


In [36]:
A**B

Unnamed: 0,A,B,C
0,6561.0,0.0,
1,10000.0,1.0,
2,,,


In [37]:
A

Unnamed: 0,A,B
0,9,0
1,10,8


In [38]:
B

Unnamed: 0,B,A,C
0,6,4,3
1,0,4,6
2,8,1,8


## Handling Missing Data

In [39]:
d = np.array([1,2,None,4])

In [40]:
d.sum()

TypeError: unsupported operand type(s) for +: 'int' and 'NoneType'

In [41]:
s = np.array([1,2,np.nan,3])

In [42]:
s.sum()

nan

In [43]:
np.nansum(s)

6.0

In [44]:
d=pd.Series([1,2,np.nan,3,None])

In [45]:
d

0    1.0
1    2.0
2    NaN
3    3.0
4    NaN
dtype: float64

In [46]:
d.isnull()

0    False
1    False
2     True
3    False
4     True
dtype: bool

In [47]:
d.notnull()

0     True
1     True
2    False
3     True
4    False
dtype: bool

In [48]:
d[d.notnull()]

0    1.0
1    2.0
3    3.0
dtype: float64

In [49]:
d[d.isnull()]

2   NaN
4   NaN
dtype: float64

In [50]:
d.dropna()

0    1.0
1    2.0
3    3.0
dtype: float64

In [51]:
d

0    1.0
1    2.0
2    NaN
3    3.0
4    NaN
dtype: float64

In [53]:
d = d.dropna()

In [54]:
d

0    1.0
1    2.0
3    3.0
dtype: float64

In [55]:
d = pd.Series([1,2,np.nan,3,None])

In [56]:
d.fillna('1')

0    1
1    2
2    1
3    3
4    1
dtype: object

In [57]:
d

0    1.0
1    2.0
2    NaN
3    3.0
4    NaN
dtype: float64

In [58]:
d.fillna(method='bfill')

0    1.0
1    2.0
2    3.0
3    3.0
4    NaN
dtype: float64