In [1]:
import pandas as pd
import numpy as np

In [2]:
# Seeded legacy generator so every random example in this notebook is reproducible.
rng = np.random.RandomState(seed=42)
# Four integers drawn from [0, 10), wrapped in a Series with the default 0..3 index.
ser = pd.Series(rng.randint(low=0, high=10, size=4))
ser

0    6
1    3
2    7
3    4
dtype: int32

In [6]:
# 3x4 frame of integers in [0, 10) with labelled columns; the draws continue
# the stream of the shared `rng` generator.
df = pd.DataFrame(
    data=rng.randint(low=0, high=10, size=(3, 4)),
    columns=['A', 'B', 'C', 'D'],
)
df

Unnamed: 0,A,B,C,D
0,1,7,5,1
1,4,0,9,5
2,8,0,9,2


## Index preservation

In [7]:
# Apply a NumPy ufunc directly to the Series; the result shown below keeps the 0-3 index.
np.exp(ser)

0     403.428793
1      20.085537
2    1096.633158
3      54.598150
dtype: float64

## Operations between DataFrame and Series
* row and column labels are maintained
* Just like operations between a 2D array and a 1D array

In [9]:
# 3x3 array of integers in [0, 10) used for the row-wise subtraction example.
A = rng.randint(0, 10, size=(3, 3))

In [11]:
# Subtract the first row from every row: the 1D row A[0] is broadcast across the 2D array.
A - A[0]

array([[ 0,  0,  0],
       [-4,  1, -6],
       [ 0,  1,  0]])

# Handling Missing Data with Pandas

### None: Pythonic missing data

In [15]:
# A Python None among the integers makes NumPy fall back to dtype=object (see output below).
vals1 = np.array([1, None, 3, 4])
vals1

array([1, None, 3, 4], dtype=object)

### NaN: Missing Numerical Data

In [13]:
# NaN is itself a float, so the array keeps a native numeric dtype (float64, per the output below).
vals2 = np.array([1, np.nan, 3, 4])
vals2.dtype

dtype('float64')

# Operating on Null Values
* isnull()
* notnull()
* dropna()
* fillna()

## Detecting null values

In [16]:
# One Series holding an int, a float NaN, a string and a Python None (dtype object, per the output below).
data = pd.Series([1, np.nan, 'hello', None])
data

0        1
1      NaN
2    hello
3     None
dtype: object

In [23]:
# Boolean mask: True wherever the entry is missing (both NaN and None are flagged in the output below).
data.isnull()

0    False
1     True
2    False
3     True
dtype: bool

In [25]:
# Keep only the entries that are present, using the notnull() mask for boolean selection.
data.loc[data.notnull()]

0        1
2    hello
dtype: object

## Dropping null values

In [26]:
# Returns a Series without the missing entries; the result is only displayed, `data` is not reassigned.
data.dropna()

0        1
2    hello
dtype: object

In [27]:
# Rebind `df` to a small frame with one NaN in the first row and one in the
# last row; only the middle row is complete.
df = pd.DataFrame([
    [1,      np.nan, 2],
    [2,      3,      5],
    [np.nan, 4,      6],
])
df

Unnamed: 0,0,1,2
0,1.0,,2
1,2.0,3.0,5
2,,4.0,6


In [29]:
# Drop every row that contains at least one missing value.
# 'index' is the documented name for axis 0; it selects the same axis as the 'rows' alias.
df.dropna(axis='index')

Unnamed: 0,0,1,2
1,2.0,3.0,5


## Filling null values

In [30]:
# Rebinds `data` to a labelled numeric Series; per the output below, None is stored as NaN and the dtype is float64.
data = pd.Series([1, np.nan, 2, None, 3], index=list('abcde'))
data

a    1.0
b    NaN
c    2.0
d    NaN
e    3.0
dtype: float64

In [33]:
# Fill the gaps with the mean of the observed values.
# Series.mean() skips NaN by default, so no explicit notnull() mask is needed.
data.fillna(data.mean())

a    1.0
b    2.0
c    2.0
d    2.0
e    3.0
dtype: float64