## data type cast

In [1]:
import pandas as pd
import numpy as np
# np.nan is a float, so the integers next to it are upcast and the Series comes out as float64
pd.Series([1, 2, np.nan, 1])

0    1.0
1    2.0
2    NaN
3    1.0
dtype: float64

In [2]:
# with strings the Series is object dtype and None is stored as-is
pd.Series(['fd', None, 'k' ])

0      fd
1    None
2       k
dtype: object

In [7]:
# booleans mixed with NaN do not stay bool: the result falls back to object dtype
pd.Series([True, np.nan, False])

0     True
1      NaN
2    False
dtype: object

## detecting null values

In [8]:
# mixed-type sample containing both missing-value markers: np.nan and None
data = pd.Series([1, np.nan, 'hello', None])

In [9]:
data

0        1
1      NaN
2    hello
3     None
dtype: object

In [11]:
# isnull() returns a boolean Series that is True for both NaN and None
data.isnull()

0    False
1     True
2    False
3     True
dtype: bool

In [12]:
# notnull() yields a boolean mask; indexing with it keeps only the non-missing entries
data[data.notnull()] # used as a mask

0        1
2    hello
dtype: object

In [13]:
# dropna() gives the same result as indexing with data.notnull()
data.dropna()

0        1
2    hello
dtype: object

In [3]:
# 3x4 frame of random integers drawn from [0, 12), columns labelled A-D
df = pd.DataFrame(
    np.random.randint(0, 12, size=(3, 4)),
    columns=list('ABCD'),
)
df

Unnamed: 0,A,B,C,D
0,10,9,0,6
1,6,9,0,5
2,7,8,9,7


In [4]:
# Blank out column C (position 2) in rows 1 and 2.
# Assigning None to an integer column stores NaN, so the column is upcast to float.
df.iloc[[1, 2], 2] = None
df

Unnamed: 0,A,B,C,D
0,10,9,0.0,6
1,6,9,,5
2,7,8,,7


In [35]:
# Drop every column that holds at least one NaN.
# axis="columns" is the same as axis=1; the default, axis=0, drops rows instead.
df.dropna(axis="columns")

Unnamed: 0,A,B,D
0,1,10,2
1,9,3,3
2,1,1,8


In [5]:
df

Unnamed: 0,A,B,C,D
0,10,9,0.0,6
1,6,9,,5
2,7,8,,7


In [6]:
# same kind of assignment by label: blank out columns C and D in rows 0 and 1
df.loc[[0, 1], ['C', 'D']] = None

In [7]:
df

Unnamed: 0,A,B,C,D
0,10,9,,
1,6,9,,
2,7,8,,7.0


In [51]:
# how="all" drops a column only when every value in it is NaN;
# a column with just some NaN values is kept
df.dropna(axis="columns", how="all")

Unnamed: 0,A,B,D
0,1,10,
1,9,3,
2,1,1,8.0


## fill in default values

In [52]:
# in a numeric Series None is converted to NaN, and the dtype becomes float64
data = pd.Series([1, None, 2, np.nan, 3])

In [53]:
data

0    1.0
1    NaN
2    2.0
3    NaN
4    3.0
dtype: float64

In [54]:
# fillna(0) returns a new Series with every NaN replaced by 0; data itself is not modified
data.fillna(0)

0    1.0
1    0.0
2    2.0
3    0.0
4    3.0
dtype: float64

In [55]:
data

0    1.0
1    NaN
2    2.0
3    NaN
4    3.0
dtype: float64

In [56]:
# Back-fill: each NaN takes the NEXT valid value (values are propagated backwards).
# bfill() replaces the deprecated fillna(method='bfill') spelling.
data.bfill()

0    1.0
1    2.0
2    2.0
3    3.0
4    3.0
dtype: float64

In [8]:
df

Unnamed: 0,A,B,C,D
0,10,9,,
1,6,9,,
2,7,8,,7.0


In [9]:
# Forward-fill along each row (axis=1): a NaN takes the last valid value to its left.
# ffill() replaces the deprecated fillna(method='ffill') spelling.
df.ffill(axis=1)

Unnamed: 0,A,B,C,D
0,10.0,9.0,9.0,9.0
1,6.0,9.0,9.0,9.0
2,7.0,8.0,8.0,7.0


In [10]:
?df.fillna

[0;31mSignature:[0m
[0mdf[0m[0;34m.[0m[0mfillna[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mvalue[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mmethod[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0maxis[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0minplace[0m[0;34m=[0m[0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mlimit[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mdowncast[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0;34m**[0m[0mkwargs[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
Fill NA/NaN values using the specified method.

Parameters
----------
value : scalar, dict, Series, or DataFrame
    Value to use to fill holes (e.g. 0), alternately a
    dict/Series/DataFrame of values specifying which value to use for
    each index (for a Series) or column (for a DataFrame).  Va