In [8]:
import pandas as pd
from pandas import DataFrame, Series
import numpy as np

In [2]:
# Small demo frame: three integer columns whose values partly overlap.
data = DataFrame(
    {
        'Qu1': [1, 3, 4, 3, 4],
        'Qu2': [2, 3, 1, 2, 3],
        'Qu3': [1, 5, 2, 4, 4],
    }
)
data

Unnamed: 0,Qu1,Qu2,Qu3
0,1,2,1
1,3,3,5
2,4,1,2
3,3,2,4
4,4,3,4


In [4]:
# Count how often each distinct value occurs in every column. The top-level
# pd.value_counts helper is deprecated in recent pandas releases, so the
# Series method is applied to each column instead. A value that never occurs
# in a column comes back as NaN after alignment and is replaced by a count of 0.
result = data.apply(pd.Series.value_counts).fillna(0)

In [5]:
# Show the counts table: one row per distinct value, one column per original column.
result

Unnamed: 0,Qu1,Qu2,Qu3
1,1.0,1.0,1.0
2,0.0,2.0,1.0
3,2.0,2.0,0.0
4,2.0,0.0,2.0
5,0.0,0.0,1.0


In [9]:
# Series of strings with a single missing entry (np.nan) at position 2.
string_data = Series(
    ['aardvark', 'artichoke', np.nan, 'avocado']
)

In [10]:
# Boolean mask that is True wherever an entry is missing.
string_data.isna()

0    False
1    False
2     True
3    False
dtype: bool

In [11]:
# Overwrite the first entry with Python's None, which pandas also reports as missing.
string_data[0] = None

In [12]:
# Inspect the Series: it now holds both None (index 0) and NaN (index 2).
string_data

0         None
1    artichoke
2          NaN
3      avocado
dtype: object

In [13]:
# Re-check the missing-value mask now that index 0 holds None.
string_data.isna()

0     True
1    False
2     True
3    False
dtype: bool

## Filtering Out Missing Data

In [14]:
# Numeric Series with missing values at positions 1 and 3.
data = Series(
    [1, np.nan, 3.5, np.nan, 7]
)

In [15]:
# Remove the null entries.
data.dropna()

0    1.0
2    3.5
4    7.0
dtype: float64

In [16]:
# Boolean-mask filtering: keep only the entries that are not missing.
data[data.notna()]

0    1.0
2    3.5
4    7.0
dtype: float64

In [17]:
# 4x3 frame mixing a complete row, partially missing rows and a fully missing row.
data = DataFrame([
    [1., 6.5, 3.],
    [1., np.nan, np.nan],
    [np.nan, np.nan, np.nan],
    [np.nan, 6.5, 3.],
])
data

Unnamed: 0,0,1,2
0,1.0,6.5,3.0
1,1.0,,
2,,,
3,,6.5,3.0


In [18]:
# Keep only the rows that contain no NaN at all ('any' is the default policy).
cleaned = data.dropna(how='any')

In [19]:
# Only the fully populated row (index 0) survives.
cleaned

Unnamed: 0,0,1,2
0,1.0,6.5,3.0


In [20]:
# Drop only the rows in which every value is NaN.
data.dropna(how='all')

Unnamed: 0,0,1,2
0,1.0,6.5,3.0
1,1.0,,
3,,6.5,3.0


In [21]:
# Add a new column labelled 4 that is filled entirely with NaN.
data[4] = np.nan

In [22]:
# Show the frame with the all-NaN column 4 appended.
data

Unnamed: 0,0,1,2,4
0,1.0,6.5,3.0,
1,1.0,,,
2,,,,
3,,6.5,3.0,


In [23]:
# Drop only the columns in which every value is NaN.
data.dropna(axis='columns', how='all')

Unnamed: 0,0,1,2
0,1.0,6.5,3.0
1,1.0,,
2,,,
3,,6.5,3.0


In [25]:
# Draw the 7x3 standard-normal sample from a seeded generator so that a
# Restart & Run All reproduces the same frame instead of new random values.
# A local RandomState is used so the global NumPy random state is untouched.
df = DataFrame(np.random.RandomState(12345).randn(7, 3))
df

Unnamed: 0,0,1,2
0,-1.201782,-1.071651,2.185548
1,-0.575724,0.140551,-0.820376
2,-0.263425,1.061567,-0.333018
3,-1.435237,0.456216,1.322233
4,0.521477,-1.016991,1.645543
5,-0.174286,-1.297249,0.371323
6,0.357332,0.788229,-0.504023


In [26]:
# .loc slices by label and includes the end label, so rows 0 and 1 of column 1 become NaN.
df.loc[0:1, 1] = np.nan

In [27]:
# Inspect df after rows 0 and 1 of column 1 were set to NaN.
df

Unnamed: 0,0,1,2
0,-1.201782,,2.185548
1,-0.575724,,-0.820376
2,-0.263425,1.061567,-0.333018
3,-1.435237,0.456216,1.322233
4,0.521477,-1.016991,1.645543
5,-0.174286,-1.297249,0.371323
6,0.357332,0.788229,-0.504023


In [28]:
# Replace every NaN with 0 in the returned frame.
df.fillna(value=0)

Unnamed: 0,0,1,2
0,-1.201782,0.0,2.185548
1,-0.575724,0.0,-0.820376
2,-0.263425,1.061567,-0.333018
3,-1.435237,0.456216,1.322233
4,0.521477,-1.016991,1.645543
5,-0.174286,-1.297249,0.371323
6,0.357332,0.788229,-0.504023


In [30]:
# inplace=True modifies the existing object instead of returning a new one.
df.fillna(0, inplace=True)

In [31]:
# df itself now holds the filled values (0 in place of the former NaN entries).
df

Unnamed: 0,0,1,2
0,-1.201782,0.0,2.185548
1,-0.575724,0.0,-0.820376
2,-0.263425,1.061567,-0.333018
3,-1.435237,0.456216,1.322233
4,0.521477,-1.016991,1.645543
5,-0.174286,-1.297249,0.371323
6,0.357332,0.788229,-0.504023


In [32]:
# Draw the 6x3 standard-normal sample from a seeded generator so that a
# Restart & Run All reproduces the same frame instead of new random values.
# A local RandomState is used so the global NumPy random state is untouched.
df = DataFrame(np.random.RandomState(54321).randn(6, 3))
df

Unnamed: 0,0,1,2
0,0.685838,-0.98822,-0.590396
1,0.936142,-1.937112,1.090324
2,1.162065,1.710779,-1.758683
3,-0.348645,0.079936,0.981048
4,-0.000453,2.055471,-1.457754
5,0.797676,-0.680987,0.564739


In [33]:
# Label-based, end-inclusive slice: rows 0 through 2 of column 2 become NaN.
df.loc[0:2, 2] = np.nan

In [37]:
# NOTE(review): the saved output below includes an all-NaN column labelled -1
# that no visible cell creates, and the execution counts jump from 33 to 37.
# It presumably comes from a cell that was later removed; re-run the notebook
# from a fresh kernel to refresh this output.
df

Unnamed: 0,0,1,2,-1
0,0.685838,-0.98822,,
1,0.936142,-1.937112,,
2,1.162065,1.710779,,
3,-0.348645,0.079936,0.981048,
4,-0.000453,2.055471,-1.457754,
5,0.797676,-0.680987,0.564739,


In [38]:
# Fill the NaN entries of each column with that column's mean.
df.fillna(value=df.mean())

Unnamed: 0,0,1,2,-1
0,0.685838,-0.98822,0.029344,
1,0.936142,-1.937112,0.029344,
2,1.162065,1.710779,0.029344,
3,-0.348645,0.079936,0.981048,
4,-0.000453,2.055471,-1.457754,
5,0.797676,-0.680987,0.564739,
