### Pandas Dealing With NaNs

In [4]:
import pandas as pd

# Each dict is one store's stock. An item missing from a store's dict
# becomes NaN in that store's row of the DataFrame.
items = [
    {'bikes': 20, 'pants': 30, 'watches': 35, 'shirts': 15, 'shoes': 8, 'suits': 45},
    {'watches': 10, 'glasses': 50, 'bikes': 15, 'pants': 5, 'shirts': 2, 'shoes': 5, 'suits': 7},
    {'bikes': 20, 'pants': 30, 'watches': 35, 'glasses': 4, 'shoes': 10},
]

# One row per store, labelled by the store name.
store_items = pd.DataFrame(data=items, index=['store1', 'store2', 'store3'])
store_items

Unnamed: 0,bikes,pants,watches,shirts,shoes,suits,glasses
store1,20,30,35,15.0,8,45.0,
store2,15,5,10,2.0,5,7.0,50.0
store3,20,30,35,,10,,4.0


**Checking NaNs**

In [8]:
# Boolean mask with the same shape as store_items: True wherever a value is NaN.
x = store_items.isna()
x

Unnamed: 0,bikes,pants,watches,shirts,shoes,suits,glasses
store1,False,False,False,False,False,False,True
store2,False,False,False,False,False,False,False
store3,False,False,False,True,False,True,False


In [13]:
# Number of NaNs in each column (the boolean mask summed down the rows).
x = store_items.isna().sum()
print(x)

# Total number of NaNs in the whole DataFrame: add up the per-column counts.
x = x.sum()
print(x)

bikes      0
pants      0
watches    0
shirts     1
shoes      0
suits      1
glasses    1
dtype: int64
3


**Count the non-NaN values using `count()`**

In [14]:
# count() tallies the non-NaN entries in each column.
x = store_items.count(axis='index')
x

bikes      3
pants      3
watches    3
shirts     2
shoes      3
suits      2
glasses    2
dtype: int64

**Remove NaNs**

`dropna()` is not in-place by default: it returns a new DataFrame and the original is not modified.

In [15]:
# Default behaviour: drop every row that contains at least one NaN.
store_items.dropna(how='any')

Unnamed: 0,bikes,pants,watches,shirts,shoes,suits,glasses
store2,15,5,10,2.0,5,7.0,50.0


In [16]:
# axis=0 ('index') drops rows containing NaN -- same result as the default call.
store_items.dropna(axis='index')

Unnamed: 0,bikes,pants,watches,shirts,shoes,suits,glasses
store2,15,5,10,2.0,5,7.0,50.0


In [17]:
# axis=1 ('columns') drops every column that contains at least one NaN.
store_items.dropna(axis='columns')

Unnamed: 0,bikes,pants,watches,shoes
store1,20,30,35,8
store2,15,5,10,5
store3,20,30,35,10


In [18]:
# Display the original frame: the dropna() calls above returned new objects,
# so store_items still contains its NaNs.
store_items

Unnamed: 0,bikes,pants,watches,shirts,shoes,suits,glasses
store1,20,30,35,15.0,8,45.0,
store2,15,5,10,2.0,5,7.0,50.0
store3,20,30,35,,10,,4.0


In [20]:
# inplace=True mutates store_items itself (and returns None), so the
# NaN-containing columns are permanently removed from this object.
store_items.dropna(axis='columns', inplace=True)
store_items

Unnamed: 0,bikes,pants,watches,shoes
store1,20,30,35,8
store2,15,5,10,5
store3,20,30,35,10


**REPLACE NaNs**



In [24]:
# Rebuild the full frame from `items`, since the in-place dropna removed columns.
store_items = pd.DataFrame(data=items, index=['store1', 'store2', 'store3'])

# Replace every NaN with 0 (returns a new DataFrame).
store_items.fillna(value=0)


Unnamed: 0,bikes,pants,watches,shirts,shoes,suits,glasses
store1,20,30,35,15.0,8,45.0,0.0
store2,15,5,10,2.0,5,7.0,50.0
store3,20,30,35,0.0,10,0.0,4.0


In [26]:
# ffill (forward fill) replaces each NaN with the last valid value before it
# along the given axis; axis=0 walks down each column.
# The NaN in store1 is not filled because there is no previous row to copy from.
# DataFrame.ffill() is used instead of fillna(method='ffill') because the
# `method` argument of fillna is deprecated since pandas 2.1.
store_items.ffill(axis=0)

Unnamed: 0,bikes,pants,watches,shirts,shoes,suits,glasses
store1,20,30,35,15.0,8,45.0,
store2,15,5,10,2.0,5,7.0,50.0
store3,20,30,35,2.0,10,7.0,4.0


In [27]:
# Forward fill along each row (axis=1): a NaN takes the value from the column
# to its left. DataFrame.ffill() replaces the deprecated fillna(method='ffill').
store_items.ffill(axis=1)

Unnamed: 0,bikes,pants,watches,shirts,shoes,suits,glasses
store1,20.0,30.0,35.0,15.0,8.0,45.0,45.0
store2,15.0,5.0,10.0,2.0,5.0,7.0,50.0
store3,20.0,30.0,35.0,35.0,10.0,10.0,4.0


In [29]:
# Backward fill down each column (axis=0): a NaN takes the next valid value
# below it. NaNs in the last row stay NaN because no later row exists.
# DataFrame.bfill() replaces the deprecated fillna(method='backfill').
store_items.bfill(axis=0)

# In-place variant (modifies store_items itself instead of returning a copy):
# store_items.bfill(axis=0, inplace=True)

Unnamed: 0,bikes,pants,watches,shirts,shoes,suits,glasses
store1,20,30,35,15.0,8,45.0,50.0
store2,15,5,10,2.0,5,7.0,50.0
store3,20,30,35,,10,,4.0


**Interpolation methods - NaNs**



In [30]:
# Linear interpolation down each column (axis=0 / 'index').
store_items.interpolate(method='linear', axis='index')

Unnamed: 0,bikes,pants,watches,shirts,shoes,suits,glasses
store1,20,30,35,15.0,8,45.0,
store2,15,5,10,2.0,5,7.0,50.0
store3,20,30,35,2.0,10,7.0,4.0


In [32]:
# Linear interpolation along each row (axis=1 / 'columns').
store_items.interpolate(method='linear', axis='columns')

Unnamed: 0,bikes,pants,watches,shirts,shoes,suits,glasses
store1,20.0,30.0,35.0,15.0,8.0,45.0,45.0
store2,15.0,5.0,10.0,2.0,5.0,7.0,50.0
store3,20.0,30.0,35.0,22.5,10.0,7.0,4.0
