# Dealing with NaN

In [13]:
import pandas as pd

In [14]:
# One dictionary per store, mapping item name -> value.
# A key that is missing from a store's dictionary shows up as NaN in the DataFrame.
items2 = [
    {
        'bikes': 20, 'pants': 30, 'glasses': 50, 'watches': 35,
        'shirts': 15, 'shoes': 8, 'suits': 45,
    },
    {
        'watches': 10, 'bikes': 15, 'pants': 5,
        'shirts': 2, 'shoes': 5, 'suits': 7,
    },
    {
        'bikes': 20, 'pants': 30, 'watches': 35,
        'glasses': 4, 'shoes': 10,
    },
]

# Build the DataFrame, labelling each row with its store name
store_items = pd.DataFrame(
    data=items2,
    index=['store 1', 'store 2', 'store 3'],
)

# Show the resulting DataFrame
store_items

Unnamed: 0,bikes,pants,glasses,watches,shirts,shoes,suits
store 1,20,30,50.0,35,15.0,8,45.0
store 2,15,5,,10,2.0,5,7.0
store 3,20,30,4.0,35,,10,


## How to count the number of NaN values in store_items?

In [21]:
# Total NaN count: the first .sum() tallies NaNs per column,
# the second .sum() adds those per-column tallies together
x = store_items.isna().sum().sum()

# Report the total
print(
    'Number of NaN values in our DataFrame:\n',
    x,
)


Number of NaN values in our DataFrame:
 3


In [22]:
# Number of rows (stores) in the DataFrame
len(store_items)

3

In [24]:
# Percentage of NaN values per column:
# (NaN count in the column / number of rows) * 100
x = store_items.isna().sum().div(len(store_items)).mul(100)
x

bikes       0.000000
pants       0.000000
glasses    33.333333
watches     0.000000
shirts     33.333333
shoes       0.000000
suits      33.333333
dtype: float64

## Number of non-NaN values in our DataFrame

In [27]:
# Emit a blank line first to space out the output
print()
# count() gives the non-NaN tally per column; .sum() folds those into one total
print(
    'Number of non-NaN values in the columns of our DataFrame:\n',
    store_items.count().sum(),
)


Number of non-NaN values in the columns of our DataFrame:
 18


## How to drop any rows or columns with NaN values?

In [28]:
# Drop every row that contains at least one NaN value.
# This returns a new DataFrame; store_items itself is left unchanged (not in place).
store_items.dropna(axis='index', how='any')

Unnamed: 0,bikes,pants,glasses,watches,shirts,shoes,suits
store 1,20,30,50.0,35,15.0,8,45.0


In [29]:
# Drop every column that contains at least one NaN value
store_items.dropna(axis='columns', how='any')

Unnamed: 0,bikes,pants,watches,shoes
store 1,20,30,35,8
store 2,15,5,10,5
store 3,20,30,35,10


## Replace all NaN values with 0

In [31]:
# Substitute 0 for every NaN value in the DataFrame
store_items.fillna(value=0)

Unnamed: 0,bikes,pants,glasses,watches,shirts,shoes,suits
store 1,20,30,50.0,35,15.0,8,45.0
store 2,15,5,0.0,10,2.0,5,7.0
store 3,20,30,4.0,35,0.0,10,0.0


## Replace all NaN values with the column mean

In [35]:
# Per-column mean (NaN values are skipped when averaging)
column_means = store_items.mean(numeric_only=True)
print(column_means)

# Each NaN is replaced by the mean of the column it belongs to
store_items.fillna(value=column_means)

bikes      18.333333
pants      21.666667
glasses    27.000000
watches    26.666667
shirts      8.500000
shoes       7.666667
suits      26.000000
dtype: float64


Unnamed: 0,bikes,pants,glasses,watches,shirts,shoes,suits
store 1,20,30,50.0,35,15.0,8,45.0
store 2,15,5,27.0,10,2.0,5,7.0
store 3,20,30,4.0,35,8.5,10,26.0


## Using linear interpolation to replace NaN

In [36]:
# Fill NaN values by linear interpolation down each column,
# i.e. using the values of the neighbouring rows in the same column
store_items.interpolate(axis='index', method='linear')

Unnamed: 0,bikes,pants,glasses,watches,shirts,shoes,suits
store 1,20,30,50.0,35,15.0,8,45.0
store 2,15,5,27.0,10,2.0,5,7.0
store 3,20,30,4.0,35,2.0,10,7.0


In [37]:
# Fill NaN values by linear interpolation across each row,
# i.e. using the values of the neighbouring columns in the same row
store_items.interpolate(axis='columns', method='linear')

Unnamed: 0,bikes,pants,glasses,watches,shirts,shoes,suits
store 1,20.0,30.0,50.0,35.0,15.0,8.0,45.0
store 2,15.0,5.0,7.5,10.0,2.0,5.0,7.0
store 3,20.0,30.0,4.0,35.0,22.5,10.0,10.0
