# Working with Null Values

In [3]:
import pandas as pd
import numpy as np

In [4]:
# Small demo roster: every column contains missing entries (np.nan), and the
# last row is missing in all four columns.
data = {
    'First Name':     ['Harry', 'Hermione', 'Ron', 'Draco', 'Luna', np.nan, np.nan],
    'Last Name':      ['Potter', 'Granger', 'Weasley', 'Malfoy', np.nan, 'Lovegood', np.nan],
    'Hogwarts House': ['Gryffindor', 'Gryffindor', 'Gryffindor', np.nan, 'Ravenclaw', 'Gryffindor', np.nan],
    'Age':            [18, 17, np.nan, 16, 16, 18, np.nan],
}

Nulls = pd.DataFrame(data=data)

# Bare last expression so the notebook renders the frame.
Nulls

Unnamed: 0,First Name,Last Name,Hogwarts House,Age
0,Harry,Potter,Gryffindor,18.0
1,Hermione,Granger,Gryffindor,17.0
2,Ron,Weasley,Gryffindor,
3,Draco,Malfoy,,16.0
4,Luna,,Ravenclaw,16.0
5,,Lovegood,Gryffindor,18.0
6,,,,


In [6]:
# Keep only the rows where 'Age' is present; gaps in other columns are ignored.
Nulls.dropna(axis='index', how='any', subset=['Age'])

Unnamed: 0,First Name,Last Name,Hogwarts House,Age
0,Harry,Potter,Gryffindor,18.0
1,Hermione,Granger,Gryffindor,17.0
3,Draco,Malfoy,,16.0
4,Luna,,Ravenclaw,16.0
5,,Lovegood,Gryffindor,18.0


In [11]:
# With axis='columns', `subset` names ROW labels: drop every column that is
# missing a value in row 3 (here only 'Hogwarts House').
Nulls.dropna(axis='columns', subset=[3])

Unnamed: 0,First Name,Last Name,Age
0,Harry,Potter,18.0
1,Hermione,Granger,17.0
2,Ron,Weasley,
3,Draco,Malfoy,16.0
4,Luna,,16.0
5,,Lovegood,18.0
6,,,


In [13]:
# Drop every row that has at least one missing value in any column.
Nulls.dropna(axis='index', how='any')

Unnamed: 0,First Name,Last Name,Hogwarts House,Age
0,Harry,Potter,Gryffindor,18.0
1,Hermione,Granger,Gryffindor,17.0


In [15]:
# Impute missing ages with the mean of the known ages (mean() skips NaN).
mean_age = Nulls['Age'].mean()
Nulls['Age'].fillna(value=mean_age)

0    18.0
1    17.0
2    17.0
3    16.0
4    16.0
5    18.0
6    17.0
Name: Age, dtype: float64

In [18]:
# Backward fill: each missing value takes the next valid value below it in the
# same column; trailing gaps (nothing valid below) stay NaN.
# DataFrame.bfill() is used instead of fillna(method='bfill'), whose `method`
# keyword is deprecated since pandas 2.1.
Nulls.bfill()

Unnamed: 0,First Name,Last Name,Hogwarts House,Age
0,Harry,Potter,Gryffindor,18.0
1,Hermione,Granger,Gryffindor,17.0
2,Ron,Weasley,Gryffindor,16.0
3,Draco,Malfoy,Ravenclaw,16.0
4,Luna,Lovegood,Ravenclaw,16.0
5,,Lovegood,Gryffindor,18.0
6,,,,


In [20]:
# Swap every NaN for the placeholder string 'Unknown'; numeric columns such as
# 'Age' then hold a mix of numbers and strings.
Nulls.replace(to_replace=np.nan, value='Unknown')

Unnamed: 0,First Name,Last Name,Hogwarts House,Age
0,Harry,Potter,Gryffindor,18.0
1,Hermione,Granger,Gryffindor,17.0
2,Ron,Weasley,Gryffindor,Unknown
3,Draco,Malfoy,Unknown,16.0
4,Luna,Unknown,Ravenclaw,16.0
5,Unknown,Lovegood,Gryffindor,18.0
6,Unknown,Unknown,Unknown,Unknown


In [21]:
# Transaction log: each student appears on two consecutive rows.
# Three values are deliberately missing: one last name (row 1), one house
# (row 4) and one transaction ID (row 14).
data = {
    # Every first name is repeated twice, in roster order.
    'First name': [
        name
        for name in ['Harry', 'Hermione', 'Ron', 'Draco', 'Luna', 'Neville', 'Ginny',
                     'Fred', 'George', 'Cho', 'Cedric', 'Sirius', 'Remus']
        for _ in range(2)
    ],
    'Last name': [
        'Potter', np.nan,
        'Granger', 'Granger',
        'Weasley', 'Weasley',
        'Malfoy', 'Malfoy',
        'Lovegood', 'Lovegood',
        'Longbottom', 'Longbottom',
        'Weasley', 'Weasley',
        'Weasley', 'Weasley',
        'Weasley', 'Weasley',
        'Chang', 'Chang',
        'Diggory', 'Diggory',
        'Black', 'Black',
        'Lupin', 'Lupin',
    ],
    'Hogwarts house': [
        'Gryffindor', 'Gryffindor',
        'Gryffindor', 'Gryffindor',
        np.nan, 'Gryffindor',
        'Slytherin', 'Slytherin',
        'Ravenclaw', 'Ravenclaw',
        'Gryffindor', 'Gryffindor',
        'Gryffindor', 'Gryffindor',
        'Gryffindor', 'Gryffindor',
        'Gryffindor', 'Gryffindor',
        'Ravenclaw', 'Ravenclaw',
        'Hufflepuff', 'Hufflepuff',
        'Gryffindor', 'Gryffindor',
        'Gryffindor', 'Gryffindor',
    ],
    # IDs run 1..26 with the 15th one missing.
    'Transaction_ID': [*range(1, 15), np.nan, *range(16, 27)],
    'Transaction_amount': [
        50, 75, 30, 20, 45, 60, 55, 80, 65, 70, 40, 85, 90,
        55, 60, 75, 80, 35, 40, 95, 50, 65, 70, 25, 30, 45,
    ],
}

df = pd.DataFrame(data=data)
df

Unnamed: 0,First name,Last name,Hogwarts house,Transaction_ID,Transaction_amount
0,Harry,Potter,Gryffindor,1.0,50
1,Harry,,Gryffindor,2.0,75
2,Hermione,Granger,Gryffindor,3.0,30
3,Hermione,Granger,Gryffindor,4.0,20
4,Ron,Weasley,,5.0,45
5,Ron,Weasley,Gryffindor,6.0,60
6,Draco,Malfoy,Slytherin,7.0,55
7,Draco,Malfoy,Slytherin,8.0,80
8,Luna,Lovegood,Ravenclaw,9.0,65
9,Luna,Lovegood,Ravenclaw,10.0,70


In [27]:
# Fill a missing last name from another row of the same person, identified by
# (first name, house): 'first' yields the first non-null last name per group.
known_last_name = df.groupby(['First name', 'Hogwarts house'])['Last name'].transform('first')
df['Last name'] = df['Last name'].fillna(known_last_name)

In [28]:
# Re-display the frame to check the 'Last name' fill from the previous cell.
df

Unnamed: 0,First name,Last name,Hogwarts house,Transaction_ID,Transaction_amount
0,Harry,Potter,Gryffindor,1.0,50
1,Harry,Potter,Gryffindor,2.0,75
2,Hermione,Granger,Gryffindor,3.0,30
3,Hermione,Granger,Gryffindor,4.0,20
4,Ron,Weasley,,5.0,45
5,Ron,Weasley,Gryffindor,6.0,60
6,Draco,Malfoy,Slytherin,7.0,55
7,Draco,Malfoy,Slytherin,8.0,80
8,Luna,Lovegood,Ravenclaw,9.0,65
9,Luna,Lovegood,Ravenclaw,10.0,70


In [29]:
# Fill a missing house from another row of the same person, identified by
# (first name, last name): 'first' yields the first non-null house per group.
known_house = df.groupby(['First name', 'Last name'])['Hogwarts house'].transform('first')
df['Hogwarts house'] = df['Hogwarts house'].fillna(known_house)

In [35]:
# Reconstruct missing transaction IDs as "last known ID + position inside the gap".
# A plain `ffill() + 1` assigns the same ID to every value in a run of
# consecutive gaps; numbering the gaps within each run (1, 2, 3, ...) keeps the
# filled IDs distinct. A single isolated gap still becomes previous ID + 1.
ids = df['Transaction_ID']
missing = ids.isna()
# A new run starts wherever the missing/present flag flips.
run_id = missing.ne(missing.shift(fill_value=False)).cumsum()
# Within a run of gaps this counts 1, 2, 3, ...; it is 0 on rows that have an ID.
offset = missing.astype('int64').groupby(run_id).cumsum()
# Gaps at the very start have no previous ID to extend and stay NaN.
df['Transaction_ID'] = ids.fillna(ids.ffill() + offset)

In [36]:
# Re-display the frame after the 'Hogwarts house' and 'Transaction_ID' fills.
df

Unnamed: 0,First name,Last name,Hogwarts house,Transaction_ID,Transaction_amount
0,Harry,Potter,Gryffindor,1.0,50
1,Harry,Potter,Gryffindor,2.0,75
2,Hermione,Granger,Gryffindor,3.0,30
3,Hermione,Granger,Gryffindor,4.0,20
4,Ron,Weasley,Gryffindor,5.0,45
5,Ron,Weasley,Gryffindor,6.0,60
6,Draco,Malfoy,Slytherin,7.0,55
7,Draco,Malfoy,Slytherin,8.0,80
8,Luna,Lovegood,Ravenclaw,9.0,65
9,Luna,Lovegood,Ravenclaw,10.0,70
