# Topics Covered In This Notebook

- `fillna` to fill missing values in different ways
- `interpolate` to estimate missing values by interpolation
- `dropna` to drop rows with missing values

In [3]:
import pandas as pd
# Load the weather observations, parse the "day" column as dates and use it
# as the row index.
df = pd.read_csv("weather_data.csv", parse_dates=["day"]).set_index("day")
df

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-01-01,32.0,6.0,Rain
2023-01-04,,9.0,Sunny
2023-01-05,28.0,,Snow
2023-01-06,,7.0,
2023-01-07,32.0,,Rain
2023-01-08,,,Sunny
2023-01-09,,,
2023-01-10,34.0,8.0,Cloudy
2023-01-11,40.0,12.0,Sunny


In [4]:
# Replace every missing value, in every column, with the constant 0.
# The same constant is also applied to the text column `event`.
new_df = df.fillna(value=0)
new_df

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-01-01,32.0,6.0,Rain
2023-01-04,0.0,9.0,Sunny
2023-01-05,28.0,0.0,Snow
2023-01-06,0.0,7.0,0
2023-01-07,32.0,0.0,Rain
2023-01-08,0.0,0.0,Sunny
2023-01-09,0.0,0.0,0
2023-01-10,34.0,8.0,Cloudy
2023-01-11,40.0,12.0,Sunny


In [5]:
# Fill each column with its own placeholder: the dict maps a column name to
# the value that replaces NaN in that column.
new_df = df.fillna(
    value={'temperature': 0, 'windspeed': 0, 'event': 'no event'},
)
new_df

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-01-01,32.0,6.0,Rain
2023-01-04,0.0,9.0,Sunny
2023-01-05,28.0,0.0,Snow
2023-01-06,0.0,7.0,no event
2023-01-07,32.0,0.0,Rain
2023-01-08,0.0,0.0,Sunny
2023-01-09,0.0,0.0,no event
2023-01-10,34.0,8.0,Cloudy
2023-01-11,40.0,12.0,Sunny


In [6]:
# Forward fill: every NaN is replaced by the last valid value above it in
# the same column.
# `DataFrame.ffill()` is used instead of `fillna(method="ffill")` because the
# `method` argument of `fillna` is deprecated in pandas 2.1+.
new_df = df.ffill()
new_df

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-01-01,32.0,6.0,Rain
2023-01-04,32.0,9.0,Sunny
2023-01-05,28.0,9.0,Snow
2023-01-06,28.0,7.0,Snow
2023-01-07,32.0,7.0,Rain
2023-01-08,32.0,7.0,Sunny
2023-01-09,32.0,7.0,Sunny
2023-01-10,34.0,8.0,Cloudy
2023-01-11,40.0,12.0,Sunny


In [7]:
# Backward fill: every NaN is replaced by the next valid value below it in
# the same column.
# `DataFrame.bfill()` is used instead of `fillna(method="bfill")` because the
# `method` argument of `fillna` is deprecated in pandas 2.1+.
new_df = df.bfill()
new_df

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-01-01,32.0,6.0,Rain
2023-01-04,28.0,9.0,Sunny
2023-01-05,28.0,7.0,Snow
2023-01-06,32.0,7.0,Rain
2023-01-07,32.0,8.0,Rain
2023-01-08,34.0,8.0,Sunny
2023-01-09,34.0,8.0,Cloudy
2023-01-10,34.0,8.0,Cloudy
2023-01-11,40.0,12.0,Sunny


In [8]:
# Forward fill, but carry a value across at most one consecutive NaN
# (limit=1); any further NaN in the same gap is left as it is.
# `DataFrame.ffill()` is used instead of `fillna(method="ffill")` because the
# `method` argument of `fillna` is deprecated in pandas 2.1+.
new_df = df.ffill(limit=1)
new_df

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-01-01,32.0,6.0,Rain
2023-01-04,32.0,9.0,Sunny
2023-01-05,28.0,9.0,Snow
2023-01-06,28.0,7.0,Snow
2023-01-07,32.0,7.0,Rain
2023-01-08,32.0,,Sunny
2023-01-09,,,Sunny
2023-01-10,34.0,8.0,Cloudy
2023-01-11,40.0,12.0,Sunny


In [9]:
# Linear interpolation (the default): each NaN is estimated from the
# neighbouring valid values, treating the rows as equally spaced.
# Only the numeric columns are interpolated. Calling interpolate() on a frame
# that also holds a text column (`event`) is deprecated in pandas 2.x, and
# text cannot be interpolated anyway, so `event` keeps its NaN values.
numeric_cols = df.select_dtypes(include="number").columns
new_df = df.copy()
new_df[numeric_cols] = df[numeric_cols].interpolate()
new_df


Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-01-01,32.0,6.0,Rain
2023-01-04,30.0,9.0,Sunny
2023-01-05,28.0,8.0,Snow
2023-01-06,30.0,7.0,
2023-01-07,32.0,7.25,Rain
2023-01-08,32.666667,7.5,Sunny
2023-01-09,33.333333,7.75,
2023-01-10,34.0,8.0,Cloudy
2023-01-11,40.0,12.0,Sunny


In [10]:
# Time-based interpolation: each NaN is estimated in proportion to the time
# elapsed between the surrounding dates in the index, so gaps in the dates
# (e.g. 2023-01-01 -> 2023-01-04) are taken into account.
# Only the numeric columns are interpolated. Calling interpolate() on a frame
# that also holds a text column (`event`) is deprecated in pandas 2.x, and
# text cannot be interpolated anyway, so `event` keeps its NaN values.
numeric_cols = df.select_dtypes(include="number").columns
new_df = df.copy()
new_df[numeric_cols] = df[numeric_cols].interpolate(method="time")
new_df

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-01-01,32.0,6.0,Rain
2023-01-04,29.0,9.0,Sunny
2023-01-05,28.0,8.0,Snow
2023-01-06,30.0,7.0,
2023-01-07,32.0,7.25,Rain
2023-01-08,32.666667,7.5,Sunny
2023-01-09,33.333333,7.75,
2023-01-10,34.0,8.0,Cloudy
2023-01-11,40.0,12.0,Sunny


In [11]:
# Keep only the fully populated rows: a row is dropped as soon as any of its
# columns holds NaN.
new_df = df.dropna(how="any")
new_df

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-01-01,32.0,6.0,Rain
2023-01-10,34.0,8.0,Cloudy
2023-01-11,40.0,12.0,Sunny


In [12]:
# Drop a row only when every one of its columns is NaN; rows with at least
# one valid value are kept.
new_df = df.dropna(axis=0, how="all")
new_df

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-01-01,32.0,6.0,Rain
2023-01-04,,9.0,Sunny
2023-01-05,28.0,,Snow
2023-01-06,,7.0,
2023-01-07,32.0,,Rain
2023-01-08,,,Sunny
2023-01-10,34.0,8.0,Cloudy
2023-01-11,40.0,12.0,Sunny


In [13]:
# thresh=1: keep every row that has at least one non-NaN value and drop all
# other rows.
new_df = df.dropna(axis=0, thresh=1)
new_df

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-01-01,32.0,6.0,Rain
2023-01-04,,9.0,Sunny
2023-01-05,28.0,,Snow
2023-01-06,,7.0,
2023-01-07,32.0,,Rain
2023-01-08,,,Sunny
2023-01-10,34.0,8.0,Cloudy
2023-01-11,40.0,12.0,Sunny


In [16]:
# Build a continuous daily index over the period and reindex the frame onto
# it, so that dates absent from the data appear as rows of NaN.
dt = pd.date_range(start="01-01-2023", end="01-11-2023")
idx = pd.DatetimeIndex(dt)
df = df.reindex(index=idx)
df

Unnamed: 0,temperature,windspeed,event
2023-01-01,32.0,6.0,Rain
2023-01-02,,,
2023-01-03,,,
2023-01-04,,9.0,Sunny
2023-01-05,28.0,,Snow
2023-01-06,,7.0,
2023-01-07,32.0,,Rain
2023-01-08,,,Sunny
2023-01-09,,,
2023-01-10,34.0,8.0,Cloudy
