In [2]:
# Import necessary libraries
import pandas as pd

In [3]:
# Load the car-sales CSV (the version containing missing entries) into a dataframe
cars_mv = pd.read_csv(filepath_or_buffer="car-sales-missing-data.csv")
cars_mv

Unnamed: 0,Make,Colour,Odometer,Doors,Price
0,Toyota,White,150043.0,4.0,"$4,000"
1,Honda,Red,87899.0,4.0,"$5,000"
2,Toyota,Blue,,3.0,"$7,000"
3,BMW,Black,11179.0,5.0,"$22,000"
4,Nissan,White,213095.0,4.0,"$3,500"
5,Toyota,Green,,4.0,"$4,500"
6,Honda,,,4.0,"$7,500"
7,Honda,Blue,,4.0,
8,Toyota,White,60000.0,,
9,,White,31600.0,4.0,"$9,700"


In [5]:
# Check if any missing values in the dataframe - option 1
# info() prints, for each column, its non-null count and dtype; a non-null
# count lower than the number of index entries means that column has gaps.
cars_mv.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Make      9 non-null      object 
 1   Colour    9 non-null      object 
 2   Odometer  6 non-null      float64
 3   Doors     9 non-null      float64
 4   Price     8 non-null      object 
dtypes: float64(2), object(3)
memory usage: 528.0+ bytes


In [8]:
# Count the missing values in each column - option 2 (better)
# isna() flags every missing cell as True; summing those flags column-wise
# gives the number of gaps per column.
cars_mv.isna().sum()

Make        1
Colour      1
Odometer    4
Doors       1
Price       2
dtype: int64

In [10]:
# Keep only complete rows: drop every row that has a missing value in at least one column
cars_mv_drop1 = cars_mv.dropna(axis=0, how="any")
cars_mv_drop1

Unnamed: 0,Make,Colour,Odometer,Doors,Price
0,Toyota,White,150043.0,4.0,"$4,000"
1,Honda,Red,87899.0,4.0,"$5,000"
3,BMW,Black,11179.0,5.0,"$22,000"
4,Nissan,White,213095.0,4.0,"$3,500"


In [11]:
# Drop only the rows that are missing a value in any of the listed columns;
# gaps in the remaining columns are tolerated
cars_mv_drop2 = cars_mv.dropna(axis=0, how="any", subset=["Make", "Price"])
cars_mv_drop2

Unnamed: 0,Make,Colour,Odometer,Doors,Price
0,Toyota,White,150043.0,4.0,"$4,000"
1,Honda,Red,87899.0,4.0,"$5,000"
2,Toyota,Blue,,3.0,"$7,000"
3,BMW,Black,11179.0,5.0,"$22,000"
4,Nissan,White,213095.0,4.0,"$3,500"
5,Toyota,Green,,4.0,"$4,500"
6,Honda,,,4.0,"$7,500"


In [15]:
# Replace every missing value in the dataframe with one constant.
# Note: the constant is applied to all columns, so the text columns
# (Make, Colour, Price) also receive the number 0 where they had gaps.
cars_mv_upd1 = cars_mv.fillna(value=0)
cars_mv_upd1

Unnamed: 0,Make,Colour,Odometer,Doors,Price
0,Toyota,White,150043.0,4.0,"$4,000"
1,Honda,Red,87899.0,4.0,"$5,000"
2,Toyota,Blue,0.0,3.0,"$7,000"
3,BMW,Black,11179.0,5.0,"$22,000"
4,Nissan,White,213095.0,4.0,"$3,500"
5,Toyota,Green,0.0,4.0,"$4,500"
6,Honda,0,0.0,4.0,"$7,500"
7,Honda,Blue,0.0,4.0,0
8,Toyota,White,60000.0,0.0,0
9,0,White,31600.0,4.0,"$9,700"


In [26]:
# Fill the gaps column by column, using a replacement suited to each column
cars_mv_fill_values = {
    "Make": cars_mv["Make"].mode().iloc[0],    # most frequent value in the column
    "Colour": "Missing",                       # constant placeholder
    "Odometer": cars_mv["Odometer"].mean(),    # average of the values that are present
    "Doors": cars_mv["Doors"].mode().iloc[0],  # most frequent value in the column
}
# Price is not in the mapping, so its missing values are left as they are
cars_mv_upd2 = cars_mv.fillna(value=cars_mv_fill_values)
cars_mv_upd2

Unnamed: 0,Make,Colour,Odometer,Doors,Price
0,Toyota,White,150043.0,4.0,"$4,000"
1,Honda,Red,87899.0,4.0,"$5,000"
2,Toyota,Blue,92302.666667,3.0,"$7,000"
3,BMW,Black,11179.0,5.0,"$22,000"
4,Nissan,White,213095.0,4.0,"$3,500"
5,Toyota,Green,92302.666667,4.0,"$4,500"
6,Honda,Missing,92302.666667,4.0,"$7,500"
7,Honda,Blue,92302.666667,4.0,
8,Toyota,White,60000.0,4.0,
9,Toyota,White,31600.0,4.0,"$9,700"


In [27]:
# Append a new column; the scalar is broadcast to every row.
# This modifies cars_mv_upd2 itself rather than creating a new dataframe.
cars_mv_upd2["Wheels"] = 4
cars_mv_upd2

Unnamed: 0,Make,Colour,Odometer,Doors,Price,Wheels
0,Toyota,White,150043.0,4.0,"$4,000",4
1,Honda,Red,87899.0,4.0,"$5,000",4
2,Toyota,Blue,92302.666667,3.0,"$7,000",4
3,BMW,Black,11179.0,5.0,"$22,000",4
4,Nissan,White,213095.0,4.0,"$3,500",4
5,Toyota,Green,92302.666667,4.0,"$4,500",4
6,Honda,Missing,92302.666667,4.0,"$7,500",4
7,Honda,Blue,92302.666667,4.0,,4
8,Toyota,White,60000.0,4.0,,4
9,Toyota,White,31600.0,4.0,"$9,700",4


In [29]:
# Remove the "Wheels" column from the dataframe.
# The result is rebound to the same name instead of using inplace=True, and
# errors="ignore" keeps the cell safe to re-run: if the column has already
# been dropped, drop() returns the frame unchanged instead of raising KeyError.
cars_mv_upd2 = cars_mv_upd2.drop(columns="Wheels", errors="ignore")
cars_mv_upd2

Unnamed: 0,Make,Colour,Odometer,Doors,Price
0,Toyota,White,150043.0,4.0,"$4,000"
1,Honda,Red,87899.0,4.0,"$5,000"
2,Toyota,Blue,92302.666667,3.0,"$7,000"
3,BMW,Black,11179.0,5.0,"$22,000"
4,Nissan,White,213095.0,4.0,"$3,500"
5,Toyota,Green,92302.666667,4.0,"$4,500"
6,Honda,Missing,92302.666667,4.0,"$7,500"
7,Honda,Blue,92302.666667,4.0,
8,Toyota,White,60000.0,4.0,
9,Toyota,White,31600.0,4.0,"$9,700"
