In [None]:
# import labs
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Reading Employees.CSV file into a pandas DataFrame
employees = pd.read_csv('employees.csv')

In [11]:
# Detection of missing values
# isnull() returns True for missing values
print("Missing values in the DataFrame:")
print(employees.isnull())

Missing values in the DataFrame:
     First Name  Gender  Start Date  Last Login Time  Salary  Bonus %  \
0         False   False       False            False   False    False   
1         False   False       False            False   False    False   
2         False   False       False            False   False    False   
3         False   False       False            False   False    False   
4         False   False       False            False   False    False   
..          ...     ...         ...              ...     ...      ...   
995       False    True       False            False   False    False   
996       False   False       False            False   False    False   
997       False   False       False            False   False    False   
998       False   False       False            False   False    False   
999       False   False       False            False   False    False   

     Senior Management   Team  
0                False  False  
1                False   T

In [12]:
# Count total missing values in each column
print("\nCount of missing values per column:")
print(employees.isnull().sum())


Count of missing values per column:
First Name            67
Gender               145
Start Date             0
Last Login Time        0
Salary                 0
Bonus %                0
Senior Management     67
Team                  43
dtype: int64


In [13]:
# Filter rows where 'Gender' is missing
missing_gender = employees[employees['Gender'].isnull()]
print("\nRows with missing Gender data:")
print(missing_gender)


Rows with missing Gender data:
    First Name Gender  Start Date Last Login Time  Salary  Bonus %  \
20        Lois    NaN   4/22/1995         7:18 PM   64714    4.934   
22      Joshua    NaN    3/8/2012         1:58 AM   90816   18.816   
27       Scott    NaN   7/11/1991         6:58 PM  122367    5.218   
31       Joyce    NaN   2/20/2005         2:40 PM   88657   12.752   
41   Christine    NaN   6/28/2015         1:08 AM   66582   11.308   
..         ...    ...         ...             ...     ...      ...   
961    Antonio    NaN   6/18/1989         9:37 PM  103050    3.050   
972     Victor    NaN   7/28/2006         2:49 PM   76381   11.159   
985    Stephen    NaN   7/10/1983         8:10 PM   85668    1.909   
989     Justin    NaN   2/10/1991         4:58 PM   38344    3.794   
995      Henry    NaN  11/23/2014         6:09 AM  132483   16.655   

    Senior Management                  Team  
20               True                 Legal  
22               True       Client 

In [14]:
# Filter rows with non-missing 'Gender'
valid_gender = employees[employees['Gender'].notnull()]
print("\nRows with valid Gender data:")
print(valid_gender)


Rows with valid Gender data:
    First Name  Gender Start Date Last Login Time  Salary  Bonus %  \
0      Douglas    Male   8/6/1993        12:42 PM   97308    6.945   
1       Thomas    Male  3/31/1996         6:53 AM   61933    4.170   
2        Maria  Female  4/23/1993        11:17 AM  130590   11.858   
3        Jerry    Male   3/4/2005         1:00 PM  138705    9.340   
4        Larry    Male  1/24/1998         4:47 PM  101004    1.389   
..         ...     ...        ...             ...     ...      ...   
994     George    Male  6/21/2013         5:47 PM   98874    4.479   
996    Phillip    Male  1/31/1984         6:30 AM   42392   19.675   
997    Russell    Male  5/20/2013        12:39 PM   96914    1.421   
998      Larry    Male  4/20/2013         4:45 PM   60500   11.985   
999     Albert    Male  5/15/2012         6:24 PM  129949   10.169   

    Senior Management                  Team  
0                True             Marketing  
1                True                

In [18]:
# Fill missing values in 'Gender' column with 'No Gender'
employees['Gender'] = employees['Gender'].fillna('No Gender')
print("\nGender column after filling missing values:")
print(employees['Gender'])


Gender column after filling missing values:
0           Male
1           Male
2         Female
3           Male
4           Male
         ...    
995    No Gender
996         Male
997         Male
998         Male
999         Male
Name: Gender, Length: 1000, dtype: object


In [21]:
# Fill all other numeric missing values with 0
employees.fillna(0, inplace=True)
print("\nDataFrame after filling all numeric NaNs with 0:")
employees


DataFrame after filling all numeric NaNs with 0:


Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
0,Douglas,Male,8/6/1993,12:42 PM,97308,6.945,True,Marketing
1,Thomas,Male,3/31/1996,6:53 AM,61933,4.170,True,0
2,Maria,Female,4/23/1993,11:17 AM,130590,11.858,False,Finance
3,Jerry,Male,3/4/2005,1:00 PM,138705,9.340,True,Finance
4,Larry,Male,1/24/1998,4:47 PM,101004,1.389,True,Client Services
...,...,...,...,...,...,...,...,...
995,Henry,No Gender,11/23/2014,6:09 AM,132483,16.655,False,Distribution
996,Phillip,Male,1/31/1984,6:30 AM,42392,19.675,False,Finance
997,Russell,Male,5/20/2013,12:39 PM,96914,1.421,False,Product
998,Larry,Male,4/20/2013,4:45 PM,60500,11.985,False,Business Development


In [20]:
# Interpolate only numeric columns
numeric_cols = employees.select_dtypes(include='number').columns
employees[numeric_cols] = employees[numeric_cols].interpolate(
    method='linear', limit_direction='forward'
)

print("\nDataFrame after linear interpolation of numeric columns:")
employees


DataFrame after linear interpolation of numeric columns:


Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
0,Douglas,Male,8/6/1993,12:42 PM,97308,6.945,True,Marketing
1,Thomas,Male,3/31/1996,6:53 AM,61933,4.170,True,0
2,Maria,Female,4/23/1993,11:17 AM,130590,11.858,False,Finance
3,Jerry,Male,3/4/2005,1:00 PM,138705,9.340,True,Finance
4,Larry,Male,1/24/1998,4:47 PM,101004,1.389,True,Client Services
...,...,...,...,...,...,...,...,...
995,Henry,No Gender,11/23/2014,6:09 AM,132483,16.655,False,Distribution
996,Phillip,Male,1/31/1984,6:30 AM,42392,19.675,False,Finance
997,Russell,Male,5/20/2013,12:39 PM,96914,1.421,False,Product
998,Larry,Male,4/20/2013,4:45 PM,60500,11.985,False,Business Development
