In [3]:
import pandas as pd

In [4]:
# 1) Instruction (Data Cleaning) - Find all null values in the dataset. If any
#    column contains null values, fill them with the mean of that column.
#
# A mean only exists for numeric columns, so every non-numeric column (this
# includes columns stored as text, e.g. currency-formatted strings) is filled
# with the placeholder "Unknown" instead.

df = pd.read_csv("sample_data/cars.csv")

# Per-column count of missing values before any cleaning.
null_values = df.isnull().sum()
print("Null Values in the Dataset:")
print(null_values)

# Build a single {column: replacement} mapping for the columns that actually
# contain nulls. Filling through one DataFrame.fillna call (and assigning the
# result back) avoids `df[column].fillna(..., inplace=True)`, which is chained
# assignment: it warns on recent pandas versions and does not modify `df` at
# all when Copy-on-Write is enabled.
fill_values = {}
for column in null_values[null_values > 0].index:
    if pd.api.types.is_numeric_dtype(df[column]):
        fill_values[column] = df[column].mean()
    else:
        fill_values[column] = "Unknown"

# Skip the call entirely when there is nothing to fill.
if fill_values:
    df = df.fillna(value=fill_values)

# Re-count to confirm that no missing values remain.
null_values_after_fill = df.isnull().sum()
print("\nNull Values After Filling:")
print(null_values_after_fill)

# Persist the cleaned data set next to the notebook.
df.to_csv("cleaned_cars.csv", index=False)

Null Values in the Dataset:
Make           4
Model          4
Type           4
Origin         4
DriveTrain     4
MSRP           4
Invoice        4
EngineSize     4
Cylinders      6
Horsepower     4
MPG_City       4
MPG_Highway    4
Weight         4
Wheelbase      4
Length         4
dtype: int64

Null Values After Filling:
Make           0
Model          0
Type           0
Origin         0
DriveTrain     0
MSRP           0
Invoice        0
EngineSize     0
Cylinders      0
Horsepower     0
MPG_City       0
MPG_Highway    0
Weight         0
Wheelbase      0
Length         0
dtype: int64


In [6]:
# 2) Question (Based on Value Counts) - Which different values of Make appear
#    in the dataset, and how many times does each Make occur?

# value_counts() returns one row per distinct Make, most frequent first.
make_column = df['Make']
make_counts = make_column.value_counts()

print("Different types of Make and their counts:", make_counts, sep="\n")


Different types of Make and their counts:
Toyota           28
Chevrolet        27
Mercedes-Benz    26
Ford             23
BMW              20
Audi             19
Nissan           17
Honda            17
Chrysler         15
Volkswagen       15
Mitsubishi       13
Dodge            13
Hyundai          12
Jaguar           12
Volvo            12
Kia              11
Mazda            11
Lexus            11
Pontiac          11
Subaru           11
Lincoln           9
Mercury           9
Buick             9
Saturn            8
Infiniti          8
GMC               8
Cadillac          8
Suzuki            8
Porsche           7
Saab              7
Acura             7
Unknown           4
Oldsmobile        3
Jeep              3
Land Rover        3
MINI              2
Scion             2
Isuzu             2
Hummer            1
Name: Make, dtype: int64


In [7]:
# 3) Instruction (Filtering) - Show all the records where Origin is Asia or Europe.

# Keep only the rows whose Origin matches one of the two requested regions.
filtered_df = df[df['Origin'].isin(['Asia', 'Europe'])]

print("Records where Origin is Asia or Europe:", filtered_df, sep="\n")


Records where Origin is Asia or Europe:
      Make                    Model   Type  Origin DriveTrain      MSRP  \
0    Acura                      MDX    SUV    Asia        All  $36,945    
1    Acura           RSX Type S 2dr  Sedan    Asia      Front  $23,820    
2    Acura                  TSX 4dr  Sedan    Asia      Front  $26,990    
3    Acura                   TL 4dr  Sedan    Asia      Front  $33,195    
4    Acura               3.5 RL 4dr  Sedan    Asia      Front  $43,755    
..     ...                      ...    ...     ...        ...       ...   
427  Volvo  C70 LPT convertible 2dr  Sedan  Europe      Front  $40,565    
428  Volvo  C70 HPT convertible 2dr  Sedan  Europe      Front  $42,565    
429  Volvo               S80 T6 4dr  Sedan  Europe      Front  $45,210    
430  Volvo                      V40  Wagon  Europe      Front  $26,135    
431  Volvo                     XC70  Wagon  Europe        All  $35,145    

      Invoice  EngineSize  Cylinders  Horsepower  MPG_City 

In [9]:
# 4) Instruction (Removing unwanted records) - Remove all the records (rows)
#    where Weight is above 4000.

# Rows with Weight <= 4000 are kept; `df` itself is left unchanged and the
# result is bound to `filtered_df`.
filtered_df = df.loc[df['Weight'].le(4000)]

print("Records where Weight is not above 4000:", filtered_df, sep="\n")

Records where Weight is not above 4000:
      Make                    Model   Type  Origin DriveTrain      MSRP  \
1    Acura           RSX Type S 2dr  Sedan    Asia      Front  $23,820    
2    Acura                  TSX 4dr  Sedan    Asia      Front  $26,990    
3    Acura                   TL 4dr  Sedan    Asia      Front  $33,195    
4    Acura               3.5 RL 4dr  Sedan    Asia      Front  $43,755    
5    Acura  3.5 RL w/Navigation 4dr  Sedan    Asia      Front  $46,100    
..     ...                      ...    ...     ...        ...       ...   
427  Volvo  C70 LPT convertible 2dr  Sedan  Europe      Front  $40,565    
428  Volvo  C70 HPT convertible 2dr  Sedan  Europe      Front  $42,565    
429  Volvo               S80 T6 4dr  Sedan  Europe      Front  $45,210    
430  Volvo                      V40  Wagon  Europe      Front  $26,135    
431  Volvo                     XC70  Wagon  Europe        All  $35,145    

      Invoice  EngineSize  Cylinders  Horsepower  MPG_City 

In [10]:
# 5) Instruction ( Applying function on a column ) - Increase all the values of 'MPG_City' column by 3.

def increase_by_3(x):
    """Return ``x`` increased by 3."""
    increment = 3
    return x + increment

# Apply increase_by_3 to every value and write the result back to the same
# column. Note: the column is overwritten, so re-running this cell adds
# another 3 on top of the previous result.
mpg_city_increased = df['MPG_City'].apply(increase_by_3)
df['MPG_City'] = mpg_city_increased

print("Updated MPG_City column:", df['MPG_City'], sep="\n")

Updated MPG_City column:
0      20.0
1      27.0
2      25.0
3      23.0
4      21.0
       ... 
427    24.0
428    23.0
429    22.0
430    25.0
431    23.0
Name: MPG_City, Length: 432, dtype: float64
