# Practicing data cleaning basics

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Read the laptops CSV, decoding the file as latin-1.
laptops = pd.read_csv(
    "laptops.csv",
    encoding="latin-1",
)

Let's clean the `Weight` column.

In [3]:
# Preview the first six raw weight strings.
print(laptops["Weight"].head(6))

0    1.37kg
1    1.34kg
2    1.86kg
3    1.83kg
4    1.37kg
5     2.1kg
Name: Weight, dtype: object


<br> 
1. It looks like every value contains "kg", so first we will confirm that:

In [4]:
# Print every weight string lacking "kg"; no output means all values carry it.
for value in laptops["Weight"]:
    if "kg" in value:
        continue
    print(value)

<br> 
2. Nothing was printed, so all strings contain "kg". Now we will clean that up (note: there is a "kgs" exception in the dataframe).

In [5]:
# Strip the trailing unit in one anchored pass: "kgs?" covers both the usual
# "kg" suffix and the "kgs" variant, so no other character in the value is
# touched (a blanket replace of "s" would delete every "s" anywhere in the
# string). Surrounding whitespace is trimmed first so the "$" anchor still
# matches when a value has trailing spaces. regex=True is spelled out because
# the default of `regex` differs between pandas versions.
laptops["Weight"] = (
    laptops["Weight"]
    .str.strip()
    .str.replace(r"kgs?$", "", regex=True)
)
print(laptops["Weight"][:6])

0    1.37
1    1.34
2    1.86
3    1.83
4    1.37
5     2.1
Name: Weight, dtype: object


<br> 
3. Now we can convert the values to numbers. Since the weights have decimal parts, float is the best choice for these values.

In [6]:
# Cast the cleaned weight strings to 64-bit floats, then show the resulting dtype.
laptops["Weight"] = laptops["Weight"].astype("float64")
laptops.dtypes["Weight"]

dtype('float64')

<br> 
4. Since we still need to know which unit of measurement the weight column uses, we will rename the column to include it:

In [7]:
# Put the unit into the column name, then list the columns to confirm the change.
laptops.rename(columns={"Weight": "weight_kg"}, inplace=True)
print(laptops.columns)

Index(['Manufacturer', 'Model Name', 'Category', 'Screen Size', 'Screen',
       'CPU', 'RAM', ' Storage', 'GPU', 'Operating System',
       'Operating System Version', 'weight_kg', 'Price (Euros)'],
      dtype='object')


<br> 
5. It is always a good idea to save the dataset after we clean it up:

In [8]:
# Write the cleaned frame to a new CSV; index=False leaves the row index out of the file.
laptops.to_csv(
    "laptops_weight_column_cleaned.csv",
    index=False,
)