# 1. Reading CSV Files with Encodings

In [1]:
import pandas as pd
import numpy as np

# Load the raw laptop listing; the encoding is passed explicitly instead of
# relying on read_csv's default.
laptops = pd.read_csv("laptops.csv", encoding="Latin-1")

# DataFrame.info() writes its summary to stdout and returns None, so it is
# called directly: wrapping it in print() appended a stray "None" line.
laptops.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1303 entries, 0 to 1302
Data columns (total 13 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   Manufacturer              1303 non-null   object
 1   Model Name                1303 non-null   object
 2   Category                  1303 non-null   object
 3   Screen Size               1303 non-null   object
 4   Screen                    1303 non-null   object
 5   CPU                       1303 non-null   object
 6   RAM                       1303 non-null   object
 7    Storage                  1303 non-null   object
 8   GPU                       1303 non-null   object
 9   Operating System          1303 non-null   object
 10  Operating System Version  1133 non-null   object
 11  Weight                    1303 non-null   object
 12  Price (Euros)             1303 non-null   object
dtypes: object(13)
memory usage: 132.5+ KB
None


# 2. Cleaning Column Names

In [2]:
# Header names can carry stray surrounding whitespace (e.g. " Storage");
# trim every label and write the cleaned list back onto the frame.
new_columns = [name.strip() for name in laptops.columns]
laptops.columns = new_columns

# 3. Cleaning Column Names Continued

In [3]:
import pandas as pd
# Re-read the file so the header cleaning below starts from the original,
# unmodified column labels.
laptops = pd.read_csv("laptops.csv", encoding="Latin-1")

def clean(column):
    """Return a normalised version of a raw column label.

    Steps, in order: trim surrounding whitespace, shorten the phrase
    "Operating System" to "os", drop parentheses, turn spaces into
    underscores, and lowercase the result.

    Parameters
    ----------
    column : str
        The raw column label.

    Returns
    -------
    str
        The cleaned label, e.g. "Price (Euros)" -> "price_euros".
    """
    label = column.strip().replace("Operating System", "os")
    # Remove both kinds of parenthesis before spaces are converted.
    for bracket in ("(", ")"):
        label = label.replace(bracket, "")
    return label.replace(" ", "_").lower()

# Run every header label through clean() and install the result as the
# frame's column index.
new_laptops_columns = [clean(label) for label in laptops.columns]

laptops.columns = new_laptops_columns

# 4. Converting String Columns to Numeric

In [4]:
# Collect the distinct raw values of the "ram" column for inspection before
# any conversion is attempted.
unique_ram = laptops["ram"].unique()

# 5. Removing Non-Digit Characters 

In [5]:
# Strip the literal "GB" suffix. regex=False keeps this a plain substring
# replacement, so the result does not depend on the regex default of the
# installed pandas version.
laptops["ram"] = laptops["ram"].str.replace("GB", "", regex=False)

# Distinct values after the suffix has been removed.
unique_ram = laptops["ram"].unique()

# 6. Converting Columns to Numeric Dtypes

In [6]:
# The "GB" suffix was stripped from "ram" in the previous step, so the
# remaining strings are expected to be plain integers and can be cast.
laptops["ram"] = laptops["ram"].astype(int)

# Snapshot of the per-column dtypes after the cast.
dtypes = laptops.dtypes

# 7. Renaming Columns

In [8]:
# Put the unit into the column name now that the values are bare numbers.
# The renamed frame is reassigned rather than mutated with inplace=True, so
# the step reads as an explicit "new frame from old frame" transformation.
laptops = laptops.rename(columns={"ram": "ram_gb"})

# Summary statistics of the renamed column.
ram_gb_desc = laptops["ram_gb"].describe()

# 8. Extracting Values from Strings

In [10]:
# The first whitespace-separated token of each description is taken as the
# manufacturer, for the GPU and the CPU columns alike.
laptops["gpu_manufacturer"] = laptops["gpu"].str.split().str[0]
laptops["cpu_manufacturer"] = laptops["cpu"].str.split().str[0]

# Number of rows per CPU manufacturer.
cpu_manufacturer_counts = laptops["cpu_manufacturer"].value_counts()

# 9. Correcting Bad Values

In [11]:
# Canonical spelling for every operating-system label. Only "Mac OS" is
# actually rewritten (to "macOS"); the identity entries are still required,
# because Series.map with a dict yields NaN for any value that is not a key.
mapping_dict = {
    "Android": "Android",
    "Chrome OS": "Chrome OS",
    "Linux": "Linux",
    "Mac OS": "macOS",
    "No OS": "No OS",
    "Windows": "Windows",
    "macOS": "macOS",
}

laptops["os"] = laptops["os"].map(mapping_dict)

# 10. Dropping Missing Values

In [13]:
# Two alternative ways of discarding missing data, each producing a new frame:
# drop every row that contains a null ...
laptops_no_null_rows = laptops.dropna(axis=0)
# ... or drop every column that contains a null.
laptops_no_null_cols = laptops.dropna(axis=1)

# 11. Filling Missing Values

In [None]:
# Operating systems of the rows whose version is missing, counted before
# anything is filled in.
version_missing = laptops["os_version"].isnull()
value_counts_before = laptops.loc[version_missing, "os"].value_counts()

# Set the version to "X" for every macOS row; the mask tests only the "os"
# column, so rows that already carry a version are overwritten as well.
is_macos = laptops["os"] == "macOS"
laptops.loc[is_macos, "os_version"] = "X"

