# Read CSV #

In [1]:
import pandas as pd

def read_csv_file(file_path):
    """
    Load a CSV file and hand it back as a Pandas DataFrame.

    Args:
        file_path (str): Location of the CSV file; passed unchanged to ``pd.read_csv``.

    Returns:
        pd.DataFrame or None: The parsed table on success. If the file is
            missing, empty, or cannot be parsed, an error message is printed
            and None is returned instead. Any other exception propagates.
    """
    try:
        return pd.read_csv(file_path)
    except FileNotFoundError:
        problem = f"Error: The file '{file_path}' was not found."
    except pd.errors.EmptyDataError:
        problem = f"Error: The file '{file_path}' is empty."
    except pd.errors.ParserError as e:
        problem = f"Error: An error occurred while parsing the file '{file_path}': {e}"

    # Only reached when one of the handled errors occurred.
    print(problem)
    return None

# Example usage: load the laptop pricing dataset from its hosted URL.
file_path = 'https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DA0101EN-Coursera/laptop_pricing_dataset_mod1.csv'  
df = read_csv_file(file_path)
# read_csv_file returns None after printing an error, so only preview on success.
if df is not None:
    print(df.head())  # print the first few rows of the DataFrame

   Unnamed: 0 Manufacturer  Category     Screen  GPU  OS  CPU_core  \
0           0         Acer         4  IPS Panel    2   1         5   
1           1         Dell         3    Full HD    1   1         3   
2           2         Dell         3    Full HD    1   1         7   
3           3         Dell         4  IPS Panel    2   1         5   
4           4           HP         4    Full HD    2   1         7   

   Screen_Size_cm  CPU_frequency  RAM_GB  Storage_GB_SSD  Weight_kg  Price  
0          35.560            1.6       8             256       1.60    978  
1          39.624            2.0       4             256       2.20    634  
2          39.624            2.7       8             256       2.20    946  
3          33.782            1.6       8             128       1.22   1244  
4          39.624            1.8       8             256       1.91    837  


# Identify Missing Values #

In [2]:
def find_missing_values(df):
    """
    Counts the missing (null/NaN) values in each column of a Pandas DataFrame.

    Args:
        df (pd.DataFrame): The DataFrame to check for missing values.

    Returns:
        pd.Series: One entry per column of ``df`` (including columns that have
            no missing values), indexed by column name, holding the number of
            missing values in that column.
    """
    # isnull() flags every missing cell as True; summing per column counts them.
    return df.isnull().sum()


# Count the missing values in every column of the loaded data and show the counts.
missing_values = find_missing_values(df)
print(missing_values)

Unnamed: 0        0
Manufacturer      0
Category          0
Screen            0
GPU               0
OS                0
CPU_core          0
Screen_Size_cm    4
CPU_frequency     0
RAM_GB            0
Storage_GB_SSD    0
Weight_kg         5
Price             0
dtype: int64


# Replace Missing Values #

In [3]:
import numpy as np

# Impute the gaps in one pass, with a fill value chosen per column:
#   Screen_Size_cm -> its most frequent value (first mode)
#   Weight_kg      -> its mean
df = df.fillna(
    value={
        "Screen_Size_cm": df["Screen_Size_cm"].mode().iloc[0],
        "Weight_kg": df["Weight_kg"].mean(),
    }
)

df.head()

Unnamed: 0.1,Unnamed: 0,Manufacturer,Category,Screen,GPU,OS,CPU_core,Screen_Size_cm,CPU_frequency,RAM_GB,Storage_GB_SSD,Weight_kg,Price
0,0,Acer,4,IPS Panel,2,1,5,35.56,1.6,8,256,1.6,978
1,1,Dell,3,Full HD,1,1,3,39.624,2.0,4,256,2.2,634
2,2,Dell,3,Full HD,1,1,7,39.624,2.7,8,256,2.2,946
3,3,Dell,4,IPS Panel,2,1,5,33.782,1.6,8,128,1.22,1244
4,4,HP,4,Full HD,2,1,7,39.624,1.8,8,256,1.91,837


# Modify Data Types #

In [5]:
# Cast both measurement columns to float in a single call; other columns are untouched
df = df.astype({"Screen_Size_cm": float, "Weight_kg": float})

df.dtypes

Unnamed: 0          int64
Manufacturer       object
Category            int64
Screen             object
GPU                 int64
OS                  int64
CPU_core            int64
Screen_Size_cm    float64
CPU_frequency     float64
RAM_GB              int64
Storage_GB_SSD      int64
Weight_kg         float64
Price               int64
dtype: object

# Standardization and Normalization #

In [6]:
# Re-express the two measurement columns in different units under new names:
#   'Screen_Size_cm' (centimeters) -> 'Screen_Size_inch' (inches)
#   'Weight_kg' (kilograms)        -> 'Weight_pounds' (pounds)
# The converted columns are appended on the right, then the originals are removed.
df = (
    df.assign(
        Screen_Size_inch=df['Screen_Size_cm'] * 0.393701,
        Weight_pounds=df['Weight_kg'] * 2.20462,
    )
    .drop(columns=['Screen_Size_cm', 'Weight_kg'])
)

In [7]:
# Express every 'CPU_frequency' value as a fraction of the column's largest value
max_value = df['CPU_frequency'].max()
df['CPU_frequency'] = df['CPU_frequency'].div(max_value)

# Categorical to Numerical #

In [8]:
# Build indicator columns for 'Screen', one per distinct value, named with a 'Screen' prefix
df1 = pd.get_dummies(df['Screen'], prefix='Screen')

# Swap the original 'Screen' column for the indicator columns, which go on the right
df = pd.concat([df.drop(columns=['Screen']), df1], axis=1)

 # USD to Euros #

In [10]:
# Derive a 'Price_EUR' column from 'Price' using a fixed factor of 0.88
df["Price_EUR"] = df["Price"].mul(0.88)

df.head()

Unnamed: 0.1,Unnamed: 0,Manufacturer,Category,GPU,OS,CPU_core,CPU_frequency,RAM_GB,Storage_GB_SSD,Price,Screen_Size_inch,Weight_pounds,Screen_Full HD,Screen_IPS Panel,Price_EUR
0,0,Acer,4,2,1,5,0.551724,8,256,978,14.000008,3.527392,False,True,860.64
1,1,Dell,3,1,1,3,0.689655,4,256,634,15.600008,4.850164,True,False,557.92
2,2,Dell,3,1,1,7,0.931034,8,256,946,15.600008,4.850164,True,False,832.48
3,3,Dell,4,2,1,5,0.551724,8,128,1244,13.300007,2.689636,False,True,1094.72
4,4,HP,4,2,1,7,0.62069,8,256,837,15.600008,4.210824,True,False,736.56


# Min-Max Normalization #

In [11]:
# Min-max scale CPU_frequency: shift by the column minimum, then divide by the range
cpu_freq = df["CPU_frequency"]
cpu_freq_min = cpu_freq.min()
cpu_freq_range = cpu_freq.max() - cpu_freq_min
df["Normalized_CPU_frequency"] = (cpu_freq - cpu_freq_min) / cpu_freq_range

df.head()

Unnamed: 0.1,Unnamed: 0,Manufacturer,Category,GPU,OS,CPU_core,CPU_frequency,RAM_GB,Storage_GB_SSD,Price,Screen_Size_inch,Weight_pounds,Screen_Full HD,Screen_IPS Panel,Price_EUR,Normalized_CPU_frequency
0,0,Acer,4,2,1,5,0.551724,8,256,978,14.000008,3.527392,False,True,860.64,0.235294
1,1,Dell,3,1,1,3,0.689655,4,256,634,15.600008,4.850164,True,False,557.92,0.470588
2,2,Dell,3,1,1,7,0.931034,8,256,946,15.600008,4.850164,True,False,832.48,0.882353
3,3,Dell,4,2,1,5,0.551724,8,128,1244,13.300007,2.689636,False,True,1094.72,0.235294
4,4,HP,4,2,1,7,0.62069,8,256,837,15.600008,4.210824,True,False,736.56,0.352941
