In [3]:
# Import Dependencies
import pandas as pd

In [4]:
# Load Shopping Data
# Read the raw CSV into a DataFrame, decoding the file as ISO-8859-1.
file_path = "Resources/shopping_data.csv"
shopping_df = pd.read_csv(filepath_or_buffer=file_path, encoding="ISO-8859-1")

# Preview the first five rows of the freshly loaded frame.
shopping_df.head(5)

Unnamed: 0,CustomerID,Card Member,Age,Annual Income,Spending Score (1-100)
0,1,Yes,19.0,15000,39.0
1,2,Yes,21.0,15000,81.0
2,3,No,20.0,16000,6.0
3,4,No,23.0,16000,77.0
4,5,No,31.0,17000,40.0


In [5]:
# Check DataFrame Columns
# Display the column labels of the loaded frame so they can be reviewed
# before any cleaning steps rename or remove them.

shopping_df.columns

Index(['CustomerID', 'Card Member', 'Age', 'Annual Income',
       'Spending Score (1-100)'],
      dtype='object')

In [6]:
# Check DataFrame Data Types
# Display the dtype of every column; non-numeric columns show up as `object`.
shopping_df.dtypes

CustomerID                  int64
Card Member                object
Age                       float64
Annual Income               int64
Spending Score (1-100)    float64
dtype: object

In [7]:
# Check for Null Values
# Count the missing entries in each column (isna is the same check as isnull).
shopping_df.isna().sum()

CustomerID                0
Card Member               2
Age                       2
Annual Income             0
Spending Score (1-100)    1
dtype: int64

In [8]:
# Drop Null Rows
# Discard every row that has at least one missing value in any column.
shopping_df = shopping_df.dropna(axis=0, how="any")

In [9]:
# Check for Duplicate Rows
# Tally rows that are exact repeats of an earlier row, then report the total.
duplicate_count = shopping_df.duplicated().sum()
print(f"Duplicate entries: {duplicate_count}")

Duplicate entries: 0


In [10]:
# Remove the CustomerID Column
# Reassign instead of mutating with inplace=True so the cell can be re-run
# safely: errors="ignore" makes the drop a no-op when the column has already
# been removed, rather than raising KeyError on a second execution.
shopping_df = shopping_df.drop(columns=["CustomerID"], errors="ignore")
shopping_df.head()

Unnamed: 0,Card Member,Age,Annual Income,Spending Score (1-100)
0,Yes,19.0,15000,39.0
1,Yes,21.0,15000,81.0
2,No,20.0,16000,6.0
3,No,23.0,16000,77.0
4,No,31.0,17000,40.0


In [12]:
# Transform 'Card Member' Column from String to Numeric Bool
def change_string(member):
    """Encode a single 'Card Member' value as a numeric flag.

    Parameters
    ----------
    member : object
        One cell value from the column: the raw string ("Yes" or another
        label) or a value that has already been encoded.

    Returns
    -------
    int
        1 when ``member`` is "Yes" or is already the encoded value 1;
        0 for every other value.
    """
    # Treat an already-encoded 1 as a member too. Without this, re-running
    # the encoding cell compares 1 to "Yes", fails, and silently turns every
    # member into 0.
    if member == "Yes" or member == 1:
        return 1
    return 0
    
# Encode every 'Card Member' entry element-wise, then write the encoded
# values back over the original column.
card_member_flags = shopping_df["Card Member"].apply(change_string)
shopping_df["Card Member"] = card_member_flags

# Preview the frame with the encoded column.
shopping_df.head()

Unnamed: 0,Card Member,Age,Annual Income,Spending Score (1-100)
0,1,19.0,15000,39.0
1,1,21.0,15000,81.0
2,0,20.0,16000,6.0
3,0,23.0,16000,77.0
4,0,31.0,17000,40.0


In [14]:
# Skill Drill:
# Reformat Column Names so they contain no Spaces or Numbers

# Rename by explicit old -> new mapping rather than assigning a list to
# `.columns` by position: a positional assignment silently attaches the wrong
# label to a column if the column order ever differs from what is expected.
# Labels not present in the frame are left alone, so re-running this cell
# after the rename is a no-op.
column_renames = {
    "Card Member": "card_member",
    "Age": "age",
    "Annual Income": "annual_income",
    "Spending Score (1-100)": "spending_score",
}

# Kept for any later cell that refers to the list of cleaned names.
new_col_names = list(column_renames.values())

shopping_df = shopping_df.rename(columns=column_renames)

shopping_df.columns

Index(['card_member', 'age', 'annual_income', 'spending_score'], dtype='object')

In [15]:
# Save Cleaned data as csv
# Write the cleaned frame to disk; index=False keeps the row index out of
# the output file.
file_path = "Resources/shopping_data_cleaned.csv"
shopping_df.to_csv(path_or_buf=file_path, index=False)