In [19]:
import pandas as pd
import csv

In [20]:
# Read the shopping dataset into a DataFrame, then show its column labels
df = pd.read_csv(filepath_or_buffer='shopping_data.csv')
df.columns

Index(['CustomerID', 'Card Member', 'Age', 'Annual Income',
       'Spending Score (1-100)'],
      dtype='object')

In [21]:
# Show the dtype of every column in the loaded frame
df.dtypes

CustomerID                  int64
Card Member                object
Age                       float64
Annual Income               int64
Spending Score (1-100)    float64
dtype: object

In [22]:
# Report the number of missing values in each column.
# The per-column counts are computed once, then printed label by label.
null_counts = df.isnull().sum()
for column, missing in null_counts.items():
    print(f"Columns {column} has {missing}")

Columns CustomerID has 0
Columns Card Member has 2
Columns Age has 2
Columns Annual Income has 0
Columns Spending Score (1-100) has 1


In [23]:
# Discard every row that has a missing value in at least one column
df = df.dropna(axis=0, how='any')

In [24]:
# Count rows that exactly repeat an earlier row.
# CustomerID is still part of the frame in this cell, so rows are compared
# on every column including the ID.
duplicate_count = df.duplicated().sum()
print(f"Duplicate entries: {duplicate_count}")

Duplicate entries: 0


In [25]:
# Remove the CustomerID column.
# Rebinding `df` (instead of dropping with inplace=True) together with
# errors='ignore' keeps this cell re-runnable: a second run is a no-op
# rather than a KeyError raised because the column is already gone.
df = df.drop(columns=['CustomerID'], errors='ignore')
df.head()

Unnamed: 0,Card Member,Age,Annual Income,Spending Score (1-100)
0,Yes,19.0,15000,39.0
1,Yes,21.0,15000,81.0
2,No,20.0,16000,6.0
3,No,23.0,16000,77.0
4,No,31.0,17000,40.0


In [26]:
# Transform string column
def change_string(member):
    """Encode a card-membership label as an integer flag.

    Returns 1 when ``member`` is exactly the string "Yes" and 0 for any
    other value (the comparison is case-sensitive).
    """
    return 1 if member == "Yes" else 0
    
# Run change_string over every 'Card Member' value and store the resulting
# 1/0 flags back into the same column, then display the frame.
member_flags = df['Card Member'].apply(change_string)
df['Card Member'] = member_flags
df


Unnamed: 0,Card Member,Age,Annual Income,Spending Score (1-100)
0,1,19.0,15000,39.0
1,1,21.0,15000,81.0
2,0,20.0,16000,6.0
3,0,23.0,16000,77.0
4,0,31.0,17000,40.0
...,...,...,...,...
198,0,35.0,120000,79.0
199,0,45.0,126000,28.0
200,1,32.0,126000,74.0
201,1,32.0,137000,18.0


In [27]:
# transform annual income (scale): divide every value by 1000
# NOTE: the column is overwritten with its own scaled values, so running
# this cell more than once divides the income again.
income_scaled = df['Annual Income'] / 1000
df['Annual Income'] = income_scaled
df

Unnamed: 0,Card Member,Age,Annual Income,Spending Score (1-100)
0,1,19.0,15.0,39.0
1,1,21.0,15.0,81.0
2,0,20.0,16.0,6.0
3,0,23.0,16.0,77.0
4,0,31.0,17.0,40.0
...,...,...,...,...
198,0,35.0,120.0,79.0
199,0,45.0,126.0,28.0
200,1,32.0,126.0,74.0
201,1,32.0,137.0,18.0


In [29]:
# Write the cleaned frame to a CSV file; index=False leaves the row index
# out of the written file.
file_path = "shopping_data_cleaned.csv"
df.to_csv(path_or_buf=file_path, index=False)