In [2]:
#  Mall Customer Segmentation – Task 1 Data Cleaning

import pandas as pd

# Locations of the raw and cleaned datasets (Colab working directory).
INPUT_PATH = "/content/Mall_Customers.csv"
OUTPUT_PATH = "/content/cleaned_mall_customers.csv"

# Columns that must end up as integers, using their standardized names.
INTEGER_COLUMNS = (
    "customerid",
    "age",
    "annual_income_(k$)",
    "spending_score_(1-100)",
)


def standardize_columns(df):
    """Return a copy of *df* with trimmed, lower-case, underscore-separated headers.

    The input frame is not modified. The transformation is idempotent, so it
    is safe to apply to a frame whose headers are already standardized.
    """
    renamed = df.copy()
    renamed.columns = (
        renamed.columns.str.strip().str.lower().str.replace(" ", "_", regex=False)
    )
    return renamed


def clean_mall_customers(df):
    """Return a cleaned copy of the mall-customers table.

    Steps, in order:

    1. Standardize column headers (see ``standardize_columns``).
    2. Normalize ``gender`` (missing/blank -> ``'unknown'``, trimmed, lower-case).
    3. Drop exact duplicate rows.
    4. Give rows with a missing ``customerid`` fresh, unique IDs.
    5. Impute ``age`` with the mean and income / spending score with the median.
    6. Round and cast the ID and numeric columns to ``int``.

    Args:
        df: Raw DataFrame containing the customer ID, gender, age, annual
            income and spending score columns (raw or standardized headers).

    Returns:
        A new DataFrame; *df* itself is left untouched.

    Raises:
        KeyError: If an expected column is absent after header standardization.
    """
    cleaned = standardize_columns(df)

    #  Standardize text values *before* de-duplicating, so rows that differ
    #  only by case or stray whitespace (" Male" vs "male") count as duplicates.
    gender = cleaned["gender"].fillna("unknown").astype(str).str.strip().str.lower()
    cleaned = cleaned.assign(gender=gender.replace("", "unknown"))

    #  Remove duplicates
    cleaned = cleaned.drop_duplicates()

    #  Missing IDs: an identifier must stay unique, so filling with the mode
    #  (an ID that already belongs to another customer) would be wrong.
    #  Hand out new IDs that continue after the largest existing one instead.
    ids = cleaned["customerid"].copy()
    missing_id = ids.isna()
    if missing_id.any():
        first_new_id = 1 if missing_id.all() else int(ids.max()) + 1
        ids.loc[missing_id] = list(
            range(first_new_id, first_new_id + int(missing_id.sum()))
        )
        cleaned = cleaned.assign(customerid=ids)

    #  Handle remaining missing values
    cleaned = cleaned.fillna({
        "age": cleaned["age"].mean(),
        "annual_income_(k$)": cleaned["annual_income_(k$)"].median(),
        "spending_score_(1-100)": cleaned["spending_score_(1-100)"].median(),
    })

    #  Fix data types. Round first: a bare astype(int) truncates, which would
    #  turn an imputed mean of 38.85 into 38 rather than 39.
    cleaned = cleaned.round({column: 0 for column in INTEGER_COLUMNS})
    cleaned = cleaned.astype({column: int for column in INTEGER_COLUMNS})

    return cleaned


# In a notebook cell or when run as a script __name__ is "__main__", so the
# pipeline below still executes and `df` remains a module-level variable;
# importing this code elsewhere no longer triggers file I/O.
if __name__ == "__main__":
    # Load the dataset
    df = pd.read_csv(INPUT_PATH)

    #  Step 1: Standardize column headers
    df = standardize_columns(df)

    # Print columns after standardization to check the exact names
    print("Columns after standardization:", df.columns)

    #  Steps 2-5: De-duplicate, handle missing values, standardize text, fix types
    df = clean_mall_customers(df)

    #  Step 6: Save cleaned dataset
    df.to_csv(OUTPUT_PATH, index=False)
    print(f" Cleaned dataset saved to {OUTPUT_PATH}")

    #  Optional: Show result summary
    print("\n Summary:")
    print(f"Rows: {df.shape[0]}, Columns: {df.shape[1]}")
    print("Nulls after cleaning:\n", df.isnull().sum())
    print("Sample:\n", df.head())

Columns after standardization: Index(['customerid', 'gender', 'age', 'annual_income_(k$)',
       'spending_score_(1-100)'],
      dtype='object')
 Cleaned dataset saved to /content/cleaned_mall_customers.csv

 Summary:
Rows: 200, Columns: 5
Nulls after cleaning:
 customerid                0
gender                    0
age                       0
annual_income_(k$)        0
spending_score_(1-100)    0
dtype: int64
Sample:
    customerid  gender  age  annual_income_(k$)  spending_score_(1-100)
0           1    male   19                  15                      39
1           2    male   21                  15                      81
2           3  female   20                  16                       6
3           4  female   23                  16                      77
4           5  female   31                  17                      40
