In [2]:
import pandas as pd

# Read the raw mall-customer records from the CSV export
df = pd.read_csv("Mall_Customers.csv")

# Preview the top of the raw table before any cleaning happens
print("Original Data:")
print(df.head())

# Report how many nulls each column holds
print("\nMissing Values:")
print(df.isnull().sum())

# Canonical spellings for the gender labels once they are trimmed and lowercased
gender_labels = {'male': 'Male', 'female': 'Female'}

# Cleaning pipeline: drop incomplete rows, drop exact duplicate rows, then
# normalise Gender (strip whitespace -> lowercase -> canonical capitalisation)
df = (
    df.dropna()
    .drop_duplicates()
    .assign(
        Gender=lambda frame: frame['Gender']
        .str.strip()
        .str.lower()
        .replace(gender_labels)
    )
)

# Header clean-up: trim, lowercase, and swap spaces for underscores
tidy_headers = df.columns.str.strip().str.lower().str.replace(" ", "_")
df.columns = tidy_headers

# Show the resulting column dtypes (inspection only; nothing is cast here)
print("\nData Types:")
print(df.dtypes)

# Persist the cleaned table without the positional index column
df.to_csv("Cleaned_Mall_Customers.csv", index=False)

print("\nCleaned dataset saved as 'Cleaned_Mall_Customers.csv'")


Original Data:
   CustomerID  Gender  Age  Annual Income (k$)  Spending Score (1-100)
0           1    Male   19                  15                      39
1           2    Male   21                  15                      81
2           3  Female   20                  16                       6
3           4  Female   23                  16                      77
4           5  Female   31                  17                      40

Missing Values:
CustomerID                0
Gender                    0
Age                       0
Annual Income (k$)        0
Spending Score (1-100)    0
dtype: int64

Data Types:
customerid                 int64
gender                    object
age                        int64
annual_income_(k$)         int64
spending_score_(1-100)     int64
dtype: object

Cleaned dataset saved as 'Cleaned_Mall_Customers.csv'


In [3]:
# Colab-only step: hand the cleaned CSV written by the cleaning cell to
# google.colab's `files.download` helper so it can be saved locally.
# This import is unavailable outside a Google Colab runtime.
from google.colab import files
files.download("Cleaned_Mall_Customers.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [4]:
# Assemble the README body line by line; the trailing empty entry makes the
# joined text end with a newline.
readme_content = "\n".join([
    "# Task 1 - Mall Customers Data Cleaning",
    "",
    "- Cleaned Gender column",
    "- Removed duplicates",
    "- Renamed column headers",
    "- Checked missing values",
    "- Saved as Cleaned_Mall_Customers.csv",
    "",
])

# Write the summary to README.md in the working directory (overwrites any existing file)
with open("README.md", "w") as readme_file:
    readme_file.write(readme_content)

print("✅ README.md file created.")

✅ README.md file created.


In [5]:
# Colab-only step: pass the README.md generated by the previous cell to
# google.colab's `files.download` helper so it can be saved locally.
# This import is unavailable outside a Google Colab runtime.
from google.colab import files
files.download("README.md")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>