<a href="https://colab.research.google.com/github/PriyadarshiniMaddela/Walmart-Customers-Behavior-Analysis/blob/main/Walmart.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd

# Load the raw export. `df` is rebuilt from the CSV on every run, so this cell
# can be re-executed without compounding earlier transformations.
df = pd.read_csv("Walmart_customer_purchases.csv")

print(f"Raw data: {df.shape[0]:,} rows × {df.shape[1]} columns")


def _warn_if_values_lost(label: str, before: pd.Series, after: pd.Series) -> int:
    """Report values that were present before a conversion but are missing after it.

    Parameters
    ----------
    label : str
        Column name shown in the warning.
    before : pd.Series
        The column as it was before conversion.
    after : pd.Series
        The converted column (same index as ``before``).

    Returns
    -------
    int
        Number of rows that held a value in ``before`` and are null in ``after``.
        Nothing is printed when this is zero.
    """
    lost = int((after.isna() & before.notna()).sum())
    if lost:
        print(f"WARNING: {lost:,} value(s) in {label} could not be converted and are now missing")
    return lost


# 1. Purchase_Date arrives as text (e.g. 8/30/2024). No format is passed, so
#    pandas infers it; 'infer_datetime_format' is deprecated and not needed.
#    errors='coerce' turns unparseable entries into NaT instead of raising,
#    so count them rather than dropping them silently.
raw_dates = df['Purchase_Date']
df['Purchase_Date'] = pd.to_datetime(raw_dates, errors='coerce')
_warn_if_values_lost('Purchase_Date', raw_dates, df['Purchase_Date'])

# 2. Purchase_Amount as a number, plus a display-only "$1,234.56" text column.
raw_amounts = df['Purchase_Amount']
df['Purchase_Amount'] = pd.to_numeric(raw_amounts, errors='coerce')
_warn_if_values_lost('Purchase_Amount', raw_amounts, df['Purchase_Amount'])
# na_action='ignore' keeps missing amounts missing instead of formatting them
# as the literal string "$nan".
df['Amount_$'] = df['Purchase_Amount'].map('${:,.2f}'.format, na_action='ignore')

# 3. Yes/No -> booleans. Values are trimmed and lower-cased first so variants
#    such as ' yes' or 'NO' are recognised; anything else becomes missing and
#    is reported.
yes_no = {'yes': True, 'no': False}
for flag_col in ('Discount_Applied', 'Repeat_Customer'):
    raw_flags = df[flag_col]
    normalised = raw_flags.astype(str).str.strip().str.lower()
    df[flag_col] = normalised.map(yes_no)
    _warn_if_values_lost(flag_col, raw_flags, df[flag_col])

# 4. Store the text columns as pandas categoricals (one astype call for all).
cat_cols = ['Gender', 'City', 'Category', 'Product_Name', 'Payment_Method']
df = df.astype({name: 'category' for name in cat_cols})

# 5. Final check
print("\nCleaning 100% complete!")
print("few lines for video")
print(f"Date range       : {df['Purchase_Date'].min().date()} to {df['Purchase_Date'].max().date()}")
print(f"Total revenue    : ${df['Purchase_Amount'].sum():,.0f}")

# 6. Save clean files. CSV stores plain text only, so the datetime/category
#    dtypes set above must be re-applied when the CSV is read back.
#    The .xlsx export needs an Excel writer engine (e.g. openpyxl) installed.
df.to_csv("Walmart_Customer_Purchases_CLEANED.csv", index=False)
df.to_excel("Walmart_Customer_Purchases_CLEANED.xlsx", index=False)

print("\nClean files ready → download from left panel")
df.head()

Raw data: 50,000 rows × 12 columns

Cleaning 100% complete!
few lines for video
Date range       : 2024-02-10 to 2025-02-09
Total revenue    : $12,776,611

Clean files ready → download from left panel


Unnamed: 0,Customer_ID,Age,Gender,City,Category,Product_Name,Purchase_Date,Purchase_Amount,Payment_Method,Discount_Applied,Rating,Repeat_Customer,Amount_$
0,84607c1f-910c-44d5-b89f-e1ee06dd34c0,49,Female,New Cynthia,Electronics,Smartphone,2024-08-30,253.26,Cash on Delivery,False,1,True,$253.26
1,f2a81712-a73e-4424-8b39-4c615a0bd4ea,36,Other,Cruzport,Clothing,T-Shirt,2024-12-21,73.19,Debit Card,True,1,False,$73.19
2,da9be287-8b0e-4688-bccd-1a2cdd7567c6,52,Male,Jeffreytown,Beauty,Perfume,2024-12-26,125.62,Credit Card,True,1,False,$125.62
3,50ec6932-3ac7-492f-9e55-4b148212f302,47,Female,Jenniferburgh,Electronics,Smartwatch,2024-11-04,450.32,Credit Card,False,2,True,$450.32
4,8fdc3098-fc75-4b0f-983c-d8d8168c6362,43,Other,Kingshire,Electronics,Smartphone,2024-10-07,369.28,Credit Card,True,2,True,$369.28
