<a href="https://colab.research.google.com/github/DegaOmprakash/Data-Cleaning-Processing/blob/main/Data_Cleaning_%26_Processing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [11]:
import pandas as pd

def clean_dataframe(df):
    """Return a cleaned copy of *df*; the frame passed in is not modified.

    The steps run in an order chosen so that each one sees the output of
    the normalisation before it:

    1. Text columns are lower-cased and stripped of surrounding whitespace.
       Non-string values inside a text column are left as they are.
    2. Columns whose name contains "date" are parsed with
       ``pd.to_datetime``; values that cannot be parsed become ``NaT``.
    3. Missing values are filled: median for numeric columns, most frequent
       value for everything else. A column with no usable value at all is
       left untouched.
    4. Duplicate rows are dropped. This happens after steps 1-3 so rows
       that only differed by case, whitespace or a missing value are
       recognised as duplicates.
    5. Float columns that hold only whole numbers are converted to int64.
    """
    df = df.copy()
    types = pd.api.types

    # Standardize text columns (object and pandas string dtypes alike).
    for col in df.columns:
        dtype = df[col].dtype
        if types.is_object_dtype(dtype) or types.is_string_dtype(dtype):
            df[col] = df[col].map(
                lambda v: v.lower().strip() if isinstance(v, str) else v
            )

    # Convert date columns (any column whose name contains 'date').
    # str() keeps this working when a column label is not a string.
    for col in df.columns:
        if 'date' in str(col).lower():
            df[col] = pd.to_datetime(df[col], errors='coerce')

    # Handle missing values, including NaT produced by the date parsing.
    for col in df.columns:
        if not df[col].isna().any():
            continue
        if types.is_numeric_dtype(df[col].dtype):
            fill_value = df[col].median()
        else:
            modes = df[col].mode()
            if modes.empty:
                # Entirely missing column: there is nothing to fill with.
                continue
            fill_value = modes.iloc[0]
        df[col] = df[col].fillna(fill_value)

    # Remove duplicate rows (after normalisation, see docstring).
    df = df.drop_duplicates()

    # Fix numeric data types: whole-valued float columns become int64.
    # The notna() guard keeps columns that are still missing values as float.
    for col in df.columns:
        values = df[col]
        if not types.is_float_dtype(values.dtype):
            continue
        if values.notna().all() and (values % 1 == 0).all():
            df[col] = values.astype('int64')

    return df


# The guard is true when this runs as a notebook cell or a script, so `df`
# is still defined at top level; it only prevents file I/O on a plain import.
if __name__ == "__main__":
    # Step 1: Load dataset
    df = pd.read_csv("/content/sales_data.csv")

    print("Initial Shape:", df.shape)
    print("First 5 rows:")
    print(df.head())

    # Step 2: Check info and missing values
    print("\nDataset Info:")
    df.info()  # info() prints its report itself and returns None
    print("\nMissing Values:")
    print(df.isnull().sum())

    # Steps 3-7: Standardize, parse dates, fill, de-duplicate, fix dtypes
    df = clean_dataframe(df)

    # Step 8: Save cleaned dataset
    df.to_csv("/content/sales_data_cleaned.csv", index=False)

    print("\nFinal Shape:", df.shape)
    print("Cleaned dataset saved as sales_data_cleaned.csv")


Initial Shape: (113036, 18)
First 5 rows:
         Date  Day     Month  Year  Customer_Age       Age_Group  \
0  2013-11-26   26  November  2013            19     Youth (<25)   
1  2015-11-26   26  November  2015            19     Youth (<25)   
2  2014-03-23   23     March  2014            49  Adults (35-64)   
3  2016-03-23   23     March  2016            49  Adults (35-64)   
4  2014-05-15   15       May  2014            47  Adults (35-64)   

  Customer_Gender    Country             State Product_Category Sub_Category  \
0               M     Canada  British Columbia      Accessories   Bike Racks   
1               M     Canada  British Columbia      Accessories   Bike Racks   
2               M  Australia   New South Wales      Accessories   Bike Racks   
3               M  Australia   New South Wales      Accessories   Bike Racks   
4               F  Australia   New South Wales      Accessories   Bike Racks   

               Product  Order_Quantity  Unit_Cost  Unit_Price  Profi