# 🧹 Task 1 – Data Cleaning (Elevate Labs Internship)

This notebook demonstrates how I cleaned and preprocessed a raw order dataset as part of my data analyst internship task.

In [None]:
import pandas as pd

In [None]:
# Load the raw order workbook into `df`.
df = pd.read_excel(io='Task_1DataCleaning_Elevate_Labs.xlsx')

# Preview the first five rows as a quick sanity check on the load.
df.head(n=5)

In [None]:
# Work on a copy so the raw frame `df` stays available for comparison.
df_clean = df.copy()

# Normalise the headers FIRST (trim, lower-case, spaces -> underscores) so that
# stray whitespace or casing in the raw sheet cannot break the column look-ups
# below. str() guards against non-string column labels.
df_clean.columns = [str(c).strip().lower().replace(' ', '_') for c in df_clean.columns]

# Fill missing text values with an explicit placeholder.
df_clean['customer_name'] = df_clean['customer_name'].fillna('Unknown')
df_clean['region'] = df_clean['region'].fillna('Unknown')

# Parse order dates; entries that cannot be parsed become NaT.
# (`infer_datetime_format` is deprecated since pandas 2.0, so it is not passed.)
df_clean['order_date'] = pd.to_datetime(df_clean['order_date'], errors='coerce')

# Quantity: non-numeric entries are coerced to NaN, then treated as 0.
df_clean['quantity'] = (
    pd.to_numeric(df_clean['quantity'], errors='coerce')
    .fillna(0)
    .astype(int)
)

# Price: strip '$' and thousands separators, then parse. Cells that still are
# not numeric become NaN instead of aborting the whole cell, matching how
# Quantity is handled. astype(float) keeps the column dtype as float.
df_clean['price'] = pd.to_numeric(
    df_clean['price'].replace(r'[\$,]', '', regex=True),
    errors='coerce',
).astype(float)

# Remove exact duplicate rows (compared after the cleaning steps above).
df_clean = df_clean.drop_duplicates()

df_clean.head()

In [None]:
# Write the cleaned frame to CSV without the pandas row index,
# then print a short confirmation.
df_clean.to_csv(path_or_buf='cleaned_dataset.csv', index=False)
print('Cleaned dataset saved!')