In [None]:
import pandas as pd

# Load the raw data into a DataFrame (assuming it's in a CSV file)
raw_data = pd.read_csv('raw_data.csv')

# Step 2: Data Wrangling (Cleaning)

# 2.1 Handling Missing Values
# Per-column count of missing values, captured BEFORE any cleaning so it
# can be inspected as a data-quality report.
missing_values = raw_data.isnull().sum()

# Impute missing 'age' values with the column mean.
# Assign the result back instead of calling fillna(inplace=True) on the
# column selection: the chained in-place form is deprecated and does not
# modify the parent DataFrame when pandas Copy-on-Write is active.
raw_data['age'] = raw_data['age'].fillna(raw_data['age'].mean())

# Drop rows with missing values in specific columns (e.g., 'product_id')
raw_data = raw_data.dropna(subset=['product_id'])

# 2.2 Removing Duplicates
# Boolean mask: True for every row that repeats an earlier row exactly.
duplicate_rows = raw_data.duplicated()

# Keep only the first occurrence of each row. The explicit copy makes the
# result an independent DataFrame, so the column assignment below does not
# raise SettingWithCopyWarning.
raw_data = raw_data[~duplicate_rows].copy()

# 2.3 Data Type Conversion
# Convert a column to a different data type (e.g., 'date' to datetime)
raw_data['date'] = pd.to_datetime(raw_data['date'])

# 2.4 Renaming Columns
# Rename columns for clarity. 'old_column_name' is a placeholder; with the
# default errors='ignore', rename() leaves the frame unchanged if that
# column is absent.
raw_data = raw_data.rename(columns={'old_column_name': 'new_column_name'})

# After cleaning, save the cleaned data to a new CSV file (without the index)
raw_data.to_csv('cleaned_data.csv', index=False)
