# Cafe Sales Data Cleaning
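This notebook cleans the `dirty_cafe_sales.csv` dataset: it converts placeholder values (`ERROR`, `UNKNOWN`) to missing, coerces each column to its proper type, fills the remaining gaps, and writes the result to `cleaned_cafe_sales.csv`.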

In [None]:
import pandas as pd

# Read the raw cafe sales export into a DataFrame and preview the first rows
raw_csv_path = 'dirty_cafe_sales.csv'
df = pd.read_csv(filepath_or_buffer=raw_csv_path)
df.head()

In [None]:
# Treat the 'ERROR' and 'UNKNOWN' placeholder strings as missing values
placeholder_tokens = ['ERROR', 'UNKNOWN']
df.replace(placeholder_tokens, pd.NA, inplace=True)

# Coerce the numeric columns; values that cannot be parsed become NaN
for numeric_col in ('Quantity', 'Price Per Unit', 'Total Spent'):
    df[numeric_col] = pd.to_numeric(df[numeric_col], errors='coerce')

# Parse the transaction dates; values that cannot be parsed become NaT
df['Transaction Date'] = pd.to_datetime(df['Transaction Date'], errors='coerce')

In [None]:
# Fill missing values
#
# Each result is assigned back to its column rather than using
# `df[col].fillna(..., inplace=True)`. That chained form operates on a
# temporary Series: pandas 2.x emits a FutureWarning for it, and under
# Copy-on-Write (the default from pandas 3.0) it leaves `df` unchanged.
df['Item'] = df['Item'].fillna('Unknown Item')
df['Quantity'] = df['Quantity'].fillna(1)
df['Price Per Unit'] = df['Price Per Unit'].fillna(df['Price Per Unit'].median())

# Recompute the total for every row from the (now filled) quantity and price.
# This overwrites whatever 'Total Spent' value was read from the file.
df['Total Spent'] = df['Quantity'] * df['Price Per Unit']

df['Payment Method'] = df['Payment Method'].fillna('Other')
df['Location'] = df['Location'].fillna('Unknown Location')

# Carry the previous row's date forward. `.ffill()` replaces the deprecated
# `fillna(method='ffill')`. Missing dates before the first valid one stay NaT.
df['Transaction Date'] = df['Transaction Date'].ffill()

In [None]:
# Write the cleaned frame to disk without the row index, then print a
# summary of the resulting columns
cleaned_csv_path = 'cleaned_cafe_sales.csv'
df.to_csv(cleaned_csv_path, index=False)
df.info()